# Chi-Square Test of Independence


In [13]:
# Import Libraries
import numpy as np
import pandas as pd
from scipy.stats import chi2_contingency, chi2

In [14]:

# Observed counts, laid out as satisfaction level (rows) x device type (columns)
satisfaction_levels = [
    "Very Satisfied",
    "Satisfied",
    "Neutral",
    "Unsatisfied",
    "Very Unsatisfied",
]
device_types = ["Smart Thermostat", "Smart Light"]

# One inner list per satisfaction level, in the same order as
# `satisfaction_levels`; each holds the [Smart Thermostat, Smart Light] counts
data = np.array([[50, 70], [80, 100], [60, 90], [30, 50], [20, 50]])

# Labelled view of the same counts, used for display
df = pd.DataFrame(data, index=satisfaction_levels, columns=device_types)

print("Contingency Table:")
print(df)


Contingency Table:
                  Smart Thermostat  Smart Light
Very Satisfied                  50           70
Satisfied                       80          100
Neutral                         60           90
Unsatisfied                     30           50
Very Unsatisfied                20           50


In [15]:

# Print the null (H0) and alternative (H1) hypotheses, one per line
print(
    "Hypotheses:",
    "H0: There is NO association between device type and customer satisfaction.",
    "H1: There IS an association between device type and customer satisfaction.",
    sep="\n",
)


Hypotheses:
H0: There is NO association between device type and customer satisfaction.
H1: There IS an association between device type and customer satisfaction.


In [16]:

# Run the chi-square test on the observed counts; the result unpacks into
# the test statistic, the p-value, the degrees of freedom and the table of
# expected frequencies
(chi2_stat, p_val, dof, expected) = chi2_contingency(data)

print(f"Chi-Square Statistic = {chi2_stat:.4f}")
print(f"Degrees of Freedom = {dof}")

# Re-label the expected counts with the observed table's rows and columns
# so the two tables can be compared side by side
expected_df = pd.DataFrame(expected, columns=df.columns, index=df.index)
print("Expected Frequencies:")
print(expected_df)


Chi-Square Statistic = 5.6382
Degrees of Freedom = 4
Expected Frequencies:
                  Smart Thermostat  Smart Light
Very Satisfied                48.0         72.0
Satisfied                     72.0        108.0
Neutral                       60.0         90.0
Unsatisfied                   32.0         48.0
Very Unsatisfied              28.0         42.0


In [17]:

# Critical value for the chosen significance level.
# `alpha` is the single source of truth for the significance level: it drives
# both the computation and the printed label, so changing it here cannot
# leave a stale "alpha=..." in the output.
alpha = 0.05

# chi2.ppf is the inverse CDF, so this is the (1 - alpha) quantile of the
# chi-square distribution with `dof` degrees of freedom (upper-tail cutoff)
critical_value = chi2.ppf(1 - alpha, dof)

print(f"Critical Value (alpha={alpha}, df={dof}) = {critical_value:.4f}")
print(f"P-value = {p_val:.4f}")


Critical Value (alpha=0.05, df=4) = 9.4877
P-value = 0.2278


In [18]:

# Decision rule (critical-value approach): H0 is rejected only when the
# test statistic is strictly greater than the critical value; every other
# case falls into "fail to reject"
if not (chi2_stat > critical_value):
    print("Decision: Fail to Reject H0")
    print("Conclusion: There is NO significant association between device type and customer satisfaction.")
else:
    print("Decision: Reject H0")
    print("Conclusion: There IS a significant association between device type and customer satisfaction.")


Decision: Fail to Reject H0
Conclusion: There is NO significant association between device type and customer satisfaction.


# Hypothesis Testing

In [19]:
import math
from scipy.stats import norm

In [20]:
# Inputs for the weekly operating cost test

# Cost model parameters
fixed_cost = 1000         # fixed component of the weekly cost
variable_cost = 5         # cost multiplier applied to units produced

# Units produced
X_mean = 600              # average units produced
X_std = 25                # std deviation of units

# Sample information
n = 25                    # sample size
sample_mean = 3050        # x̄ = observed sample mean weekly cost

In [9]:
# Mean cost implied by the cost model: variable part at the average
# production level, plus the fixed part
mu = variable_cost * X_mean + fixed_cost
print(f"Theoretical mean cost (μ) = {mu}")

Theoretical mean cost (μ) = 4000


In [10]:
# Standard deviation of cost: only the variable part of the model varies,
# so the spread of units is scaled by the per-unit cost
sigma = X_std * variable_cost
print(f"Standard deviation (σ) = {sigma}")

Standard deviation (σ) = 125


In [11]:
# Standard error of the sample mean: σ divided by the square root of n
sqrt_n = math.sqrt(n)
SE = sigma / sqrt_n
print(f"Standard Error (SE) = {SE:.2f}")

Standard Error (SE) = 25.00


In [12]:
# Print the null (H0) and alternative (H1) hypotheses for the cost test;
# the leading newline in the first string leaves a blank line before the heading
print(
    "\nHypotheses:",
    "H0: The weekly operating cost = theoretical cost model (no increase).",
    "H1: The weekly operating cost > theoretical cost model (costs are higher).",
    sep="\n",
)



Hypotheses:
H0: The weekly operating cost = theoretical cost model (no increase).
H1: The weekly operating cost > theoretical cost model (costs are higher).
