In [3]:
import numpy as np
import scipy.stats as stats

In [5]:
# --- Given Data (Observed Frequencies) ---
# Significance level used by every later step of the test.
alpha = 0.05

# Observed counts keyed by satisfaction level.
# Each tuple is (Smart Thermostat, Smart Light); insertion order fixes the row order.
counts_by_satisfaction = {
    "Very Satisfied": (50, 70),
    "Satisfied": (80, 100),
    "Neutral": (60, 90),
    "Unsatisfied": (30, 50),
    "Very Unsatisfied": (20, 50),
}

# 5 x 2 contingency table: rows = satisfaction levels, columns = device types.
observed_data = np.array(list(counts_by_satisfaction.values()))

# Banner, table caption and the table itself, one per line.
print(
    "--- Chi-Square Test for Independence: Device Type vs. Customer Satisfaction ---",
    "\nObserved Frequencies Table:",
    observed_data,
    sep="\n",
)

--- Chi-Square Test for Independence: Device Type vs. Customer Satisfaction ---

Observed Frequencies Table:
[[ 50  70]
 [ 80 100]
 [ 60  90]
 [ 30  50]
 [ 20  50]]


In [7]:
# --- Task 1: State the Hypotheses ---
# Section banner followed by H0 and Ha, each with its plain-language restatement.
hypothesis_lines = (
    "\n--- Task 1: Hypotheses Statement ---",
    "Null Hypothesis (H0): There is no significant association between the type of smart home device purchased and the customer's satisfaction level.",
    "    (i.e., Device Type and Customer Satisfaction are independent.)",
    "Alternative Hypothesis (Ha): There is a significant association between the type of smart home device purchased and the customer's satisfaction level.",
    "    (i.e., Device Type and Customer Satisfaction are dependent.)",
)
print(*hypothesis_lines, sep="\n")


--- Task 1: Hypotheses Statement ---
Null Hypothesis (H0): There is no significant association between the type of smart home device purchased and the customer's satisfaction level.
    (i.e., Device Type and Customer Satisfaction are independent.)
Alternative Hypothesis (Ha): There is a significant association between the type of smart home device purchased and the customer's satisfaction level.
    (i.e., Device Type and Customer Satisfaction are dependent.)


In [9]:
# --- Task 2: Compute the Chi-Square Statistic ---

# Marginal totals of the observed contingency table.
row_totals = np.sum(observed_data, axis=1)   # one total per satisfaction level
col_totals = np.sum(observed_data, axis=0)   # one total per device type
grand_total = np.sum(observed_data)

# Table dimensions; reused later when computing the degrees of freedom.
num_rows, num_cols = observed_data.shape

print(f"\nRow Totals: {row_totals}")
print(f"Column Totals: {col_totals}")
print(f"Grand Total: {grand_total}")

# Expected frequencies under independence:
#   E_ij = (Row Total_i * Column Total_j) / Grand Total
# The outer product of the two marginal vectors builds the whole table in one
# vectorized step (no explicit double loop); true division yields floats.
expected_data = np.outer(row_totals, col_totals) / grand_total

print("\nExpected Frequencies Table:")
print(np.round(expected_data, 2))  # Round for display purposes only

# Chi-Square = Sum((Observed - Expected)^2 / Expected) over all cells
chi_square_statistic = np.sum((observed_data - expected_data) ** 2 / expected_data)

print("\n--- Task 2: Chi-Square Statistic Calculation ---")
print(f"Calculated Chi-Square Statistic (χ²): {chi_square_statistic:.4f}")


Row Totals: [120 180 150  80  70]
Column Totals: [240 360]
Grand Total: 600

Expected Frequencies Table:
[[ 48.  72.]
 [ 72. 108.]
 [ 60.  90.]
 [ 32.  48.]
 [ 28.  42.]]

--- Task 2: Chi-Square Statistic Calculation ---
Calculated Chi-Square Statistic (χ²): 5.6382


In [11]:
# --- Task 3: Determine the Critical Value ---

# Degrees of freedom for an r x c contingency table: (r - 1) * (c - 1).
df = (num_rows - 1) * (num_cols - 1)

# The test of independence is right-tailed, so the rejection threshold is the
# (1 - alpha) quantile of the chi-square distribution with `df` degrees of freedom.
upper_quantile = 1 - alpha
critical_value = stats.chi2.ppf(upper_quantile, df)

print(
    "\n--- Task 3: Critical Value Determination ---",
    f"Degrees of Freedom (df): {df}",
    f"Significance Level (α): {alpha}",
    f"Critical Value (χ²_critical): {critical_value:.4f}",
    sep="\n",
)


--- Task 3: Critical Value Determination ---
Degrees of Freedom (df): 4
Significance Level (α): 0.05
Critical Value (χ²_critical): 9.4877


In [13]:
# --- Task 4: Make a Decision ---
print("\n--- Task 4: Decision ---")
print(f"Calculated Chi-Square Statistic (χ²_calculated): {chi_square_statistic:.4f}")
print(f"Critical Value (χ²_critical): {critical_value:.4f}")

# Reject H0 only when the statistic falls strictly beyond the critical value.
exceeds_critical = chi_square_statistic > critical_value
decision = (
    "Reject the Null Hypothesis (H0)"
    if exceeds_critical
    else "Fail to Reject the Null Hypothesis (H0)"
)
# Comparison symbol shown in the explanation, matching the branch taken.
relation = ">" if exceeds_critical else "<="

print(
    f"Decision: {decision} because the Calculated Chi-Square Statistic "
    f"({chi_square_statistic:.4f}) {relation} Critical Value ({critical_value:.4f})."
)



--- Task 4: Decision ---
Calculated Chi-Square Statistic (χ²_calculated): 5.6382
Critical Value (χ²_critical): 9.4877
Decision: Fail to Reject the Null Hypothesis (H0) because the Calculated Chi-Square Statistic (5.6382) <= Critical Value (9.4877).


In [15]:
# --- Task 5: Conclusion ---
print("\n--- Conclusion ---")

# Same rejection rule as the decision step: statistic strictly above the critical value.
# The significance level is interpolated from `alpha` so the wording stays
# correct if the configured level is changed.
association_found = chi_square_statistic > critical_value
if association_found:
    print(f"Based on the Chi-Square Test of Independence, with a significance level of {alpha}, we reject the null hypothesis.")
    print("This indicates that there is a statistically significant association between the type of smart home device purchased and the customer's satisfaction level.")
    print("In other words, customer satisfaction levels are dependent on the type of device (Smart Thermostat vs. Smart Light).")
else:
    # NOTE(review): failing to reject H0 means the data give insufficient evidence
    # of an association; the "independent" wording below is shorthand for that,
    # not a proof of independence.
    print(f"Based on the Chi-Square Test of Independence, with a significance level of {alpha}, we fail to reject the null hypothesis.")
    print("This indicates that there is no statistically significant association between the type of smart home device purchased and the customer's satisfaction level.")
    print("In other words, customer satisfaction levels are independent of the type of device (Smart Thermostat vs. Smart Light).")

# Optional: p-value for additional context.
# sf is the survival function (1 - cdf), i.e. the right-tail probability of the statistic.
p_value = stats.chi2.sf(chi_square_statistic, df)
print(f"\nFor additional context, the p-value is: {p_value:.4f}")
if p_value < alpha:
    print(f"Since p-value ({p_value:.4f}) < alpha ({alpha}), we reject H0.")
else:
    print(f"Since p-value ({p_value:.4f}) >= alpha ({alpha}), we fail to reject H0.")

# Assumption check for the Chi-Square test:
# all expected cell frequencies should be at least 5; below that the
# chi-square approximation might not be valid.
min_expected_frequency = np.min(expected_data)
print("\n--- Assumption Check ---")
print(f"Minimum Expected Frequency: {min_expected_frequency:.2f}")
if min_expected_frequency < 5:
    print("Warning: One or more expected frequencies are less than 5. The Chi-Square approximation may not be accurate.")
else:
    print("All expected frequencies are 5 or greater, so the Chi-Square test assumptions are met.")


--- Conclusion ---
Based on the Chi-Square Test of Independence, with a significance level of 0.05, we fail to reject the null hypothesis.
This indicates that there is no statistically significant association between the type of smart home device purchased and the customer's satisfaction level.
In other words, customer satisfaction levels are independent of the type of device (Smart Thermostat vs. Smart Light).

For additional context, the p-value is: 0.2278
Since p-value (0.2278) >= alpha (0.05), we fail to reject H0.

--- Assumption Check ---
Minimum Expected Frequency: 28.00
All expected frequencies are 5 or greater, so the Chi-Square test assumptions are met.
