In [21]:
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency
from scipy.stats import chi2  

# Data provided in the contingency table
data = {'Satisfaction': ['Very Satisfied', 'Satisfied', 'Neutral', 'Unsatisfied', 'Very Unsatisfied'],
        'Smart Thermostat': [50, 80, 60, 30, 20],
        'Smart Light': [70, 100, 90, 50, 50]}

df = pd.DataFrame(data)
df = df.set_index('Satisfaction')

# 1. State the Hypotheses:

# Null Hypothesis (H0): There is no association between device type and customer satisfaction.
# Alternative Hypothesis (H1): There is an association between device type and customer satisfaction.

print("Hypotheses:")
print("Null Hypothesis (H0): There is no association between device type and customer satisfaction.")
print("Alternative Hypothesis (H1): There is an association between device type and customer satisfaction.\n")


Hypotheses:
Null Hypothesis (H0): There is no association between device type and customer satisfaction.
Alternative Hypothesis (H1): There is an association between device type and customer satisfaction.



In [23]:
# 2. Compute the Chi-Square Statistic:
chi2_statistic, p, dof, expected = chi2_contingency(df) 


print("Chi-Square Statistic:", chi2)
print("P-value:", p)
print("Degrees of Freedom:", dof)
print("Expected Frequencies:\n", expected)


Chi-Square Statistic: <scipy.stats._continuous_distns.chi2_gen object at 0x00000213D69CBB30>
P-value: 0.22784371130697179
Degrees of Freedom: 4
Expected Frequencies:
 [[ 48.  72.]
 [ 72. 108.]
 [ 60.  90.]
 [ 32.  48.]
 [ 28.  42.]]


In [27]:
# 3. Determine the Critical Value:
alpha = 0.05  # Significance level

critical_value = chi2.ppf(1 - alpha, dof) #ppf : Percent Point Function  

print("\nCritical Value (alpha = 0.05):", critical_value)



Critical Value (alpha = 0.05): 9.487729036781154


In [31]:
# 4. Make a Decision:

print("\nDecision:")
if chi2_statistic > critical_value:  # Compare chi2_statistic (float) with critical_value (float)
    print("Reject the null hypothesis. There is a significant association between device type and customer satisfaction.")
else:
    print("Fail to reject the null hypothesis. There is no significant association between device type and customer satisfaction.")
    
# Further Interpretation (using p-value):

print("\nFurther Interpretation (using p-value):")
if p < alpha:
    print("Reject the null hypothesis. There is a significant association between device type and customer satisfaction.")
else:
    print("Fail to reject the null hypothesis. There is no significant association between device type and customer satisfaction.")


# Display the DataFrame (optional)
print("\nContingency Table:")
print(df)


# Example of how to access expected frequencies for further analysis
expected_df = pd.DataFrame(expected, index=df.index, columns=df.columns)
print("\nExpected Frequencies Table:")
print(expected_df)


Decision:
Fail to reject the null hypothesis. There is no significant association between device type and customer satisfaction.

Further Interpretation (using p-value):
Fail to reject the null hypothesis. There is no significant association between device type and customer satisfaction.

Contingency Table:
                  Smart Thermostat  Smart Light
Satisfaction                                   
Very Satisfied                  50           70
Satisfied                       80          100
Neutral                         60           90
Unsatisfied                     30           50
Very Unsatisfied                20           50

Expected Frequencies Table:
                  Smart Thermostat  Smart Light
Satisfaction                                   
Very Satisfied                48.0         72.0
Satisfied                     72.0        108.0
Neutral                       60.0         90.0
Unsatisfied                   32.0         48.0
Very Unsatisfied              28.0   