In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [18]:
from scipy.stats import chisquare # Statistical Test
from scipy.stats import chi2 # Distribution

In [9]:
chi_stats, p_value = chisquare(
  [28,22], # Observed values
  [25,25] # Expected values 
)

In [11]:
print(f"P-Value: {p_value}")
print(f"Chi-Squared: {chi_stats}")

P-Value: 0.3961439091520741
Chi-Squared: 0.72


In [13]:
(28-25)**2/25 + (22-25)**2/25

0.72

In [19]:
# P-value
1 - chi2.cdf(0.72, df = 1)

0.3961439091520741

In [6]:
alpha = 0.05
if p_value <= alpha:
  print("Coin is Biased")
else:
  print("Coin is Fair")

Coin is Fair


In [12]:
chi_stats, p_value = chisquare(
  [45,5], # Observed values
  [25,25] # Expected values 
)
print(f"P-Value: {p_value}")
print(f"Chi-Squared: {chi_stats}")
alpha = 0.05
if p_value <= alpha:
  print("Coin is Biased") # Reject Null Hypothesis
else:
  print("Coin is Fair") # Fail to Reject Null Hypothesis

P-Value: 1.5417257900280013e-08
Chi-Squared: 32.0
Coin is Biased


In [14]:
(45-25)**2/25 + (5-25)**2/25

32.0

In [20]:
# P-value
1 - chi2.cdf(32.0, df = 1)

1.5417257914762672e-08

### Independance

In [21]:
from scipy.stats import chi2_contingency # Statistical Test for Independence

In [23]:
observed = [
  [527,72],
  [206,102]
]

chi_stats, p_value, dof, expected = chi2_contingency(observed)
print(f"P-Value: {p_value}")
print(f"Chi-Squared: {chi_stats}")
print(f"Degrees of Freedom: {dof}")
print(f"Expected: {expected}")

P-Value: 4.268230756875865e-14
Chi-Squared: 57.04098674049609
Degrees of Freedom: 1
Expected: [[484.08710033 114.91289967]
 [248.91289967  59.08710033]]


## Aerofit Dataset

In [24]:
df = pd.read_csv('./aerofit_treadmill.csv')
df.head()

Unnamed: 0,Product,Age,Gender,Education,MaritalStatus,Usage,Fitness,Income,Miles
0,KP281,18,Male,14,Single,3,4,29562,112
1,KP281,19,Male,15,Single,2,3,31836,75
2,KP281,19,Female,14,Partnered,4,3,30699,66
3,KP281,19,Male,12,Single,3,3,32973,85
4,KP281,20,Male,13,Partnered,4,2,35247,47


In [25]:
gender_Product = pd.crosstab(index = df['Gender'], columns = df['Product'])
gender_Product

Product,KP281,KP481,KP781
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,40,29,7
Male,40,31,33


In [26]:
chi_stats, p_value, dof, expected = chi2_contingency(gender_Product)
print(f"P-Value: {p_value}")
print(f"Chi-Squared: {chi_stats}")
print(f"Degrees of Freedom: {dof}")
print(f"Expected: {expected}")

P-Value: 0.0015617972833158714
Chi-Squared: 12.923836032388664
Degrees of Freedom: 2
Expected: [[33.77777778 25.33333333 16.88888889]
 [46.22222222 34.66666667 23.11111111]]


- 1. H0: Independence, H1: Dependence (gender and product)
- 2. Test statistic: Chi2
- 3. Rigth-tailed test
- 4. Compute p-value, 0.0015
- 5. Compare p-value with alpha, 0.05 -> Reject H0

In [32]:
chi_stats, p_value, dof, expected = chi2_contingency([  
  [40,	29	,20],
  [40,	31	,33],
])
p_value

0.34954582054758343