# Chi Square Test Of Independence

In [None]:
# Import Packages

import numpy as np
import pandas as pd
import scipy.stats as stats
from scipy.stats import chi2,chi2_contingency

### Dataset1

In [None]:
# Import Dataset
# Read the Titanic passenger data from "titanic.csv" (path is relative to the
# working directory) and preview the first rows.

titanic = pd.read_csv("titanic.csv")
titanic.head()

In [None]:
# Cross-tabulate survival outcome (rows) against passenger class (columns).
# These observed counts are the input to the Chi-Square test of independence,
# which checks whether survival is independent of passenger class.

contingency_table = pd.crosstab(
    index=titanic["Survived"],
    columns=titanic["Pclass"],
)
contingency_table

In [None]:
# Perform Chi Square Test
# chi2_contingency returns the test statistic, the p-value, the degrees of
# freedom and the table of expected frequencies under independence.
# The statistic is stored as chi2_stat so it does not overwrite the chi2
# distribution object imported from scipy.stats at the top of the file.

chi2_stat, p_value, dof, expected = chi2_contingency(contingency_table)

print("Chi Square Statistic : {:.2f}".format(chi2_stat))
# General format keeps very small p-values visible instead of printing 0.00.
print("p_value : {:.4g}".format(p_value))
print("Degree Of Freedom : {}".format(dof))
print("Expected Frequencies : {}".format(expected))

In [None]:
# Interpret Result
# Compare the p-value from the test against the significance level alpha:
# a p-value below alpha means the observed association is unlikely under
# the null hypothesis of independence.

alpha = 0.05

if p_value < alpha:
    print("We reject the null hypothesis. There is a significant association between passenger class and survival rate.")
else:
    print("We fail to reject the null hypothesis. There is no significant association between passenger class and survival rate.")

### Dataset2

In [None]:
# Import Dataset
# Read "ChiSquareTestOfIndependence.csv" (path is relative to the working
# directory). Later cells use its "City" and "Brand" columns.

df = pd.read_csv("ChiSquareTestOfIndependence.csv")

In [None]:
# Return Top 5 Rows From Dataset
# head() with no argument shows the first five rows as a quick sanity check.

df.head()

In [None]:
# Step-1 : Make Contingency Table (Observed Frequency)
# Rows are cities, columns are brands. margins=True appends an "All" row and
# an "All" column holding the marginal totals used by the later steps.

countingTab = pd.crosstab(
    index=df["City"],
    columns=df["Brand"],
    margins=True,
)
countingTab

In [None]:
# Step-2 : Calculate Expected Frequency
# Under independence the expected count of a cell is
#     (row total * column total) / grand total.
# exp1 maps city -> {brand -> expected count}.

cities = list(df["City"].unique())
brands = list(df["Brand"].unique())

# Pull the marginal totals out once instead of transposing the whole table
# for every cell.
row_totals = countingTab["All"]        # total per city (plus the "All" entry)
col_totals = countingTab.loc["All"]    # total per brand (plus the "All" entry)
grand_total = countingTab.loc["All", "All"]

exp1 = {
    city: {
        brand: row_totals[city] * col_totals[brand] / grand_total
        for brand in brands
    }
    for city in cities
}
print(exp1)

In [None]:
# Step-3 : Calculate Chi Square
# Sum (observed - expected)^2 / expected over every city/brand cell.
# Observed counts are read with one label lookup per cell rather than
# transposing the whole table on each iteration.

chiSquareCal = sum(
    (countingTab.loc[city, brand] - exp1[city][brand]) ** 2 / exp1[city][brand]
    for city in cities
    for brand in brands
)
print(chiSquareCal)

In [None]:
# Step-4 Calculate Degree Of Freedom
# For a table with r rows and c columns: dof = (r - 1) * (c - 1).

row_count = len(cities)
col_count = len(brands)
dof = (row_count - 1) * (col_count - 1)
print(dof)

In [None]:
# Step-5 Tabulated Value Of Chi Square
# Critical value: the 0.95 quantile of the chi-square distribution with
# `dof` degrees of freedom (ppf is the inverse of the CDF).

confidence_level = 0.95
chiSquareTab = stats.chi2.ppf(confidence_level, df=dof)
print(chiSquareTab)

In [None]:
# Interpret Result
# Reject the null hypothesis of independence only when the calculated
# statistic exceeds the critical value. Otherwise the test is inconclusive:
# we fail to reject the null hypothesis, which is not the same as accepting it.

if chiSquareCal > chiSquareTab:
    print("Reject Null Hypothesis")
else:
    print("Fail To Reject Null Hypothesis")

In [None]:
# Shortcut To Chi Square Test
# Feed the observed counts straight to scipy. The "All" margin row and column
# are dropped by label, so every city and brand in the table is included
# without hard-coding city names or the number of brands.
# NOTE(review): chi2_contingency applies Yates' continuity correction by
# default when dof == 1 (a 2x2 table); in that case its statistic will differ
# from the manual calculation unless correction=False is passed.

countTab = countingTab.drop(index="All", columns="All").to_numpy()
chi2_contingency(countTab)