In [1]:
import math
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy.stats import chi2_contingency
from scipy.stats.contingency import association
from IPython.display import display
# Config the matlotlib backend as plotting inline in IPython
%matplotlib inline

In [2]:
# Create a contingency table
# In the two-dimensional case, the table is often described as an “R x C table”.
observed = np.array([[10, 20, 30, 40, 50], [51, 38, 32, 18, 11]])

In [3]:
# Perform the chi-square test of independence
chi2, p, dof, expected = chi2_contingency(observed)

# Print the results
print("Chi-square statistic:", round(chi2, 5))
print("p-value:", round(p, 5))
print("Degrees of freedom:", dof)
print("Expected frequencies:\n", expected)

Chi-square statistic: 66.48735
p-value: 0.0
Degrees of freedom: 4
Expected frequencies:
 [[30.5 29.  31.  29.  30.5]
 [30.5 29.  31.  29.  30.5]]


In [4]:
# Create a contingency table
# In the two-dimensional case, the table is often described as an “R x C table”.
observed = np.array([[10, 20, 30, 40, 50], [9, 21, 37, 45, 47]])

In [5]:
# Perform the chi-square test of independence
chi2, p, dof, expected = chi2_contingency(observed)

# Print the results
print("Chi-square statistic:", round(chi2, 5))
print("p-value:", round(p, 5))
print("Degrees of freedom:", dof)
print("Expected frequencies:\n", expected)

Chi-square statistic: 0.93392
p-value: 0.91964
Degrees of freedom: 4
Expected frequencies:
 [[ 9.22330097 19.90291262 32.52427184 41.26213592 47.08737864]
 [ 9.77669903 21.09708738 34.47572816 43.73786408 49.91262136]]


In [6]:
# no of hours a student studies
# in a week vs expected no of hours
observed_data = [10, 20, 30, 40, 50]
expected_data = [51, 38, 32, 18, 11]

In [7]:
# Chi-Square Goodness of Fit Test
chi_square, p_value = stats.chisquare(
    observed_data, expected_data)
  
# chi square test statistic and p value
print("Chi-square statistic:", round(chi_square, 5))
print("p-value:", round(p_value, 5))

Chi-square statistic: 206.77372
p-value: 0.0


In [8]:
# the degrees of freedom, df, formula is
nrows = 2
ncols = len(observed_data)
df = (nrows - 1) * (ncols - 1)
print("Degrees of freedom:", round(df, 5))

# significance level
alpha = 0.05

# find Chi-Square critical value
critical_value = stats.chi2.ppf(q = 1-alpha, # Find the critical value for 95% confidence*
                      df = df)   # Df = number of variable categories - 1

print("Critical value:", round(critical_value, 5))

conclusion = "Failed to reject the null hypothesis."
if chi_square > critical_value:
    conclusion = "Null Hypothesis is rejected."
        
print("Chisquare-score is:", round(chi_square, 5), " and critical value is:", round(critical_value, 5))
print(conclusion)

Degrees of freedom: 4
Critical value: 9.48773
Chisquare-score is: 206.77372  and critical value is: 9.48773
Null Hypothesis is rejected.


In [9]:
p_value = 1 - stats.chi2.cdf(x=chi_square,  # Find the p-value
                             df = df) # Df = number of variable categories - 1

print("p-value:", round(p_value, 5))

conclusion = "Failed to reject the null hypothesis."
if p_value <= alpha:
    conclusion = "Null Hypothesis is rejected."
        
print("Chisquare-score is:", chi_square, " and p-value is:", p_value)
print(conclusion)

p-value: 0.0
Chisquare-score is: 206.77371626481533  and p-value is: 0.0
Null Hypothesis is rejected.


In [10]:
observed = np.array([[100, 150], [203, 322], [420, 700], [320, 210]])
print(observed)
vcramer = association(observed, method="cramer")

print("V-Cramer:", round(vcramer, 5))

[[100 150]
 [203 322]
 [420 700]
 [320 210]]
V-Cramer: 0.18618
