In [4]:
# chi-squared test with similar proportions
import pandas as pd
from scipy.stats import chi2_contingency
from scipy.stats import chi2



In [5]:
# contingency table of observed counts (2 rows x 3 columns)
table = [[10, 20, 30],
         [6,  9,  17]]
print(table)

# chi2_contingency returns the test statistic, its p-value, the degrees of
# freedom and the table of expected frequencies, in that order
stat, p, dof, expected = chi2_contingency(table)
print('degrees-of-freedom=%d' % dof)
print()
print('expected:')
print(expected)
print()

# interpret test-statistic: compare it against the chi-squared quantile
# at the chosen confidence level
prob = 0.95
critical = chi2.ppf(prob, dof)
print('probability=%.3f, critical=%.3f, stat=%.3f' % (prob, critical, stat))
if abs(stat) >= critical:
    print('Dependent (reject H0) - test:  abs(stat) >= critical - succeeded')
else:
    # closing parenthesis after "H0" was missing in this message
    print('Independent (fail to reject H0) - test:  abs(stat) >= critical - failed')
print()

# interpret p-value: compare it against the significance level
alpha = 1.0 - prob
print('alpha=%.3f, p=%.3f' % (alpha, p))
if p <= alpha:
    print('Dependent (reject H0) - test: p <= alpha - succeeded')
else:
    print('Independent (fail to reject H0) - test: p <= alpha - failed')
print()


[[10, 20, 30], [6, 9, 17]]
degrees-of-freedom=2

expected:
[[10.43478261 18.91304348 30.65217391]
 [ 5.56521739 10.08695652 16.34782609]]

probability=0.950, critical=5.991, stat=0.272
Independent (fail to reject H0 - test:  abs(stat) >= critical - failed

alpha=0.050, p=0.873
Independent (fail to reject H0) - test: p <= alpha - failed



In [6]:
class PearsonChiSquared:
    """Chi-squared test on two columns of a DataFrame, reported via print().

    The two selected columns are passed to ``chi2_contingency`` as the
    contingency table, so the DataFrame is expected to hold observed counts.
    """

    def __init__(self, df):
        """Keep a reference to the DataFrame that holds the counts."""
        self.df = df

    def getColumns(self):
        """Return the column labels of the wrapped DataFrame."""
        return self.df.columns

    def chiSquared(self, column_name1, column_name2, prob=0.95):
        """Run the test on two columns and print the outcome.

        The result is interpreted three ways: test statistic against the
        critical value, p-value against alpha, and ``1 - p`` against ``prob``.

        Args:
            column_name1: label of the first column of counts.
            column_name2: label of the second column of counts.
            prob: confidence level used for the critical value; the
                significance level is ``1 - prob``. Defaults to 0.95.

        Returns:
            None. All results are printed.

        Raises:
            ValueError: if ``prob`` is not strictly between 0 and 1.
        """
        if not 0.0 < prob < 1.0:
            raise ValueError('prob must be strictly between 0 and 1, got %r' % (prob,))

        # Select the columns with a label list. DataFrame.xs() with a list
        # key is deprecated and raises TypeError on pandas 2.x.
        table = self.df[[column_name1, column_name2]]
        stat, p, dof, expected = chi2_contingency(table)
        print('degrees-of-freedom=%d' % dof)
        print()
        print('expected:')
        print(expected)
        print()

        # interpret test-statistic
        critical = chi2.ppf(prob, dof)
        print('probability=%.3f, critical=%.3f, stat=%.3f' % (prob, critical, stat))
        if abs(stat) >= critical:
            print('Dependent (reject H0) - test:  abs(stat) >= critical - succeeded')
        else:
            # closing parenthesis after "H0" was missing in this message
            print('Independent (fail to reject H0) - test:  abs(stat) >= critical - failed')
        print()

        # interpret p-value
        alpha = 1.0 - prob
        print('alpha=%.3f, p=%.3f' % (alpha, p))
        if p <= alpha:
            print('Dependent (reject H0) - test: p <= alpha - succeeded')
        else:
            print('Independent (fail to reject H0) - test: p <= alpha - failed')
        print()

        # same decision expressed on the confidence scale; the label now
        # names the comparison actually performed here
        print('1-p =', 1-p)
        if 1 - p > prob:
            print('Dependent (reject H0) - test: 1 - p > prob - succeeded')
        else:
            print('Independent (fail to reject H0) - test: 1 - p > prob - failed')
        print()

# Observed counts as a DataFrame: columns 'a' and 'b' carry the same numbers
# as the two rows of the list-based table used earlier (i.e. it is transposed).
df = pd.DataFrame(
    data={
        'a': [10, 20, 30],
        'b': [6, 9, 17],
    }
)

# Wrap the frame and run the test on the two count columns.
pcs = PearsonChiSquared(df)
pcs.chiSquared('a', 'b')


degrees-of-freedom=2

expected:
[[10.43478261  5.56521739]
 [18.91304348 10.08695652]
 [30.65217391 16.34782609]]

probability=0.950, critical=5.991, stat=0.272
Independent (fail to reject H0 - test:  abs(stat) >= critical - failed

alpha=0.050, p=0.873
Independent (fail to reject H0) - test: p <= alpha - failed

1-p = 0.126971716619927
Independent (fail to reject H0) - test: p <= alpha - failed

