In [91]:
from datetime import datetime, timedelta
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind, chi2_contingency

In [5]:
# Seed every RNG this notebook draws from: the stdlib `random` module is used by
# CoinTest.RunModel, while NumPy's global generator is used for the binomial
# samples and for PermuteTest's shuffles. Seeding only one leaves the other
# results irreproducible.
SEED = 20
random.seed(SEED)
np.random.seed(SEED)

In [88]:
# Two synthetic samples of 10 binomial counts each (10000 trials per count),
# differing only in the success probability.
a = np.random.binomial(n=10000, p=.6, size=10)
b = np.random.binomial(n=10000, p=.8, size=10)

In [36]:
class Test(object):
    """Skeleton for a simulation-based hypothesis test.

    Subclasses override the three hooks (``MakeModel``, ``TestStatistic``,
    ``RunModel``). ``p_value`` then compares statistics computed on simulated
    data with the statistic computed on the data given to the constructor.
    """

    def __init__(self, data):
        """Store ``data``, build the model, and record the observed statistic."""
        self.data = data
        self.MakeModel()
        self.actual = self.TestStatistic(data)

    def MakeModel(self):
        """Hook: prepare whatever ``RunModel`` needs. Does nothing here."""
        return None

    def TestStatistic(self, data):
        """Hook: reduce ``data`` to a single statistic. Returns ``None`` here."""
        return None

    def RunModel(self):
        """Hook: produce one simulated data set. Returns ``None`` here."""
        return None

    def p_value(self, iters=1000):
        """Return the fraction of ``iters`` simulated statistics ``>= self.actual``."""
        # Run every simulation first, then compare against the observed value.
        simulated = []
        for _ in range(iters):
            simulated.append(self.TestStatistic(self.RunModel()))
        hits = len([stat for stat in simulated if stat >= self.actual])
        return hits / iters

In [39]:
class CoinTest(Test):
    """Two-sided fair-coin test on an observed ``(heads, tails)`` pair of counts."""

    def MakeModel(self):
        """Record the total number of flips in the observed data as ``self.n``."""
        n_heads, n_tails = self.data
        self.n = n_heads + n_tails

    def TestStatistic(self, data):
        """Return the absolute difference between the two counts in ``data``."""
        n_heads, n_tails = data
        return abs(n_heads - n_tails)

    def RunModel(self):
        """Simulate ``self.n`` equally likely 0/1 draws; return (count of 0s, count of 1s)."""
        zeros = 0
        ones = 0
        for _ in range(self.n):
            if random.choice([0, 1]) == 0:
                zeros += 1
            else:
                ones += 1
        return zeros, ones



In [40]:
# Simulate 1000 fair-coin experiments of 250 flips each and report how often
# |heads - tails| is at least the observed |140 - 110| = 30.
CoinTest((140, 110)).p_value(iters=1000)

0.074

In [45]:
class PermuteTest(CoinTest):
    """Permutation-test scaffold for a pair of samples ``(group1, group2)``.

    The two groups are pooled, and each simulated data set is a random
    re-split of the shuffled pool into groups of the original sizes.

    NOTE(review): the base class is ``CoinTest``, so ``TestStatistic`` is
    ``CoinTest.TestStatistic`` unless a subclass overrides it — confirm
    whether deriving from ``Test`` directly was intended.
    """

    def MakeModel(self):
        """Record both group sizes and build the pooled array ``self.pool``."""
        group1, group2 = self.data
        self.n, self.m = len(group1), len(group2)
        self.pool = np.hstack((group1, group2))

    def RunModel(self):
        """Shuffle the pool in place and return it re-split into two groups.

        Returns:
            A ``(first, second)`` tuple holding ``self.n`` and ``self.m``
            elements respectively.
        """
        np.random.shuffle(self.pool)
        # Return copies rather than slices: slices are views into self.pool,
        # so a later shuffle would silently rewrite results the caller kept.
        first = self.pool[:self.n].copy()
        second = self.pool[self.n:].copy()
        return first, second

In [46]:
class DiffMeansPermute(PermuteTest):
    """Permutation test whose statistic is the absolute gap between group means."""

    def TestStatistic(self, data):
        """Return ``abs(first.mean() - second.mean())`` for the two groups in ``data``."""
        first, second = data
        return abs(first.mean() - second.mean())

In [89]:
# Permutation p-value (1000 shuffles) for the absolute difference in means of `a` and `b`.
DiffMeansPermute((np.array(a), np.array(b))).p_value(iters=1000)

0.0

In [48]:
class Chi2Test(PermuteTest):
    """Permutation test using a chi-squared-style statistic on two groups."""

    def TestStatistic(self, data):
        """Return the sum of squared element-wise gaps, each divided by the second group's value."""
        first, second = data
        gap = first - second
        # The second group serves as the denominator for every term.
        return (gap * gap / second).sum()

In [90]:
# Permutation p-value (1000 shuffles) using Chi2Test's statistic,
# sum((group1 - group2)**2 / group2), on `a` and `b`.
Chi2Test((np.array(a), np.array(b))).p_value(iters=1000)

0.024

In [86]:
# NOTE(review): bare name — this evaluates to the function object imported from
# scipy.stats and calls nothing. The saved output for this cell is an array,
# which this expression cannot produce; it looks stale — re-run or remove the cell.
chi2_contingency

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])