A notebook containing the statistical functions and classes written by the author for this project

In [4]:
import pandas as pd
import numpy as np 

In [1]:
## Empirical cumulative distribution function, returned as a percentage,
## used when estimating the relative risk

def cdf(sample, value):
  """Return the percentage of items in `sample` that are <= `value`.

  The result is on a 0-100 scale, not 0-1. `sample` must support both
  iteration and `len()`; an empty sample raises ZeroDivisionError.
  """
  at_or_below = sum(1 for item in sample if item <= value)
  return (at_or_below * 100) / len(sample)

In [2]:
## Cohen's d - An estimator of effect size

def cohen_d(group1, group2):
  """Return Cohen's d: the difference in means over a pooled std deviation.

  Both groups must provide `.mean()`, `.var()` and `len()`. The pooled
  variance weights each group's variance by its size (n, not n - 1), and
  the variance itself is whatever the inputs' own `.var()` returns.
  A positive result means `group1` has the larger mean.
  """
  size1 = len(group1)
  size2 = len(group2)
  mean_gap = group1.mean() - group2.mean()
  weighted_var = (size1 * group1.var() + size2 * group2.var()) / (size1 + size2)
  return mean_gap / np.sqrt(weighted_var)

In [3]:
## A class for Hypothesis testing that has a 
## method for Permutation test

class HypothesisTest:
  """Resampling-based comparison of the means of two groups.

  `data` is a pair `(group1, group2)`. Each group must provide `.mean()`,
  `.var()`, `len()` and a pandas-style `.sample(n=..., replace=...)`.

  Attributes:
    data:       the pair of groups handed to the constructor.
    actual:     observed difference in means (group1 - group2).
    test_stats: list of resampled differences, set by `PValue`.

  NOTE(review): despite the method name `PermutationTest`, nothing in this
  class shuffles group labels; `RunModel` resamples each group on its own.
  """

  def __init__(self, data):
    """Store the two groups and compute the observed difference in means."""
    self.data = data
    self.actual = self.PermutationTest(data)

  def TestStat(self, data):
    """Return an effect size for `data`: mean difference over pooled std.

    The pooled variance weights each group's variance by its size.
    """
    first, second = data

    size1 = len(first)
    size2 = len(second)

    spread1 = first.var()
    spread2 = second.var()

    mean_gap = first.mean() - second.mean()
    weighted_var = (size1 * spread1 + size2 * spread2) / (size1 + size2)

    return mean_gap / np.sqrt(weighted_var)

  def RunModel(self):
    """Draw 7 observations, with replacement, from each stored group.

    Returns the two resampled groups as a tuple `(sample1, sample2)`.
    """
    first, second = self.data

    # Keep this draw order: group1 first, then group2.
    resampled_first = first.sample(n=7, replace=True)
    resampled_second = second.sample(n=7, replace=True)

    return (resampled_first, resampled_second)

  def PermutationTest(self, data):
    """Return the difference in means of the pair `data` (first - second)."""
    first, second = data
    return first.mean() - second.mean()

  def PValue(self, iterations = 3000):
    """Return the share of resampled mean differences that are <= 0.0.

    Runs `RunModel` `iterations` times, stores each resulting difference
    in `self.test_stats`, and divides the count of non-positive values
    by `iterations`.
    """
    simulated = []
    for _ in range(iterations):
      simulated.append(self.PermutationTest(self.RunModel()))
    self.test_stats = simulated

    non_positive = len([stat for stat in simulated if stat <= 0.0])
    return non_positive / iterations