In [1]:
from __future__ import division
import math
import numpy as np
import numpy.random
from hypergeometric import hypergeometric_optim, trihypergeometric_optim

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

import pandas as pd


In [4]:
simTable = pd.DataFrame(columns=('vote margin', 'null margin', 'sample size', 'popsize', 'invalid_rate', \
                                 '1% rejection rate - tri', '5% rejection rate - tri', '10% rejection rate - tri', \
                                 '1% rejection rate - hyp', '5% rejection rate - hyp', '10% rejection rate - hyp')
                       )

c_values = [0, 50, 100]
margin = [0.01, 0.05, 0.1]
sample_rate = [0.01, 0.05, 0.1, 0.2]
invalid_rate = [0.1, 0.25, 0.5]
popsize = 1000
reps = 100
np.random.seed(837459382)

for m in margin:
    for r in invalid_rate:
        print("Margin=", m, ", invalid rate=", r)
        vote_margin = (1-r)*popsize * (m/2)
        population = [0]*(int((1-r)/2*popsize - vote_margin)) + \
                     [1]*(int((1-r)/2*popsize + vote_margin)) + \
                     [np.nan]*int(r*popsize)
        population = np.array(population)
        
        for s in sample_rate:
            for c in c_values:
                pvalues_tri = np.zeros(reps)
                pvalues_hyp = np.zeros(reps)
                size = s*popsize
                
                for i in range(reps):
                    sam = np.random.choice(population, size)
                    pvalues_tri[i] = trihypergeometric_optim(sam, popsize, null_margin=c)
                    pvalues_hyp[i] = hypergeometric_optim(sam, popsize, null_margin=c)
                
                simTable.loc[len(simTable)] =  m, c, size, popsize, r, \
                                               np.mean(pvalues_tri <= 0.01), np.mean(pvalues_tri <= 0.05),\
                                               np.mean(pvalues_tri <= 0.1), np.mean(pvalues_hyp <= 0.01), \
                                               np.mean(pvalues_hyp <= 0.05), np.mean(pvalues_hyp <= 0.1)

Margin= 0.01 , invalid rate= 0.1




Margin= 0.01 , invalid rate= 0.25
Margin= 0.01 , invalid rate= 0.5
Margin= 0.05 , invalid rate= 0.1
Margin= 0.05 , invalid rate= 0.25
Margin= 0.05 , invalid rate= 0.5
Margin= 0.1 , invalid rate= 0.1
Margin= 0.1 , invalid rate= 0.25
Margin= 0.1 , invalid rate= 0.5


In [5]:
simTable

Unnamed: 0,vote margin,null margin,sample size,popsize,invalid_rate,1% rejection rate - tri,5% rejection rate - tri,10% rejection rate - tri,1% rejection rate - hyp,5% rejection rate - hyp,10% rejection rate - hyp
0,0.01,0.0,10.0,1000.0,0.10,0.01,0.03,0.08,0.03,0.04,0.08
1,0.01,50.0,10.0,1000.0,0.10,0.01,0.01,0.04,0.00,0.00,0.00
2,0.01,100.0,10.0,1000.0,0.10,0.00,0.01,0.02,0.00,0.00,0.00
3,0.01,0.0,50.0,1000.0,0.10,0.00,0.03,0.09,0.00,0.05,0.09
4,0.01,50.0,50.0,1000.0,0.10,0.01,0.02,0.03,0.00,0.00,0.00
5,0.01,100.0,50.0,1000.0,0.10,0.00,0.00,0.01,0.00,0.00,0.00
6,0.01,0.0,100.0,1000.0,0.10,0.00,0.02,0.07,0.00,0.02,0.07
7,0.01,50.0,100.0,1000.0,0.10,0.00,0.01,0.01,0.00,0.00,0.00
8,0.01,100.0,100.0,1000.0,0.10,0.00,0.01,0.02,0.00,0.00,0.00
9,0.01,0.0,200.0,1000.0,0.10,0.02,0.07,0.17,0.03,0.07,0.17


In [6]:
simTable.to_csv("power-results.csv")