In [1]:
from __future__ import division
import math
import numpy as np
import numpy.random
from hypergeometric import trihypergeometric_optim

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

import pandas as pd


In [2]:
simTable = pd.DataFrame(columns=('vote margin', 'null margin',
                                 'null_is_true', 'invalid_rate',
                                 '1% rejection rate', '5% rejection rate',
                                 '10% rejection rate'))

c_rates = [-0.2, -0.1, -0.05, -0.01, 0, 0.01, 0.05, 0.1, 0.2]
diluted_margin = [0.01, 0.05, 0.1, 0.2]
invalid_rate = [0.1, 0.2]
popsize = 1000
size = 0.2*popsize
reps = 1000
np.random.seed(837459382)

for m in diluted_margin:
    for r in invalid_rate:
        print("Diluted margin=", m, ", invalid rate=", r)
        vote_margin = popsize * m/2
        population = [0]*(int((1-r)/2*popsize - vote_margin)) + \
                     [1]*(int((1-r)/2*popsize + vote_margin)) + \
                     [np.nan]*int(r*popsize)
        population = np.array(population)
        
        for c in c_rates:
            c_value = int(c*popsize)
            pvalues = np.zeros(reps)
                
            for i in range(reps):
                sam = np.random.choice(population, size)
                pvalues[i] = trihypergeometric_optim(sam, popsize, null_margin=c_value)
                
            simTable.loc[len(simTable)] =  m, c, \
                                           (m <= c), r, \
                                           np.mean(pvalues <= 0.01), \
                                           np.mean(pvalues <= 0.05), \
                                           np.mean(pvalues <= 0.1)

Diluted margin= 0.01 , invalid rate= 0.1




Diluted margin= 0.01 , invalid rate= 0.2
Diluted margin= 0.05 , invalid rate= 0.1
Diluted margin= 0.05 , invalid rate= 0.2
Diluted margin= 0.1 , invalid rate= 0.1
Diluted margin= 0.1 , invalid rate= 0.2
Diluted margin= 0.2 , invalid rate= 0.1
Diluted margin= 0.2 , invalid rate= 0.2


In [3]:
simTable

Unnamed: 0,vote margin,null margin,null_is_true,invalid_rate,1% rejection rate,5% rejection rate,10% rejection rate
0,0.01,-0.20,0.0,0.1,0.832,0.948,0.973
1,0.01,-0.10,0.0,0.1,0.244,0.495,0.641
2,0.01,-0.05,0.0,0.1,0.085,0.248,0.362
3,0.01,-0.01,0.0,0.1,0.018,0.087,0.162
4,0.01,0.00,0.0,0.1,0.016,0.071,0.149
5,0.01,0.01,1.0,0.1,0.015,0.065,0.120
6,0.01,0.05,1.0,0.1,0.001,0.014,0.042
7,0.01,0.10,1.0,0.1,0.000,0.001,0.004
8,0.01,0.20,1.0,0.1,0.000,0.000,0.000
9,0.01,-0.20,0.0,0.2,0.841,0.951,0.976


In [4]:
simTable.to_csv("type1-error-results.csv")