In [1]:
from __future__ import division
import numpy as np
from prng import lcgRandom, MT19937
from sample import PIKK
from scipy.misc import comb, factorial
import pandas as pd

In [2]:
def getEmpiricalDistr(randomObject, n, k, reps=10**7):
    """Tally how often each distinct sample occurs over repeated draws.

    Calls ``PIKK(n, k, randomObject)`` ``reps`` times, freezes each result
    into a ``frozenset`` (so the order of items within a draw is ignored),
    and counts the occurrences of every distinct set.

    Parameters
    ----------
    randomObject : object
        Generator handed straight through to ``PIKK`` as its third argument.
    n, k : int
        Forwarded to ``PIKK``; presumably the population size and the
        sample size -- confirm against ``sample.PIKK``.
    reps : int, optional
        Number of draws to perform.

    Returns
    -------
    dict
        Maps ``frozenset`` of drawn items -> number of times it was drawn.
    """
    tallies = {}

    # range is lazy in Python 3; under Python 2 prefer xrange for large reps.
    for _ in range(reps):
        drawn = frozenset(PIKK(n, k, randomObject))
        tallies[drawn] = tallies.get(drawn, 0) + 1
    return tallies
    

def getItemCounts(uniqueSamples):
    """Aggregate per-sample counts into per-item counts.

    Parameters
    ----------
    uniqueSamples : dict
        Maps an iterable of items (e.g. a ``frozenset``) to the number of
        times that sample was observed.

    Returns
    -------
    dict
        Maps each item to the total count of all samples that contain it.
    """
    totals = {}
    for sample, timesSeen in uniqueSamples.items():
        for item in sample:
            # Every occurrence of the sample is one occurrence of each member.
            totals[item] = totals.get(item, 0) + timesSeen
    return totals


def printItemFreq(itemCounts, verbose=False, total_reps=None):
    """Convert per-item counts into relative frequencies, in place.

    Each value in ``itemCounts`` is divided by the number of repetitions.
    The dictionary passed in is modified and also returned, so calling this
    twice on the same dictionary divides twice.

    Parameters
    ----------
    itemCounts : dict
        Maps item -> count. Mutated in place.
    verbose : bool, optional
        When true, print each item together with its frequency.
    total_reps : int or None, optional
        Divisor to use. When omitted, the module-level ``reps`` is used,
        which preserves the behaviour of existing calls that do not pass it.

    Returns
    -------
    dict
        The same ``itemCounts`` object, now holding frequencies.
    """
    if total_reps is None:
        # Backward-compatible fallback: older call sites rely on the
        # notebook-global ``reps`` having been defined before the call.
        total_reps = reps
    for item in itemCounts:
        # Only values of existing keys change, so iterating is safe.
        itemCounts[item] /= total_reps
        if verbose:
            print(item, itemCounts[item])
    return itemCounts


def printMaxProbRatio(itemCounts, verbose=False):
    """Return the ratio of the largest to the smallest value in ``itemCounts``.

    Parameters
    ----------
    itemCounts : dict
        Maps item -> selection frequency (only the values are used).
    verbose : bool, optional
        When true, also print the ratio.

    Returns
    -------
    numpy scalar
        ``max(values) / min(values)``.
    """
    probs = np.asarray(list(itemCounts.values()))
    ratio = probs.max() / probs.min()
    if verbose:
        print("Max ratio of selection probs: " + str(ratio))
    return ratio

In [3]:
# Boilerplate: experiment settings and result accumulators

reps = 10**7       # draws per (PRNG, n, k) configuration
n = [13, 30, 90]   # values reported as 'Pop size' in the results table
k = [4, 10, 20]    # values reported as 'Sample size' in the results table

# Parallel lists: each simulation run appends one entry to every list,
# and together they become the columns of the results table.
maxProb, minProb, meanProb, maxProbRatio = [], [], [], []
nvalues, kvalues, prng = [], [], []

# RANDU

In [4]:
# Run the sampling experiment for every (population size, sample size) pair
# with the generator returned by lcgRandom(seed=100), and append one row of
# summary statistics per pair to the shared accumulator lists.
# NOTE(review): k contains 20, which exceeds the smallest population size
# (13); the saved results table shows every probability equal to 1.0 for
# that pair -- confirm whether such combinations should be skipped.
for nn in n:
    for kk in k:
        # A fresh generator per configuration, so each one starts from the same seed.
        lcg = lcgRandom(seed=100)  # set seed of RANDU to 100

        randu_counts = getEmpiricalDistr(lcg, n=nn, k=kk, reps=reps)
        # Optional diagnostic (uses the loop variables nn/kk, not the lists n/k):
        # print("Number of missing samples: " + str(comb(nn, kk) - len(randu_counts)))
        itemFreq = printItemFreq(getItemCounts(randu_counts))
        # Materialise the frequencies once instead of once per statistic.
        freqValues = list(itemFreq.values())

        maxProb.append(np.amax(freqValues))
        minProb.append(np.amin(freqValues))
        meanProb.append(np.mean(freqValues))
        maxProbRatio.append(printMaxProbRatio(itemFreq))
        nvalues.append(nn)
        kvalues.append(kk)
        prng.append('RANDU')

# Super Duper LCG

In [5]:
# Constants for the Super Duper LCG, forwarded to lcgRandom as M, B and A.
# NOTE(review): which of A and B is the multiplier and which the increment
# is defined by prng.lcgRandom -- confirm there.
M_SD = 1 << 32  # 2**32
B_SD = 69069
A_SD = 0

In [6]:
# Run the sampling experiment for every (population size, sample size) pair
# with the Super Duper LCG, and append one row of summary statistics per
# pair to the shared accumulator lists.
# NOTE(review): k contains 20, which exceeds the smallest population size
# (13) -- confirm whether such combinations should be skipped.
for nn in n:
    for kk in k:
        # A fresh generator per configuration, so each one starts from the same seed.
        sdlcg = lcgRandom(seed=100, A=A_SD, B=B_SD, M=M_SD)

        sdlcg_counts = getEmpiricalDistr(sdlcg, n=nn, k=kk, reps=reps)
        # Optional diagnostic (uses the loop variables nn/kk, not the lists n/k):
        # print("Number of missing samples: " + str(comb(nn, kk) - len(sdlcg_counts)))
        itemFreq = printItemFreq(getItemCounts(sdlcg_counts))
        # Materialise the frequencies once instead of once per statistic.
        freqValues = list(itemFreq.values())

        maxProb.append(np.amax(freqValues))
        minProb.append(np.amin(freqValues))
        meanProb.append(np.mean(freqValues))
        maxProbRatio.append(printMaxProbRatio(itemFreq))
        nvalues.append(nn)
        kvalues.append(kk)
        prng.append('Super Duper')

# Mersenne Twister

In [7]:
# Run the sampling experiment for every (population size, sample size) pair
# with the MT19937 generator, and append one row of summary statistics per
# pair to the shared accumulator lists.
# NOTE(review): k contains 20, which exceeds the smallest population size
# (13) -- confirm whether such combinations should be skipped.
for nn in n:
    for kk in k:
        # A fresh generator per configuration, so each one starts from the same seed.
        mt = MT19937(seed=100)

        mt_counts = getEmpiricalDistr(mt, n=nn, k=kk, reps=reps)
        # Optional diagnostic (uses the loop variables nn/kk, not the lists n/k):
        # print("Number of missing samples: " + str(comb(nn, kk) - len(mt_counts)))
        itemFreq = printItemFreq(getItemCounts(mt_counts))
        # Materialise the frequencies once instead of once per statistic.
        freqValues = list(itemFreq.values())

        maxProb.append(np.amax(freqValues))
        minProb.append(np.amin(freqValues))
        meanProb.append(np.mean(freqValues))
        maxProbRatio.append(printMaxProbRatio(itemFreq))
        nvalues.append(nn)
        kvalues.append(kk)
        prng.append('MT')

In [8]:
# Assemble the per-run summary statistics into a single table.
d = {'Sample size' : kvalues,
     'Pop size' : nvalues,
     'PRNG' : prng,
     'Min Prob' : minProb,
     'Mean Prob' : meanProb,
     'Max Prob' : maxProb,
     'Max Selection Prob Ratio' : maxProbRatio
    }
resTable = pd.DataFrame(d)
# Spell out the display order explicitly. Deriving it by reversing and
# re-indexing resTable.columns only works when pandas sorts dict keys
# alphabetically; with insertion-ordered dicts it scrambles the columns.
cols = ['Pop size', 'Sample size', 'PRNG',
        'Min Prob', 'Mean Prob', 'Max Prob',
        'Max Selection Prob Ratio']
resTable[cols].sort_values(['Pop size', 'Sample size'])

Unnamed: 0,Pop size,Sample size,PRNG,Min Prob,Mean Prob,Max Prob,Max Selection Prob Ratio
0,13,4,RANDU,0.307477,0.307692,0.308064,1.001908
9,13,4,Super Duper,0.307363,0.307692,0.308004,1.002086
18,13,4,MT,0.307383,0.307692,0.307893,1.001659
1,13,10,RANDU,0.76909,0.769231,0.769504,1.000539
10,13,10,Super Duper,0.768996,0.769231,0.769404,1.000531
19,13,10,MT,0.76907,0.769231,0.769441,1.000483
2,13,20,RANDU,1.0,1.0,1.0,1.0
11,13,20,Super Duper,1.0,1.0,1.0,1.0
20,13,20,MT,1.0,1.0,1.0,1.0
3,30,4,RANDU,0.133132,0.133333,0.133592,1.003454
