In [1]:
import math
import scipy as sp
from scipy.stats import bernoulli
import numpy as np
from numpy.testing import  assert_allclose
import json
np.random.seed(123456789)

In [2]:
from rilacs.strategies import linear_gamma_dist
import pytest
from rilacs.martingales import (
    apriori_Kelly_martingale,
    distKelly_martingale,
    sqKelly_martingale,
    dKelly_martingale,
)
import itertools

In [3]:
# to save dicts as json
class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to built-in Python types."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for `obj`.

        NumPy integers become `int`, NumPy floats become `float` and arrays
        become (nested) lists. Anything else is handed to the base class.
        """
        conversions = (
            (np.integer, int),
            (np.floating, float),
            (np.ndarray, lambda arr: arr.tolist()),
        )
        for numpy_type, convert in conversions:
            if isinstance(obj, numpy_type):
                return convert(obj)
        return super().default(obj)

In [4]:
def sprt_mart(x : np.array, N : int, mu : float=1/2, eta: float=1-np.finfo(float).eps, \
              u: float=1, random_order = True):
    '''
    Computes the SPRT test statistic for the hypothesis that the population
    mean is less than or equal to mu against the alternative that it is eta,
    for a population of size N of values in the interval [0, u].

    Generalizes Wald's SPRT for the Bernoulli to sampling without replacement and to bounded
    values rather than binary values.

    If N is finite, assumes the sample is drawn without replacement
    If N is infinite, assumes the sample is with replacement

    Data are assumed to be in random order. If not, the calculation for sampling without
    replacement is incorrect.

    Parameters:
    -----------
    x : array-like of values in [0, u], one element per draw
        (for binary data, 1 if the corresponding trial was a success)
    N : int
        population size for sampling without replacement, or np.inf for
        sampling with replacement
    mu : float in (0,u)
        hypothesized population mean
    eta : float in (0,u)
        alternative hypothesized population mean
    u : float
        upper bound on the values in the population
    random_order : Boolean
        whether the data are in random order. Must be True when N is finite;
        it is not consulted when N is infinite.

    Returns:
    --------
    terms : np.array
        running product of the per-draw ratios, one value per element of x.
        Entries are set to np.inf wherever the mean of the remaining population
        implied by the null is negative (the null is then surely false).

    Raises:
    -------
    ValueError
        if any element of x is outside [0, u], or if N is finite and
        random_order is False
    '''
    x = np.asarray(x)   # accept lists as documented; the arithmetic below needs an array
    if np.any((x < 0) | (x > u)):   # vectorized range check
        raise ValueError(f'Data out of range [0,{u}]')
    if np.isfinite(N):
        if not random_order:
            raise ValueError("data must be in random order for samples without replacement")
        S = np.insert(np.cumsum(x),0,0)[0:-1]  # 0, x_1, x_1+x_2, ...,
        j = np.arange(1,len(x)+1)              # 1, 2, 3, ..., len(x)
        m = (N*mu-S)/(N-j+1)                   # mean of population after (j-1)st draw, if null is true
    else:
        m = mu
    # m may reach 0 or u, so division warnings are silenced rather than raised
    with np.errstate(divide='ignore',invalid='ignore'):
        terms = np.cumprod((x*eta/m + (u-x)*(u-eta)/(u-m))/u) # generalization of Bernoulli SPRT
    terms[m<0] = np.inf                        # the null is surely false
    return terms


In [5]:
def shrink_trunc(x: np.array, N: int, mu: float=1/2, nu: float=1-np.finfo(float).eps, u: float=1, c: float=1/2, 
                 d: float=100) -> np.array: 
    r'''
    apply the shrinkage and truncation estimator to an array

    sample mean is shrunk towards nu, with relative weight d compared to a single observation.
    estimate is truncated above at u-u*eps and below at m_j+e_j(c,j)

    S_1 = 0
    S_j = \sum_{i=1}^{j-1} x_i, j > 1
    m_j = (N*mu-S_j)/(N-j+1) if np.isfinite(N) else mu
    e_j = c/sqrt(d+j-1)
    eta_j =  ( (d*nu + S_j)/(d+j-1) \vee (m_j+e_j) ) \wedge u*(1-eps)

    Parameters
    ----------
    x : np.array
        input data
    N : int
        population size for sampling without replacement, or np.inf for
        sampling with replacement
    mu : float in (0, 1)
        hypothesized population mean
    nu : float
        initial alternative hypothesized value for the population mean
    u : float
        upper bound on the data; the estimate is capped at u*(1-eps)
    c : positive float
        scale factor for allowing the estimated mean to approach m_j from above
    d : positive float
        relative weight of nu compared to an observation, in updating the alternative for each term

    Returns
    -------
    np.array of length len(x): the sequence eta_j, j = 1, ..., len(x)
    '''
    S = np.insert(np.cumsum(x),0,0)[0:-1]  # 0, x_1, x_1+x_2, ...,
    j = np.arange(1,len(x)+1)              # 1, 2, 3, ..., len(x)
    m = (N*mu-S)/(N-j+1) if np.isfinite(N) else mu   # mean of population after (j-1)st draw, if null is true
    shrunk = (d*nu+S)/(d+j-1)              # sample mean shrunk towards nu
    floor = m+c/np.sqrt(d+j-1)             # stay strictly above the null mean
    ceiling = u*(1-np.finfo(float).eps)    # stay strictly below the upper bound
    return np.minimum(ceiling, np.maximum(shrunk, floor))

In [6]:
def test_shrink_trunc():
    '''
    Compare shrink_trunc with a direct, term-by-term evaluation of the same
    formula, for sampling with replacement (N infinite) and without
    replacement (N = len(x)), on two fixed sequences and one random
    Bernoulli(1/2) sequence.
    '''
    def slack(c, d, j):
        # e_j: margin kept above the null mean at step j
        return c/math.sqrt(d+j-1)

    def prior_sum(x, j):
        # S_j: sum of the first j-1 observations
        return 0 if j==1 else np.sum(x[0:j-1])

    def null_mean(N, mu, x, j):
        # m_j: mean of the remaining population at step j if the null is true
        if np.isfinite(N):
            return (N*mu - prior_sum(x, j))/(N-j+1)
        return mu

    mu = 1/2
    u = 1
    d = 10
    ceiling = u*(1-np.finfo(float).eps)
    samples = [
        np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]),
        np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0]),
        sp.stats.bernoulli.rvs(1/2, size=20),
    ]
    for nu in (.51, .55, .6):
        c = (nu-mu)/2
        for x in samples:
            N = len(x)
            expected_inf = np.zeros(N)
            expected_fin = np.zeros(N)
            for j in range(1, N+1):
                shrunk = (d*nu + prior_sum(x,j))/(d+j-1)
                expected_inf[j-1] = np.minimum(np.maximum(mu+slack(c,d,j), shrunk), ceiling)
                expected_fin[j-1] = np.minimum(np.maximum(null_mean(N,mu,x,j)+slack(c,d,j), shrunk), ceiling)
            np.testing.assert_allclose(shrink_trunc(x, np.inf, mu, nu, c=c, d=d), expected_inf)
            np.testing.assert_allclose(shrink_trunc(x, N, mu, nu, c=c, d=d), expected_fin)

test_shrink_trunc()

In [7]:
def alpha_mart(x: np.array, N: int, mu: float=1/2, eta: float=1-np.finfo(float).eps, u: float=1, \
               estim: callable=shrink_trunc) -> np.array :
    '''
    Finds the ALPHA martingale for the hypothesis that the population
    mean is less than or equal to mu using a martingale method,
    for a population of size N, based on a series of draws x.

    The draws must be in random order, or the sequence is not a martingale under the null

    If N is finite, assumes the sample is drawn without replacement
    If N is infinite, assumes the sample is with replacement

    Parameters
    ----------
    x : array-like corresponding to the data, with values in [0, u]
    N : int
        population size for sampling without replacement, or np.inf for sampling with replacement
    mu : float in (0,u)
        hypothesized population mean
    eta : float in (mu,u)
        alternative hypothesized population mean
    u : float
        upper bound on the values in the population
    estim : callable
        estim(x, N, mu, eta, u) -> np.array of length len(x), the sequence of values of eta_j for ALPHA

    Returns
    -------
    terms : array
        sequence of terms that would be a nonnegative martingale under the null
    '''
    x = np.asarray(x)   # accept lists as documented; the arithmetic below needs an array
    S = np.insert(np.cumsum(x),0,0)[0:-1]  # 0, x_1, x_1+x_2, ...,
    j = np.arange(1,len(x)+1)              # 1, 2, 3, ..., len(x)
    m = (N*mu-S)/(N-j+1) if np.isfinite(N) else mu   # mean of population after (j-1)st draw, if null is true
    etaj = estim(x, N, mu, eta, u)
    # m may reach 0 or u, so division warnings are silenced rather than raised
    with np.errstate(divide='ignore',invalid='ignore'):
        # the complementary term uses (u-x), matching sprt_mart; (1-x) is only right when u == 1
        terms = np.cumprod((x*etaj/m + (u-x)*(u-etaj)/(u-m))/u)
    terms[m<0] = np.inf                    # the null is surely false
    return terms

# Sampling with replacement

In [None]:
# Table 1: mean stopping index when sampling with replacement, for the ALPHA
# martingale (one entry per d in dl) and for the fixed-alternative likelihood
# ratio product (results_w), over every (theta, eta) pair.

reps = int(10**3)
max_size = int(10**7)

thetal = [0.505, 0.51, 0.52, 0.53, 0.54, 0.55, 0.6, 0.65, 0.7]
etal = thetal
alpha = 0.05
mu = 1/2

c_base=0.5
dl=[10, 100, 500, 1000]

results_a = {}
results_w = {}
for theta in thetal:
    # running totals, converted to means after the replications
    results_a[theta] = {eta: {d: 0 for d in dl} for eta in etal}
    results_w[theta] = {eta: 0 for eta in etal}
    for i in range(reps):
        x = bernoulli.rvs(theta, size=max_size)
        for eta in etal:
            c = c_base*(eta-1/2)
            for d in dl:
                mart = alpha_mart(x, np.inf, mu=1/2, eta=eta, u=1,
                                  estim=lambda x, N, mu, eta, u: shrink_trunc(x,N,mu,eta,1,c=c,d=d))
                # first index at which the martingale reaches 1/alpha; argmax
                # yields 0 when it never does, which is recorded as infinity
                t = np.argmax(mart >= 1/alpha)
                if t > 0:
                    results_a[theta][eta][d] += t
                else:
                    results_a[theta][eta][d] += np.inf
            mart = np.cumprod(x*eta/mu+(1-x)*(1-eta)/(1-mu))
            t = np.argmax(mart >= 1/alpha)
            if t > 0:
                results_w[theta][eta] += t
            else:
                results_w[theta][eta] += np.inf
    for eta in etal:
        results_w[theta][eta] /= reps
        print(f'\n{theta=} {eta=} {results_w[theta][eta]=:.1f}')
        for d in dl:
            results_a[theta][eta][d] /= reps
        summary = [f"{d}: {results_a[theta][eta][d]:.1f}" for d in dl]
        print(f'{summary}')

In [None]:
# Print one LaTeX row per (theta, eta) pair; non-finite means are shown as "---".
for theta in thetal:
    print('\\hline')
    for eta in etal:
        alpha_cells = [
            (round(results_a[theta][eta][d]) if np.isfinite(results_a[theta][eta][d]) else "---" )
            for d in dl
        ]
        wald_mean = results_w[theta][eta]
        wald_cell = round(wald_mean) if np.isfinite(wald_mean) else "---"
        print(f'{theta} & {eta} &{alpha_cells} & {wald_cell} \\\\')

# Sampling without replacement, no invalid votes

In [None]:
# Calculations for Table 2, sampling without replacement
# N, sam_max, theta, and eta were chosen to allow comparison to Huang et al. 2020
#
# For each theta, a population of N values (int(N*theta) ones, the rest zeros) is
# shuffled `reps` times. Each method's martingale is computed on the shuffled
# population and the first index at which it reaches 1/alpha is recorded, both
# within the first sam_max draws and within the whole population.
#
# NOTE(review): np.argmax returns a 0-based index and returns 0 when the condition
# is never met, so `found` is one less than the number of draws used and
# `found==0` is used as the "did not reject" flag — confirm this is intended.

reps = int(10**5)

thetal = [.505, .51, .52, .55, .6, .64, .7]
N = 20000
sam_max = 2000
etal = [.51, .55, .7]  # used for BRAVO in Huang et al 2020
alpha = 0.05

# for ALPHA
c_base=0.5
dl= [10, 100, 500, 1000]

# for RiLACS
D = 10
beta = 1

# per-method tallies: sum of stopping indices and count of non-rejections,
# within sam_max draws ('..._sam') and within the whole population ('..._N')
resl = ['rej_sam','not_rej_sam','rej_N','not_rej_N']

results_a = {}     # ALPHA with shrink_trunc, keyed [theta][eta][d][r]
results_apa = {}   # sprt_mart with fixed alternative eta, keyed [theta][eta][r]
results_apk = {}   # a priori Kelly, keyed [theta][eta][r]
results_sqk = {}   # sqKelly, keyed [theta][r]

for theta in thetal:
    print(f'{theta=}')
    n_A = int(N*theta)
    n_B = N - n_A
    x = np.array([1]*n_A+[0]*n_B)
    # zero every tally for this theta
    results_a[theta] = {}
    results_apa[theta] = {}
    results_apk[theta] = {}
    results_sqk[theta] = {}
    for r in resl:
        results_sqk[theta][r] = 0
    for eta in etal:
        results_apk[theta][eta] = {}
        results_apa[theta][eta] = {}
        for r in resl:
            results_apk[theta][eta][r] = 0
            results_apa[theta][eta][r] = 0
        results_a[theta][eta]={} 
        for d in dl:
            results_a[theta][eta][d] = {}
            for r in resl:
                results_a[theta][eta][d][r] = 0
    for i in range(reps):
        # x is shuffled in place; every method below sees the same ordering
        np.random.shuffle(x)
        
        # sqKelly
        mart = sqKelly_martingale(x, m=1/2, N=N, D=D, beta=beta)
        # rejections by sam_max
        found = np.argmax(mart[0:sam_max] >= 1/alpha)
        results_sqk[theta]['rej_sam'] += found      
        results_sqk[theta]['not_rej_sam'] += (found==0)
        # rejections by N
        found = np.argmax(mart >= 1/alpha)
        results_sqk[theta]['rej_N'] += found
        results_sqk[theta]['not_rej_N'] += (found==0) # should not occur   
        
        # a priori Kelly and a priori SPRT
        for eta in etal:
            n_eta_A = int(N*eta)
            n_eta_B = N - n_eta_A
            c = c_base*(eta-1/2)   # shrink_trunc scale factor used by the ALPHA runs below
            
            # a priori Kelly
            mart = apriori_Kelly_martingale(x, m=0.5, N=N, n_A=n_eta_A, n_B=n_eta_B)
            # rejections by sam_max 
            found = np.argmax(mart[0:sam_max] >= 1/alpha)
            results_apk[theta][eta]['rej_sam'] += found      
            results_apk[theta][eta]['not_rej_sam'] += (found==0)
            # rejections by N
            found = np.argmax(mart >= 1/alpha)
            results_apk[theta][eta]['rej_N'] += found
            results_apk[theta][eta]['not_rej_N'] += (found==0) # should not occur
            
            # a priori SPRT
            mart = sprt_mart(x, N, mu=1/2, eta=eta, u=1, random_order=True)
            # rejections by sam_max 
            found = np.argmax(mart[0:sam_max] >= 1/alpha)
            results_apa[theta][eta]['rej_sam'] += found      
            results_apa[theta][eta]['not_rej_sam'] += (found==0)
            # rejections by N
            found = np.argmax(mart >= 1/alpha)
            results_apa[theta][eta]['rej_N'] += found
            results_apa[theta][eta]['not_rej_N'] += (found==0) # should not occur        
            
            # ALPHA
            for d in dl:               
                # the lambda is called inside alpha_mart during this iteration,
                # so it sees the current values of c and d
                mart = alpha_mart(x, N, mu=1/2, eta=eta, u=1, \
                                  estim=lambda x, N, mu, eta, u: shrink_trunc(x,N,mu,eta,1,c=c,d=d))
                # rejections by sam_max 
                found = np.argmax(mart[0:sam_max] >= 1/alpha)
                results_a[theta][eta][d]['rej_sam'] += found      
                results_a[theta][eta][d]['not_rej_sam'] += (found==0)
                # rejections by N
                found = np.argmax(mart >= 1/alpha)
                results_a[theta][eta][d]['rej_N'] += found
                results_a[theta][eta][d]['not_rej_N'] += (found==0) # should not occur        
                
# Convert tallies to summaries:
#   rej_sam, rej_N -> mean stopping index over the replications that rejected
#   not_rej_sam    -> fraction of replications that did not reject within sam_max
#   not_rej_N      -> left as a count; a positive value triggers a sanity-check message
# rej_sam must be divided before not_rej_sam is turned into a fraction.
for theta in thetal:
    results_sqk[theta]['rej_sam'] /= (reps-results_sqk[theta]['not_rej_sam'])
    results_sqk[theta]['not_rej_sam'] /= reps
    results_sqk[theta]['rej_N'] /= (reps-results_sqk[theta]['not_rej_N'])
    if results_sqk[theta]['not_rej_N'] > 0:
        print(f'sqKelly did not reject for {theta=}')  # sanity check
        
    for eta in etal:
        results_apk[theta][eta]['rej_sam'] /= (reps-results_apk[theta][eta]['not_rej_sam'])
        results_apk[theta][eta]['not_rej_sam'] /= reps
        results_apk[theta][eta]['rej_N'] /= (reps-results_apk[theta][eta]['not_rej_N'])
        if results_apk[theta][eta]['not_rej_N'] > 0:
            print(f'a priori Kelly did not reject for {theta=}, {eta=}')

        # results_apa holds the sprt_mart runs (reported as "a priori ALPHA" here)
        results_apa[theta][eta]['rej_sam'] /= (reps-results_apa[theta][eta]['not_rej_sam'])
        results_apa[theta][eta]['not_rej_sam'] /= reps
        results_apa[theta][eta]['rej_N'] /= (reps-results_apa[theta][eta]['not_rej_N'])
        if results_apa[theta][eta]['not_rej_N'] > 0:
            print(f'a priori ALPHA did not reject for {theta=}, {eta=}')
            
        for d in dl:
            results_a[theta][eta][d]['rej_sam'] /= (reps-results_a[theta][eta][d]['not_rej_sam'])
            results_a[theta][eta][d]['not_rej_sam'] /= reps
            results_a[theta][eta][d]['rej_N'] /= (reps-results_a[theta][eta][d]['not_rej_N'])
            if results_a[theta][eta][d]['not_rej_N'] > 0:
                print(f'ALPHA did not reject for {theta=}, {eta=}, {d=}')

In [None]:
# encode dicts and save as json, one file per results dict

file_stems = ['results_sqk','results_apk','results_apa','results_a']
for fs in file_stems:
    with open(fs+'.json','w') as file:
        # look the dict up by name in the notebook namespace rather than eval()-ing a string
        file.write(json.dumps(globals()[fs], cls=NpEncoder, indent = 4))

In [63]:
# test reading dicts from json: rebind each results dict to the contents of its file
file_stems = ['results_sqk','results_apk','results_apa','results_a']
for fs in file_stems:
    with open(fs+'.json','r') as file:
        data = json.load(file)
    # assign by name in the notebook namespace rather than exec()-ing a string
    globals()[fs] = data

# NOTE: JSON object keys are always strings, so the numeric keys
# (theta, eta and d) are now strings such as '0.505' and '10'

In [78]:
# Print Table 2 as a LaTeX tabular with 22 columns: the method label, then
# 7 power values and 7 mean sample sizes for n=2,000, then 7 mean sample
# sizes for n=N. Every data cell is written as "& value", so each row must
# contain exactly 21 of them after its label.
# NOTE: the results were reloaded from JSON, so their keys are strings
print('\\begin{tabular}{l|rrrrrrr|rrrrrrr|rrrrrrr}' +
      '& \\multicolumn{14}{|c|}{$n=2,000$} &  \\multicolumn{7}{|c}{$n=N$} \\\\ \n' +
      'Method & \\multicolumn{7}{c}{power, $\\theta=$} & \\multicolumn{7}{c|}{mean sample size, $\\theta=$} ' +
      ' & \\multicolumn{7}{|c}{mean sample size, $\\theta=$ }\\\\ \n' +
      f'{" ".join((*["& .505 & .51 & .52 & .55 & .6 & .64 & .7 "]*3,))} \\\\ \n' +
      '\\hline')

# no separator between the groups: each cell already starts with "& "
print('sqKelly ' +
      f'''{" ".join([f"& {int(100*(1-results_sqk[str(theta)]['not_rej_sam']))} " for theta in thetal])} ''' +
      f'''{" ".join([f"& {int(results_sqk[str(theta)]['rej_sam'])} " for theta in thetal])} ''' +
      f'''{" ".join([f"& {int(results_sqk[str(theta)]['rej_N'])} " for theta in thetal])} \\\\ '''
      )
print('\\hline')
for eta in etal:
    # the label carries no trailing "&": the first cell supplies it
    print(f'''a priori Kelly $\\eta={eta}$ ''' +
          f'''{" ".join([f"& {int(100*(1-results_apk[str(theta)][str(eta)]['not_rej_sam']))} " for theta in thetal])} ''' +
          f''' {" ".join([f"& {int(results_apk[str(theta)][str(eta)]['rej_sam'])} " for theta in thetal])} ''' +
          f''' {" ".join([f"& {int(results_apk[str(theta)][str(eta)]['rej_N'])} " for theta in thetal])} \\\\ '''
          )
print('\\hline')
for eta in etal:
    print(f'''SPRT $\\eta={eta}$ ''' +
          f'''{" ".join([f"& {int(100*(1-results_apa[str(theta)][str(eta)]['not_rej_sam']))} " for theta in thetal])} ''' +
          f''' {" ".join([f"& {int(results_apa[str(theta)][str(eta)]['rej_sam'])} " for theta in thetal])} ''' +
          f''' {" ".join([f"& {int(results_apa[str(theta)][str(eta)]['rej_N'])} " for theta in thetal])} \\\\  '''
          )
print('\\hline')
for eta in etal:
    for d in dl:
        print(f'''ALPHA $\\eta={eta}$ {d=} ''' +
              f'''{" ".join([f"& {int(100*(1-results_a[str(theta)][str(eta)][str(d)]['not_rej_sam']))} " for theta in thetal])} ''' +
              f'''{" ".join([f"& {int(results_a[str(theta)][str(eta)][str(d)]['rej_sam'])} " for theta in thetal])} ''' +
              f'''{" ".join([f"& {int(results_a[str(theta)][str(eta)][str(d)]['rej_N'])} " for theta in thetal])} \\\\ '''
          )        
print('\\end{tabular} \n')


\begin{tabular}{l|rrrrrrr|rrrrrrr|rrrrrrr}& \multicolumn{14}{|c|}{$n=2,000$} &  \multicolumn{7}{|c}{$n=N$} \\ 
Method & \multicolumn{7}{c}{power, $\theta=$} & \multicolumn{7}{c|}{mean sample size, $\theta=$}  & \multicolumn{7}{|c}{mean sample size, $\theta=$ }\\ 
& .505 & .51 & .52 & .55 & .6 & .64 & .7  & .505 & .51 & .52 & .55 & .6 & .64 & .7  & .505 & .51 & .52 & .55 & .6 & .64 & .7  \\ 
\hline
sqKelly & 8  & 14  & 37  & 98  & 100  & 100  & 100  & & 637  & 723  & 852  & 562  & 180  & 110  & 67  & & 17911  & 14269  & 4852  & 585  & 180  & 110  & 67  \\ 
\hline
a priori Kelly $\eta=0.51$ & & 0  & 0  & 3  & 84  & 100  & 100  & 100   & 1759  & 1748  & 1728  & 1469  & 773  & 547  & 380   & 13808  & 8374  & 4194  & 1590  & 773  & 547  & 380  \\ 
a priori Kelly $\eta=0.55$ & & 8  & 14  & 37  & 98  & 100  & 100  & 100   & 642  & 707  & 817  & 550  & 199  & 130  & 85   & 18065  & 14921  & 5457  & 576  & 199  & 130  & 85  \\ 
a priori Kelly $\eta=0.7$ & & 5  & 5  & 8  & 19  & 86  & 100  & 100

## Sampling without replacement with some non-votes

In [8]:
# Tables 3 and 4
#
# Sampling without replacement from populations in which a fraction b of the
# N values are 0.5 (non-votes). For each (theta, N, b) the population is shuffled
# `reps` times; each method's martingale is computed on the shuffled population
# and the first index at which it reaches 1/alpha is recorded.
#
# NOTE(review): np.argmax returns a 0-based index and returns 0 when the condition
# is never met, so `found` is one less than the number of draws used and
# `found==0` is used as the "did not reject" flag — confirm this is intended.

reps = int(10**3)

thetal = [.51, .52, .55, .6, .7]
blanks = [.1, .25, .5, .75]
Nl = [10000, 100000, 500000]
etal = thetal  
alpha = 0.05

# for ALPHA
c_base=0.5
dl= [10, 100, 1000]

# for RiLACs
D = 10
beta = 1

# per-method tallies: sum of stopping indices and count of non-rejections
resl = ['rej_N','not_rej_N']

results_a_b = {}     # ALPHA with shrink_trunc, keyed [theta][N][b][eta][d][r]
results_apa_b = {}   # sprt_mart with fixed alternative, keyed [theta][N][b][eta][r]
results_apk_b = {}   # a priori Kelly, keyed [theta][N][b][eta][r]
results_sqk_b = {}   # sqKelly, keyed [theta][N][b][r]

for theta in thetal:
    print(f'{theta=}')
    results_a_b[theta] = {}
    results_apa_b[theta] = {}
    results_apk_b[theta] = {}
    results_sqk_b[theta] = {}
    for N in Nl:
        print(f'\t{N=}')
        results_a_b[theta][N] = {}
        results_apa_b[theta][N] = {}
        results_apk_b[theta][N] = {}
        results_sqk_b[theta][N] = {}
        for b in blanks:
            print(f'\t\tblanks={b}')
            results_a_b[theta][N][b] = {}
            results_apa_b[theta][N][b] = {}
            results_apk_b[theta][N][b] = {}
            results_sqk_b[theta][N][b] = {}
            # population: n_A ones, n_B zeros and N-non_blank values of 0.5;
            # theta is the share of ones among the non-blank values
            non_blank = int(N*(1-b))
            n_A = int(non_blank*theta)
            n_B = non_blank - n_A
            x = np.array([1]*n_A+[0]*n_B+[0.5]*(N-non_blank))
            # zero every tally for this (theta, N, b)
            for r in resl:
                results_sqk_b[theta][N][b][r] = 0
            for eta in etal:
                results_apk_b[theta][N][b][eta] = {}
                results_apa_b[theta][N][b][eta] = {}
                for r in resl:
                    results_apk_b[theta][N][b][eta][r] = 0
                    results_apa_b[theta][N][b][eta][r] = 0
                results_a_b[theta][N][b][eta]={} 
                for d in dl:
                    results_a_b[theta][N][b][eta][d] = {}
                    for r in resl:
                        results_a_b[theta][N][b][eta][d][r] = 0
            for i in range(reps):
                # x is shuffled in place; every method below sees the same ordering
                np.random.shuffle(x)

                # sqKelly
                mart = sqKelly_martingale(x, m=1/2, N=N, D=D, beta=beta)
                # rejections by N
                found = np.argmax(mart >= 1/alpha)
                results_sqk_b[theta][N][b]['rej_N'] += found
                results_sqk_b[theta][N][b]['not_rej_N'] += (found==0) # should not occur   

                # a priori Kelly and a priori SPRT
                for eta in etal:
                    n_eta_A = int(non_blank*eta)
                    n_eta_B = non_blank - n_eta_A
                    # population mean when a share eta of the non-blank values are ones
                    # and every blank counts 1/2
                    eta_shangrla = (non_blank*eta + (N-non_blank)/2)/N
                    c = c_base*(eta-1/2)   # shrink_trunc scale factor used by the ALPHA runs below

                    # a priori Kelly
                    mart = apriori_Kelly_martingale(x, m=0.5, N=N, n_A=n_eta_A, n_B=n_eta_B)
                    # rejections by N
                    found = np.argmax(mart >= 1/alpha)
                    results_apk_b[theta][N][b][eta]['rej_N'] += found
                    results_apk_b[theta][N][b][eta]['not_rej_N'] += (found==0) # should not occur

                    # a priori SPRT
                    mart = sprt_mart(x, N, mu=1/2, eta=eta_shangrla, u=1, random_order=True)
                    # rejections by N
                    found = np.argmax(mart >= 1/alpha)
                    results_apa_b[theta][N][b][eta]['rej_N'] += found
                    results_apa_b[theta][N][b][eta]['not_rej_N'] += (found==0) # should not occur        

                    # ALPHA
                    for d in dl:               
                        # the lambda is called inside alpha_mart during this iteration,
                        # so it sees the current values of c and d
                        mart = alpha_mart(x, N, mu=1/2, eta=eta_shangrla, u=1, \
                                          estim=lambda x, N, mu, eta, u: shrink_trunc(x,N,mu,eta,1,c=c,d=d))
                        # rejections by N
                        found = np.argmax(mart >= 1/alpha)
                        results_a_b[theta][N][b][eta][d]['rej_N'] += found
                        results_a_b[theta][N][b][eta][d]['not_rej_N'] += (found==0) # should not occur        

# Convert each rej_N total to the mean stopping index over the replications that
# rejected; not_rej_N stays a count and a positive value triggers a sanity-check message.
for theta in thetal:
    for N in Nl:
        for b in blanks:
            results_sqk_b[theta][N][b]['rej_N'] /= (reps-results_sqk_b[theta][N][b]['not_rej_N'])
            if results_sqk_b[theta][N][b]['not_rej_N'] > 0:
                print(f'sqKelly did not reject for {theta=}, {N=}, {b=}')  # sanity check

            for eta in etal:
                results_apk_b[theta][N][b][eta]['rej_N'] /= (reps-results_apk_b[theta][N][b][eta]['not_rej_N'])
                if results_apk_b[theta][N][b][eta]['not_rej_N'] > 0:
                    print(f'a priori Kelly did not reject for {theta=},{N=}, {b=}, {eta=}')

                # results_apa_b holds the sprt_mart runs (reported as "a priori ALPHA" here)
                results_apa_b[theta][N][b][eta]['rej_N'] /= (reps-results_apa_b[theta][N][b][eta]['not_rej_N'])
                if results_apa_b[theta][N][b][eta]['not_rej_N'] > 0:
                    print(f'a priori ALPHA did not reject for {theta=}, {N=}, {b=}, {eta=}')

                for d in dl:
                    results_a_b[theta][N][b][eta][d]['rej_N'] /= (reps-results_a_b[theta][N][b][eta][d]['not_rej_N'])
                    if results_a_b[theta][N][b][eta][d]['not_rej_N'] > 0:
                        print(f'ALPHA did not reject for {theta=}, {N=}, {b=}, {eta=}, {d=}')

theta=0.51
N=10000
blanks=0.1
blanks=0.25
blanks=0.5
blanks=0.75
N=100000
blanks=0.1
blanks=0.25
blanks=0.5
blanks=0.75
N=500000
blanks=0.1
blanks=0.25
blanks=0.5
blanks=0.75
theta=0.52
N=10000
blanks=0.1
blanks=0.25
blanks=0.5
blanks=0.75
N=100000
blanks=0.1
blanks=0.25
blanks=0.5
blanks=0.75
N=500000
blanks=0.1
blanks=0.25
blanks=0.5
blanks=0.75
theta=0.55
N=10000
blanks=0.1
blanks=0.25
blanks=0.5
blanks=0.75
N=100000
blanks=0.1
blanks=0.25
blanks=0.5
blanks=0.75
N=500000
blanks=0.1
blanks=0.25
blanks=0.5
blanks=0.75
theta=0.6
N=10000
blanks=0.1
blanks=0.25
blanks=0.5
blanks=0.75
N=100000
blanks=0.1
blanks=0.25
blanks=0.5
blanks=0.75
N=500000
blanks=0.1
blanks=0.25
blanks=0.5
blanks=0.75
theta=0.7
N=10000
blanks=0.1
blanks=0.25
blanks=0.5
blanks=0.75
N=100000
blanks=0.1
blanks=0.25
blanks=0.5
blanks=0.75
N=500000
blanks=0.1
blanks=0.25
blanks=0.5
blanks=0.75


In [56]:
# Print Table 3 as a LaTeX tabular with 15 columns: theta, method and
# parameters, then one mean stopping index per (N, fraction blank) pair.
print('\\begin{tabular}{lll|rrrr|rrrr|rrrr} \n' +
      '& & & \\multicolumn{4}{|c|}{$N=$10,000, \\%blank} &  \\multicolumn{4}{|c|}{$N=$100,000 \\%blank} & \\multicolumn{4}{|c}{$N=$500,000 \\%blank} \\\\ \n' +
      f'$\\theta$ & Method & params {" ".join((*["& 10 & 25 & 50 & 75 "]*3,))} \\\\'
      )

for theta in thetal:
    print(f'''\\hline {theta} & sqKelly & {" ".join(([f"& {int(results_sqk_b[theta][N][b]['rej_N']) :.0f} " for N in Nl for b in blanks]))} \\\\''')
    for eta in etal:
        print('\\cline{2-15} &' + f''' apKelly & $\\eta=${eta} {" ".join(([f"& {int(results_apk_b[theta][N][b][eta]['rej_N']) :.0f} " for N in Nl for b in blanks]))} \\\\''')
        for d in dl:
            print(f'''& ALPHA & $\\eta=${eta} $d=${d} {" ".join(([f"& {int(results_a_b[theta][N][b][eta][d]['rej_N']) :.0f} " for N in Nl for b in blanks]))} \\\\''')
        # the sprt_mart results are labelled as ALPHA with d = infinity;
        # the backslash of \infty is escaped so it is not an invalid escape sequence
        print(f''' & ALPHA & $\\eta=${eta} $d=\\infty$ {" ".join(([f"& {int(results_apa_b[theta][N][b][eta]['rej_N']) :.0f} " for N in Nl for b in blanks]))} \\\\''')
        
print('\\end{tabular} \n')

\begin{tabular}{lll|rrrr|rrrr|rrrr} 
& & & \multicolumn{4}{|c|}{$N=$10,000, \%blank} &  \multicolumn{4}{|c|}{$N=$100,000 \%blank} & \multicolumn{4}{|c}{$N=$500,000 \%blank} \\ 
$\theta$ & Method & params & 10 & 25 & 50 & 75  & 10 & 25 & 50 & 75  & 10 & 25 & 50 & 75  \\
\hline 0.51 & sqKelly & & 7271  & 7420  & 7568  & 8124  & 73038  & 71845  & 73009  & 72056  & 363029  & 357717  & 348645  & 359587  \\
\cline{2-15} & apKelly & $\eta=$0.51 & 6464  & 7092  & 8047  & 9500  & 13565  & 16001  & 21784  & 35756  & 16007  & 19459  & 27831  & 52930  \\
& ALPHA & $\eta=$0.51 $d=$10 & 6299  & 6742  & 7557  & 9131  & 19190  & 21973  & 30928  & 56423  & 24656  & 29591  & 45372  & 113200  \\
& ALPHA & $\eta=$0.51 $d=$100 & 6101  & 6636  & 7559  & 9142  & 17616  & 20921  & 30526  & 56335  & 22423  & 27878  & 44243  & 112691  \\
& ALPHA & $\eta=$0.51 $d=$1000 & 5882  & 6561  & 7572  & 9172  & 16318  & 19758  & 29917  & 56669  & 20239  & 25785  & 43134  & 112720  \\
 & ALPHA & $\eta=$0.51 $d=\infty$ & 5

In [9]:
# encode the non-vote results dicts and save each as json
file_stems = ['results_sqk_b','results_apk_b','results_apa_b','results_a_b']
for fs in file_stems:
    with open(fs+'.json','w') as file:
        # look the dict up by name in the notebook namespace rather than eval()-ing a string
        file.write(json.dumps(globals()[fs], cls=NpEncoder, indent = 4))

In [42]:
# For every (theta, N, b) scenario, find the smallest mean stopping index
# achieved by any method and any parameter choice.
best = {}
for theta in thetal:
    best[theta] = {}
    for N in Nl:
        best[theta][N] = {}
        for b in blanks:
            # candidates are gathered in the order the methods are compared
            candidates = [results_sqk_b[theta][N][b]['rej_N']]
            for eta in etal:
                candidates.append(results_apk_b[theta][N][b][eta]['rej_N'])
                candidates.append(results_apa_b[theta][N][b][eta]['rej_N'])
                for d in dl:
                    candidates.append(results_a_b[theta][N][b][eta][d]['rej_N'])
            best[theta][N][b] = min(candidates)
        

In [43]:
# Display the per-scenario minima collected in `best`, keyed [theta][N][b].
best

{0.51: {10000: {0.1: 5409.292, 0.25: 5919.96, 0.5: 6491.219, 0.75: 7923.86},
  100000: {0.1: 13565.794, 0.25: 16001.688, 0.5: 21381.092, 0.75: 33375.673},
  500000: {0.1: 16007.838, 0.25: 19459.365, 0.5: 27769.965, 0.75: 52930.246}},
 0.52: {10000: {0.1: 2749.961, 0.25: 3146.757, 0.5: 3740.16, 0.75: 5383.689},
  100000: {0.1: 3910.75, 0.25: 4720.113, 0.5: 6805.454, 0.75: 12355.561},
  500000: {0.1: 4086.6, 0.25: 4987.651, 0.5: 7670.988, 0.75: 14533.562}},
 0.55: {10000: {0.1: 632.371, 0.25: 718.052, 0.5: 1052.424, 0.75: 1838.313},
  100000: {0.1: 647.253, 0.25: 805.948, 0.5: 1153.444, 0.75: 2349.629},
  500000: {0.1: 664.028, 0.25: 850.411, 0.5: 1180.349, 0.75: 2353.366}},
 0.6: {10000: {0.1: 157.758, 0.25: 196.103, 0.5: 301.101, 0.75: 537.41},
  100000: {0.1: 167.667, 0.25: 206.127, 0.5: 303.217, 0.75: 583.019},
  500000: {0.1: 164.82, 0.25: 195.794, 0.5: 306.887, 0.75: 631.752}},
 0.7: {10000: {0.1: 41.963, 0.25: 51.383, 0.5: 76.067, 0.75: 144.29},
  100000: {0.1: 41.512, 0.25: 50.30

In [44]:
# For each method and parameter choice, accumulate the product over all
# (theta, N, b) scenarios of its mean stopping index divided by the best one
# for that scenario. `items` counts the scenarios so that the geometric mean
# can be taken later.
sqk_r = 1
apk_r = {eta: 1 for eta in etal}
apa_r = {eta: 1 for eta in etal}
a_r = {eta: {d: 1 for d in dl} for eta in etal}

items = 0
for theta in thetal:
    for N in Nl:
        for b in blanks:
            items += 1
            scenario_best = best[theta][N][b]
            sqk_r *= results_sqk_b[theta][N][b]['rej_N']/scenario_best
            for eta in etal:
                apk_r[eta] *= results_apk_b[theta][N][b][eta]['rej_N']/scenario_best
                apa_r[eta] *= results_apa_b[theta][N][b][eta]['rej_N']/scenario_best
                for d in dl:
                    a_r[eta][d] *= results_a_b[theta][N][b][eta][d]['rej_N']/scenario_best

items

60

In [55]:
# Print the score table: each score is the geometric mean, over the `items`
# scenarios, of a method's mean stopping index relative to the best method.
# Every LaTeX backslash is written as '\\' so that no string relies on an
# invalid escape sequence such as '\h' or '\i'.
print('\\begin{tabular}{llr}\\\\ \nMethod & Parameters & Score \\\\')
print(f'\\hline SqKelly & & {sqk_r**(1/items) :0.2f} \\\\ \n \\hline a priori Kelly ')
for eta in etal:
    print(f' & $\\eta=${eta} & {apk_r[eta]**(1/items) :0.2f} \\\\')
print('\\hline ALPHA ')
for eta in etal:
    for d in dl:
        print(f' & $\\eta=${eta} $d=${d} & {a_r[eta][d]**(1/items) :0.2f} \\\\ ')
    # the sprt_mart results are labelled as ALPHA with d = infinity
    print(f' & $\\eta=${eta} $d=\\infty$ & {apa_r[eta]**(1/items) :0.2f} \\\\')
    print('\\cline{2-3}')

print('\\end{tabular}')

\begin{tabular}{llr}\\ 
Method & Parameters & Score \\
\hline SqKelly & & 1.89 \\ 
 \hline a priori Kelly 
 & $\eta=$0.51 & 2.91 \\
 & $\eta=$0.52 & 1.98 \\
 & $\eta=$0.55 & 2.14 \\
 & $\eta=$0.6 & 2.98 \\
 & $\eta=$0.7 & 7.48 \\
\hline ALPHA 
 & $\eta=$0.51 $d=$10 & 1.62 \\ 
 & $\eta=$0.51 $d=$100 & 1.77 \\ 
 & $\eta=$0.51 $d=$1000 & 2.29 \\ 
 & $\eta=$0.51 $d=\infty$ & 3.81 \\
\cline{2-3}
 & $\eta=$0.52 $d=$10 & 1.61 \\ 
 & $\eta=$0.52 $d=$100 & 1.73 \\ 
 & $\eta=$0.52 $d=$1000 & 2.08 \\ 
 & $\eta=$0.52 $d=\infty$ & 2.63 \\
\cline{2-3}
 & $\eta=$0.55 $d=$10 & 1.60 \\ 
 & $\eta=$0.55 $d=$100 & 1.63 \\ 
 & $\eta=$0.55 $d=$1000 & 1.71 \\ 
 & $\eta=$0.55 $d=\infty$ & 2.16 \\
\cline{2-3}
 & $\eta=$0.6 $d=$10 & 1.58 \\ 
 & $\eta=$0.6 $d=$100 & 1.54 \\ 
 & $\eta=$0.6 $d=$1000 & 1.60 \\ 
 & $\eta=$0.6 $d=\infty$ & 2.40 \\
\cline{2-3}
 & $\eta=$0.7 $d=$10 & 1.57 \\ 
 & $\eta=$0.7 $d=$100 & 1.57 \\ 
 & $\eta=$0.7 $d=$1000 & 1.99 \\ 
 & $\eta=$0.7 $d=\infty$ & 3.90 \\
\cline{2-3}
\end{tabular}
