# Imports

In [1]:
import numpy as np
from scipy.special import comb
import pandas as pd
import mpmath
from tqdm.notebook import tqdm

# Utils

In [2]:
def combinations(n, r):
    """Return the binomial coefficient C(n, r) as an mpmath number.

    Parameters
    ----------
    n : int or float
        Total number of items. Callers in this notebook pass float slot
        counts such as ``150.0``.
    r : int
        Number of items chosen.

    Returns
    -------
    mpmath number
        ``n! / (r! * (n - r)!)`` evaluated at mpmath's current working
        precision.
    """
    # Use the library routine instead of dividing three separately computed
    # factorials by hand; it is gamma-based as well, and is expected to return
    # 0 for integer r > n rather than failing on factorial(n - r).
    return mpmath.binomial(n, r)

In [3]:
def get_max_k_high_precission(n, p, conf, MAX_SEARCH_SPACE = 1000):
    """Return the largest k with P(X <= k) < conf for X ~ Binomial(n, p).

    The binomial CDF is accumulated one term at a time using mpmath numbers;
    the search stops at the first k whose cumulative probability is no longer
    below ``conf``.

    Parameters
    ----------
    n : int or float
        Number of trials. ``int(n)`` bounds the search, so k ranges over
        ``0 .. int(n) - 1``.
    p : float
        Per-trial success probability.
    conf : float
        Threshold the cumulative probability must stay strictly below.
    MAX_SEARCH_SPACE : int, optional
        Not used by this implementation; kept in the signature only so that
        existing calls keep working.

    Returns
    -------
    int or None
        The largest qualifying k, or ``None`` when even k = 0 already has a
        cumulative probability that is not below ``conf``.
    """
    cumulative = mpmath.mpf(0)
    max_k = None

    for k in range(int(n)):
        # Binomial pmf at k: C(n, k) * p^k * (1 - p)^(n - k).
        term = mpmath.fmul(
            combinations(n, k),
            mpmath.fmul(mpmath.power(p, k), mpmath.power(1-p, n-k)),
        )
        # Keep a running total rather than re-summing a growing list of terms
        # on every iteration (which made the loop quadratic in k).
        cumulative = mpmath.fadd(cumulative, term)

        if not (cumulative < conf):
            break
        max_k = k  # k is still small enough to satisfy the condition

    return max_k

# Presets

In [4]:
# Observation windows: the same nine durations, written in hours and in minutes.
hour_range = [0.5, 0.75, 1, 1.5, 2, 4, 8, 16, 32]
minutes_range = [30, 45, 60, 90, 120, 240, 480, 960, 1920]

# Slots per window: minutes -> seconds (* 60), then one slot per 12 seconds.
# True division is used, so every entry is a float (e.g. 150.0).
n_slots_range = [minutes * 60 / 12 for minutes in minutes_range]
# Per-trial probabilities 0.1, 0.2, ..., 0.9.
p_non_censoring_probability = np.arange(0.1, 1, 0.1)
# Thresholds that the cumulative probability is compared against.
confidence = [1e-03, 1e-06, 1e-09, 1e-12, 1e-24]

In [19]:
# Display the slot counts derived from minutes_range.
n_slots_range

[150.0, 225.0, 300.0, 450.0, 600.0, 1200.0, 2400.0, 4800.0, 9600.0]

In [20]:
# Display the grid of per-trial probabilities used in the sweep.
p_non_censoring_probability

array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

In [21]:
# Display the list of thresholds used in the sweep.
confidence

[0.001, 1e-06, 1e-09, 1e-12, 1e-24]

In [36]:
# Sweep every (window, p, conf) combination and record the largest k found.
results = []

# hour_range and n_slots_range are parallel lists, so walk them in lockstep.
for hours, n in zip(hour_range, n_slots_range):
    print(f'calculate for {hours} hours...')
    for p in tqdm(p_non_censoring_probability):
        for conf in confidence:
            row = (hours, n, p, conf, get_max_k_high_precission(n, p, conf))
            results.append(row)

# One row per combination; max_k is None where no k met the threshold.
results_df = pd.DataFrame(results, columns = ['hours', 'n', 'p', 'conf', 'max_k'])
results_df

calculate for 0.5 hours...


  0%|          | 0/9 [00:00<?, ?it/s]

calculate for 0.75 hours...


  0%|          | 0/9 [00:00<?, ?it/s]

calculate for 1 hours...


  0%|          | 0/9 [00:00<?, ?it/s]

calculate for 1.5 hours...


  0%|          | 0/9 [00:00<?, ?it/s]

calculate for 2 hours...


  0%|          | 0/9 [00:00<?, ?it/s]

calculate for 4 hours...


  0%|          | 0/9 [00:00<?, ?it/s]

calculate for 8 hours...


  0%|          | 0/9 [00:00<?, ?it/s]

calculate for 16 hours...


  0%|          | 0/9 [00:00<?, ?it/s]

calculate for 32 hours...


  0%|          | 0/9 [00:00<?, ?it/s]

Unnamed: 0,hours,n,p,conf,max_k
0,0.5,150.0,0.1,1.000000e-03,4.0
1,0.5,150.0,0.1,1.000000e-06,0.0
2,0.5,150.0,0.1,1.000000e-09,
3,0.5,150.0,0.1,1.000000e-12,
4,0.5,150.0,0.1,1.000000e-24,
...,...,...,...,...,...
400,32.0,9600.0,0.9,1.000000e-03,8547.0
401,32.0,9600.0,0.9,1.000000e-06,8496.0
402,32.0,9600.0,0.9,1.000000e-09,8458.0
403,32.0,9600.0,0.9,1.000000e-12,8426.0


In [41]:
# Show only the rows for the 2-hour window.
results_df.loc[results_df['hours'] == 2]

Unnamed: 0,hours,n,p,conf,max_k
180,2.0,600.0,0.1,0.001,38.0
181,2.0,600.0,0.1,1e-06,27.0
182,2.0,600.0,0.1,1e-09,20.0
183,2.0,600.0,0.1,1e-12,15.0
184,2.0,600.0,0.1,1e-24,2.0
185,2.0,600.0,0.2,0.001,90.0
186,2.0,600.0,0.2,1e-06,75.0
187,2.0,600.0,0.2,1e-09,64.0
188,2.0,600.0,0.2,1e-12,56.0
189,2.0,600.0,0.2,1e-24,32.0


# Presets 2

In [6]:
# Second sweep: longer windows (in hours) and much smaller per-trial
# probabilities. These assignments rebind the names used by the first sweep.
hour_range = [1, 2, 4, 6, 12, 24, 48, 120, 168]

# Slots per window: hours -> seconds (* 60 * 60), then one slot per 12 seconds.
# True division is used, so every entry is a float (e.g. 300.0).
n_slots_range = [hours * 60 * 60 / 12 for hours in hour_range]
p_non_censoring_probability = [0.1, 0.01, 0.001, 0.0001]
confidence = [1e-03, 1e-06, 1e-09]

In [7]:
# Repeat the sweep with the second set of presets.
results = []

# hour_range and n_slots_range are parallel lists, so walk them in lockstep.
for hours, n in zip(hour_range, n_slots_range):
    print(f'calculate for {hours} hours...')
    for p in tqdm(p_non_censoring_probability):
        for conf in confidence:
            # The mpmath-based search is used here, not the float64 get_max_k.
            row = (hours, n, p, conf, get_max_k_high_precission(n, p, conf))
            results.append(row)

# One row per combination; max_k is None where no k met the threshold.
results_df = pd.DataFrame(results, columns = ['hours', 'n', 'p', 'conf', 'max_k'])
results_df

calculate for 1 hours...


  0%|          | 0/4 [00:00<?, ?it/s]

calculate for 2 hours...


  0%|          | 0/4 [00:00<?, ?it/s]

calculate for 4 hours...


  0%|          | 0/4 [00:00<?, ?it/s]

calculate for 6 hours...


  0%|          | 0/4 [00:00<?, ?it/s]

calculate for 12 hours...


  0%|          | 0/4 [00:00<?, ?it/s]

calculate for 24 hours...


  0%|          | 0/4 [00:00<?, ?it/s]

calculate for 48 hours...


  0%|          | 0/4 [00:00<?, ?it/s]

calculate for 120 hours...


  0%|          | 0/4 [00:00<?, ?it/s]

calculate for 168 hours...


  0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0,hours,n,p,conf,max_k
0,1,300.0,0.1000,1.000000e-03,14.0
1,1,300.0,0.1000,1.000000e-06,8.0
2,1,300.0,0.1000,1.000000e-09,3.0
3,1,300.0,0.0100,1.000000e-03,
4,1,300.0,0.0100,1.000000e-06,
...,...,...,...,...,...
103,168,50400.0,0.0010,1.000000e-06,20.0
104,168,50400.0,0.0010,1.000000e-09,13.0
105,168,50400.0,0.0001,1.000000e-03,
106,168,50400.0,0.0001,1.000000e-06,


In [9]:
# Show only the rows for the 48-hour window.
results_df.loc[results_df['hours'] == 48]

Unnamed: 0,hours,n,p,conf,max_k
72,48,14400.0,0.1,0.001,1329.0
73,48,14400.0,0.1,1e-06,1271.0
74,48,14400.0,0.1,1e-09,1228.0
75,48,14400.0,0.01,0.001,108.0
76,48,14400.0,0.01,1e-06,90.0
77,48,14400.0,0.01,1e-09,77.0
78,48,14400.0,0.001,0.001,3.0
79,48,14400.0,0.001,1e-06,0.0
80,48,14400.0,0.001,1e-09,
81,48,14400.0,0.0001,0.001,


In [10]:
# Show only the rows for the 168-hour window.
results_df.loc[results_df['hours'] == 168]

Unnamed: 0,hours,n,p,conf,max_k
96,168,50400.0,0.1,0.001,4832.0
97,168,50400.0,0.1,1e-06,4722.0
98,168,50400.0,0.1,1e-09,4640.0
99,168,50400.0,0.01,0.001,435.0
100,168,50400.0,0.01,1e-06,400.0
101,168,50400.0,0.01,1e-09,375.0
102,168,50400.0,0.001,0.001,29.0
103,168,50400.0,0.001,1e-06,20.0
104,168,50400.0,0.001,1e-09,13.0
105,168,50400.0,0.0001,0.001,


In [54]:
# Persist the current results_df to a Parquet file (path is relative to the
# notebook's working directory).
results_df.to_parquet('results_lower_p.parquet')

# Other

In [None]:
# mpmath.fmul(2, mpmath.fmul(2, 2))
# mpmath.power(3, 2)
# mpmath.fsum([3, 2])

In [None]:
def get_max_k_high_precission(n, p, conf, MAX_SEARCH_SPACE = 1000):
    """Capped search for the largest k with P(X <= k) < conf, X ~ Binomial(n, p).

    NOTE: this notebook defines a function with the same name in the "Utils"
    section. Running this cell rebinds the name to this capped variant, so any
    sweep executed afterwards can only report k values below
    ``MAX_SEARCH_SPACE``.

    Parameters
    ----------
    n : int or float
        Number of trials.
    p : float
        Per-trial success probability.
    conf : float
        Threshold the cumulative probability must stay strictly below.
    MAX_SEARCH_SPACE : int, optional
        Upper bound on how many k values are examined. The search never goes
        past ``int(n)`` either, so terms with k > n (which would need the
        factorial of a negative number) are not evaluated.

    Returns
    -------
    int or None
        The largest qualifying k among those examined, or ``None`` when even
        k = 0 has a cumulative probability that is not below ``conf``.
    """
    cumulative = mpmath.mpf(0)
    max_k = None

    # Examine k = 0 .. limit - 1, bounded by both the cap and the trial count.
    limit = min(MAX_SEARCH_SPACE, int(n))

    for k in range(limit):
        # Binomial pmf at k: C(n, k) * p^k * (1 - p)^(n - k).
        term = mpmath.fmul(
            combinations(n, k),
            mpmath.fmul(mpmath.power(p, k), mpmath.power(1-p, n-k)),
        )
        # Keep a running total rather than re-summing a growing list of terms
        # on every iteration (which made the loop quadratic in k).
        cumulative = mpmath.fadd(cumulative, term)

        if not (cumulative < conf):
            break
        max_k = k  # k is still small enough to satisfy the condition

    return max_k

In [None]:
def get_max_k(n, p, conf, MAX_SEARCH_SPACE = 1000):
    """Float64 search for the largest k with P(X <= k) < conf, X ~ Binomial(n, p).

    Each binomial term is built from ``scipy.special.comb`` and ``np.power``
    and the terms seen so far are summed with NumPy. At most
    ``MAX_SEARCH_SPACE`` values of k (starting from 0) are examined.

    NOTE(review): everything here is ordinary floating point, so large ``n``
    can presumably overflow ``comb`` or underflow the powers; the mpmath-based
    function in this notebook looks like the intended replacement for those
    cases.

    Returns
    -------
    int or None
        The largest qualifying k among those examined, or ``None`` when even
        k = 0 has a cumulative probability that is not below ``conf``.
    """
    terms = []
    best_k = None

    k = 0
    while k < MAX_SEARCH_SPACE:
        # Binomial pmf at k: C(n, k) * p^k * (1 - p)^(n - k).
        pmf_k = np.prod([comb(n, k), np.power(p, k), np.power(1-p, n-k)])
        terms.append(pmf_k)
        cdf_k = np.array(terms).sum()

        # Stop at the first k whose cumulative probability is not below conf
        # (a NaN total also lands here, because the comparison is False).
        if not (cdf_k < conf):
            break

        best_k = k
        k += 1

    return best_k

In [154]:
# Spot check of the float64 implementation on a small case.
get_max_k(n=688, p=0.1, conf=1e-06)

34

In [155]:
# Same inputs through the mpmath implementation, for comparison with get_max_k.
get_max_k_high_precission(n=688, p=0.1, conf=1e-06)

34