In [1]:
import sys

# Expose the project checkout on Google Drive to the import system.
# Index 1 keeps the existing first entry of sys.path ahead of the project directory.
# NOTE(review): presumably this is where the local modules imported further down
# (util, optimal_sizes, membership_matrix, test) live -- confirm.
project_dir = '/content/drive/MyDrive/COVID-19-pooling'
sys.path.insert(1, project_dir)

In [2]:
! pip install mip



In [3]:
import os

# Make sure the output directory used by the save steps below exists.
# `exist_ok=True` makes the cell safe to re-run and avoids the race between a
# separate existence check and the directory creation.
os.makedirs('./results', exist_ok=True)

In [4]:
from util import simulate_x
import numpy as np
import matplotlib.pyplot as plt
from optimal_sizes import minT
from membership_matrix import generate_doubly_regular_col
from test import test_M
from util import get_Ts 
import json 

In [5]:
# Number of trials per configuration; forwarded as `num_trials` to both
# simulate_x and test_M in the experiment cell below.
num_trials = 100 

# Number of Pools Analysis
In this notebook, we will investigate the effect of the number of pools on the performance of pooling matrices. 

In [6]:
# --- Experiment configuration ---

# Second dimension of the (T, n) membership matrix built below.
# NOTE(review): presumably the number of samples being pooled -- confirm.
n = 1536
# Column weight handed to get_Ts and generate_doubly_regular_col.
column_weight = 8

# Error rates forwarded to test_M.
# NOTE(review): presumably false-negative / false-positive rates, with 0 meaning
# a noiseless test -- confirm against test_M.
fnr = 0
fpr = 0

# Values of f from 1% to 10% in 1% steps (the plot labels f as a percentage).
# Built from integer percentages instead of np.arange with a float step, which
# accumulates rounding error (e.g. 0.060000000000000005) and would leak into the
# keys of the saved JSON results. Each entry is a plain Python float.
fs = [percent / 100 for percent in range(1, 11)]

In [None]:
# Sweep every value of f against every candidate number of pools and record the
# mean accuracy reported by test_M for each combination.
Ts = get_Ts(n, column_weight)

results = {}

for f in fs:
    # Simulate data for this f. The return value is not used here.
    # NOTE(review): presumably simulate_x persists the simulated trials somewhere
    # test_M can read them -- confirm.
    simulate_x(n, f, num_trials=num_trials)

    # One mean accuracy per entry of Ts, in the same order as Ts.
    mean_accuracies = []
    for num_pools in Ts:
        print("Starting T=%s ..." % num_pools)
        membership = generate_doubly_regular_col((num_pools, n), column_weight)
        trial_info = test_M(membership, f, n, fpr, fnr, num_trials=num_trials)
        mean_accuracies.append(np.average(trial_info["accuracy"]))

    results[f] = mean_accuracies

# Persist the {f: [mean accuracy per T]} mapping as JSON.
with open("./results/num-pools-n%s.txt" % n, 'w') as results_file:
    json.dump(results, results_file)

On average, 15.44 positives in each trail.
Starting T=12 ...
Finished trial 100
Starting T=16 ...
Finished trial 100
Starting T=24 ...
Finished trial 100
Starting T=32 ...
Finished trial 100
Starting T=48 ...
Finished trial 100
Starting T=64 ...
Finished trial 100
Starting T=96 ...
Finished trial 100
Starting T=128 ...
Finished trial 100
Starting T=192 ...
Finished trial 100
Starting T=256 ...
Finished trial 100
Starting T=384 ...
Finished trial 100
Starting T=512 ...
Finished trial 100
Starting T=768 ...
Finished trial 100
On average, 30.87 positives in each trail.
Starting T=12 ...
Finished trial 100
Starting T=16 ...
Finished trial 100
Starting T=24 ...
Finished trial 100
Starting T=32 ...


In [None]:
# Plot mean accuracy against the number of pools, one curve per value of f, and
# mark on each curve the first pool count that reaches minT(f, n).
fig, ax = plt.subplots()
last_index = len(Ts) - 1

for f in fs:
    accuracies = results[f]

    # First index (searching all but the final entry) whose pool count is at
    # least minT(f, n); falls back to the final entry when none qualifies.
    threshold = minT(f, n)
    marker_index = next(
        (idx for idx in range(last_index) if Ts[idx] >= threshold),
        last_index,
    )

    ax.plot(Ts, accuracies, label="f=%.0f%%"% (f * 100))
    ax.scatter(Ts[marker_index], accuracies[marker_index])

ax.legend()
ax.set_xlabel("Number of Pools")
ax.set_ylabel("Accuracy")
ax.set_title("Effect of the Number of Pools")

# Save before show(): showing may leave the current figure empty afterwards.
plt.savefig("./results/num-pools-n%s-m%s.png" % (n, column_weight))
plt.show()