In [1]:
import sys
# Make the project checkout importable. The path is inserted at position 1
# (right after the notebook's own directory); presumably this is so the
# project-local `test` module imported below takes precedence over any other
# module of the same name later on sys.path -- TODO confirm.
sys.path.insert(1, '/Users/yiningliu/research/pooled-sampling/COVID-19-pooling') # set this to your directory

In [2]:
import numpy as np
from numpy import genfromtxt
import scipy.io
from test import recover_pool_results

In [3]:
# Membership (pooling) matrix published by Shental et al.
# Download the file from https://github.com/NoamShental/PBEST/blob/master/mFiles/poolingMatrix.mat
# and adjust the absolute path below to wherever it is stored locally.
matrix_file = scipy.io.loadmat('/Users/yiningliu/research/pooled-sampling/COVID-19-pooling/tests/data/shental-poolingMatrix.mat')
# loadmat returns a dict keyed by MATLAB variable name; the matrix is stored as 'poolingMatrix'.
membership_matrix = matrix_file['poolingMatrix'] 

In [105]:
def compare_truth_and_estimates(membership_matrix, true_infection_vectors_file, fpr, fnr, f):
    """Simulate pooled tests from ground-truth vectors and score the recovery.

    The ground truth is read from a comma-delimited file, the pool outcomes
    are computed as the sign of ``membership_matrix`` times the ground truth,
    and ``recover_pool_results`` is asked to reconstruct the infection
    vectors from those outcomes. Error counts and accuracy are printed.

    Parameters
    ----------
    membership_matrix : array-like
        Pooling matrix multiplied with the ground truth.
    true_infection_vectors_file : str
        Path to a comma-delimited file holding the ground-truth vectors.
    fpr, fnr, f :
        Forwarded unchanged to ``recover_pool_results`` (presumably the
        false-positive rate, false-negative rate and infection rate --
        confirm against that function).

    Returns
    -------
    tuple
        ``(xs, recovered_xs, recovered_false_ps, recovered_false_ns)``: the
        ground truth followed by the three values returned by
        ``recover_pool_results``.
    """
    truth = genfromtxt(true_infection_vectors_file, delimiter=',')
    # A pool is positive (1) as soon as it contains at least one positive sample.
    pool_outcomes = np.sign(np.matmul(membership_matrix, truth))
    estimate, estimated_false_ps, estimated_false_ns = recover_pool_results(
        membership_matrix, pool_outcomes, fpr, fnr, f
    )

    print("=========================")

    mismatch_mask = truth != estimate
    false_positive_mask = (truth == 0) & (estimate == 1)
    false_negative_mask = (truth == 1) & (estimate == 0)
    num_errors = mismatch_mask.sum()
    num_fp = false_positive_mask.sum()
    num_fn = false_negative_mask.sum()
    print("%s errors: %s false positive(s), %s false negative(s)" % (num_errors, num_fp, num_fn))

    num_correct = (truth == estimate).sum()
    accuracy = num_correct / truth.size
    print("accuracy: %.2f %%" % (accuracy * 100))

    return truth, estimate, estimated_false_ps, estimated_false_ns

def check_ILP_optimality(xs, recovered_xs):
    """Report, for each mis-recovered trial, whether the truth is at least as dense as the estimate.

    This is only for noiseless data. For noisy measurement, need to include
    ||f|| and ||n|| in the objective.

    For every column (trial) in which the recovered vector differs from the
    ground truth, one line is printed with the number of positives in the
    ground truth and whether it is >= the number of positives in the
    recovered vector.

    Parameters
    ----------
    xs : 2-D array
        Ground-truth vectors, one column per trial.
    recovered_xs : 2-D array
        Recovered vectors, indexed the same way as ``xs``.

    Returns
    -------
    None
    """
    # Iterate over the actual number of trials rather than a hard-coded 100,
    # so inputs with fewer columns do not raise and extra columns are not skipped.
    _, num_trials = xs.shape
    for trial in range(num_trials):
        x, recovered_x = xs[:, trial], recovered_xs[:, trial]
        num_errors = (x != recovered_x).sum()
        if num_errors != 0:
            print("||x|| = %s >= ||recovered_x||? %s" % (sum(x), sum(x) >= sum(recovered_x)))

# Test Result for f = 1/384

In [16]:
# Shental et al. pooling matrix, f = 1/384, with fpr = fnr = 0.
fpr, fnr, f = 0, 0, 1/384
file1 = '/Users/yiningliu/research/pooled-sampling/COVID-19-pooling/tests/data/x-p-1-384.csv'
# Pass the membership matrix explicitly (first parameter of
# compare_truth_and_estimates) and use the `file1` name defined just above.
xs, recovered_xs, recovered_false_ps, recovered_false_ns = compare_truth_and_estimates(membership_matrix, file1, fpr, fnr, f)
check_ILP_optimality(xs, recovered_xs)

Starting trail 0 ...
Starting trail 10 ...
Starting trail 20 ...
Starting trail 30 ...
Starting trail 40 ...
Starting trail 50 ...
Starting trail 60 ...
Starting trail 70 ...
Starting trail 80 ...
Starting trail 90 ...
0 errors: 0 false positive(s), 0 false negative(s)
accuracy: 100.00 %


# Test Result for f = 2/384

In [26]:
# Shental et al. pooling matrix, f = 2/384, with fpr = fnr = 0.
fpr, fnr, f = 0, 0, 2/384
file2 = '/Users/yiningliu/research/pooled-sampling/COVID-19-pooling/tests/data/x-p-2-384.csv'
# The membership matrix is the first parameter of compare_truth_and_estimates.
xs, recovered_xs, recovered_false_ps, recovered_false_ns = compare_truth_and_estimates(membership_matrix, file2, fpr, fnr, f)
check_ILP_optimality(xs, recovered_xs)

Starting trail 0 ...
Starting trail 10 ...
Starting trail 20 ...
Starting trail 30 ...
Starting trail 40 ...
Starting trail 50 ...
Starting trail 60 ...
Starting trail 70 ...
Starting trail 80 ...
Starting trail 90 ...
1 errors: 0 false positive(s), 1 false negative(s)
accuracy: 100.00 %
||x|| = 6.0 >= ||recovered_x||? True


# Test Result for f = 3/384

In [29]:
# Shental et al. pooling matrix, f = 3/384, with fpr = fnr = 0.
fpr, fnr, f = 0, 0, 3/384
file3 = '/Users/yiningliu/research/pooled-sampling/COVID-19-pooling/tests/data/x-p-3-384.csv'
# The membership matrix is the first parameter of compare_truth_and_estimates.
xs, recovered_xs, recovered_false_ps, recovered_false_ns = compare_truth_and_estimates(membership_matrix, file3, fpr, fnr, f)
check_ILP_optimality(xs, recovered_xs)

Starting trail 0 ...
Starting trail 10 ...
Starting trail 20 ...
Starting trail 30 ...
Starting trail 40 ...
Starting trail 50 ...
Starting trail 60 ...
Starting trail 70 ...
Starting trail 80 ...
Starting trail 90 ...
33 errors: 15 false positive(s), 18 false negative(s)
accuracy: 99.91 %
||x|| = 5.0 >= ||recovered_x||? True
||x|| = 7.0 >= ||recovered_x||? True
||x|| = 6.0 >= ||recovered_x||? True
||x|| = 8.0 >= ||recovered_x||? True
||x|| = 8.0 >= ||recovered_x||? True
||x|| = 7.0 >= ||recovered_x||? True
||x|| = 7.0 >= ||recovered_x||? True


In [31]:
# Sanity check on the last run: total of the recovered false-positive and
# false-negative arrays (presumably zero because fpr = fnr = 0 -- confirm).
recovered_false_ps.sum(), recovered_false_ns.sum() # should be zeros

(0.0, 0.0)

# Test Result for f = 4/384

In [32]:
# Shental et al. pooling matrix, f = 4/384, with fpr = fnr = 0.
fpr, fnr, f = 0, 0, 4/384
file4 = '/Users/yiningliu/research/pooled-sampling/COVID-19-pooling/tests/data/x-p-4-384.csv'
# The membership matrix is the first parameter of compare_truth_and_estimates.
xs, recovered_xs, recovered_false_ps, recovered_false_ns = compare_truth_and_estimates(membership_matrix, file4, fpr, fnr, f)
check_ILP_optimality(xs, recovered_xs)

Starting trail 0 ...
Starting trail 10 ...
Starting trail 20 ...
Starting trail 30 ...
Starting trail 40 ...
Starting trail 50 ...
Starting trail 60 ...
Starting trail 70 ...
Starting trail 80 ...
Starting trail 90 ...
151 errors: 65 false positive(s), 86 false negative(s)
accuracy: 99.61 %
||x|| = 8.0 >= ||recovered_x||? True
||x|| = 7.0 >= ||recovered_x||? True
||x|| = 10.0 >= ||recovered_x||? True
||x|| = 10.0 >= ||recovered_x||? True
||x|| = 6.0 >= ||recovered_x||? True
||x|| = 7.0 >= ||recovered_x||? True
||x|| = 8.0 >= ||recovered_x||? True
||x|| = 6.0 >= ||recovered_x||? True
||x|| = 10.0 >= ||recovered_x||? True
||x|| = 5.0 >= ||recovered_x||? True
||x|| = 7.0 >= ||recovered_x||? True
||x|| = 7.0 >= ||recovered_x||? True
||x|| = 7.0 >= ||recovered_x||? True
||x|| = 7.0 >= ||recovered_x||? True
||x|| = 8.0 >= ||recovered_x||? True
||x|| = 11.0 >= ||recovered_x||? True
||x|| = 7.0 >= ||recovered_x||? True
||x|| = 9.0 >= ||recovered_x||? True


In [33]:
# Sanity check on the last run: total of the recovered false-positive and
# false-negative arrays (presumably zero because fpr = fnr = 0 -- confirm).
recovered_false_ps.sum(), recovered_false_ns.sum() # should be zeros

(0.0, 0.0)

# Test using Random Measurement Matrix
Each row (pool) gets 48 nonzero entries, chosen uniformly at random without replacement among the samples.

In [110]:
# Allocate an all-zero matrix with the same dimensions as the reference
# membership matrix: rows are pools, columns are samples.
num_pools, num_samples = membership_matrix.shape
random_membership_matrix = np.zeros((num_pools, num_samples))

In [111]:
# Put 48 distinct, randomly chosen samples into every pool (row).
# NOTE(review): no random seed is set, so the matrix differs on every run;
# seed NumPy's generator beforehand if reproducible results are needed.
for i in range(num_pools):
    indices = np.random.choice(num_samples, 48, replace=False)
    # Clear the row first so that re-running this cell keeps exactly 48
    # entries per row instead of accumulating ones from earlier runs.
    random_membership_matrix[i, :] = 0
    random_membership_matrix[i, indices] = 1

In [112]:
# Column sums: the number of pools (rows) each individual (column) is included in.
random_membership_matrix.sum(0)

array([ 3.,  5.,  8.,  5.,  8.,  7.,  6.,  6.,  6.,  5.,  4.,  9.,  8.,
        4.,  8.,  6.,  9.,  4.,  4.,  5.,  3.,  9.,  4.,  9.,  1.,  4.,
        7.,  8.,  2.,  7., 10.,  7.,  4.,  7.,  4.,  7.,  5.,  7.,  7.,
        6.,  6.,  8.,  4.,  6.,  1.,  6.,  3.,  3.,  5.,  2.,  7.,  4.,
        6.,  6.,  5.,  8.,  9.,  9., 10.,  7.,  5.,  5., 10.,  6.,  8.,
        8.,  1.,  5.,  2.,  8.,  3.,  8.,  5.,  8.,  7.,  5.,  3.,  4.,
        6.,  7.,  5.,  3., 11.,  3.,  7.,  5.,  8.,  6.,  5.,  5.,  9.,
       16.,  6.,  6., 11.,  7.,  5.,  5.,  7.,  6.,  8.,  7.,  4.,  2.,
        6.,  7.,  6.,  3.,  7.,  8.,  6.,  8.,  7.,  2.,  7.,  1.,  7.,
        6.,  7.,  9.,  3.,  9.,  3.,  7.,  7.,  5.,  4.,  5.,  5.,  6.,
        8.,  8.,  6., 10.,  5.,  8.,  7.,  4.,  4.,  8.,  6.,  4., 10.,
        2.,  9.,  3.,  7.,  6.,  7.,  9.,  8.,  6.,  6.,  4.,  9.,  5.,
        7.,  6.,  3.,  7.,  3.,  9.,  4.,  4.,  8.,  8.,  8.,  2., 10.,
        9.,  5.,  8.,  5.,  7.,  5.,  5.,  7.,  7.,  8.,  6.,  7

# Test Result for f = 1/384

In [None]:
# Random pooling matrix, f = 1/384, with fpr = fnr = 0.
fpr, fnr, f = 0, 0, 1/384
file1 = '/Users/yiningliu/research/pooled-sampling/COVID-19-pooling/tests/data/x-p-1-384.csv'
# Use the `file1` name defined just above; `file_1` is not defined in this notebook.
xs, recovered_xs, recovered_false_ps, recovered_false_ns = compare_truth_and_estimates(random_membership_matrix, file1, fpr, fnr, f)
check_ILP_optimality(xs, recovered_xs)

Starting trail 0 ...
Starting trail 10 ...
Starting trail 20 ...


# Test Result for f = 2/384

In [None]:
# Random pooling matrix, f = 2/384, with fpr = fnr = 0.
fpr = fnr = 0
f = 2/384
file2 = '/Users/yiningliu/research/pooled-sampling/COVID-19-pooling/tests/data/x-p-2-384.csv'
(
    xs,
    recovered_xs,
    recovered_false_ps,
    recovered_false_ns,
) = compare_truth_and_estimates(random_membership_matrix, file2, fpr, fnr, f)
check_ILP_optimality(xs, recovered_xs)

# Test Result for f = 3/384

In [None]:
# Random pooling matrix, f = 3/384, with fpr = fnr = 0.
fpr = fnr = 0
f = 3/384
file3 = '/Users/yiningliu/research/pooled-sampling/COVID-19-pooling/tests/data/x-p-3-384.csv'
(
    xs,
    recovered_xs,
    recovered_false_ps,
    recovered_false_ns,
) = compare_truth_and_estimates(random_membership_matrix, file3, fpr, fnr, f)
check_ILP_optimality(xs, recovered_xs)

# Test Result for f = 4/384

In [None]:
# Random pooling matrix, f = 4/384, with fpr = fnr = 0.
fpr = fnr = 0
f = 4/384
file4 = '/Users/yiningliu/research/pooled-sampling/COVID-19-pooling/tests/data/x-p-4-384.csv'
(
    xs,
    recovered_xs,
    recovered_false_ps,
    recovered_false_ns,
) = compare_truth_and_estimates(random_membership_matrix, file4, fpr, fnr, f)
check_ILP_optimality(xs, recovered_xs)