In [1]:
from collections import defaultdict
import numpy as np
import pandas as pd
import csv

# Selection models to process. The order matters: each model appends one
# (mean, error) pair to every prob_dict entry, and the dataframe cells read
# those pairs back by position (moderate, weak, strong).
models = ["moderate", "weak", "strong"]

# Not referenced by any cell visible in this notebook section.
# NOTE(review): presumably the number of replicate values per CSV row - confirm.
Nbootstrap = 10

# Number of random contexts expected in each likelihoods_*_random.csv file.
num_random_contexts = 1000


## CR9114

In [28]:
#### CR9114 settings ####
# `antibody` is spliced into the likelihood file names that are read below.
antibody = '9114'

# Antigen labels for this antibody.
antigens = ['H1', 'H3', 'FluB']

# Number of leading label columns in every likelihood row; it is also the
# length of each scenario key in prob_dict.
L = 16



In [51]:
# Read likelihoods from files and make a dictionary.
#
# prob_dict maps a scenario key (tuple of L antigen labels) to
#     [scenario_type, mean, err, mean, err, ...]
# with one (mean, err) pair appended per model, in the order of `models`.
prob_dict = {}
for model in models:

    # ---- explicitly listed scenarios ----
    # newline='' is the mode the csv module documents for reading.
    with open('data/likelihoods_'+antibody+'_'+model+'.csv', 'r', newline='') as readfile:
        for row in csv.reader(readfile):
            if not row:
                # csv.reader yields [] for blank lines; there is nothing to summarise.
                continue

            key = tuple(row[:L])
            if 'mixed' in key:
                # Any row labelled 'mixed' is stored under one all-'average' key.
                key = ('average',)*L
            values = [float(x) for x in row[L:]]

            if model == 'strong':
                if 'average' in key:
                    # The strong-model summary is left undefined for the averaged scenario.
                    mean, sem = np.nan, np.nan
                else:
                    mean = np.mean(values)
                    sem = np.std(values, ddof=1)
            else:
                # Log of the mean, with the spread propagated through the log
                # to first order (std / mean).
                mean = np.log(np.mean(values))
                sem = np.std(values, ddof=1)/np.mean(values)

            if key in prob_dict:
                prob_dict[key].extend([mean, sem])
            else:
                # Label the scenario the first time its key is seen.
                single_antigen = [ag for ag in antigens if key == (ag,)*L]
                if single_antigen:
                    scenario_type = single_antigen[0]+'_only'
                elif 'average' in key:
                    scenario_type = 'mixed_average'
                else:
                    scenario_type = 'sequential'
                prob_dict[key] = [scenario_type, mean, sem]

    # ---- random contexts: one row per context, pooled into a single entry ----
    # Collected in lists rather than preallocated arrays, so a file whose row
    # count differs from num_random_contexts cannot leave uninitialised slots
    # in the average or overflow the buffer.
    random_values = []
    random_errs = []
    with open('data/likelihoods_'+antibody+'_'+model+'_random.csv', 'r', newline='') as readfile:
        for row in csv.reader(readfile):
            if not row:
                continue
            values = [float(x) for x in row[L:]]
            if model == 'strong':
                random_values.append(np.mean(values))
                random_errs.append(np.std(values, ddof=1))
            else:
                random_values.append(np.log(np.mean(values)))
                random_errs.append(np.std(values, ddof=1)/np.mean(values))

    if len(random_values) != num_random_contexts:
        print('warning: expected '+str(num_random_contexts)+' random contexts for model '
              +model+', found '+str(len(random_values)))

    mean = np.mean(random_values)
    # NOTE(review): this is the spread of the per-context error estimates, not
    # of random_values themselves - confirm this is the intended uncertainty.
    sem = np.std(random_errs, ddof=1)

    key = ('random',)*L
    if key in prob_dict:
        prob_dict[key].extend([mean, sem])
    else:
        prob_dict[key] = ['mixed_random', mean, sem]

print(len(prob_dict))


725


In [52]:
# Reformat prob_dict as a dataframe, sort it, and write it to disk.
#
# The antigen columns and the output file name are derived from L and
# `antibody` rather than spelled out, so this cell always agrees with the
# settings that were used to read the likelihood files.
scenario_keys = list(prob_dict.keys())
scenario_stats = list(prob_dict.values())

# Column order: ScenarioType, Ag1..AgL, then the per-model summaries.
columns = {'ScenarioType': [stats[0] for stats in scenario_stats]}
for pos in range(L):
    columns['Ag'+str(pos+1)] = [key[pos] for key in scenario_keys]

# Positions 1..6 of every prob_dict value hold the (mean, err) pairs in the
# order the models were processed: moderate, weak, strong.
stat_columns = ['MeanLogProb_Moderate', 'StdErrLogProb_Moderate',
                'MeanLogProb_Weak', 'StdErrLogProb_Weak',
                'MeanPaths_Strong', 'StdErrPaths_Strong']
for pos, name in enumerate(stat_columns, start=1):
    columns[name] = [stats[pos] for stats in scenario_stats]

prob_df = pd.DataFrame(columns).sort_values(by=['MeanLogProb_Moderate'], ascending=False)

prob_df.to_csv('data/'+antibody+'_scenario_probabilities.csv', index=False)

prob_df

Unnamed: 0,ScenarioType,Ag1,Ag2,Ag3,Ag4,Ag5,Ag6,Ag7,Ag8,Ag9,...,Ag13,Ag14,Ag15,Ag16,MeanLogProb_Moderate,StdErrLogProb_Moderate,MeanLogProb_Weak,StdErrLogProb_Weak,MeanPaths_Strong,StdErrPaths_Strong
437,sequential,H1,H1,H1,H1,H1,H3,H3,H3,H3,...,H3,FluB,FluB,FluB,-12.456035,1.037490,-10.180258,0.273735,12021371.3,9.013946e+06
554,sequential,H1,H1,H1,H1,H1,H1,H3,H3,H3,...,H3,FluB,FluB,FluB,-12.663612,1.145840,-10.163164,0.279486,21052293.6,1.470031e+07
476,sequential,H1,H1,H1,H1,H1,H3,H3,H3,H3,...,FluB,FluB,FluB,FluB,-12.861887,0.777849,-10.622306,0.272821,3947722.4,2.314285e+06
205,sequential,H1,H1,H1,H1,H1,H1,H3,H3,H3,...,FluB,FluB,FluB,FluB,-13.235156,0.947050,-10.628184,0.278974,6170172.1,3.376908e+06
716,sequential,H1,H1,H1,H1,H3,H3,H3,H3,H3,...,H3,FluB,FluB,FluB,-13.425262,0.999357,-10.284436,0.267540,3652536.5,2.941756e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
512,sequential,H3,H3,H3,H3,FluB,FluB,FluB,FluB,FluB,...,FluB,FluB,FluB,FluB,-62.866281,2.301367,-15.257277,0.104440,0.0,0.000000e+00
192,FluB_only,FluB,FluB,FluB,FluB,FluB,FluB,FluB,FluB,FluB,...,FluB,FluB,FluB,FluB,-62.866281,2.301367,-15.257277,0.104440,0.0,0.000000e+00
280,sequential,H3,H3,FluB,FluB,FluB,FluB,FluB,FluB,FluB,...,FluB,FluB,FluB,FluB,-62.866281,2.301367,-15.257277,0.104440,0.0,0.000000e+00
83,sequential,H3,H3,H3,FluB,FluB,FluB,FluB,FluB,FluB,...,FluB,FluB,FluB,FluB,-62.866281,2.301367,-15.257277,0.104440,0.0,0.000000e+00


## CR6261 

In [53]:
#### CR6261 settings ####
# `antibody` is spliced into the likelihood file names that are read below.
antibody = '6261'

# Antigen labels for this antibody.
antigens = ['H1', 'H9']

# Number of leading label columns in every likelihood row; it is also the
# length of each scenario key in prob_dict.
L = 11



In [54]:
# Read likelihoods from files and make a dictionary.
#
# prob_dict maps a scenario key (tuple of L antigen labels) to
#     [scenario_type, mean, err, mean, err, ...]
# with one (mean, err) pair appended per model, in the order of `models`.
prob_dict = {}
for model in models:

    # ---- explicitly listed scenarios ----
    # newline='' is the mode the csv module documents for reading.
    with open('data/likelihoods_'+antibody+'_'+model+'.csv', 'r', newline='') as readfile:
        for row in csv.reader(readfile):
            if not row:
                # csv.reader yields [] for blank lines; there is nothing to summarise.
                continue

            key = tuple(row[:L])
            if 'mixed' in key:
                # Any row labelled 'mixed' is stored under one all-'average' key.
                key = ('average',)*L
            values = [float(x) for x in row[L:]]

            if model == 'strong':
                if 'average' in key:
                    # The strong-model summary is left undefined for the averaged scenario.
                    mean, sem = np.nan, np.nan
                else:
                    mean = np.mean(values)
                    sem = np.std(values, ddof=1)
            else:
                # Log of the mean, with the spread propagated through the log
                # to first order (std / mean).
                mean = np.log(np.mean(values))
                sem = np.std(values, ddof=1)/np.mean(values)

            if key in prob_dict:
                prob_dict[key].extend([mean, sem])
            else:
                # Label the scenario the first time its key is seen.
                single_antigen = [ag for ag in antigens if key == (ag,)*L]
                if single_antigen:
                    scenario_type = single_antigen[0]+'_only'
                elif 'average' in key:
                    scenario_type = 'mixed_average'
                else:
                    scenario_type = 'sequential'
                prob_dict[key] = [scenario_type, mean, sem]

    # ---- random contexts: one row per context, pooled into a single entry ----
    # Collected in lists rather than preallocated arrays, so a file whose row
    # count differs from num_random_contexts cannot leave uninitialised slots
    # in the average or overflow the buffer.
    random_values = []
    random_errs = []
    with open('data/likelihoods_'+antibody+'_'+model+'_random.csv', 'r', newline='') as readfile:
        for row in csv.reader(readfile):
            if not row:
                continue
            values = [float(x) for x in row[L:]]
            if model == 'strong':
                random_values.append(np.mean(values))
                random_errs.append(np.std(values, ddof=1))
            else:
                random_values.append(np.log(np.mean(values)))
                random_errs.append(np.std(values, ddof=1)/np.mean(values))

    if len(random_values) != num_random_contexts:
        print('warning: expected '+str(num_random_contexts)+' random contexts for model '
              +model+', found '+str(len(random_values)))

    mean = np.mean(random_values)
    # NOTE(review): this is the spread of the per-context error estimates, not
    # of random_values themselves - confirm this is the intended uncertainty.
    sem = np.std(random_errs, ddof=1)

    key = ('random',)*L
    if key in prob_dict:
        prob_dict[key].extend([mean, sem])
    else:
        prob_dict[key] = ['mixed_random', mean, sem]

print(len(prob_dict))

24


In [55]:
# Reformat prob_dict as a dataframe, sort it, and write it to disk.
#
# The antigen columns and the output file name are derived from L and
# `antibody` rather than spelled out, so this cell always agrees with the
# settings that were used to read the likelihood files.
scenario_keys = list(prob_dict.keys())
scenario_stats = list(prob_dict.values())

# Column order: ScenarioType, Ag1..AgL, then the per-model summaries.
columns = {'ScenarioType': [stats[0] for stats in scenario_stats]}
for pos in range(L):
    columns['Ag'+str(pos+1)] = [key[pos] for key in scenario_keys]

# Positions 1..6 of every prob_dict value hold the (mean, err) pairs in the
# order the models were processed: moderate, weak, strong.
stat_columns = ['MeanLogProb_Moderate', 'StdErrLogProb_Moderate',
                'MeanLogProb_Weak', 'StdErrLogProb_Weak',
                'MeanPaths_Strong', 'StdErrPaths_Strong']
for pos, name in enumerate(stat_columns, start=1):
    columns[name] = [stats[pos] for stats in scenario_stats]

prob_df = pd.DataFrame(columns).sort_values(by=['MeanLogProb_Moderate'], ascending=False)

prob_df.to_csv('data/'+antibody+'_scenario_probabilities.csv', index=False)

prob_df

Unnamed: 0,ScenarioType,Ag1,Ag2,Ag3,Ag4,Ag5,Ag6,Ag7,Ag8,Ag9,Ag10,Ag11,MeanLogProb_Moderate,StdErrLogProb_Moderate,MeanLogProb_Weak,StdErrLogProb_Weak,MeanPaths_Strong,StdErrPaths_Strong
15,sequential,H1,H1,H1,H1,H9,H9,H9,H9,H9,H9,H9,-7.075328,0.372122,-7.884102,0.101984,135191.7,41300.037043
1,sequential,H1,H1,H1,H9,H9,H9,H9,H9,H9,H9,H9,-7.326466,0.408493,-7.940729,0.075671,107705.4,35781.425079
22,sequential,H1,H1,H1,H1,H1,H9,H9,H9,H9,H9,H9,-7.785895,0.558482,-7.931012,0.093875,162012.5,42229.556883
21,sequential,H1,H1,H1,H1,H1,H1,H9,H9,H9,H9,H9,-7.976453,0.437457,-8.065697,0.069256,175671.8,40319.878392
0,sequential,H1,H1,H1,H1,H1,H1,H1,H9,H9,H9,H9,-8.56322,0.632307,-8.320703,0.079079,181928.8,39160.899703
18,sequential,H1,H1,H1,H1,H1,H1,H1,H1,H9,H9,H9,-9.737674,0.401018,-8.688664,0.062854,186372.7,35028.119285
13,sequential,H1,H1,H9,H9,H9,H9,H9,H9,H9,H9,H9,-9.97433,0.784462,-8.509556,0.059543,60248.9,22570.026714
20,mixed_average,average,average,average,average,average,average,average,average,average,average,average,-10.924602,0.342697,-9.307377,0.035549,,
10,sequential,H1,H1,H1,H1,H1,H1,H1,H1,H1,H9,H9,-11.082049,0.373252,-9.187944,0.042897,188468.9,47198.609119
23,mixed_random,random,random,random,random,random,random,random,random,random,random,random,-11.637744,0.177345,-9.020336,0.023864,118053.6977,11285.407949
