In [16]:
import csv
import glob
import pickle
import numpy as np
import scipy as sp
from scipy.stats import ttest_ind

import matplotlib.pyplot as plt
%matplotlib inline

In [28]:
NN = 64.

In [29]:
metrics_selected = list(['death','cuminfection_0','cuminfection_1', 'cuminfection_2', 'cuminfection_12', 
                         'cuminfection_X', 'labresult_0', 'labresult_1', 'labresult_2', 'labresult_12',
                        'cumsuperinfection', 'colonization_0', 'colonization_1', 'colonization_2', 
                         'colonization_12', 'misempiric', 'tempempiric', 'def_drug_use_A', 'def_drug_use_B', 
                         'def_drug_use_C', 'def_drug_use_L', 'corr_drug_use_A', 'corr_drug_use_B', 
                         'corr_drug_use_C', 'corr_drug_use_L', 'mutation_1', 'mutation_2', 'mutation_12', 
                         'transmission_0', 'transmission_1', 'transmission_2', 'transmission_12'])

In [30]:
def cohen_d(x,y):
    nx = len(x)
    ny = len(y)
    dof = nx + ny - 2
    return (np.mean(x) - np.mean(y)) / np.sqrt(((nx-1)*np.std(x, ddof=1) ** 2 + (ny-1)*np.std(y, ddof=1) ** 2) / dof)

In [31]:
def cohen_d_compute(v1, v2, metric):
    v1_metric_list = [value[metric][-1]/value['admission'][-1]*100 for key, value in v1.items()]
    v2_metric_list = [value[metric][-1]/value['admission'][-1]*100 for key, value in v2.items()]
    return round(cohen_d(v1_metric_list, v2_metric_list),3)

In [32]:
def cohen_d_compute_time(v1, v2, metric, n):
    v1_metric_list = [value[metric][n]/(NN+value['admission'][n])*100 for key, value in v1.items()]
    v2_metric_list = [value[metric][n]/(NN+value['admission'][n])*100 for key, value in v2.items()]
    return round(cohen_d(v1_metric_list, v2_metric_list),3)

In [33]:
def cohen_d_compute_iter(v1, v2, metric, n):
    v1_metric_list = [value[metric][-1]/(NN+value['admission'][-1])*100 for key, value in v1.items()]
    v2_metric_list = [value[metric][-1]/(NN+value['admission'][-1])*100 for key, value in v2.items()]
    return round(cohen_d(v1_metric_list[:n], v2_metric_list[:n]),3)

In [34]:
metrics2_selected = list(['realcuminfection_0','realcuminfection_1','realcuminfection_2','realcuminfection_12',
                         'misempirictotal','druguseB','druguseC','cuminfection'])

def cohen_d_metrics2 (v1, v2, n, metric):
    if metric == 'realcuminfection_0':
        v1_metric_list = [(value['labresult_0'][n]+value['transmission_0'][n])/(NN+value['admission'][n])*100 for key, value in v1.items()]
        v2_metric_list = [(value['labresult_0'][n]+value['transmission_0'][n])/(NN+value['admission'][n])*100 for key, value in v2.items()]
    elif metric == 'realcuminfection_1':
        v1_metric_list = [(value['labresult_1'][n]+value['transmission_1'][n]+value['mutation_1'][n])/(NN+value['admission'][n])*100 for key, value in v1.items()]
        v2_metric_list = [(value['labresult_1'][n]+value['transmission_1'][n]+value['mutation_1'][n])/(NN+value['admission'][n])*100 for key, value in v2.items()]
    elif metric == 'realcuminfection_2':
        v1_metric_list = [(value['labresult_2'][n]+value['transmission_2'][n]+value['mutation_2'][n])/(NN+value['admission'][n])*100 for key, value in v1.items()]
        v2_metric_list = [(value['labresult_2'][n]+value['transmission_2'][n]+value['mutation_2'][n])/(NN+value['admission'][n])*100 for key, value in v2.items()]
    elif metric == 'realcuminfection_12':
        v1_metric_list = [(value['labresult_12'][n]+value['transmission_12'][n]+value['mutation_12'][n])/(NN+value['admission'][n])*100 for key, value in v1.items()]
        v2_metric_list = [(value['labresult_12'][n]+value['transmission_12'][n]+value['mutation_12'][n])/(NN+value['admission'][n])*100 for key, value in v2.items()]
    elif metric == 'misempirictotal':
        v1_metric_list = [(value['misempiric'][n]+value['labresult_2'][n]+value['labresult_12'][n])/(NN+value['admission'][n])*100 for key, value in v1.items()]
        v2_metric_list = [(value['misempiric'][n]+value['labresult_2'][n]+value['labresult_12'][n])/(NN+value['admission'][n])*100 for key, value in v2.items()]
    elif metric == 'druguseB':
        v1_metric_list = [(value['def_drug_use_B'][n]+value['corr_drug_use_B'][n])/(NN+value['admission'][n])*100 for key, value in v1.items()]
        v2_metric_list = [(value['def_drug_use_B'][n]+value['corr_drug_use_B'][n])/(NN+value['admission'][n])*100 for key, value in v2.items()]
    elif metric == 'druguseC':
        v1_metric_list = [(value['def_drug_use_C'][n]+value['corr_drug_use_C'][n])/(NN+value['admission'][n])*100 for key, value in v1.items()]
        v2_metric_list = [(value['def_drug_use_C'][n]+value['corr_drug_use_C'][n])/(NN+value['admission'][n])*100 for key, value in v2.items()]
    elif metric == 'cuminfection':
        v1_metric_list = [(value['labresult_0'][n]+value['transmission_0'][n]+value['labresult_1'][n]+value['transmission_1'][n]+value['mutation_1'][n]
                          +value['labresult_2'][n]+value['transmission_2'][n]+value['mutation_2'][n]+value['labresult_12'][n]+value['transmission_12'][n]+value['mutation_12'][n])/(NN+value['admission'][n])*100 for key, value in v1.items()]
        v2_metric_list = [(value['labresult_0'][n]+value['transmission_0'][n]+value['labresult_1'][n]+value['transmission_1'][n]+value['mutation_1'][n]
                          +value['labresult_2'][n]+value['transmission_2'][n]+value['mutation_2'][n]+value['labresult_12'][n]+value['transmission_12'][n]+value['mutation_12'][n])/(NN+value['admission'][n])*100 for key, value in v2.items()]
    return round(cohen_d(v1_metric_list, v2_metric_list),3)

In [35]:
folder_path = "/Users/xxh31/Desktop/rand_expmts/p64h16/"

file_names = ['q=0.3_p&r=0.3', 'q=0.2_p&r=0.5', 'q=0.5_p&r=0.2', 'q=0.15_p&r=0.15', 
              'q=0.05_p&r=0.5', 'q=0.5_p&r=0.05']

dic_metrics2 = {}

for file_name in file_names:

    pairs = ('v1_' + file_name, 'v2_' + file_name)

    pkl_file1 = glob.glob(folder_path + pairs[0] + '/*.pkl')[0]

    with open(pkl_file1, "rb") as f:
        record_v1 = pickle.load(f)

    pkl_file2 = glob.glob(folder_path + pairs[1] + '/*.pkl')[0]

    with open(pkl_file2, "rb") as f:
        record_v2 = pickle.load(f)
        
    longlist = []
    for metric in metrics2_selected:
        clist = []
        for i in range (1,209):
            clist.append(cohen_d_metrics2(record_v1, record_v2, 7*i, metric))
        longlist.append(clist)
    dic_metrics2[file_name] = longlist

In [36]:
## save Cohen's D values about metrics2 as pickle file ##
import pickle
with open('metrics2_p64h16.pickle', 'wb') as handle:
    pickle.dump(dic_metrics2, handle, protocol=pickle.HIGHEST_PROTOCOL)

#with open('metrics2.pickle', 'rb') as handle:
#    dic_metrics2 = pickle.load(handle)

In [37]:
folder_path = "/Users/xxh31/Desktop/rand_expmts/p64h16/"

file_names = ['q=0.3_p&r=0.3', 'q=0.2_p&r=0.5', 'q=0.5_p&r=0.2', 'q=0.15_p&r=0.15', 
              'q=0.05_p&r=0.5', 'q=0.5_p&r=0.05']

dic_metrics1 = {}

for file_name in file_names:

    pairs = ('v1_' + file_name, 'v2_' + file_name)

    pkl_file1 = glob.glob(folder_path+pairs[0]+'/*.pkl')[0]

    with open(pkl_file1, "rb") as f:
        record_v1 = pickle.load(f)

    pkl_file2 = glob.glob(folder_path+pairs[1]+'/*.pkl')[0]

    with open(pkl_file2, "rb") as f:
        record_v2 = pickle.load(f)
    longlist = []
    for metric in metrics_selected:
        clist = []
        for i in range (1,209):
            clist.append(cohen_d_compute_time(record_v1, record_v2, metric, 7*i))
        longlist.append(clist)
    dic_metrics1[file_name] = longlist

  return (np.mean(x) - np.mean(y)) / np.sqrt(((nx-1)*np.std(x, ddof=1) ** 2 + (ny-1)*np.std(y, ddof=1) ** 2) / dof)


In [38]:
## save Cohen's D values about metrics1 as pickle file ##

with open('metrics1_p64h16.pickle', 'wb') as handle:
    pickle.dump(dic_metrics1, handle, protocol=pickle.HIGHEST_PROTOCOL)

#with open('metrics1.pickle', 'rb') as handle:
#    dic_metrics1 = pickle.load(handle)