# Correlation Fit

In [1]:
import pandas as pd
import numpy as np
import pickle
import os
import model
from matplotlib import pyplot as plt
import pickle
%matplotlib inline

In [2]:
prep_data = model.data_and_network_prep()

In [3]:
data_for_fit_i = model.create_data_for_fit_influenza()
data_for_fit_v = model.create_data_for_fit(prep_data)

## Vaccination model

Load results

In [5]:
# Get list of files
file_names = os.listdir('../../Data/vaccination_model/model_results_updated')

# Merge lists
model_results_all = []
for file_name in file_names:
    with open(f'../../Data/vaccination_model/model_results_updated/{file_name}', 'rb') as pickle_in:
        cur_res = pickle.load(pickle_in)
    model_results_all.extend(cur_res)

In [6]:
# Get Results and likelihood lists 
model_results_list = [res['model_results'] for res in model_results_all]
model_res_likelihood_list = [res['likelihood'] for res in model_results_all]

# Get median realization
med = np.argsort(np.array(model_res_likelihood_list))[len(model_res_likelihood_list)//2]
median_model_results = model_results_list[med]

In [None]:
del model_results_all
del cur_res

Load results homogenous

In [4]:
# Get list of files
file_names = os.listdir('L:/Dor/Data/vaccination_model/model_results_homo/')

# Merge lists
model_results_all_homo = []
for file_name in file_names:
    with open(f'L:/Dor/Data/vaccination_model/model_results_homo/{file_name}', 'rb') as pickle_in:
        cur_res = pickle.load(pickle_in)
    model_results_all_homo.extend(cur_res)

# Get Results and likelihood lists 
model_results_list_homo = [res['model_results'] for res in model_results_all_homo][:100]
model_res_likelihood_list_homo = [res['likelihood'] for res in model_results_all_homo][:100]

# Get median realization
med_homo = np.argsort(np.array(model_res_likelihood_list_homo))[len(model_res_likelihood_list_homo)//2]
median_model_results_homo = model_results_list_homo[med_homo]

In [6]:
model_res_likelihood_list[med]

64045.919901715395

In [18]:
model_res_likelihood_list[med]

63988.87193888805

In [6]:
model_res_likelihood_list_homo[med_homo]

63932.15428404272

#### Correlation fit (not weighted)

In [5]:
print(f'correlation fit aggregated: {model.calc_correlation_fit_vacc(median_model_results, data_for_fit_v, prep_data):.2f}')
print(f'correlation fit by subdist: {model.calc_correlation_fit_vacc(median_model_results, data_for_fit_v, prep_data, by_subdist=True):.2f}')
print(f'correlation fit by subdist and age: {model.calc_correlation_fit_vacc(median_model_results, data_for_fit_v, prep_data, by_subdist_age=True):.2f}')

TypeError: unsupported format string passed to tuple.__format__

#### Correlation fit weighted

In [6]:
print(f'correlation fit aggregated: {model.calc_correlation_fit_vacc(median_model_results, data_for_fit_v, prep_data, weighted=True):.2f}')
print(f'correlation fit by subdist: {model.calc_correlation_fit_vacc(median_model_results, data_for_fit_v, prep_data, by_subdist=True, weighted=True):.2f}')
print(f'correlation fit by subdist and age: {model.calc_correlation_fit_vacc(median_model_results, data_for_fit_v, prep_data, by_subdist_age=True, weighted=True):.2f}')

correlation fit aggregated: 0.95
correlation fit by subdist: 0.87
correlation fit by subdist and age: 0.85


#### Smoothed weighted

In [7]:
window = 2

vacc_agg = model.calc_correlation_fit_vacc(median_model_results, data_for_fit_v, prep_data, weighted=True, smooth=True, window=window)
vacc_subdist = model.calc_correlation_fit_vacc(median_model_results, data_for_fit_v, prep_data, by_subdist=True, weighted=True, smooth=True, window=window)
vacc_subdist_age = model.calc_correlation_fit_vacc(median_model_results, data_for_fit_v, prep_data, by_subdist_age=True, weighted=True, smooth=True, window=window)
print(f'correlation fit aggregated: r:{vacc_agg[0]:.2f}, p:{vacc_agg[1]}')
print(f'correlation fit by subdist: r:{vacc_subdist[0]:.2f}, p:{vacc_subdist[1]}')
print(f'correlation fit by subdist and age: r:{vacc_subdist_age[0]:.2f}, p:{vacc_subdist_age[1]}')

correlation fit aggregated: r:0.99, p:1.0265768256487908e-20
correlation fit by subdist: r:0.94, p:5.668121216637671e-07
correlation fit by subdist and age: r:0.93, p:1.0646131414078742e-06


### Each season separately

#### Weighted

In [7]:
window = None

corr_agg_ws = [model.calc_correlation_fit_vacc_separatly(median_model_results, prep_data, season, weighted=True)[0]
               for season in model.seasons]

corr_subdist_ws = [model.calc_correlation_fit_vacc_separatly(median_model_results, prep_data, season, by_subdist=True, weighted=True)[0]
                   for season in model.seasons]

corr_subdist_age_ws = [model.calc_correlation_fit_vacc_separatly(median_model_results, prep_data, season, by_subdist_age=True, weighted=True)[0]
                       for season in model.seasons]

# p-values
corr_agg_ws_p = [model.calc_correlation_fit_vacc_separatly(median_model_results, prep_data, season, weighted=True)[1]
                 for season in model.seasons]

corr_subdist_ws_p = [model.calc_correlation_fit_vacc_separatly(median_model_results, prep_data, season, by_subdist=True, weighted=True)[1]
                     for season in model.seasons]

corr_subdist_age_ws_p = [model.calc_correlation_fit_vacc_separatly(median_model_results, prep_data, season, by_subdist_age=True, weighted=True)[1]
                         for season in model.seasons]

In [8]:
print(f'correlation fit aggregated: r:{np.array(corr_agg_ws).mean():.2f}, p: {np.array(corr_agg_ws_p).mean()}')
print(f'correlation fit by subdist: r:{np.array(corr_subdist_ws).mean():.2f}, p:{np.array(corr_subdist_ws_p).mean()}')
print(f'correlation fit by subdist and age: r:{np.array(corr_subdist_age_ws)[1:].mean():.2f}, p:{np.array(corr_subdist_age_ws_p).mean()}')

correlation fit aggregated: r:0.88, p: 1.160236615534211e-06
correlation fit by subdist: r:0.83, p:0.0001124992316616087
correlation fit by subdist and age: r:0.82, p:0.02649415505178619


In [9]:
corr_subdist_age_ws

[0.6105789770139545,
 0.8486032839154903,
 0.8506716979541122,
 0.8404012589252113,
 0.857485179576659,
 0.8412650442311231,
 0.6984111610279864]

#### Weighted smoothed

In [16]:
window = 4

corr_agg_ws = [model.calc_correlation_fit_vacc_separatly(median_model_results, prep_data, season, weighted=True, smooth=True, window=window)[0]
               for season in model.seasons]

corr_subdist_ws = [model.calc_correlation_fit_vacc_separatly(median_model_results, prep_data, season, by_subdist=True, weighted=True, smooth=True, window=window)[0]
                   for season in model.seasons]

corr_subdist_age_ws = [model.calc_correlation_fit_vacc_separatly(median_model_results, prep_data, season, by_subdist_age=True, weighted=True, smooth=True, window=window)[0]
                       for season in model.seasons]

# p-values
corr_agg_ws_p = [model.calc_correlation_fit_vacc_separatly(median_model_results, prep_data, season, weighted=True, smooth=True, window=window)[1]
                 for season in model.seasons]

corr_subdist_ws_p = [model.calc_correlation_fit_vacc_separatly(median_model_results, prep_data, season, by_subdist=True, weighted=True, smooth=True, window=window)[1]
                     for season in model.seasons]

corr_subdist_age_ws_p = [model.calc_correlation_fit_vacc_separatly(median_model_results, prep_data, season, by_subdist_age=True, weighted=True, smooth=True, window=window)[1]
                         for season in model.seasons]

In [17]:
print(f'correlation fit aggregated: r:{np.array(corr_agg_ws).mean():.2f}, p: {np.array(corr_agg_ws_p).mean()}')
print(f'correlation fit by subdist: r:{np.array(corr_subdist_ws).mean():.2f}, p:{np.array(corr_subdist_ws_p).mean()}')
print(f'correlation fit by subdist and age: r:{np.array(corr_subdist_age_ws)[1:].mean():.2f}, p:{np.array(corr_subdist_age_ws_p).mean()}')

correlation fit aggregated: r:0.94, p: 1.2165779667655747e-09
correlation fit by subdist: r:0.91, p:1.2420837222653961e-05
correlation fit by subdist and age: r:0.90, p:0.023656754649046177


In [18]:
corr_subdist_age_ws

[0.6731072507498825,
 0.9268795470813409,
 0.9265029215233063,
 0.9107727764452294,
 0.92862865107923,
 0.9036553672766127,
 0.8225011665277207]

Homogenous model

In [10]:
window = 2

vacc_agg_h = model.calc_correlation_fit_vacc(median_model_results_homo, data_for_fit_v, prep_data, weighted=True, smooth=True, window=window)
vacc_subdist_h = model.calc_correlation_fit_vacc(median_model_results_homo, data_for_fit_v, prep_data, by_subdist=True, weighted=True, smooth=True, window=window)
vacc_subdist_age_h = model.calc_correlation_fit_vacc(median_model_results_homo, data_for_fit_v, prep_data, by_subdist_age=True, weighted=True, smooth=True, window=window)
print(f'correlation fit aggregated: r:{vacc_agg_h[0]:.2f}, p:{vacc_agg_h[1]}')
print(f'correlation fit by subdist: r:{vacc_subdist_h[0]:.2f}, p:{vacc_subdist_h[1]}')
print(f'correlation fit by subdist and age: r:{vacc_subdist_age_h[0]:.2f}, p:{vacc_subdist_age_h[1]}')

correlation fit aggregated: r:0.94, p:3.9604365046065027e-13
correlation fit by subdist: r:0.93, p:1.8068421029970448e-11
correlation fit by subdist and age: r:0.91, p:4.28535343677451e-08


## Coupled model

Load results

In [31]:
# Get list of files
# path = 'L:/Dor/Data/coupled_model/model_results'
# path = 'L:/Dor/Data/coupled_model/model_results_new'
# path = 'L:/Dor/Data/coupled_model/model_results_correction'
path = 'L:/Dor/Data/coupled_model/model_results_updated'
file_names = os.listdir(path)#[:-1]


# Merge lists
model_results_all_coupled = []
for file_name in file_names:
    with open(f'{path}/{file_name}', 'rb') as pickle_in:
        cur_res = pickle.load(pickle_in)
    model_results_all_coupled.extend(cur_res)
    
# Get Results list 
model_results_list_coupled = [res['model_results'] for res in model_results_all_coupled]

# Get likelihood list by season
likelihood_lists = {season: [] for season in model.seasons}
for res in model_results_all_coupled:
    for season, likelihood in res['likelihood_by_season'].items():
        likelihood_lists[season].append(likelihood)
        
# Get median realization
meds_coupled = {season: np.argsort(np.array(likelihood_lists[season]))[len(likelihood_lists[season])//2]
                for season in model.seasons}
median_model_results_coupled = {season: model_results_list_coupled[meds_coupled[season]][s]
                                for s, season in enumerate(model.seasons)}

In [32]:
[likelihood_lists[s][meds_coupled[s]] for s in model.seasons]

[24577.216317505354,
 5679.828209233844,
 19887.821383009505,
 13633.336895309827,
 12783.182267431304,
 22890.25836154576,
 10927.574317919176]

In [33]:
len(model_results_list_coupled)

200

In [None]:
{2011: 25680.30347994885,
 2012: 6802.081990843844,
 2013: 20692.573126474173,
 2014: 14330.364836244316,
 2015: 13368.259347393756,
 2016: 23606.825638159233,
 2017: 11182.21193947767}

In [None]:
# new
{2011: 25676.17355680652,
 2012: 6645.295124895659,
 2013: 20785.260055026094,
 2014: 14235.311724006167,
 2015: 13516.775762137417,
 2016: 23685.01692769133,
 2017: 11355.742885831502}

#### Correlation fit (not weighted)

In [34]:
corr_agg = [model.calc_correlation_fit_flu(median_model_results_coupled[season], data_for_fit_i, prep_data, season)
            for season in model.seasons]

corr_subdist = [model.calc_correlation_fit_flu(median_model_results_coupled[season], data_for_fit_i, prep_data, season, by_subdist=True)
                for season in model.seasons]

corr_subdist_age = [model.calc_correlation_fit_flu(median_model_results_coupled[season], data_for_fit_i, prep_data, season, by_subdist_age=True)
                    for season in model.seasons]

In [35]:
print(f'correlation fit aggregated: {np.array(corr_agg).mean():.2f}')
print(f'correlation fit by subdist: {np.array(corr_subdist).mean():.2f}')
print(f'correlation fit by subdist and age: {np.array(corr_subdist_age).mean():.2f}')

correlation fit aggregated: 0.47
correlation fit by subdist: 0.40
correlation fit by subdist and age: 0.37


#### Correlation fit weighted

In [36]:
corr_agg_w = [model.calc_correlation_fit_flu(median_model_results_coupled[season], data_for_fit_i, prep_data, season, weighted=True)
              for season in model.seasons]

corr_subdist_w = [model.calc_correlation_fit_flu(median_model_results_coupled[season], data_for_fit_i, prep_data, season, by_subdist=True, weighted=True)
                  for season in model.seasons]

corr_subdist_age_w = [model.calc_correlation_fit_flu(median_model_results_coupled[season], data_for_fit_i, prep_data, season, by_subdist_age=True, weighted=True)
                      for season in model.seasons]

In [37]:
print(f'correlation fit aggregated: {np.array(corr_agg_w).mean():.2f}')
print(f'correlation fit by subdist: {np.array(corr_subdist_w).mean():.2f}')
print(f'correlation fit by subdist and age: {np.array(corr_subdist_age_w)[1:].mean():.2f}')

correlation fit aggregated: 0.47
correlation fit by subdist: 0.41
correlation fit by subdist and age: 0.38


#### Correlation fit weighted smoothed

In [38]:
window = 4

corr_agg_ws = [model.calc_correlation_fit_flu(median_model_results_coupled[season], data_for_fit_i, prep_data, season, weighted=True, smooth=True, window=window)[0]
               for season in model.seasons]

corr_subdist_ws = [model.calc_correlation_fit_flu(median_model_results_coupled[season], data_for_fit_i, prep_data, season, by_subdist=True, weighted=True, smooth=True, window=window)[0]
                   for season in model.seasons]

corr_subdist_age_ws = [model.calc_correlation_fit_flu(median_model_results_coupled[season], data_for_fit_i, prep_data, season, by_subdist_age=True, weighted=True, smooth=True, window=window)[0]
                       for season in model.seasons]

# p-values
corr_agg_ws_p = [model.calc_correlation_fit_flu(median_model_results_coupled[season], data_for_fit_i, prep_data, season, weighted=True, smooth=True, window=window)[1]
                 for season in model.seasons]

corr_subdist_ws_p = [model.calc_correlation_fit_flu(median_model_results_coupled[season], data_for_fit_i, prep_data, season, by_subdist=True, weighted=True, smooth=True, window=window)[1]
                     for season in model.seasons]

corr_subdist_age_ws_p = [model.calc_correlation_fit_flu(median_model_results_coupled[season], data_for_fit_i, prep_data, season, by_subdist_age=True, weighted=True, smooth=True, window=window)[1]
                         for season in model.seasons]

In [39]:
print(f'correlation fit aggregated: r:{np.array(corr_agg_ws).mean():.2f}, p: {np.array(corr_agg_ws_p).mean()}')
print(f'correlation fit by subdist: r:{np.array(corr_subdist_ws).mean():.2f}, p:{np.array(corr_subdist_ws_p).mean()}')
print(f'correlation fit by subdist and age: r:{np.array(corr_subdist_age_ws)[1:].mean():.2f}, p:{np.array(corr_subdist_age_ws_p).mean()}')

correlation fit aggregated: r:0.95, p: 1.1129666046106655e-20
correlation fit by subdist: r:0.89, p:8.141195085086505e-08
correlation fit by subdist and age: r:0.87, p:1.5130479546940932e-06


In [13]:
print(f'correlation fit aggregated: r:{np.array(corr_agg_ws).mean():.2f}, p: {np.array(corr_agg_ws_p).mean()}')
print(f'correlation fit by subdist: r:{np.array(corr_subdist_ws).mean():.2f}, p:{np.array(corr_subdist_ws_p).mean()}')
print(f'correlation fit by subdist and age: r:{np.array(corr_subdist_age_ws)[1:].mean():.2f}, p:{np.array(corr_subdist_age_ws_p).mean()}')

correlation fit aggregated: r:0.93, p: 1.2252209142816829e-17
correlation fit by subdist: r:0.86, p:2.007248785997474e-05
correlation fit by subdist and age: r:0.83, p:0.0022957232196945195


In [40]:
corr_subdist_age_ws

[0.9435619006530725,
 0.8298471251849016,
 0.8896753510853048,
 0.8707612750912651,
 0.8454395321487713,
 0.8482506223224988,
 0.920433354795908]