Notebook to plot the results of the lag/lead experiments between wastewater (WW) measurements and either Test and Trace (T&T) case counts or the Coronavirus Infection Survey (CIS).

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import wastewater as ww

In [None]:
# Result containers, each keyed by comparison target ('tt' or 'cis')
# and filled in by the loading cells below.
mae, mae_sem = {}, {}                    # MAE plotted in the Log10 section, and its SEM
mae_natural, mae_natural_sem = {}, {}    # MAE plotted in the natural-space section, and its SEM
shifts = {}                              # wastewater shift values (x-axis of the plots)

# Tag embedded in the result file names that selects which run to load.
name = "xgb"

In [None]:
bucket = "s3://jbc-staging-data-wip/jbc-wip/01. Data/01. Raw/Waste Water/ww_users/mario/cis_results"

# Load the T&T lag/lead results. Every file is read the same way: only the
# column labelled "0" is kept, and it is stored under the 'tt' key.
for target, stem in (
    (mae, "tt_shifts_mae"),
    (mae_sem, "tt_shifts_mae_sem"),
    (mae_natural, "tt_shifts_mae_natural"),
    (mae_natural_sem, "tt_shifts_mae_sem_natural"),
    (shifts, "tt_shifts"),
):
    target['tt'] = pd.read_csv(f"{bucket}/{stem}_{name}.csv")["0"]

In [None]:
bucket = "s3://jbc-staging-data-wip/jbc-wip/01. Data/01. Raw/Waste Water/ww_users/mario/cis_results"

# Load the CIS lag/lead results. Every file is read the same way: only the
# column labelled "0" is kept, and it is stored under the 'cis' key.
for target, stem in (
    (mae, "cis_shifts_mae"),
    (mae_sem, "cis_shifts_mae_sem"),
    (mae_natural, "cis_shifts_mae_natural"),
    (mae_natural_sem, "cis_shifts_mae_sem_natural"),
    (shifts, "cis_shifts"),
):
    target['cis'] = pd.read_csv(f"{bucket}/{stem}_{name}.csv")["0"]

In [None]:
# Plot colour per comparison target.
color = {'tt': 'orange', 'cis': 'blue'}

# Window length of the rolling mean drawn in the plotting cells.
window = 5

# Only shifts strictly inside this open interval are plotted.
plot_range = (-11, 20)
lower, upper = plot_range

# Boolean masks selecting the shifts that fall inside the plotting interval.
cis_in_range = (shifts['cis'] > lower) & (shifts['cis'] < upper)
tt_in_range = (shifts['tt'] > lower) & (shifts['tt'] < upper)

# Positions of the in-range entries, used to index the result series.
range_cis = np.arange(len(shifts['cis']))[cis_in_range]
range_tt = np.arange(len(shifts['tt']))[tt_in_range]

#### Lag analysis in natural space

In [None]:
# Lag analysis in natural (non-log) units: MAE against the wastewater shift,
# one panel per target (CIS on top, T&T below), sharing the x-axis.
window = 5  # window length of the centred rolling mean

f, ax = plt.subplots(2, 1, figsize=(7, 6), sharex=True)
ax_cis, ax_tt = ax

# ---- a) Coronavirus Infection Survey ---------------------------------------
cis_shift = shifts['cis'][range_cis]
cis_mae = mae_natural['cis'][range_cis]
cis_sem = mae_natural_sem['cis'][range_cis]

# NOTE(review): errorbar(fmt='o') already draws a marker at every point, so
# this scatter paints the same markers a second time — confirm it is wanted.
ax_cis.scatter(cis_shift, cis_mae, color=color['cis'], alpha=0.7)
# Error bars are twice the stored SEM values (presumably standing in for a
# bootstrap 95% CI — TODO confirm).
ax_cis.errorbar(cis_shift, cis_mae, yerr=2 * cis_sem, fmt='o',
                label='bootstrap MAE', color=color['cis'], alpha=0.7)
# NOTE(review): the rolling mean here is computed on the plotted subset only,
# whereas the T&T panel smooths the full series before subsetting. The two
# differ near the edges of the plotted range — confirm which is intended.
ax_cis.plot(cis_shift,
            cis_mae.rolling(window, center=True, min_periods=1).mean(),
            color=color['cis'],
            label=f'{window}-day rol. avg.')
# Dashed reference line at zero shift, spanning the plotted MAE values.
ax_cis.vlines(0., cis_mae.min(), cis_mae.max(),
              color='red', linestyle='dashed')

ax_cis.set_title('a) Coronavirus Infection Survey', fontsize=14)
ax_cis.set_ylabel('Mean Absolute Error \n (% infected)', fontsize=13)
ax_cis.legend(fontsize=12, loc='lower right')

# ---- b) T&T pillars 1&2 ----------------------------------------------------
tt_shift = shifts['tt'][range_tt]
tt_mae = mae_natural['tt'][range_tt]
tt_sem = mae_natural_sem['tt'][range_tt]
# Smooth the full series first, then keep only the plotted positions.
tt_smooth = mae_natural['tt'].rolling(window, center=True, min_periods=1).mean()[range_tt]

ax_tt.scatter(tt_shift, tt_mae, color=color['tt'], alpha=0.7)
ax_tt.errorbar(tt_shift, tt_mae, yerr=2 * tt_sem, fmt='o',
               label='bootstrap MAE', color=color['tt'], alpha=0.7)
ax_tt.plot(tt_shift, tt_smooth,
           color=color['tt'],
           label=f'{window}-day rol. avg.')
# Dashed reference line at a shift of 4 days. Its vertical extent follows the
# plotted MAE values (as in the CIS panel) rather than fixed y-limits, so the
# line stays on the data if a different result set is loaded.
ax_tt.vlines(4, tt_mae.min(), tt_mae.max(),
             color='red', linestyle='dashed')

ax_tt.set_ylabel('Mean Absolute Error \n (cases per 100k)', fontsize=13)
ax_tt.set_title('b) T&T pillars 1&2', fontsize=14)
ax_tt.legend(fontsize=12, loc='lower right')
ax_tt.set_xlabel('Wastewater shift (days)', fontsize=14)

plt.tight_layout()

#### Lag analysis in Log10 space

In [None]:
# Lag analysis in Log10 units: MAE against the wastewater shift, one panel per
# target (T&T on top, CIS below), sharing the x-axis. `window` is the
# rolling-mean length set in an earlier cell.
f, ax = plt.subplots(2, 1, figsize=(7, 6), sharex=True)
ax_tt, ax_cis = ax
rolling_label = f'{window}-day rol. avg.'

# ---- T&T pillars 1&2 -------------------------------------------------------
tt_shift = shifts['tt'][range_tt]
tt_mae = mae['tt'][range_tt]
tt_sem = mae_sem['tt'][range_tt]
# Smooth the full series first, then keep only the plotted positions.
tt_smooth = mae['tt'].rolling(window, center=True, min_periods=1).mean()[range_tt]

ax_tt.scatter(tt_shift, tt_mae, color=color['tt'])
# Error bars are twice the stored SEM values (presumably standing in for a
# bootstrap 95% CI — TODO confirm).
ax_tt.errorbar(tt_shift, tt_mae, yerr=2 * tt_sem, fmt='o',
               label='bootstrap mean', color=color['tt'])
ax_tt.plot(tt_shift, tt_smooth, color=color['tt'], label=rolling_label)
ax_tt.set_ylabel('Mean Absolute Error \n (Log10 cases per 100k)', fontsize=13)
ax_tt.set_title('T&T pillars 1&2', fontsize=14)

# ---- Coronavirus Infection Survey ------------------------------------------
cis_shift = shifts['cis'][range_cis]
cis_mae = mae['cis'][range_cis]
cis_sem = mae_sem['cis'][range_cis]
# NOTE(review): unlike the T&T panel, the rolling mean here is computed on the
# plotted subset only — confirm the difference is intended.
cis_smooth = cis_mae.rolling(window, center=True, min_periods=1).mean()

ax_cis.scatter(cis_shift, cis_mae, color=color['cis'])
ax_cis.errorbar(cis_shift, cis_mae, yerr=2 * cis_sem, fmt='o',
                label='bootstrap mean', color=color['cis'])
ax_cis.plot(cis_shift, cis_smooth, color=color['cis'], label=rolling_label)
ax_cis.set_title('Coronavirus Infection Survey', fontsize=14)
ax_cis.set_ylabel('Mean Absolute Error \n (Log10 % infected)', fontsize=13)
ax_cis.set_xlabel('Wastewater shift (days)', fontsize=14)

# Legends are added once every artist on each axis has been drawn.
ax_tt.legend(fontsize=12)
ax_cis.legend(fontsize=12, loc='lower right')

plt.tight_layout()