In [1]:
# Dependencies for the evaluation notebook.
import pandas as pd 
import numpy as np
import requests
from io import StringIO
from datetime import timedelta, datetime
# NOTE(review): the star import hides where helpers come from. The functions this
# notebook calls without defining them (loading_surveillance_eval, import_ensemble2,
# import_ensemble_original, compute_dict_WIS_AE) presumably live in
# functions_evaluation -- consider importing them by name.
from functions_evaluation import *
# Suppress every warning for the rest of the kernel session.
# NOTE(review): with no category/module filter this also hides deprecation and
# numerical warnings raised by the libraries used below.
import warnings
warnings.filterwarnings('ignore')



In [2]:
# Load the surveillance data used as ground truth for the evaluation, taken from
# the last archived surveillance file, and keep a local CSV copy of it.
start_date = datetime(2023, 9, 9)
state = 'US'
github_repo = "cdcepi/FluSight-forecast-hub"
github_directory = "auxiliary-data/target-data-archive"
surveillance_file = "target-hospital-admissions_2024-04-27.csv"
# NOTE(review): judging by the displayed output, the returned rows start at
# horizon 9 (2023-11-04) -- confirm the exact meaning in loading_surveillance_eval.
horizon_to_start = 9
df_surv = loading_surveillance_eval(start_date, github_repo, github_directory, surveillance_file, state, horizon_to_start)
# Reuse `surveillance_file` for the local copy so the saved name can never drift
# from the snapshot that was actually loaded.
df_surv.to_csv(f"../../../input_data/{surveillance_file}", index=False)
df_surv.head()

Unnamed: 0,date,location,location_name,hospitalizations,weekly_rate,horizon
1370,2023-11-04,US,US,1974,0.59422,9
1317,2023-11-11,US,US,2695,0.811258,10
1264,2023-11-18,US,US,3422,1.030102,11
1211,2023-11-25,US,US,4240,1.276339,12
1158,2023-12-02,US,US,5752,1.731487,13


In [3]:
# Score the adaptive Ensemble2 and the original ensembles for every k value and
# every forecast round (date).
#
# Results:
#   dict_k_wis_rounds[k_perc][date] -> list of mean WIS values
#   dict_k_mae_rounds[k_perc][date] -> list of the third value returned by
#                                      compute_dict_WIS_AE
# Both lists are ordered: adaptive Ensemble2 first, then the original ensembles
# in the order given by `original_ensemble_labels`.
adaptive_ensemble2_path = "../output_data/adaptive_ensemble2/"
original_ensembles_path = "../output_data/original_ensembles/"
horizon_to_date = df_surv.set_index('horizon')['date'].to_dict()
surv_lookup = df_surv[['date', 'hospitalizations']].drop_duplicates()
k_values = [0.05, 0.15, 0.25, 0.50, 0.75]
loss_function = 'rmse'
dict_k_wis_rounds = {}
dict_k_mae_rounds = {}
alphas=[0.02, 0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90]
# Labels of the original ensemble files (Ensemble_<label>_USnational.csv);
# the order here fixes the order of the stored result lists.
original_ensemble_labels = ['Ens2', 'A', 'B', 'C', 'D', 'E', 'F']
# Rounds to evaluate: every surveillance date except the last one. Computed once,
# since it does not depend on k.
# NOTE(review): presumably the last date has no forecast round to score -- confirm.
unique_dates = df_surv['date'].sort_values().unique()[:-1]

for k in k_values:
    dict_wis_rounds = {}
    dict_mae_rounds = {}
    for date in unique_dates:
        # Adaptive Ensemble2: one file per (round date, k, loss function)
        full_path_adaptive_ensemble2 = f"{adaptive_ensemble2_path}/{date.strftime('%Y-%m-%d')}_{k}_{loss_function}.csv"
        df_adaptive_ensemble2 = import_ensemble2(full_path_adaptive_ensemble2, surv_lookup, horizon_to_date)
        wis_list_adens2, wis_mean_adens2, wmape_adens2 = compute_dict_WIS_AE(df_adaptive_ensemble2, alphas)
        wis_means = [wis_mean_adens2]
        wmapes = [wmape_adens2]
        # Original ensembles (Ensemble2 and scenarios A-F): same file for every
        # round, evaluated from `date` onwards by import_ensemble_original.
        # NOTE(review): these scores do not use k, yet they are recomputed for
        # each k; they could be cached per date if the helpers are deterministic.
        for label in original_ensemble_labels:
            full_path_original_ensemble = f"{original_ensembles_path}/Ensemble_{label}_USnational.csv"
            df_original_ensemble = import_ensemble_original(full_path_original_ensemble, df_surv, date)
            wis_list_original, wis_mean_original, wmape_original = compute_dict_WIS_AE(df_original_ensemble, alphas)
            wis_means.append(wis_mean_original)
            wmapes.append(wmape_original)
        # Store WIS and AE results
        # NOTE(review): the "mae" dictionaries hold the value this notebook calls
        # `wmape`; verify against compute_dict_WIS_AE which metric it really is.
        dict_wis_rounds[date] = wis_means
        dict_mae_rounds[date] = wmapes
    # Round before converting: int() alone truncates, so a value such as 0.29
    # (0.29 * 100 == 28.999999999999996) would be stored under key 28.
    k_perc = int(round(k * 100))
    dict_k_wis_rounds[k_perc] = dict_wis_rounds
    dict_k_mae_rounds[k_perc] = dict_mae_rounds
 

0.99
0.975
0.95
0.9
0.85
0.8
0.75
0.7
0.65
0.6
0.55
0.99
0.975
0.95
0.9
0.85
0.8
0.75
0.7
0.65
0.6
0.55
0.99
0.975
0.95
0.9
0.85
0.8
0.75
0.7
0.65
0.6
0.55
0.99
0.975
0.95
0.9
0.85
0.8
0.75
0.7
0.65
0.6
0.55
0.99
0.975
0.95
0.9
0.85
0.8
0.75
0.7
0.65
0.6
0.55
0.99
0.975
0.95
0.9
0.85
0.8
0.75
0.7
0.65
0.6
0.55
0.99
0.975
0.95
0.9
0.85
0.8
0.75
0.7
0.65
0.6
0.55
0.99
0.975
0.95
0.9
0.85
0.8
0.75
0.7
0.65
0.6
0.55
0.99
0.975
0.95
0.9
0.85
0.8
0.75
0.7
0.65
0.6
0.55
0.99
0.975
0.95
0.9
0.85
0.8
0.75
0.7
0.65
0.6
0.55
0.99
0.975
0.95
0.9
0.85
0.8
0.75
0.7
0.65
0.6
0.55
0.99
0.975
0.95
0.9
0.85
0.8
0.75
0.7
0.65
0.6
0.55
0.99
0.975
0.95
0.9
0.85
0.8
0.75
0.7
0.65
0.6
0.55
0.99
0.975
0.95
0.9
0.85
0.8
0.75
0.7
0.65
0.6
0.55
0.99
0.975
0.95
0.9
0.85
0.8
0.75
0.7
0.65
0.6
0.55
0.99
0.975
0.95
0.9
0.85
0.8
0.75
0.7
0.65
0.6
0.55
0.99
0.975
0.95
0.9
0.85
0.8
0.75
0.7
0.65
0.6
0.55
0.99
0.975
0.95
0.9
0.85
0.8
0.75
0.7
0.65
0.6
0.55
0.99
0.975
0.95
0.9
0.85
0.8
0.75
0.7
0.65
0.6
0.55
0.99
0.975
0

In [4]:
# Flatten the nested WIS results ({k_perc: {week: [scores]}}) into one row per
# (k_perc, week) pair and write the table to CSV.
# Each score list holds the adaptive Ensemble2 at position 0, followed by the
# original ensembles 2, A, B, C, D, E, F at positions 1..7.
wis_original_labels = ('2', 'A', 'B', 'C', 'D', 'E', 'F')
wis_records = []
for k_perc, rounds_by_week in dict_k_wis_rounds.items():
    for week, scores in rounds_by_week.items():
        adaptive_wis = scores[0]
        record = {'k_perc': k_perc, 'week': week, 'wis_adaptive_ensemble2': adaptive_wis}
        # Absolute WIS of each original ensemble
        for position, label in enumerate(wis_original_labels, start=1):
            record[f'wis_original_ensemble{label}'] = scores[position]
        # Relative WIS: adaptive ensemble divided by each original ensemble
        for position, label in enumerate(wis_original_labels, start=1):
            record[f'wis_rel_original{label}'] = adaptive_wis / scores[position]
        wis_records.append(record)
df_wis = pd.DataFrame(wis_records)
df_wis.to_csv(f"../output_data/performance_adaptive_ensemble/wis_performance_{loss_function}.csv", index=False)

In [5]:
# Flatten the nested MAE results ({k_perc: {week: [scores]}}) into one row per
# (k_perc, week) pair and write the table to CSV.
# Each score list holds the adaptive Ensemble2 at position 0, followed by the
# original ensembles 2, A, B, C, D, E, F at positions 1..7.
mae_original_labels = ('2', 'A', 'B', 'C', 'D', 'E', 'F')
mae_records = []
for k_perc, rounds_by_week in dict_k_mae_rounds.items():
    for week, scores in rounds_by_week.items():
        adaptive_mae = scores[0]
        record = {'k_perc': k_perc, 'week': week, 'mae_adaptive_ensemble2': adaptive_mae}
        # Absolute error metric of each original ensemble
        for position, label in enumerate(mae_original_labels, start=1):
            record[f'mae_original_ensemble{label}'] = scores[position]
        # Relative metric: adaptive ensemble divided by each original ensemble
        for position, label in enumerate(mae_original_labels, start=1):
            record[f'mae_rel_original{label}'] = adaptive_mae / scores[position]
        mae_records.append(record)
df_mae = pd.DataFrame(mae_records)
df_mae.to_csv(f"../output_data/performance_adaptive_ensemble/mae_performance_{loss_function}.csv", index=False)