In [1]:
import pandas as pd 
import numpy as np
import requests
from io import StringIO
from datetime import timedelta, datetime
from functions_evaluation import *
# Silence every Python warning for the remainder of the kernel session.
# NOTE(review): this is a blanket filter, so deprecation and runtime warnings
# raised by any library are hidden as well; consider narrowing it by category.
import warnings
warnings.filterwarnings('ignore')



In [2]:
# --- Run configuration -------------------------------------------------------
# All five values are handed to loading_surveillance_eval() in the next cell.

# Where the surveillance snapshot lives and which file to read.
github_repo = "cdcepi/FluSight-forecast-hub"
github_directory = "auxiliary-data/target-data-archive"
surveillance_file = "target-hospital-admissions_2024-04-27.csv"

# Reference date for the evaluation window.
start_date = datetime(year=2023, month=9, day=9)

# First horizon kept for evaluation
# (presumably counted in weeks from start_date - TODO confirm against the loader).
horizon_to_start = 9

In [3]:
# Fetch the surveillance table used as ground truth for the evaluation
# (the most recent surveillance file, covering every location).
df_surv = loading_surveillance_eval(
    start_date,
    github_repo,
    github_directory,
    surveillance_file,
    horizon_to_start,
)

# Keep only rows whose location code is not 'US'
# (presumably the national aggregate - confirm), then preview the result.
df_surv = df_surv.loc[df_surv['location'].ne('US')]
df_surv.head()

Unnamed: 0,date,location,location_name,hospitalizations,weekly_rate,horizon
1339,2023-11-04,17,Illinois,34,0.270634,9
1340,2023-11-04,18,Indiana,39,0.570848,9
1341,2023-11-04,20,Kansas,6,0.205729,9
1342,2023-11-04,21,Kentucky,21,0.46725,9
1338,2023-11-04,16,Idaho,5,0.258328,9


In [10]:
# Score the adaptive Ensemble2 against the original Ensemble2 for every
# state, every training-window fraction k and every forecast date.
#
# Results are stored in two nested dictionaries with the same layout:
#     {state: {k_perc: {date: [adaptive_score, original_score]}}}
# - dict_k_wis_rounds_states holds the first value returned by
#   compute_dict_WIS_AE (named wis_mean_* here);
# - dict_k_mae_rounds_states holds the second value (named wmape_* here).

# Input locations for the two sets of ensemble forecasts.
adaptive_ensemble2_path = "../output_data/adaptive_ensemble2"
original_ensembles_path = "../output_data/original_ensembles"

# horizon -> date lookup built from the surveillance table. The table has one
# row per (location, horizon), so later rows overwrite earlier ones for the
# same horizon key.
horizon_to_date = df_surv.set_index('horizon')['date'].to_dict()

k_values = [0.05, 0.15, 0.25, 0.50, 0.75]
loss_function = 'wmape'
dict_k_wis_rounds_states = {}
dict_k_mae_rounds_states = {}
# Passed unchanged to compute_dict_WIS_AE
# (presumably the interval levels used for the WIS - TODO confirm).
alphas = [0.02, 0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90]

for state in df_surv['location'].unique():
    print('Processing state:', state)
    df_surv_state = df_surv[df_surv['location'] == state]
    surv_lookup = df_surv_state[['date', 'hospitalizations']].drop_duplicates()

    # Dates to evaluate: every surveillance date for this state except the
    # most recent one. This does not depend on k, so compute it once per state.
    unique_dates = df_surv_state['date'].sort_values().unique()[:-1]

    # Original Ensemble2: neither the file, the surveillance slice, the date
    # nor alphas depend on k, so each (state, date) is scored once here and
    # reused for every k instead of being recomputed len(k_values) times.
    # NOTE(review): assumes import_ensemble_original / compute_dict_WIS_AE are
    # deterministic and do not rely on call order.
    full_path_original_ensemble2 = f"{original_ensembles_path}/Ensemble_Ens2_{state}.csv"
    original_scores_by_date = {}
    for date in unique_dates:
        df_original_ensemble2 = import_ensemble_original(full_path_original_ensemble2, df_surv_state, date)
        original_scores_by_date[date] = compute_dict_WIS_AE(df_original_ensemble2, alphas)

    dict_k_wis_rounds = {}
    dict_k_mae_rounds = {}
    for k in k_values:
        dict_wis_rounds = {}
        dict_mae_rounds = {}
        for date in unique_dates:
            # Adaptive Ensemble2: one forecast file per (date, k, loss, state).
            full_path_adaptive_ensemble2 = f"{adaptive_ensemble2_path}/{date.strftime('%Y-%m-%d')}_{k}_{loss_function}_{state}.csv"
            df_adaptive_ensemble2 = import_ensemble2(full_path_adaptive_ensemble2, surv_lookup, horizon_to_date)
            wis_mean_adens2, wmape_adens2 = compute_dict_WIS_AE(df_adaptive_ensemble2, alphas)

            # Original Ensemble2: reuse the scores computed above for this date.
            wis_mean_original_ens2, wmape_original_ens2 = original_scores_by_date[date]

            dict_wis_rounds[date] = [wis_mean_adens2, wis_mean_original_ens2]
            dict_mae_rounds[date] = [wmape_adens2, wmape_original_ens2]

        # Key the results by k expressed as an integer percentage. round() is
        # used instead of int() because int() truncates: for a fraction such
        # as 0.29, 0.29 * 100 evaluates to 28.999999999999996 and int() would
        # yield 28. The keys for the current k_values are unchanged.
        k_perc = round(k * 100)
        dict_k_wis_rounds[k_perc] = dict_wis_rounds
        dict_k_mae_rounds[k_perc] = dict_mae_rounds
    dict_k_wis_rounds_states[state] = dict_k_wis_rounds
    dict_k_mae_rounds_states[state] = dict_k_mae_rounds

Processing state: 17
Processing state: 18
Processing state: 20
Processing state: 21
Processing state: 16
Processing state: 25
Processing state: 23
Processing state: 26
Processing state: 27
Processing state: 22
Processing state: 24
Processing state: 02
Processing state: 13
Processing state: 12
Processing state: 10
Processing state: 11
Processing state: 09
Processing state: 08
Processing state: 06
Processing state: 04
Processing state: 05
Processing state: 01
Processing state: 29
Processing state: 15
Processing state: 30
Processing state: 19
Processing state: 28
Processing state: 56
Processing state: 54
Processing state: 55
Processing state: 53
Processing state: 50
Processing state: 51
Processing state: 49
Processing state: 48
Processing state: 47
Processing state: 46
Processing state: 45
Processing state: 37
Processing state: 72
Processing state: 38
Processing state: 31
Processing state: 33
Processing state: 44
Processing state: 35
Processing state: 34
Processing state: 36
Processing st

In [11]:
# Inspect the nested results built by the evaluation loop:
#     {state: {k_perc: {date: [adaptive_ensemble2, original_ensemble2]}}}
# NOTE(review): this renders the entire dictionary (every state, k and date),
# which produces a very long output; consider previewing a single state instead.
dict_k_mae_rounds_states

{'17': {5: {Timestamp('2023-11-04 00:00:00'): [0.6291555639222253,
    0.444804029954928],
   Timestamp('2023-11-11 00:00:00'): [0.6209928437892112, 0.4435989972863414],
   Timestamp('2023-11-18 00:00:00'): [0.6157608374255862, 0.44446758743188797],
   Timestamp('2023-11-25 00:00:00'): [0.6455740272736622, 0.4447906265280353],
   Timestamp('2023-12-02 00:00:00'): [0.6665453892738513, 0.44500984276461697],
   Timestamp('2023-12-09 00:00:00'): [0.707988778620866, 0.45216753696067796],
   Timestamp('2023-12-16 00:00:00'): [0.7367819633333813, 0.4549566490619685],
   Timestamp('2023-12-23 00:00:00'): [0.6843848801941063, 0.4512359898140885],
   Timestamp('2023-12-30 00:00:00'): [0.7972014198809878, 0.48786856459453787],
   Timestamp('2024-01-06 00:00:00'): [0.9431051962672181, 0.5501288174744476],
   Timestamp('2024-01-13 00:00:00'): [0.999854268958023, 0.587566658217979],
   Timestamp('2024-01-20 00:00:00'): [0.9377101936355843, 0.5800791647132373],
   Timestamp('2024-01-27 00:00:00'): [0

In [None]:
# Flatten the nested WIS results
#     {state: {k_perc: {date: [adaptive, original]}}}
# into one record per (state, k_perc, week).
rows = [
    {
        'state': location,
        'k_perc': k_percentage,
        'week': forecast_date.strftime('%Y-%m-%d'),
        'wis_adaptive_ensemble2': scores[0],
        'wis_original_ensemble2': scores[1],
    }
    for location, per_k in dict_k_wis_rounds_states.items()
    for k_percentage, per_date in per_k.items()
    for forecast_date, scores in per_date.items()
]

# Long-format table, written without the positional index column.
df_wis = pd.DataFrame(rows)
df_wis.to_csv('../output_data/performance_adaptive_ensemble/wis_adaptive_ensemble2_wmape.csv', index=False)

In [16]:
# Flatten the nested error results
#     {state: {k_perc: {date: [adaptive, original]}}}
# into one record per (state, k_perc, week).
# Note: the columns are labelled 'mae_*', but the values are the second output
# of compute_dict_WIS_AE, which the evaluation loop stores as wmape_*.
rows = [
    {
        'state': location,
        'k_perc': k_percentage,
        'week': forecast_date.strftime('%Y-%m-%d'),
        'mae_adaptive_ensemble2': scores[0],
        'mae_original_ensemble2': scores[1],
    }
    for location, per_k in dict_k_mae_rounds_states.items()
    for k_percentage, per_date in per_k.items()
    for forecast_date, scores in per_date.items()
]

# Long-format table, written without the positional index column.
df_mae = pd.DataFrame(rows)
df_mae.to_csv('../output_data/performance_adaptive_ensemble/mae_adaptive_ensemble2_wmape.csv', index=False)