In [1]:
import os.path
import os
import re
import pandas
import numpy as np

First, we need to locate all relevant files.

In [10]:
input_path = '../sw/output'

# Only entries whose names start with "performance" are of interest.
pattern = re.compile(r'^performance')

# os.listdir() returns entries in arbitrary order, so the listing is sorted
# to make the file order (and the row order of anything built from it)
# reproducible from one run or machine to the next.
matched_files = sorted(
    filename
    for filename in os.listdir(input_path)
    if pattern.search(filename)
)

In [11]:
# Report how many files matched, then show the first five names as a sample.
n_matched = len(matched_files)
sample_names = matched_files[:5]
print(n_matched)
print(sample_names)

784
['performance-forecasting-Ensemble-onsetfixed-0-typedecline-2-flag1-1-method-0-dist-0-horizon-1-weight_type--1-weekly-mpox-cases-China-area-1-05-18-2023.csv', 'performance-forecasting-Ensemble-onsetfixed-0-typedecline-2-flag1-1-method-0-dist-0-horizon-1-weight_type--1-weekly-mpox-cases-China-area-1-05-25-2023.csv', 'performance-forecasting-Ensemble-onsetfixed-0-typedecline-2-flag1-1-method-0-dist-0-horizon-1-weight_type--1-weekly-mpox-cases-China-area-1-06-01-2023.csv', 'performance-forecasting-Ensemble-onsetfixed-0-typedecline-2-flag1-1-method-0-dist-0-horizon-1-weight_type--1-weekly-mpox-cases-China-area-1-06-08-2023.csv', 'performance-forecasting-Ensemble-onsetfixed-0-typedecline-2-flag1-1-method-0-dist-0-horizon-1-weight_type--1-weekly-mpox-cases-China-area-1-06-15-2023.csv']


Now we accumulate the data from the matched files into a single DataFrame.

In [12]:

model_no_col = "mod_num"
model_prefix = "SW-"

# One frame per input file. The frames are concatenated once after the loop:
# calling pandas.concat inside the loop copies every previously accumulated
# row again on each iteration.
frames = []

for file in matched_files:
    # Everything before the first "." is the dash-separated descriptor.
    name = file.split(".", 1)[0]
    name_elems = name.split("-")

    # The fields are read from fixed positions of the dash-separated name.
    model = name_elems[2]
    horizon = name_elems[14]
    location = name_elems[-6]
    date = "-".join(name_elems[-3:])

    data = pandas.read_csv(os.path.join(input_path, file))
    data["model"] = model_prefix + model
    data["horizon"] = horizon
    data["date"] = date
    data["location"] = location

    # Rename the first column positionally by assigning a new column index.
    # Writing into `data.columns.values` would mutate the Index's backing
    # array in place, which pandas does not support.
    data.columns = [model_no_col, *data.columns[1:]]

    frames.append(data)

# With no matched files there is nothing to concatenate; df is left as None.
df = pandas.concat(frames, ignore_index=True) if frames else None

In [5]:
# Preview the first five rows of the accumulated frame.
df.head(n=5)

Unnamed: 0,mod_num,MAE,MSE,Coverage 95%PI,WIS,model,horizon,date,location,AICc,RelativeLikelihood
0,2,1.377633,1.897873,100.0,1.483023,SW-Ensemble,0,05-18-2023,China,,
1,3,1.862628,3.469383,100.0,1.609873,SW-Ensemble,0,05-18-2023,China,,
2,4,1.920491,3.688287,100.0,1.566943,SW-Ensemble,0,05-18-2023,China,,
3,2,2.498195,6.24098,100.0,1.602979,SW-Ensemble,0,05-25-2023,China,,
4,3,0.266357,0.070946,100.0,1.528381,SW-Ensemble,0,05-25-2023,China,,


In [6]:
# Sanity check: list any rows whose MAE exceeds 1000.
mae_too_large = df["MAE"] > 1000
df[mae_too_large]

Unnamed: 0,mod_num,MAE,MSE,Coverage 95%PI,WIS,model,horizon,date,location,AICc,RelativeLikelihood


Now calculate the average of each metric for every model, location, and horizon.

In [13]:
# Average every metric over all rows that share the same model, model number,
# location and horizon.
metric_cols = ["MAE", "MSE", "Coverage 95%PI", "WIS"]
summary = (
    df.groupby(["model", model_no_col, "location", "horizon"])[metric_cols]
    .mean()
    .reset_index()
)

# Fold the model number into the label, e.g. "SW-Ensemble(2)". This is built
# with vectorized string concatenation and refers to the column through
# `model_no_col`, the same variable the groupby above uses.
summary["model"] = summary["model"] + "(" + summary[model_no_col].astype(str) + ")"

# Keep only the exported columns, in export order. The explicit copy makes the
# assignment below operate on an independent frame.
summary = summary[["model", "horizon", "location", "MSE", "MAE", "Coverage 95%PI", "WIS"]].copy()

# Note: the "WIS" column is replaced by its base-10 logarithm but keeps its name.
summary["WIS"] = np.log10(summary["WIS"])

In [14]:
# Show the first five rows of the averaged metrics.
# (Alternative check: summary["horizon"].unique() lists the distinct horizons.)
summary.head(n=5)

Unnamed: 0,model,horizon,location,MSE,MAE,Coverage 95%PI,WIS
0,SW-Ensemble(2),1,China,238.931138,10.86842,93.548387,0.91562
1,SW-Ensemble(2),2,China,538.985826,15.309923,91.666667,1.039561
2,SW-Ensemble(2),3,China,965.418706,19.632138,86.206897,1.149118
3,SW-Ensemble(2),4,China,1795.273128,25.10724,85.714286,1.242897
4,SW-Ensemble(2),1,Japan,2.138416,1.066827,100.0,-0.110245


Finally, output average metrics to the respective files

In [12]:
# Write one metrics file per location into that location's own sub-directory.
for location in summary["location"].unique():
    output_path = f"../dashboard/output/unsmoothed/{location}"
    # to_csv does not create missing directories, so make sure the target
    # exists; exist_ok keeps this a no-op when the directory is already there.
    os.makedirs(output_path, exist_ok=True)
    location_rows = summary[summary["location"] == location]
    location_rows.to_csv(os.path.join(output_path, f"{model_prefix}average_metrics.csv"), index=False)

In [15]:
# Also write the full table (all locations together) to the top-level folder.
combined_dir = "../dashboard/output/unsmoothed/"
combined_name = f"{model_prefix}average_metrics.csv"
summary.to_csv(os.path.join(combined_dir, combined_name), index=False)