In [1]:
#04 Forecast Combination
## This code takes the forecasts from 03Autoregression and creates forecast combinations
## namely Mean, Median, Expanding Weighted Mean, and Rolling Weighted Mean

In [2]:
# import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
import itertools
import statsmodels.api as sm
import os

In [3]:
## same as in 03Autoregression, this creates epiweeks from the dates
from epiweeks import Week, Year
from datetime import date
def create_epiweek(date):
    """Return the ``epiweeks.Week`` that contains the given calendar date.

    Parameters
    ----------
    date
        Value forwarded unchanged to ``Week.fromdate``. Note that inside this
        function the parameter name hides the ``date`` class imported from
        ``datetime``.

    Returns
    -------
    Whatever ``Week.fromdate`` returns for ``date``.
    """
    epiweek = Week.fromdate(date)
    return epiweek
def create_epiweekplot(epiweek):
    """Build a plot label of the form ``Y<first 4 chars>W<remaining chars>``.

    The argument is converted with ``str()`` first, so both epiweek objects
    and plain strings/integers are accepted.

    Parameters
    ----------
    epiweek
        Any object whose string form starts with the four-character year
        followed by the week part (e.g. ``202001``).

    Returns
    -------
    str
        For example ``'Y2020W01'`` for ``202001``.
    """
    text = str(epiweek)
    year_part, week_part = text[:4], text[4:]
    return 'Y{}W{}'.format(year_part, week_part)
def create_epiweek_fromstr(str):
    """Parse the text form of an epiweek into an ``epiweeks.Week``.

    Parameters
    ----------
    str
        Value forwarded unchanged to ``Week.fromstring``. The parameter name
        hides the builtin ``str`` inside this function; it is kept because
        it is part of the function's signature.

    Returns
    -------
    Whatever ``Week.fromstring`` returns for the given text.
    """
    parsed_week = Week.fromstring(str)
    return parsed_week

In [4]:
## Functions for generating weights

def min_max_norm_vector(x: pd.Series):
    """Min-max rescale a one-dimensional vector, keeping its index.

    Parameters
    ----------
    x
        A ``pd.Series``; any other input is wrapped with ``pd.Series(x)``
        first.

    Returns
    -------
    pd.Series
        The values transformed by ``MinMaxScaler`` (default settings),
        carrying the same index as the (possibly wrapped) input.
    """
    series = x if isinstance(x, pd.Series) else pd.Series(x)
    # The scaler works on 2-D input, so present the vector as one column.
    as_column = series.values.reshape(-1, 1)
    scaled = MinMaxScaler().fit_transform(as_column)
    return pd.Series(scaled.ravel(), index=series.index)

def proportion(x):
    """Express every element of ``x`` as a share of the total of ``x``.

    Parameters
    ----------
    x
        Array-like accepted by ``np.sum`` and supporting division.

    Returns
    -------
    ``x`` divided element-wise by ``np.sum(x)``. No guard is applied when
    the total is zero.
    """
    total = np.sum(x)
    return x / total
    
def normalize_and_proportion(x):
    """Turn a score vector into weights.

    The vector is first rescaled with ``min_max_norm_vector`` and the
    rescaled values are then converted to shares of their sum with
    ``proportion``.

    Parameters
    ----------
    x
        Vector of scores (larger score -> larger weight).

    Returns
    -------
    The result of ``proportion`` applied to the rescaled vector.
    """
    rescaled = min_max_norm_vector(x)
    weights = proportion(rescaled)
    return weights

In [5]:
## Weighted Mean using eXpanding MSE

## Generate xmse
def generate_xmse(dataset, target_var):
    """Compute expanding-window MSE weights for every model column.

    For each row after the first, the mean squared error of every model is
    measured over *all preceding rows* (the row itself is excluded, so the
    weights never use the value being forecast). Each row of errors is then
    negated and passed through ``normalize_and_proportion`` so that a lower
    error yields a larger weight.

    The window is advanced by row position rather than by ``label + 1``, so
    the index neither has to be gap-free nor support integer addition.

    Parameters
    ----------
    dataset : pd.DataFrame
        One column named ``target_var`` holding the observed values; every
        other column is treated as a model forecast. Rows are assumed to be
        in chronological order.
    target_var : str
        Name of the observed-value column.

    Returns
    -------
    pd.DataFrame
        Indexed by ``dataset.index[1:]`` with one ``<model>_xmse`` column per
        model. Empty (but with the expected columns) when the dataset has
        fewer than two rows or no model columns.
    """
    y_pred = dataset.copy()
    index = y_pred.index
    model_list = [col for col in y_pred.columns if col != target_var]
    weight_cols = [model + '_xmse' for model in model_list]

    # Not enough history to score any row: hand back an empty weight table
    # instead of failing on index[0] or inside the row-wise normalisation.
    if not model_list or len(index) < 2:
        return pd.DataFrame(index=index[:0], columns=weight_cols, dtype=float)

    records = []
    for pos in range(1, len(index)):
        history = y_pred.iloc[:pos]  # everything strictly before row `pos`
        records.append([
            mean_squared_error(history[[target_var]], history[[model]])
            for model in model_list
        ])

    # Build the frame once instead of enlarging it cell by cell.
    y_xmse = pd.DataFrame(records, index=index[1:], columns=weight_cols)
    return y_xmse.dropna().apply(func=lambda x: normalize_and_proportion(-x), axis=1)


In [6]:
## Weighted Mean using Rolling MSE
## Generate RMSE (here "RMSE" = Rolling-window MSE, not root mean squared error)
def generate_rmse(dataset, target_var, window_size):
    """Compute rolling-window MSE weights for every model column.

    "RMSE" in this notebook means *rolling* MSE. For each row that has at
    least ``window_size`` preceding rows, the mean squared error of every
    model is measured over exactly those ``window_size`` preceding rows (the
    row itself is excluded). Each row of errors is then negated and passed
    through ``normalize_and_proportion`` so that a lower error yields a
    larger weight.

    The window is advanced by row position rather than by ``label + 1``, so
    the index neither has to be gap-free nor support integer addition.

    Parameters
    ----------
    dataset : pd.DataFrame
        One column named ``target_var`` holding the observed values; every
        other column is treated as a model forecast. Rows are assumed to be
        in chronological order.
    target_var : str
        Name of the observed-value column.
    window_size : int
        Number of preceding rows used for each error estimate (>= 1).

    Returns
    -------
    pd.DataFrame
        Indexed by ``dataset.index[window_size:]`` with one ``<model>_rmse``
        column per model. Empty (but with the expected columns) when the
        dataset has no more than ``window_size`` rows or no model columns.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError('window_size must be a positive integer')

    y_pred = dataset.copy()
    index = y_pred.index
    model_list = [col for col in y_pred.columns if col != target_var]
    weight_cols = [model + '_rmse' for model in model_list]

    # No row has a full window of history: hand back an empty weight table
    # instead of failing on index[window_size - 1].
    if not model_list or len(index) <= window_size:
        return pd.DataFrame(index=index[:0], columns=weight_cols, dtype=float)

    records = []
    for pos in range(window_size, len(index)):
        window = y_pred.iloc[pos - window_size:pos]  # the rows just before `pos`
        records.append([
            mean_squared_error(window[[target_var]], window[[model]])
            for model in model_list
        ])

    # Build the frame once instead of enlarging it cell by cell.
    y_rmse = pd.DataFrame(records, index=index[window_size:], columns=weight_cols)
    return y_rmse.dropna().apply(func=lambda x: normalize_and_proportion(-x), axis=1)

In [7]:
## Generate combination (Mean, Median, XMSE, RMSE)
def generate_pred_combi(dataset, target_var, y_xmse, y_rmse):
    """Combine the individual model forecasts into ensemble forecasts.

    Four combinations are produced per row: the plain mean and median of all
    model columns, plus two weighted means using the rows of ``y_xmse`` and
    ``y_rmse`` as weights. Weights are matched to model columns by position,
    so the weight columns must be in the same order as the model columns.

    Parameters
    ----------
    dataset : pd.DataFrame
        Column ``target_var`` plus one column per model forecast.
    target_var : str
        Name of the observed-value column.
    y_xmse, y_rmse : pd.DataFrame
        Weight tables whose index labels are rows of ``dataset``; rows of
        ``dataset`` without weights keep NaN in the corresponding column.

    Returns
    -------
    pd.DataFrame
        ``target_var`` followed by ``mean``, ``median`` and, when the weight
        tables are non-empty, ``mean_xmse`` and ``mean_rmse``.
    """
    y_pred = dataset.copy()
    is_model = y_pred.columns != target_var

    observed = y_pred[[target_var]].copy()
    combos = observed.drop(columns=[target_var])  # same index, no columns yet

    model_preds = y_pred.loc[:, is_model]
    combos['mean'] = model_preds.mean(numeric_only=True, axis=1)
    combos['median'] = model_preds.median(numeric_only=True, axis=1)

    # Weighted means, one weight table at a time (expanding first, then rolling).
    for column, weight_table in (('mean_xmse', y_xmse), ('mean_rmse', y_rmse)):
        for epiweek in weight_table.index:
            combos.at[epiweek, column] = np.average(
                y_pred.loc[epiweek, is_model],
                weights=weight_table.loc[epiweek],
            )

    return pd.concat([observed, combos], axis=1)

In [8]:
def generate_forecast_combi(dataset, target_var, rmse_window_size):
    """Run the full combination pipeline for one forecast table.

    Computes the expanding-MSE weights and the rolling-MSE weights, then
    hands both to ``generate_pred_combi``.

    Parameters
    ----------
    dataset : pd.DataFrame
        Column ``target_var`` plus one column per model forecast.
    target_var : str
        Name of the observed-value column.
    rmse_window_size : int
        Window length passed to ``generate_rmse``.

    Returns
    -------
    The value returned by ``generate_pred_combi``.
    """
    expanding_weights = generate_xmse(dataset, target_var)
    rolling_weights = generate_rmse(dataset, target_var, rmse_window_size)
    combined = generate_pred_combi(dataset, target_var, expanding_weights, rolling_weights)
    return combined

In [9]:
## This function finds the forecast prediction files from 03Autoregression,
## and then creates prediction forecast combination outputs

def forecast_combination(target_var, pred_directory, rmse_window_size=8):
    """Create forecast-combination CSVs for every prediction file of a target.

    Each regular file in ``<target_var>/<pred_directory>`` is read as CSV,
    indexed by its ``epiweek`` column, run through
    ``generate_forecast_combi`` and written under the same file name to
    ``<target_var>/pred_combi``. Progress is printed per file.

    Parameters
    ----------
    target_var : str
        Name of the target variable; used both as the top-level directory
        and as the observed-value column inside each CSV.
    pred_directory : str
        Sub-directory of ``target_var`` holding the prediction CSVs.
    rmse_window_size : int, optional
        Window length for the rolling-MSE weights. Defaults to 8, the value
        that was previously hard-coded.

    Raises
    ------
    FileNotFoundError
        From ``os.listdir`` if the prediction directory does not exist.
    """
    directory = os.path.join(target_var, pred_directory)
    pred_combi_path = os.path.join(target_var, 'pred_combi')

    for filename in os.listdir(directory):
        pred_file = os.path.join(directory, filename)
        # Sub-directories (e.g. notebook checkpoints) are skipped.
        if not os.path.isfile(pred_file):
            continue
        print(pred_file)

        y_pred = pd.read_csv(pred_file, parse_dates = [0], dayfirst = True)
        y_pred['epiweek'] = y_pred['epiweek'].apply(create_epiweek_fromstr)
        y_pred = y_pred.set_index('epiweek')

        y_pred_combi = generate_forecast_combi(y_pred, target_var, rmse_window_size = rmse_window_size)

        # exist_ok avoids the check-then-create race and is a no-op once the
        # directory exists; it is still only created when there is output.
        os.makedirs(pred_combi_path, exist_ok = True)
        y_pred_combi.to_csv(os.path.join(pred_combi_path, filename))

        print('done')

In [10]:
def full_forecast_combination(target_variables_file, pred_directory):
    """Run ``forecast_combination`` for every target listed in a text file.

    Parameters
    ----------
    target_variables_file : str
        Path to a text file with one target-variable name per line. Blank
        lines are ignored, and the last line does not need a trailing
        newline.
    pred_directory : str
        Prediction sub-directory name forwarded to ``forecast_combination``.
    """
    target_variables = []
    with open(target_variables_file, 'r') as file:
        for line in file:
            # Strip only the line terminator. Slicing off the last character
            # unconditionally would truncate the final name when the file
            # does not end with a newline.
            target_variable = line.rstrip('\r\n')
            # A blank line is not a target; passing '' on would make the
            # prediction directory resolve relative to the working directory.
            if not target_variable.strip():
                continue
            target_variables.append(target_variable)
    print(target_variables)

    for target_var in target_variables:
        forecast_combination(target_var, pred_directory)
    
# Run the combination step for every target variable listed in
# target_variables.txt, reading the forecasts from each target's 'pred' folder.
full_forecast_combination(
    target_variables_file='target_variables.txt',
    pred_directory='pred',
)

['Cardiovascular disease', 'Chronic respiratory disease', 'Factors influencing health status and contact with health services', 'Digestive disease', 'Endocrine disorders', 'Malignant neoplasms', 'Diabetes mellitus', 'Genitourinary disorders', 'Musculoskeletal disease', 'Infectious and Parasitic Diseases', 'Ill-defined diseases', 'Neurological and sense disorders', 'Oral Diseases', 'Other neoplasms', 'Respiratory Infection', 'Skin diseases']
Cardiovascular disease/pred/L8_S2.csv
done
Cardiovascular disease/pred/L8_S3.csv
done
Cardiovascular disease/pred/L8_S1.csv
done
Cardiovascular disease/pred/L8_S4.csv
done
Cardiovascular disease/pred/L8_S11.csv
done
Cardiovascular disease/pred/L8_S10.csv
done
Cardiovascular disease/pred/L8_S5.csv
done
Cardiovascular disease/pred/L8_S7.csv
done
Cardiovascular disease/pred/L8_S12.csv
done
Cardiovascular disease/pred/L8_S6.csv
done
Cardiovascular disease/pred/L8_S8.csv
done
Cardiovascular disease/pred/L8_S9.csv
done
Chronic respiratory disease/pred/L8_