In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import pickle
import random
import plotly.express as px
from sklearn.metrics import mean_squared_error

from model.expanding_utils import *
from pipeline_config import *
from hgru_data_preprocess.preprocess_config import *


In [2]:
# Seed the three random number sources imported by this notebook (PyTorch,
# NumPy's global RNG and Python's `random`) so that repeated runs draw the same
# values. `random.seed` is deliberately last: it returns None, so the cell
# produces no output.
torch.manual_seed(1)
np.random.seed(2)
random.seed(3)

# Creating New Dataset:

In [3]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Inputs of this notebook. The path names are not defined in the visible cells;
# presumably they are provided by the star-imported config modules.
original_prediction_dic = _load_pickle(test_predictions_path)
test_dataset_dict = _load_pickle(test_dataset_dict_path)
category_id_to_name_dict = _load_pickle(category_id_to_category_name_path)
categories_per_indent_dict = _load_pickle(categories_per_indent_path)
raw_dataset_dict = _load_pickle(HRNNpath)

# Only the horizon-6 expanding dataset is loaded; the loads for
# HRNN_EXPANDING_HOR_1 .. HRNN_EXPANDING_HOR_5 were disabled.
raw_dataset_hor6 = _load_pickle(HRNN_EXPANDING_HOR_6)

## Adding Predictions to the Dataset - 6 Expanding Horizons:

In [4]:
# Keep only the entries whose key (indent level) is 0, 1, 2 or 3.
categories_per_indent_dict = {
    indent: category_ids
    for indent, category_ids in categories_per_indent_dict.items()
    if indent in (0, 1, 2, 3)
}

In [5]:
def creating_new_dataset_dict(categories_per_indent_dict, category_id_to_name_dict, prediction_dic, test_dataset_dict, horizon):
    """Append one horizon's predictions as a new column to every category frame.

    Parameters
    ----------
    categories_per_indent_dict : dict
        Maps an indent level to an iterable of category ids.
    category_id_to_name_dict : dict
        Maps a category id to its category name.
    prediction_dic : dict
        Maps a category name to a torch tensor of predictions. The tensor is
        flattened, so it must hold one value per row kept from the category frame
        for the columns to line up.
    test_dataset_dict : dict
        Maps a category name to a DataFrame with a ``'Year'`` column. This dict
        and its frames are left untouched.
    horizon : int
        Used only to name the new column ``'expanding horizon: {horizon}'``.

    Returns
    -------
    dict
        Category name -> DataFrame made of the rows with ``Year`` greater than the
        module-level ``Year`` threshold (index reset) plus the prediction column.

    Notes
    -----
    ``Year`` is a global that is not defined in this function; presumably it is
    provided by the star-imported config modules.
    """
    column_name = f'expanding horizon: {horizon}'
    new_dataset_dict = {}
    for category_ids in categories_per_indent_dict.values():
        for cat_id in category_ids:
            cat_name = category_id_to_name_dict[cat_id]
            # .cpu() is a no-op for CPU tensors and makes .numpy() work for
            # predictions that still live on an accelerator.
            prediction_values = prediction_dic[cat_name].detach().cpu().flatten().numpy()
            prediction_df = pd.DataFrame({column_name: prediction_values})
            # Filter into a local instead of writing the filtered frame back into
            # the caller's dict, so the input stays reusable.
            category_df = test_dataset_dict[cat_name]
            test_df = category_df[category_df['Year'] > Year].reset_index(drop=True)
            new_dataset_dict[cat_name] = pd.concat([test_df, prediction_df], axis=1)

    return new_dataset_dict


In [6]:
def get_results_on_test_set(weights_path, new_data_dict, horizon, categories = None):
    """Run each category's checkpointed model over that category's data.

    For every category a dataloader is built with ``create_dataloader``, the
    checkpoint at ``weights_path + category + '.pt'`` is restored through
    ``load_checkpoint`` (starting from the global ``Model`` and ``Optimizer``),
    and ``get_predictions_on_test_set`` produces the predictions.

    Parameters
    ----------
    weights_path : str
        Prefix that is concatenated with the category name and ``'.pt'``.
    new_data_dict : dict
        Category name -> data passed to ``create_dataloader``.
    horizon
        Forwarded unchanged to ``create_dataloader``.
    categories : list, optional
        Categories to process; defaults to every key of ``new_data_dict``.

    Returns
    -------
    dict
        Category name -> predictions returned by ``get_predictions_on_test_set``.
    """
    selected_categories = list(new_data_dict.keys()) if categories is None else categories

    predictions_dict = {}
    for category in selected_categories:
        print(category)
        dataloader = create_dataloader(new_data_dict[category], horizon)
        # load_checkpoint yields four values; only the restored model is needed.
        model, _optimizer, _checkpoint, _valid_loss_min = load_checkpoint(
            weights_path + category + '.pt', Model, Optimizer
        )
        predictions_dict[category] = get_predictions_on_test_set(model, dataloader)
    return predictions_dict

In [7]:
def create_final_dataset_dict(categories_per_indent_dict, category_id_to_name_dict, original_prediction_dic, raw_dataset_hor6, num_horizons = 6):
    """Build the dataset that carries one prediction column per expanding horizon.

    Horizon 0 attaches ``original_prediction_dic`` to ``raw_dataset_hor6``. Every
    later horizon (1 .. ``num_horizons``) predicts on the dataset produced by the
    previous pass, using the checkpoints under the global ``weightspath``, and
    attaches those predictions as a further column.

    Returns
    -------
    dict
        The dataset dict produced by the last pass. With a negative
        ``num_horizons`` no pass runs and the return raises ``UnboundLocalError``.
    """
    for horizon in range(num_horizons + 1):
        print(f'the horizon is: {horizon}')
        if horizon != 0:
            # Later passes feed on the frames built by the previous pass.
            predictions = get_results_on_test_set(weightspath, new_data_dict, horizon)
            frames = new_data_dict
        else:
            # First pass: start from the supplied predictions and raw frames.
            predictions = original_prediction_dic
            frames = raw_dataset_hor6
        new_data_dict = creating_new_dataset_dict(
            categories_per_indent_dict, category_id_to_name_dict, predictions, frames, horizon
        )

    return new_data_dict

In [8]:
# Build the per-category frames with the original predictions plus one prediction
# column per expanding horizon (horizons 0 through 6, i.e. seven passes). Each
# pass after the first loads one checkpoint per category.
data_dict = create_final_dataset_dict(categories_per_indent_dict, category_id_to_name_dict, original_prediction_dic, raw_dataset_hor6, num_horizons = 6)

the horizon is: 0
the horizon is: 1
Alcoholic beverages
the number of columns is: 14
the columns: ['Inflation t-11', 'Inflation t-10', 'Inflation t-9', 'Inflation t-8', 'Inflation t-7', 'Inflation t-6', 'Inflation t-5', 'Inflation t-4', 'Inflation t-3', 'Inflation t-2', 'Inflation t-1', 'Inflation t', 'Inflation t+1', 'Inflation t+2']
x shape is: torch.Size([32, 13])
y shape is: torch.Size([32])
input shape is: torch.Size([32, 13])
Apparel
the number of columns is: 14
the columns: ['Inflation t-11', 'Inflation t-10', 'Inflation t-9', 'Inflation t-8', 'Inflation t-7', 'Inflation t-6', 'Inflation t-5', 'Inflation t-4', 'Inflation t-3', 'Inflation t-2', 'Inflation t-1', 'Inflation t', 'Inflation t+1', 'Inflation t+2']
x shape is: torch.Size([32, 13])
y shape is: torch.Size([32])
input shape is: torch.Size([32, 13])
Cereals and bakery products
the number of columns is: 14
the columns: ['Inflation t-11', 'Inflation t-10', 'Inflation t-9', 'Inflation t-8', 'Inflation t-7', 'Inflation t-6', '

In [9]:
def create_test_dataframe(prediction_dataset, rename_map=None):
    """Select the evaluation columns of every category frame and rename them.

    Parameters
    ----------
    prediction_dataset : dict
        Category name -> DataFrame containing the identification columns, the
        ``'Inflation t+1'`` .. ``'Inflation t+7'`` columns and the
        ``'expanding horizon: 0'`` .. ``'expanding horizon: 6'`` columns.
    rename_map : dict, optional
        Old column name -> new column name; columns without an entry keep their
        name. When omitted, the module-level ``plot_dict`` is used, which is the
        original behaviour. Pass the mapping explicitly whenever ``plot_dict`` has
        been rebound to something else (for example to this function's result),
        otherwise no column would be renamed.

    Returns
    -------
    dict
        Category name -> new DataFrame restricted to the selected columns, with
        renamed headers. Rows containing NaN are not dropped.

    Raises
    ------
    KeyError
        If a frame lacks one of the selected columns.
    """
    if rename_map is None:
        rename_map = plot_dict

    # Loop-invariant, so built once rather than once per category.
    columns = ['Category', 'Date', 'Year','Inflation t+1', 'Inflation t+2', 'Inflation t+3', 'Inflation t+4',
                'Inflation t+5', 'Inflation t+6', 'Inflation t+7','expanding horizon: 0', 'expanding horizon: 1',
                'expanding horizon: 2', 'expanding horizon: 3', 'expanding horizon: 4',
                'expanding horizon: 5', 'expanding horizon: 6']

    test_dict = {}
    for key, category_df in prediction_dataset.items():
        # Selecting with a list yields a new frame, so the input is not modified.
        df = category_df[columns]
        df.columns = [rename_map.get(col, col) for col in df.columns]
        test_dict[key] = df

    return test_dict

In [10]:
# NOTE(review): `create_test_dataframe` reads a global named `plot_dict` as its
# column-rename mapping, and this line rebinds that same name to the resulting
# dict of DataFrames. Re-running this cell without restoring the mapping first
# therefore leaves the columns un-renamed. Consider a distinct name for the
# result; TODO confirm where the original mapping is defined.
plot_dict = create_test_dataframe(data_dict)

In [14]:
def plot_results(plot_dict, categories = None, horizon = 1):
    """Show actual-vs-prediction line charts for a random sample of categories.

    Up to 10 categories are drawn with ``random.sample`` (so the selection depends
    on the state of Python's ``random`` module), and ``'All items'`` is added when
    it exists in ``plot_dict`` and was not already drawn.

    Parameters
    ----------
    plot_dict : dict
        Category name -> DataFrame with a ``'Date'`` column plus the
        ``'Actual - Period N'`` and ``'Prediction - Period N'`` columns, where
        ``N = horizon + 1``.
    categories : list, optional
        Pool to sample from; defaults to every key of ``plot_dict``.
    horizon : int
        Zero-based horizon; the plotted period is ``horizon + 1``.
    """
    if categories is None:
        categories = list(plot_dict.keys())

    # random.sample raises ValueError when asked for more items than the pool has.
    category_samples = random.sample(categories, min(10, len(categories)))
    # Add the aggregate series once, and only if there is data for it.
    if 'All items' in plot_dict and 'All items' not in category_samples:
        category_samples.append('All items')

    period = int(horizon+1)
    for category in category_samples:
        category_df = plot_dict[category]
        fig = px.line(category_df, x="Date", y=[f"Actual - Period {period}", f"Prediction - Period {period}"], title=f'{category} - Actual VS Prediction: Period '+str(horizon+1))
        fig.show()

# Called with the default horizon=1, so the "Period 2" actual/prediction columns
# are plotted for a random sample of categories plus 'All items'.
plot_results(plot_dict)

In [12]:
def avg_rmse(plot_dict, horizon = 0):
    """Return the square root of the mean per-category MSE for one horizon.

    For every frame in ``plot_dict`` the ``'Prediction - Period N'`` and
    ``'Actual - Period N'`` columns (``N = horizon + 1``) are compared after
    discarding their last ``1 + horizon`` rows. The per-category MSEs are averaged
    with equal weight and the root of that average is returned; this is not the
    mean of the per-category RMSEs.

    Parameters
    ----------
    plot_dict : dict
        Category name -> DataFrame holding the two columns described above.
    horizon : int
        Zero-based horizon.

    Returns
    -------
    float
        ``sqrt(mean(MSE per category))``.
    """
    prediction_column = f'Prediction - Period {int(horizon+1)}'
    actual_column = f'Actual - Period {int(horizon+1)}'
    rows_to_drop = 1 + horizon

    squared_errors = []
    for category_frame in plot_dict.values():
        predicted = category_frame[prediction_column][:-rows_to_drop].values
        observed = category_frame[actual_column][:-rows_to_drop].values
        squared_errors.append(mean_squared_error(predicted, observed))

    mean_of_mse = sum(squared_errors) / len(plot_dict)
    return np.sqrt(mean_of_mse)
    

In [13]:
# Average RMSE for forecast periods 1-7; period N corresponds to avg_rmse horizon N-1.
# All values are computed before anything is printed, as before.
rmse_by_period = {
    period_number: avg_rmse(plot_dict, period_number - 1)
    for period_number in range(1, 8)
}

# Keep the individual names bound in case other cells refer to them.
(rmse_period_1, rmse_period_2, rmse_period_3, rmse_period_4,
 rmse_period_5, rmse_period_6, rmse_period_7) = (
    rmse_by_period[period_number] for period_number in range(1, 8)
)

for period_number, period_rmse in rmse_by_period.items():
    print(f'RMSE Period {period_number}: {period_rmse}')

RMSE Period 1: 1.6614437570057259
RMSE Period 2: 2.004781743307396
RMSE Period 3: 2.0502552500808955
RMSE Period 4: 1.8103402870879228
RMSE Period 5: 1.9219373195032676
RMSE Period 6: 2.0844739609440834
RMSE Period 7: 1.981939412345216
