# Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import pickle
import random
import plotly.express as px
from sklearn.metrics import mean_squared_error

from config import *


In [2]:
torch.manual_seed(1)
np.random.seed(2)
random.seed(3)

# Read Results Data

In [3]:
with open('/Users/mvilenko/Desktop/CPI_HRNN - version 2.0/mayas_project/SGRU/data/predictions.pickle', 'rb') as f:
    predictions = pickle.load(f)

with open(BaselinePath, 'rb') as f:
    raw_dataset_dict = pickle.load(f)
    
with open(category_id_to_category_name_path, 'rb') as f:
    category_id_to_name_dict = pickle.load(f)
    
with open(categories_per_indent_path, 'rb') as f:
    categories_per_indent_dict = pickle.load(f)
        
with open('/Users/mvilenko/Desktop/CPI_HRNN - version 2.0/mayas_project/SGRU/data/test_dataset.pickle', 'rb') as f:
    test_dataset = pickle.load(f)

In [4]:
categories_per_indent_dict ={key: categories_per_indent_dict[key] for key in categories_per_indent_dict.keys() if key in [0,1,2,3]}
cat_ids = list(categories_per_indent_dict.values())

flat_cat_id = [item for sublist in cat_ids for item in sublist]

categories = []
for cat_id in flat_cat_id:
    categories.append(category_id_to_name_dict[cat_id])

In [5]:
def create_test_dataframe(raw_dataset_dict: dict):
    test_list =  []
    for key in categories:
        df = raw_dataset_dict[key][['Category_id','Category', 'Date', 'Year', 'Inflation t+1']]
        df.dropna(inplace=True)
        df.rename(columns={'Inflation t+1': 'Actual'}, inplace=True)
        target_df = df[df['Year'] > Year]
        test_list.append(target_df)
    
    test_df = pd.concat(test_list)
    return test_df

test_df = create_test_dataframe(raw_dataset_dict)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'Inflation t+1': 'Actual'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'Inflation t+1': 'Actual'}, inplace=True

In [14]:
def get_df_with_predictions(predictions, test_df):
    all_data_dict = {}
    prediction_df =  pd.DataFrame(predictions.flatten().detach().numpy())
    prediction_df.rename(columns = {0: 'Prediction'}, inplace=True)
    prediction_df.reset_index(inplace=True)
    test_df.reset_index(inplace=True)
    all_data = pd.concat([test_df, prediction_df], axis=1)

    all_data = all_data[~all_data.Category.isna()]
    all_data = all_data[~all_data.Prediction.isna()]
    
    all_data = all_data[['Category_id','Category','Date','Year','Actual','Prediction']]

    return all_data

all_data=get_df_with_predictions(predictions, test_df)

In [16]:
data_Categories  = list(all_data.Category.unique())

In [20]:
def plot_results(all_data, categories):
    category_samples = random.sample(categories, 10)+['All items']
    for category in category_samples:
        category_df = all_data[all_data['Category'] == category]
        fig = px.line(category_df, x="Date", y=["Actual", "Prediction"], title=f'{category} - Actual VS Prediction')
        fig.show()

plot_results(all_data, data_Categories)

In [26]:
def avg_rmse(all_data):
    mse_lst = []
    for cat in data_Categories:
        df_predictions = all_data[all_data['Category'] == cat]
        y_pred = df_predictions['Prediction'].values
        print(f'y pred: {y_pred}')
        y_actual = df_predictions['Actual'].values
        print(f'y actual: {y_actual}')
        print(cat)
        mse = mean_squared_error(y_pred, y_actual)
        mse_lst.append(mse)
    
    total_mse = sum(mse_lst)
    avg_mse = total_mse/len(data_Categories)
    avg_rmse = np.sqrt(avg_mse)
    
    return avg_rmse
    

In [27]:
avg_rmse(all_data)

y pred: [ 0.07066773  0.06697881  0.18591127  0.18841812  0.18151136  0.06324734
 -0.1288091  -0.0136138   0.05100881  0.15565798  0.22105625  0.12398347
  0.10541273  0.08006558  0.11630757  0.04642207 -0.03650627  0.10792455
  0.16668907  0.13252243  0.14615346 -0.04739376 -0.03467945  0.12976605
  0.1427517   0.23655275  0.19540375  0.19864476  0.17901017  0.1373018
  0.12133404]
y actual: [ 0.3073991   0.46129255  0.26638258  0.75731699  0.15728086 -0.29034911
  0.26673403 -0.16003258  0.33488418  0.40038626 -0.04928669  0.08084822
 -0.12823255  0.31393576  0.18612582  0.35631696  0.45132004  0.30114271
  0.34559346  0.15744854 -0.17067492  0.03325159  0.50684919  0.25002735
  0.74784159  0.50152027  0.37956881  0.48905611  0.41760903  0.48719925
  0.35297358]
Alcoholic beverages
y pred: [ 0.1402714   0.07349554 -0.08682824 -0.17350629 -0.01246736  0.11599197
  0.12947682  0.16455406  0.13228032  0.04559262  0.06594221  0.06343406
 -0.02155694  0.01333886  0.19944498  0.3514445   0

1.9042385706755554