In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
from os import listdir
from os.path import join

In [8]:
model_outcome_folder = 'drive/My Drive/Colab Notebooks/Wishmart/model_outputs'
lgbm_files = [join(model_outcome_folder, f) for f in listdir(model_outcome_folder)]

In [4]:
print(lgbm_files)

['drive/My Drive/Colab Notebooks/Wishmart/model_outputs/nn_log_scaled_out_full.csv', 'drive/My Drive/Colab Notebooks/Wishmart/model_outputs/lgbm_log_scaled_out_full.csv']


Merge datasets.

In [None]:
types = {'store_nbr': 'int8',
                'item_nbr': 'int32',
                'class': 'int8',
                'unit_sales': 'float32',
         'perishable': 'int8'
            }

model_df = pd.read_csv(lgbm_files[0], parse_dates = ['date'], dtype = types, low_memory = True)
model_df.rename(columns={'actual_unit_sales': 'actual', 'predicted_unit_sales': 'pred_m1'}, inplace=True)

model_secondary = pd.read_csv(lgbm_files[1], parse_dates = ['date'], dtype = types, low_memory = True)
model_secondary.rename(columns={'actual_unit_sales': 'actual', 'predicted_unit_sales': 'pred_m2'}, inplace=True)

model_df['pred_m2'] = model_secondary['pred_m2']
model_df["pred_final"] = model_df[["pred_m1", "pred_m2"]].max(axis=1)

In [24]:
def get_error(weights, true_val, pred_val, predict_days_ahead=16):

    mse = mean_squared_error(true_val, pred_val, sample_weight=weights)
    rmse = np.sqrt(mse)

    mae = mean_absolute_error(true_val, pred_val, sample_weight=weights)

    mape = np.mean(np.abs((true_val - pred_val) / true_val)) * 100

    try:
        err = (true_val - pred_val) ** 2
        err = err.sum(axis=1) * weights
        nwrmsle = np.sqrt(err.sum() / weights.sum() / predict_days_ahead)
    except:
        nwrmsle = rmse

    return mse, rmse, nwrmsle, mae, mape

In [25]:
print("WMSE Model 1:", mean_squared_error(model_df['actual'].values, model_df["pred_m1"].values, sample_weight=(model_df["perishable"].values * 0.25 + 1)))
print("WMSE Model 2:", mean_squared_error(model_df['actual'].values, model_df["pred_m2"].values, sample_weight=(model_df["perishable"].values * 0.25 + 1)))
print("WMSE Ensemble:", mean_squared_error(model_df['actual'].values, model_df["pred_final"].values, sample_weight=(model_df["perishable"].values * 0.25 + 1)))

WMSE Model 1: 71.85915140590872
WMSE Model 2: 79.48696966655504
WMSE Ensemble: 70.37848315673571


In [26]:
weights = model_df["perishable"].values * 0.25 + 1

In [27]:
get_error(weights, model_df['actual'].values, model_df["pred_m1"].values)

  
  


(71.85915140590872,
 8.476977728289057,
 8.476977728289057,
 2.3971405287890573,
 nan)

In [30]:
get_error(weights, np.log1p(model_df['actual'].values), np.log1p(model_df["pred_m1"].values))

  
  


(0.3621393796520343,
 0.6017801755226191,
 0.6017801755226191,
 0.4485972174977284,
 nan)

In [28]:
get_error(weights, model_df['actual'].values, model_df["pred_m2"].values)

  
  


(79.48696966655504,
 8.915546515304323,
 8.915546515304323,
 2.619817636932413,
 nan)

In [31]:
get_error(weights, np.log1p(model_df['actual'].values), np.log1p(model_df["pred_m2"].values))

  
  


(0.4224839519346684,
 0.6499876552171344,
 0.6499876552171344,
 0.48991429965459793,
 nan)

In [29]:
get_error(weights, model_df['actual'].values, model_df["pred_final"].values)

  
  


(70.37848315673571, 8.38918846830465, 8.38918846830465, 2.504195318480556, nan)

In [32]:
get_error(weights, np.log1p(model_df['actual'].values), np.log1p(model_df["pred_final"].values))

  
  


(0.40784093099258195,
 0.6386242486725523,
 0.6386242486725523,
 0.48092080145342775,
 nan)