## Overall Prediction Observation


In [1]:
# Define variables
IS_FILTERED = True
ARCHIVED = None

SHOW_HEADER = True
SHOW_LAST_APPEARANCE = None

In [2]:
# Import libraries
import sys
sys.path.append('../../../prediction')

import numpy as np
import matplotlib.pyplot as plt

from putils.observation import compute_rmse, compute_mae, compute_mape, load_data_from_tuned_folder

In [3]:
# Load data
dfs = {
    "l1": load_data_from_tuned_folder("l1", last_appearance=SHOW_LAST_APPEARANCE),
    "l2": load_data_from_tuned_folder("l2", last_appearance=SHOW_LAST_APPEARANCE),
    "l3": load_data_from_tuned_folder("l3", last_appearance=SHOW_LAST_APPEARANCE),
}

# Make L1 and L2 data have the same length with L3 if SHOW_LAST_APPEARANCE = None
if SHOW_LAST_APPEARANCE is None:
    dfs["l1"] = dfs["l1"].tail(len(dfs["l3"]))
    dfs["l2"] = dfs["l2"].tail(len(dfs["l3"]))

# Indicate which columns to ignore
IGNORED_COLS = ["Time", "Actual"] # No need to include "FormattedTime" because it is used as index
if IS_FILTERED: IGNORED_COLS.append("Raw")

In [4]:
# Compute RMSE, MAE, and MAPE for each model
for key, df in dfs.items():
    print("Layer", key)
    for column in df.columns:
        if column not in IGNORED_COLS:
            actual = df["Raw"] if IS_FILTERED else df["Actual"]
            rmse = compute_rmse(actual, df[column])
            mape = compute_mape(actual, df[column])
            mae = compute_mae(actual, df[column])
            print(f"{column} - RMSE: {rmse:.4f}, MAE: {mae:.4f}, MAPE: {mape:.2f}%")
    print("------------------")

Layer l1
ARIMA - RMSE: 1049.1569, MAE: 746.8066, MAPE: 67.70%
SARIMA - RMSE: 1049.1745, MAE: 745.4478, MAPE: 67.46%
ETS - RMSE: 1459.6899, MAE: 1024.6374, MAPE: 94.28%
RNN - RMSE: 642.3348, MAE: 495.1957, MAPE: 49.11%
LSTM - RMSE: 665.7295, MAE: 516.6156, MAPE: 50.66%
CNN - RMSE: 741.9941, MAE: 576.8489, MAPE: 58.00%
GRU - RMSE: 649.1460, MAE: 501.2780, MAPE: 49.63%
TCN - RMSE: 847.4626, MAE: 667.0838, MAPE: 66.95%
------------------
Layer l2
LINEAR_REGRESSION - RMSE: 637.3582, MAE: 489.8354, MAPE: 47.91%
RANDOM_FOREST - RMSE: 649.3304, MAE: 498.4929, MAPE: 48.91%
FEEDFORWARD_NEURAL_NETWORK - RMSE: 650.6872, MAE: 498.2397, MAPE: 48.63%
------------------
Layer l3
Predicted - RMSE: 640.2040, MAE: 491.5452, MAPE: 48.15%
------------------


In [5]:
# Get data from d1
d1 = dfs["l1"]
d1 = d1.drop(columns=["Time", "Actual"])
alpha = 100

# Dict of models with their initial weights (1 / number of models)
weights = {}
for column in d1.columns:
    if column != "Average" and column != "Raw":
        weights[column] = 1 / (len(d1.columns) - 1) # -1 because the column "Raw" is ignored

# Create a new column for the average value
d1["Average"] = None

# Loop through each row and compute the weighted average
count = 0
for index, row in d1.iterrows():
    # Find the average value with weight of the row 
    total = 0
    for column in d1.columns:
        if column != "Average" and column != "Raw":
            total += row[column] * weights[column]
    # Add the average value to the row
    d1.loc[index, "Average"] = total
    # Update the weights every 5 rows
    if count % 5 == 0:
        # Compute RMSE of each model from 0 to count - 1
        rmses = {}
        for column in d1.columns:
            if column != "Average" and column != "Raw":
                rmses[column] = compute_rmse(d1["Raw"][:count], d1[column][:count])
        # Compute the sum of exponential RMSEs [exp(alpha * -rmse)]
        rmse_sum = sum([np.exp(alpha * -rmse) for rmse in rmses.values()])
        # Update the weights
        for column in d1.columns:
            if column != "Average" and column != "Raw":
                result = np.exp(alpha * -rmses[column]) / rmse_sum
                # Check if nan
                if np.isnan(result) == False:
                    weights[column] = result
    # Increment count
    count += 1

print(f"final weights: {weights}")

# Save the data into new file
d1.to_csv("l1_weighted_average.csv")

  result = np.exp(alpha * -rmses[column]) / rmse_sum


final weights: {'ARIMA': 0.125, 'SARIMA': 0.125, 'ETS': 0.125, 'RNN': 0.125, 'LSTM': 0.125, 'CNN': 0.125, 'GRU': 0.125, 'TCN': 0.125}
