## Overall Prediction Observation


In [7]:
# Define variables
IS_FILTERED = True
ARCHIVED = None

SHOW_HEADER = True
SHOW_LAST_APPEARANCE = None

In [8]:
# Import libraries
import sys
sys.path.append('../../../prediction')

import numpy as np
import matplotlib.pyplot as plt

from putils.observation import compute_rmse, compute_mae, compute_mape, load_data_from_tuned_folder

In [9]:
# Load data
dfs = {
    "l1": load_data_from_tuned_folder("l1", last_appearance=SHOW_LAST_APPEARANCE),
    "l2": load_data_from_tuned_folder("l2", last_appearance=SHOW_LAST_APPEARANCE),
    "l3": load_data_from_tuned_folder("l3", last_appearance=SHOW_LAST_APPEARANCE),
}

# Make L1 and L2 data have the same length with L3 if SHOW_LAST_APPEARANCE = None
if SHOW_LAST_APPEARANCE is None:
    dfs["l1"] = dfs["l1"].tail(len(dfs["l3"]))
    dfs["l2"] = dfs["l2"].tail(len(dfs["l3"]))

# Indicate which columns to ignore
IGNORED_COLS = ["Time", "Actual"] # No need to include "FormattedTime" because it is used as index
if IS_FILTERED: IGNORED_COLS.append("Raw")

In [10]:
# Compute RMSE, MAE, and MAPE for each model
for key, df in dfs.items():
    print("Layer", key)
    for column in df.columns:
        if column not in IGNORED_COLS:
            actual = df["Raw"] if IS_FILTERED else df["Actual"]
            rmse = compute_rmse(actual, df[column])
            mape = compute_mape(actual, df[column])
            mae = compute_mae(actual, df[column])
            print(f"{column} - RMSE: {rmse:.4f}, MAE: {mae:.4f}, MAPE: {mape:.2f}%")
    print("------------------")

Layer l1
ARIMA - RMSE: 3657.4930, MAE: 2828.5929, MAPE: 59.27%
SARIMA - RMSE: 3570142765.4344, MAE: 206705187.0240, MAPE: 6620079.09%
RNN - RMSE: 2787.2507, MAE: 2179.4873, MAPE: 48.13%
LSTM - RMSE: 2826.7991, MAE: 2221.6232, MAPE: 49.28%
CNN - RMSE: 2897.4217, MAE: 2289.3315, MAPE: 50.15%
GRU - RMSE: 2795.2381, MAE: 2187.4274, MAPE: 48.44%
TCN - RMSE: 3069.8012, MAE: 2440.4078, MAPE: 53.72%
------------------
Layer l2
LINEAR_REGRESSION - RMSE: 2775.2838, MAE: 2169.9336, MAPE: 48.06%
RANDOM_FOREST - RMSE: 2800.6491, MAE: 2192.6197, MAPE: 48.35%
FEEDFORWARD_NEURAL_NETWORK - RMSE: 2779.8037, MAE: 2173.5007, MAPE: 47.93%
------------------
Layer l3
Predicted - RMSE: 2777.5877, MAE: 2173.0116, MAPE: 48.02%
------------------


In [11]:
# Get data from d1
d1 = dfs["l1"]
d1 = d1.drop(columns=["Time", "Actual", "SARIMA"])
alpha = 100

# Dict of models with their initial weights (1 / number of models)
weights = {}
for column in d1.columns:
    if column != "Average" and column != "Raw":
        weights[column] = 1 / (len(d1.columns) - 1) # -1 because the column "Raw" is ignored

# Create a new column for the average value
d1["Average"] = None

# Loop through each row and compute the weighted average
count = 0
for index, row in d1.iterrows():
    # Find the average value with weight of the row 
    total = 0
    for column in d1.columns:
        if column != "Average" and column != "Raw":
            total += row[column] * weights[column]
    # Add the average value to the row
    d1.loc[index, "Average"] = total
    # Update the weights every 5 rows
    if count % 5 == 0:
        # Compute RMSE of each model from 0 to count - 1
        rmses = {}
        for column in d1.columns:
            if column != "Average" and column != "Raw":
                rmses[column] = compute_rmse(d1["Raw"][:count], d1[column][:count])
        # Compute the sum of exponential RMSEs [exp(alpha * -rmse)]
        rmse_sum = sum([np.exp(alpha * -rmse) for rmse in rmses.values()])
        # Update the weights
        for column in d1.columns:
            if column != "Average" and column != "Raw":
                result = np.exp(alpha * -rmses[column]) / rmse_sum
                # Check if nan
                if np.isnan(result) == False:
                    weights[column] = result
    # Increment count
    count += 1

print(f"final weights: {weights}")

# Save the data into new file
d1.to_csv("l1_weighted_average.csv")

  result = np.exp(alpha * -rmses[column]) / rmse_sum


final weights: {'ARIMA': 0.16666666666666666, 'RNN': 0.16666666666666666, 'LSTM': 0.16666666666666666, 'CNN': 0.16666666666666666, 'GRU': 0.16666666666666666, 'TCN': 0.16666666666666666}
