## Overall Prediction Observation


In [1]:
# Notebook parameters: switches read by the cells below.

# IS_FILTERED: when truthy, the "Raw" column is used as the reference series for
# the error metrics (instead of "Actual") and is excluded from the model columns.
IS_FILTERED = True
# ARCHIVED: not referenced by any cell visible in this section — TODO confirm where it is consumed.
ARCHIVED = None

# SHOW_HEADER: not referenced by any cell visible in this section — TODO confirm where it is consumed.
SHOW_HEADER = True
# SHOW_LAST_APPEARANCE: forwarded as `last_appearance` to load_data_from_tuned_folder.
# When it is None, the L1 and L2 frames are trimmed to the last len(L3) rows after loading.
SHOW_LAST_APPEARANCE = None

In [2]:
# Import libraries
import sys
sys.path.append('../../../prediction')

import numpy as np
import matplotlib.pyplot as plt

from putils.observation import compute_rmse, compute_mae, compute_mape, load_data_from_tuned_folder

In [3]:
# Read the tuned predictions of every layer into a dict keyed by layer name
dfs = {
    layer: load_data_from_tuned_folder(layer, last_appearance=SHOW_LAST_APPEARANCE)
    for layer in ("l1", "l2", "l3")
}

# When SHOW_LAST_APPEARANCE is None, keep only the last len(L3) rows of L1 and L2
# so that they line up with the L3 data
if SHOW_LAST_APPEARANCE is None:
    for layer in ("l1", "l2"):
        dfs[layer] = dfs[layer].tail(len(dfs["l3"]))

# Columns that are not model predictions and must be skipped when scoring.
# "FormattedTime" is not listed because it is used as the index.
# "Raw" is skipped too when the data is filtered.
IGNORED_COLS = ["Time", "Actual", "Raw"] if IS_FILTERED else ["Time", "Actual"]

In [4]:
# Report RMSE, MAE and MAPE of every model column, layer by layer.
# The reference series is "Raw" for filtered data and "Actual" otherwise.
reference_col = "Raw" if IS_FILTERED else "Actual"

for layer_name, layer_df in dfs.items():
    print("Layer", layer_name)
    model_columns = [name for name in layer_df.columns if name not in IGNORED_COLS]
    for column in model_columns:
        # Looked up per model so a layer without model columns never touches the reference column
        reference = layer_df[reference_col]
        predicted = layer_df[column]
        rmse = compute_rmse(reference, predicted)
        mape = compute_mape(reference, predicted)
        mae = compute_mae(reference, predicted)
        print(f"{column} - RMSE: {rmse:.4f}, MAE: {mae:.4f}, MAPE: {mape:.2f}%")
    print("------------------")

Layer l1
ARIMA - RMSE: 0.8338, MAE: 0.6441, MAPE: 29.69%
SARIMA - RMSE: 0.8313, MAE: 0.6404, MAPE: 29.55%
RNN - RMSE: 0.6364, MAE: 0.5036, MAPE: 23.89%
LSTM - RMSE: 0.6459, MAE: 0.5119, MAPE: 24.22%
CNN - RMSE: 0.6737, MAE: 0.5341, MAPE: 25.44%
GRU - RMSE: 0.6405, MAE: 0.5078, MAPE: 24.15%
TCN - RMSE: 0.7071, MAE: 0.5650, MAPE: 26.81%
------------------
Layer l2
LINEAR_REGRESSION - RMSE: 0.6346, MAE: 0.5019, MAPE: 23.70%
RANDOM_FOREST - RMSE: 0.6383, MAE: 0.5045, MAPE: 23.85%
FEEDFORWARD_NEURAL_NETWORK - RMSE: 0.6384, MAE: 0.5046, MAPE: 23.67%
------------------
Layer l3
Predicted - RMSE: 0.6348, MAE: 0.5022, MAPE: 23.68%
------------------


In [6]:
# Build an online weighted average of the layer-1 model predictions.
#
# For every row, the "Average" value is the weighted sum of the model predictions,
# using the weights known *before* that row. After the row is processed the weights
# are refreshed with a softmax over the negative RMSE of each model on the history:
#
#     w_i = exp(-alpha * rmse_i) / sum_j exp(-alpha * rmse_j)
#
# A larger alpha concentrates the weight on the model(s) with the lowest RMSE.
d1 = dfs["l1"].drop(columns=["Time", "Actual"])
alpha = 100

# Model prediction columns: everything except the reference ("Raw") and the output ("Average")
model_columns = [column for column in d1.columns if column not in ("Average", "Raw")]

# Every model starts with the same weight (1 / number of models)
weights = {column: 1 / len(model_columns) for column in model_columns}

# Averages are collected here and written to the frame once, after the loop:
# the frame being iterated is never modified, and a duplicated index label
# cannot overwrite several rows at once.
averages = []

for count, (_, row) in enumerate(d1.iterrows()):
    # Weighted average of the current row with the weights learned so far
    averages.append(sum(row[column] * weights[column] for column in model_columns))

    # No history before the first row: keep the uniform weights
    if count == 0:
        continue

    # RMSE of each model on rows 0 .. count - 1 (the current row is not included)
    rmses = {
        column: compute_rmse(d1["Raw"].iloc[:count], d1[column].iloc[:count])
        for column in model_columns
    }
    rmse_values = list(rmses.values())

    # Keep the previous weights when the RMSEs are unusable (NaN, or no finite value at all)
    if np.isnan(rmse_values).any() or not np.isfinite(min(rmse_values)):
        continue

    # Numerically stable softmax: shifting by the smallest RMSE leaves the weights
    # mathematically unchanged but keeps the largest term at exp(0) = 1, so the
    # denominator can never underflow to 0 (which used to yield NaN and silently
    # freeze the weights for large alpha * rmse).
    best_rmse = min(rmse_values)
    scores = {column: np.exp(-alpha * (rmse - best_rmse)) for column, rmse in rmses.items()}
    score_sum = sum(scores.values())
    weights = {column: score / score_sum for column, score in scores.items()}

# Store the averages as a regular numeric column
d1["Average"] = averages

print(f"final weights: {weights}")

# Save the data into new file
d1.to_csv("l1_weighted_average.csv")

final weights: {'ARIMA': 1.2624178132239193e-09, 'SARIMA': 1.6062384251396493e-09, 'RNN': 0.4825060061587081, 'LSTM': 0.18575230726891126, 'CNN': 0.01146116348979183, 'GRU': 0.3198767979800453, 'TCN': 0.00040372223388738615}
