## Overall Prediction Observation


In [101]:
# Notebook configuration

# Data selection
# IS_FILTERED: when truthy, the "Raw" column is used as the ground truth for the
# error metrics and is excluded from the model columns; otherwise "Actual" is used.
IS_FILTERED = True
ARCHIVED = None  # NOTE(review): no use of this flag is visible in this part of the notebook

# Display options
# SHOW_LAST_APPEARANCE: forwarded to load_data_from_tuned_folder(last_appearance=...);
# when it is None the L1/L2 frames are cut down to the last len(L3) rows after loading.
SHOW_LAST_APPEARANCE = None
SHOW_HEADER = True  # NOTE(review): no use of this flag is visible in this part of the notebook

In [102]:
# Import libraries
import sys
sys.path.append('../../../prediction')

import numpy as np
import matplotlib.pyplot as plt

from putils.observation import compute_rmse, compute_mae, compute_mape, load_data_from_tuned_folder

In [103]:
# Load the tuned predictions of each layer into a dict keyed by layer name
dfs = {
    layer: load_data_from_tuned_folder(layer, last_appearance=SHOW_LAST_APPEARANCE)
    for layer in ("l1", "l2", "l3")
}

# When SHOW_LAST_APPEARANCE is None, keep only the last len(L3) rows of L1 and L2
# so that they line up with the L3 data
if SHOW_LAST_APPEARANCE is None:
    l3_rows = len(dfs["l3"])
    for layer in ("l1", "l2"):
        dfs[layer] = dfs[layer].tail(l3_rows)

# Columns that must not be scored as model predictions.
# "FormattedTime" does not need to be listed because it is used as the index.
# "Raw" is only excluded when it serves as the ground truth (IS_FILTERED).
IGNORED_COLS = ["Time", "Actual"] + (["Raw"] if IS_FILTERED else [])

In [104]:
# Print RMSE, MAE, and MAPE of every prediction column, one section per layer
for key, df in dfs.items():
    print("Layer", key)
    for column in df.columns:
        # Skip bookkeeping / ground-truth columns
        if column in IGNORED_COLS:
            continue
        # Ground truth is "Raw" when IS_FILTERED is set, otherwise "Actual"
        actual = df["Raw" if IS_FILTERED else "Actual"]
        predicted = df[column]
        rmse = compute_rmse(actual, predicted)
        mape = compute_mape(actual, predicted)
        mae = compute_mae(actual, predicted)
        print(f"{column} - RMSE: {rmse:.4f}, MAE: {mae:.4f}, MAPE: {mape:.2f}%")
    print("------------------")

Layer l1
ARIMA - RMSE: 0.0682, MAE: 0.0510, MAPE: 39.00%
SARIMA - RMSE: 0.0772, MAE: 0.0562, MAPE: 42.39%
RNN - RMSE: 0.0470, MAE: 0.0363, MAPE: 28.67%
LSTM - RMSE: 0.0501, MAE: 0.0386, MAPE: 30.61%
CNN - RMSE: 0.0525, MAE: 0.0408, MAPE: 32.43%
GRU - RMSE: 0.0492, MAE: 0.0379, MAPE: 30.01%
TCN - RMSE: 0.0583, MAE: 0.0458, MAPE: 35.98%
------------------
Layer l2
LINEAR_REGRESSION - RMSE: 0.0468, MAE: 0.0362, MAPE: 28.90%
RANDOM_FOREST - RMSE: 0.0474, MAE: 0.0367, MAPE: 29.41%
FEEDFORWARD_NEURAL_NETWORK - RMSE: 0.0473, MAE: 0.0367, MAPE: 29.34%
------------------
Layer l3
Predicted - RMSE: 0.0469, MAE: 0.0363, MAPE: 29.11%
------------------


In [105]:
# Preview the L1 predictions without the "Time" and "Actual" columns
alpha = 100  # not used in this cell
d1 = dfs["l1"].drop(columns=["Time", "Actual"])
d1.head()

Unnamed: 0_level_0,ARIMA,SARIMA,RNN,LSTM,CNN,GRU,TCN,Raw
FormattedTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
04:02:40,0.106734,0.111003,0.110547,0.111831,0.112221,0.110998,0.109915,0.058
04:02:45,0.100079,0.110853,0.125606,0.119704,0.112968,0.1091,0.114659,0.124
04:02:50,0.095099,0.106056,0.146601,0.126776,0.123387,0.11274,0.109229,0.144
04:02:55,0.091374,0.097464,0.158022,0.133991,0.131778,0.125485,0.117121,0.206
04:03:00,0.088587,0.092915,0.154177,0.136656,0.138083,0.134796,0.130759,0.122


In [106]:
# Build an adaptively weighted average ("Average") of the L1 model predictions.
#
# Every row is averaged with the current weights; every `update_interval` rows the
# weights are re-estimated from the RMSE of each model on the rows seen so far,
# using w_m = exp(-alpha * rmse_m) / sum_k exp(-alpha * rmse_k).

# Start from a fresh frame derived from dfs["l1"] so that re-running this cell
# never sees an "Average" column left over from a previous run
d1 = dfs["l1"].drop(columns=["Time", "Actual"])
alpha = 100          # larger alpha -> weights concentrate more on the lowest-RMSE models
update_interval = 5  # re-estimate the weights every `update_interval` rows

# Every column except the ground truth ("Raw") and the output ("Average") is a model
model_cols = [column for column in d1.columns if column not in ("Average", "Raw")]

# Dict of models with their initial weights (1 / number of models)
weights = {column: 1 / len(model_cols) for column in model_cols}

# Read predictions and write averages by position. Label-based writes such as
# d1.loc[index, "Average"] would overwrite every row sharing the same FormattedTime
# label if labels repeat, and per-cell .loc writes inside iterrows() are slow.
predictions = d1[model_cols].to_numpy(dtype=float)
averages = np.empty(len(d1), dtype=float)

for count in range(len(d1)):
    # Weighted average of this row, accumulated in column order
    averages[count] = sum(
        predictions[count, j] * weights[column] for j, column in enumerate(model_cols)
    )

    # Update the weights every `update_interval` rows.
    # Row 0 is skipped: there is no history yet, so the RMSE would be computed on
    # empty slices and the update would have to be discarded anyway.
    if count > 0 and count % update_interval == 0:
        # RMSE of each model over rows 0 .. count - 1
        rmses = np.array(
            [compute_rmse(d1["Raw"].iloc[:count], d1[column].iloc[:count]) for column in model_cols],
            dtype=float,
        )
        # Softmax over -alpha * rmse. Subtracting the largest score leaves the result
        # mathematically unchanged but keeps the biggest exponential at exp(0) = 1, so
        # the denominator cannot underflow to 0 (0 / 0 = NaN) when alpha * rmse is large.
        scores = -alpha * rmses
        exp_scores = np.exp(scores - scores.max())
        new_weights = exp_scores / exp_scores.sum()
        # Keep the previous weights if the update is undefined (e.g. NaN RMSE)
        if not np.isnan(new_weights).any():
            # Store plain Python floats so the printed dict does not depend on how
            # the installed NumPy version represents its scalars
            weights = {column: float(weight) for column, weight in zip(model_cols, new_weights)}

# Attach the result as a float column in one assignment
d1["Average"] = averages

print(f"final weights: {weights}")

# Save the data into new file
d1.to_csv("l1_weighted_average.csv")

final weights: {'ARIMA': 0.033444665134729386, 'SARIMA': 0.01338255268389131, 'RNN': 0.2780748046900475, 'LSTM': 0.20330816292980833, 'CNN': 0.15955681636625874, 'GRU': 0.2224017701776258, 'TCN': 0.08983122801763897}
