In [5]:
# Import libraries
import sys
sys.path.append('../../../prediction')

import numpy as np
import pandas as pd

from putils.observation import compute_rmse, compute_mae, compute_mape, load_data_from_tuned_folder

In [6]:
# Load the tuned output of every layer, keyed by layer name ("l1", "l2", "l3").
dfs = {layer: load_data_from_tuned_folder(layer) for layer in ("l1", "l2", "l3")}

# Layer 3 sets the reference length: keep only the trailing len(l3) rows of
# the other two layers so all three frames have the same number of rows.
l3_row_count = len(dfs["l3"])
for layer in ("l1", "l2"):
    dfs[layer] = dfs[layer].tail(l3_row_count)

# Columns that are skipped when scoring model columns.
# When IS_FILTERED is True the "Raw" column is the scoring target (and is
# therefore excluded from the scored columns); otherwise "Actual" is the target.
# NOTE(review): the original note said "FormattedTime" is used as the index, in
# which case it would never show up in `df.columns`; it is still listed here,
# presumably as a safeguard -- confirm.
IS_FILTERED = True
IGNORED_COLS = ["Time", "Actual", "FormattedTime"]
if IS_FILTERED:
    IGNORED_COLS = IGNORED_COLS + ["Raw"]

In [7]:
def predict(initial_batch_size, observations, jump):
    """Combine the layer-2 models by always following the recently best one.

    Reads the module-level ``dfs["l2"]`` frame. The candidate models are the
    columns ``LINEAR_REGRESSION``, ``RANDOM_FOREST`` and
    ``FEEDFORWARD_NEURAL_NETWORK``; the target is the ``Raw`` column.

    The output is built in two phases:

    1. Warm-up: for the first ``initial_batch_size`` rows the prediction is the
       plain average of the candidate models.
    2. Selection: the remaining rows are processed in chunks of ``jump`` rows.
       Before each chunk every model is scored with ``compute_rmse`` on the
       rows preceding the chunk, and the chunk is filled with the predictions
       of the model that has the lowest RMSE.

    Parameters
    ----------
    initial_batch_size : int
        Number of leading rows predicted by averaging the models. Values
        outside ``[0, len(data)]`` are clamped to that range.
    observations : int
        Size of the look-back window (in rows) used to score the models.
        If ``observations <= 0`` the whole history before the chunk is used.
        A window reaching before the first row is truncated at row 0.
    jump : int
        Number of rows predicted with one selected model before the models
        are re-scored. Must be >= 1.

    Returns
    -------
    tuple
        ``(y, results)`` where ``y`` is the ``Raw`` column of ``dfs["l2"]`` and
        ``results`` is a list holding one combined prediction per row.

    Raises
    ------
    ValueError
        If ``jump`` is smaller than 1.
    """
    if jump < 1:
        # range() already rejects 0; a negative step would silently return a
        # truncated result list, so reject it explicitly as well.
        raise ValueError("jump must be a positive integer")

    layer2 = dfs["l2"].drop(columns=["Time", "Actual"])
    X = layer2[['LINEAR_REGRESSION','RANDOM_FOREST','FEEDFORWARD_NEURAL_NETWORK']]
    y = layer2["Raw"]

    model_cols = list(X.columns)
    n_rows = len(X)

    def _row_average(row_idx):
        # Mean of all candidate models at one row position.
        return sum(X[col].iloc[row_idx] for col in model_cols) / len(model_cols)

    # Phase 1: warm-up. Clamp so an oversized (or negative) batch size cannot
    # index outside the frame.
    warmup = max(0, min(initial_batch_size, n_rows))
    results = [_row_average(row_idx) for row_idx in range(warmup)]

    # Phase 2: re-select the best model every `jump` rows.
    for start in range(warmup, n_rows, jump):
        stop = min(start + jump, n_rows)

        if start == 0:
            # No history exists yet to score the models on; fall back to the
            # same averaging used during warm-up for this first chunk.
            results.extend(_row_average(row_idx) for row_idx in range(start, stop))
            continue

        # Look-back window [window_start, start). The lower bound is clamped at
        # 0 because a negative start would make iloc count from the end of the
        # series and score the models on the wrong (or an empty) window.
        window_start = max(0, start - observations) if observations > 0 else 0
        history = slice(window_start, start)

        scores = {
            col: compute_rmse(y.iloc[history], X[col].iloc[history])
            for col in model_cols
        }
        selected_model = min(scores, key=scores.get)

        # iloc slices are clamped at the end of the frame, so the last chunk
        # may simply be shorter than `jump`.
        results.extend(X[selected_model].iloc[start:stop])

    return y, results

In [9]:
# Score every non-ignored column of every layer against that layer's target
# series ("Raw" when IS_FILTERED is set, "Actual" otherwise).
for layer_name, layer_df in dfs.items():
    print("Layer", layer_name)
    for model_col in layer_df.columns:
        if model_col in IGNORED_COLS:
            continue
        target = layer_df["Raw"] if IS_FILTERED else layer_df["Actual"]
        predicted = layer_df[model_col]
        rmse = compute_rmse(target, predicted)
        mape = compute_mape(target, predicted)
        mae = compute_mae(target, predicted)
        print(f"{model_col} - RMSE: {rmse:.6f}, MAE: {mae:.4f}, MAPE: {mape:.2f}%")
    print("------------------")

# Settings for the simple model-selection baseline.
# A non-positive `observations` makes predict() score the models on the whole
# history before each step; the warm-up batch then defaults to a single row.
observations = -1
if observations > 0:
    initial_batch_size = observations
else:
    initial_batch_size = 1
jump = 1

# Run the baseline and report its RMSE against the series returned by predict().
y, results = predict(initial_batch_size, observations, jump)
rmse = compute_rmse(y, results)

print(f"Simple Selecting RMSE: {rmse:.6f}")

Layer l1
ARIMA - RMSE: 0.068164, MAE: 0.0510, MAPE: 39.00%
SARIMA - RMSE: 0.077243, MAE: 0.0562, MAPE: 42.39%
RNN - RMSE: 0.047964, MAE: 0.0372, MAPE: 29.49%
LSTM - RMSE: 0.050083, MAE: 0.0386, MAPE: 30.61%
CNN - RMSE: 0.052548, MAE: 0.0408, MAPE: 32.43%
GRU - RMSE: 0.049180, MAE: 0.0379, MAPE: 30.01%
TCN - RMSE: 0.058260, MAE: 0.0458, MAPE: 35.98%
------------------
Layer l2
LINEAR_REGRESSION - RMSE: 0.047864, MAE: 0.0371, MAPE: 29.68%
RANDOM_FOREST - RMSE: 0.048562, MAE: 0.0375, MAPE: 30.11%
FEEDFORWARD_NEURAL_NETWORK - RMSE: 0.048194, MAE: 0.0374, MAPE: 30.16%
------------------
Layer l3
Predicted - RMSE: 0.047931, MAE: 0.0372, MAPE: 29.85%
------------------
Simple Selecting RMSE: 0.047875
