In [1]:
import sys

import joblib
import mlflow
import pandas as pd
import numpy as np

import torch
from torch.utils.data import DataLoader

sys.path.append('..')

from utils import get_quantile_from_median, calculate_sklearn_metrics, plot_single_forecast
from dataset import TimeSeriesDataset

In [2]:
# --- Column roles ------------------------------------------------------------
# The forecast target. Two aliases are kept because later cells refer to it
# both as `target_columns` (scaling) and `target_col` (dataset / results).
target_col = 'nat_demand'
target_columns = target_col

# Columns transformed by `scaler`; the very same list object is handed to the
# dataset as the past covariates.
past_covariates = [
    'T2M_toc', 'QV2M_toc', 'TQL_toc', 'W2M_toc',
    'T2M_san', 'QV2M_san', 'TQL_san', 'W2M_san',
    'T2M_dav', 'QV2M_dav', 'TQL_dav', 'W2M_dav',
]
column_for_scale = past_covariates
known_covariates = ["holiday", "school"]

# --- Windowing parameters passed to TimeSeriesDataset ------------------------
seq_length, pred_length, stride = 128, 24, 1

# --- Scaler artifacts --------------------------------------------------------
# NOTE(review): joblib.load unpickles arbitrary objects; only load artifacts
# that come from a trusted source.
target_scaler = joblib.load('./joblib_artifacts/target_scaler.joblib')
scaler = joblib.load('./joblib_artifacts/scaler.joblib')

In [3]:
# Load the processed splits; 'Holiday_ID' is dropped from both.
train_df = pd.read_parquet(
    '../../data/panama-electricity-load-forecasting/processed/train.parquet'
).drop(columns=['Holiday_ID'])
test_df = pd.read_parquet(
    '../../data/panama-electricity-load-forecasting/processed/test.parquet'
).drop(columns=['Holiday_ID'])

# Untouched copy of the test split, kept before training rows are prepended.
true_test_df = test_df.copy()

# Prepend the last (seq_length + pred_length - 1) training rows so that the
# test frame starts with context taken from the end of the training data.
context_rows = seq_length + pred_length - 1
test_df = pd.concat(
    [train_df.iloc[-context_rows:], test_df],
    axis=0,
).reset_index(drop=True)
del train_df

# Scale a copy so `test_df` keeps the original (unscaled) values.
test_scaled = test_df.copy()
for fitted_scaler, scaled_cols in (
    (scaler, column_for_scale),
    (target_scaler, [target_columns]),
):
    features_scaled = fitted_scaler.transform(test_df[scaled_cols])
    test_scaled[scaled_cols] = features_scaled

In [4]:
# Windowing/column configuration for the dataset, gathered in one place.
dataset_kwargs = dict(
    target_col=target_col,
    past_covariates=past_covariates,
    known_covariates=known_covariates,
    seq_length=seq_length,
    pred_length=pred_length,
    stride=stride,
)

# Build the windowed dataset over the scaled test frame.
test_dataset = TimeSeriesDataset(test_scaled, **dataset_kwargs)

# Number of windows available (shown as the cell output).
len(test_dataset)

744

In [5]:
# Prefer the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# NOTE(review): weights_only=False unpickles the whole model object and can
# execute arbitrary code from the file; only load checkpoints you trust.
model = torch.load(
    'best_model_full_5879.pth',
    map_location=device,
    weights_only=False,
)

In [6]:
def collate_fn(batch):
    """Collate a list of sample dicts into one dict of batched tensors.

    Args:
        batch: sequence of dicts, each holding tensors under the keys
            "x_hist", "x_extra_hist", "x_extra_future", "x_static" and "y".

    Returns:
        A dict with those same five keys (in that order); each value is the
        per-sample tensors stacked along a new leading dimension. Any other
        keys present in the samples are ignored.
    """
    field_names = ("x_hist", "x_extra_hist", "x_extra_future", "x_static", "y")
    return {
        field: torch.stack([sample[field] for sample in batch])
        for field in field_names
    }

# One window per batch, in dataset order.
batch_size = 1

# NOTE(review): the inference cell visible in this notebook indexes
# `test_dataset` directly and does not iterate over this loader.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    collate_fn=collate_fn,
    shuffle=False,
)

In [7]:
def _predict_window(window_idx):
    """Run the model on a single dataset window.

    Args:
        window_idx: index into ``test_dataset``.

    Returns:
        ``(prediction, target)`` as 1-D NumPy arrays, both still in the scaled
        target space. Assumes the model emits one value per horizon step.
    """
    sample = test_dataset[window_idx]
    model_inputs = {
        # unsqueeze(0) adds the batch dimension (batch of one window).
        name: sample[name].unsqueeze(0).to(device)
        for name in ("x_hist", "x_extra_hist", "x_extra_future", "x_static")
    }
    # Call the module itself rather than `.forward` so registered hooks run.
    y_pred = model(**model_inputs)
    # reshape(-1) instead of squeeze(): squeeze() would collapse a one-step
    # horizon to a 0-d array, whose .tolist() is a float and cannot be extended.
    return (
        y_pred.detach().cpu().numpy().reshape(-1),
        sample["y"].cpu().numpy().reshape(-1),
    )


model.eval()

all_predictions = []
all_true_values = []

n_windows = len(test_dataset)
# Steps that must be collected before trimming: one per true test row plus the
# (pred_length - 1) leading steps that fall on the prepended training rows.
# Assumes stride == 1 and that window i uses rows [i, i + seq_length) as history
# followed by pred_length target rows — TODO confirm against TimeSeriesDataset.
expected_len = len(true_test_df) + (pred_length - 1)

with torch.no_grad():
    # Non-overlapping windows: step by the horizon so every target step is
    # predicted exactly once.
    for i in range(0, n_windows, pred_length):
        window_pred, window_true = _predict_window(i)
        all_predictions.extend(window_pred.tolist())
        all_true_values.extend(window_true.tolist())

    # The strided pass can stop short of the final rows. The last window always
    # ends on the final row, so its trailing `remaining` steps fill the gap.
    remaining = expected_len - len(all_predictions)
    if remaining > 0:
        if remaining > pred_length:
            raise ValueError(
                f"{remaining} steps are still missing but a single window only "
                f"provides {pred_length}; dataset and test frame are misaligned"
            )
        last_idx = n_windows - 1
        print(last_idx)
        window_pred, window_true = _predict_window(last_idx)
        all_predictions.extend(window_pred[-remaining:].tolist())
        all_true_values.extend(window_true[-remaining:].tolist())

all_predictions = np.array(all_predictions)
all_true_values = np.array(all_true_values)

# Back to original units, then drop the leading (pred_length - 1) steps, which
# belong to the prepended training rows rather than the test split.
all_predictions = target_scaler.inverse_transform(all_predictions.reshape(-1, 1)).flatten()[pred_length - 1:]
all_true_values = target_scaler.inverse_transform(all_true_values.reshape(-1, 1)).flatten()[pred_length - 1:]

743


In [8]:
# Predictions line up with the trailing rows of `test_df`; rows before
# `slice_index` have no prediction.
slice_index = test_df.shape[0] - all_predictions.shape[0]
predicted_rows = test_df.iloc[slice_index:]

# Timestamp, prediction and observed value side by side. The Series supply the
# index; the prediction array is placed positionally.
results_df = pd.DataFrame({
    'data': predicted_rows['datetime'],
    'y_pred': all_predictions,
    'y_true': predicted_rows[target_col],
})

# Column names used by the metrics call below: the point forecast goes under
# '0.5' and the observed value under 'target'.
df_for_metrics = results_df.copy().rename(columns={'y_pred': '0.5', 'y_true': 'target'})

# Lower/upper quantile columns derived from the '0.5' column by the project helper.
for quantile in (0.1, 0.9):
    df_for_metrics[str(quantile)] = get_quantile_from_median(
        df_for_metrics['0.5'].values,
        target_quantile=quantile,
    )

sklearn_metrics = calculate_sklearn_metrics(df_for_metrics)
sklearn_metrics

{'MSE': 8331.413879358934,
 'MAE': 64.29265843312172,
 'MAPE': 5.043243667398649,
 'MASE': 1.5926946518470564,
 'SQL': 24.924481339562107}

In [9]:
# Columns of `df_for_metrics` to draw: interval bounds, point forecast, actuals.
lower_col, pred_col, upper_col = '0.1', '0.5', '0.9'
actual_col = 'target'
timestamp_col = 'data'

# Label for this model; reused as the MLflow run name in a later cell.
model_name = 'TSMixer'

# Figure size.
height, width = 600, 1800

plot_single_forecast(
    df_for_metrics,
    timestamp_col=timestamp_col,
    actual_col=actual_col,
    pred_col=pred_col,
    model_name=model_name,
    height=height,
    width=width,
    lower_col=lower_col,
    upper_col=upper_col,
)

In [10]:
# The run is named after the model being evaluated.
run_name = model_name

# Point MLflow at the local tracking server and select the experiment.
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("Time_Series_Forecasting")

# Record the evaluation metrics, the model name and a tag in a single run.
with mlflow.start_run(run_name=run_name):
    mlflow.log_metrics(sklearn_metrics)
    mlflow.log_param("model_name", model_name)

    mlflow.set_tag("prefix", 'AllData')

🏃 View run TSMixer at: http://127.0.0.1:5000/#/experiments/185045746886025740/runs/ca1e1720681e44eabf326675ef074cb1
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/185045746886025740
