In [1]:
import sys
import os

import joblib
import mlflow
import pandas as pd

import torch
from torch.utils.data import DataLoader
from sklearn.preprocessing import MinMaxScaler, StandardScaler

sys.path.append('../..')

from utils import get_quantile_from_median, calculate_sklearn_metrics
from torch_utils import TimeSeriesDataset, collate_fn

mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("rosstat_forecasting");

In [2]:
data_dir = '../../../data/rosstat/processed'

train_df = pd.read_csv(os.path.join(data_dir, 'train/data.csv'))
val_df = pd.read_csv(os.path.join(data_dir, 'val/data.csv'))
test_df = pd.read_csv(os.path.join(data_dir, 'test/data.csv'))

print(f"Обучающая выборка: {train_df.shape[0]} строк")
print(f"Валидационная выборка: {val_df.shape[0]} строк")
print(f"Тестовая выборка: {test_df.shape[0]} строк")

Обучающая выборка: 4140 строк
Валидационная выборка: 828 строк
Тестовая выборка: 828 строк


In [3]:
TARGET_COL = "nominal_wage"
PAST_COVARIATES = [
    "capital_labor_ratio_change",
    "capital_productivity_change",
    "fixed_assets_renewal_comparable_prices",
    "labor_productivity",
    "high_productivity_jobs",
    "machinery_share_in_total_assets",
    "investment_share_for_modernization",
    "production_index_yoy",
    "production_index_mom",
]
KNOWN_COVARIATES = []
seq_length = 12
pred_length = 2
stride = 1
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Нормализация

In [4]:
# Нормализация по code

scale_columns = PAST_COVARIATES + [TARGET_COL]

all_codes = train_df['code'].unique()

os.makedirs('./artifacts', exist_ok=True)

train_scaled = train_df.copy()
val_scaled = val_df.copy()
test_scaled = test_df.copy()

for code in all_codes:
    scaler = MinMaxScaler(feature_range=(0, 1))
    
    code_mask_train = train_df['code'] == code
    code_mask_val = val_df['code'] == code
    code_mask_test = test_df['code'] == code
    
    if sum(code_mask_train) > 0:
        features_scaled = scaler.fit_transform(train_df.loc[code_mask_train, scale_columns])
        train_scaled.loc[code_mask_train, scale_columns] = features_scaled
        
        if sum(code_mask_val) > 0:
            features_scaled = scaler.transform(val_df.loc[code_mask_val, scale_columns])
            val_scaled.loc[code_mask_val, scale_columns] = features_scaled
            
        if sum(code_mask_test) > 0:
            features_scaled = scaler.transform(test_df.loc[code_mask_test, scale_columns])
            test_scaled.loc[code_mask_test, scale_columns] = features_scaled
        
        joblib.dump(scaler, f'./artifacts/scaler_{code}.joblib')

scaler_dict = {code: joblib.load(f'./artifacts/scaler_{code}.joblib') for code in all_codes}
joblib.dump(scaler_dict, './artifacts/all_scalers.joblib')

['./artifacts/all_scalers.joblib']

In [5]:
# Нормализация по колонкам

scale_columns = PAST_COVARIATES + [TARGET_COL]
os.makedirs('./artifacts', exist_ok=True)

train_scaled = train_df.copy()
val_scaled = val_df.copy()
test_scaled = test_df.copy()

scalers = {}
for column in scale_columns:
    scaler = StandardScaler()
    
    train_scaled[column] = scaler.fit_transform(train_df[[column]])
    val_scaled[column] = scaler.transform(val_df[[column]])
    test_scaled[column] = scaler.transform(test_df[[column]])
    
    scalers[column] = scaler
    joblib.dump(scaler, f'./artifacts/scaler_{column}.joblib')

joblib.dump(scalers, './artifacts/all_column_scalers.joblib')

['./artifacts/all_column_scalers.joblib']

## Создание датасета с учётом панельной структуры данных

In [6]:
train_datasets = []
test_datasets = []
val_datasets = []

for code in train_df['code'].unique():
    train_scaled_subset = train_scaled[train_scaled['code'].eq(code)].copy()
    val_scaled_subset = val_scaled[val_scaled['code'].eq(code)].copy()
    test_scaled_subset = test_scaled[test_scaled['code'].eq(code)].copy()

    test_scaled_subset = pd.concat([val_scaled_subset[-seq_length:], test_scaled_subset])
    val_scaled_subset = pd.concat([train_scaled_subset[-seq_length:], val_scaled_subset])

    train_scaled_subset.sort_values(by=['date'], inplace=True)
    test_scaled_subset.sort_values(by=['date'], inplace=True)
    val_scaled_subset.sort_values(by=['date'], inplace=True)

    train_dataset = TimeSeriesDataset(
        train_scaled_subset,
        target_col=TARGET_COL,
        past_covariates=PAST_COVARIATES,
        known_covariates=KNOWN_COVARIATES,
        seq_length=seq_length,
        pred_length=pred_length,
        stride=stride,
    )
    train_datasets.append(train_dataset)

    val_dataset = TimeSeriesDataset(
        val_scaled_subset,
        target_col=TARGET_COL,
        past_covariates=PAST_COVARIATES,
        known_covariates=KNOWN_COVARIATES,
        seq_length=seq_length,
        pred_length=pred_length,
        stride=stride,
    )
    val_datasets.append(val_dataset)

    test_dataset = TimeSeriesDataset(
        test_scaled_subset,
        target_col=TARGET_COL,
        past_covariates=PAST_COVARIATES,
        known_covariates=KNOWN_COVARIATES,
        seq_length=seq_length,
        pred_length=pred_length,
        stride=stride,
    )
    test_datasets.append(test_dataset)

train_dataset = torch.utils.data.ConcatDataset(train_datasets)
val_dataset = torch.utils.data.ConcatDataset(val_datasets)
test_dataset = torch.utils.data.ConcatDataset(test_datasets)

print(f"Обучающий датасет: {len(train_dataset)} строк")
print(f"Валидационный датасет: {len(val_dataset)} строк")
print(f"Тестовый датасет: {len(test_dataset)} строк")

Обучающий датасет: 3243 строк
Валидационный датасет: 759 строк
Тестовый датасет: 759 строк


In [7]:
batch_size = 32

train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn,
)

val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=collate_fn,
)

test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=collate_fn,
)

In [8]:
from torchtsmixer import TSMixerExt

input_channels = train_dataset[0]['x_hist'].shape[1]
extra_channels = train_dataset[0]['x_extra_hist'].shape[1]
static_channels = train_dataset[0]['x_static'].shape[0]

In [9]:
import torch.nn as nn
from tqdm.auto import tqdm
# from lion_pytorch import Lion
import numpy as np
import os
import torch

def evaluate(model, test_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    batch_count = 0
    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Evaluating", leave=False):
            x_hist = batch["x_hist"].to(device)
            x_extra_hist = batch["x_extra_hist"].to(device)
            x_extra_future = batch["x_extra_future"].to(device)
            x_static = batch["x_static"].to(device)
            y = batch["y"].to(device)
            y_pred = model.forward(
                x_hist=x_hist,
                x_extra_hist=x_extra_hist,
                x_extra_future=x_extra_future,
                x_static=x_static,
            )
            loss = criterion(y_pred, y)
            running_loss += loss.item()
            batch_count += 1
    avg_loss = running_loss / batch_count
    return {"loss": avg_loss, "rmse": np.sqrt(avg_loss)}

model = TSMixerExt(
    sequence_length=seq_length,
    prediction_length=pred_length,
    input_channels=input_channels,
    extra_channels=extra_channels,
    hidden_channels=64,
    static_channels=static_channels,
    output_channels=input_channels,
    normalize_before=False,
    ff_dim=128,
)

criterion = nn.MSELoss()
# optimizer = Lion(model.parameters(), lr=1e-4, weight_decay=1e-2)
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4, weight_decay=1e-2)
num_epochs = 100
best_model_path = './best_model'
os.makedirs(best_model_path, exist_ok=True)
model.to(device)

patience = 8
early_stopping = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=patience//2,
    threshold=0.0001, min_lr=1e-6
)

early_stop_counter = 0
best_val_loss = float('inf')
epoch_progress = tqdm(range(num_epochs), desc="Training")

for epoch in epoch_progress:
    model.train()
    running_loss = 0.0
    batch_count = 0
    batch_progress = tqdm(train_loader, leave=False)
    
    for batch in batch_progress:
        x_hist = batch["x_hist"].to(device)
        x_extra_hist = batch["x_extra_hist"].to(device)
        x_extra_future = batch["x_extra_future"].to(device)
        x_static = batch["x_static"].to(device)
        y = batch["y"].to(device)
        y_pred = model.forward(
            x_hist=x_hist,
            x_extra_hist=x_extra_hist,
            x_extra_future=x_extra_future,
            x_static=x_static,
        )
        loss = criterion(y_pred, y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        running_loss += loss.item()
        batch_count += 1

    avg_loss = running_loss / batch_count
    val_metrics = evaluate(model, val_loader, criterion, device)
    
    # Update epoch progress bar with metrics
    epoch_progress.set_description(f"Epoch [{epoch+1}/{num_epochs}], Train: {avg_loss:.4f}, Val: {val_metrics['loss']:.4f}, RMSE: {val_metrics['rmse']:.4f}")
    
    early_stopping.step(val_metrics['loss'])
    print(f"Epoch [{epoch+1}/{num_epochs}], Train: {avg_loss:.4f}, Val: {val_metrics['loss']:.4f}, RMSE: {val_metrics['rmse']:.4f}")
    
    
    if val_metrics['loss'] < best_val_loss:
        best_val_loss = val_metrics['loss']
        torch.save(model.state_dict(), os.path.join(best_model_path, 'best_model_state_dict.pth'))
        torch.save(model, os.path.join(best_model_path, 'best_model_pickle.pth'))
        early_stop_counter = 0
    else:
        early_stop_counter += 1
        if early_stop_counter >= patience:
            epoch_progress.write(f"Early stopping triggered after {epoch+1} epochs. Best validation loss: {best_val_loss:.4f}")
            break

epoch_progress.write("Training complete")

Training:   0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [1/100], Train: 0.6434, Val: 0.4364, RMSE: 0.6606


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [2/100], Train: 0.1681, Val: 0.2445, RMSE: 0.4945


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [3/100], Train: 0.1305, Val: 0.2016, RMSE: 0.4490


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [4/100], Train: 0.1080, Val: 0.1853, RMSE: 0.4305


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [5/100], Train: 0.0977, Val: 0.1840, RMSE: 0.4289


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [6/100], Train: 0.0988, Val: 0.1611, RMSE: 0.4014


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [7/100], Train: 0.0865, Val: 0.2116, RMSE: 0.4600


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [8/100], Train: 0.0821, Val: 0.1608, RMSE: 0.4010


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [9/100], Train: 0.0774, Val: 0.1439, RMSE: 0.3794


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [10/100], Train: 0.0743, Val: 0.1546, RMSE: 0.3932


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [11/100], Train: 0.0742, Val: 0.1477, RMSE: 0.3843


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [12/100], Train: 0.0726, Val: 0.1376, RMSE: 0.3710


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [13/100], Train: 0.0700, Val: 0.1379, RMSE: 0.3714


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [14/100], Train: 0.0672, Val: 0.1284, RMSE: 0.3584


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [15/100], Train: 0.0666, Val: 0.1414, RMSE: 0.3760


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [16/100], Train: 0.0647, Val: 0.1367, RMSE: 0.3697


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [17/100], Train: 0.0620, Val: 0.1341, RMSE: 0.3662


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [18/100], Train: 0.0609, Val: 0.1402, RMSE: 0.3744


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [19/100], Train: 0.0672, Val: 0.1429, RMSE: 0.3780


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [20/100], Train: 0.0580, Val: 0.1317, RMSE: 0.3629


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [21/100], Train: 0.0576, Val: 0.1288, RMSE: 0.3589


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [22/100], Train: 0.0567, Val: 0.1311, RMSE: 0.3621
Early stopping triggered after 22 epochs. Best validation loss: 0.1284
Training complete


In [10]:
import torch.nn as nn
from tqdm.auto import tqdm
from lion_pytorch import Lion
import os
import torch

def evaluate(model, test_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    batch_count = 0
    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Evaluating", leave=False):
            x_hist = batch["x_hist"].to(device)
            x_extra_hist = batch["x_extra_hist"].to(device)
            x_extra_future = batch["x_extra_future"].to(device)
            x_static = batch["x_static"].to(device)
            y = batch["y"].to(device)
            y_pred = model.forward(
                x_hist=x_hist,
                x_extra_hist=x_extra_hist,
                x_extra_future=x_extra_future,
                x_static=x_static,
            )
            loss = criterion(y_pred, y)
            running_loss += loss.item()
            batch_count += 1
    avg_loss = running_loss / batch_count
    return {"loss": avg_loss, "rmse": np.sqrt(avg_loss)}

model = TSMixerExt(
    sequence_length=seq_length,
    prediction_length=pred_length,
    input_channels=input_channels,
    extra_channels=extra_channels,
    hidden_channels=64,
    static_channels=static_channels,
    output_channels=input_channels,
    normalize_before=False,
    ff_dim=128,
)

criterion = nn.MSELoss()
optimizer = Lion(model.parameters(), lr=1e-4, weight_decay=1e-2)
num_epochs = 100
best_model_path = './best_model_lion'
os.makedirs(best_model_path, exist_ok=True)
model.to(device)

patience = 8
early_stopping = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=patience//2,
    threshold=0.0001, min_lr=1e-6
)

early_stop_counter = 0
best_val_loss = float('inf')
epoch_progress = tqdm(range(num_epochs), desc="Training")

for epoch in epoch_progress:
    model.train()
    running_loss = 0.0
    batch_count = 0
    batch_progress = tqdm(train_loader, leave=False)
    
    for batch in batch_progress:
        x_hist = batch["x_hist"].to(device)
        x_extra_hist = batch["x_extra_hist"].to(device)
        x_extra_future = batch["x_extra_future"].to(device)
        x_static = batch["x_static"].to(device)
        y = batch["y"].to(device)
        y_pred = model.forward(
            x_hist=x_hist,
            x_extra_hist=x_extra_hist,
            x_extra_future=x_extra_future,
            x_static=x_static,
        )
        loss = criterion(y_pred, y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        running_loss += loss.item()
        batch_count += 1

    avg_loss = running_loss / batch_count
    val_metrics = evaluate(model, val_loader, criterion, device)
    
    # Update epoch progress bar with metrics
    epoch_progress.set_description(f"Epoch [{epoch+1}/{num_epochs}], Train: {avg_loss:.4f}, Val: {val_metrics['loss']:.4f}, RMSE: {val_metrics['rmse']:.4f}")
    
    early_stopping.step(val_metrics['loss'])
    print(f"Epoch [{epoch+1}/{num_epochs}], Train: {avg_loss:.4f}, Val: {val_metrics['loss']:.4f}, RMSE: {val_metrics['rmse']:.4f}")
    
    
    if val_metrics['loss'] < best_val_loss:
        best_val_loss = val_metrics['loss']
        torch.save(model.state_dict(), os.path.join(best_model_path, 'best_model_state_dict.pth'))
        torch.save(model, os.path.join(best_model_path, 'best_model_pickle.pth'))
        early_stop_counter = 0
    else:
        early_stop_counter += 1
        if early_stop_counter >= patience:
            epoch_progress.write(f"Early stopping triggered after {epoch+1} epochs. Best validation loss: {best_val_loss:.4f}")
            break

epoch_progress.write("Training complete")

Training:   0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [1/100], Train: 0.7777, Val: 1.0255, RMSE: 1.0127


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [2/100], Train: 0.3912, Val: 0.5320, RMSE: 0.7294


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [3/100], Train: 0.1855, Val: 0.3067, RMSE: 0.5538


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [4/100], Train: 0.1101, Val: 0.2077, RMSE: 0.4557


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [5/100], Train: 0.0865, Val: 0.1481, RMSE: 0.3849


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [6/100], Train: 0.0715, Val: 0.1276, RMSE: 0.3572


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [7/100], Train: 0.0647, Val: 0.1290, RMSE: 0.3592


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [8/100], Train: 0.0601, Val: 0.1115, RMSE: 0.3339


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [9/100], Train: 0.0545, Val: 0.1092, RMSE: 0.3304


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [10/100], Train: 0.0517, Val: 0.1020, RMSE: 0.3194


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [11/100], Train: 0.0486, Val: 0.1442, RMSE: 0.3797


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [12/100], Train: 0.0473, Val: 0.1268, RMSE: 0.3561


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [13/100], Train: 0.0445, Val: 0.1084, RMSE: 0.3293


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [14/100], Train: 0.0410, Val: 0.1099, RMSE: 0.3315


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [15/100], Train: 0.0408, Val: 0.0987, RMSE: 0.3142


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [16/100], Train: 0.0394, Val: 0.1048, RMSE: 0.3237


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [17/100], Train: 0.0373, Val: 0.1291, RMSE: 0.3593


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [18/100], Train: 0.0366, Val: 0.0970, RMSE: 0.3115


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [19/100], Train: 0.0348, Val: 0.1297, RMSE: 0.3602


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [20/100], Train: 0.0351, Val: 0.1185, RMSE: 0.3442


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [21/100], Train: 0.0324, Val: 0.1176, RMSE: 0.3430


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [22/100], Train: 0.0320, Val: 0.0952, RMSE: 0.3086


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [23/100], Train: 0.0307, Val: 0.1220, RMSE: 0.3493


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [24/100], Train: 0.0306, Val: 0.0975, RMSE: 0.3122


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [25/100], Train: 0.0302, Val: 0.1300, RMSE: 0.3605


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [26/100], Train: 0.0296, Val: 0.1011, RMSE: 0.3180


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [27/100], Train: 0.0280, Val: 0.1111, RMSE: 0.3334


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [28/100], Train: 0.0269, Val: 0.1022, RMSE: 0.3197


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [29/100], Train: 0.0258, Val: 0.1047, RMSE: 0.3236


  0%|          | 0/102 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch [30/100], Train: 0.0256, Val: 0.1182, RMSE: 0.3438
Early stopping triggered after 30 epochs. Best validation loss: 0.0952
Training complete


In [11]:
model = TSMixerExt(
    sequence_length=seq_length,
    prediction_length=pred_length,
    input_channels=input_channels,
    extra_channels=extra_channels,
    hidden_channels=64,
    static_channels=static_channels,
    output_channels=input_channels,
    normalize_before=False,
    ff_dim=128,
)
model.to(device=device);

In [12]:
val_predictions = {}
test_predictions = {}

model2path = {'TSMixer': 'best_model/best_model_state_dict.pth', 'TSMixer_LiON': 'best_model_lion/best_model_state_dict.pth'}

for model_name, path in model2path.items():
    val_predictions_df = pd.DataFrame()
    test_predictions_df = pd.DataFrame()

    model.load_state_dict(torch.load(path, weights_only=True))
    model.eval()
    with torch.no_grad():
        for code, val_dataset_ in zip(all_codes, val_datasets):
            predictions = []
            true_values = []

            for i in range(0, len(val_dataset_), pred_length):
                sample = val_dataset_[i]

                x_hist = sample["x_hist"].unsqueeze(0).to(device)
                x_extra_hist = sample["x_extra_hist"].unsqueeze(0).to(device)
                x_extra_future = sample["x_extra_future"].unsqueeze(0).to(device)
                x_static = sample["x_static"].unsqueeze(0).to(device)
                y = sample["y"].unsqueeze(0).to(device)

                y_pred = model.forward(
                    x_hist=x_hist,
                    x_extra_hist=x_extra_hist,
                    x_extra_future=x_extra_future,
                    x_static=x_static,
                )

                y_pred_np = y_pred.cpu().numpy().squeeze()
                y_np = y.cpu().numpy().squeeze()

                predictions.extend(y_pred_np.tolist())
                true_values.extend(y_np.tolist())

            target_scaler = scalers[TARGET_COL]

            predictions = target_scaler.inverse_transform(np.array(predictions).reshape(-1, 1)).flatten()
            true_values = target_scaler.inverse_transform(np.array(true_values).reshape(-1, 1)).flatten()

            df = pd.DataFrame([
                predictions,
                true_values,
            ]).transpose()
            df.columns = ['mean', 'y_true']
            df['code'] = code
            df['0.1'] = get_quantile_from_median(df['mean'].values, 0.1)
            df['0.9'] = get_quantile_from_median(df['mean'].values, 0.9)

            val_predictions_df = pd.concat([val_predictions_df, df])

    with torch.no_grad():
        for code, test_dataset_ in zip(all_codes, test_datasets):
            predictions = []
            true_values = []

            for i in range(0, len(test_dataset_), pred_length):
                sample = test_dataset_[i]

                x_hist = sample["x_hist"].unsqueeze(0).to(device)
                x_extra_hist = sample["x_extra_hist"].unsqueeze(0).to(device)
                x_extra_future = sample["x_extra_future"].unsqueeze(0).to(device)
                x_static = sample["x_static"].unsqueeze(0).to(device)
                y = sample["y"].unsqueeze(0).to(device)

                y_pred = model.forward(
                    x_hist=x_hist,
                    x_extra_hist=x_extra_hist,
                    x_extra_future=x_extra_future,
                    x_static=x_static,
                )

                y_pred_np = y_pred.cpu().numpy().squeeze()
                y_np = y.cpu().numpy().squeeze()

                predictions.extend(y_pred_np.tolist())
                true_values.extend(y_np.tolist())

            target_scaler = scalers[TARGET_COL]

            predictions = target_scaler.inverse_transform(np.array(predictions).reshape(-1, 1)).flatten()
            true_values = target_scaler.inverse_transform(np.array(true_values).reshape(-1, 1)).flatten()

            df = pd.DataFrame([
                predictions,
                true_values,
            ]).transpose()
            df.columns = ['mean', 'y_true']
            df['code'] = code
            df['0.1'] = get_quantile_from_median(df['mean'].values, 0.1)
            df['0.9'] = get_quantile_from_median(df['mean'].values, 0.9)

            test_predictions_df = pd.concat([test_predictions_df, df])

    val_predictions_df.set_index('code', inplace=True)
    test_predictions_df.set_index('code', inplace=True)

    val_predictions[model_name] = val_predictions_df
    test_predictions[model_name] = test_predictions_df

In [19]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import datetime
from utils.plotting import plot_forecasts_val_test


date_col = pd.to_datetime(test_df["date"])
min_date = date_col.min().date()
max_date = date_col.max().date()
size_multiplyer = 2
height = 400 * size_multiplyer
width = 1600 * size_multiplyer
item_id = 6
title = f'Предсказания номинальной заработной платы (для code = {item_id})'

start_date_picker = widgets.DatePicker(
    description="Start date:", disabled=False, value=min_date
)

end_date_picker = widgets.DatePicker(
    description="End date:", disabled=False, value=max_date
)

output_area = widgets.Output()


def on_button_clicked(b):
    with output_area:
        clear_output(wait=True)
        start_date = datetime.datetime.combine(
            start_date_picker.value, datetime.datetime.min.time()
        )
        end_date = datetime.datetime.combine(
            end_date_picker.value, datetime.datetime.min.time()
        )
        plot_forecasts_val_test(
            val_df=val_df_,
            test_df=test_df_,
            val_predictions=all_val_models_predictions_,
            test_predictions=test_predictions,
            title=title,
            start_date=start_date,
            end_date=end_date,
            height=height,
            width=width,
            item_id=item_id,
        )


plot_button = widgets.Button(description="Plot Forecasts")
plot_button.on_click(on_button_clicked)

controls = widgets.VBox(
    [widgets.HBox([start_date_picker, end_date_picker]), plot_button]
)

display(controls, output_area)

val_df_ = val_df.rename(columns={'date': 'timestamp', "nominal_wage": "target"})[['code', 'timestamp', "target"]]
val_df_ = val_df_[val_df_['code'].eq(item_id)].reset_index(drop=True)
val_df_['timestamp'] = pd.to_datetime(val_df_['timestamp'])

test_df_ = test_df.rename(columns={'date': 'timestamp', "nominal_wage": "target"})[['code', 'timestamp', "target"]]
test_df_ = test_df_[test_df_['code'].eq(item_id)].reset_index(drop=True)
test_df_['timestamp'] = pd.to_datetime(test_df_['timestamp'])

val_df_ = pd.concat([val_df_, test_df_.iloc[[0]]])

all_val_models_predictions_ = val_predictions.copy()
for model_ in all_val_models_predictions_.keys():
    all_val_models_predictions_[model_] = pd.concat([all_val_models_predictions_[model_], test_predictions[model_].loc[[item_id]].iloc[[0]]])

with output_area:
    plot_forecasts_val_test(
        val_df=val_df_,
        test_df=test_df_,
        val_predictions=all_val_models_predictions_,
        test_predictions=test_predictions,
        title=title,
        height=height,
        width=width,
        item_id=item_id,
    )

VBox(children=(HBox(children=(DatePicker(value=datetime.date(2023, 1, 1), description='Start date:'), DatePick…

Output()

In [14]:
all_models_metrics = {}

for model in test_predictions.keys():
    metrics_df = []
    for code in all_codes:
        pred_df = pd.concat([
            test_predictions[model].rename(columns={'mean': '0.5'})
            .loc[code][["0.1", "0.5", "0.9"]]
            .reset_index(drop=True),
            test_df[test_df["code"].eq(code)][["nominal_wage"]].reset_index(drop=True),
        ], axis=1)
        pred_df = pd.DataFrame(pred_df)

        metrics_df.append(calculate_sklearn_metrics(pred_df, target_column='nominal_wage'))

    metrics_dict = pd.DataFrame(metrics_df).mean().to_dict()

    all_models_metrics[model] = metrics_dict

all_models_metrics

{'TSMixer': {'MSE': 140934982.97671503,
  'MAE': 6616.331367959339,
  'MAPE': 6.674945116010894,
  'MASE': 1.503505748761305,
  'SQL': 2349.248054388028},
 'TSMixer_LiON': {'MSE': 81512410.86226273,
  'MAE': 5218.20076417145,
  'MAPE': 5.619511371478955,
  'MASE': 1.3704544714512483,
  'SQL': 1901.9399762831758}}

In [15]:
1/0

ZeroDivisionError: division by zero

In [None]:
prefix = 'TSMixer'

for k, metrics_ in all_models_metrics.items():
    run_name = f"{k}_{prefix}"

    with mlflow.start_run(run_name=run_name):
        mlflow.log_metrics(metrics_)
        mlflow.log_param("model_name", k)

        mlflow.set_tag("prefix", prefix)

🏃 View run TSMixer_TSMixer at: http://127.0.0.1:5000/#/experiments/169882278836627198/runs/4de1a3940e3a4884a2eb419ff07fef8c
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/169882278836627198
🏃 View run TSMixer_LiON_TSMixer at: http://127.0.0.1:5000/#/experiments/169882278836627198/runs/c6a5993de72640209334c684ff305e39
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/169882278836627198


In [None]:
all_models_metrics

{'TSMixer': {'MSE': 107288060.00084822,
  'MAE': 5633.84614821767,
  'MAPE': 5.790095907679104,
  'MASE': 1.1370568118828535,
  'SQL': 2063.668330070852},
 'TSMixer_LiON': {'MSE': 70734775.08098634,
  'MAE': 4918.452059194301,
  'MAPE': 5.4690594125309655,
  'MASE': 1.3597564609305124,
  'SQL': 1831.1950418128847}}