In [None]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m21.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.15.2 colorlog-6.9.0 optuna-4.3.0


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import time
import optuna
from torch import optim
import copy
import torch.nn.functional as F

Модель

In [None]:
class TemporalFusionTransformer(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers, dropout_rate):
        super().__init__()
        self.rnn = nn.LSTM(input_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.attn = nn.MultiheadAttention(embed_dim=hidden_size, num_heads=4, batch_first=True)
        self.norm1 = nn.LayerNorm(hidden_size)
        self.norm2 = nn.LayerNorm(64)
        self.dropout = nn.Dropout(dropout_rate)
        self.fc1 = nn.Linear(hidden_size, 64)
        self.fc2 = nn.Linear(64, output_size)

    def forward(self, x):
        rnn_out, _ = self.rnn(x)
        attn_out, _ = self.attn(rnn_out, rnn_out, rnn_out)
        x = self.norm1(attn_out + rnn_out)
        x = x[:, -1, :]
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.norm2(x)
        x = self.fc2(x)
        return x


Загрузка данных и создание датасета

In [None]:
df = pd.read_csv('daily_accidents.csv', parse_dates=['CRASH DATE'])

df.set_index('CRASH DATE', inplace=True)

accidents = df['ACCIDENT_COUNT'].values
injuries = df['TOTAL_INJURIES'].values

scaler = MinMaxScaler(feature_range=(0, 1))
accidents_scaled = scaler.fit_transform(accidents.reshape(-1, 1))

train_size = int(len(accidents) * 0.8)
train_data, test_data = accidents_scaled[:train_size], accidents_scaled[train_size:]

def create_sequences(data, seq_length):
    sequences = []
    labels = []
    for i in range(len(data) - seq_length):
        sequences.append(data[i:i+seq_length])
        labels.append(data[i+seq_length])
    return np.array(sequences), np.array(labels)

seq_length = 60
train_seq, train_labels = create_sequences(train_data, seq_length)
test_seq, test_labels = create_sequences(test_data, seq_length)

train_seq = torch.tensor(train_seq, dtype=torch.float32)
train_labels = torch.tensor(train_labels, dtype=torch.float32)
test_seq = torch.tensor(test_seq, dtype=torch.float32)
test_labels = torch.tensor(test_labels, dtype=torch.float32)

train_dataset = TensorDataset(train_seq, train_labels)
test_dataset = TensorDataset(test_seq, test_labels)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


Функция подбора гиперпараметров

In [None]:
def objective(trial):
    hidden_size = trial.suggest_categorical("hidden_size", [x for x in range(32, 257, 8) if x % 4 == 0])
    num_layers = trial.suggest_int("num_layers", 1, 4)
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5)
    learning_rate = trial.suggest_float("lr", 1e-4, 1e-2, log=True)

    model = TemporalFusionTransformer(
        input_size=1,
        hidden_size=hidden_size,
        output_size=1,
        num_layers=num_layers,
        dropout_rate=dropout_rate
    )

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()

    model.train()
    for epoch in range(20):
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

    return val_loss / len(test_loader)


Подбор гиперпараметров

In [None]:
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=30)
print("Best parameters:", study.best_params)

[I 2025-05-06 00:12:38,814] A new study created in memory with name: no-name-7f31c025-8724-488f-bcaa-d2033fe761b4
[I 2025-05-06 00:21:09,882] Trial 0 finished with value: 0.06144622394016811 and parameters: {'hidden_size': 224, 'num_layers': 1, 'dropout_rate': 0.38697285914988766, 'lr': 0.00813495054150292}. Best is trial 0 with value: 0.06144622394016811.
[I 2025-05-06 00:37:40,816] Trial 1 finished with value: 0.043814029810684066 and parameters: {'hidden_size': 184, 'num_layers': 4, 'dropout_rate': 0.4895005592898265, 'lr': 0.002597220285835635}. Best is trial 1 with value: 0.043814029810684066.
[I 2025-05-06 00:39:42,165] Trial 2 finished with value: 0.0008549627548615847 and parameters: {'hidden_size': 64, 'num_layers': 3, 'dropout_rate': 0.12490749319381345, 'lr': 0.0018150293937917266}. Best is trial 2 with value: 0.0008549627548615847.
[I 2025-05-06 00:53:29,025] Trial 3 finished with value: 0.004865283179762108 and parameters: {'hidden_size': 216, 'num_layers': 3, 'dropout_rat

Best parameters: {'hidden_size': 112, 'num_layers': 1, 'dropout_rate': 0.15056345873853283, 'lr': 0.0015687298663782}


Инициализация модели и функция обучения

In [None]:
best_model_state = None

In [None]:

model = TemporalFusionTransformer(input_size=1, hidden_size=112, num_layers=1, dropout_rate=0.15056345873853283, output_size=1)
optimizer = optim.Adam(model.parameters(), lr=0.0015687298663782)
criterion = nn.MSELoss()

def train_model(model, train_loader, test_loader, optimizer, criterion, epochs=100, patience=40):
    best_val_loss = float('inf')
    early_stopping_counter = 0

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Валидация
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in test_loader:
                outputs = model(inputs)
                val_loss += criterion(outputs, labels).item()
        val_loss /= len(test_loader)

        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {running_loss:.4f}, Val Loss: {val_loss:.4f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_state = copy.deepcopy(model.state_dict())
            early_stopping_counter = 0
        else:
            early_stopping_counter += 1
            if early_stopping_counter >= patience:
                print(f"Early stopping at epoch {epoch+1}")
                break


Обучение

In [61]:
start_time = time.time()
train_model(model, train_loader, test_loader, optimizer, criterion, epochs=400)
end_time = time.time()

Epoch [10/400], Train Loss: 0.4160, Val Loss: 0.0008
Epoch [20/400], Train Loss: 0.3897, Val Loss: 0.0017
Epoch [30/400], Train Loss: 0.3638, Val Loss: 0.0008
Epoch [40/400], Train Loss: 0.3455, Val Loss: 0.0009
Epoch [50/400], Train Loss: 0.3199, Val Loss: 0.0008
Early stopping at epoch 50


In [63]:
if best_model_state is not None:
    model.load_state_dict(best_model_state)

Предсказание о метрики

In [None]:
def evaluate_model(model, test_loader):
    model.eval()
    predictions = []
    true_values = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            predictions.append(outputs.numpy())
            true_values.append(labels.numpy())

    predictions = np.concatenate(predictions, axis=0)
    true_values = np.concatenate(true_values, axis=0)

    return predictions, true_values

predictions, true_values = evaluate_model(model, test_loader)

predictions = scaler.inverse_transform(predictions)
true_values = scaler.inverse_transform(true_values)

rmse = np.sqrt(mean_squared_error(true_values, predictions))
mae = mean_absolute_error(true_values, predictions)
r2 = r2_score(true_values, predictions)

correlation = np.corrcoef(true_values.flatten(), predictions.flatten())[0, 1]


print(f"TFT RMSE: {rmse}, MAE: {mae}, R²: {r2}, Correlation: {correlation}")

TFT RMSE: 30.99766482510215, MAE: 24.133407592773438, R²: 0.1999918818473816, Correlation: 0.4494391942545254


Время обучения

In [37]:
print(f"Время обучения: {end_time - start_time} секунд")

Время обучения: 2554.8727078437805 секунд
