In [None]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.15.2 colorlog-6.9.0 optuna-4.3.0


In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import time
from torch.utils.data import DataLoader, TensorDataset
import optuna

Загрузка данных и создание датасета

In [None]:
df = pd.read_csv('daily_accidents_hol_dw_week.csv', parse_dates=['CRASH DATE'])
df['CRASH DATE'] = pd.to_datetime(df['CRASH DATE'])
df.set_index('CRASH DATE', inplace=True)

features = ['CRASH_COUNT', 'is_weekend', 'month', 'is_holiday']
df = df[features]

In [None]:
scaler = MinMaxScaler(feature_range=(-1, 1))
df_scaled = scaler.fit_transform(df)

In [None]:
seq_length = 30
X, y = [], []

for i in range(len(df_scaled) - seq_length):
    seq_x = df_scaled[i:i + seq_length]
    target_y = df_scaled[i + seq_length][0]
    X.append(seq_x)
    y.append(target_y)

In [None]:
X = np.array(X)
y = np.array(y).reshape(-1, 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=512)

Модель

In [None]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000, dropout=0.1):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-torch.log(torch.tensor(10000.0)) / d_model))

        pe = torch.zeros(1, max_len, d_model)
        pe[0, :, 0::2] = torch.sin(position * div_term)
        pe[0, :, 1::2] = torch.cos(position * div_term)

        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:, :x.size(1), :]
        return self.dropout(x)


class LSTransformer(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_heads, output_size, dropout=0.1):
        super().__init__()

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, batch_first=True)

        # Важно: приведение input к hidden_size для трансформера
        self.input_projection = nn.Linear(input_size, hidden_size)

        self.positional_encoding = PositionalEncoding(d_model=hidden_size, dropout=dropout)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=hidden_size,
            nhead=num_heads,
            dim_feedforward=hidden_size * 2,
            dropout=dropout,
            batch_first=True
        )

        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=1)

        self.fusion_fc = nn.Linear(2 * hidden_size, output_size)

    def forward(self, x):
        # LSTM → last timestep
        lstm_out, _ = self.lstm(x)
        lstm_last = lstm_out[:, -1, :]

        # Transformer → input projected to hidden
        projected = self.input_projection(x)  # shape: (batch, seq_len, hidden_size)
        transformer_input = self.positional_encoding(projected)
        transformer_out = self.transformer_encoder(transformer_input)
        transformer_last = transformer_out[:, -1, :]

        # Fusion
        fused = torch.cat([lstm_last, transformer_last], dim=-1)
        return self.fusion_fc(fused)


Функция подбора гиперпараметров

In [None]:
def objective_lstransformer(trial):
    num_heads = trial.suggest_categorical("num_heads", [2, 4, 8])
    hidden_size_options = [hs for hs in range(32, 257, 8) if hs % num_heads == 0]
    hidden_size = trial.suggest_categorical("hidden_size", hidden_size_options)

    num_layers = trial.suggest_int("num_layers", 1, 3)
    learning_rate = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
    dropout = trial.suggest_float("dropout", 0.0, 0.5)

    model = LSTransformer(
        input_size=X_train.shape[2],
        hidden_size=hidden_size,
        num_layers=num_layers,
        num_heads=num_heads,
        output_size=1,
        dropout=dropout
    )

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()

    model.train()
    for epoch in range(20):
        for inputs, labels in train_loader:
            inputs, labels = inputs, labels

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    model.eval()
    test_loss = 0.0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs, labels
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item()

    return test_loss / len(test_loader)


Подбор гиперпараметров

In [36]:
study = optuna.create_study(direction="minimize")
study.optimize(objective_lstransformer, n_trials=30)

print("Best parameters:", study.best_params)

[I 2025-05-13 20:15:48,501] A new study created in memory with name: no-name-f654e5c8-df4a-44c7-98ce-17c7f44be119
[I 2025-05-13 20:16:43,493] Trial 0 finished with value: 0.004017368657514453 and parameters: {'num_heads': 4, 'hidden_size': 104, 'num_layers': 1, 'lr': 0.004834204887587471, 'dropout': 0.448230151818827}. Best is trial 0 with value: 0.004017368657514453.
[I 2025-05-13 20:17:16,384] Trial 1 finished with value: 0.003643224365077913 and parameters: {'num_heads': 2, 'hidden_size': 80, 'num_layers': 1, 'lr': 0.0023681708420401014, 'dropout': 0.30023144264093243}. Best is trial 1 with value: 0.003643224365077913.
[I 2025-05-13 20:18:35,225] Trial 2 finished with value: 0.00369907240383327 and parameters: {'num_heads': 2, 'hidden_size': 112, 'num_layers': 2, 'lr': 0.00018120944054815066, 'dropout': 0.2302734272057982}. Best is trial 1 with value: 0.003643224365077913.
[I 2025-05-13 20:25:37,538] Trial 3 finished with value: 0.004721765872091055 and parameters: {'num_heads': 4, 

Best parameters: {'num_heads': 4, 'hidden_size': 104, 'num_layers': 3, 'lr': 0.007860019328267204, 'dropout': 0.35406126455573217}


Параметры модели и инициализация

In [37]:
input_size = X_train.shape[2]
hidden_size = 104
num_layers = 3
num_heads = 4
output_size = 1
learning_rate = 0.007860019328267204
epochs = 400
batch_size = 256
dropout = 0.35406126455573217

In [90]:
model = LSTransformer(input_size, hidden_size, num_layers, num_heads, output_size, dropout=dropout)

In [None]:
print(X_train.shape)


torch.Size([3707, 30, 4])


In [91]:
criterion = nn.MSELoss()

In [92]:
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

Обучение

In [156]:
patience = 5
best_val_loss = float('inf')
early_stopping_counter = 0

train_loss = []
start_time = time.time()

best_model_state = None  # чтобы сохранить лучшую модель

for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    for i in range(0, X_train.shape[0], batch_size):
        X_batch = X_train[i:i + batch_size]
        y_batch = y_train[i:i + batch_size]

        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_loss = running_loss / (X_train.shape[0] // batch_size)
    train_loss.append(avg_loss)

    # Валидация
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_val, y_val in test_loader:
            val_output = model(X_val)
            val_loss += criterion(val_output, y_val).item()
    val_loss /= len(test_loader)

    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch+1}/{epochs}, Train Loss: {avg_loss:.4f}, Val Loss: {val_loss:.4f}')

    # Early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        early_stopping_counter = 0
        best_model_state = model.state_dict()
    else:
        early_stopping_counter += 1
        if early_stopping_counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

training_time = time.time() - start_time
print(f"Training completed in {training_time:.2f} seconds")

# Восстановить лучшую модель
if best_model_state is not None:
    model.load_state_dict(best_model_state)


Epoch 10/400, Train Loss: 0.0152, Val Loss: 0.0034
Early stopping at epoch 11
Training completed in 53.27 seconds


Предсказание и метрики

In [157]:
model.eval()
with torch.no_grad():
    y_pred_train = model(X_train).detach().numpy()
    y_pred_test = model(X_test).detach().numpy()

In [158]:

# Восстановление оригинальных масштабов
crash_count_index = features.index('CRASH_COUNT')
y_pred_train_real = scaler.inverse_transform(
    np.hstack([y_pred_train, np.zeros((len(y_pred_train), len(features) - 1))])
)[:, crash_count_index]
y_pred_test_real = scaler.inverse_transform(
    np.hstack([y_pred_test, np.zeros((len(y_pred_test), len(features) - 1))])
)[:, crash_count_index]
y_train_real = scaler.inverse_transform(
    np.hstack([y_train.numpy(), np.zeros((len(y_train), len(features) - 1))])
)[:, crash_count_index]
y_test_real = scaler.inverse_transform(
    np.hstack([y_test.numpy(), np.zeros((len(y_test), len(features) - 1))])
)[:, crash_count_index]

In [159]:
# Метрики качества
def print_metrics(y_true, y_pred, prefix=""):
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    corr = np.corrcoef(y_true.flatten(), y_pred.flatten())[0, 1]
    print(f'{prefix}RMSE: {rmse:.4f}, MAE: {mae:.4f}, R²: {r2:.4f}, Correlation: {corr:.4f}')

print_metrics(y_train_real, y_pred_train_real, "Train ")
print_metrics(y_test_real, y_pred_test_real, "Test  ")

Train RMSE: 61.2133, MAE: 43.6694, R²: 0.8560, Correlation: 0.9268
Test  RMSE: 31.7263, MAE: 24.8171, R²: 0.1929, Correlation: 0.5471
