In [None]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.15.2 colorlog-6.9.0 optuna-4.3.0


In [None]:
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import time
import optuna
import math

Загрузка данных

In [None]:
df = pd.read_csv('daily_accidents_hol_dw_week.csv', parse_dates=['CRASH DATE'])
df.set_index('CRASH DATE', inplace=True)
df = df[['CRASH_COUNT', 'is_weekend', 'month', 'is_holiday']]

Преобразованиее данных

In [None]:
scaler = MinMaxScaler()
scaled = scaler.fit_transform(df.values)

def create_sequences(data, seq_length):
    sequences = []
    targets = []
    for i in range(len(data) - seq_length):
        seq = data[i:i+seq_length]
        target = data[i+seq_length][0]
        sequences.append(seq)
        targets.append(target)
    return np.array(sequences), np.array(targets)

sequence_length = 60
sequences, targets = create_sequences(scaled, sequence_length)

X = sequences[:, :-1, :]
y = targets

X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32).view(-1, 1)

train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
X_train = X_train.to(device)
y_train = y_train.to(device)
X_test = X_test.to(device)
y_test = y_test.to(device)

train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=512, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=512, shuffle=False)

Модель Transformer

In [None]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))

        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:, :x.size(1)]
        return x

In [None]:
class TransformerModel(nn.Module):
    def __init__(self, input_size, d_model, nhead, num_layers, output_size, dropout=0.1):
        super().__init__()
        self.embedding = nn.Linear(input_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dropout=dropout,
            batch_first=True,
            norm_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc_out = nn.Linear(d_model, output_size)

    def forward(self, x):
        x = self.embedding(x)
        x = self.positional_encoding(x)
        x = self.transformer_encoder(x)
        out = x[:, -1, :]
        out = self.fc_out(out)
        return out


Функция подбора гиперпараметров

In [None]:
def objective_transformer(trial):
    d_model = trial.suggest_categorical("d_model", [32, 64, 96, 128])
    nhead_options = [h for h in [2, 4, 8] if d_model % h == 0]
    if not nhead_options:
        raise optuna.exceptions.TrialPruned()

    nhead = trial.suggest_categorical("nhead", nhead_options)
    num_layers = trial.suggest_int("num_layers", 1, 4)
    dropout = trial.suggest_float("dropout", 0.1, 0.5)
    learning_rate = trial.suggest_float("lr", 1e-4, 1e-2, log=True)

    model = TransformerModel(
        input_size=4,
        d_model=d_model,
        nhead=nhead,
        num_layers=num_layers,
        output_size=1,
        dropout=dropout
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()

    model.train()
    for epoch in range(10):
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            output = model(X_batch)
            loss = criterion(output, y_batch)
            loss.backward()
            optimizer.step()

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            output = model(X_batch)
            val_loss += criterion(output, y_batch).item()

    return val_loss / len(test_loader)


Подбор гиперпараметров

In [None]:
study = optuna.create_study(direction="minimize")
study.optimize(objective_transformer, n_trials=30)
print("Best hyperparameters:", study.best_params)

[I 2025-05-06 00:22:48,809] A new study created in memory with name: no-name-bc99dc64-fd3e-41d8-94f3-f0aa9298af96
[I 2025-05-06 00:30:19,862] Trial 0 finished with value: 0.020540217868983746 and parameters: {'d_model': 32, 'nhead': 2, 'num_layers': 2, 'dropout': 0.27926562970379815, 'lr': 0.0013733978469872802}. Best is trial 0 with value: 0.020540217868983746.
[I 2025-05-06 00:43:30,249] Trial 1 finished with value: 0.01540415920317173 and parameters: {'d_model': 64, 'nhead': 2, 'num_layers': 3, 'dropout': 0.41403648239411706, 'lr': 0.0025734413355351913}. Best is trial 1 with value: 0.01540415920317173.
[I 2025-05-06 00:55:23,155] Trial 2 finished with value: 0.025845874100923538 and parameters: {'d_model': 128, 'nhead': 2, 'num_layers': 2, 'dropout': 0.26495241398703495, 'lr': 0.0024184972100475765}. Best is trial 1 with value: 0.01540415920317173.
[I 2025-05-06 00:59:02,712] Trial 3 finished with value: 0.007231633644551039 and parameters: {'d_model': 32, 'nhead': 4, 'num_layers':

Best hyperparameters: {'d_model': 64, 'nhead': 8, 'num_layers': 1, 'dropout': 0.33377347840242144, 'lr': 0.0007303415873456501}


Параметры модели

In [None]:
input_size = 4
d_model = 64
nhead = 8
num_layers = 1
output_size = 1
num_epochs = 400
dropout = 0.33377347840242144

Инициализация модели

In [None]:
model = TransformerModel(input_size=input_size, d_model=d_model, nhead=nhead, num_layers=num_layers, output_size=output_size, dropout=dropout)
model = model.to(device)
criterion = nn.MSELoss()

In [None]:
optimizer = optim.Adam(model.parameters(), lr=0.00073034158734565018)

Обучение модели

In [None]:
import copy
best_model_state = None

In [None]:
patience = 40
best_val_loss = float('inf')
early_stopping_counter = 0
start_time = time.time()

for epoch in range(num_epochs):
    model.train()
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            output = model(X_batch)
            val_loss += criterion(output, y_batch).item()
    val_loss /= len(test_loader)

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Val Loss: {val_loss:.4f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model_state = copy.deepcopy(model.state_dict())
        early_stopping_counter = 0
    else:
        early_stopping_counter += 1
        if early_stopping_counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

end_time = time.time()
training_time = end_time - start_time
print(f"Training time: {training_time:.2f} seconds")


Epoch [10/400], Loss: 0.0178, Val Loss: 0.0039
Epoch [20/400], Loss: 0.0096, Val Loss: 0.0017
Epoch [30/400], Loss: 0.0102, Val Loss: 0.0010
Epoch [40/400], Loss: 0.0090, Val Loss: 0.0009
Epoch [50/400], Loss: 0.0085, Val Loss: 0.0014
Epoch [60/400], Loss: 0.0056, Val Loss: 0.0010
Epoch [70/400], Loss: 0.0070, Val Loss: 0.0013
Epoch [80/400], Loss: 0.0073, Val Loss: 0.0011
Epoch [90/400], Loss: 0.0048, Val Loss: 0.0009
Epoch [100/400], Loss: 0.0053, Val Loss: 0.0015
Epoch [110/400], Loss: 0.0048, Val Loss: 0.0010
Epoch [120/400], Loss: 0.0054, Val Loss: 0.0013
Epoch [130/400], Loss: 0.0049, Val Loss: 0.0008
Epoch [140/400], Loss: 0.0068, Val Loss: 0.0010
Epoch [150/400], Loss: 0.0055, Val Loss: 0.0009
Epoch [160/400], Loss: 0.0035, Val Loss: 0.0009
Epoch [170/400], Loss: 0.0043, Val Loss: 0.0009
Epoch [180/400], Loss: 0.0051, Val Loss: 0.0009
Epoch [190/400], Loss: 0.0052, Val Loss: 0.0008
Epoch [200/400], Loss: 0.0038, Val Loss: 0.0013
Epoch [210/400], Loss: 0.0052, Val Loss: 0.0015
E

In [None]:
if best_model_state is not None:
    model.load_state_dict(best_model_state)

Прогнозирование и возвращение оригинального масштаба

In [None]:
model.eval()
with torch.no_grad():
    y_pred = model(X_test)

y_pred = y_pred.cpu().numpy()
y_test_original = y_test.cpu().numpy()
y_pred_full = np.hstack([y_pred, np.zeros((y_pred.shape[0], 3))])
y_test_full = np.hstack([y_test_original, np.zeros((y_test_original.shape[0], 3))])
y_pred = scaler.inverse_transform(y_pred_full)[:, 0]
y_test_original = scaler.inverse_transform(y_test_full)[:, 0]

rmse = np.sqrt(mean_squared_error(y_test_original, y_pred))
mae = mean_absolute_error(y_test_original, y_pred)
r2 = r2_score(y_test_original, y_pred)
correlation = np.corrcoef(y_test_original.flatten(), y_pred.flatten())[0, 1]

print(f"Transformer RMSE: {rmse}, MAE: {mae}, R²: {r2}, Correlation: {correlation}")

Transformer RMSE: 30.07239980135927, MAE: 23.545352821763156, R²: 0.2751142928618131, Correlation: 0.543097185952459
