In [None]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.15.2 colorlog-6.9.0 optuna-4.3.0


In [None]:
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import time
import optuna
import math

Загрузка данных

In [None]:
df = pd.read_csv('daily_accidents.csv', parse_dates=['CRASH DATE'])

df['CRASH DATE'] = pd.to_datetime(df['CRASH DATE'])

df = df.groupby('CRASH DATE').agg({'ACCIDENT_COUNT': 'sum', 'TOTAL_INJURIES': 'sum'}).reset_index()


Преобразованиее данных

In [None]:

scaler = MinMaxScaler(feature_range=(0, 1))
df['ACCIDENT_COUNT'] = scaler.fit_transform(df[['ACCIDENT_COUNT']])

def create_sequences(data, seq_length):
    sequences = []
    for i in range(len(data) - seq_length):
        seq = data[i:i+seq_length]
        sequences.append(seq)
    return np.array(sequences)

sequence_length = 60
accident_data = df['ACCIDENT_COUNT'].values

sequences = create_sequences(accident_data, sequence_length)

X = sequences[:, :-1]
y = sequences[:, -1]

X = np.expand_dims(X, axis=2)

train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1).to(device)
X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1).to(device)

train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_data, batch_size=512, shuffle=True)
test_loader = DataLoader(test_data, batch_size=512, shuffle=False)

Модель Transformer

In [None]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))

        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:, :x.size(1)]
        return x

In [None]:
class TransformerModel(nn.Module):
    def __init__(self, input_size, d_model, nhead, num_layers, output_size, dropout=0.1):
        super().__init__()
        self.embedding = nn.Linear(input_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dropout=dropout,
            batch_first=True,
            norm_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc_out = nn.Linear(d_model, output_size)

    def forward(self, x):
        x = self.embedding(x)     
        x = self.positional_encoding(x)
        x = self.transformer_encoder(x)
        out = x[:, -1, :]
        out = self.fc_out(out)
        return out


Функция подбора гиперпараметров

In [None]:
def objective_transformer(trial):
    d_model = trial.suggest_categorical("d_model", [32, 64, 96, 128])
    nhead_options = [h for h in [2, 4, 8] if d_model % h == 0]
    if not nhead_options:
        raise optuna.exceptions.TrialPruned()

    nhead = trial.suggest_categorical("nhead", nhead_options)
    num_layers = trial.suggest_int("num_layers", 1, 4)
    dropout = trial.suggest_float("dropout", 0.1, 0.5)
    learning_rate = trial.suggest_float("lr", 1e-4, 1e-2, log=True)

    model = TransformerModel(
        input_size=1,
        d_model=d_model,
        nhead=nhead,
        num_layers=num_layers,
        output_size=1,
        dropout=dropout
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()

    model.train()
    for epoch in range(10):
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            output = model(X_batch)
            loss = criterion(output, y_batch)
            loss.backward()
            optimizer.step()

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            output = model(X_batch)
            val_loss += criterion(output, y_batch).item()

    return val_loss / len(test_loader)


Подбор гиперпараметров

In [None]:
study = optuna.create_study(direction="minimize")
study.optimize(objective_transformer, n_trials=30)
print("Best hyperparameters:", study.best_params)

[I 2025-05-06 00:10:22,616] A new study created in memory with name: no-name-061433d2-39cc-4844-9f35-20480806a0e3
[I 2025-05-06 00:17:37,422] Trial 0 finished with value: 0.04047934152185917 and parameters: {'d_model': 64, 'nhead': 2, 'num_layers': 2, 'dropout': 0.2077406100713721, 'lr': 0.001900601801683295}. Best is trial 0 with value: 0.04047934152185917.
[I 2025-05-06 00:28:49,284] Trial 1 finished with value: 0.038001079112291336 and parameters: {'d_model': 32, 'nhead': 2, 'num_layers': 4, 'dropout': 0.27651140000550534, 'lr': 0.0018996801360388615}. Best is trial 1 with value: 0.038001079112291336.
[I 2025-05-06 00:39:10,208] Trial 2 finished with value: 0.0032840032363310456 and parameters: {'d_model': 32, 'nhead': 8, 'num_layers': 3, 'dropout': 0.20894814099321957, 'lr': 0.003760298688563014}. Best is trial 2 with value: 0.0032840032363310456.
[I 2025-05-06 00:48:33,504] Trial 3 finished with value: 0.0009181353379972279 and parameters: {'d_model': 96, 'nhead': 8, 'num_layers':

Best hyperparameters: {'d_model': 96, 'nhead': 8, 'num_layers': 3, 'dropout': 0.2228463047859486, 'lr': 0.00010334744618188217}


Параметры модели

In [34]:
input_size = 1
d_model = 96
nhead = 8
num_layers = 3
output_size = 1
dropout = 0.2228463047859486
num_epochs = 400

Инициализация модели

In [38]:
model = TransformerModel(input_size=input_size, d_model=d_model, nhead=nhead, num_layers=num_layers, output_size=output_size, dropout=dropout)
model = model.to(device)
criterion = nn.MSELoss()

In [39]:
optimizer = optim.Adam(model.parameters(), lr=0.00010334744618188217)

Обучение модели

In [40]:
import copy
best_model_state = None

In [None]:
patience = 40
best_val_loss = float('inf')
early_stopping_counter = 0
start_time = time.time()

for epoch in range(num_epochs):
    model.train()
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            output = model(X_batch)
            val_loss += criterion(output, y_batch).item()
    val_loss /= len(test_loader)

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Val Loss: {val_loss:.4f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model_state = copy.deepcopy(model.state_dict())
        early_stopping_counter = 0
    else:
        early_stopping_counter += 1
        if early_stopping_counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

end_time = time.time()
training_time = end_time - start_time
print(f"Training time: {training_time:.2f} seconds")


Epoch [10/400], Loss: 0.0204, Val Loss: 0.0008
Epoch [20/400], Loss: 0.0150, Val Loss: 0.0009
Epoch [30/400], Loss: 0.0117, Val Loss: 0.0008
Epoch [40/400], Loss: 0.0132, Val Loss: 0.0013
Epoch [50/400], Loss: 0.0090, Val Loss: 0.0021
Early stopping at epoch 53
Training time: 4461.16 seconds


In [None]:
if best_model_state is not None:
    model.load_state_dict(best_model_state)

Прогнозирование и возвращение оригинального масштаба

In [None]:
model.eval()
with torch.no_grad():
    y_pred = model(X_test)

y_pred = y_pred.cpu().numpy()
y_test_original = y_test.cpu().numpy()

y_pred = scaler.inverse_transform(y_pred)
y_test_original = scaler.inverse_transform(y_test_original)

rmse = np.sqrt(mean_squared_error(y_test_original, y_pred))
mae = mean_absolute_error(y_test_original, y_pred)
r2 = r2_score(y_test_original, y_pred)
correlation = np.corrcoef(y_test_original.flatten(), y_pred.flatten())[0, 1]

print(f"Transformer RMSE: {rmse}, MAE: {mae}, R²: {r2}, Correlation: {correlation}")

Transformer RMSE: 30.075539661617047, MAE: 23.628387451171875, R²: 0.26948028802871704, Correlation: 0.5294751021921705
