In [9]:
import h5py
import pickle
import folium
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import torch
from torch import nn
import torch.nn.functional as F


# Load the PEMS-BAY speed table from the HDF5 file and rebuild it as a DataFrame.
with h5py.File('../data/raw_data/PEMS-BAY/PEMS-BAY.h5', 'r') as file:

    axis0 = file['speed']['axis0'][:]               # Sensor identifiers (used as column labels below)
    block0_items = file['speed']['block0_items'][:] # Sensor identifiers (not used in the cells visible here)
    axis1 = file['speed']['axis1'][:]               # Timestamps
    timestamps = pd.to_datetime(axis1)              # Convert the timestamps to datetime
    speed_data = file['speed']['block0_values'][:]  # Speed measurements

# Rows are indexed by timestamp, columns by sensor identifier.
pems_bay = pd.DataFrame(speed_data, index=timestamps, columns=axis0)
# Keep only the first 2016 rows; the recorded .info() output in this notebook
# shows they span 2017-01-01 00:00:00 through 2017-01-07 23:55:00.
pems_bay = pems_bay[:2016]

# Open the .pkl file
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load this file if it comes from a trusted source.
with open('../data/raw_data/PEMS-BAY/adj_PEMS-BAY.pkl', 'rb') as file:
    data = pickle.load(file, encoding='bytes')

node_ids = [x.decode('utf-8') for x in data[0]]                             # Node ids taken from data[0] (bytes decoded to str)
adj_matrix = data[2]                                                        # Adjacency matrix taken from data[2]
pems_bay_adj = pd.DataFrame(adj_matrix, index=node_ids, columns=node_ids)   # DataFrame with node ids as both the index and the column labels

In [11]:
# Print a summary of the loaded frame: index range, column count, dtypes and memory usage.
pems_bay.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2016 entries, 2017-01-01 00:00:00 to 2017-01-07 23:55:00
Columns: 325 entries, 400001 to 414694
dtypes: float64(325)
memory usage: 5.0 MB


In [13]:
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error
from statsmodels.tsa.arima.model import ARIMA


In [58]:
# Metrics

def compute_metrics(y_true, y_pred):
    """Return ``(MAE, RMSE, MAPE %)`` for a forecast, each rounded to one decimal.

    Both inputs are converted to plain float arrays before any arithmetic, so
    pandas objects are compared by position. Subtracting two Series directly
    aligns them by index label and silently produces NaN when the labels
    differ (e.g. a forecast indexed by future timestamps).

    MAPE is averaged only over entries whose true value is non-zero, which
    avoids division by zero; it is ``nan`` when every true value is zero.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values with the same shape as ``y_true``.

    Returns:
        Tuple ``(mae, rmse, mape)`` of Python floats rounded to one decimal place.

    Raises:
        ValueError: If the inputs are empty or their shapes differ.
    """
    true_values = np.asarray(y_true, dtype=float)
    predictions = np.asarray(y_pred, dtype=float)

    if true_values.shape != predictions.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {true_values.shape} and {predictions.shape}"
        )
    if true_values.size == 0:
        raise ValueError("y_true and y_pred must not be empty")

    errors = true_values - predictions
    mae = np.mean(np.abs(errors))
    rmse = np.sqrt(np.mean(errors ** 2))

    # Zero targets would make the relative error infinite, so leave them out.
    nonzero = true_values != 0
    if nonzero.any():
        mape = np.mean(np.abs(errors[nonzero] / true_values[nonzero])) * 100
    else:
        mape = float('nan')

    return round(float(mae), 1), round(float(rmse), 1), round(float(mape), 1)

# Feature generation

def generate_features(data, window=3, min_periods=1):
    """Return ``data`` extended with a rolling-mean column for every input column.

    The rolling mean runs down the rows. With ``min_periods=1`` the first
    ``window - 1`` rows use the mean of the values available so far, so the
    result has no NaN warm-up rows that estimators would reject.

    Args:
        data: DataFrame whose rows are consecutive observations.
        window: Number of rows in the rolling window.
        min_periods: Minimum number of observations needed for a value;
            pass ``window`` to get NaN warm-up rows instead.

    Returns:
        A new DataFrame holding the original columns followed by one
        ``<column>_rolling_mean`` column per original column. ``data`` itself
        is not modified.
    """
    # Rolling mean
    rolling_mean = data.rolling(window=window, min_periods=min_periods).mean()

    # NOTE: earlier FFT-amplitude and lag-feature experiments were removed
    # here; both were commented out and operated along axis=1 (across columns).
    return pd.concat([data, rolling_mean.add_suffix('_rolling_mean')], axis=1)

# Split the data into train, val, test

def split_data(data, train_ratio=0.7, val_ratio=0.1):
    """Split ``data`` by row order into consecutive train, val and test parts.

    The boundaries are rounded to 9 decimals before truncation. Without that,
    floating-point noise such as ``10 * (0.7 + 0.1) == 7.999999999999999``
    truncates one row short and can leave the validation part empty.

    Args:
        data: DataFrame (or Series) ordered by row.
        train_ratio: Fraction of rows for the training part.
        val_ratio: Fraction of rows for the validation part; the remaining
            rows form the test part.

    Returns:
        Tuple ``(train, val, test)`` of positional slices of ``data``.

    Raises:
        ValueError: If a ratio is negative or the two ratios sum to more than 1.
    """
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1 + 1e-9:
        raise ValueError(
            "train_ratio and val_ratio must be non-negative and sum to at most 1, "
            f"got {train_ratio} and {val_ratio}"
        )

    n = data.shape[0]
    train_end = int(round(n * train_ratio, 9))
    val_end = int(round(n * (train_ratio + val_ratio), 9))

    train = data.iloc[:train_end]
    val = data.iloc[train_end:val_end]
    test = data.iloc[val_end:]

    return train, val, test

In [65]:
# Working copies of the loaded inputs, so the source frames stay untouched
time_series = pems_bay.copy()
adj_matrix = pems_bay_adj.copy()

# Feature sets: the raw series, and the series plus generated features
enhanced_features = generate_features(time_series)
original_features = time_series.copy()

# Train / val / test split of both feature sets
(train_orig, val_orig, test_orig), (train_enh, val_enh, test_enh) = (
    split_data(feature_set)
    for feature_set in (original_features, enhanced_features)
)

In [70]:
# Model training
# Maps a result label (e.g. 'HA_original') to the tuple returned by compute_metrics.
results = {}

# 1. Historical Average (HA)
def historical_average(data):
    """Return the mean of every column of ``data`` as a NumPy array."""
    column_means = data.mean(axis=0)
    return column_means.values

# Estimate the per-column average on the training split only; averaging the
# test split itself would leak the values being predicted into the baseline.
ha_pred = historical_average(train_orig)

# The same average is the forecast for every test row, so repeat it to the
# shape of the test split and score all rows rather than only the last one.
ha_forecast = np.tile(ha_pred, (len(test_orig), 1))
results['HA_original'] = compute_metrics(test_orig.to_numpy(), ha_forecast)

In [None]:
# 2. ARIMA (demonstrated on a single node)
def train_arima(data, node_index, steps=1, order=(5, 1, 0)):
    """Fit ARIMA on one column of ``data`` and forecast past its last row.

    The series is the whole column at position ``node_index``, i.e. every row
    of that column. The previous version indexed a single row instead, which
    fitted the model on one row's values across the columns.

    Args:
        data: DataFrame whose rows are consecutive observations.
        node_index: Positional index of the column to model.
        steps: Number of future steps to forecast.
        order: ARIMA ``(p, d, q)`` order.

    Returns:
        The forecast returned by the fitted model, of length ``steps``.
    """
    series = data.iloc[:, node_index]
    model_fit = ARIMA(series, order=order).fit()
    return model_fit.forecast(steps=steps)

node_index = 0

# The test split starts right after the validation split, so fit on
# train + val: the forecast horizon then lines up with the test rows.
arima_history = pd.concat([train_orig, val_orig])
arima_pred = train_arima(arima_history, node_index, steps=len(test_orig))

# Compare as plain arrays; the forecast carries its own index labels.
results['ARIMA_original'] = compute_metrics(
    test_orig.iloc[:, node_index].to_numpy(),
    np.asarray(arima_pred),
)

In [None]:

# 3. SVR
def train_svr(features, target):
    """Fit an RBF-kernel SVR on ``features`` / ``target`` and return the model."""
    model = SVR(kernel='rbf')
    model.fit(features, target)
    return model


def _make_lagged_samples(frame, target_col, n_lags):
    """Turn consecutive rows into supervised samples for one-step-ahead forecasting.

    Each sample's features are ``n_lags`` consecutive rows of ``frame``
    flattened into one vector; its target is the ``target_col`` value of the
    row that immediately follows that window.

    Raises:
        ValueError: If ``frame`` has too few rows to form a single sample.
    """
    if len(frame) <= n_lags:
        raise ValueError(f"need more than {n_lags} rows, got {len(frame)}")
    values = frame.to_numpy(dtype=float)
    target = frame[target_col].to_numpy(dtype=float)
    windows = [values[start:start + n_lags].ravel() for start in range(len(frame) - n_lags)]
    return np.stack(windows), target[n_lags:]


def _evaluate_svr(train, test, feature_cols, target_col, n_lags):
    """Fit SVR on lagged windows of ``train`` and predict the windows of ``test``.

    Returns ``(model, predictions, true_values)``.
    """
    # dropna removes rows with missing feature values; this assumes any such
    # rows sit at the very start of a split (rolling warm-up) - TODO confirm.
    X_train, y_train = _make_lagged_samples(train[feature_cols].dropna(), target_col, n_lags)
    X_test, y_test = _make_lagged_samples(test[feature_cols].dropna(), target_col, n_lags)
    model = train_svr(X_train, y_train)
    return model, model.predict(X_test), y_test


# The previous version fitted on transposed frames, so the number of features
# equalled the number of rows in each split and differed between fit and
# predict. Training and test samples now share one fixed-width window layout.
svr_n_lags = 12                      # 12 consecutive readings (one hour at 5-minute spacing)
svr_node = train_orig.columns[0]     # single node, as in the ARIMA example

svr_model, svr_pred, svr_true = _evaluate_svr(
    train_orig, test_orig, [svr_node], svr_node, svr_n_lags
)
results['SVR_original'] = compute_metrics(svr_true, svr_pred)

# Enhanced variant: the node's readings plus its rolling-mean feature column.
svr_enhanced_cols = [svr_node, f'{svr_node}_rolling_mean']
svr_model_enhanced, svr_pred_enhanced, svr_true_enhanced = _evaluate_svr(
    train_enh, test_enh, svr_enhanced_cols, svr_node, svr_n_lags
)
results['SVR_enhanced'] = compute_metrics(svr_true_enhanced, svr_pred_enhanced)

# Results: one line per model with its (MAE, RMSE, MAPE) tuple
print("Results:")
for model in results:
    metrics = results[model]
    print(f"{model}: MAE={metrics[0]}, RMSE={metrics[1]}, MAPE={metrics[2]}%")