In [40]:
import os
import sys
from pathlib import Path
sys.path.append(str(Path(os.getcwd()).resolve().parent))

import math
import numpy as np
import pandas as pd
import numpy.linalg as la
from utils.load_data import load_data
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.svm import SVR
from statsmodels.tsa.arima.model import ARIMA
from tqdm import tqdm
from tabulate import tabulate

import warnings
warnings.filterwarnings("ignore")


# Предобработка данных
def preprocess_data(data, time_len, rate, seq_len, pre_len):
    """Split a series chronologically and cut each part into sliding windows.

    Parameters:
    - data: array-like indexed by time along its first axis.
    - time_len: number of leading time steps of ``data`` to use.
    - rate: fraction of ``time_len`` assigned to the training split.
    - seq_len: number of time steps in each input window.
    - pre_len: number of time steps in each target window.

    Returns:
    - (trainX, trainY, testX, testY) as NumPy arrays. A split of length ``n``
      produces ``n - seq_len - pre_len`` windows; a split that is too short
      produces empty arrays.
    """
    series = np.array(data)
    split_at = int(time_len * rate)
    span = seq_len + pre_len

    def _windows(segment):
        # Window start offsets; the range is empty when the segment is too short.
        starts = range(len(segment) - span)
        inputs = [segment[s: s + seq_len] for s in starts]
        targets = [segment[s + seq_len: s + span] for s in starts]
        return inputs, targets

    trainX, trainY = _windows(series[:split_at])
    testX, testY = _windows(series[split_at:time_len])
    return np.array(trainX), np.array(trainY), np.array(testX), np.array(testY)

# Метрики
def evaluation(a, b):
    """Score predictions ``b`` against reference values ``a``.

    Returns:
    - (rmse, mae, accuracy, r2, var) where
      rmse     = sqrt of sklearn's mean_squared_error(a, b),
      mae      = sklearn's mean_absolute_error(a, b),
      accuracy = 1 - norm(a - b) / norm(a), using numpy.linalg.norm,
      r2       = 1 - sum((a - b)^2) / sum((a - mean(a))^2), mean over all of ``a``,
      var      = 1 - var(a - b) / var(a).
    """
    rmse = math.sqrt(mean_squared_error(a, b))
    mae = mean_absolute_error(a, b)

    residual = a - b
    accuracy = 1 - la.norm(residual) / la.norm(a)

    ss_res = (residual ** 2).sum()
    ss_tot = ((a - a.mean()) ** 2).sum()
    r2 = 1 - ss_res / ss_tot

    var = 1 - (np.var(residual)) / np.var(a)
    return rmse, mae, accuracy, r2, var

In [41]:
# Модель HA
def run_ha(data, pre_len):
    """
    Historical Average (HA) baseline.

    The per-node mean of everything except the last ``pre_len`` time steps is
    used as the forecast for each of those last ``pre_len`` steps.

    Parameters:
    - data: array indexed as [time step, node].
    - pre_len: number of trailing time steps to hold out and forecast.

    Returns:
    - Metrics row: ['HA', rmse, mae, accuracy, r2, var].
    """
    # Chronological split: history first, the final pre_len steps are the target.
    history, target = data[:-pre_len, :], data[-pre_len:, :]

    # One mean per node, repeated for every forecast step.
    node_means = np.mean(history, axis=0)
    forecast = np.tile(node_means, (pre_len, 1))

    return ['HA', *evaluation(target, forecast)]

# Модель SVR
def run_svr(trainX, trainY, testX, testY, seq_len, pre_len, num_nodes):
    """
    Per-node linear SVR baseline.

    A separate ``SVR(kernel='linear')`` is fitted for every node. The inputs are
    windows of ``seq_len`` past values and the training target is the mean of
    the following ``pre_len`` values. For ``pre_len > 1`` the model is applied
    recursively: after each step the window of every test sample is shifted one
    position along the time axis and the newest prediction is appended. The
    step predictions are then averaged and that average is repeated for all
    ``pre_len`` forecast steps.

    Parameters:
    - trainX, testX: arrays indexed as [sample, time step, node].
    - trainY, testY: arrays indexed as [sample, forecast step, node].
    - seq_len: length of each input window.
    - pre_len: number of forecast steps.
    - num_nodes: number of nodes (size of the last axis).

    Returns:
    - Metrics row: ['SVR', rmse, mae, accuracy, r2, var].
    """
    result = []

    for i in tqdm(range(num_nodes), desc="Processing SVR"):
        # Select one node and flatten to (samples, seq_len) / (samples,).
        a_X = trainX[:, :, i].reshape(-1, seq_len)
        a_Y = trainY[:, :, i].reshape(-1, pre_len).mean(axis=1)  # horizon-mean target
        t_X = testX[:, :, i].reshape(-1, seq_len)

        svr_model = SVR(kernel='linear')
        svr_model.fit(a_X, a_Y)

        # First-step prediction for every test sample.
        pre = svr_model.predict(t_X)

        if pre_len > 1:
            step_preds = [pre]
            window = t_X
            for _ in range(1, pre_len):
                # Slide every sample's window along the TIME axis (axis 1):
                # drop the oldest value and append the latest prediction.
                # The window is carried over so each step builds on the previous one.
                window = np.concatenate([window[:, 1:], step_preds[-1][:, None]], axis=1)
                step_preds.append(svr_model.predict(window))
            # Average the step predictions per sample.
            pre = np.mean(step_preds, axis=0)

        # Repeat the per-sample value for every forecast step: (samples, pre_len).
        pre = np.tile(pre[:, None], (1, pre_len))
        result.append(pre)

    # (nodes, samples, pre_len) -> (samples, pre_len, nodes) -> (samples * pre_len, nodes)
    result = np.array(result).transpose(1, 2, 0).reshape(-1, num_nodes)
    testY = testY.reshape(-1, num_nodes)

    rmse, mae, accuracy, r2, var = evaluation(testY, result)
    return ['SVR', rmse, mae, accuracy, r2, var]


# ARIMA model
def run_arima(data, pre_len, num_nodes):
    """
    Per-node ARIMA(1, 0, 0) baseline.

    For every node the series is split into everything except the last
    ``pre_len`` steps (training) and the last ``pre_len`` steps (test). The
    model is fitted on ``log1p`` of the training part, the next ``pre_len``
    steps are forecast, mapped back with ``expm1`` and scored against the test
    part. Nodes whose fit, forecast or scoring raises are reported and
    contribute NaN, which is ignored when averaging.

    Parameters:
    - data: array indexed as [time step, node].
    - pre_len: number of trailing time steps to hold out and forecast.
    - num_nodes: number of nodes (columns of ``data``) to process.

    Returns:
    - Metrics row: ['ARIMA', rmse, mae, accuracy, r2, var], each the NaN-ignoring
      mean over nodes; per-node accuracy is clipped to be >= 0 before averaging.
    """
    rmse, mae, accuracy, r2, var = [], [], [], [], []

    for i in tqdm(range(num_nodes), desc="Processing ARIMA"):
        ts = data[:, i]

        # Hold out the last pre_len steps for testing.
        ts_train = ts[:-pre_len]
        ts_test = ts[-pre_len:]

        # Fit in log1p space; forecasts are mapped back with expm1 below.
        ts_series = pd.Series(np.log1p(ts_train))

        try:
            model = ARIMA(ts_series, order=(1, 0, 0))
            properModel = model.fit()

            # Out-of-sample forecast for the pre_len steps right after the training data.
            predict_ts = properModel.predict(
                start=len(ts_series), end=len(ts_series) + pre_len - 1, dynamic=True
            )
            # Back to the original scale, as a plain array so scoring is purely positional.
            log_recover = np.asarray(np.expm1(predict_ts))

            er_rmse, er_mae, er_acc, r2_score, var_score = evaluation(ts_test, log_recover)

        except Exception as e:
            # Best effort: keep going, but make the failure visible instead of
            # silently turning it into NaN. tqdm.write keeps the progress bar intact.
            tqdm.write(f"ARIMA failed for node {i}: {type(e).__name__}: {e}")
            er_rmse = er_mae = er_acc = r2_score = var_score = np.nan

        rmse.append(er_rmse)
        mae.append(er_mae)
        accuracy.append(er_acc)
        r2.append(r2_score)
        var.append(var_score)

    # Negative accuracies are floored at 0 before averaging; NaN entries stay NaN.
    accuracy = np.clip(accuracy, 0, None)
    return ['ARIMA', np.nanmean(rmse), np.nanmean(mae), np.nanmean(accuracy), np.nanmean(r2), np.nanmean(var)]


# METR-LA

In [48]:
# Load the METR-LA dataset from its directory (relative to the notebook location).
data_dir = '../data/all_data/METR-LA'
# load_data returns three values; the second and third are presumably dataset
# metadata and a node adjacency matrix, as the names suggest — confirm in utils.load_data.
metr_la, metadata, adj_matrix = load_data(data_dir)
# Keep all of axis 0, the first 12 entries of axis 1 and index 0 of axis 2 -> 2-D array.
# The experiment cell treats axis 0 as time and axis 1 as nodes; axis 2 is
# presumably the feature channel — TODO confirm against utils.load_data.
metr_la = metr_la[:, :12, 0]

In [51]:
# Experiment settings
time_len, num_nodes = metr_la.shape[0], metr_la.shape[1]
train_rate = 0.8
seq_len = 12
pre_len_list = [3, 6, 12]  # forecast horizons, in time steps

# Metric rows collected per forecast horizon
results_dict = {}

# Run every baseline for each forecast horizon
for pre_len in pre_len_list:
    trainX, trainY, testX, testY = preprocess_data(metr_la, time_len, train_rate, seq_len, pre_len)

    results_dict[pre_len] = [
        # HA
        run_ha(metr_la, pre_len),
        # SVR (currently disabled)
        # run_svr(trainX, trainY, testX, testY, seq_len, pre_len, num_nodes),
        # ARIMA
        run_arima(metr_la, pre_len, num_nodes),
    ]

# Print one results table per horizon
for pre_len in pre_len_list:
    results = results_dict[pre_len]
    print(f"\nResults for pre_len={pre_len}:")
    print(tabulate(results, headers=["Method", "RMSE", "MAE", "Accuracy", "R2", "Var"]))

Processing ARIMA:   0%|          | 0/12 [00:00<?, ?it/s]

Processing ARIMA: 100%|██████████| 12/12 [00:03<00:00,  3.26it/s]
Processing ARIMA: 100%|██████████| 12/12 [00:03<00:00,  3.75it/s]
Processing ARIMA: 100%|██████████| 12/12 [00:03<00:00,  3.78it/s]


Results for pre_len=3:
Method       RMSE      MAE    Accuracy        R2        Var
--------  -------  -------  ----------  --------  ---------
HA        8.36438  6.99219    0.871012  -11.2082   -2.73174
ARIMA     6.17932  5.69297    0.904676  -86.6429  -10.2661

Results for pre_len=6:
Method       RMSE     MAE    Accuracy        R2       Var
--------  -------  ------  ----------  --------  --------
HA        8.55453  7.1496    0.868181  -10.1381  -2.62996
ARIMA     8.51614  7.9938    0.868742  -36.4206  -2.86803

Results for pre_len=12:
Method       RMSE      MAE    Accuracy         R2       Var
--------  -------  -------  ----------  ---------  --------
HA         8.3993  6.99218    0.870008   -6.63941  -1.7203
ARIMA     10.4309  9.79772    0.838421  -29.8846   -2.13142





# PEMS-BAY

In [47]:
# Load the PEMS-BAY dataset from its directory (relative to the notebook location).
data_dir = '../data/all_data/PEMS-BAY'
# load_data returns three values; the second and third are presumably dataset
# metadata and a node adjacency matrix, as the names suggest — confirm in utils.load_data.
pems_bay, metadata, adj_matrix = load_data(data_dir)
# Keep all of axis 0, the first 12 entries of axis 1 and index 0 of axis 2 -> 2-D array.
# The experiment cell treats axis 0 as time and axis 1 as nodes; axis 2 is
# presumably the feature channel — TODO confirm against utils.load_data.
pems_bay = pems_bay[:, :12, 0]

In [50]:
# Experiment settings
time_len, num_nodes = pems_bay.shape[0], pems_bay.shape[1]
train_rate = 0.8
seq_len = 12
pre_len_list = [3, 6, 12]  # forecast horizons, in time steps

# Metric rows collected per forecast horizon
results_dict = {}

# Run every baseline for each forecast horizon
for pre_len in pre_len_list:
    trainX, trainY, testX, testY = preprocess_data(pems_bay, time_len, train_rate, seq_len, pre_len)

    results_dict[pre_len] = [
        # HA
        run_ha(pems_bay, pre_len),
        # SVR (currently disabled)
        # run_svr(trainX, trainY, testX, testY, seq_len, pre_len, num_nodes),
        # ARIMA
        run_arima(pems_bay, pre_len, num_nodes),
    ]

# Print one results table per horizon
for pre_len in pre_len_list:
    results = results_dict[pre_len]
    print(f"\nResults for pre_len={pre_len}:")
    print(tabulate(results, headers=["Method", "RMSE", "MAE", "Accuracy", "R2", "Var"]))

Processing ARIMA:   0%|          | 0/12 [00:00<?, ?it/s]

Processing ARIMA: 100%|██████████| 12/12 [00:05<00:00,  2.25it/s]
Processing ARIMA: 100%|██████████| 12/12 [00:04<00:00,  2.47it/s]
Processing ARIMA: 100%|██████████| 12/12 [00:03<00:00,  3.01it/s]


Results for pre_len=3:
Method        RMSE       MAE    Accuracy         R2        Var
--------  --------  --------  ----------  ---------  ---------
HA        4.00398   3.55908     0.939388   -1.50307   0.474659
ARIMA     0.577818  0.518097    0.991202  -28.2      -0.876185

Results for pre_len=6:
Method        RMSE       MAE    Accuracy        R2        Var
--------  --------  --------  ----------  --------  ---------
HA        4.13708   3.66051     0.937462  -1.82843   0.385902
ARIMA     0.854843  0.772709    0.987048  -8.33254  -0.463787

Results for pre_len=12:
Method       RMSE      MAE    Accuracy        R2        Var
--------  -------  -------  ----------  --------  ---------
HA        3.88132  3.38935    0.940983  -1.12122   0.388085
ARIMA     1.73009  1.57327    0.973341  -5.978    -0.547816





# PEMS03

In [60]:
# Load the PEMS03 dataset from its directory (relative to the notebook location).
data_dir = '../data/all_data/PEMS03'
# load_data returns three values; the second and third are presumably dataset
# metadata and a node adjacency matrix, as the names suggest — confirm in utils.load_data.
pems03, metadata, adj_matrix = load_data(data_dir)
# Keep all of axis 0, the first 12 entries of axis 1 and index 0 of axis 2 -> 2-D array.
# The experiment cell treats axis 0 as time and axis 1 as nodes; axis 2 is
# presumably the feature channel — TODO confirm against utils.load_data.
pems03 = pems03[:, :12, 0]

In [55]:
# Experiment settings
time_len, num_nodes = pems03.shape[0], pems03.shape[1]
train_rate = 0.8
seq_len = 12
pre_len_list = [3, 6, 12]  # forecast horizons, in time steps

# Metric rows collected per forecast horizon
results_dict = {}

# Run every baseline for each forecast horizon
for pre_len in pre_len_list:
    trainX, trainY, testX, testY = preprocess_data(pems03, time_len, train_rate, seq_len, pre_len)

    results_dict[pre_len] = [
        # HA
        run_ha(pems03, pre_len),
        # SVR (currently disabled)
        # run_svr(trainX, trainY, testX, testY, seq_len, pre_len, num_nodes),
        # ARIMA
        run_arima(pems03, pre_len, num_nodes),
    ]

# Print one results table per horizon
for pre_len in pre_len_list:
    results = results_dict[pre_len]
    print(f"\nResults for pre_len={pre_len}:")
    print(tabulate(results, headers=["Method", "RMSE", "MAE", "Accuracy", "R2", "Var"]))

Processing ARIMA: 100%|██████████| 12/12 [00:02<00:00,  4.10it/s]
Processing ARIMA: 100%|██████████| 12/12 [00:03<00:00,  3.30it/s]
Processing ARIMA: 100%|██████████| 12/12 [00:05<00:00,  2.34it/s]


Results for pre_len=3:
Method        RMSE       MAE    Accuracy         R2         Var
--------  --------  --------  ----------  ---------  ----------
HA        115.382   107.743    -0.174197   -6.94592  -0.0173395
ARIMA      13.1865   12.2542    0.819217  -25.0549   -0.232226

Results for pre_len=6:
Method        RMSE       MAE    Accuracy        R2        Var
--------  --------  --------  ----------  --------  ---------
HA        109.775   102.624   -0.0575718  -5.60094   0.168007
ARIMA      14.7647   12.6849   0.818581   -2.01118  -0.164438

Results for pre_len=12:
Method       RMSE      MAE    Accuracy        R2         Var
--------  -------  -------  ----------  --------  ----------
HA        99.0735  91.6067    0.158388  -2.70034   0.463252
ARIMA     41.6243  37.587     0.618807  -4.60484  -0.0727865





# PEMS04

In [61]:
# Load the PEMS04 dataset from its directory (relative to the notebook location).
data_dir = '../data/all_data/PEMS04'
# load_data returns three values; the second and third are presumably dataset
# metadata and a node adjacency matrix, as the names suggest — confirm in utils.load_data.
pems04, metadata, adj_matrix = load_data(data_dir)
# Keep all of axis 0, the first 12 entries of axis 1 and index 0 of axis 2 -> 2-D array.
# The experiment cell treats axis 0 as time and axis 1 as nodes; axis 2 is
# presumably the feature channel — TODO confirm against utils.load_data.
pems04 = pems04[:, :12, 0]

In [62]:
# Experiment settings
time_len, num_nodes = pems04.shape[0], pems04.shape[1]
train_rate = 0.8
seq_len = 12
pre_len_list = [3, 6, 12]  # forecast horizons, in time steps

# Metric rows collected per forecast horizon
results_dict = {}

# Run every baseline for each forecast horizon
for pre_len in pre_len_list:
    trainX, trainY, testX, testY = preprocess_data(pems04, time_len, train_rate, seq_len, pre_len)

    results_dict[pre_len] = [
        # HA
        run_ha(pems04, pre_len),
        # SVR (currently disabled)
        # run_svr(trainX, trainY, testX, testY, seq_len, pre_len, num_nodes),
        # ARIMA
        run_arima(pems04, pre_len, num_nodes),
    ]

# Print one results table per horizon
for pre_len in pre_len_list:
    results = results_dict[pre_len]
    print(f"\nResults for pre_len={pre_len}:")
    print(tabulate(results, headers=["Method", "RMSE", "MAE", "Accuracy", "R2", "Var"]))

Processing ARIMA: 100%|██████████| 12/12 [00:04<00:00,  2.73it/s]
Processing ARIMA: 100%|██████████| 12/12 [00:03<00:00,  3.01it/s]
Processing ARIMA: 100%|██████████| 12/12 [00:03<00:00,  3.01it/s]


Results for pre_len=3:
Method        RMSE       MAE    Accuracy         R2        Var
--------  --------  --------  ----------  ---------  ---------
HA        126.875   121.943    -0.987286  -16.5322   -0.336638
ARIMA      15.0642   13.6095    0.705606   -6.69337  -0.400227

Results for pre_len=6:
Method        RMSE        MAE    Accuracy         R2        Var
--------  --------  ---------  ----------  ---------  ---------
HA        124.074   119.436     -0.865392  -15.0722   -0.179204
ARIMA      12.1685    9.92754    0.78215    -1.63309  -0.543739

Results for pre_len=12:
Method        RMSE       MAE    Accuracy         R2         Var
--------  --------  --------  ----------  ---------  ----------
HA        120       115.588    -0.687947  -12.3567    0.0359209
ARIMA      30.6987   28.6245    0.509471   -9.15834  -0.331052



