In [1]:
import h5py
import pickle
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.svm import SVR
from statsmodels.tsa.arima.model import ARIMA
import math
import numpy.linalg as la
from tqdm import tqdm
from tabulate import tabulate

# Загрузка данных
# Pull the sensor ids, raw time stamps and speed readings out of the HDF5 'df' group.
with h5py.File('../data/raw_data/METR-LA/METR-LA.h5', 'r') as file:
    df_group = file['df']
    axis0 = df_group['axis0'][:]  # sensor identifiers, stored as bytes
    axis1 = df_group['axis1'][:]  # raw time stamps
    df_data = df_group['block0_values'][:]  # speed measurements
timestamps = pd.to_datetime(axis1)  # raw time stamps -> DatetimeIndex

axis0 = [sensor_id.decode('utf-8') for sensor_id in axis0]
# Build the frame and keep only the first two sensor columns for the preview below.
metr_la = pd.DataFrame(df_data, index=timestamps, columns=axis0).iloc[:, :2]

# Load the adjacency matrix.
# NOTE(review): pickle.load can execute arbitrary code from the file - only use a trusted copy.
with open('../data/raw_data/METR-LA/adj_METR-LA.pkl', 'rb') as file:
    data = pickle.load(file, encoding='bytes')

adj_matrix = data[2]  # adjacency matrix is the third item of the pickle
node_ids = [raw_id.decode('utf-8') for raw_id in data[0]]  # node ids are the first item
metr_la_adj = pd.DataFrame(adj_matrix, index=node_ids, columns=node_ids)

metr_la

Unnamed: 0,773869,767541
2012-03-01 00:00:00,64.375000,67.625000
2012-03-01 00:05:00,62.666667,68.555556
2012-03-01 00:10:00,64.000000,63.750000
2012-03-01 00:15:00,0.000000,0.000000
2012-03-01 00:20:00,0.000000,0.000000
...,...,...
2012-06-27 23:35:00,65.000000,65.888889
2012-06-27 23:40:00,61.375000,65.625000
2012-06-27 23:45:00,67.000000,59.666667
2012-06-27 23:50:00,66.750000,62.250000


In [4]:
import h5py
import pickle
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.svm import SVR
from statsmodels.tsa.arima.model import ARIMA
import math
import numpy.linalg as la
from tqdm import tqdm
from tabulate import tabulate

import warnings
# Silences every warning category for the rest of the session
# (this also hides any warnings raised while fitting the models below).
warnings.filterwarnings("ignore")

# Load the data: sensor ids, raw time stamps and speed readings from the HDF5 'df' group.
with h5py.File('../data/raw_data/METR-LA/METR-LA.h5', 'r') as file:
    df_group = file['df']
    axis0 = df_group['axis0'][:]  # sensor identifiers, stored as bytes
    axis1 = df_group['axis1'][:]  # raw time stamps
    df_data = df_group['block0_values'][:]  # speed measurements
timestamps = pd.to_datetime(axis1)  # raw time stamps -> DatetimeIndex

axis0 = [sensor_id.decode('utf-8') for sensor_id in axis0]
# Keep only the first 2016 rows, all sensors.
# (2016 rows is 7 days if the index is at 5-minute spacing, as the preview output suggests.)
metr_la = pd.DataFrame(df_data, index=timestamps, columns=axis0).iloc[:2016, :]

# Load the adjacency matrix.
# NOTE(review): pickle.load can execute arbitrary code from the file - only use a trusted copy.
with open('../data/raw_data/METR-LA/adj_METR-LA.pkl', 'rb') as file:
    data = pickle.load(file, encoding='bytes')

adj_matrix = data[2]  # adjacency matrix is the third item of the pickle
node_ids = [raw_id.decode('utf-8') for raw_id in data[0]]  # node ids are the first item
metr_la_adj = pd.DataFrame(adj_matrix, index=node_ids, columns=node_ids)

# Предобработка данных
def preprocess_data(data, time_len, rate, seq_len, pre_len):
    """Split a series chronologically and cut each part into sliding windows.

    Args:
        data: array-like whose first axis is time.
        time_len: number of leading time steps to use.
        rate: fraction of ``time_len`` that goes to the training part.
        seq_len: number of input steps in each window.
        pre_len: number of target steps that follow each input window.

    Returns:
        Tuple ``(trainX, trainY, testX, testY)`` of NumPy arrays. Each X entry
        holds ``seq_len`` consecutive steps and the matching Y entry holds the
        ``pre_len`` steps right after them.
    """
    values = np.array(data)
    split_at = int(time_len * rate)
    span = seq_len + pre_len

    def _windows(segment):
        # Note: the range stops before start == len(segment) - span, so the
        # very last full window of a segment is not emitted.
        inputs, targets = [], []
        for start in range(len(segment) - seq_len - pre_len):
            chunk = segment[start: start + span]
            inputs.append(chunk[:seq_len])
            targets.append(chunk[seq_len:span])
        return inputs, targets

    trainX, trainY = _windows(values[:split_at])
    testX, testY = _windows(values[split_at:time_len])
    return np.array(trainX), np.array(trainY), np.array(testX), np.array(testY)

# Метрики
def evaluation(a, b):
    """Score predictions ``b`` against reference values ``a``.

    Returns:
        Tuple ``(rmse, mae, accuracy, r2, var)`` where
        ``rmse`` is the square root of the mean squared error,
        ``mae`` is the mean absolute error,
        ``accuracy`` is ``1 - norm(a - b) / norm(a)``,
        ``r2`` is ``1 - sum((a - b)**2) / sum((a - mean(a))**2)`` and
        ``var`` is ``1 - var(a - b) / var(a)``.
    """
    rmse = math.sqrt(mean_squared_error(a, b))
    mae = mean_absolute_error(a, b)

    residual = a - b
    accuracy = 1 - la.norm(residual) / la.norm(a)

    squared_error = (residual ** 2).sum()
    total_variation = ((a - a.mean()) ** 2).sum()
    r2 = 1 - squared_error / total_variation

    var = 1 - (np.var(residual)) / np.var(a)
    return rmse, mae, accuracy, r2, var

# Основные параметры
# Experiment configuration shared by the baselines below.
time_len = metr_la.shape[0]   # number of rows (time steps) in metr_la
num_nodes = metr_la.shape[1]  # number of columns (sensors) in metr_la
train_rate = 0.8              # leading fraction of the series used for training
seq_len = 12                  # input steps per window
pre_len = 3                   # forecast steps per window
trainX, trainY, testX, testY = preprocess_data(metr_la.values, time_len, train_rate, seq_len, pre_len)
method = 'HA'  # HA or SVR or ARIMA

# HA baseline: each forecast step is the mean of the current window; the
# forecast is then pushed into the window (oldest step dropped) before the
# next step is computed.
if method == 'HA':
    result = []
    for i in tqdm(range(len(testX)), desc="Processing HA"):
        history = np.array(testX[i])
        tempResult = []
        # Roll forward pre_len steps instead of a hand-unrolled three, so the
        # forecast length always matches the targets built by preprocess_data.
        for step in range(pre_len):
            step_mean = np.mean(history, axis=0)
            tempResult.append(step_mean)
            history = np.append(history[1:], [step_mean], axis=0)
        result.append(tempResult)

    # Flatten (sample, step, node) into rows of num_nodes values on both sides.
    result1 = np.reshape(np.array(result), [-1, num_nodes])
    testY1 = np.reshape(np.array(testY), [-1, num_nodes])
    rmse, mae, accuracy, r2, var = evaluation(testY1, result1)
    results = [['HA', rmse, mae, accuracy, r2, var]]
    print(tabulate(results, headers=["Method", "RMSE", "MAE", "Accuracy", "R2", "Var"]))

# Модель SVR
method = 'SVR'
if method == 'SVR':
    result = []
    data1 = np.array(metr_la)  # converted once; the frame does not change inside the loop
    # One linear SVR per sensor column.
    for i in tqdm(range(num_nodes), desc="Processing SVR"):
        a = data1[:, i]
        a_X, a_Y, t_X, t_Y = preprocess_data(a, time_len, train_rate, seq_len, pre_len)
        a_X = np.reshape(a_X, [-1, seq_len])
        # The regression target is the mean of the pre_len steps that follow each window.
        a_Y = np.mean(np.reshape(a_Y, [-1, pre_len]), axis=1)
        t_X = np.reshape(t_X, [-1, seq_len])

        svr_model = SVR(kernel='linear')
        svr_model.fit(a_X, a_Y)
        pre = svr_model.predict(t_X)
        # The single predicted value is repeated for every forecast step.
        pre = np.array(pre).reshape(-1, 1).repeat(pre_len, axis=1)
        result.append(pre)

    # (node, sample, step) -> (sample * step, node), the same row layout as testY1.
    result1 = np.transpose(np.reshape(np.array(result), [num_nodes, -1]))
    testY1 = np.reshape(np.array(testY), [-1, num_nodes])
    rmse1, mae1, acc1, r2, var = evaluation(testY1, result1)
    results = [['SVR', rmse1, mae1, acc1, r2, var]]
    print(tabulate(results, headers=["Method", "RMSE", "MAE", "Accuracy", "R2", "Var"]))

# Модель ARIMA
method = 'ARIMA'
if method == 'ARIMA':
    # NOTE(review): each model is fitted on the whole series of a sensor and
    # scored on that same series from step 4 onward, while the HA and SVR
    # branches are scored on the held-out test windows only - the metrics are
    # therefore not computed on the same data.
    num = metr_la.shape[1]
    rmse, mae, acc, r2, var = [], [], [], [], []
    for i in tqdm(range(num), desc="Processing ARIMA"):
        ts = metr_la.iloc[:, i]
        # Log-transform the series. NOTE(review): with the 1e-10 offset a zero
        # reading maps to log(1e-10) rather than -inf, so the isinf guard
        # below does not trigger for zeros.
        ts_log = np.array(np.log(ts + 1e-10), dtype=np.float32)
        ts_log[np.isinf(ts_log)] = 0
        ts_log = pd.Series(ts_log, index=metr_la.index)

        model = ARIMA(ts_log, order=(1, 0, 0))
        properModel = model.fit()
        # Dynamic prediction starting at position 4, then back to the original scale.
        predict_ts = properModel.predict(4, dynamic=True)
        log_recover = np.exp(predict_ts)
        ts = ts[log_recover.index]  # keep only the time stamps that were predicted
        er_rmse, er_mae, er_acc, r2_score, var_score = evaluation(ts, log_recover)
        rmse.append(er_rmse)
        mae.append(er_mae)
        acc.append(er_acc)
        r2.append(r2_score)
        var.append(var_score)

    # Negative per-sensor accuracies are clipped to 0 before averaging.
    acc1 = np.array(acc)
    acc1[acc1 < 0] = 0
    results = [['ARIMA', np.mean(rmse), np.mean(mae), np.mean(acc1), np.mean(r2), np.mean(var)]]
    print(tabulate(results, headers=["Method", "RMSE", "MAE", "Accuracy", "R2", "Var"]))

Processing HA: 100%|██████████| 389/389 [00:00<00:00, 11123.50it/s]


Method       RMSE      MAE    Accuracy        R2       Var
--------  -------  -------  ----------  --------  --------
HA        14.6614  7.73084    0.740025  0.484554  0.484592


Processing SVR: 100%|██████████| 207/207 [25:58<00:00,  7.53s/it] 


Method       RMSE      MAE    Accuracy        R2       Var
--------  -------  -------  ----------  --------  --------
SVR       14.5101  6.56491    0.742708  0.495136  0.495549


Processing ARIMA: 100%|██████████| 207/207 [00:44<00:00,  4.66it/s]

Method       RMSE      MAE    Accuracy        R2         Var
--------  -------  -------  ----------  --------  ----------
ARIMA     32.8929  31.0584     0.43296  -4.36163  -0.0171074





In [5]:
# Method       RMSE      MAE    Accuracy        R2       Var
# HA        14.6614  7.73084    0.740025  0.484554  0.484592
# SVR       14.5101  6.56491    0.742708  0.495136  0.495549
# ARIMA     32.8929  31.0584     0.43296  -4.36163  -0.0171074
