# pems-bay.h5

In [2]:
import h5py
import pickle
import folium
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

In [3]:
with h5py.File('data/PEMS-BAY/pems-bay.h5', 'r') as file:

    axis0 = file['speed']['axis0'][:]               # Идентификаторы датчиков
    block0_items = file['speed']['block0_items'][:] # Идентификаторы датчиков
    axis1 = file['speed']['axis1'][:]               # Метки времени
    timestamps = pd.to_datetime(axis1)              # Преобразование меток времени в формат datetime
    speed_data = file['speed']['block0_values'][:]  # Данные замеров скорости

perms_bay = pd.DataFrame(speed_data, index=timestamps, columns=axis0)

In [4]:
perms_bay.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 52116 entries, 2017-01-01 00:00:00 to 2017-06-30 23:55:00
Columns: 325 entries, 400001 to 414694
dtypes: float64(325)
memory usage: 129.6 MB


In [5]:
# Открытие .pkl файла
with open('data/PEMS-BAY/adj_mx_bay.pkl', 'rb') as file:
    data = pickle.load(file, encoding='bytes')

In [6]:
node_ids = [x.decode('utf-8') for x in data[0]]                     # Получаем список id узлов из data[0]
adj_matrix = data[2]                                                # Получаем матрицу смежности из data[2]
adj_df = pd.DataFrame(adj_matrix, index=node_ids, columns=node_ids) # Создание DataFrame с использованием id узлов как индексов и названий колонок

In [7]:
distances_df = pd.read_csv('data/PEMS-BAY/distances_bay_2017.csv', header=None)
locations_df = pd.read_csv('data/PEMS-BAY/graph_sensor_locations_bay.csv', header=None)

In [8]:
distances_df.columns = ['from', 'to', 'distance']

# Model


In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np

# 4. Генерация данных
def generate_data(num_nodes, time_steps):
    # Матрица смежности
    A = np.random.rand(num_nodes, num_nodes)
    A = (A > 0.9).astype(np.float32)  # Случайный граф с плотностью 0.1
    np.fill_diagonal(A, 1)  # Убедимся, что узлы связаны сами с собой
    A = torch.tensor(A, dtype=torch.float32)

    # Временной ряд средней скорости
    X = np.random.rand(time_steps, num_nodes, 1).astype(np.float32)  # (time_steps, num_nodes, 1)
    X = torch.tensor(X, dtype=torch.float32)

    return A, X

class GRU(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(GRU, self).__init__()
        self.hidden_dim = hidden_dim
        self.reset_gate = nn.Linear(input_dim + hidden_dim, hidden_dim)
        self.update_gate = nn.Linear(input_dim + hidden_dim, hidden_dim)
        self.new_gate = nn.Linear(input_dim + hidden_dim, hidden_dim)

    def forward(self, x, h):
        # x: (batch_size, num_nodes, input_dim)
        # h: (batch_size, num_nodes, hidden_dim)
        combined = torch.cat((x, h), dim=2)
        r = torch.sigmoid(self.reset_gate(combined))
        z = torch.sigmoid(self.update_gate(combined))
        combined_r = torch.cat((x, r * h), dim=2)
        n = torch.tanh(self.new_gate(combined_r))
        h_new = (1 - z) * h + z * n
        return h_new

class GCN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(GCN, self).__init__()
        self.weight = nn.Parameter(torch.randn(input_dim, output_dim))

    def forward(self, A, X):
        # A: (num_nodes, num_nodes)
        # X: (batch_size, num_nodes, input_dim)
        # Нормализация матрицы смежности
        D = torch.sum(A, dim=1)
        D_inv_sqrt = torch.diag(torch.pow(D, -0.5))
        A_norm = torch.mm(torch.mm(D_inv_sqrt, A), D_inv_sqrt)
        
        # Применяем GCN свертку
        output = torch.matmul(A_norm, X)
        output = torch.matmul(output, self.weight)
        return output

class GRU_GCN_Decoder(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, window_size, prediction_size):
        super(GRU_GCN_Decoder, self).__init__()
        self.gru = GRU(input_dim, hidden_dim)
        self.gcn = GCN(hidden_dim, hidden_dim)
        self.linear_decoder = nn.Linear(hidden_dim, prediction_size)
        self.hidden_dim = hidden_dim
        self.window_size = window_size

    def forward(self, A, X, h):
        # A: (num_nodes, num_nodes)
        # X: (batch_size, window_size, num_nodes, input_dim)
        # h: (batch_size, num_nodes, hidden_dim)
        batch_size, _, num_nodes, _ = X.shape
        
        # Проход GRU по окну
        for t in range(self.window_size):
            x_t = X[:, t, :, :]  # (batch_size, num_nodes, input_dim)
            h = self.gru(x_t, h)  # (batch_size, num_nodes, hidden_dim)

        # Проход GCN
        gcn_output = self.gcn(A, h)  # (batch_size, num_nodes, hidden_dim)

        # Декодер
        output = self.linear_decoder(gcn_output)  # (batch_size, num_nodes, prediction_size)
        return output, h

class TrafficDataset(Dataset):
    def __init__(self, A, X, window_size, prediction_size):
        self.A = A
        self.X = X
        self.window_size = window_size
        self.prediction_size = prediction_size

    def __len__(self):
        return len(self.X) - self.window_size - self.prediction_size + 1

    def __getitem__(self, idx):
        x = self.X[idx:idx + self.window_size]
        y = self.X[idx + self.window_size:idx + self.window_size + self.prediction_size]
        return self.A, x, y

def train_model(model, dataloader, epochs, lr, num_nodes, device):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    model = model.to(device)

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        # Инициализируем h с правильной размерностью
        h = torch.zeros(1, num_nodes, model.hidden_dim, device=device)

        for A, x, y in dataloader:
            # Перемещаем данные на устройство
            A = A.to(device)
            x = x.to(device)
            y = y.to(device)
            
            optimizer.zero_grad()
            output, h = model(A, x.unsqueeze(0), h)
            loss = criterion(output, y.unsqueeze(0))
            loss.backward()
            optimizer.step()
            
            # Отсоединяем h от графа вычислений
            h = h.detach()
            total_loss += loss.item()

        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(dataloader)}")

# Параметры
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_nodes = 50
time_steps = 1000
input_dim = 1
hidden_dim = 64
output_dim = 1
window_size = 12
prediction_size = 3
epochs = 10
lr = 0.001
batch_size = 32

# Генерация данных
A, X = generate_data(num_nodes, time_steps)
A = A.to(device)
X = X.to(device)

# Создание датасета и загрузчика данных
dataset = TrafficDataset(A, X, window_size, prediction_size)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Инициализация модели
model = GRU_GCN_Decoder(input_dim, hidden_dim, output_dim, window_size, prediction_size)

# Обучение модели
train_model(model, dataloader, epochs, lr, num_nodes, device)

RuntimeError: Sizes of tensors must match except in dimension 2. Expected size 32 but got size 1 for tensor number 1 in the list.

In [12]:
for A, x, y in dataloader:
    print(f'{A.shape, x.shape, y.shape = }')
    break

A.shape, x.shape, y.shape = (torch.Size([32, 50, 50]), torch.Size([32, 12, 50, 1]), torch.Size([32, 3, 50, 1]))
