In [2]:
import pandas as pd
import numpy as np
import os
import typing
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras

ModuleNotFoundError: No module named 'matplotlib'

In [None]:
# Read the two PeMSD7 CSV files; neither has a header row, so pandas numbers
# the columns itself.
speeds_array = pd.read_csv(
    "PeMSD7_Full/PeMSD7_V_228.csv",
    header=None,
)
route_distances = pd.read_csv("PeMSD7_Full/PeMSD7_W_228.csv", header=None)


In [None]:
# Show the frame read from PeMSD7_W_228.csv as this cell's output.
route_distances

In [None]:
# Show the frame read from PeMSD7_V_228.csv as this cell's output.
speeds_array

In [None]:
class Dataset:
    '''
    Holder for the data splits together with the statistics used to normalize them.
    :param data: mapping from a split name to that split's array; the keys are whatever the caller supplies.
    :param stats: dict providing the keys 'mean' and 'std'.
    '''

    def __init__(self, data, stats):
        self.__data = data
        self.mean, self.std = stats['mean'], stats['std']

    def get_data(self, type):
        '''Return the stored entry for the split named `type`.'''
        split = self.__data[type]
        return split

    def get_stats(self):
        '''Return the statistics as a dict with the keys 'mean' and 'std'.'''
        return dict(mean=self.mean, std=self.std)

    def get_len(self, type):
        '''Return `len()` of the split named `type`.'''
        return len(self.get_data(type))

    def z_inverse(self, type):
        '''Map the split named `type` back to the original scale: value * std + mean.'''
        return self.get_data(type) * self.std + self.mean
    
def seq_gen(len_seq, data_seq, offset, n_frame, n_route, day_slot, C_0=1):
    '''
    Cut a time series into fixed-length windows that never cross a day boundary.
    :param len_seq: int, how many days to take windows from.
    :param data_seq: np.ndarray, source time series, indexed by time slot along the first axis.
    :param offset: int, number of days to skip before the first day used.
    :param n_frame: int, number of consecutive time slots in one window.
    :param n_route: int, the number of routes in the graph.
    :param day_slot: int, the number of time slots per day.
    :param C_0: int, the size of input channel.
    :return: np.ndarray, [len_seq * (day_slot - n_frame + 1), n_frame, n_route, C_0].
    '''
    # Number of window start positions that still fit inside a single day.
    windows_per_day = day_slot - n_frame + 1
    frame_shape = (n_frame, n_route, C_0)

    windows = np.zeros((len_seq * windows_per_day,) + frame_shape)
    for day in range(len_seq):
        day_start = (day + offset) * day_slot
        for slot in range(windows_per_day):
            begin = day_start + slot
            chunk = data_seq[begin:begin + n_frame, :]
            windows[day * windows_per_day + slot] = np.reshape(chunk, frame_shape)
    return windows

def data_gen(file_path, data_config, n_route, n_frame=21, day_slot=288):
    '''
    Source file load and dataset generation.
    :param file_path: str, the file path of data source (a header-less CSV).
    :param data_config: tuple, (n_train, n_val, n_test): the number of days given to each split.
    :param n_route: int, the number of routes in the graph.
    :param n_frame: int, the number of frame within a standard sequence unit,
                         which contains n_his = 12 and n_pred = 9 (3 /15 min, 6 /30 min & 9 /45 min).
    :param day_slot: int, the number of time slots per day, controlled by the time window (5 min as default).
    :return: Dataset, holding the normalized 'train', 'val' and 'test' arrays plus the training mean / std.
    :raises FileNotFoundError: if file_path does not exist (an error line is printed first).
    '''
    n_train, n_val, n_test = data_config

    # generate training, validation and test data
    try:
        data_seq = pd.read_csv(file_path, header=None).values
    except FileNotFoundError:
        print(f'ERROR: input file was not found in {file_path}.')
        # Re-raise: continuing would only fail a few lines later with an
        # UnboundLocalError on `data_seq`, hiding the real cause.
        raise

    # The splits are consecutive runs of days: train first, then validation, then test.
    seq_train = seq_gen(n_train, data_seq, 0, n_frame, n_route, day_slot)
    seq_val = seq_gen(n_val, data_seq, n_train, n_frame, n_route, day_slot)
    seq_test = seq_gen(n_test, data_seq, n_train + n_val, n_frame, n_route, day_slot)

    # x_stats: dict, the stats for the train dataset, including the value of mean and standard deviation.
    # Only the training split is used, so validation / test are scaled with training statistics.
    x_stats = {'mean': np.mean(seq_train), 'std': np.std(seq_train)}

    # x_train, x_val, x_test: np.array, [sample_size, n_frame, n_route, channel_size].
    x_train = z_score(seq_train, x_stats['mean'], x_stats['std'])
    x_val = z_score(seq_val, x_stats['mean'], x_stats['std'])
    x_test = z_score(seq_test, x_stats['mean'], x_stats['std'])

    x_data = {'train': x_train, 'val': x_val, 'test': x_test}
    dataset = Dataset(x_data, x_stats)
    return dataset

def z_score(x, mean, std):
    r'''
    Z-score normalization function: $z = (X - \mu) / \sigma $,
    where z is the z-score, X is the value of the element,
    $\mu$ is the population mean, and $\sigma$ is the standard deviation.
    :param x: np.ndarray, input array to be normalized.
    :param mean: float, the value of mean.
    :param std: float, the value of standard deviation.
    :return: np.ndarray, z-score normalized array.
    '''
    # The docstring is a raw string so that the LaTeX backslashes (\mu, \sigma)
    # are not parsed as (invalid) string escape sequences.
    return (x - mean) / std

In [23]:
# Number of days handed to each split (data_gen passes these on to seq_gen).
n_train = 34
n_val = n_test = 5
# Route count, history length and prediction length; the window size given to
# data_gen is n_his + n_pred.
n = 228
n_his = 12
n_pred = 9
PeMS = data_gen(
    "PeMSD7_Full/PeMSD7_V_228.csv",
    data_config=(n_train, n_val, n_test),
    n_route=n,
    n_frame=n_his + n_pred,
)
print(f'>> Loading dataset with Mean: {PeMS.mean:.2f}, STD: {PeMS.std:.2f}')

>> Loading dataset with Mean: 58.50, STD: 13.73


In [1]:
import torch

# Print the version string of the imported torch package.
print(torch.__version__)

  from .autonotebook import tqdm as notebook_tqdm


1.13.1


In [2]:
# Ask PyTorch whether CUDA is usable; the boolean result is this cell's output.
torch.cuda.is_available()

True

In [3]:
from torch_geometric.datasets import KarateClub

# Instantiate the KarateClub dataset and print a short summary of it.
dataset = KarateClub()
print(
    f'Dataset: {dataset}:',
    '======================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
    sep='\n',
)

Dataset: KarateClub():
Number of graphs: 1
Number of features: 34
Number of classes: 4


In [18]:
from torch_geometric_temporal.dataset import METRLADatasetLoader
from torch_geometric_temporal.signal import StaticGraphTemporalSignal

# Instantiate METRLADatasetLoader and ask it for its dataset object.
loader = METRLADatasetLoader()

dataset = loader.get_dataset()

In [19]:
# Show the repr of the loaded dataset object as this cell's output.
dataset

<torch_geometric_temporal.signal.static_graph_temporal_signal.StaticGraphTemporalSignal at 0x1e23f3217f0>

In [1]:
# Count the items produced by iterating `dataset`, by collecting them into a set.
# NOTE(review): the recorded output of this cell is a NameError, i.e. `dataset`
# did not exist in the kernel when it ran; a cell that defines it must run first.
print("Number of samples / sequences: ",  len(set(dataset)))

NameError: name 'dataset' is not defined

In [67]:
import torch
from torch_geometric_temporal.dataset import METRLADatasetLoader
from torch_geometric_temporal.signal import temporal_signal_split

In [68]:
# Load the dataset through METRLADatasetLoader.
loader = METRLADatasetLoader()

dataset = loader.get_dataset()

# Split into a training and a test part, passing train_ratio = 0.8.
# NOTE(review): presumably a chronological split with the first 80% used for
# training; confirm against the torch_geometric_temporal documentation.
train_dataset, test_dataset = temporal_signal_split(dataset, train_ratio = 0.8)

In [69]:

import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data

In [176]:
class TrafficLSTM(nn.Module):
    """LSTM followed by a two-layer MLP that maps a whole sequence to a sequence of the same shape.

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden size of the LSTM.
        num_layers: Number of stacked LSTM layers.
        sequence_len: Number of time steps in every input; the first linear
            layer is sized for exactly this length.
    """

    def __init__(self, input_size: int, hidden_size:  int, num_layers: int, sequence_len: int):
        super().__init__()
        self.sequence_len = sequence_len
        self.input_size = input_size

        flat_out = sequence_len * input_size
        bottleneck = flat_out // 2

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        # The MLP consumes the LSTM outputs of all time steps, flattened per sample.
        self.linear1 = nn.Linear(hidden_size * sequence_len, bottleneck)
        self.relu1 = nn.ReLU()
        self.linear2 = nn.Linear(bottleneck, flat_out)

    def forward(self, x):
        """Map x of shape (batch, sequence_len, input_size) to a tensor of the same shape."""
        n_batch = x.shape[0]
        lstm_out, _ = self.lstm(x)
        flat = lstm_out.reshape(n_batch, -1)
        projected = self.linear2(self.relu1(self.linear1(flat)))
        return projected.reshape(n_batch, self.sequence_len, self.input_size)

In [71]:
# Look at the first item yielded by `dataset`: edge weights, edge index and target shape.
temp = next(iter(dataset))
print(temp.edge_attr, temp.edge_index, temp.y.shape, sep='\n')

# Stack the training split's features / targets into arrays and report their shapes.
features = np.array(train_dataset.features)
print(np.shape(features))
targets = np.array(train_dataset.targets)
print(np.shape(targets))


tensor([1.0000, 0.2223, 0.5088,  ..., 0.2779, 0.3204, 1.0000])
tensor([[  0,   0,   0,  ..., 206, 206, 206],
        [  0,  13,  37,  ..., 187, 198, 206]])
torch.Size([207, 12])
(27399, 207, 2, 12)
(27399, 207, 12)


In [140]:
# we want to take in features and make tensors of each point
def create_dataset(train, test):
    """Turn the training split into a TensorDataset of (x, y) pairs.

    Args:
        train: Object exposing `.features` and `.targets`; both are stacked
            with `np.array`. The features must stack to a 4-D array.
        test: Accepted but not used by this function.

    Returns:
        `data.TensorDataset(x_train, y_train)`, with axes 1 and 2 of both
        tensors swapped relative to the stacked arrays.
    """
    # Keep only index 0 of the third axis ("just get speed" per the original comment).
    speed_only = np.array(train.features)[:, :, 0, :]
    # The unpack also enforces that exactly three axes remain.
    # NOTE(review): presumably (samples, sensors, time steps); confirm.
    _n_samples, _n_sensors, _n_steps = speed_only.shape
    stacked_targets = np.array(train.targets)

    x_train = torch.tensor(speed_only).transpose(1, 2)
    y_train = torch.tensor(stacked_targets).transpose(1, 2)
    for tensor in (x_train, y_train):
        print(tensor.shape)
    return data.TensorDataset(x_train, y_train)
    

In [178]:
dataset = create_dataset(train_dataset, test_dataset)
loader = data.DataLoader(dataset, batch_size = 8, drop_last=True)

# Peek at the first batch. create_dataset builds TensorDataset(x_train, y_train),
# so every batch is (inputs, targets) in that order; the names and printed
# labels below follow that order.
temp = next(iter(loader))
inputs, targets = temp
print('inputs.shape: {}'.format(inputs.shape))
print('inputs: {}'.format(inputs))
print('targets.shape: {}'.format(targets.shape))

# Number of tensors per batch, batch size, and length of one sample's first axis.
print(len(temp))
print(len(temp[0]))
print(len(temp[0][0]))

torch.Size([27399, 12, 207])
torch.Size([27399, 12, 207])
label.shape: torch.Size([8, 12, 207])
label: tensor([[[ 0.5332,  0.6940,  0.6692,  ...,  0.2796,  0.7620,  0.4095],
         [ 0.4486,  0.7400,  0.5861,  ...,  0.1133,  0.7345,  0.4590],
         [ 0.5146,  0.5022,  0.3167,  ...,  0.3847,  0.8044,  0.4157],
         ...,
         [ 0.4899,  0.3909,  0.4404,  ..., -0.1410,  0.6259,  0.4280],
         [ 0.5751,  0.4981,  0.5696,  ...,  0.3442,  0.6741,  0.3167],
         [ 0.4280,  0.7002,  0.6569,  ...,  0.1683,  0.6383,  0.2884]],

        [[ 0.4486,  0.7400,  0.5861,  ...,  0.1133,  0.7345,  0.4590],
         [ 0.5146,  0.5022,  0.3167,  ...,  0.3847,  0.8044,  0.4157],
         [-2.6522, -2.6522, -2.6522,  ..., -2.6522, -2.6522, -2.6522],
         ...,
         [ 0.5751,  0.4981,  0.5696,  ...,  0.3442,  0.6741,  0.3167],
         [ 0.4280,  0.7002,  0.6569,  ...,  0.1683,  0.6383,  0.2884],
         [ 0.3724,  0.6631,  0.2425,  ..., -0.0235,  0.8115,  0.4899]],

        [[ 0.

In [189]:
import time

def train(model, dataloader, loss_func, device, optimizer, epoch=None):
    """Run one pass over `dataloader`, updating `model` after every batch.

    Args:
        model: Module to optimize; it is switched to training mode.
        dataloader: Iterable of (inputs, targets) batches, i.e. the order in
            which a TensorDataset(x, y) yields its tensors.
        loss_func: Callable `loss_func(prediction, target)` returning a scalar
            tensor.
        device: Device the batches are moved to before the forward pass.
        optimizer: Optimizer holding the parameters of `model`.
        epoch: Optional epoch number, used only in the progress line. When
            omitted, a module-level `epoch` variable is used if one exists
            (this keeps existing call sites working), otherwise 0.

    Returns:
        None. Progress is printed every 500 batches.
    """
    model.train()
    log_interval = 500

    for idx, (inputs, targets) in enumerate(dataloader):
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()

        # Forward pass on the inputs; the loss compares the prediction with
        # the targets (the two used to be swapped).
        out = model(inputs)
        if idx == 0:
            # One-off sanity check of the tensor shapes on the first batch.
            print("input shape: ", inputs.shape)
            print("output shape, ", out.shape)
            print("label shape: ", targets.shape)
            print("out: ", out[0][0])
            print("label: ", targets[0][0])
        loss = loss_func(out, targets)
        loss.backward()
        optimizer.step()

        if idx % log_interval == 0 and idx > 0:
            # Computed only when it is printed, and detached so that the
            # logged value keeps no reference to the autograd graph.
            train_rmse = torch.sqrt(loss.detach()).item()
            epoch_no = epoch if epoch is not None else globals().get('epoch', 0)
            print('| epoch {:3d} | {:5d}/{:5d} batches '
                  '| rmse {:8.3f}'.format(epoch_no, idx, len(dataloader),
                                              train_rmse))

def evaluate(model, dataloader, loss_func, device):
    """Return the average loss of `model` over `dataloader`, without updating it.

    The model is a regressor, so the metric is the loss itself rather than an
    argmax-based accuracy. With `F.mse_loss` the square root of the returned
    value is the RMSE.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        dataloader: Iterable of (inputs, targets) batches, i.e. the order in
            which a TensorDataset(x, y) yields its tensors.
        loss_func: Callable `loss_func(prediction, target)` returning a scalar
            tensor. It is assumed to average over the batch; batches are
            weighted by their size when combined.
        device: Device the batches are moved to before the forward pass.

    Returns:
        float: sample-weighted mean of the per-batch losses, or NaN when the
        dataloader yields no samples.
    """
    model.eval()
    total_loss, total_count = 0.0, 0

    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            predictions = model(inputs)
            loss = loss_func(predictions, targets)

            # Weight by batch size so a smaller final batch does not skew the mean.
            batch_size = targets.size(0)
            total_loss += loss.item() * batch_size
            total_count += batch_size

    if total_count == 0:
        return float('nan')
    return total_loss / total_count

In [190]:
from torch.utils.data.dataset import random_split
import torch.nn.functional as F
#from torchtext.data.functional import to_map_style_dataset

# Use the GPU when one is available and fall back to the CPU otherwise.
# There is deliberately no hard assert on CUDA here: it would make the CPU
# fallback unreachable and stop the notebook on machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyper parameters
epochs = 10 # epoch
lr = 0.001 # learning rate
input_size = 207
hidden_size = 32

###########################################################################
# Define the model and the loss function.
###########################################################################
model = TrafficLSTM(input_size=input_size, hidden_size=hidden_size, num_layers=3, sequence_len=12)
loss_func = F.mse_loss

# copy the model to the selected device
model = model.to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs, 1e-8)
total_accu = None


for epoch in range(1, epochs + 1):
    epoch_start_time = time.time()
    train(model, loader, loss_func, device, optimizer)
    # Advance the cosine schedule once per epoch; without this call the
    # scheduler created above never changes the learning rate.
    scheduler.step()
    # TODO: validation is disabled because no `valid_dataloader` exists yet.
    # Once there is one, call evaluate(model, valid_dataloader, loss_func, device)
    # here and report it together with time.time() - epoch_start_time.

input shape:  torch.Size([8, 12, 207])
output shape,  torch.Size([8, 12, 207])
label shape:  torch.Size([8, 12, 207])
out:  tensor([ 0.0126, -0.0483, -0.0025, -0.0257, -0.0033,  0.0090, -0.0059,  0.0535,
        -0.0059,  0.0056,  0.0214, -0.0115,  0.0307, -0.0194,  0.0170,  0.0071,
         0.0023, -0.0091,  0.0482,  0.0170, -0.0178,  0.0584, -0.0619,  0.0122,
        -0.0243, -0.0016, -0.0361, -0.0636,  0.0320, -0.0056, -0.0523, -0.0033,
         0.0146,  0.0129,  0.0034,  0.0400, -0.0219, -0.0330,  0.0213, -0.0245,
         0.0010,  0.0062,  0.0323,  0.0061,  0.0463, -0.0196, -0.0188, -0.0005,
         0.0099, -0.0384, -0.0645,  0.0310,  0.0042,  0.0107,  0.0180, -0.0139,
         0.0474, -0.0106, -0.0015,  0.0028, -0.0191,  0.0198,  0.0490, -0.0051,
        -0.0134,  0.0129, -0.0093,  0.0222, -0.0005,  0.0027, -0.0068, -0.0042,
        -0.0579,  0.0237, -0.0188,  0.0022,  0.0349, -0.0145,  0.0179,  0.0315,
         0.0265,  0.0104,  0.0573,  0.0308,  0.0401, -0.0157, -0.0336,  0.03