In [5]:
import torch
import matplotlib.pyplot as plt
from statistics import mean
from tqdm import tqdm
import pandas as pd
import numpy as np
from datetime import datetime
%matplotlib inline

In [6]:
from graph_generation_trade import dict_snapshots_trade, temporal_data_trade

# Flatten the snapshot mapping into a list, keeping the dict's iteration order.
snapshots_trade = [*dict_snapshots_trade.values()]

# NOTE: `train_snapshots` and `test_snapshots` are also defined with `def`
# further down in this notebook; once that cell has run, the notebook-local
# versions shadow the ones imported here.
from models_and_utils import (
    apply_negative_sampling,
    data_split,
    test_snapshots,
    train_snapshots,
)

In [7]:
def train_snapshots(model, graph_snapshots_train, optimiser, epochs, neg_sampling_ratio=0.0):
    """Train ``model`` on a sequence of graph snapshots with an MSE objective.

    One optimiser step is taken per snapshot. The per-snapshot losses of each
    epoch are averaged to give that epoch's loss.

    Args:
        model: Module called as ``model(snapshot)``; its output is compared
            element-wise with ``snapshot.edge_weights``.
        graph_snapshots_train: Iterable of snapshot objects exposing
            ``edge_weights`` (and ``num_nodes`` when negative sampling is on).
        optimiser: Optimiser used to update the model (``zero_grad`` / ``step``).
        epochs: Number of passes over the training snapshots.
        neg_sampling_ratio: When greater than 0, every snapshot is replaced
            once, before training starts, by
            ``apply_negative_sampling(snapshot, all_node_pairs, ratio=...)``.

    Returns:
        pandas.DataFrame with columns ``epoch`` (1-based) and ``loss`` (mean
        per-snapshot MSE of that epoch).

    Raises:
        ValueError: If ``graph_snapshots_train`` contains no snapshots.
    """
    # Materialise once so one-shot iterables survive being walked every epoch.
    graph_snapshots_train = list(graph_snapshots_train)
    if not graph_snapshots_train:
        # Fail early with a clear message instead of deep inside statistics.mean.
        raise ValueError('graph_snapshots_train must contain at least one snapshot')

    if neg_sampling_ratio > 0.0:
        graph_snapshots_train_neg = []
        for snapshot in graph_snapshots_train:
            # Candidate edges are all ordered node pairs (self-pairs included).
            nodes = torch.arange(snapshot.num_nodes)
            all_possible_edges = torch.cartesian_prod(nodes, nodes)
            snapshot_neg = apply_negative_sampling(snapshot, all_possible_edges, ratio=neg_sampling_ratio)
            graph_snapshots_train_neg.append(snapshot_neg)
        graph_snapshots_train = graph_snapshots_train_neg

    model.train()
    epoch_records = []

    for epoch in tqdm(range(epochs)):
        losses_list = []
        for snapshot in graph_snapshots_train:
            # Clear gradients *before* backward so anything left on the
            # parameters from earlier use cannot leak into this update.
            optimiser.zero_grad()
            edge_weights_pred = model(snapshot)
            loss = torch.mean((edge_weights_pred - snapshot.edge_weights) ** 2)
            loss.backward()
            optimiser.step()
            losses_list.append(loss.item())

        epoch_records.append((epoch + 1, mean(losses_list)))

    return pd.DataFrame(epoch_records, columns=['epoch', 'loss'])

@torch.no_grad()
def test_snapshots(model, graph_snapshots_test, neg_sampling=False):
    model.eval()
    loss = 0
    i = 0
    for snapshot in graph_snapshots_test:
        print(i)
        i+=1
        if neg_sampling:
                nodes = torch.arange(snapshot.num_nodes)
                all_possible_edges = torch.cartesian_prod(nodes, nodes)
                snapshot = apply_negative_sampling(snapshot, all_possible_edges)
        edge_weights_pred = model(snapshot)
        mse = torch.mean((edge_weights_pred - snapshot.edge_weights)**2)
        loss += mse
    loss = loss / (len(graph_snapshots_test))
    print(f'MSE = {loss.item():.4f}')

In [8]:
# --- ModelMPNN experiment: split the trade snapshots, build the model, train ---
from models_and_utils import ModelMPNN

data = snapshots_trade
# split_ratio=0.9 is presumably the training fraction — TODO confirm against data_split.
data_train, data_test = data_split(graph_snapshots=data, split_ratio=0.9)

# Model dimensions are read from the FIRST snapshot only: x is indexed as
# (num_nodes, in_channels).
in_channels = data[0].x.shape[1]
hidden_size = 64
num_nodes = data[0].x.shape[0]
window = 1
dropout_p = 0.2

model1 = ModelMPNN(in_channels, hidden_size, num_nodes, window, dropout_p)

lr = 0.05
epochs = 50
optimiser = torch.optim.Adam(model1.parameters(), lr=lr)

# NOTE(review): the recorded output of this cell ends in
#   RuntimeError: shape '[-1, 1, 14, 1]' is invalid for input of size 15
# raised before the first epoch finished. Possibly some snapshot does not have
# the same node count as data[0], which fixes num_nodes above — confirm.
results = train_snapshots(model1, data_train, optimiser, epochs)
# results['loss'].plot()
# plt.show()

# results = test_snapshots(model1, data_test)

  0%|          | 0/50 [00:00<?, ?it/s]


RuntimeError: shape '[-1, 1, 14, 1]' is invalid for input of size 15

In [None]:
# Show the shape of the first snapshot's `x`; the model-setup cells read it as
# (num_nodes, in_channels).
snapshots_trade[0].x.shape

torch.Size([14, 1])

In [None]:
# --- ModelEVOLVE experiment: split the trade snapshots, build the model, train ---
from models_and_utils import ModelEVOLVE

data = snapshots_trade
# Use the same `split_ratio` keyword as the ModelMPNN cell, which ran past the
# split in the current kernel; this cell previously passed `test_ratio=0.9`.
data_train, data_test = data_split(graph_snapshots=data, split_ratio=0.9)

# Hidden size is tied to the input feature width; dimensions are read from the
# first snapshot, whose x is indexed as (num_nodes, in_channels).
in_channels = hidden_size = data[0].x.shape[1]
num_nodes = data[0].x.shape[0]
window = 1
dropout_p = 0.5

model2 = ModelEVOLVE(num_nodes, hidden_size, in_channels, dropout_p)

lr = 0.05
neg_sampling_rate = 0  # 0 disables negative sampling in train_snapshots
epochs = 100
optimiser = torch.optim.Adam(model2.parameters(), lr=lr)

# NOTE(review): the recorded output of this cell ends in
#   RuntimeError: Expected hidden size (1, 2, 1), got [1, 1, 1]
# raised before the first epoch finished — cause not visible from this notebook.
results = train_snapshots(
    model2, data_train, optimiser, epochs, neg_sampling_ratio=neg_sampling_rate
)
results['loss'].plot()
plt.show()

# results = test_snapshots(model2, data_test)

  0%|          | 0/100 [00:00<?, ?it/s]


RuntimeError: Expected hidden size (1, 2, 1), got [1, 1, 1]