# Notebook Content
In this notebook, the three datasets are set up and tested using the GAT model.

## 1. Import packages
We will use _PyTorch_ as the main neural-network package, and we will add _PyTorch Geometric_ as a utility package that implements functions for graph neural networks.

In [1]:
import time

import pandas as pd
import torch

from gat import *
from utils import *

## 1.1. Read input data
Read the node features and edge list from the respective files.
Construct a Data object from the features and edge list and split the data into train/val/test sets.

In [2]:
# Load the node feature table and the weighted edge list from disk.
# NOTE(security): pd.read_pickle can execute arbitrary code while unpickling;
# only load these files if they come from a trusted source.
node_features = pd.read_pickle('data/dataset3.pkl')
edge_list = pd.read_pickle('data/graph_weighted_edgeList_61.pkl')

# Build the graph Data object; the three fractions are passed positionally
# (presumably train / val / test split ratios -- confirm against graph_to_data_object).
data = graph_to_data_object(node_features, edge_list, 0.85, 0.05, 0.1)

# Integer division keeps the edge count an exact int (no float round-trip).
# Halving assumes every undirected edge is stored in both directions -- TODO confirm.
num_edges = data.edge_index.shape[1] // 2
print(f"\nNodes: {data.x.shape[0]}, Edges: {num_edges}, Node Features: {data.num_features}")


Train Nodes: 16076, Val Nodes: 1102, Test Nodes: 1838

Train Set Class Distribution: Left: 9336, Middle: 805, Right: 5935
Val Set Class Distribution: Left: 836, Middle: 152, Right: 114
Test Set Class Distribution: Left: 634, Middle: 570, Right: 634

Nodes: 23607, Edges: 253528.0, Node Features: 17


## 1.2. Grid search GAT Models

In [3]:
# Time the whole grid search with a monotonic clock: time.perf_counter() is
# unaffected by system clock adjustments, unlike time.time().
start_time = time.perf_counter()

# Two sub-grids (one per architecture); each expands to 2 lr x 2 l2 = 4 configs,
# so 8 configurations are evaluated in total.
param_grid = [
  {'architecture': ['two-layer'], 'lr': [1e-3, 1e-4], 'l2': [5e-3, 5e-4], 'epochs': [500], 'dim_h1': [8], 'dim_h2': [0]},
  {'architecture': ['three-layer'], 'lr': [1e-3, 1e-4], 'l2': [5e-3, 5e-4], 'epochs': [500], 'dim_h1': [64], 'dim_h2': [8]},
]

# NOTE(review): the meaning of the positional `3` is not visible here
# (number of CV folds or number of classes?) -- confirm against grid_search_cv.
best_model, best_config = grid_search_cv(data, 3, param_grid)
print(f"\nRun time: {time.perf_counter() - start_time} seconds")
print(f"\nBest model config: {best_config}")

# Report held-out test metrics for the model returned by the grid search.
print(f'\nGAT Dataset Test F1 score: {test_f1(best_model, data, data.test_mask):.5f}')
print(f'\nGAT Dataset Test AUC score: {test_auc(best_model, data, data.test_mask):.5f}\n')

Config: {'architecture': 'two-layer', 'lr': 0.001, 'l2': 0.005, 'epochs': 500, 'dim_h1': 8, 'dim_h2': 0}, Run time: 15 seconds
Config: {'architecture': 'two-layer', 'lr': 0.001, 'l2': 0.0005, 'epochs': 500, 'dim_h1': 8, 'dim_h2': 0}, Run time: 13 seconds
Config: {'architecture': 'two-layer', 'lr': 0.0001, 'l2': 0.005, 'epochs': 500, 'dim_h1': 8, 'dim_h2': 0}, Run time: 13 seconds
Config: {'architecture': 'two-layer', 'lr': 0.0001, 'l2': 0.0005, 'epochs': 500, 'dim_h1': 8, 'dim_h2': 0}, Run time: 14 seconds
Config: {'architecture': 'three-layer', 'lr': 0.001, 'l2': 0.005, 'epochs': 500, 'dim_h1': 64, 'dim_h2': 8}, Run time: 25 seconds
Config: {'architecture': 'three-layer', 'lr': 0.001, 'l2': 0.0005, 'epochs': 500, 'dim_h1': 64, 'dim_h2': 8}, Run time: 25 seconds
Config: {'architecture': 'three-layer', 'lr': 0.0001, 'l2': 0.005, 'epochs': 500, 'dim_h1': 64, 'dim_h2': 8}, Run time: 26 seconds
Config: {'architecture': 'three-layer', 'lr': 0.0001, 'l2': 0.0005, 'epochs': 500, 'dim_h1': 64,

## 1.3. Train and test the best GAT Model

In [4]:
# Retrain the selected configuration with a much larger epoch budget than the
# 500 epochs used during the grid search. The update is applied to best_config
# itself, so any later cell reading best_config sees epochs == 10000.
best_config['epochs'] = 10000

NUM_CLASSES = 3  # Left / Middle / Right

model = GAT(best_config, data.num_features, NUM_CLASSES)
# NOTE(review): the second argument is presumably a verbose/logging flag -- confirm in gat.fit.
model.fit(data, True)

# Rebind the result of .to(): relying on in-place device movement is fragile.
data = data.to(model.device)

# Inference only: switch to eval mode and disable autograd so the forward pass
# does not build a computation graph.
model.eval()
with torch.no_grad():
    _, out = model(data.x, data.edge_index)

print(f'\nGAT Dataset Test F1 score: {test_f1(model, data, data.test_mask):.5f}')
print(f'\nGAT Dataset Test AUC score: {test_auc(model, data, data.test_mask):.5f}')

Epoch   0 | Train Loss: 1.295 | Train F1: 0.050074645434187603:.2f
Epoch  10 | Train Loss: 1.291 | Train F1: 0.050074645434187603:.2f
Epoch  20 | Train Loss: 1.287 | Train F1: 0.050074645434187603:.2f
Epoch  30 | Train Loss: 1.283 | Train F1: 0.050074645434187603:.2f
Epoch  40 | Train Loss: 1.279 | Train F1: 0.050074645434187603:.2f
Epoch  50 | Train Loss: 1.275 | Train F1: 0.050074645434187603:.2f
Epoch  60 | Train Loss: 1.271 | Train F1: 0.050074645434187603:.2f
Epoch  70 | Train Loss: 1.267 | Train F1: 0.050074645434187603:.2f
Epoch  80 | Train Loss: 1.263 | Train F1: 0.050074645434187603:.2f
Epoch  90 | Train Loss: 1.259 | Train F1: 0.050074645434187603:.2f
Epoch 100 | Train Loss: 1.255 | Train F1: 0.050074645434187603:.2f
Epoch 110 | Train Loss: 1.251 | Train F1: 0.050074645434187603:.2f
Epoch 120 | Train Loss: 1.247 | Train F1: 0.050074645434187603:.2f
Epoch 130 | Train Loss: 1.244 | Train F1: 0.050074645434187603:.2f
Epoch 140 | Train Loss: 1.240 | Train F1: 0.050074645434187603