# Notebook Content
In this notebook, the three datasets are set up and tested using the GraphSAGE model.

## 1. Import packages
We use _PyTorch_ as the main neural-network package and _PyTorch Geometric_ as a utility package that implements graph functions.

In [1]:
import time

import pandas as pd

from graphsage import *

## 1.1. Read input data
Read the node features and edge list from the respective files.
Construct a Data object from the features and edge list and split the data into train/val/test sets.

In [2]:
# Load the node-feature table and the weighted edge list from disk.
# Unpickling can execute arbitrary code, so only read files from a trusted source.
node_features = pd.read_pickle('data/dataset3.pkl')
edge_list = pd.read_pickle('data/graph_weighted_edgeList_61.pkl')

# The three numeric arguments are forwarded positionally and unchanged.
# NOTE(review): presumably the train/val/test split fractions (the printed
# split sizes below are in a 1 : 5 : 10 ratio) -- confirm against
# graphsage.graph_to_data_object.
split_params = (0.01, 0.05, 0.1)
data = graph_to_data_object(node_features, edge_list, *split_params)

# Summary line: node count, edge count and feature width of the built object.
num_nodes = data.x.shape[0]
# The edge_index column count is halved for reporting.
# NOTE(review): presumably each undirected edge is stored in both directions -- confirm.
num_edges = data.edge_index.shape[1] // 2
print(f"\nNodes: {num_nodes}, Edges: {num_edges}, Node Features: {data.num_features}")


Train Nodes: 190, Val Nodes: 950, Test Nodes: 1901

Train Set Class Distribution: Left: 63, Middle: 63, Right: 64
Val Set Class Distribution: Left: 317, Middle: 316, Right: 317
Test Set Class Distribution: Left: 635, Middle: 633, Right: 633

Nodes: 23607, Edges: 253528, Node Features: 17


## 1.2. Grid search GraphSAGE Models

In [3]:
# Grid-search GraphSAGE hyper-parameters, then score the winning model on the
# test mask.
#
# perf_counter() is a monotonic clock intended for measuring durations, so the
# reported run time cannot be skewed by system clock adjustments during the
# search (each config takes tens of seconds).
start_time = time.perf_counter()


def make_grid_entry(architecture, aggr, proj, dim_h1, dim_h2):
    """Build one parameter-grid dict for ``grid_search_cv``.

    Every value is a list of candidates. The learning rate, L2 weight, batch
    size and epoch count are shared by all entries; only the arguments vary.
    Key order matches the original hand-written grid.
    """
    return {
        'architecture': [architecture],
        'lr': [1e-3, 1e-4],
        'l2': [5e-3, 5e-4],
        'batch': [16],
        'aggr': [aggr],
        'proj': [proj],
        'epochs': [200],
        'dim_h1': [dim_h1],
        'dim_h2': [dim_h2],
    }


# Two architectures x two aggregators; 'max' aggregation is paired with
# proj=True and 'mean' with proj=False, exactly as in the original grid.
param_grid = [
    make_grid_entry('two-layer', 'mean', False, 8, 0),
    make_grid_entry('two-layer', 'max', True, 8, 0),
    make_grid_entry('three-layer', 'mean', False, 64, 8),
    make_grid_entry('three-layer', 'max', True, 64, 8),
]

# NOTE(review): the meaning of the literal 3 is defined by
# graphsage.grid_search_cv (presumably CV folds or class count) -- confirm.
best_model, best_config = grid_search_cv(data, 3, param_grid)
print(f"\nRun time: {time.perf_counter() - start_time} seconds")
print(f"Best model config: {best_config}")

# Score the selected model on the test mask.
print(f'\nGraphSAGE Dataset Test F1 score: {test_f1(best_model, data, data.test_mask):.2f}')
print(f'\nGraphSAGE Dataset Test AUC score: {test_auc(best_model, data, data.test_mask):.2f}')

Config: {'architecture': 'two-layer', 'batch': 16, 'lr': 0.001, 'l2': 0.005, 'aggr': 'mean', 'proj': False, 'epochs': 200, 'dim_h1': 8, 'dim_h2': 0}, Run time: 38 seconds
Config: {'architecture': 'two-layer', 'batch': 16, 'lr': 0.001, 'l2': 0.0005, 'aggr': 'mean', 'proj': False, 'epochs': 200, 'dim_h1': 8, 'dim_h2': 0}, Run time: 38 seconds
Config: {'architecture': 'two-layer', 'batch': 16, 'lr': 0.0001, 'l2': 0.005, 'aggr': 'mean', 'proj': False, 'epochs': 200, 'dim_h1': 8, 'dim_h2': 0}, Run time: 38 seconds
Config: {'architecture': 'two-layer', 'batch': 16, 'lr': 0.0001, 'l2': 0.0005, 'aggr': 'mean', 'proj': False, 'epochs': 200, 'dim_h1': 8, 'dim_h2': 0}, Run time: 38 seconds
Config: {'architecture': 'two-layer', 'batch': 16, 'lr': 0.001, 'l2': 0.005, 'aggr': 'max', 'proj': True, 'epochs': 200, 'dim_h1': 8, 'dim_h2': 0}, Run time: 43 seconds
Config: {'architecture': 'two-layer', 'batch': 16, 'lr': 0.001, 'l2': 0.0005, 'aggr': 'max', 'proj': True, 'epochs': 200, 'dim_h1': 8, 'dim_h2'

## 1.3. Train and test the best GraphSAGE Model

In [4]:
# Retrain the grid-search winner with a longer schedule, then report test metrics.
# This overwrites 'epochs' on best_config in place, so after this cell the dict
# no longer matches the configuration printed by the grid-search cell.
best_config['epochs'] = 2000

# NOTE(review): the literal 3 is presumably the number of target classes
# (the split summary lists Left/Middle/Right) -- confirm against GraphSAGE.
model = GraphSAGE(best_config, data.num_features, 3)
# NOTE(review): the second positional argument presumably enables the
# per-epoch progress log shown in the output -- confirm against GraphSAGE.fit.
# NOTE(review): the logged train loss (~0.09) and train F1 (~0.03) look like
# chance-level values for three classes (ln 3 ~= 1.10, F1 ~= 0.33) divided by
# roughly 12 -- worth checking how fit() averages its per-batch metrics and
# whether the model is learning at all.
model.fit(data, True)

# The return value is discarded, so this relies on .to() moving `data` in place.
# NOTE(review): confirm that holds for the object graph_to_data_object returns.
data.to(model.device)

# Switch to eval mode and run one forward pass over the whole graph.
# `out` is not used again in this cell; it stays bound for any later cell.
model.eval()
features, edges = data.x, data.edge_index
_, out = model(features, edges)

# Report test-split metrics, each computed immediately before it is printed.
test_mask = data.test_mask
final_f1 = test_f1(model, data, test_mask)
print(f'\nGraphSAGE Dataset Test F1 score: {final_f1:.5f}')
final_auc = test_auc(model, data, test_mask)
print(f'\nGraphSAGE Dataset Test AUC score: {final_auc:.5f}')

Epoch  10 | Train Loss: 0.091 | Train F1: 0.02
Epoch  20 | Train Loss: 0.090 | Train F1: 0.03
Epoch  30 | Train Loss: 0.090 | Train F1: 0.03
Epoch  40 | Train Loss: 0.091 | Train F1: 0.03
Epoch  50 | Train Loss: 0.093 | Train F1: 0.03
Epoch  60 | Train Loss: 0.092 | Train F1: 0.03
Epoch  70 | Train Loss: 0.094 | Train F1: 0.02
Epoch  80 | Train Loss: 0.092 | Train F1: 0.03
Epoch  90 | Train Loss: 0.088 | Train F1: 0.03
Epoch 100 | Train Loss: 0.092 | Train F1: 0.03
Epoch 110 | Train Loss: 0.092 | Train F1: 0.02
Epoch 120 | Train Loss: 0.092 | Train F1: 0.02
Epoch 130 | Train Loss: 0.089 | Train F1: 0.03
Epoch 140 | Train Loss: 0.089 | Train F1: 0.03
Epoch 150 | Train Loss: 0.088 | Train F1: 0.04
Epoch 160 | Train Loss: 0.089 | Train F1: 0.03
Epoch 170 | Train Loss: 0.085 | Train F1: 0.04
Epoch 180 | Train Loss: 0.088 | Train F1: 0.04
Epoch 190 | Train Loss: 0.090 | Train F1: 0.03
Epoch 200 | Train Loss: 0.087 | Train F1: 0.04
Epoch 210 | Train Loss: 0.088 | Train F1: 0.03
Epoch 220 | T