In [10]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from sklearn.metrics import mean_squared_error
from torch_geometric.loader import DataLoader

from exploration.dataset import PyGAcademicGraph

from utils import train, evaluate

from exploitation.models import GAT, MPGCN_Net, GCN

In [2]:
# Build the train / val / test datasets and loaders for the transductive setting.
batch_size = 1
# Forwarded as `sparcify_threshold`; None for this first run.
threshold = None

# train dataset
train_dataset = PyGAcademicGraph(split="train", setting="transductive", sparcify_threshold=threshold)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)

# val dataset
val_dataset = PyGAcademicGraph(split="val", setting="transductive", sparcify_threshold=threshold)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

# test dataset
test_dataset = PyGAcademicGraph(split="test", setting="transductive", sparcify_threshold=threshold)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

# number of features: second dimension of the node feature matrix `x`
n_features = train_dataset[0].x.shape[1]

# Each count is taken from the dataset of the split named in its message.
print("Number of samples in the train dataset: ", len(train_dataset))
print("Number of samples in the val dataset: ", len(val_dataset))
print("Number of samples in the test dataset: ", len(test_dataset))
print("Output of one sample from the train dataset: ", train_dataset[0])
print("Edge_index :")
print(train_dataset[0].edge_index)
print("Number of features per node: ", n_features)

Number of samples in the train dataset:  9
Number of samples in the val dataset:  9
Number of samples in the test dataset:  9
Output of one sample from the train dataset:  Data(edge_index=[2, 36936], y=[359], x=[359, 8], edge_attr=[36936, 3], domain='Academia', train_mask=[359], val_mask=[359], test_mask=[359], mask=[359])
Edge_index :
tensor([[  0,   0,   0,  ..., 357, 358, 358],
        [  1,  89, 121,  ..., 352,  89, 358]])
Number of features per node:  8


In [3]:
# Train on the GPU when torch can see one, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print("\nDevice: ", device)

# Run length and objective.
num_epochs = 20
loss_fcn = nn.MSELoss()

# MPGCN_Net with n_features input channels, 64 hidden channels and 1 output channel.
graph_convolution_no_weights = MPGCN_Net(in_channels=n_features, hidden_channels=64, out_channels=1)
graph_convolution_no_weights = graph_convolution_no_weights.to(device)

optimizer = torch.optim.Adam(params=graph_convolution_no_weights.parameters(), lr=0.005)

# `train` comes from the project's utils module and is given both the train and
# val loaders; it presumably returns the evaluated epochs and their validation
# MSE values -- confirm against utils.train.
epoch_list, GCN_MSE = train(
    graph_convolution_no_weights,
    loss_fcn,
    device,
    optimizer,
    num_epochs,
    train_dataloader,
    val_dataloader,
)


Device:  cuda
Epoch 00001 | Loss: 0.3448
MSE: 0.3361
Epoch 00002 | Loss: 0.3448
Epoch 00003 | Loss: 0.3448
Epoch 00004 | Loss: 0.3448
Epoch 00005 | Loss: 0.3448
Epoch 00006 | Loss: 0.3448
MSE: 0.3361
Epoch 00007 | Loss: 0.3448
Epoch 00008 | Loss: 0.3448
Epoch 00009 | Loss: 0.3448
Epoch 00010 | Loss: 0.3448
Epoch 00011 | Loss: 0.3448
MSE: 0.3361
Epoch 00012 | Loss: 0.3448
Epoch 00013 | Loss: 0.3448
Epoch 00014 | Loss: 0.3448
Epoch 00015 | Loss: 0.3448
Epoch 00016 | Loss: 0.3448
MSE: 0.3361
Epoch 00017 | Loss: 0.3448
Epoch 00018 | Loss: 0.3448
Epoch 00019 | Loss: 0.3448
Epoch 00020 | Loss: 0.3448


In [4]:
# Train on the GPU when torch can see one, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print("\nDevice: ", device)

# Run length and objective.
num_epochs = 150
loss_fcn = nn.MSELoss()

# NOTE: the variable names are shared with the other training cells; in this
# cell `graph_convolution_no_weights` holds a GAT model and `GCN_MSE` holds the
# values returned for that GAT run.
graph_convolution_no_weights = GAT(input_size=n_features, hidden_size=32, output_size=1, num_layers=1, heads=2)
graph_convolution_no_weights = graph_convolution_no_weights.to(device)

optimizer = torch.optim.Adam(params=graph_convolution_no_weights.parameters(), lr=0.005)

# `train` comes from the project's utils module and is given both the train and
# val loaders; it presumably returns the evaluated epochs and their validation
# MSE values -- confirm against utils.train.
epoch_list, GCN_MSE = train(
    graph_convolution_no_weights,
    loss_fcn,
    device,
    optimizer,
    num_epochs,
    train_dataloader,
    val_dataloader,
)


Device:  cuda
Epoch 00001 | Loss: 43187.3887
MSE: 36131.2617
Epoch 00002 | Loss: 30311.8243
Epoch 00003 | Loss: 19599.6699
Epoch 00004 | Loss: 14692.8077
Epoch 00005 | Loss: 10291.0937
Epoch 00006 | Loss: 6866.1989
MSE: 4436.2729
Epoch 00007 | Loss: 4464.9954
Epoch 00008 | Loss: 3334.9351
Epoch 00009 | Loss: 2987.4729
Epoch 00010 | Loss: 2803.0264
Epoch 00011 | Loss: 2307.8507
MSE: 1744.7559
Epoch 00012 | Loss: 2415.5899
Epoch 00013 | Loss: 2314.9784
Epoch 00014 | Loss: 2158.3478
Epoch 00015 | Loss: 1983.9828
Epoch 00016 | Loss: 1849.4295
MSE: 1196.8413
Epoch 00017 | Loss: 1608.6947
Epoch 00018 | Loss: 922.9636
Epoch 00019 | Loss: 2283.1604
Epoch 00020 | Loss: 616.1161
Epoch 00021 | Loss: 2086.6201
MSE: 436.1094
Epoch 00022 | Loss: 510.7840
Epoch 00023 | Loss: 475.6896
Epoch 00024 | Loss: 401.0600
Epoch 00025 | Loss: 354.5045
Epoch 00026 | Loss: 305.6034
MSE: 215.6701
Epoch 00027 | Loss: 256.3865
Epoch 00028 | Loss: 228.8221
Epoch 00029 | Loss: 188.7701
Epoch 00030 | Loss: 157.7618
Ep

In [5]:
# Train on the GPU when torch can see one, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print("\nDevice: ", device)

# Run length and objective.
num_epochs = 50
loss_fcn = nn.MSELoss()

# GCN with n_features inputs, a hidden size of 64 and a single output.
graph_convolution_no_weights = GCN(input_size=n_features, hidden_size=64, output_size=1)
graph_convolution_no_weights = graph_convolution_no_weights.to(device)

optimizer = torch.optim.Adam(params=graph_convolution_no_weights.parameters(), lr=0.005)

# `train` comes from the project's utils module and is given both the train and
# val loaders; it presumably returns the evaluated epochs and their validation
# MSE values -- confirm against utils.train.
epoch_list, GCN_MSE = train(
    graph_convolution_no_weights,
    loss_fcn,
    device,
    optimizer,
    num_epochs,
    train_dataloader,
    val_dataloader,
)


Device:  cuda
Epoch 00001 | Loss: 0.3448
MSE: 0.3360
Epoch 00002 | Loss: 0.3428
Epoch 00003 | Loss: 0.2871
Epoch 00004 | Loss: 0.2500
Epoch 00005 | Loss: 0.2454
Epoch 00006 | Loss: 0.2426
MSE: 0.2436
Epoch 00007 | Loss: 0.2414
Epoch 00008 | Loss: 0.2410
Epoch 00009 | Loss: 0.2409
Epoch 00010 | Loss: 0.2408
Epoch 00011 | Loss: 0.2408
MSE: 0.2431
Epoch 00012 | Loss: 0.2407
Epoch 00013 | Loss: 0.2407
Epoch 00014 | Loss: 0.2407
Epoch 00015 | Loss: 0.2407
Epoch 00016 | Loss: 0.2407
MSE: 0.2432
Epoch 00017 | Loss: 0.2407
Epoch 00018 | Loss: 0.2407
Epoch 00019 | Loss: 0.2407
Epoch 00020 | Loss: 0.2407
Epoch 00021 | Loss: 0.2407
MSE: 0.2432
Epoch 00022 | Loss: 0.2407
Epoch 00023 | Loss: 0.2407
Epoch 00024 | Loss: 0.2407
Epoch 00025 | Loss: 0.2407
Epoch 00026 | Loss: 0.2407
MSE: 0.2432
Epoch 00027 | Loss: 0.2407
Epoch 00028 | Loss: 0.2407
Epoch 00029 | Loss: 0.2407
Epoch 00030 | Loss: 0.2407
Epoch 00031 | Loss: 0.2407
MSE: 0.2431
Epoch 00032 | Loss: 0.2407
Epoch 00033 | Loss: 0.2407
Epoch 0003

## Add a sparsification threshold (`sparcify_threshold = 15`)

In [12]:
# Rebuild the transductive train / val / test datasets and loaders, this time
# with a sparsification threshold.
batch_size = 1
# Forwarded as `sparcify_threshold`. In the recorded outputs the first train
# sample has 3641 edges with this value versus 36936 with threshold=None.
threshold = 15

# train dataset
train_dataset = PyGAcademicGraph(split="train", setting="transductive", sparcify_threshold=threshold)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)

# val dataset
val_dataset = PyGAcademicGraph(split="val", setting="transductive", sparcify_threshold=threshold)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

# test dataset
test_dataset = PyGAcademicGraph(split="test", setting="transductive", sparcify_threshold=threshold)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

# number of features: second dimension of the node feature matrix `x`
n_features = train_dataset[0].x.shape[1]

# Each count is taken from the dataset of the split named in its message.
print("Number of samples in the train dataset: ", len(train_dataset))
print("Number of samples in the val dataset: ", len(val_dataset))
print("Number of samples in the test dataset: ", len(test_dataset))
print("Output of one sample from the train dataset: ", train_dataset[0])
print("Edge_index :")
print(train_dataset[0].edge_index)
print("Number of features per node: ", n_features)

Preparing domain: Academia
Preparing domain: Applied Sciences
Preparing domain: Education
Preparing domain: Engineering
Preparing domain: Humanities
Preparing domain: Mathematics and Computing
Preparing domain: Medicine and Health
Preparing domain: Natural Sciences
Preparing domain: Social Sciences
Number of samples in the train dataset:  9
Number of samples in the val dataset:  9
Number of samples in the test dataset:  9
Output of one sample from the train dataset:  Data(edge_index=[2, 3641], y=[359], x=[359, 8], edge_attr=[3641, 3], domain='Academia', train_mask=[359], val_mask=[359], test_mask=[359], mask=[359])
Edge_index :
tensor([[  0,   0,   0,  ..., 354, 355, 356],
        [ 89, 183,   0,  ..., 354, 355, 356]])
Number of features per node:  8


In [13]:
# Train on the GPU when torch can see one, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print("\nDevice: ", device)

# Run length and objective.
num_epochs = 200
loss_fcn = nn.MSELoss()

# MPGCN_Net with n_features input channels, 64 hidden channels and 1 output channel.
graph_convolution_no_weights = MPGCN_Net(in_channels=n_features, hidden_channels=64, out_channels=1)
graph_convolution_no_weights = graph_convolution_no_weights.to(device)

optimizer = torch.optim.Adam(params=graph_convolution_no_weights.parameters(), lr=0.005)

# `train` comes from the project's utils module and is given both the train and
# val loaders; it presumably returns the evaluated epochs and their validation
# MSE values -- confirm against utils.train.
epoch_list, GCN_MSE = train(
    graph_convolution_no_weights,
    loss_fcn,
    device,
    optimizer,
    num_epochs,
    train_dataloader,
    val_dataloader,
)


Device:  cuda
Epoch 00001 | Loss: 0.1918
MSE: 0.1688
Epoch 00002 | Loss: 0.1739
Epoch 00003 | Loss: 0.1677
Epoch 00004 | Loss: 0.1599
Epoch 00005 | Loss: 0.1561
Epoch 00006 | Loss: 0.1555
MSE: 0.1422
Epoch 00007 | Loss: 0.1538
Epoch 00008 | Loss: 0.1531
Epoch 00009 | Loss: 0.1526
Epoch 00010 | Loss: 0.1514
Epoch 00011 | Loss: 0.1500
MSE: 0.1385
Epoch 00012 | Loss: 0.1484
Epoch 00013 | Loss: 0.1449
Epoch 00014 | Loss: 0.1391
Epoch 00015 | Loss: 0.1062
Epoch 00016 | Loss: 0.0995
MSE: 0.0702
Epoch 00017 | Loss: 0.0654
Epoch 00018 | Loss: 0.1545
Epoch 00019 | Loss: 0.1903
Epoch 00020 | Loss: 0.1932
Epoch 00021 | Loss: 0.1898
MSE: 0.1956
Epoch 00022 | Loss: 0.1825
Epoch 00023 | Loss: 0.1689
Epoch 00024 | Loss: 0.0972
Epoch 00025 | Loss: 0.0589
Epoch 00026 | Loss: 0.0553
MSE: 0.0542
Epoch 00027 | Loss: 0.0481
Epoch 00028 | Loss: 0.0872
Epoch 00029 | Loss: 0.0834
Epoch 00030 | Loss: 0.0667
Epoch 00031 | Loss: 0.1217
MSE: 0.1130
Epoch 00032 | Loss: 0.0596
Epoch 00033 | Loss: 0.1424
Epoch 0003

In [14]:
# Train on the GPU when torch can see one, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print("\nDevice: ", device)

# Run length and objective.
num_epochs = 50
loss_fcn = nn.MSELoss()

# GCN with n_features inputs, a hidden size of 64 and a single output.
graph_convolution_no_weights = GCN(input_size=n_features, hidden_size=64, output_size=1)
graph_convolution_no_weights = graph_convolution_no_weights.to(device)

optimizer = torch.optim.Adam(params=graph_convolution_no_weights.parameters(), lr=0.005)

# `train` comes from the project's utils module and is given both the train and
# val loaders; it presumably returns the evaluated epochs and their validation
# MSE values -- confirm against utils.train.
epoch_list, GCN_MSE = train(
    graph_convolution_no_weights,
    loss_fcn,
    device,
    optimizer,
    num_epochs,
    train_dataloader,
    val_dataloader,
)


Device:  cuda
Epoch 00001 | Loss: 0.2402
MSE: 0.2408
Epoch 00002 | Loss: 0.2402
Epoch 00003 | Loss: 0.2402
Epoch 00004 | Loss: 0.2402
Epoch 00005 | Loss: 0.2402
Epoch 00006 | Loss: 0.2402
MSE: 0.2408
Epoch 00007 | Loss: 0.2402
Epoch 00008 | Loss: 0.2402
Epoch 00009 | Loss: 0.2402
Epoch 00010 | Loss: 0.2402
Epoch 00011 | Loss: 0.2402
MSE: 0.2408
Epoch 00012 | Loss: 0.2402
Epoch 00013 | Loss: 0.2402
Epoch 00014 | Loss: 0.2402
Epoch 00015 | Loss: 0.2402
Epoch 00016 | Loss: 0.2402
MSE: 0.2408
Epoch 00017 | Loss: 0.2402
Epoch 00018 | Loss: 0.2402
Epoch 00019 | Loss: 0.2402
Epoch 00020 | Loss: 0.2402
Epoch 00021 | Loss: 0.2402
MSE: 0.2408
Epoch 00022 | Loss: 0.2402
Epoch 00023 | Loss: 0.2402
Epoch 00024 | Loss: 0.2402
Epoch 00025 | Loss: 0.2402
Epoch 00026 | Loss: 0.2402
MSE: 0.2408
Epoch 00027 | Loss: 0.2402
Epoch 00028 | Loss: 0.2402
Epoch 00029 | Loss: 0.2402
Epoch 00030 | Loss: 0.2402
Epoch 00031 | Loss: 0.2402
MSE: 0.2408
Epoch 00032 | Loss: 0.2402
Epoch 00033 | Loss: 0.2402
Epoch 0003

In [1]:
# Build the train / val / test datasets and loaders for the inductive setting.
# This cell needs the imports cell to have been run first: the recorded output
# is a NameError for PyGAcademicGraph.
batch_size = 1
# Forwarded as `sparcify_threshold`.
threshold = 15

# train dataset
train_dataset = PyGAcademicGraph(split="train", setting="inductive", sparcify_threshold=threshold)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)

# val dataset
val_dataset = PyGAcademicGraph(split="val", setting="inductive", sparcify_threshold=threshold)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

# test dataset
test_dataset = PyGAcademicGraph(split="test", setting="inductive", sparcify_threshold=threshold)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

# number of features: second dimension of the node feature matrix `x`
n_features = train_dataset[0].x.shape[1]

# Each count is taken from the dataset of the split named in its message.
print("Number of samples in the train dataset: ", len(train_dataset))
print("Number of samples in the val dataset: ", len(val_dataset))
print("Number of samples in the test dataset: ", len(test_dataset))
print("Output of one sample from the train dataset: ", train_dataset[0])
print("Edge_index :")
print(train_dataset[0].edge_index)
print("Number of features per node: ", n_features)

NameError: name 'PyGAcademicGraph' is not defined