# Baseline GNN model

For the baseline model we will use a fully connected graph, and in the first iteration ignore the edge features (weights). This model should do better than a simple MLP at classifying the nodes.

In [3]:
import pickle
import numpy as np
import torch

from pathlib import Path
from itertools import permutations
from torch_geometric.data import Data, Dataset
from scipy import stats

In [4]:
INSTANCE_PATH = Path("data")
RESULTS_PATH = Path("results")

## Pre-processing data

Pre-processing the data so that it's in the required format.

In [5]:
with open(INSTANCE_PATH / 'dataset.pkl', 'rb') as f:
    data = pickle.load(f)

In [6]:
graph = data[0]
graph

(array([[0.5       , 0.5       , 0.        ],
        [0.06505159, 0.94888554, 2.        ],
        [0.96563203, 0.80839735, 4.        ],
        [0.30461377, 0.09767211, 8.        ],
        [0.68423303, 0.44015249, 7.        ],
        [0.12203823, 0.49517691, 9.        ],
        [0.03438852, 0.9093204 , 8.        ],
        [0.25877998, 0.66252228, 5.        ],
        [0.31171108, 0.52006802, 2.        ],
        [0.54671028, 0.18485446, 5.        ],
        [0.96958463, 0.77513282, 8.        ]]),
 array([-1,  0,  1,  2,  1,  0,  0,  0,  2,  2,  1]))

For the graph is fully connected and the edge features (representing the distance between each neighbour) is ignored.

It's not yet clear how the the demand capacities of the vehicles will be modeled.

In [9]:
x = torch.tensor(graph[0])
y = torch.tensor(graph[1] + 1)
edge_indices = torch.tensor(list(permutations(np.arange(x.size(0)), r=2)), dtype=torch.long).t().contiguous()

In [10]:
x.shape, y.shape, edge_indices.shape

(torch.Size([11, 3]), torch.Size([11]), torch.Size([2, 110]))

In [9]:
def get_data(graph):
    x = torch.tensor(graph[0], dtype=torch.float)
    y = torch.tensor(graph[1] + 1, dtype=torch.long)
    edge_indices = torch.tensor(list(permutations(np.arange(x.size(0)), r=2)), dtype=torch.long).t().contiguous()
    
    return Data(x=x, edge_index=edge_indices, y=y)

In [10]:
get_data(graph)

Data(x=[11, 3], edge_index=[2, 110], y=[11])

In [11]:
class VRPDataset(Dataset):
    def __init__(self, raw_data):
        super(VRPDataset, self).__init__()
        self.graph_data = self._process(raw_data)
        
    def _process(self, raw_data):
        return [get_data(d) for d in raw_data]
        
    def len(self):
        return len(self.graph_data)

    def get(self, index):
        return self.graph_data[index]

In [12]:
dataset = VRPDataset(data)

In [13]:
dataset, dataset[0]

(VRPDataset(2000), Data(x=[11, 3], edge_index=[2, 110], y=[11]))

## Building the model

Build the actual baseline model.

In [8]:
from torch_geometric.nn import GCNConv, GATConv
import torch.nn as nn
import torch.nn.functional as F

NODE_HIDDEN = 1
EDGE_HIDDEN = 1
GCN_NUM_LAYERS = 1

class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        self.node_embedding = nn.Linear(dataset.num_features, NODE_HIDDEN)
        self.edge_embedding = nn.Linear()
        
        self.conv1 = GATConv(dataset.num_features, hidden_channels)
#         self.conv2 = GATConv(hidden_channels, hidden_channels)
        self.fc = torch.nn.Linear(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index):
        x = F.relu(self.conv1(x, edge_index))
#         x = F.relu(self.conv2(x, edge_index))
        x = self.fc(x)
        
        return x

In [30]:
learning_rate = 1e-4
weight_decay = 0.96

model = GCN(hidden_channels=16)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
criterion = torch.nn.CrossEntropyLoss()

In [31]:
model

GCN(
  (conv1): GATConv(3, 16, heads=1)
  (fc): Linear(in_features=16, out_features=5, bias=True)
)

In [32]:
def train(data):
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    loss = criterion(out, data.y)
    loss.backward()
    optimizer.step()

    return loss

def test(data):
    model.eval()
    
    with torch.no_grad():
        out = model(data.x, data.edge_index)
        pred = out.softmax(dim=1).argmax(dim=1)

        test_correct = pred == data.y
        test_acc = int(test_correct.sum()) / int(test_correct.size(0))
    
        return test_acc, pred

In [33]:
training_set = dataset[:int(len(dataset) * 0.8)]
test_set = dataset[int(len(dataset) * 0.8):]

training_set, test_set

(VRPDataset(1600), VRPDataset(400))

In [34]:
for epoch in range(1, 10):
    epoch_loss = 0
    for graph in training_set:
        loss = train(graph)
        epoch_loss += loss.item()
        
    print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}')

Epoch: 01, Loss: 1.3348
Epoch: 02, Loss: 1.3546
Epoch: 03, Loss: 1.3897
Epoch: 04, Loss: 1.4172
Epoch: 05, Loss: 1.4337
Epoch: 06, Loss: 1.4429
Epoch: 07, Loss: 1.4477
Epoch: 08, Loss: 1.4502
Epoch: 09, Loss: 1.4517


In [35]:
test(test_set[0])

(0.36363636363636365, tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]))