<h1>Example of use with PyTorch Geometric</h1>

PyTorch Geometric is a Python library built upon PyTorch, specifically for GNN. Data is managed on its own data class, allows batches. In graphs with complex topologies can perform pretty good, using message passing schemes. Common layers to use are Graph Convolutional Networks (transforming neighboring nodes) or Edge Convolutional Networks (similar but allows also get information from edges).

<h2>Load the data</h2>
Here an "export" folder with .txt instance files used on the CTWVRP project, and a .csv file with the neighborhoods.

In [None]:
import os

data_files_list = ["./export/"+f for f in os.listdir("./export") ]
instance_dict = {}
for dir_str in data_files_list:
    with open(dir_str, 'r') as text_file:
        cnt = 0
        instance = ""
        for line in text_file:
            if cnt < 9:
                if cnt == 0:
                    instance = line.split()[0]
                    instance_dict[instance] = []
                cnt += 1
                continue
            split_line = line.split()
            instance_dict[instance].append([int(i) for i in split_line])
        text_file.close()

ng_dict = {}
cnt = -1
with open("ng_outs.csv", 'r') as text_file:
    for line in text_file:
        if cnt < 2:
            cnt += 1
            continue
        raw_line = line.strip()
        split_line_list = raw_line.split(sep=";")
        instance = split_line_list[3]
        if instance not in ng_dict:
            ng_dict[instance] = [[0 for i in range(101)]]
        ng_dict[instance].append([0] + [int(i) for i in split_line_list[5:-1]])
        if len(split_line_list[5:-1]) != 100:
            print("case found for instance "+instance)
    text_file.close()

On the following, required libraries for data processing.
TODO: filter unnecesary

In [3]:
from math import sqrt
import torch
import torch_geometric as tg
from torch_geometric.data import Data
import networkx as nx
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, SAGEConv, GraphConv, global_add_pool
from torch_geometric.loader import DataLoader

In order to represent the original connection of the graph, a complete graph is computed.
TODO: provide some topology by filtering non-usable arcs

In [4]:
complete_graph_list = []
for i in range(101):
    for j in range(101):
        if i != j:
            complete_graph_list.append([i,j])
edge_index = torch.tensor(complete_graph_list, dtype=torch.long).t().contiguous()
n_edges = len(complete_graph_list)

Now, filter the self-edges from the NG sets

In [5]:
for instance_name in ng_dict:
    for i in range(101):
        for j in range(101):
            if i == j:
                ng_dict[instance_name][i][j] = 0

Organize data on the PyTorch Geometric data

In [6]:
data_list = []
for instance_name in ng_dict:
    y = torch.tensor(ng_dict[instance_name], dtype=torch.double)
    x = torch.tensor(instance_dict[instance_name], dtype=torch.double)
    attr = [[i] for i in range(n_edges)]
    loc_dict = {(i[0],j[0]): sqrt((i[1]-j[1])**2 + (i[2]-j[2])**2) for i in instance_dict[instance_name] for j in instance_dict[instance_name]}
    cnt = -1
    for i in range(101):
        for j in range(101):
            if i != j:
                cnt += 1
                attr[cnt].append(loc_dict[i,j])
    attr = torch.tensor(attr, dtype=torch.double)
    pos = []
    for i in instance_dict[instance_name]:
        pos.append([i[1], i[2]])
    pos = torch.tensor(pos, dtype=torch.double)
    data_list.append(Data(x=x, y=y, edge_index=edge_index, pos=pos, edge_attr=attr))

Format to Data Loader

In [9]:
# data_source = Instances(data_list)
dataloader = DataLoader(data_list[458:468], batch_size=1)
data_test = DataLoader(data_list[468:478], batch_size=1)
# datatorch = data_source.to_conv_nets(start=428, end=1458, batch_size=10)
# torchtest = data_source.to_conv_nets(start=1458, end=488, batch_size=10)

<h2>Neural networks</h2>
These create a node-based or edge-based convolution network

Graph Convolutions of this kind allow k-GNN layers (or node info pooling).

In [10]:
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        num_features = 7
        dim = 101*101

        self.conv1 = GraphConv(num_features, dim)
        self.conv2 = GraphConv(dim, dim)

        self.lin1 = Linear(dim, dim)

        self.double()

    def forward(self, x, edge_index, batch, edge_weight=None):
        # x, edge_index = data.x, data.edge_index

        # x = self.conv1(x, edge_index)
        # x = F.relu(x)
        # x = F.sigmoid(self.conv2(x, edge_index))
        # x = F.dropout(x, training=self.training)
        # x = self.conv2(x, edge_index)

        x = self.conv1(x, edge_index, edge_weight).relu()
        x = self.conv2(x, edge_index, edge_weight).relu()
        x = global_add_pool(x, batch)
        x = self.lin1(x).relu()
        x = F.dropout(x, p=0.5, training=self.training)
        # x = self.lin2(x)
        x = torch.reshape(x, (101, 101))
        return x

Definitions of train and test stages

In [11]:
def train(epoch):
    model.train()

    if epoch == 51:
        for param_group in optimizer.param_groups:
            param_group['lr'] = 0.5 * param_group['lr']

    loss_all = 0
    for data in dataloader:
        data = data.to(device)
        optimizer.zero_grad()
        # print(data.x, data.edge_index, data.batch)
        output = model(data.x, data.edge_index, data.batch)
        # print(output, data.y)
        # print(output)
        loss = F.l1_loss(output, data.y)
        loss.backward()
        loss_all += loss.item() * data.num_graphs
        optimizer.step()
        test_size = 101*100*1 #extract batch size
    return loss_all / test_size

def test(loader):
    model.eval()

    correct = 0
    for data in loader:
        data = data.to(device)
        output = model(data.x, data.edge_index, data.batch)
        pred = output.max(dim=1)[1]
        correct += pred.eq(data.y).sum().item()
        test_size = 101*100*1 #extract batch size
    return correct / test_size

<h2>Train and immediate test per epoch</h2>

In [12]:
from copy import deepcopy

In [13]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
model_dict = {}

for epoch in range(1, 11):
    loss = train(epoch)
    train_acc = test(dataloader)
    test_acc = test(data_test)
    model_dict[epoch] = {"model": deepcopy(model), "loss": loss, "trainAcc": train_acc, "testAcc": test_acc}
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, '
          f'Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

Epoch: 001, Loss: 12404.8895, Train Acc: 27.9051, Test Acc: 9.3078
Epoch: 002, Loss: 16.6835, Train Acc: 29.7397, Test Acc: 9.9231
Epoch: 003, Loss: 5.7465, Train Acc: 28.8575, Test Acc: 9.6248
Epoch: 004, Loss: 2.2811, Train Acc: 29.7014, Test Acc: 9.9246
Epoch: 005, Loss: 5.9399, Train Acc: 28.5488, Test Acc: 9.5261
Epoch: 006, Loss: 8.6129, Train Acc: 29.1063, Test Acc: 9.6251
Epoch: 007, Loss: 10.0299, Train Acc: 29.4419, Test Acc: 9.8234
Epoch: 008, Loss: 10.0956, Train Acc: 28.2541, Test Acc: 9.4272
Epoch: 009, Loss: 6.9182, Train Acc: 29.7095, Test Acc: 9.9428
Epoch: 010, Loss: 3.1130, Train Acc: 29.4424, Test Acc: 9.8240


<h2>Storing and test results</h2>

In [14]:
import pickle

In [15]:
with open('torch_geometric.pickle', 'wb') as handle:
    pickle.dump(model_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

# with open('filename.pickle', 'rb') as handle:
#     model_dict_restored = pickle.load(handle)

In [16]:
model.eval()
data_show = data_list[430]
pred = model(data_show.x, data_show.edge_index, data_show.batch).tolist()
y = data_show.y
diff = 0
for j in range(len(pred)):
    a_list = [1 if i*100 > 1 else 0 for i in pred[j]]
    diff += sum([(y[j][i].tolist() - a_list[i])**2 for i in range(len(a_list))])
    print([1 if i*10 > 0.1 else 0 for i in pred[j]], sum(a_list))
print(diff)
print(sum([y[j][i].tolist() for i in range(101) for j in range(101)]))

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 0
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 0
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 0
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 