## Paper: Efficiently solving the practical vehicle routing problem

Source: `Duan et al., ‘Efficiently Solving the Practical Vehicle Routing Problem’.`

In [1]:
import pickle
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import DataLoader, Dataset
from pathlib import Path

In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print("Device", device)

Device cpu


In [3]:
INSTANCE_PATH = Path("data")
RESULTS_PATH = Path("results")

## Loading Data

In [4]:
with open(RESULTS_PATH / "vrp_4_25.pkl", 'rb') as f:
    results = pickle.load(f)
    
with open(INSTANCE_PATH / 'instances.pkl', 'rb') as f:
    instances = pickle.load(f)

In [5]:
routes_dataset = [(x, y['routes']) for (x, y) in zip(instances, results)]

In [47]:
graphs_dataset = [(x, y['classifications']) for (x, y) in zip(instances, results)]
graphs = graphs_dataset

In [48]:
graphs[0]

(array([[0.5       , 0.5       , 0.        ],
        [0.06505159, 0.94888554, 2.        ],
        [0.96563203, 0.80839735, 4.        ],
        [0.30461377, 0.09767211, 8.        ],
        [0.68423303, 0.44015249, 7.        ],
        [0.12203823, 0.49517691, 9.        ],
        [0.03438852, 0.9093204 , 8.        ],
        [0.25877998, 0.66252228, 5.        ],
        [0.31171108, 0.52006802, 2.        ],
        [0.54671028, 0.18485446, 5.        ],
        [0.96958463, 0.77513282, 8.        ]]),
 array([-1,  0,  1,  2,  1,  0,  0,  0,  2,  2,  1]))

In [49]:
def get_adj_matrix(num_nodes, routes):
    adj_matrix = torch.zeros((num_nodes, num_nodes), dtype=torch.float32)

    for path in routes.values():
        if len(path) == 0:
            continue

        path = [0] + path + [0]

        for i in range(0, len(path) - 1):
            x_i = path[i]
            x_j = path[i + 1]

            adj_matrix[x_i][x_j] = adj_matrix[x_j][x_i] = 1

    return adj_matrix

In [50]:
def get_distance(pointA, pointB):
    # Efficient way to calculate the euclidean distance
    return np.linalg.norm(pointA - pointB)

In [54]:
def pre_process_graph(graph):
    nodes, label = graph
    
    label = torch.tensor(label + 1, dtype=torch.int64)
#     label = get_adj_matrix(nodes.shape[0], label)
    graph = torch.tensor(nodes[:, :-1], dtype=torch.float32)
    demand = torch.tensor(nodes[:, -1], dtype=torch.float32)
    distance = np.zeros((nodes.shape[0], nodes.shape[0]))

    for i in range(len(distance)):
        for j in range(i + 1, len(distance)):
            d = get_distance(graph[i], graph[j])
            distance[i][j] = d
            distance[j][i] = d
            
    distance = torch.tensor(distance, dtype=torch.float32)
            
    return graph, demand, distance, label

In [55]:
class GraphDataset(Dataset):
    def __init__(self, data):
        super().__init__()
        self.graphs = []
        self.demands = []
        self.distances = []
        self.labels = []
        
        for i in range(len(data)):
            processed = pre_process_graph(data[i])
            
            self.graphs.append(processed[0])
            self.demands.append(processed[1])
            self.distances.append(processed[2])
            self.labels.append(processed[3])

    def __len__(self):
        return len(self.graphs)

    def __getitem__(self, idx):
        graph = self.graphs[idx]
        demand = self.demands[idx]
        distance = self.distances[idx]
        label = self.labels[idx]
        
        return graph, demand, distance, label
    
    def __repr__(self):
        return f"GraphDataset(graphs={len(self.graphs)})"

The dataset should still be split in a train and test dataset

In [56]:
cut_off = int(len(graphs) * 0.95)

graph_train_dataset = GraphDataset(graphs_dataset[:cut_off])
graph_test_dataset = GraphDataset(graphs_dataset[cut_off:])

graph_train_dataset, graph_test_dataset

(GraphDataset(graphs=1900), GraphDataset(graphs=100))

In [57]:
train_dataloader = DataLoader(graph_train_dataset, batch_size=64, shuffle=True)
test_dataloader = DataLoader(graph_test_dataset, batch_size=64, shuffle=True)

## MLP

Own MLP used for edge prediction

In [58]:
class MLP(nn.Module):
    """
    Multi-layer Perceptron for output prediction.
    """
    def __init__(self, hidden_dim, output_dim, L=2):
        super(MLP, self).__init__()
        self.L = L
        U = []
        for layer in range(self.L - 1):
            U.append(nn.Linear(hidden_dim, hidden_dim, True))
        self.U = nn.ModuleList(U)
        self.V = nn.Linear(hidden_dim, output_dim, True)

    def forward(self, x):
        """
        Args:
            x: Input features (batch_size, hidden_dim)

        Returns:
            y: Output predictions (batch_size, output_dim)
        """
        Ux = x
        for U_i in self.U:
            Ux = U_i(Ux)  # B x H
            Ux = F.relu(Ux)  # B x H
        y = self.V(Ux)  # B x O
        
        return y

## Attention Encoder

In [59]:
class AttentionEncoder(nn.Module):
    def __init__(self, hidden_dim):
        super(AttentionEncoder, self).__init__()
        self.hidden_dim = hidden_dim

    def forward(self, x, neighbor):
        '''
        @param x: (batch_size, node_num, hidden_dim)
        @param neighbor: (batch_size, node_num, k, hidden_dim)
        '''
        # scaled dot-product attention
        x = x.unsqueeze(2)
        neighbor = neighbor.permute(0, 1, 3, 2)
        attn_score = F.softmax(torch.matmul(x, neighbor) / np.sqrt(self.hidden_dim), dim=-1) # (batch_size, node_num, 1, k)
        weighted_neighbor = attn_score * neighbor
        
        # aggregation
        agg = x.squeeze(2) + torch.sum(weighted_neighbor, dim=-1)
        
        return agg

class AttentionPointer(nn.Module):
    def __init__(self, hidden_dim, use_tanh=False, use_cuda=False):
        super(AttentionPointer, self).__init__()
        self.hidden_dim = hidden_dim
        self.use_tanh = use_tanh

        self.project_hidden = nn.Linear(hidden_dim, hidden_dim)
        self.project_x = nn.Conv1d(hidden_dim, hidden_dim, 1, 1)
        self.C = 10
        self.tanh = nn.Tanh()

        v = torch.FloatTensor(hidden_dim)
        if use_cuda:
            v = v.cuda()
        self.v = nn.Parameter(v)
        self.v.data.uniform_(-(1. / math.sqrt(hidden_dim)) , 1. / math.sqrt(hidden_dim))

    def forward(self, hidden, x):
        '''
        @param hidden: (batch_size, hidden_dim)
        @param x: (node_num, batch_size, hidden_dim)
        '''
        x = x.permute(1, 2, 0)
        q = self.project_hidden(hidden).unsqueeze(2)  # batch_size x hidden_dim x 1
        e = self.project_x(x)  # batch_size x hidden_dim x node_num 
        # expand the hidden by node_num
        # batch_size x hidden_dim x node_num
        expanded_q = q.repeat(1, 1, e.size(2)) 
        # batch x 1 x hidden_dim
        v_view = self.v.unsqueeze(0).expand(expanded_q.size(0), len(self.v)).unsqueeze(1)
        # (batch_size x 1 x hidden_dim) * (batch_size x hidden_dim x node_num)
        u = torch.bmm(v_view, self.tanh(expanded_q + e)).squeeze(1)
        if self.use_tanh:
            logits = self.C * self.tanh(u)
        else:
            logits = u  
        return e, logits

## GCN

In [60]:
class GCN(nn.Module):
    def __init__(self,
                 node_hidden_dim,
                 edge_hidden_dim,
                 gcn_num_layers,
                 k):
        super(GCN, self).__init__()

        self.node_hidden_dim = node_hidden_dim
        self.edge_hidden_dim = edge_hidden_dim
        self.gcn_num_layers = gcn_num_layers
        self.k = k
        
        self.W1 = nn.Linear(2, self.node_hidden_dim)      # node_W1
        self.W2 = nn.Linear(2, self.node_hidden_dim // 2) # node_W2
        self.W3 = nn.Linear(1, self.node_hidden_dim // 2) # node_W3
        self.W4 = nn.Linear(1, self.edge_hidden_dim // 2) # edge_W4
        self.W5 = nn.Linear(1, self.edge_hidden_dim // 2) # edge_W5
        
        self.node_embedding = nn.Linear(self.node_hidden_dim, self.node_hidden_dim, bias=False) # Eq5
        self.edge_embedding = nn.Linear(self.edge_hidden_dim, self.edge_hidden_dim, bias=False) # Eq6

        self.gcn_layers = nn.ModuleList([GCNLayer(self.node_hidden_dim) for i in range(self.gcn_num_layers)])
        
        # Concat of the data (OWN)
        num_classes = 5
        self.mlp = MLP(hidden_dim=self.node_hidden_dim, L=2, output_dim=num_classes)
        
        self.relu = nn.ReLU()

    def adjacency(self, m):
        '''
        @param m: distance (node_num, node_num)
        '''
        a = torch.zeros_like(m)
        idx = torch.argsort(m, dim=1)[:, 1:(self.k+1)]
        a.scatter_(1, idx, 1)
        a.fill_diagonal_(-1)

        return a

    def find_neighbors(self, m):
        ''' find index of neighbors for each node
        @param m: distance (batch_size, node_num, node_num)
        '''
        neighbor_idx = []
        for i in range(m.shape[0]):
            idx = torch.argsort(m[i, :, :], dim=1)[:, 1:(self.k+1)].numpy()
            neighbor_idx.append(idx)
        return torch.LongTensor(neighbor_idx).to(device)

    def forward(self, x_c, x_d, m):
        '''
        @param x_c: coordination (batch_size, node_num(N+1), 2)
        @param x_d: demand (batch_size, node_num(N+1))
        @param m: distance (batch_size, node_num(N+1), node_num(N+1))
        '''
        # Eq 2
        x0 = self.relu(self.W1(x_c[:, :1, :])) # (batch_size, 1, node_hidden_dim)
        xi = self.relu(torch.cat((self.W2(x_c[:, 1:, :]), self.W3(x_d.unsqueeze(2)[:, 1:, :])), dim=-1)) # (batch_size, node_num(N), node_hidden_dim)
        x = torch.cat((x0, xi), dim=1)
        # Eq 3
        a = torch.Tensor([self.adjacency(m[i, :, :]).numpy() for i in range(m.shape[0])]).to(device)
        # Eq 4
        y = self.relu(torch.cat((self.W4(m.unsqueeze(3)), self.W5(a.unsqueeze(3))), dim=-1))
        # Eq 5
        h_node = self.node_embedding(x)
        # Eq 6
        h_edge = self.edge_embedding(y)

        # index of neighbors
        N = self.find_neighbors(m)

        # GCN layers
        for gcn_layer in self.gcn_layers:
            h_node, h_edge = gcn_layer(h_node, h_edge, N)
            
        # Merge together (OWN)
#         print(h_node.shape, h_edge.shape)
        f = self.mlp(h_node)
    
#         return h_node, h_edge
        return f


class GCNLayer(nn.Module):
    def __init__(self, hidden_dim):
        super(GCNLayer, self).__init__()

        # node GCN layers
        self.W_node = nn.Linear(hidden_dim, hidden_dim)
        self.V_node_in = nn.Linear(hidden_dim, hidden_dim)
        self.V_node = nn.Linear(2 * hidden_dim, hidden_dim)
        self.attn = AttentionEncoder(hidden_dim)
        self.relu = nn.ReLU()
        self.ln1_node = nn.LayerNorm(hidden_dim)
        self.ln2_node = nn.LayerNorm(hidden_dim)

        # edge GCN layers
        self.W_edge = nn.Linear(hidden_dim, hidden_dim)
        self.V_edge_in = nn.Linear(hidden_dim, hidden_dim)
        self.V_edge = nn.Linear(2 * hidden_dim, hidden_dim)
        self.W1_edge = nn.Linear(hidden_dim, hidden_dim)
        self.W2_edge = nn.Linear(hidden_dim, hidden_dim)
        self.W3_edge = nn.Linear(hidden_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.ln1_edge = nn.LayerNorm(hidden_dim)
        self.ln2_edge = nn.LayerNorm(hidden_dim)

        self.hidden_dim = hidden_dim

    def forward(self, x, e, neighbor_index):
        '''
        @param x: (batch_size, node_num(N+1), node_hidden_dim)
        @param e: (batch_size, node_num(N+1), node_num(N+1), edge_hidden_dim)
        @param neighbor_index: (batch_size, node_num(N+1), k)
        '''
        # node embedding
        batch_size, node_num = x.size(0), x.size(1)
        node_hidden_dim = x.size(-1)
        t = x.unsqueeze(1).repeat(1, node_num, 1, 1)

        neighbor_index = neighbor_index.unsqueeze(3).repeat(1, 1, 1, node_hidden_dim)
        neighbor = t.gather(2, neighbor_index)
        neighbor = neighbor.view(batch_size, node_num, -1, node_hidden_dim)
        
        # Eq 7/9
        h_nb_node = self.ln1_node(x + self.relu(self.W_node(self.attn(x, neighbor))))
        # Eq 12, Eq 8
        h_node = self.ln2_node(h_nb_node + self.relu(self.V_node(torch.cat([self.V_node_in(x), h_nb_node], dim=-1))))

        # edge embedding
        x_from = x.unsqueeze(2).repeat(1, 1, node_num, 1)
        x_to = x.unsqueeze(1).repeat(1, node_num, 1, 1)
        # Eq 7/10, Eq 11
        h_nb_edge = self.ln1_edge(e + self.relu(self.W_edge(self.W1_edge(e) + self.W2_edge(x_from) + self.W3_edge(x_to))))
        # Eq 13, Eq 8
        h_edge = self.ln2_edge(h_nb_edge + self.relu(self.V_edge(torch.cat((self.V_edge_in(e), h_nb_edge), dim=-1))))

        return h_node, h_edge

In [61]:
NODE_HIDDEN_DIM = 100
EDGE_HIDDEN_DIM = 100
GCN_LAYER = 1
k = 1

learning_rate = 1e-4
weight_decay = 1.01

model = GCN(node_hidden_dim=NODE_HIDDEN_DIM,
            edge_hidden_dim=EDGE_HIDDEN_DIM,
            gcn_num_layers=GCN_LAYER,
            k=k).to(device)

optimizer = torch.optim.Adam(model.parameters(),
                             lr=learning_rate,
                             weight_decay=weight_decay)
criterion = nn.CrossEntropyLoss()

In [62]:
model

GCN(
  (W1): Linear(in_features=2, out_features=100, bias=True)
  (W2): Linear(in_features=2, out_features=50, bias=True)
  (W3): Linear(in_features=1, out_features=50, bias=True)
  (W4): Linear(in_features=1, out_features=50, bias=True)
  (W5): Linear(in_features=1, out_features=50, bias=True)
  (node_embedding): Linear(in_features=100, out_features=100, bias=False)
  (edge_embedding): Linear(in_features=100, out_features=100, bias=False)
  (gcn_layers): ModuleList(
    (0): GCNLayer(
      (W_node): Linear(in_features=100, out_features=100, bias=True)
      (V_node_in): Linear(in_features=100, out_features=100, bias=True)
      (V_node): Linear(in_features=200, out_features=100, bias=True)
      (attn): AttentionEncoder()
      (relu): ReLU()
      (ln1_node): LayerNorm((100,), eps=1e-05, elementwise_affine=True)
      (ln2_node): LayerNorm((100,), eps=1e-05, elementwise_affine=True)
      (W_edge): Linear(in_features=100, out_features=100, bias=True)
      (V_edge_in): Linear(in_fea

## Node classification prediction

In [63]:
def accuracy(out, labels):
    out = out.numpy()
    labels = labels.numpy()
    
    acc = np.sum(out == labels, axis=1) / out.shape[1]
    
    return np.mean(acc)

In [66]:
def test(x_c, x_d, m, y):
    model.eval()

    with torch.no_grad():
        out = model(x_c, x_d, m)
        out = out.reshape(out.size(0), 5, -1)
        out = out.softmax(1)
        out = out.argmax(1)

        return accuracy(out, y), out
    
def train_one_epoch(x_c, x_d, m, y):
    model.train()
    
    optimizer.zero_grad()
    
    out = model(x_c, x_d, m)
    out = out.permute(0, 2, 1)
    
    loss = criterion(out, y)
    loss.backward()
    
    optimizer.step()

    return loss

def train(num_epochs):
    for epoch in range(num_epochs):
        epoch_loss = 0
    
        for x_c, x_d, m, y in train_dataloader:
            x_c, x_d, m, y = x_c.to(device), x_d.to(device), m.to(device), y.to(device)

            loss = train_one_epoch(x_c, x_d, m, y)
            epoch_loss += loss.item()

        print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}')

In [67]:
train(10)

Epoch: 00, Loss: 1.4454
Epoch: 01, Loss: 1.4333
Epoch: 02, Loss: 1.4162
Epoch: 03, Loss: 1.3676
Epoch: 04, Loss: 1.4035
Epoch: 05, Loss: 1.3799
Epoch: 06, Loss: 1.3267
Epoch: 07, Loss: 1.3401
Epoch: 08, Loss: 1.3741
Epoch: 09, Loss: 1.3038


In [68]:
x_c, x_d, m, y = next(iter(test_dataloader))

In [69]:
test(x_c, x_d, m, y)

(0.18181818181818182,
 tensor([[1, 1, 4, 3, 2, 1, 0, 4, 3, 2, 1],
         [1, 1, 4, 3, 2, 1, 0, 0, 3, 2, 1],
         [1, 1, 4, 3, 2, 1, 0, 4, 3, 2, 2],
         [2, 1, 4, 3, 3, 1, 0, 4, 3, 2, 1],
         [1, 1, 4, 3, 3, 2, 0, 4, 3, 2, 1],
         [1, 1, 4, 4, 2, 1, 0, 4, 3, 2, 1],
         [1, 1, 4, 4, 2, 1, 0, 4, 4, 2, 1],
         [1, 1, 4, 3, 2, 1, 0, 0, 4, 3, 1],
         [2, 1, 4, 4, 3, 2, 0, 4, 4, 2, 1],
         [1, 1, 4, 3, 2, 2, 0, 4, 4, 2, 1],
         [1, 1, 4, 3, 2, 1, 1, 4, 3, 2, 2],
         [1, 1, 4, 3, 2, 1, 0, 4, 3, 2, 1],
         [1, 1, 4, 3, 2, 1, 0, 4, 3, 3, 1],
         [1, 1, 4, 3, 2, 1, 0, 0, 3, 2, 2],
         [1, 1, 4, 3, 2, 1, 1, 4, 3, 3, 1],
         [1, 1, 4, 3, 2, 1, 1, 4, 3, 3, 1],
         [1, 1, 4, 3, 2, 1, 0, 4, 3, 3, 1],
         [1, 1, 4, 4, 3, 1, 1, 4, 3, 3, 1],
         [2, 1, 4, 3, 2, 1, 0, 4, 3, 2, 1],
         [1, 1, 4, 3, 2, 1, 0, 4, 4, 2, 2],
         [1, 1, 4, 4, 3, 1, 0, 0, 3, 2, 2],
         [1, 1, 4, 3, 2, 1, 0, 4, 3, 2, 2],
         [

## Edge probability prediction

In [149]:
criterion = nn.NLLLoss

In [186]:
def test(x_c, x_d, m, y):
    model.eval()

    with torch.no_grad():
        out = model(x_c, x_d, m)
        out = out.log_softmax(dim=3)
        out = out.permute(0, 3, 1, 2)
        
        return out
    
def train_one_epoch(x_c, x_d, m, y):
    model.train()
    
    optimizer.zero_grad()
    
    out = model(x_c, x_d, m)
    out = F.log_softmax(out, dim=3)
    out = out.permute(0, 3, 1, 2).contiguous()
    
    loss = criterion()(out, y)
    
    loss.backward()
    
    optimizer.step()

    return loss

def train(num_epochs):
    for epoch in range(num_epochs):
        epoch_loss = 0
    
        for x_c, x_d, m, y in train_dataloader:
            x_c, x_d, m, y = x_c.to(device), x_d.to(device), m.to(device), y.to(device).to(torch.long)

            loss = train_one_epoch(x_c, x_d, m, y)
            epoch_loss += loss.item()

        print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}')

In [189]:
train(5)

Epoch: 00, Loss: 0.5906
Epoch: 01, Loss: 0.5840
Epoch: 02, Loss: 0.5787
Epoch: 03, Loss: 0.5745
Epoch: 04, Loss: 0.5721


In [197]:
graph, demand, distance, label = next(iter(train_dataloader))

out = test(graph, demand, distance, label)

In [198]:
out.shape

torch.Size([64, 2, 11, 11])

In [199]:
out.argmax(dim=1)

tensor([[[0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         ...,
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0]],

        [[0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         ...,
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0]],

        [[0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         ...,
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0]],

        ...,

        [[0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         ...,
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0]],

        [[0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0,