In [None]:
# import tensorflow as tf
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# pytorch lightning
import lightning as pl
file_name = "../preparing_data/surat_shortest_distance_matrix.npy"

# read the shortest distance matrix
print("Reading the shortest distance matrix: ", file_name)
sdm = np.load(file_name)

# normalize the distance matrix
maxLengthy = np.max(sdm)
sdm = sdm/maxLengthy
print("Max length: ", maxLengthy)
print("Min length: ", np.min(sdm))

# get the number of nodes
n= sdm.shape[0]
print("Number of nodes: ", n)

def get_node(index):
    node1_index = index // (n-1)  ## row index
    node2_index = index % (n-1)  ## column index
    return node1_index, node2_index


def get_batch(index_list):
    l = len(index_list)
    x1_batch = np.zeros((l, n))
    x2_batch = np.zeros((l, n))
    y_batch = np.zeros((l, 1))
    z = 0
    for i in index_list:
        node1, node2 = get_node(i)
        if node2 >= node1:
            node2 += 1
        print(type(node1), type(node2), type(z))
        x1_batch[z][node1] = 1
        x2_batch[z][node2] = 1
        y_batch[z] = sdm[node1][node2]
        z += 1
    return x1_batch, x2_batch, y_batch

Reading the shortest distance matrix:  ../preparing_data/surat_shortest_distance_matrix.npy
Max length:  50954.239672192
Min length:  0.0
Number of nodes:  2508


In [11]:
get_node(30)

(0, 30)

In [12]:
get_batch([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

<class 'int'> <class 'int'> <class 'int'>
<class 'int'> <class 'int'> <class 'int'>
<class 'int'> <class 'int'> <class 'int'>
<class 'int'> <class 'int'> <class 'int'>
<class 'int'> <class 'int'> <class 'int'>
<class 'int'> <class 'int'> <class 'int'>
<class 'int'> <class 'int'> <class 'int'>
<class 'int'> <class 'int'> <class 'int'>
<class 'int'> <class 'int'> <class 'int'>
<class 'int'> <class 'int'> <class 'int'>


(array([[1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.]]),
 array([[0., 1., 0., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 array([[0.02260066],
        [0.04162863],
        [0.03606479],
        [0.11176997],
        [0.09337557],
        [0.07152452],
        [0.14513639],
        [0.09688669],
        [0.09988087],
        [0.07802231]]))

In [14]:
if torch.backends.mps.is_available():
    print("MPS is available.")
else:
    print("MPS is not available.")

MPS is available.


## dry run

In [None]:
# import tensorflow as tf
import numpy as np
from tqdm import tqdm

# pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# read the shortest distance matrix
file_name = "../preparing_data/surat_shortest_distance_matrix.npy"
print("Reading the shortest distance matrix: ", file_name)
# shortest distance matrix (sdm)
sdm = np.load(file_name)

# normalize the distance matrix
maxLengthy = np.max(sdm)
sdm = sdm/maxLengthy
print("Max length: ", maxLengthy)
print("Min length: ", np.min(sdm))

# get the number of nodes
n= sdm.shape[0]
print("Number of nodes: ", n)


def get_node(index):
    node1_index = index // (n-1)  ## row index
    node2_index = index % (n-1)  ## column index
    return node1_index, node2_index

def get_batch(index_list):
    l = len(index_list)
    x1_batch = np.zeros((l, n))
    x2_batch = np.zeros((l, n))
    y_batch = np.zeros((l, 1))
    z = 0
    for i in index_list:
        node1, node2 = get_node(i)
        if node2 >= node1:
            node2 += 1
        x1_batch[z][node1] = 1
        x2_batch[z][node2] = 1
        y_batch[z] = sdm[node1][node2]
        z += 1
    return x1_batch, x2_batch, y_batch

# Parameters
learning_rate = 0.01
training_epochs = 20
batch_size = n
display_step = 1
input_l = (n - 1)*n  # total number of input data samples (removing the diagonal elements)

# Network Parameters
n_input = n
n_hidden_1 = int(n*0.2)
n_hidden_2 = 100
n_hidden_3 = 20
n_output = 1

# # tf Graph input
# # node1 one-hot layer
# x1 = tf.placeholder("float32", [None, n_input], name="x1")
# # node2 one-hot layer
# x2 = tf.placeholder("float32", [None, n_input], name="x2")
# # output layer
# y = tf.placeholder("float32", [None, n_output], name="y")

# def multilayer_perceptron(x1, x2, weights, biases):
#     # shared layer
#     layer_11 = tf.add(tf.matmul(x1, weights['h1']), biases['b1'])
#     layer_12 = tf.add(tf.matmul(x2, weights['h1']), biases['b1'])
#     layer_1 = tf.concat([layer_11, layer_12], 1)
#     # layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
#     # layer_2 = tf.nn.relu(layer_2)
#     layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']))
#     layer_3 = tf.nn.relu(tf.add(tf.matmul(layer_2, weights['h3']), biases['b3']))
#     out_layer = tf.sigmoid(tf.add(tf.matmul(layer_3, weights['out']), biases['out']))
#     return out_layer

# # Store layers weight & bias
# weights = {
#     'h1': tf.Variable(tf.truncated_normal([n_input, n_hidden_1],  mean=0.0, stddev=0.01, dtype=tf.float32), name='h1'),
#     'h2': tf.Variable(tf.truncated_normal([n_hidden_1*2, n_hidden_2], mean=0.0, stddev=0.01, dtype=tf.float32), name='h2'),
#     'h3': tf.Variable(tf.truncated_normal([n_hidden_2, n_hidden_3], mean=0.0, stddev=0.01, dtype=tf.float32), name='h3'),
#     'out': tf.Variable(tf.truncated_normal([n_hidden_3, n_output], mean=0.0, stddev=0.01, dtype=tf.float32), name='wout')
# }
# biases = {
#     'b1': tf.Variable(tf.truncated_normal([n_hidden_1], mean=0.0, stddev=0.01, dtype=tf.float32), name='b1'),
#     'b2': tf.Variable(tf.truncated_normal([n_hidden_2], mean=0.0, stddev=0.01, dtype=tf.float32), name='b2'),
#     'b3': tf.Variable(tf.truncated_normal([n_hidden_3], mean=0.0, stddev=0.01, dtype=tf.float32), name='b3'),
#     'out': tf.Variable(tf.truncated_normal([n_output], mean=0.0, stddev=0.01, dtype=tf.float32), name='bout')
# }

# Construct model
class MLP(nn.Module):
    def __init__(self, n, n_hidden_1, n_hidden_2, n_hidden_3, n_output):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(n, n_hidden_1)
        self.fc2 = nn.Linear(n_hidden_1*2, n_hidden_2)
        self.fc3 = nn.Linear(n_hidden_2, n_hidden_3)
        self.fc4 = nn.Linear(n_hidden_3, n_output)

        # Initialize weights and biases with truncated normal distribution
        for m in self.modules():
            if isinstance(m, nn.Linear):
                torch.nn.init.trunc_normal_(m.weight, mean=0.0, std=0.01)
                if m.bias is not None:
                    torch.nn.init.trunc_normal_(m.bias, mean=0.0, std=0.01)


    def forward(self, x1, x2):
        x1 = self.fc1(x1)
        x2 = self.fc1(x2)
        x = torch.cat((x1, x2), 1)
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = torch.sigmoid(self.fc4(x))
        return x
    
# select the device
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = "cpu"
print("Device: ", device)

# Create the model and move it to the device
model = MLP(n, n_hidden_1, n_hidden_2, n_hidden_3, n_output).to(device)

# Define loss and optimizer
# cost = tf.losses.mean_squared_error(y, pred)
# optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# tf.add_to_collection("optimizer", optimizer)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

# # Initializing the variables
# init = tf.global_variables_initializer()

# sess = tf.Session()
# sess.run(init)
# saver = tf.train.Saver()

# Training cycle
for epoch in range(training_epochs):
    avg_cost = 0.
    total_batch = int(input_l/batch_size) + 1
    # Loop over all batches
    random_index = np.random.permutation(input_l)
    for j in tqdm(range(total_batch), desc=f"Epoch: {epoch+1}"):
        start = j * batch_size
        end = (j+1) * batch_size
        if end >= input_l:
            end = input_l
        if start >= end:
            break

        # prepare the batch data
        batch_x1, batch_x2, batch_y = get_batch(random_index[start:end])
        batch_x1 = torch.tensor(batch_x1, dtype=torch.float32).to(device)
        batch_x2 = torch.tensor(batch_x2, dtype=torch.float32).to(device)
        batch_y = torch.tensor(batch_y, dtype=torch.float32).to(device)

        # Run optimization op (backprop) and cost op (to get loss value)
        optimizer.zero_grad()
        outputs = model(batch_x1, batch_x2)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

        # Compute average loss
        avg_cost += loss.item() / total_batch
    # Display logs per epoch step
    save_path = f"./model.pth"
    print("Model saved in path: ", save_path)
    # save the model
    torch.save(model.state_dict(), save_path)
    if epoch % display_step == 0:
        print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))
print("Optimization Finished!")

Reading the shortest distance matrix:  ../preparing_data/surat_shortest_distance_matrix.npy
Max length:  50954.239672192
Min length:  0.0
Number of nodes:  2508
Device:  cpu


Epoch: 1, Loss: 0.0: 100%|█████████▉| 2507/2508 [03:05<00:00, 13.48it/s]


Model saved in path:  ./model.pth
Epoch: 0001 cost= 0.000619888


Epoch: 2, Loss: 0.0: 100%|█████████▉| 2507/2508 [02:59<00:00, 13.96it/s]


Model saved in path:  ./model.pth
Epoch: 0002 cost= 0.000058712


Epoch: 3, Loss: 0.0: 100%|█████████▉| 2507/2508 [03:07<00:00, 13.34it/s]


Model saved in path:  ./model.pth
Epoch: 0003 cost= 0.000042520


Epoch: 4, Loss: 0.0: 100%|█████████▉| 2507/2508 [03:08<00:00, 13.27it/s]


Model saved in path:  ./model.pth
Epoch: 0004 cost= 0.000034051


Epoch: 5, Loss: 0.0: 100%|█████████▉| 2507/2508 [03:06<00:00, 13.43it/s]


Model saved in path:  ./model.pth
Epoch: 0005 cost= 0.000028458


Epoch: 6, Loss: 0.0: 100%|█████████▉| 2507/2508 [03:00<00:00, 13.91it/s]


Model saved in path:  ./model.pth
Epoch: 0006 cost= 0.000024631


Epoch: 7, Loss: 0.0: 100%|█████████▉| 2507/2508 [03:00<00:00, 13.89it/s]


Model saved in path:  ./model.pth
Epoch: 0007 cost= 0.000022155


Epoch: 8, Loss: 0.0:  76%|███████▌  | 1903/2508 [02:20<00:44, 13.50it/s]


KeyboardInterrupt: 

In [39]:
# load model from the saved file
print("device: ", device)
print("Loading the model from the saved file: ", save_path)
model.load_state_dict(torch.load(save_path, weights_only=True, map_location=device))

device:  cpu
Loading the model from the saved file:  ./model.pth


<All keys matched successfully>

In [40]:
# evaluation

def get_eval_batch(p1, p2):
    x1_batch = np.zeros(((p2-p1),n))
    x2_batch = np.zeros(((p2-p1),n))
    y_batch = np.zeros(((p2-p1),))
    z = 0
    for j in range(p1, p2):
        node1, node2 = get_node(j)
        if node2 >= node1:
            node2 += 1
        x1_batch[z][node1] = 1
        x2_batch[z][node2] = 1
        y_batch[z] = sdm[node1][node2]
        z += 1
    return x1_batch, x2_batch, y_batch


batch_size = 10000
total_batch = int(input_l/batch_size) + 1
result = []
real_dis = []
for i in tqdm(range(total_batch)):
    start = i * batch_size
    end = (i+1)*batch_size
    # print(start, end)
    if end >= input_l:
        end = input_l
    
    # print(start, end)
    # result_temp = sess.run(pred, feed_dict={x1: batch_x1, x2:batch_x2})
    # result = np.append(result, result_temp)
    # real_dis = np.append(real_dis, batch_y)
    
    batch_x1, batch_x2, batch_y = get_eval_batch(start, end)
    batch_x1 = torch.tensor(batch_x1, dtype=torch.float32).to(device)
    batch_x2 = torch.tensor(batch_x2, dtype=torch.float32).to(device)
    batch_y = torch.tensor(batch_y, dtype=torch.float32).to(device)
    
    result_temp = model(batch_x1, batch_x2)
    result = np.append(result, result_temp.detach().cpu().numpy())
    real_dis = np.append(real_dis, batch_y.detach().cpu())


real_dis = real_dis * maxLengthy
result = result * maxLengthy

abe = np.fabs(real_dis - result)
re = abe/real_dis

mse = (abe ** 2).mean()
maxe = np.max(abe ** 2)
mine = np.min(abe ** 2)
mabe = abe.mean()
maxae = np.max(abe)
minae = np.min(abe)
mre = re.mean()
maxre = np.max(re)
minre = np.min(re)
print ("mean square error:", mse)
print ("max square error:", maxe)
print ("min square error:", mine)
print ("mean absolute error:", mabe)
print ("max absolute error:", maxae)
print ("min absolute error:", minae)
print ("mean relative error:", mre)
print ("max relative error:", maxre)
print ("min relative error:", minre)

  0%|          | 0/629 [00:00<?, ?it/s]

100%|██████████| 629/629 [01:49<00:00,  5.73it/s]


mean square error: 10119.797403883565
max square error: 24750738.319608063
min square error: 0.0
mean absolute error: 72.79610066853664
max absolute error: 4975.011388892297
min absolute error: 0.0
mean relative error: 0.017901513460987197
max relative error: 191.2367618938278
min relative error: 0.0


In [None]:
# 100%|██████████| 629/629 [01:47<00:00,  5.87it/s]
# mean square error: 39575.61283054177
# max square error: 190603220.47132424
# min square error: 0.0
# mean absolute error: 143.21570140377924
# max absolute error: 13805.912518603189
# min absolute error: 0.0
# mean relative error: 0.036053427847838056
# max relative error: 379.9848707625733
# min relative error: 0.0