In [None]:
import scipy.io as scipy
import os
import os.path as osp
import pandas as pd
import numpy as np
import pickle
import networkx as nx
from tqdm import tqdm

In [None]:
# Working directory: the input .mat files are read from here and outputs are written here.
BASEDIR = os.getcwd()

# File names of the road-network map and of the trip requests, relative to BASEDIR.
MAP_NAME, REQ_NAME = 'NYC_Manhattan_Map.mat', 'Requests.mat'

# Base name for generated outputs (not referenced by the cells visible in this notebook).
OUTPUT_NAME = 'Manhattan_Map'

In [None]:
# Load the MATLAB map file; the result is indexed by MATLAB variable name below.
# NOTE: the name `map` shadows Python's builtin; it is kept because later cells read it.
map_path = osp.join(BASEDIR, MAP_NAME)
map = scipy.loadmat(map_path)

In [None]:
# Wrap each array of the map file in a DataFrame:
#   Arcs      - ArcID, origin node, destination node (IDs start from 1)
#   CityArcs  - node-by-node table (IDs start from 1); 1 means connected
#   EdgeTimes - arc travel time in seconds; the row position is the ArcID
#   Nodes     - NodeID, latitude, longitude
map_Arcs, map_CityArcs, map_EdgeTimes, map_Nodes = (
    pd.DataFrame(map[key]) for key in ('Arcs', 'CityArcs', 'EdgeTimes', 'Nodes')
)

In [None]:
# Label rows and columns 1..n so they can be addressed by 1-based node ID.
# The labels are assigned as absolute ranges (rather than "current labels + 1")
# so that re-running this cell does not shift them a second time.
map_CityArcs.index = pd.RangeIndex(1, map_CityArcs.shape[0] + 1)
map_CityArcs.columns = pd.RangeIndex(1, map_CityArcs.shape[1] + 1)

In [None]:
# Replace the positional column labels with descriptive names.
map_Arcs = map_Arcs.rename(columns={0: 'ArcID', 1: 'Oid', 2: 'Did'})
map_EdgeTimes = map_EdgeTimes.rename(columns={0: 'ArcTime'})
map_Nodes = map_Nodes.rename(columns={0: 'NodeID', 1: 'Latitude', 2: 'Longitude'})

In [None]:
# Store node IDs with an integer dtype, then display the node table.
map_Nodes = map_Nodes.astype({'NodeID': int})
map_Nodes

In [None]:
# Display the arc table (columns ArcID, Oid, Did) for inspection.
map_Arcs

In [None]:
# Mark every node as connected to itself by setting the (id, id) entry
# of map_CityArcs to 1 for each 1-based node ID.
node_count = len(map_Nodes)
for node_id in range(1, node_count + 1):
    map_CityArcs.loc[node_id, node_id] = 1

map_CityArcs

In [None]:
# Attach an explicit 1-based ArcID column: the row position in EdgeTimes is the ArcID.
# Guarded because DataFrame.insert raises ValueError if the column already exists,
# which would otherwise break a re-run of this cell.
if 'ArcID' not in map_EdgeTimes.columns:
    map_EdgeTimes.insert(0, "ArcID", range(1, len(map_EdgeTimes) + 1))
map_EdgeTimes

In [None]:
# Build the directed road graph: one edge per arc, weighted by its travel time.
G = nx.DiGraph()
num_edges = len(map_Arcs)  # number of arcs in the map

# Travel times as a plain array; the entry for ArcID k sits at position k - 1.
edge_times = map_EdgeTimes['ArcTime'].to_numpy()

# Iterate the three columns directly instead of iterrows(): this is faster and keeps
# the progress-bar iterable separate from the per-row loop variables.
arc_iter = tqdm(
    zip(map_Arcs['ArcID'], map_Arcs['Oid'], map_Arcs['Did']),
    total=num_edges, ncols=100, desc='Building network...')  # ncols is the bar width

for arc_id, origin, destination in arc_iter:
    # Cast to int so node keys and the positional lookup work even when the
    # .mat arrays were loaded with a floating-point dtype.
    origin, destination = int(origin), int(destination)
    # add_edge creates both endpoints if missing, so no separate add_node is needed.
    G.add_edge(origin, destination, TimeCost=edge_times[int(arc_id) - 1])

print('Network data loaded.')
print(f'Network has {G.number_of_nodes()} nodes and {G.number_of_edges()} edges.')

In [None]:
# Allocate a zero-filled square matrix with one extra row and column, so that
# 1-based node IDs can be used directly as indices (row/column 0 stay unused).
num_of_nodes = G.number_of_nodes()
matrix_side = num_of_nodes + 1
all_path_matrix = np.zeros((matrix_side, matrix_side))

In [None]:
# Compute the next-hop matrix: all_path_matrix[u][t] holds the node that follows u on a
# shortest path (by TimeCost) towards t; for the final node of a path the entry is t itself.
# Entries that stay 0 mean no route was found (node IDs start from 1).
nodes = tqdm(range(1, num_of_nodes+1), ncols=100, desc='Computing all_path_matrix')
for current_node in nodes:
    # A single Dijkstra run per source: only the paths are needed here, not their lengths.
    path = dict(nx.single_source_dijkstra_path(G, current_node, cutoff=None, weight='TimeCost'))
    for target_node_ID in range(1, num_of_nodes+1):
        current_path = path.get(target_node_ID)
        if current_path is None:
            # Target is unreachable from this source; leave the entry at 0.
            continue
        # Pair each node on the path with its successor; the last node maps to the target.
        successors = current_path[1:] + [target_node_ID]
        for path_node, next_node in zip(current_path, successors):
            all_path_matrix[path_node][target_node_ID] = int(next_node)

with open(osp.join(BASEDIR, 'NYC_Manhattan_AllPathMatrix.pickle'), 'wb') as f:
    pickle.dump(all_path_matrix, f)

In [None]:
# Compute the arc time matrix: all_path_time_matrix[o][d] is the travel time of the
# arc o -> d. Only node pairs joined by a single arc are filled; all other entries stay 0.
map_Arcs_with_Time = map_Arcs.merge(map_EdgeTimes, on='ArcID')
all_path_time_matrix = np.zeros((G.number_of_nodes()+1, G.number_of_nodes()+1))

# Walk the arcs once, in ArcID order, instead of scanning the whole table three times
# per arc. Ascending ArcID order keeps "last arc wins" for a repeated (Oid, Did) pair.
arcs_by_id = map_Arcs_with_Time.sort_values('ArcID', kind='stable')
arc_iter = tqdm(
    zip(arcs_by_id['Oid'], arcs_by_id['Did'], arcs_by_id['ArcTime']),
    total=len(arcs_by_id), ncols=100, desc='Computing all_path_time_matrix')
for origin, destination, arc_time in arc_iter:
    # int() so the IDs are valid array indices whatever numeric dtype the columns have.
    all_path_time_matrix[int(origin), int(destination)] = arc_time

with open(osp.join(BASEDIR, 'NYC_Manhattan_AllPathTimeMatrix.pickle'), 'wb') as f:
    pickle.dump(all_path_time_matrix, f)

In [None]:
# Compute the all-pairs path table:
#   all_path_table[source][target] = (list of nodes on the shortest path, total TimeCost)

print('Computing the shortest path for every node pair')
num_nodes = G.number_of_nodes()
num_edges = G.number_of_edges()

all_path_table = {}

rng = tqdm(range(1, num_nodes+1), ncols=100, desc='Computing all_path_table')
for node in rng:
    # single_source_dijkstra returns both the distances and the paths from one run,
    # replacing three separate Dijkstra passes per source node.
    timeCost, path = nx.single_source_dijkstra(G, node, cutoff=None, weight='TimeCost')
    # store the path and time cost for every node reachable from this source
    all_path_table[node] = {k: (path[k], timeCost[k]) for k in path}

with open(osp.join(BASEDIR, 'NYC_Manhattan_AllPathTable.pickle'), 'wb') as f:
    pickle.dump(all_path_table, f)

In [None]:
# Read the requests file and wrap its 'Requests' array in a DataFrame.
# Columns: origin ID, destination ID, request time (minutes; random seconds are
# added in a later cell), size of the request.
req_path = osp.join(BASEDIR, REQ_NAME)
req = scipy.loadmat(req_path)
req_matrix = pd.DataFrame(req['Requests'])

In [None]:
# Replace the positional column labels with descriptive names, then display the table.
req_matrix = req_matrix.rename(columns={0: 'Oid', 1: 'Did', 2: 'ReqTime', 3: 'Size'})
req_matrix

In [None]:
# Fixed seed so the generated request file is identical on every run.
REQ_TIME_SEED = 0

# The steps below rescale ReqTime and insert ReqID, so they must run only once per
# loaded req_matrix: a second run would multiply by 60 again and fail on insert.
if 'ReqID' not in req_matrix.columns:
    # Random offset of 0-59 seconds per request, since the source times are whole minutes.
    seconds_rng = np.random.default_rng(REQ_TIME_SEED)
    random_seconds = seconds_rng.integers(0, 60, size=len(req_matrix))

    # Convert ReqTime from minutes to seconds and add the random offset.
    req_matrix['ReqTime'] = req_matrix['ReqTime'] * 60 + random_seconds

    # Add a 1-based request ID as the first column.
    req_matrix.insert(0, "ReqID", range(1, len(req_matrix) + 1))

# Save as CSV in the working directory.
req_matrix.to_csv(osp.join(BASEDIR, 'NYC_Manhattan_Requests.csv'), index=False)