In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

import ast

from tabulate import tabulate

In [2]:
data_path = '/Users/nguyenduykhang/Documents/Projects/Data'
os_path = '/Users/nguyenduykhang/Documents/Projects/HPCC-BusAlgs'

In [3]:
all_routes = pd.read_csv(f"{os_path}/data/all_routes.csv")
station_link = pd.read_csv(f"{os_path}/data/station_link.csv")
station_node = pd.read_csv(f"{os_path}/data/station_node.csv")

num_routes = len(all_routes)
num_nodes = len(station_node) # number of stations
num_segs = num_links = len(station_link)

## **Seg Travel Time**
- Average travel time of each segment by interval and day of the week
- theo interval cho rieng tung seg hay dua tren thoi gian bat dau cua trip hay seg dau tien

In [4]:
seg_travel_time = list()

for day in tqdm(range(0, 7)): # monday to sunday
    for t in range(18000, 79200, 3600):
        print(f"{day} -- {t}")
        
        for seg in tqdm(range(num_segs)):
            segmentId = station_link.iloc[seg]['LinkId']
            share_routeNo = ast.literal_eval(station_link.iloc[seg]['share_RouteNo'])
            list_travel_time = list()
            
            for route in share_routeNo:
                for dir in [0, 1]:
                    bus_trips = pd.read_csv(f"{data_path}/dummy-data/{route}_{dir}.csv")
                    if bus_trips[bus_trips.SegmentId == seg] is None:
                        continue
                    
                    sub_trips = bus_trips[(bus_trips.SegmentId == seg) &
                                          (bus_trips.DayOfWeek == day) &
                                          (bus_trips.StartSeg >= t) &
                                          (bus_trips.StartSeg <= t+3600)]
                    
                    seg_duration = list(sub_trips.Duration)
                    list_travel_time += seg_duration
            
            avg_travel_time = sum(list_travel_time) / len(list_travel_time) if len(list_travel_time) != 0 else 0
            seg_travel_time.append([segmentId, day, t/3600, avg_travel_time])
            
seg_travel_time_df = pd.DataFrame(seg_travel_time, columns=['SegmentId', 'DayOfWeek', 'Hour', 'AvgTravelTime'])
print(tabulate(seg_travel_time_df, headers = 'keys', tablefmt = 'psql'))

  0%|          | 0/7 [00:00<?, ?it/s]

0 -- 18000


  1%|          | 51/5305 [51:53<89:05:52, 61.05s/it]
  0%|          | 0/7 [51:53<?, ?it/s]


OSError: [Errno 89] Operation canceled

## **T_Matrix**

In [7]:
print(tabulate(station_link, headers = 'keys', tablefmt = 'psql'))

+------+----------+------------+------------+------------+------------+------------+---------------------------------------------------------------------------------------------------------------------------+
|      |   LinkId |   NodeId_1 |   NodeId_2 |   StopId_1 |   StopId_2 |   LinkName | share_RouteNo                                                                                                             |
|------+----------+------------+------------+------------+------------+------------+---------------------------------------------------------------------------------------------------------------------------|
|    0 |        0 |          0 |          1 |         33 |         79 |      33_79 | ['1']                                                                                                                     |
|    1 |        1 |          1 |          2 |         79 |         76 |      79_76 | ['1', '3', '19', '45', '56', '88']                                             

## **P_Matrix**

### Matrix P_1

In [3]:
list_nodes = list(station_node.StopId)
P_matrix = np.zeros((num_nodes, num_nodes), dtype=int)

bus_line_path = '/Users/nguyenduykhang/Documents/Projects/HCMC-Bus-Network/data/buyttphcm'

for route in tqdm(range(0, num_routes)):
    routeNo = all_routes.iloc[route]['RouteNo']
    for dir in [0, 1]:
        if dir == 0:
            bus_stops = pd.read_csv(f"{bus_line_path}/{routeNo}/stops_by_var.csv")
        else:
            bus_stops = pd.read_csv(f"{bus_line_path}/{routeNo}/rev_stops_by_var.csv")
            
        num_stops = len(bus_stops)
        for stop in range(num_stops-1):
            node_i = list_nodes.index(bus_stops.iloc[stop]['StopId'])
            for j in range(stop+1, num_stops):
                node_j = list_nodes.index(bus_stops.iloc[j]['StopId'])
                P_matrix[node_i][node_j] = 1

100%|██████████| 118/118 [02:40<00:00,  1.36s/it]


In [4]:
P_matrix

array([[0, 1, 1, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 1, 1],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 0]])

In [8]:
np.save('/Users/nguyenduykhang/Documents/Projects/HPCC-BusAlgos/algorithms/ver_1/matrix/P_Path/P_1.npy', P_matrix)

### Matrix P_2 & Switch Station Matrix
i -> sw_matrix (switch station 1) -> j

In [12]:
def intersection_route(path_1, path_2):
    inter_set = set(path_1) & set(path_2)
    return list(inter_set)

In [5]:
# caculate Matrix P_2 ==> Matrix B_2
# list_nodes = list(station_node.StopId)
P_2_matrix = np.zeros((num_nodes, num_nodes), dtype=int)
B_2_matrix = np.empty((num_nodes, num_nodes), dtype=object)
SW_matrix = np.empty((num_nodes, num_nodes), dtype=object)

for i in range(num_nodes):
    for j in range(num_nodes):
        if P_matrix[i][j] == 0:
            B_2_matrix[i][j] = list()
            SW_matrix[i][j] = list()

In [6]:
for node_i in tqdm(range(num_nodes)):
    for node_j in range(num_nodes):
        if (node_i == node_j) or (P_matrix[node_i][node_j] == 1):
            continue
        for sw in range(num_nodes):
            if (P_matrix[node_i][sw] == 1) and (P_matrix[sw][node_j] == 1):
                # inter_route = intersection_route(path_1=matrix_B_1[node_i][sw], path_2=matrix_B_1[sw][node_j])
                    P_2_matrix[node_i][node_j] = 1
                    SW_matrix[node_i][node_j].append(sw)
                    # B_2_matrix[node_i][node_j] += inter_route             

  0%|          | 0/4363 [00:00<?, ?it/s]

100%|██████████| 4363/4363 [9:25:43<00:00,  7.78s/it]     


In [7]:
np.save('/Users/nguyenduykhang/Documents/Projects/HPCC-BusAlgos/algorithms/ver_1/matrix/P_Path/P_2.npy', P_2_matrix)
np.save('/Users/nguyenduykhang/Documents/Projects/HPCC-BusAlgos/algorithms/ver_1/matrix/P_Path/SW_1.npy', SW_matrix)

In [10]:
num_routes

118

## The best optimal switch station by interval
* P1[i][j] = 0
* P2[i][j] = 1
* i -> sw1 -> j

sw1 (list) --> min( T_t[i][sw] + T_t[sw][j]) ==> sw

## **B_Matrix**

### Matrix B_1

In [6]:
# B_matrix = np.zeros((num_nodes, num_nodes), dtype=int)
B_matrix = np.empty((num_nodes, num_nodes), dtype=object)

for i in range(num_nodes):
    for j in range(num_nodes):
        B_matrix[i][j] = list()
        
list_nodes = list(station_node.StopId)
bus_line_path = '/Users/nguyenduykhang/Documents/Projects/HCMC-Bus-Network/data/buyttphcm'

for route in tqdm(range(0, num_routes)):
    routeNo = all_routes.iloc[route]['RouteNo']
    for dir in [0, 1]:
        if dir == 0:
            bus_stops = pd.read_csv(f"{bus_line_path}/{routeNo}/stops_by_var.csv")
        else:
            bus_stops = pd.read_csv(f"{bus_line_path}/{routeNo}/rev_stops_by_var.csv")
            
        num_stops = len(bus_stops)
        for stop in range(num_stops-1):
            node_i = list_nodes.index(bus_stops.iloc[stop]['StopId'])
            for j in range(stop+1, num_stops):
                node_j = list_nodes.index(bus_stops.iloc[j]['StopId'])
                B_matrix[node_i][node_j].append(routeNo)
                # B_matrix[node_i][node_j] = [routeNo]
                

100%|██████████| 118/118 [00:12<00:00,  9.42it/s]


In [7]:
np.save('/Users/nguyenduykhang/Documents/Projects/HPCC-BusAlgos/algorithms/ver_1/matrix/B_BusRoute/B_1.npy', B_matrix)

### Matrix B_2
i -> sw1 (switch station) -> j

In [None]:
# above lines

In [11]:
# seg_travel_time_df.to_csv('/Users/nguyenduykhang/Documents/Projects/HPCC-BusAlgos/algorithms/ver_1/seg_travel_time.csv', index=False)

## **Matrix-based calculations**

In [None]:
seg_travel_time = pd.read_csv('/Users/nguyenduykhang/Documents/Projects/HPCC-BusAlgos/algorithms/ver_1/seg_travel_time.csv')

In [5]:
matrix_P_1 = np.load('/Users/nguyenduykhang/Documents/Projects/HPCC-BusAlgos/algorithms/ver_1/matrix/P_Path/P_1.npy')
matrix_B_1 = np.load('/Users/nguyenduykhang/Documents/Projects/HPCC-BusAlgos/algorithms/ver_1/matrix/B_BusRoute/B_1.npy', allow_pickle=True)

  seg_travel_time = pd.read_csv('/Users/nguyenduykhang/Documents/Projects/HPCC-BusAlgos/algorithms/ver_1/seg_travel_time.csv')


In [9]:
matrix_B_1

array([[list([]), list(['1']), list(['1']), ..., list([]), list([]),
        list([])],
       [list([]), list([]), list(['1', '3', '19', '45', '56', '88']),
        ..., list([]), list([]), list([])],
       [list([]), list([]), list([]), ..., list([]), list([]), list([])],
       ...,
       [list([]), list([]), list([]), ..., list([]), list(['D4']),
        list(['D4'])],
       [list([]), list([]), list([]), ..., list([]), list([]),
        list(['D4'])],
       [list([]), list([]), list([]), ..., list([]), list([]), list([])]],
      dtype=object)

In [6]:
# B_matrix = np.zeros((num_nodes, num_nodes), dtype=int)
B_matrix = np.empty((num_nodes, num_nodes), dtype=object)

for i in range(num_nodes):
    for j in range(num_nodes):
        B_matrix[i][j] = list()
        
list_nodes = list(station_node.StopId)
bus_line_path = '/Users/nguyenduykhang/Documents/Projects/HCMC-Bus-Network/data/buyttphcm'

for route in tqdm(range(0, num_routes)):
    routeNo = all_routes.iloc[route]['RouteNo']
    for dir in [0, 1]:
        if dir == 0:
            bus_stops = pd.read_csv(f"{bus_line_path}/{routeNo}/stops_by_var.csv")
        else:
            bus_stops = pd.read_csv(f"{bus_line_path}/{routeNo}/rev_stops_by_var.csv")
            
        num_stops = len(bus_stops)
        for stop in range(num_stops-1):
            node_i = list_nodes.index(bus_stops.iloc[stop]['StopId'])
            for j in range(stop+1, num_stops):
                node_j = list_nodes.index(bus_stops.iloc[j]['StopId'])
                B_matrix[node_i][node_j].append(routeNo)
                # B_matrix[node_i][node_j] = [routeNo]
                

array([[list([]), list(['1']), list(['1']), ..., list([]), list([]),
        list([])],
       [list([]), list([]), list(['1', '3', '19', '45', '56', '88']),
        ..., list([]), list([]), list([])],
       [list([]), list([]), list([]), ..., list([]), list([]), list([])],
       ...,
       [list([]), list([]), list([]), ..., list([]), list(['D4']),
        list(['D4'])],
       [list([]), list([]), list([]), ..., list([]), list([]),
        list(['D4'])],
       [list([]), list([]), list([]), ..., list([]), list([]), list([])]],
      dtype=object)

In [None]:
# # Average travel time of each segment by interval and day of the week
# seg_travel_time = list()
# t0 = 18000 # 05:00:00
# routeNo = 1
# route_segments = set(bus_trips.SegmentId)
# for segId in route_segments:
#     for day in [0, 6]: # monday to sunday
#         sub_trips = bus_trips[(bus_trips.SegmentId == segId) & (bus_trips.DayOfWeek == day)]
#         for t in range(18000, 79200, 3600):
#             trips_t = sub_trips[(sub_trips.StartSeg >= t) & (sub_trips.StartSeg <= t+3600)]
#             ls_travel_time = list(trips_t.Duration)
#             avg_travel_time = sum(ls_travel_time) / len(ls_travel_time) if len(ls_travel_time) != 0 else 0
#             hour = t / 3600
#             seg_travel_time.append([routeNo, segId, day, hour, avg_travel_time])
            
# # seg_travel_time_df = pd.DataFrame(seg_travel_time, columns=['RouteNo', 'SegmentId', 'DayOfWeek', 'Hour', 'Avg_TravelTime'])
# print(seg_travel_time)

[[1, 0, 0, 5.0, 80.23626373626374], [1, 0, 0, 6.0, 81.0521978021978], [1, 0, 0, 7.0, 78.77197802197803], [1, 0, 0, 8.0, 78.51098901098901], [1, 0, 0, 9.0, 80.56318681318682], [1, 0, 0, 10.0, 80.06318681318682], [1, 0, 0, 11.0, 80.04395604395604], [1, 0, 0, 12.0, 78.92032967032966], [1, 0, 0, 13.0, 77.08516483516483], [1, 0, 0, 14.0, 77.24450549450549], [1, 0, 0, 15.0, 78.82967032967034], [1, 0, 0, 16.0, 80.39285714285714], [1, 0, 0, 17.0, 79.18406593406593], [1, 0, 0, 18.0, 78.38461538461539], [1, 0, 0, 19.0, 0], [1, 0, 0, 20.0, 0], [1, 0, 0, 21.0, 0], [1, 0, 6, 5.0, 79.34501347708895], [1, 0, 6, 6.0, 77.10512129380054], [1, 0, 6, 7.0, 83.4366576819407], [1, 0, 6, 8.0, 80.14824797843666], [1, 0, 6, 9.0, 78.62533692722371], [1, 0, 6, 10.0, 77.7466307277628], [1, 0, 6, 11.0, 74.59299191374663], [1, 0, 6, 12.0, 79.78975741239893], [1, 0, 6, 13.0, 76.89757412398922], [1, 0, 6, 14.0, 79.97304582210242], [1, 0, 6, 15.0, 78.62533692722371], [1, 0, 6, 16.0, 77.277628032345], [1, 0, 6, 17.0, 81

In [None]:
matrix = np.load('/Users/nguyenduykhang/Documents/Projects/HPCC-BusAlgos/algorithms/ver_1/P_matrix.npy')
matrix

In [None]:
# ospath = '/Users/nguyenduykhang/Documents/Projects/HPCC-BusAlgos'

# all_routes = pd.read_csv(f"{ospath}/data/all_routes.csv")
# num_routes = len(all_routes)

# all_hub_nodes = pd.read_csv(f"{ospath}/data/hub_nodes_information.csv")
# num_hub_nodes = len(all_hub_nodes)
# list_hub_nodes = list(all_hub_nodes.StopId)


- Bang thong tin cac hub node: HubId, StopId, Route 
- Matix duong di P

In [None]:
# P_matrix = np.zeros((num_hub_nodes, num_hub_nodes), dtype=int)
# P_matrix = pd.DataFrame(0, index=range(num_hub_nodes), columns=range(num_hub_nodes))

In [None]:
# for route in tqdm(range(num_routes)):
#     routeNo = all_routes.iloc[route]['RouteNo']
#     for dir_flag in [0, 1]:
#         hub_nodes = pd.read_csv(f"{ospath}/data/bus-lines/{routeNo}/{dir_flag}_hub_nodes.csv")
#         for hub in range(len(hub_nodes)):
#             node_i = list_hub_nodes.index(hub_nodes.iloc[hub]['StopId'])
#             P_matrix[node_i][node_i] = 1
            
#             for i in range(hub, len(hub_nodes)-1):
#                 node_j = list_hub_nodes.index(hub_nodes.iloc[i+1]['StopId'])
#                 P_matrix[node_i][node_j] = 1

  0%|          | 0/128 [00:00<?, ?it/s]

100%|██████████| 128/128 [00:01<00:00, 95.68it/s] 


In [None]:
# T_matrix = np.zeros((24, num_hub_nodes, num_hub_nodes), dtype=int)

# # Create a mask for P_matrix where it equals 1
# mask = P_matrix == 1

# for t in tqdm(range(4, 22)):
#     # Use broadcasting to apply random values where mask is True
#     T_matrix[t][mask] = np.random.randint(100, 1001, size=(num_hub_nodes, num_hub_nodes))[mask]

# # for t in tqdm(range(0, 24)):
# #     t_matrix = T_matrix[t]
# #     for i in range(num_hub_nodes):
# #         for j in range(num_hub_nodes):
# #             if P_matrix[i][j] == 1:
# #                 t_matrix[i][j] = np.random.randint(100, 1001)

100%|██████████| 24/24 [00:00<00:00, 125.98it/s]


In [None]:
# P_matrix_2 = np.zeros((24, num_hub_nodes, num_hub_nodes), dtype=int)

# for t in tqdm(range(4, 22)):
#     for hub_i in range(num_hub_nodes):
#         for hub_j in range(num_hub_nodes):
#             if P_matrix[hub_i][hub_j] == 0:
#                 min_travel_time = 1000
#                 switch = 0
#                 for sw in range(num_hub_nodes):
#                     if (P_matrix[hub_i][sw] == 1) and (P_matrix[sw][hub_j] == 1):
#                         travel_time = T_matrix[t][hub_i, sw] + T_matrix[t][sw, hub_j]
#                         if travel_time < min_travel_time:
#                             min_travel_time = travel_time
#                             switch = sw
                            
#             P_matrix_2[t][hub_i, hub_j] = switch if switch != 0 else 0

100%|██████████| 18/18 [43:00<00:00, 143.37s/it]
