### 00 packages

In [4]:
##################################################################################################
##################################################################################################

import numpy as np

import pandas as pd

import geopandas as gpd

import folium

import copy

import geopandas as gp

import networkx as nx

from sklearn.cluster import DBSCAN

from collections import defaultdict

from shapely.geometry import Point

from scipy.spatial import KDTree

from datetime import datetime, timedelta

import os

import matplotlib.pyplot as plt

import ast

from folium.plugins import HeatMap

import random

##################################################################################################
##################################################################################################

import warnings

# Blanket 'ignore' filter: no category/module is given, so every warning raised after this
# point is suppressed for the rest of the session.
warnings.filterwarnings('ignore')

### 06 passenger trips boarding and alighting statistics

https://www.data.act.gov.au/Transport/Boardings-By-Stop-By-Hr/tf32-54vh/about_data



In [5]:
##################################################################################################
# Build the cleaned passenger trip table (passenger_df).
#
# Steps: load the stop table and lookup dictionaries, read the raw trip extract, drop unusable
# rows, convert stop labels to integer ids, attach road-network nodes and suburbs, then draw a
# random node inside each origin / destination suburb.
#
# Derived columns are computed column-wise with Series.map instead of row-wise
# DataFrame.apply(axis=1): each lookup only needs a single column, the values produced are the
# same, a missing key still raises KeyError, and no per-row Series has to be built.
##################################################################################################


def _load_pickled_dict(path):
    """Return the dict stored in a 0-d object .npy file.

    NOTE(review): allow_pickle=True unpickles arbitrary objects, so only files produced by this
    project should ever be loaded this way.
    """
    return np.load(path, allow_pickle=True).item()


def _leading_stop_id(label):
    """Return the integer that precedes the first ':' in a stop label string."""
    return int(label.split(":")[0])


# --- reference data -----------------------------------------------------------------------------

bus_stop_data = pd.read_csv("./02result/bus_data/abstracted_bus_stop_data.csv")
# "Unnamed: 0" is presumably the index saved by an earlier to_csv call; it is not used here.
bus_stop_data = bus_stop_data.drop(columns=["Unnamed: 0"])
valid_stop_ids = bus_stop_data["stop_id"].unique()

stop_node_dic = _load_pickled_dict('./02result/bus_data/stop_node_dic.npy')
node_suburb_dic = _load_pickled_dict('./02result/road_network/node_suburb_dic.npy')
suburb_node_dic = _load_pickled_dict('./02result/road_network/suburb_node_dic.npy')

# Only nodes that have a suburb assigned can be used further down.
avaiable_nodes = list(node_suburb_dic.keys())

# --- raw passenger extract ----------------------------------------------------------------------

passenger_path = "./01data/canberra_passenger/"
passenger_df = pd.read_csv(passenger_path + "TC_Sydney_Uni_research_extract_2_202405__20240615_0400.csv")

# Keep the four columns that are used and discard incomplete rows.
passenger_df = passenger_df[["JOURNEY_NUMBER", "ORIGIN_DATE", "ORIGIN_STOP", "DESTINATION_STOP"]].dropna()

# Rows whose stop label contains 'Unknown' cannot be matched to a stop id.
for stop_column in ("ORIGIN_STOP", "DESTINATION_STOP"):
    passenger_df = passenger_df.loc[~passenger_df[stop_column].str.contains('Unknown')]
passenger_df = passenger_df.reset_index(drop=True)

# Stop labels are split on ':' and the leading part is converted to an int stop id.
for stop_column in ("ORIGIN_STOP", "DESTINATION_STOP"):
    passenger_df[stop_column] = passenger_df[stop_column].map(_leading_stop_id)

# Keep only trips whose two ends are both stops present in bus_stop_data.
for stop_column in ("ORIGIN_STOP", "DESTINATION_STOP"):
    passenger_df = passenger_df.loc[passenger_df[stop_column].isin(valid_stop_ids)]

passenger_df = passenger_df.rename(
    columns={
        "JOURNEY_NUMBER": "passenger_id",
        "ORIGIN_DATE": "arrival_date",
        "ORIGIN_STOP": "origin_stop",
        "DESTINATION_STOP": "destination_stop",
    }
)

# --- stop -> node -> suburb ---------------------------------------------------------------------

# A stop id missing from stop_node_dic raises KeyError, exactly as a plain dict lookup would.
passenger_df['origin_node'] = passenger_df['origin_stop'].map(lambda stop: stop_node_dic[stop])
passenger_df['destination_node'] = passenger_df['destination_stop'].map(lambda stop: stop_node_dic[stop])

# Drop trips whose stop node has no suburb entry, so the suburb lookup below cannot fail.
passenger_df = passenger_df.loc[passenger_df['origin_node'].isin(avaiable_nodes)]
passenger_df = passenger_df.loc[passenger_df['destination_node'].isin(avaiable_nodes)]

passenger_df['origin_suburb'] = passenger_df['origin_node'].map(lambda node: node_suburb_dic[node])
passenger_df['destination_suburb'] = passenger_df['destination_node'].map(lambda node: node_suburb_dic[node])

# --- sampled trip end points --------------------------------------------------------------------

# One node is drawn at random from the nodes listed for the suburb of each trip end.
# NOTE(review): the random module is not seeded in this cell, so the sampled nodes differ
# between runs.
passenger_df['real_origin_node'] = passenger_df['origin_suburb'].map(
    lambda suburb: random.choice(suburb_node_dic[suburb])
)
passenger_df['real_destination_node'] = passenger_df['destination_suburb'].map(
    lambda suburb: random.choice(suburb_node_dic[suburb])
)

passenger_df = passenger_df.reset_index(drop=True)

# --- output -------------------------------------------------------------------------------------

# The full table (including origin_node / destination_node) is written before the column subset
# below is taken; the index is written as well.
passenger_df.to_csv("./02result/passenger_demand/passenger_df.csv")

passenger_df = passenger_df[
    [
        'arrival_date', 'passenger_id', 'origin_stop', 'destination_stop',
        'origin_suburb', 'destination_suburb',
        'real_origin_node', 'real_destination_node',
    ]
]


passenger_df


Unnamed: 0,arrival_date,passenger_id,origin_stop,destination_stop,origin_suburb,destination_suburb,real_origin_node,real_destination_node
0,2024-05-01 07:43:20,2024050411530565000059484,2268,3356,102,629,"(-35.33292, 149.094179)","(-35.277588534181575, 149.1320861675914)"
1,2024-05-01 08:16:22,2024050411530565000044270,2803,2818,645,664,"(-35.32504246088015, 149.03357150124833)","(-35.303830114628965, 149.03083196151215)"
2,2024-05-01 15:35:57,2024050411530565000110501,2817,2805,664,645,"(-35.303204217587705, 149.0208378421446)","(-35.3242318340623, 149.033536718141)"
3,2024-05-01 10:08:04,2024050411530565000032280,2508,2206,651,680,"(-35.337166235707556, 149.02857977040554)","(-35.324878223388644, 149.05847758841566)"
4,2024-05-01 10:25:10,2024050411530565000032280,2205,4530,680,629,"(-35.320316732345574, 149.05627002720917)","(-35.28012950560014, 149.13539902115758)"
...,...,...,...,...,...,...,...,...
1250978,2024-05-31 12:01:05,2024060410360838000226095,2373,4333,146,23,"(-35.317101849769045, 149.1522537015997)","(-35.22741181931549, 149.01116179571736)"
1250979,2024-05-31 08:35:37,2024060410360838000164639,3321,2258,146,145,"(-35.31680870130822, 149.14723358627225)","(-35.306076, 149.133149)"
1250980,2024-05-31 17:23:11,2024060410360838000187587,2259,3360,145,146,"(-35.30924845652158, 149.13236222673416)","(-35.31431347889792, 149.1506838660207)"
1250981,2024-05-31 17:14:55,2024060410360838000192372,2925,4530,45,629,"(-35.31691076239271, 149.12103102496496)","(-35.28349100345248, 149.13265645309397)"


### aggregate passenger data

In [6]:
# Build the aggregated trip table: passenger_df plus the road-network node of each stop.
# DataFrame.copy() gives an independent frame, so the new columns are not added to passenger_df.
agg_passenger_df = passenger_df.copy()

# Column-wise lookups (Series.map) replace the row-wise DataFrame.apply(axis=1): the lookup only
# needs the stop id, and a stop id missing from stop_node_dic still raises KeyError.
agg_passenger_df['origin_stop_node'] = agg_passenger_df['origin_stop'].map(lambda stop: stop_node_dic[stop])
agg_passenger_df['destination_stop_node'] = agg_passenger_df['destination_stop'].map(lambda stop: stop_node_dic[stop])

# NOTE(review): 'origin_suburb' and 'destination_suburb' are listed twice. The duplicates are
# kept so the written CSV keeps its current layout (they come back as 'origin_suburb.1' and
# 'destination_suburb.1' when the file is read again); drop the trailing pair once nothing
# downstream relies on those columns.
agg_columns = [
    'arrival_date', 'passenger_id', 'origin_stop', 'destination_stop',
    'origin_suburb', 'destination_suburb',
    'origin_stop_node', 'destination_stop_node',
    'real_origin_node', 'real_destination_node',
    'origin_suburb', 'destination_suburb',
]
agg_passenger_df = agg_passenger_df[agg_columns]

# The index is written too, so the file gains a leading unnamed column.
agg_passenger_df.to_csv("./02result/passenger_demand/agg_passenger_df.csv")

agg_passenger_df

Unnamed: 0,arrival_date,passenger_id,origin_stop,destination_stop,origin_suburb,destination_suburb,origin_stop_node,destination_stop_node,real_origin_node,real_destination_node,origin_suburb.1,destination_suburb.1
0,2024-05-01 07:43:20,2024050411530565000059484,2268,3356,102,629,"(-35.33554365516688, 149.10050423547844)","(-35.28082424007109, 149.13154628355372)","(-35.33292, 149.094179)","(-35.277588534181575, 149.1320861675914)",102,629
1,2024-05-01 08:16:22,2024050411530565000044270,2803,2818,645,664,"(-35.324151984803265, 149.0363062176146)","(-35.3063169704355, 149.02612728040793)","(-35.32504246088015, 149.03357150124833)","(-35.303830114628965, 149.03083196151215)",645,664
2,2024-05-01 15:35:57,2024050411530565000110501,2817,2805,664,645,"(-35.3063169704355, 149.02612728040793)","(-35.324960316794765, 149.03633577184365)","(-35.303204217587705, 149.0208378421446)","(-35.3242318340623, 149.033536718141)",664,645
3,2024-05-01 10:08:04,2024050411530565000032280,2508,2206,651,680,"(-35.331837653168115, 149.03488880480825)","(-35.34238126064029, 149.0534600239524)","(-35.337166235707556, 149.02857977040554)","(-35.324878223388644, 149.05847758841566)",651,680
4,2024-05-01 10:25:10,2024050411530565000032280,2205,4530,680,629,"(-35.34238126064029, 149.0534600239524)","(-35.27685713442521, 149.12693230295977)","(-35.320316732345574, 149.05627002720917)","(-35.28012950560014, 149.13539902115758)",680,629
...,...,...,...,...,...,...,...,...,...,...,...,...
1250978,2024-05-31 12:01:05,2024060410360838000226095,2373,4333,146,23,"(-35.31313537430693, 149.14386494609437)","(-35.2226342251234, 149.0190663380898)","(-35.317101849769045, 149.1522537015997)","(-35.22741181931549, 149.01116179571736)",146,23
1250979,2024-05-31 08:35:37,2024060410360838000164639,3321,2258,146,145,"(-35.31980496869861, 149.14694890607882)","(-35.30588734331787, 149.13314142806104)","(-35.31680870130822, 149.14723358627225)","(-35.306076, 149.133149)",146,145
1250980,2024-05-31 17:23:11,2024060410360838000187587,2259,3360,145,146,"(-35.30588734331787, 149.13314142806104)","(-35.31954371128334, 149.14775313422848)","(-35.30924845652158, 149.13236222673416)","(-35.31431347889792, 149.1506838660207)",145,146
1250981,2024-05-31 17:14:55,2024060410360838000192372,2925,4530,45,629,"(-35.31505780913499, 149.12871430349455)","(-35.27685713442521, 149.12693230295977)","(-35.31691076239271, 149.12103102496496)","(-35.28349100345248, 149.13265645309397)",45,629


In [7]:
# Reload the aggregated trips from disk and discard the 'Unnamed: 0' column in one chained step.
passenger_data = (
    pd.read_csv("./02result/passenger_demand/agg_passenger_df.csv")
    .drop(columns=['Unnamed: 0'])
)

In [8]:
# Parse arrival_date so the .dt accessors and the timestamp arithmetic below can be used.
passenger_data['arrival_date'] = pd.to_datetime(passenger_data['arrival_date'])

# Calendar date, weekday name and hour of day of every trip.
passenger_data['date'] = passenger_data['arrival_date'].dt.date
passenger_data['weekday'] = passenger_data['arrival_date'].dt.day_name()
passenger_data['hour'] = passenger_data['arrival_date'].dt.hour

# Whole seconds since 1970-01-01 00:00:00. Dividing the elapsed time by a one-second Timedelta
# does not depend on the resolution the datetimes are stored in, whereas casting to int64 and
# dividing by 10**9 is only correct for nanosecond resolution.
# Assumes arrival_date is timezone-naive and has no missing values - TODO confirm for new extracts.
passenger_data['unix_timestamp'] = (
    passenger_data['arrival_date'] - pd.Timestamp("1970-01-01")
) // pd.Timedelta(seconds=1)

# Split the dataframe by day and save each to a separate file

# One sub-frame per calendar date, keyed by the date rendered as a string.
split_dataframes = {str(day): day_df for day, day_df in passenger_data.groupby('date')}

# The loop variables stay bound after the loop; `day_df` is displayed by the following cell.
for day, day_df in split_dataframes.items():
    file_name = f"data_{day}.csv"
    day_df.to_csv("./02result/passenger_demand/"+file_name, index=False)

In [9]:
# Show the frame that `day_df` was left bound to by the last iteration of the per-day export loop.
day_df

Unnamed: 0,arrival_date,passenger_id,origin_stop,destination_stop,origin_suburb,destination_suburb,origin_stop_node,destination_stop_node,real_origin_node,real_destination_node,origin_suburb.1,destination_suburb.1,date,weekday,hour,unix_timestamp
1208469,2024-05-31 06:38:17,2024060410360838000248366,6034,1801,656,592,"(-35.16969757059053, 149.13718913602017)","(-35.18673933745762, 149.13463141056633)","(-35.16856533508597, 149.1359128860454)","(-35.177176371252145, 149.13801748491665)",656,592,2024-05-31,Friday,6,1717137497
1208470,2024-05-31 06:58:50,2024060410360838000248366,7012,5504,592,11,"(-35.184769913369905, 149.13709231285029)","(-35.23825804684835, 149.06257594854475)","(-35.18873133728943, 149.14334846426257)","(-35.239589, 149.075065)",592,11,2024-05-31,Friday,6,1717138730
1208471,2024-05-31 16:18:09,2024060410360838000284377,5501,1801,11,592,"(-35.24014052155236, 149.06365674936117)","(-35.18673933745762, 149.13463141056633)","(-35.22833414143157, 149.06123274668934)","(-35.18676168749536, 149.12462480645564)",11,592,2024-05-31,Friday,16,1717172289
1208472,2024-05-31 10:02:29,2024060410360838000154982,4092,5521,33,11,"(-35.19493932965038, 149.06044666901005)","(-35.24044713311553, 149.07344772660616)","(-35.207268463667624, 149.06710825367725)","(-35.246351, 149.071482)",33,11,2024-05-31,Friday,10,1717149749
1208473,2024-05-31 13:39:32,2024060410360838000258758,5502,4091,11,33,"(-35.23825804684835, 149.06257594854475)","(-35.19493932965038, 149.06044666901005)","(-35.223384378124805, 149.0721490872886)","(-35.19388011925476, 149.0594079586432)",11,33,2024-05-31,Friday,13,1717162772
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1250978,2024-05-31 12:01:05,2024060410360838000226095,2373,4333,146,23,"(-35.31313537430693, 149.14386494609437)","(-35.2226342251234, 149.0190663380898)","(-35.317101849769045, 149.1522537015997)","(-35.22741181931549, 149.01116179571736)",146,23,2024-05-31,Friday,12,1717156865
1250979,2024-05-31 08:35:37,2024060410360838000164639,3321,2258,146,145,"(-35.31980496869861, 149.14694890607882)","(-35.30588734331787, 149.13314142806104)","(-35.31680870130822, 149.14723358627225)","(-35.306076, 149.133149)",146,145,2024-05-31,Friday,8,1717144537
1250980,2024-05-31 17:23:11,2024060410360838000187587,2259,3360,145,146,"(-35.30588734331787, 149.13314142806104)","(-35.31954371128334, 149.14775313422848)","(-35.30924845652158, 149.13236222673416)","(-35.31431347889792, 149.1506838660207)",145,146,2024-05-31,Friday,17,1717176191
1250981,2024-05-31 17:14:55,2024060410360838000192372,2925,4530,45,629,"(-35.31505780913499, 149.12871430349455)","(-35.27685713442521, 149.12693230295977)","(-35.31691076239271, 149.12103102496496)","(-35.28349100345248, 149.13265645309397)",45,629,2024-05-31,Friday,17,1717175695
