In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString
import folium
from folium import GeoJson

In [52]:
# Load the two raw inputs: the per-service stop sequences and the bus-stop list.
routes_csv_path = "../data/bus_routes_full.csv"
stops_csv_path = "../data/bus_stops_full.csv"
bus_routes, bus_stops = pd.read_csv(routes_csv_path), pd.read_csv(stops_csv_path)

In [53]:
# Peek at the first rows of the route table to confirm the columns loaded as expected.
bus_routes.head(n=5)

Unnamed: 0,ServiceNo,Operator,Direction,StopSequence,BusStopCode,Distance,WD_FirstBus,WD_LastBus,SAT_FirstBus,SAT_LastBus,SUN_FirstBus,SUN_LastBus
0,10,SBST,1,1,75009,0.0,500,2300,500,2300,500,2300
1,10,SBST,1,2,76059,0.6,502,2302,502,2302,502,2302
2,10,SBST,1,3,76069,1.1,504,2304,504,2304,503,2304
3,10,SBST,1,4,96289,2.3,508,2308,508,2309,507,2308
4,10,SBST,1,5,96109,2.7,509,2310,509,2311,508,2309


In [54]:
# Peek at the first rows of the stop table (stop code, names and coordinates).
bus_stops.head(n=5)

Unnamed: 0,BusStopCode,RoadName,Description,Latitude,Longitude
0,1012,Victoria St,Hotel Grand Pacific,1.296848,103.852536
1,1013,Victoria St,St. Joseph's Ch,1.29771,103.853225
2,1019,Victoria St,Bras Basah Cplx,1.29699,103.853022
3,1029,Nth Bridge Rd,Opp Natl Lib,1.296673,103.854414
4,1039,Nth Bridge Rd,Bugis Cube,1.298208,103.855491


In [55]:
# Attach each stop's road name, description and coordinates to the route rows.
# A left join keeps every route row, even one whose stop code has no match.
bus_routes_combined = bus_routes.merge(bus_stops, how='left', on='BusStopCode')
bus_routes_combined.head(n=5)

Unnamed: 0,ServiceNo,Operator,Direction,StopSequence,BusStopCode,Distance,WD_FirstBus,WD_LastBus,SAT_FirstBus,SAT_LastBus,SUN_FirstBus,SUN_LastBus,RoadName,Description,Latitude,Longitude
0,10,SBST,1,1,75009,0.0,500,2300,500,2300,500,2300,Tampines Ctrl 1,Tampines Int,1.354076,103.943391
1,10,SBST,1,2,76059,0.6,502,2302,502,2302,502,2302,Tampines Ave 5,Opp Our Tampines Hub,1.352962,103.941652
2,10,SBST,1,3,76069,1.1,504,2304,504,2304,503,2304,Tampines Ave 5,Blk 147,1.348753,103.942086
3,10,SBST,1,4,96289,2.3,508,2308,508,2309,507,2308,Simei Ave,Changi General Hosp,1.340055,103.948381
4,10,SBST,1,5,96109,2.7,509,2310,509,2311,508,2309,Simei Ave,Opp Blk 3012,1.337371,103.950673


In [56]:
# Build the point geometry for every stop in one vectorised call instead of a
# row-wise apply. Shapely expects (x, y) order, i.e. (longitude, latitude).
bus_routes_combined['geometry'] = gpd.points_from_xy(
    bus_routes_combined['Longitude'], bus_routes_combined['Latitude']
)

In [57]:
# Promote the merged table to a GeoDataFrame. The points are longitude/latitude in
# degrees, so declare WGS84 explicitly; a frame without a CRS cannot be reprojected.
bus_routes_combined = gpd.GeoDataFrame(bus_routes_combined, geometry='geometry', crs="EPSG:4326")

In [58]:
# Check that the geometry column now holds one point per route row.
bus_routes_combined.head(n=5)

Unnamed: 0,ServiceNo,Operator,Direction,StopSequence,BusStopCode,Distance,WD_FirstBus,WD_LastBus,SAT_FirstBus,SAT_LastBus,SUN_FirstBus,SUN_LastBus,RoadName,Description,Latitude,Longitude,geometry
0,10,SBST,1,1,75009,0.0,500,2300,500,2300,500,2300,Tampines Ctrl 1,Tampines Int,1.354076,103.943391,POINT (103.94339 1.35408)
1,10,SBST,1,2,76059,0.6,502,2302,502,2302,502,2302,Tampines Ave 5,Opp Our Tampines Hub,1.352962,103.941652,POINT (103.94165 1.35296)
2,10,SBST,1,3,76069,1.1,504,2304,504,2304,503,2304,Tampines Ave 5,Blk 147,1.348753,103.942086,POINT (103.94209 1.34875)
3,10,SBST,1,4,96289,2.3,508,2308,508,2309,507,2308,Simei Ave,Changi General Hosp,1.340055,103.948381,POINT (103.94838 1.34005)
4,10,SBST,1,5,96109,2.7,509,2310,509,2311,508,2309,Simei Ave,Opp Blk 3012,1.337371,103.950673,POINT (103.95067 1.33737)


In [64]:
# Persist the merged route/stop table; the positional index is not written out.
combined_csv_path = "../data/bus_routes_combined.csv"
bus_routes_combined.to_csv(combined_csv_path, index=False)

In [59]:
# The stop table was left-joined onto the routes, so a route row whose stop code had
# no match carries no coordinates. Drop those rows so they cannot become NaN vertices.
stops_with_coords = bus_routes_combined.dropna(subset=['Latitude', 'Longitude'])


def _stops_to_line(points):
    """Return a LineString through ``points`` in order, or None when fewer than two remain."""
    vertices = points.tolist()
    # A LineString needs at least two vertices; shorter groups are skipped, not raised on.
    return LineString(vertices) if len(vertices) >= 2 else None


routes = (
    stops_with_coords.sort_values(by=['ServiceNo', 'Direction', 'StopSequence'])  # stops in travel order
    .groupby(['ServiceNo', 'Direction'])['geometry']
    .apply(_stops_to_line)  # one line per service and direction
    .dropna()  # discard groups that could not form a line
    .reset_index()
)

# Same lines as a GeoDataFrame tagged as WGS84 longitude/latitude.
bus_routes_ls = gpd.GeoDataFrame(routes, geometry='geometry', crs="EPSG:4326")

In [60]:
# Inspect the per-service, per-direction lines in the plain DataFrame.
routes.head(n=5)

Unnamed: 0,ServiceNo,Direction,geometry
0,10,1,"LINESTRING (103.94339 1.35408, 103.94165 1.352..."
1,10,2,"LINESTRING (103.76988 1.29425, 103.76908 1.292..."
2,100,1,"LINESTRING (103.87169 1.35047, 103.87205 1.346..."
3,100,2,"LINESTRING (103.78932 1.31107, 103.78969 1.309..."
4,100A,1,"LINESTRING (103.87169 1.35047, 103.87205 1.346..."


In [61]:
# Inspect the same lines in the GeoDataFrame form.
bus_routes_ls.head(n=5)

Unnamed: 0,ServiceNo,Direction,geometry
0,10,1,"LINESTRING (103.94339 1.35408, 103.94165 1.352..."
1,10,2,"LINESTRING (103.76988 1.29425, 103.76908 1.292..."
2,100,1,"LINESTRING (103.87169 1.35047, 103.87205 1.346..."
3,100,2,"LINESTRING (103.78932 1.31107, 103.78969 1.309..."
4,100A,1,"LINESTRING (103.87169 1.35047, 103.87205 1.346..."


In [12]:
# Load the MRT station table (names, station codes and coordinates).
mrt_csv_path = "../data/MRT Stations.csv"
mrt_stations = pd.read_csv(mrt_csv_path)

In [13]:
# Peek at the station table to see how station codes and coordinates are stored.
mrt_stations.head(n=5)

Unnamed: 0.1,Unnamed: 0,OBJECTID,STN_NAME,STN_NO,geometry,Latitude,Longitude
0,0,1,EUNOS MRT STATION,EW7,POINT (103.9032524667383 1.319778951553637),1.319779,103.903252
1,1,2,CHINESE GARDEN MRT STATION,EW25,POINT (103.7325967380734 1.342352820874744),1.342353,103.732597
2,2,3,KHATIB MRT STATION,NS14,POINT (103.8329799077383 1.417383370153547),1.417383,103.83298
3,3,4,KRANJI MRT STATION,NS7,POINT (103.7621654109002 1.425177698770448),1.425178,103.762165
4,4,5,REDHILL MRT STATION,EW18,POINT (103.816816670149 1.289562726402453),1.289563,103.816817


In [14]:
# Keep only stations whose code contains a Downtown Line ("DT...") code. The word
# boundary lets combined interchange codes such as "EW2/DT32" match as well.
is_downtown = mrt_stations['STN_NO'].str.contains(r'\bDT', regex=True)
DT_stations = mrt_stations.loc[is_downtown]

In [15]:
# Confirm the filter kept both plain DT codes and combined interchange codes.
DT_stations.head(n=5)

Unnamed: 0.1,Unnamed: 0,OBJECTID,STN_NAME,STN_NO,geometry,Latitude,Longitude
10,10,11,TAMPINES MRT STATION,EW2/DT32,POINT (103.945148688649 1.353301356342435),1.353301,103.945149
17,17,18,EXPO MRT STATION,CG1/DT35,POINT (103.9615482107079 1.334549777837812),1.33455,103.961548
83,83,94,MACPHERSON MRT STATION,CC10/DT26,POINT (103.8902870313661 1.326345371661258),1.326345,103.890287
85,85,96,TAMPINES EAST MRT STATION,DT33,POINT (103.9546344629294 1.356191483037002),1.356191,103.954634
86,86,97,TAMPINES WEST MRT STATION,DT31,POINT (103.9384369713533 1.34551530530169),1.345515,103.938437


In [16]:
# Pull the Downtown Line code (e.g. "DT32") out of possibly combined codes such as
# "EW2/DT32". expand=False makes str.extract return a Series, so one column is
# assigned directly instead of relying on a one-column DataFrame being unpacked.
DT_stations = DT_stations.assign(
    DT_Code=DT_stations['STN_NO'].str.extract(r'(DT\d+)', expand=False)
)
DT_stations.head()

Unnamed: 0.1,Unnamed: 0,OBJECTID,STN_NAME,STN_NO,geometry,Latitude,Longitude,DT_Code
10,10,11,TAMPINES MRT STATION,EW2/DT32,POINT (103.945148688649 1.353301356342435),1.353301,103.945149,DT32
17,17,18,EXPO MRT STATION,CG1/DT35,POINT (103.9615482107079 1.334549777837812),1.33455,103.961548,DT35
83,83,94,MACPHERSON MRT STATION,CC10/DT26,POINT (103.8902870313661 1.326345371661258),1.326345,103.890287,DT26
85,85,96,TAMPINES EAST MRT STATION,DT33,POINT (103.9546344629294 1.356191483037002),1.356191,103.954634,DT33
86,86,97,TAMPINES WEST MRT STATION,DT31,POINT (103.9384369713533 1.34551530530169),1.345515,103.938437,DT31


In [17]:
# Use the numeric part of the DT code as the sort key. expand=False keeps the
# extracted digits as a Series so astype(int) and assign operate on one column.
DT_stations = DT_stations.assign(
    DT_Number=DT_stations['DT_Code'].str.extract(r'(\d+)', expand=False).astype(int)
)
# Order the stations by DT number and renumber the index from 0.
DT_sorted = DT_stations.sort_values(by='DT_Number').reset_index(drop=True)
# The helper columns were only needed for sorting.
DT_sorted = DT_sorted.drop(columns=['DT_Code', 'DT_Number'])
DT_sorted.head()

Unnamed: 0.1,Unnamed: 0,OBJECTID,STN_NAME,STN_NO,geometry,Latitude,Longitude
0,143,171,BUKIT PANJANG MRT STATION,DT1,POINT (103.7615351147329 1.379002116717668),1.379002,103.761535
1,144,172,CASHEW MRT STATION,DT2,POINT (103.7646944223108 1.369369831064344),1.36937,103.764694
2,116,136,HILLVIEW MRT STATION,DT3,POINT (103.7674182544565 1.362344868527861),1.362345,103.767418
3,165,199,HUME MRT STATION,DT4,POINT (103.769099241508 1.354517607525489),1.354518,103.769099
4,120,140,BEAUTY WORLD MRT STATION,DT5,POINT (103.7757942852948 1.34122317571135),1.341223,103.775794


In [18]:
# Replace the WKT text read from the CSV with real shapely points in
# (longitude, latitude) order.
DT_sorted['geometry'] = [
    Point(lon, lat)
    for lon, lat in zip(DT_sorted['Longitude'], DT_sorted['Latitude'])
]
DT_sorted.head(n=5)

Unnamed: 0.1,Unnamed: 0,OBJECTID,STN_NAME,STN_NO,geometry,Latitude,Longitude
0,143,171,BUKIT PANJANG MRT STATION,DT1,POINT (103.76153511473294 1.379002116717668),1.379002,103.761535
1,144,172,CASHEW MRT STATION,DT2,POINT (103.76469442231075 1.3693698310643436),1.36937,103.764694
2,116,136,HILLVIEW MRT STATION,DT3,POINT (103.76741825445647 1.3623448685278603),1.362345,103.767418
3,165,199,HUME MRT STATION,DT4,POINT (103.76909924150804 1.3545176075254892),1.354518,103.769099
4,120,140,BEAUTY WORLD MRT STATION,DT5,POINT (103.77579428529484 1.34122317571135),1.341223,103.775794


In [19]:
# Station points are longitude/latitude, so tag the frame as WGS84. A later cell
# reads DT_sorted.crs and would otherwise receive None.
DT_sorted = gpd.GeoDataFrame(DT_sorted, geometry='geometry', crs="EPSG:4326")

# Join the stations, already ordered by DT number, into one line for the Downtown Line.
route_line = LineString(DT_sorted['geometry'].tolist())
DT_ls = gpd.GeoDataFrame({'Line': ['DTL'], 'geometry': [route_line]}, crs="EPSG:4326")

DT_ls.head()

Unnamed: 0,Line,geometry
0,DTL,"LINESTRING (103.76154 1.379, 103.76469 1.36937..."


In [65]:
# Reproject both layers to EPSG:32648 (WGS 84 / UTM zone 48N) so that lengths and
# buffer distances are expressed in metres.
metric_epsg = 32648
DT_ls = DT_ls.to_crs(epsg=metric_epsg)
bus_routes_ls = bus_routes_ls.to_crs(epsg=metric_epsg)

# Buffer the Downtown Line by 200 units of the projected CRS (metres) and merge
# the result into a single geometry.
dt_corridor = DT_ls.buffer(200)
DT_buffer_200 = dt_corridor.union_all()

In [66]:
def calculate_overlap(route, buffer_geom=None):
    """Measure how much of a route lies inside a buffer geometry.

    Parameters
    ----------
    route : geometry
        Route geometry exposing ``intersection()`` and ``length``.
    buffer_geom : geometry, optional
        Area to intersect the route with. When omitted, the notebook-level
        ``DT_buffer_200`` is used, which keeps existing single-argument calls
        (e.g. ``Series.apply(calculate_overlap)``) working unchanged.

    Returns
    -------
    pd.Series
        ``'Overlap Length'`` (length of the part of the route inside the buffer)
        and ``'Overlap Percentage'`` (that length as a share of the route length,
        or 0 when the route has zero length).
    """
    if buffer_geom is None:
        # Resolved at call time so the default always reflects the current buffer.
        buffer_geom = DT_buffer_200
    overlap_length = route.intersection(buffer_geom).length
    route_length = route.length
    # Guard against zero-length routes to avoid a division by zero.
    overlap_percentage = (overlap_length / route_length) * 100 if route_length > 0 else 0
    return pd.Series({'Overlap Length': overlap_length, 'Overlap Percentage': overlap_percentage})

In [67]:
# Score every route line against the Downtown Line buffer; each geometry yields one
# row holding the overlap length and percentage.
overlap_stats = bus_routes_ls['geometry'].apply(calculate_overlap)

# Store the scores on a copy so the projected route layer itself stays untouched.
buffer_overlap_200 = bus_routes_ls.copy()
buffer_overlap_200[['Overlap Length', 'Overlap Percentage']] = overlap_stats

In [68]:
# Collapse to one row per service and direction. The route layer was itself built by
# grouping on these two keys, so each group contains a single row and the sum simply
# carries its values through.
route_keys = ['ServiceNo', 'Direction']
overlap_columns = ['Overlap Length', 'Overlap Percentage']
overlap_by_route = buffer_overlap_200.groupby(route_keys)[overlap_columns].sum()
service_overlap = overlap_by_route.reset_index()
print(service_overlap)

    ServiceNo  Direction  Overlap Length  Overlap Percentage
0          10          1     1313.061318            4.463913
1          10          2     1312.544299            4.465529
2         100          1     1214.744649            5.563152
3         100          2     1214.398344            5.733302
4        100A          1      405.831949            9.254008
..        ...        ...             ...                 ...
717       992          1        0.000000            0.000000
718       992          2        0.000000            0.000000
719       993          1        0.000000            0.000000
720        9A          1     1327.436287           18.897593
721        9B          1     1327.436287           12.731487

[722 rows x 4 columns]


In [73]:
# Rank the service/direction pairs from the highest overlap share to the lowest
# and show the ten highest.
service_overlap_sorted = service_overlap.sort_values('Overlap Percentage', ascending=False)
service_overlap_sorted.head(n=10)

Unnamed: 0,ServiceNo,Direction,Overlap Length,Overlap Percentage
478,67,2,17698.606243,57.102368
252,23,1,14654.502783,52.717147
477,67,1,15720.74492,50.88497
164,170,1,14330.565692,48.195159
165,170,2,13605.444222,45.835109
198,184,1,8308.948652,37.996728
655,961M,2,11967.176097,34.309908
653,961,2,11188.289144,33.692184
654,961M,1,11613.438797,33.190888
446,65,1,7142.117194,32.908202


In [70]:
# The ranking is per (ServiceNo, Direction), so the matching route lines must be
# selected by that pair. Filtering on ServiceNo alone would also pull in a direction
# of a ranked service that did not itself make the top five.
top5_rows = service_overlap_sorted.head(5)
top5_overlap = top5_rows['ServiceNo'].tolist()  # service numbers of the top five rows
top5_pairs = set(zip(top5_rows['ServiceNo'], top5_rows['Direction']))

# A boolean mask keeps the original index and row order of `routes`.
is_top5 = [pair in top5_pairs for pair in zip(routes['ServiceNo'], routes['Direction'])]
top5_bus_routes = routes.loc[is_top5]

In [71]:
# List the route lines selected for plotting.
top5_bus_routes.head(n=5)

Unnamed: 0,ServiceNo,Direction,geometry
164,170,1,"LINESTRING (103.8565 1.30359, 103.85502 1.3043..."
165,170,2,"LINESTRING (103.74403 1.4939, 103.76827 1.4654..."
252,23,1,"LINESTRING (103.94339 1.35408, 103.94165 1.352..."
477,67,1,"LINESTRING (103.74579 1.38587, 103.743 1.38257..."
478,67,2,"LINESTRING (103.94339 1.35408, 103.94165 1.352..."


In [78]:
# Base map centred on (1.3521, 103.8198).
sgmap = folium.Map(location=[1.3521, 103.8198], zoom_start=12)

# folium wants (lat, lon) pairs, the reverse of the line's (lon, lat) vertices.
coords = [xy[::-1] for xy in route_line.coords]

# Downtown Line drawn as a single blue polyline.
dt_polyline = folium.PolyLine(
    locations=coords,
    color='blue',
    weight=5,
    opacity=0.8,
    tooltip="Downtown Line",
)
dt_polyline.add_to(sgmap)

# One train marker per station, with the station name as its popup.
station_fields = zip(DT_sorted['STN_NAME'], DT_sorted['Latitude'], DT_sorted['Longitude'])
for stn_name, stn_lat, stn_lon in station_fields:
    station_icon = folium.Icon(color='blue', icon='train', prefix='fa')
    folium.Marker(location=[stn_lat, stn_lon], popup=stn_name, icon=station_icon).add_to(sgmap)

In [79]:
# Draw the same 200 m corridor that the overlap analysis used: take the metric
# buffer and reproject it to longitude/latitude for folium. Buffering the lon/lat
# line by a fixed number of degrees would only approximate that distance.
gdf_buffer = gpd.GeoDataFrame(geometry=[DT_buffer_200], crs=DT_ls.crs).to_crs(epsg=4326)
map_buffer = gdf_buffer.geometry.iloc[0]
buffer_geojson = gdf_buffer.to_json()

# Semi-transparent blue overlay for the corridor.
folium.GeoJson(
    buffer_geojson,
    style_function=lambda x: {
        'fillColor': 'blue',
        'color': 'blue',
        'weight': 1,
        'fillOpacity': 0.5
    },
).add_to(sgmap)

<folium.features.GeoJson at 0x125beee50>

In [82]:
# Line and marker colour per service; services not listed fall back to black.
colors = {'67': 'red', '23': 'green', '170': 'orange'}

route_fields = zip(
    top5_bus_routes['ServiceNo'],
    top5_bus_routes['Direction'],
    top5_bus_routes['geometry'],
)
for service_no, direction, line in route_fields:
    route_color = colors.get(service_no, 'black')

    # folium wants (lat, lon) pairs, the reverse of the line's (lon, lat) vertices.
    coords = [xy[::-1] for xy in line.coords]
    route_polyline = folium.PolyLine(
        locations=coords,
        color=route_color,
        weight=5,
        opacity=0.8,
        tooltip=f"{service_no} - Direction {direction}",
    )
    route_polyline.add_to(sgmap)

    # Mark both ends of the route in the route's colour.
    start_point, end_point = coords[0], coords[-1]
    for terminus in (start_point, end_point):
        folium.Marker(location=terminus, icon=folium.Icon(color=route_color)).add_to(sgmap)

In [83]:
# Render the map with the Downtown Line, its buffer and the bus routes added in the cells above.
sgmap