In [66]:
import requests
import xlrd
import pandas as pd
import numpy as np
import json
import time as time_module 
import geopandas as gpd
import matplotlib.pyplot as plt
import polyline
from shapely.geometry import LineString, MultiLineString
from shapely.ops import linemerge
import folium
import geopandas as gpd
import pandas as pd

import os
from dotenv import load_dotenv
from shapely.geometry import Point



pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
os.environ['OGR_GEOMETRY_ACCEPT_UNCLOSED_RING'] = 'NO'

# Retrieving api key
load_dotenv("../key.env")
api_key = os.getenv("API_KEY")
TOKEN = os.getenv('ONEMAPTOKEN')

In [3]:
geospatial_train_path = "../datasets/geospatial_layer/TrainStation_Jul2024/RapidTransitSystemStation.shp"
train_stations = pd.read_excel("../datasets/Train_Stations.xls")
geospatial_train_gdf = gpd.read_file(geospatial_train_path)

In [67]:
%run get_bus_info_function.ipynb
bus_services_df = get_bus_info("https://datamall2.mytransport.sg/ltaodataservice/BusServices", api_key)
bus_routes_df = get_bus_info("https://datamall2.mytransport.sg/ltaodataservice/BusRoutes", api_key)
bus_stops_df = get_bus_info("https://datamall2.mytransport.sg/ltaodataservice/BusStops", api_key)

### Data-Pre Processing

In [68]:
bus_routes_stops = pd.merge(bus_routes_df, bus_stops_df, on = "BusStopCode", how = 'left')
bus_routes_stops = bus_routes_stops.merge(
    bus_services_df[['ServiceNo', 'Category']],  # Select only the columns needed for merging
    on='ServiceNo',  # Merge on BusStopCode
    how='left'  # Use 'left' join to keep all rows from bus_routes_stops
)

In [69]:
# Drop duplicates and assign it back to the original DataFrame
bus_routes_stops = bus_routes_stops.drop_duplicates().reset_index(drop=True)

# Filter the DataFrame for rows with 'Category' equal to 'TRUNK'
bus_routes_stops = bus_routes_stops[bus_routes_stops['Category'] == 'TRUNK']

In [71]:
bus_routes_stops.head()

Unnamed: 0,ServiceNo,Operator,Direction,StopSequence,BusStopCode,Distance,WD_FirstBus,WD_LastBus,SAT_FirstBus,SAT_LastBus,SUN_FirstBus,SUN_LastBus,RoadName,Description,Latitude,Longitude,Category
0,10,SBST,1,1,75009,0.0,500,2300,500,2300,500,2300,Tampines Ctrl 1,Tampines Int,1.354076,103.943391,TRUNK
1,10,SBST,1,2,76059,0.6,502,2302,502,2302,502,2302,Tampines Ave 5,Opp Our Tampines Hub,1.352962,103.941652,TRUNK
2,10,SBST,1,3,76069,1.1,504,2304,504,2304,503,2304,Tampines Ave 5,Blk 147,1.348753,103.942086,TRUNK
3,10,SBST,1,4,96289,2.3,508,2308,508,2309,507,2308,Simei Ave,Changi General Hosp,1.340055,103.948381,TRUNK
4,10,SBST,1,5,96109,2.7,509,2310,509,2311,508,2309,Simei Ave,Opp Blk 3012,1.337371,103.950673,TRUNK


In [8]:
# Step 1: Union the geometries for the same station
unioned_gdf = geospatial_train_gdf.dissolve(by='STN_NAM_DE',aggfunc='first')

# Step 2: Calculate the centroid of the unioned polygon
unioned_gdf['centroid'] = unioned_gdf.centroid

# Optional Step: Replace geometry with centroid point
unioned_gdf['geometry'] = unioned_gdf['centroid']

# Reset index to clean up
unioned_gdf.reset_index(inplace=True)

  merged_geom = block.unary_union


In [9]:
# Function to normalize station names in train_stations_df
def normalize_station_name(name):
    return name.strip().upper()  # Ensure names are uppercase for consistent merging

# Apply normalization function to train_stations_df
train_stations['Normalized_Station'] = train_stations['MRT_Station'].apply(normalize_station_name)

# Create a column to append " MRT STATION" or " LRT STATION" based on the MRT_Line
train_stations['Station_MRT_LRT'] = train_stations.apply(
    lambda row: f"{row['Normalized_Station']} MRT STATION" if "LRT" not in row['MRT_Line'] else f"{row['Normalized_Station']} LRT STATION",
    axis=1
)

# Apply normalization to geospatial_train_df
# Strip ' MRT STATION' and ' LRT STATION' and normalize to uppercase
unioned_gdf['Normalized_Station'] = unioned_gdf['STN_NAM_DE'].str.strip().str.upper()

# Perform the merge on 'Station_MRT_LRT' from train_stations and 'Normalized_Station' from unioned_gdf
merged_train_stations = train_stations.merge(
    unioned_gdf,
    how='left',
    left_on='Station_MRT_LRT',
    right_on='Normalized_Station'
)

# Keeping necessary columns
columns_to_keep = ['Station_Code', 'MRT_Station', 'MRT_Line', 'TYP_CD_DES', 'geometry']
merged_train_stations = merged_train_stations[columns_to_keep]

# Check the resulting column names and sample data
print(merged_train_stations.head())


  Station_Code    MRT_Station           MRT_Line TYP_CD_DES  \
0          NS1    Jurong East  North-South Line         MRT   
1          NS2    Bukit Batok  North-South Line         MRT   
2          NS3   Bukit Gombak  North-South Line         MRT   
3          NS4  Choa Chu Kang  North-South Line         MRT   
4          NS5        Yew Tee  North-South Line         MRT   

                      geometry  
0  POINT (17866.487 35045.184)  
1  POINT (18676.448 36790.872)  
2  POINT (18940.178 37860.706)  
3  POINT (18101.056 40790.989)  
4  POINT (18438.643 42159.628)  


In [10]:
#  Convert Pandas DataFrame to a GeoDataFrame
gdf = gpd.GeoDataFrame(merged_train_stations, geometry='geometry')

#  Reproject the GeoDataFrame to EPSG:4326 (WGS 84 - latitude/longitude)
gdf_4326 = gdf.to_crs(epsg=4326)

# Extract Longitude and Latitude from the reprojected geometries
gdf_4326['Longitude'] = gdf_4326.geometry.x
gdf_4326['Latitude'] = gdf_4326.geometry.y

#  Convert back to a Pandas DataFrame (if you don't need the geometry anymore)
merged_train_stations = pd.DataFrame(gdf_4326)

# Removing redundant columns
columns_to_keep = ['Station_Code', 'MRT_Station', 'MRT_Line', 'Longitude', 'Latitude']
merged_train_stations = merged_train_stations[columns_to_keep]
merged_train_stations['Train_Line'] = merged_train_stations['Station_Code'].str.extract(r'([A-Za-z]+)')
merged_train_stations['Station_No'] = merged_train_stations['Station_Code'].str.extract(r'(\d+)').fillna(1).astype(int)
print(merged_train_stations.head())

  Station_Code    MRT_Station           MRT_Line   Longitude  Latitude  \
0          NS1    Jurong East  North-South Line   103.742263  1.333209   
1          NS2    Bukit Batok  North-South Line   103.749541  1.348997   
2          NS3   Bukit Gombak  North-South Line   103.751910  1.358672   
3          NS4  Choa Chu Kang  North-South Line   103.744369  1.385172   
4          NS5        Yew Tee  North-South Line   103.747402  1.397550   

  Train_Line  Station_No  
0         NS           1  
1         NS           2  
2         NS           3  
3         NS           4  
4         NS           5  


### Fetching MRT Routes from OneMapSg

In [25]:
# Function to fetch rail route from OneMap API
def fetch_route(start, end, date, route_time, mode='RAIL'):
    url = 'https://www.onemap.gov.sg/api/public/routingsvc/route'
    params = {
        'start': start,
        'end': end,
        'routeType': 'pt',
        'date': date,
        'time': route_time,
        'mode': mode,
        'maxWalkDistance': 1000,
        'numItineraries': 3
    }
    headers = {
        'Authorization': f'Bearer {TOKEN}'
    }
    try:
        response = requests.get(url, params=params, headers=headers)
        response.raise_for_status()
        data = response.json()
        return data
    except requests.exceptions.RequestException as e:
        print(f"Error fetching route from {start} to {end}: {e}")
        return None

# Main function to iterate through MRT stations dataframe and fetch the route
def main(mrt_stations_df):
    # Iterate through each station to create routes between consecutive stations
    for i in range(len(mrt_stations_df) - 1):
        start_station = mrt_stations_df.iloc[i]
        end_station = mrt_stations_df.iloc[i + 1]

        start = f"{start_station['Latitude']},{start_station['Longitude']}"
        end = f"{end_station['Latitude']},{end_station['Longitude']}"

        date = '10-29-2024'  # Replace with actual date
        route_time = '12:00:00'  # Set initial time to 12:00 PM

        # Attempt fetching the route, with retries if needed
        for _ in range(5):  # Attempt up to 5 times with increasing time intervals
            data = fetch_route(start, end, date, route_time)
            if data is not None:
                break
            # Increment the time by 30 minutes for the next attempt
            hour, minute, second = map(int, route_time.split(':'))
            minute += 15
            if minute >= 60:
                minute -= 60
                hour += 1
            route_time = f"{hour:02d}:{minute:02d}:{second:02d}"

        # Save route to file if route data is fetched
        if data is not None:
            file_path = f"../datasets/routes/onemapsg_mrt/{start_station['Station_Code']}.json"
            with open(file_path, 'w') as f:
                json.dump(data, f, indent=2)
            print(f"Generated {file_path}")

            # Delay to avoid API rate limiting
            time_module.sleep(1)
        else:
            print(f"Failed to fetch route between {start_station['MRT_Station']} and {end_station['MRT_Station']}")

# Run the main function
main(merged_train_stations)

Generated ../datasets/routes/onemapsg_mrt/NS1.json
Generated ../datasets/routes/onemapsg_mrt/NS2.json
Generated ../datasets/routes/onemapsg_mrt/NS3.json


KeyboardInterrupt: 

In [50]:
import json
import glob
import polyline
from shapely.geometry import LineString
import folium
import geopandas as gpd
import pandas as pd

# Function to construct LineString from JSON files for each train line
def construct_train_linestring(station_code, missing_stations):
    file_path = f"../datasets/routes/onemapsg_mrt/{station_code}.json"
    
    # Check if file exists
    if not glob.glob(file_path):
        print(f"Warning: File {file_path} not found. Skipping...")
        missing_stations.append(station_code)
        return None
    
    with open(file_path, 'r') as f:
        data = json.load(f)

    all_coords = []

    # Check if the file contains a valid plan and itineraries
    if "plan" in data and "itineraries" in data["plan"]:
        itinerary = data["plan"]["itineraries"][0]  # Use the first itinerary
        for leg in itinerary['legs']:
            # Include both SUBWAY (MRT) and TRAM (LRT) segments
            if leg['mode'] in ['SUBWAY', 'TRAM'] and 'legGeometry' in leg and 'points' in leg['legGeometry']:
                # Decode the polyline to get the coordinates for this leg
                decoded_coords = polyline.decode(leg['legGeometry']['points'])
                # Reverse the order of latitude and longitude for each coordinate
                reversed_coords = [(lon, lat) for lat, lon in decoded_coords]
                
                # Append coordinates to the overall list
                if all_coords and all_coords[-1] != reversed_coords[0]:
                    # If the last point of the previous leg does not match the first point of this leg, add it to maintain connectivity
                    all_coords.append(reversed_coords[0])
                all_coords.extend(reversed_coords)

    # Construct the LineString from the collected coordinates
    return LineString(all_coords) if all_coords else None

# Main function to process all train routes and save results
def process_train_routes(train_stations):
    train_routes_dict = {}
    missing_stations = []

    # Iterate over each unique train line and station code
    for _, row in train_stations.iterrows():
        train_line = row['Train_Line']
        station_code = row['Station_Code']

        # Construct the LineString for the current station
        train_linestring = construct_train_linestring(station_code, missing_stations)
        if train_linestring is None:
            continue

        # Store the LineString by train line
        if train_line not in train_routes_dict:
            train_routes_dict[train_line] = [train_linestring]
        else:
            train_routes_dict[train_line].append(train_linestring)

    return train_routes_dict, missing_stations



# Process all train routes
train_routes_dict, missing_stations = process_train_routes(merged_train_stations)

# Visualize on Folium map
sg_map = folium.Map(location=[1.3521, 103.8198], zoom_start=12, tiles='CartoDB positron')

# Add each train route to the Folium map
for train_line, linestrings in train_routes_dict.items():
    for line in linestrings:
        folium.PolyLine(
            locations=[(lat, lon) for lon, lat in line.coords],
            color='red',
            weight=3,
            opacity=0.8,
            tooltip=train_line
        ).add_to(sg_map)

# Save the map to an HTML file or display it in a Jupyter notebook
sg_map




#### Additional Coordinates for TEL4

In [51]:
additional_coords = [
    (merged_train_stations[merged_train_stations['Station_Code'] == 'TE23']['Longitude'], merged_train_stations[merged_train_stations['Station_Code'] == 'TE23']['Latitude']),  # Coordinates for TE23 (unchanged)
    (103.874681, 1.297342),  # Mid-point coordinates (switched)
    (103.876271, 1.297529),  # Coordinates for TE24 (switched)
    (103.878307, 1.297751),  # Mid-point coordinates (switched)
    (103.878307, 1.297751),  # Duplicate mid-point (switched)
    (103.881241, 1.297971),  # Coordinates (switched)
    (103.883210, 1.298244),  # Coordinates (switched)
    (103.884594, 1.298201),  # Coordinates (switched)
    (merged_train_stations[merged_train_stations['Station_Code'] == 'TE24']['Longitude'], merged_train_stations[merged_train_stations['Station_Code'] == 'TE24']['Latitude']),  # Coordinates for TE24 (unchanged)
    (103.886982, 1.297915),  # Coordinates (switched)
    (103.889176, 1.297780),  # Coordinates (switched)
    (103.890437, 1.297518),  # Coordinates (switched)
    (103.891654, 1.297517),  # Coordinates (switched)
    (103.892334, 1.297653),  # Coordinates (switched)
    (103.895253, 1.298635),  # Coordinates (switched)
    (103.897019, 1.299316),  # Coordinates (switched)
    (merged_train_stations[merged_train_stations['Station_Code'] == 'TE25']['Longitude'], merged_train_stations[merged_train_stations['Station_Code'] == 'TE25']['Latitude']),  # Coordinates for TE25 (unchanged)
    (103.899893, 1.299735),  # Coordinates (switched)
    (103.901557, 1.300198),  # Coordinates (switched)
    (103.905380, 1.302866),  # Coordinates (switched)
    (merged_train_stations[merged_train_stations['Station_Code'] == 'TE26']['Longitude'], merged_train_stations[merged_train_stations['Station_Code'] == 'TE26']['Latitude']),  # Coordinates for TE26 (unchanged)
    (103.907911, 1.304563),  # Coordinates (switched)
    (103.910318, 1.305671),  # Coordinates (switched)
    (103.913385, 1.306448),  # Coordinates (switched)
    (103.914546, 1.306700),  # Coordinates (switched)
    (merged_train_stations[merged_train_stations['Station_Code'] == 'TE27']['Longitude'], merged_train_stations[merged_train_stations['Station_Code'] == 'TE27']['Latitude'])  # Coordinates for TE27 (unchanged)
]
additional_linestring = LineString(additional_coords)
train_routes_dict['TE'].append(additional_linestring)

  return [float(c) for c in o]


In [52]:
train_routes_dict

{'NS': [<LINESTRING (103.742 1.333, 103.742 1.334, 103.741 1.335, 103.741 1.335, 103...>,
  <LINESTRING (103.749 1.349, 103.75 1.349, 103.75 1.349, 103.75 1.35, 103.75 ...>,
  <LINESTRING (103.752 1.359, 103.752 1.359, 103.752 1.365, 103.752 1.365, 103...>,
  <LINESTRING (103.744 1.385, 103.744 1.385, 103.744 1.386, 103.744 1.386, 103...>,
  <LINESTRING (103.747 1.397, 103.747 1.398, 103.748 1.398, 103.748 1.398, 103...>,
  <LINESTRING (103.762 1.425, 103.764 1.426, 103.764 1.427, 103.764 1.427, 103...>,
  <LINESTRING (103.774 1.433, 103.774 1.433, 103.775 1.433, 103.775 1.433, 103...>,
  <LINESTRING (103.786 1.437, 103.786 1.437, 103.788 1.438, 103.788 1.438, 103...>,
  <LINESTRING (103.801 1.441, 103.801 1.441, 103.804 1.442, 103.804 1.442, 103...>,
  <LINESTRING (103.82 1.449, 103.82 1.449, 103.821 1.449, 103.821 1.449, 103.8...>,
  <LINESTRING (103.83 1.443, 103.83 1.443, 103.83 1.442, 103.83 1.442, 103.83 ...>,
  <LINESTRING (103.835 1.429, 103.835 1.428, 103.835 1.428, 103.835 1.

In [86]:
train_lines_data = []
for line_name, segments in train_routes_dict.items():
    for segment in segments:
        train_lines_data.append({'TrainLine': line_name, 'geometry': segment})

# Create GeoDataFrame from the list
train_lines_gdf = gpd.GeoDataFrame(train_lines_data, crs="EPSG:4326")

# Export to GeoJSON
train_lines_gdf.to_file("train_lines.geojson", driver="GeoJSON")

print("GeoJSON file 'train_lines.geojson' created successfully.")

GeoJSON file 'train_lines.geojson' created successfully.


In [54]:
bus_routes_gdf = gpd.read_file('../datasets/routes/filtered_bus_routes.geojson')

In [None]:
bus_routes_gdf

In [75]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString, MultiLineString

# Convert bus stops (Pandas DataFrame) to GeoDataFrame with geometry points
bus_routes_stops_df = gpd.GeoDataFrame(
    bus_routes_stops,
    geometry=gpd.points_from_xy(bus_routes_stops['Longitude'], bus_routes_stops['Latitude']),
    crs="EPSG:4326"
)

# Define the proximity threshold for determining parallel segments
PROXIMITY_THRESHOLD = 0.3  # in kilometers (300 meters)

def calculate_parallelness(bus_routes_stops_df, bus_routes_gdf, train_routes_dict):
    parallel_scores = []

    # Iterate over each route in bus_routes_gdf GeoDataFrame
    for _, bus_route in bus_routes_gdf.iterrows():
        route_key = bus_route['index']  # Route identifier like '10_direction_1'
        
        # Only consider routes in one direction
        if '_direction_1' not in route_key:
            continue

        # Extract service number from route_key
        service_no = route_key.split('_')[0]

        # Select all stops for this route in Direction 1, sorted by StopSequence
        route_stops = bus_routes_stops_df[
            (bus_routes_stops_df['ServiceNo'] == service_no) &
            (bus_routes_stops_df['Direction'] == 1)
        ].sort_values('StopSequence')
        
        consecutive_parallel = 0
        parallel_count = 0
        total_stops = len(route_stops)

        # Check each stop for proximity to MRT lines
        for i in range(total_stops - 1):
            bus_stop_geom = route_stops.iloc[i].geometry
            
            # Check proximity to each MRT line segment
            is_parallel = False
            for mrt_line in train_routes_dict.values():
                for segment in mrt_line:
                    if bus_stop_geom.distance(segment) <= PROXIMITY_THRESHOLD:
                        is_parallel = True
                        break
                if is_parallel:
                    break

            # Track consecutive parallel stops
            if is_parallel:
                consecutive_parallel += 1
            else:
                # Add to parallel count if consecutive parallel stops > 1
                if consecutive_parallel > 1:
                    parallel_count += consecutive_parallel
                consecutive_parallel = 0  # Reset if no longer parallel

        # Final check for last set of consecutive parallel stops
        if consecutive_parallel > 1:
            parallel_count += consecutive_parallel

        # Calculate parallel ratio
        parallel_ratio = parallel_count / total_stops if total_stops > 0 else 0
        parallel_scores.append((route_key, parallel_ratio))

    # Sort by parallel ratio in descending order
    parallel_scores.sort(key=lambda x: x[1], reverse=True)

    return parallel_scores

# Run the function and print the top-ranked routes
parallel_scores = calculate_parallelness(bus_routes_stops_df, bus_routes_gdf, train_routes_dict)



Route: 70M_direction_1, Parallelness Score: 0.9904761904761905
Route: 63_direction_1, Parallelness Score: 0.9901960784313726
Route: 24_direction_1, Parallelness Score: 0.99
Route: 51_direction_1, Parallelness Score: 0.9897959183673469
Route: 89_direction_1, Parallelness Score: 0.9894736842105263
Route: 61_direction_1, Parallelness Score: 0.9893617021276596
Route: 975_direction_1, Parallelness Score: 0.9891304347826086
Route: 961M_direction_1, Parallelness Score: 0.9887640449438202
Route: 62_direction_1, Parallelness Score: 0.9886363636363636
Route: 53_direction_1, Parallelness Score: 0.9882352941176471


In [78]:
for route, score in parallel_scores[:30]:  # Show top 10 routes
    print(f"Route: {route}, Parallelness Score: {score}")

Route: 70M_direction_1, Parallelness Score: 0.9904761904761905
Route: 63_direction_1, Parallelness Score: 0.9901960784313726
Route: 24_direction_1, Parallelness Score: 0.99
Route: 51_direction_1, Parallelness Score: 0.9897959183673469
Route: 89_direction_1, Parallelness Score: 0.9894736842105263
Route: 61_direction_1, Parallelness Score: 0.9893617021276596
Route: 975_direction_1, Parallelness Score: 0.9891304347826086
Route: 961M_direction_1, Parallelness Score: 0.9887640449438202
Route: 62_direction_1, Parallelness Score: 0.9886363636363636
Route: 53_direction_1, Parallelness Score: 0.9882352941176471
Route: 961_direction_1, Parallelness Score: 0.9882352941176471
Route: 64_direction_1, Parallelness Score: 0.9878048780487805
Route: 9_direction_1, Parallelness Score: 0.9878048780487805
Route: 14_direction_1, Parallelness Score: 0.9875
Route: 160_direction_1, Parallelness Score: 0.9875
Route: 67_direction_1, Parallelness Score: 0.9873417721518988
Route: 30_direction_1, Parallelness Score