## Thomson-East Coast Line Analysis


In [1]:
import requests
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from haversine import haversine
import osmnx as ox
from dotenv import load_dotenv
import os
import xlrd


In [2]:
# Lift the row cap so displayed DataFrames are never truncated.
pd.options.display.max_rows = None

In [3]:
# Lift the row cap so displayed DataFrames are never truncated.
pd.options.display.max_rows = None
# NOTE(review): presumably a GDAL/OGR setting that rejects polygons with
# unclosed rings when shapefiles are read - confirm against the GDAL docs.
os.environ.update({'OGR_GEOMETRY_ACCEPT_UNCLOSED_RING': 'NO'})

# Read ../key.env into the process environment, then look up the API key
# (None when the variable is not defined).
load_dotenv("../key.env")
api_key = os.environ.get("API_KEY")

In [19]:
# Execute the helper notebooks in this session.
# NOTE(review): presumably these define get_bus_info (called below) and the
# geospatial helpers - confirm by opening the two notebooks.
%run get_bus_info_function.ipynb
%run get_geospatial_function.ipynb
# Download the bus services, routes and stops datasets from LTA DataMall.
bus_services_df = get_bus_info("https://datamall2.mytransport.sg/ltaodataservice/BusServices", api_key)
bus_routes_df = get_bus_info("https://datamall2.mytransport.sg/ltaodataservice/BusRoutes", api_key)
bus_stops_df = get_bus_info("https://datamall2.mytransport.sg/ltaodataservice/BusStops", api_key)
# Local inputs: the rapid-transit station shapefile and a station spreadsheet.
geospatial_train_path = "../datasets/geospatial_layer/TrainStation_Jul2024/RapidTransitSystemStation.shp"
train_stations = pd.read_excel("../datasets/Train_Stations.xls")
# NOTE: the recorded output of this cell is a DataSourceError reporting that
# the shapefile path above does not exist; the path is relative to the
# directory the notebook is run from.
geospatial_train_gdf = gpd.read_file(geospatial_train_path)

DataSourceError: ../datasets/geospatial_layer/TrainStation_Jul2024/RapidTransitSystemStation.shp: No such file or directory

In [21]:
# Populate the environment from ../key.env and fetch the DataMall API key
# (None when the variable is not defined).
load_dotenv("../key.env")
api_key = os.environ.get("API_KEY")

# Load data from LTA Datamall API for Bus services, routes, and stops
def get_bus_info(url, api_key):
    """Download every record of an LTA DataMall dataset into a DataFrame.

    DataMall caps each response at 500 records, so a single request only
    returns the first page of the larger datasets (routes, stops). The
    endpoint is therefore queried repeatedly with an increasing ``$skip``
    offset until it returns an empty page.

    Args:
        url: Dataset endpoint, e.g. ".../ltaodataservice/BusStops".
        api_key: DataMall account key, sent in the ``AccountKey`` header.

    Returns:
        A pandas DataFrame with one row per record found in the "value"
        list of the responses (empty when the dataset has no records).

    Raises:
        requests.HTTPError: If the service answers with a non-2xx status
            (for example when the API key is missing or invalid).
        requests.Timeout: If a request takes longer than 30 seconds.
    """
    headers = {'AccountKey': api_key, 'accept': 'application/json'}
    page_size = 500  # maximum number of records DataMall returns per call
    # Keep "$skip" literal in the URL, as shown in the DataMall user guide.
    separator = '&' if '?' in url else '?'

    records = []
    skip = 0
    while True:
        response = requests.get(
            f"{url}{separator}$skip={skip}", headers=headers, timeout=30
        )
        # Fail with a clear HTTP error instead of a confusing KeyError or
        # JSON decoding error further down.
        response.raise_for_status()
        page = response.json()['value']
        if not page:
            break
        records.extend(page)
        skip += page_size

    return pd.DataFrame(records)

# Fetch the bus services, routes and stops tables through get_bus_info.
bus_services_df = get_bus_info("https://datamall2.mytransport.sg/ltaodataservice/BusServices", api_key)
bus_routes_df = get_bus_info("https://datamall2.mytransport.sg/ltaodataservice/BusRoutes", api_key)
bus_stops_df = get_bus_info("https://datamall2.mytransport.sg/ltaodataservice/BusStops", api_key)

# Load train station data for Thomson-East Coast Line (TEL)
# NOTE: the recorded output of this cell is a DataSourceError reporting that
# this shapefile path does not exist; the path is relative to the directory
# the notebook is run from.
geospatial_train_path = "../datasets/geospatial_layer/TrainStation_TEL.shp"
geospatial_train_gdf = gpd.read_file(geospatial_train_path)

DataSourceError: ../datasets/geospatial_layer/TrainStation_TEL.shp: No such file or directory

In [None]:
# Data pre-processing
# Attach service attributes to every route row, then attach the stop record
# (which carries Latitude/Longitude) for each BusStopCode.
bus_routes_services = pd.merge(bus_routes_df, bus_services_df, on=["ServiceNo", "Direction"], how="left")
merged_bus_routes = pd.merge(bus_routes_services, bus_stops_df, on='BusStopCode', how='left')
# Route rows whose stop has no coordinates cannot be used for distances.
merged_bus_routes = merged_bus_routes.dropna(subset=['Longitude', 'Latitude'])

# Process train stations (specific to TEL)
# Collapse each station geometry to its centroid while still in the file's
# original CRS, then reproject the resulting points.
geospatial_train_gdf['centroid'] = geospatial_train_gdf.centroid
geospatial_train_gdf['geometry'] = geospatial_train_gdf['centroid']
geospatial_train_gdf = geospatial_train_gdf.to_crs(epsg=4326)  # Reproject to WGS 84

# Extract relevant columns into a standalone table.
# The explicit copy() matters: adding columns to (or renaming in place) a
# column selection of geospatial_train_gdf is chained assignment, which
# triggers pandas' SettingWithCopyWarning and may act on a temporary object.
train_stations = (
    geospatial_train_gdf[['STN_NAM_DE', 'geometry']]
    .copy()
    .rename(columns={'STN_NAM_DE': 'Station_Name'})
)
train_stations['Longitude'] = train_stations.geometry.x
train_stations['Latitude'] = train_stations.geometry.y

In [None]:

# Haversine distance calculation for overlap percentage
def calculate_overlap_percentage(df_bus_stops, df_train_stations, max_distance=350):
    """Report, per bus service, how many of its stops lie near a train station.

    A bus stop "overlaps" when at least one station is within ``max_distance``
    of it, measured with the haversine (great-circle) formula.

    Args:
        df_bus_stops: Table with 'ServiceNo', 'BusStopCode', 'Latitude' and
            'Longitude' columns. A stop may appear several times per service;
            it is counted once per service.
        df_train_stations: Table with 'Station_Name', 'Latitude' and
            'Longitude' columns.
        max_distance: Search radius in metres (the haversine result, in
            kilometres, is multiplied by 1000 before the comparison).

    Returns:
        A pandas DataFrame with one row per service and the columns
        'ServiceNo', 'TotalBusStops', 'OverlappingStops', 'OverlapPercentage'
        and 'OverlappedBusStops'. The last column holds a dict mapping each
        overlapping BusStopCode to a list of
        ``{'Station_Name': ..., 'Distance': ...}`` entries (distance in
        metres). The columns are present even when there are no services.
    """
    # Station coordinates do not change between stops: read them once
    # instead of walking the station table row by row for every stop.
    stations = list(zip(
        df_train_stations['Station_Name'],
        df_train_stations['Latitude'],
        df_train_stations['Longitude'],
    ))

    # The same stop is served by many services, so remember the stations
    # found for each coordinate pair instead of recomputing every distance.
    nearby_by_location = {}

    def _stations_near(lat, lon):
        """Return (station name, distance in metres) pairs within range."""
        key = (lat, lon)
        if key not in nearby_by_location:
            hits = []
            for name, station_lat, station_lon in stations:
                distance = haversine((lat, lon), (station_lat, station_lon)) * 1000
                if distance <= max_distance:
                    hits.append((name, distance))
            nearby_by_location[key] = hits
        return nearby_by_location[key]

    results = []
    for service_no, service_rows in df_bus_stops.groupby('ServiceNo'):
        # Both directions usually share stops; count each stop once.
        unique_stops = service_rows.drop_duplicates(subset=['BusStopCode'])
        total_bus_stops = len(unique_stops)

        bus_stop_overlaps = {}
        for bus_stop_code, lat, lon in zip(
            unique_stops['BusStopCode'],
            unique_stops['Latitude'],
            unique_stops['Longitude'],
        ):
            hits = _stations_near(lat, lon)
            if hits:
                # Fresh dicts per service so results never share state.
                bus_stop_overlaps[bus_stop_code] = [
                    {'Station_Name': name, 'Distance': distance}
                    for name, distance in hits
                ]

        overlapping_stops = len(bus_stop_overlaps)
        overlap_percentage = (overlapping_stops / total_bus_stops) * 100 if total_bus_stops > 0 else 0
        results.append({
            'ServiceNo': service_no,
            'TotalBusStops': total_bus_stops,
            'OverlappingStops': overlapping_stops,
            'OverlapPercentage': overlap_percentage,
            'OverlappedBusStops': bus_stop_overlaps,
        })

    # Naming the columns keeps the schema stable when no service is present.
    return pd.DataFrame(
        results,
        columns=[
            'ServiceNo',
            'TotalBusStops',
            'OverlappingStops',
            'OverlapPercentage',
            'OverlappedBusStops',
        ],
    )

# Example usage of the overlap calculation function: stops within 350 m of a
# TEL station count as overlapping.
overlap_results_tel = calculate_overlap_percentage(merged_bus_routes, train_stations, max_distance=350)
print(overlap_results_tel)

# Visualization of overlapping bus stops and TEL stations
# Build point layers (WGS 84 longitude/latitude) for plotting.
gdf_bus_stops = gpd.GeoDataFrame(merged_bus_routes, geometry=gpd.points_from_xy(merged_bus_routes['Longitude'], merged_bus_routes['Latitude']), crs="EPSG:4326")
gdf_train_stations = gpd.GeoDataFrame(train_stations, geometry=gpd.points_from_xy(train_stations['Longitude'], train_stations['Latitude']), crs="EPSG:4326")

# Draw both layers on one set of axes: small blue dots for bus stops, larger
# red dots for stations.
fig, ax = plt.subplots(figsize=(12, 8))
gdf_bus_stops.plot(ax=ax, color='blue', markersize=1, label='Bus Stops')
gdf_train_stations.plot(ax=ax, color='red', markersize=5, label='TEL Stations')
# The line is named "Thomson-East Coast" (the original title misspelled it).
ax.set_title('Bus Stops and Thomson-East Coast MRT Line (TEL) Stations')
ax.legend()
plt.show()
