# LTA API usage
- bus stops
- bus routes
- bus services
- passenger volume by bus stops
- estimated travel times
- traffic incidents (from weather)
- traffic flow

In [1]:
from LTA_API_key import API_key
import requests
import json
import pandas as pd
import numpy as np
import math

In [2]:
def get_lta_response(url,params=None):
    """ returns API response in json format
    Args:
        url (str): url of API end point
        params (dict): parameters to request
    Returns:
        dict: API response from the input url
    """
    headers = {"AccountKey": API_key}

    # A GET request to the API
    response = requests.request("GET", url, headers=headers,params=params)

    # Print the response
    return response.json()


In [3]:
bus_stops = get_lta_response(url = "https://datamall2.mytransport.sg/ltaodataservice/BusStops")
bus_stops

{'odata.metadata': 'http://datamall2.mytransport.sg/ltaodataservice/$metadata#BusStops',
 'value': [{'BusStopCode': '01012',
   'RoadName': 'Victoria St',
   'Description': 'Hotel Grand Pacific',
   'Latitude': 1.29684825487647,
   'Longitude': 103.85253591654006},
  {'BusStopCode': '01013',
   'RoadName': 'Victoria St',
   'Description': "St. Joseph's Ch",
   'Latitude': 1.29770970610083,
   'Longitude': 103.8532247463225},
  {'BusStopCode': '01019',
   'RoadName': 'Victoria St',
   'Description': 'Bras Basah Cplx',
   'Latitude': 1.29698951191332,
   'Longitude': 103.85302201172507},
  {'BusStopCode': '01029',
   'RoadName': 'Nth Bridge Rd',
   'Description': 'Opp Natl Lib',
   'Latitude': 1.2966729849642,
   'Longitude': 103.85441422464267},
  {'BusStopCode': '01039',
   'RoadName': 'Nth Bridge Rd',
   'Description': 'Bugis Cube',
   'Latitude': 1.29820784139683,
   'Longitude': 103.85549139837407},
  {'BusStopCode': '01059',
   'RoadName': 'Victoria St',
   'Description': 'Bugis St

In [11]:
import pandas as pd
import os
df = pd.DataFrame(bus_stops['value'])
df.to_csv(os.path.join(r"C:\Users\hypak\OneDrive - Singapore Management University\Documents\Data\SG_LTA",'SG_bus_stops.csv'),index=False)

In [20]:
[v for v in bus_stops['value'] if v['BusStopCode']=="10009"]

[{'BusStopCode': '10009',
  'RoadName': 'Bt Merah Ctrl',
  'Description': 'Bt Merah Int',
  'Latitude': 1.28210155945393,
  'Longitude': 103.81722480263163}]

In [12]:
bus_routes = get_lta_response(url = "https://datamall2.mytransport.sg/ltaodataservice/BusRoutes")
bus_routes


{'odata.metadata': 'http://datamall2.mytransport.sg/ltaodataservice/$metadataBusRoutes',
 'value': [{'ServiceNo': '10',
   'Operator': 'SBST',
   'Direction': 1,
   'StopSequence': 1,
   'BusStopCode': '75009',
   'Distance': 0,
   'WD_FirstBus': '0500',
   'WD_LastBus': '2300',
   'SAT_FirstBus': '0500',
   'SAT_LastBus': '2300',
   'SUN_FirstBus': '0500',
   'SUN_LastBus': '2300'},
  {'ServiceNo': '10',
   'Operator': 'SBST',
   'Direction': 1,
   'StopSequence': 2,
   'BusStopCode': '76059',
   'Distance': 0.6,
   'WD_FirstBus': '0502',
   'WD_LastBus': '2302',
   'SAT_FirstBus': '0502',
   'SAT_LastBus': '2302',
   'SUN_FirstBus': '0502',
   'SUN_LastBus': '2302'},
  {'ServiceNo': '10',
   'Operator': 'SBST',
   'Direction': 1,
   'StopSequence': 3,
   'BusStopCode': '76069',
   'Distance': 1.1,
   'WD_FirstBus': '0504',
   'WD_LastBus': '2304',
   'SAT_FirstBus': '0504',
   'SAT_LastBus': '2304',
   'SUN_FirstBus': '0503',
   'SUN_LastBus': '2304'},
  {'ServiceNo': '10',
   'Opera

In [13]:
bus_services = get_lta_response(url = "https://datamall2.mytransport.sg/ltaodataservice/BusServices")
bus_services

# AM_Peak_Fre: Freq of dispatch for AM Peak 0630H - 0830H (range in minutes)
# AM_Offpeak_Freq: Freq of dispatch for AM Off-Peak 0831H - 1659H (range in minutes)
# PM_Peak_Freq: Freq of dispatch for PM Peak 1700H - 1900H (range in minutes)
# PM_Offpeak_Freq: Freq of dispatch for PM Off-Peak after 1900H (range in minutes)

{'odata.metadata': 'http://datamall2.mytransport.sg/ltaodataservice/$metadata#BusServices',
 'value': [{'ServiceNo': '118',
   'Operator': 'GAS',
   'Direction': 1,
   'Category': 'TRUNK',
   'OriginCode': '65009',
   'DestinationCode': '97009',
   'AM_Peak_Freq': '5-08',
   'AM_Offpeak_Freq': '8-12',
   'PM_Peak_Freq': '8-10',
   'PM_Offpeak_Freq': '09-14',
   'LoopDesc': ''},
  {'ServiceNo': '118',
   'Operator': 'GAS',
   'Direction': 2,
   'Category': 'TRUNK',
   'OriginCode': '97009',
   'DestinationCode': '65009',
   'AM_Peak_Freq': '10-10',
   'AM_Offpeak_Freq': '8-11',
   'PM_Peak_Freq': '4-08',
   'PM_Offpeak_Freq': '9-12',
   'LoopDesc': ''},
  {'ServiceNo': '118A',
   'Operator': 'GAS',
   'Direction': 1,
   'Category': 'TRUNK',
   'OriginCode': '65009',
   'DestinationCode': '96119',
   'AM_Peak_Freq': '06-66',
   'AM_Offpeak_Freq': '-',
   'PM_Peak_Freq': '-',
   'PM_Offpeak_Freq': '-',
   'LoopDesc': ''},
  {'ServiceNo': '118B',
   'Operator': 'GAS',
   'Direction': 1,
  

In [3]:
passenger_vol_by_bus_stops = get_lta_response(url = "https://datamall2.mytransport.sg/ltaodataservice/PV/Bus")
passenger_vol_by_bus_stops

{'odata.metadata': 'http://datamall2.mytransport.sg/ltaodataservice/$metadata#FarecardBatch',
 'value': [{'Link': 'https://ltafarecard.s3.ap-southeast-1.amazonaws.com/202410/transport_node_bus_202410.zip?X-Amz-Security-Token=IQoJb3JpZ2luX2VjEIj%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaDmFwLXNvdXRoZWFzdC0xIkcwRQIhAJAlyvnd0AcK7OqYPRLd5V%2FvwCsWjne6G3t0MXXw3sx%2BAiBqKNP9nfUOkT5H2NfUCYW%2B5PpTzJslW602QWPrOyP%2BzSrCBQhBEAQaDDM0MDY0NTM4MTMwNCIMb%2BMcqgf9r%2BRy4hNJKp8FLjbsFCH7%2FhbliXuw3DaqmHC%2Fcv4TO3yy5RKzGGIcDFX3QtXEqf2%2FBUsWaSlySRggwlE9wVOPwanrT%2BwcbcJiBBMwAlmEkaDJyQGibS5LryTyXWtGCeIfrUReSGP9o6ojORIHYt9bfKZRHxjJlhfKFdfRlKy2U1ZkfSBnXA1%2BPjwwlnPhuh8afr69ds3HPqjWFObO1kjWO%2BGyypqVOpF27Zbgx0ZkdCBuOjx4jvXFYhODnTlsHMjv7WOnzq1y8LigY3%2FF1ZZ3DEPLoLTTu1%2FO8Q3yR5cPqtf8b2YatLGUXcop7FabhiJfd0Mb8aqJjwW88GEGN1Fd%2B%2FyqgTVGhpX8ADaXTYFcDdPx0CpZEGgrPrTgU%2FzFb2VOwOReUcCMTM8GeTcgrDnm3aNHKFyuycCT4%2F8nhhNKJMmVxCp%2FLb0Q1b1X0vQDOBhC0nCrJ3YC31HJnK0RF0K8nyLs7U4WS2KbfDgQ4PZA0ti4d7Pwmihwfs5OijaaTfbkTzuG4RJy7%2BXlAEe

In [7]:
traffic_flow = get_lta_response(url = "https://datamall2.mytransport.sg/ltaodataservice/TrafficFlow")
traffic_flow

{'odata.metadata': 'http://datamall2.mytransport.sg/ltaodataservice/$metadata#TrafficFlow',
 'value': [{'Link': 'https://dmprod-datasets.s3.ap-southeast-1.amazonaws.com/traffic-flow/data/trafficflow.json?X-Amz-Security-Token=IQoJb3JpZ2luX2VjEBYaDmFwLXNvdXRoZWFzdC0xIkcwRQIgOIpPwC2eF%2FokizyQoWR5tMyQvGYnssqBWOBEutbzwFgCIQDgib8SeiYapbWpvhokvkAsofT%2FyBtdFtnquAyN2Mc6IirLBQif%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F8BEAQaDDM0MDY0NTM4MTMwNCIMCQpU16684JtB9UnBKp8Ftday%2BXz%2FQGAsKy%2BFUxkHQrqx1HcDlc%2BcRrpeS2AmCCd5HBg%2FhO2R8cUzd4lGR35fGlWZTS7J1qZJWnvgdzl46uCTNq5311ymBkY5dbRth5jSQM9MmuYhyc8yeJb38XDpHqkm1g8U8xot1%2B8AnFKkOXpqpLHccmm9gxnvdO%2Bh5W3%2B%2Biaj2cEO9g30KQX6P4edXRmBt21HU43BDucintqcThlGsUKFsuS9hUqJaaE%2FS1idCmSLjCAk5QfF%2B5a7Oa2f3dB%2F%2FGszbyKik4ZwYjUfavn3DFU1PeDUqVyhrqfCk0uR56owoGmwiXOjAmnFQEip0bu7l3j2tW%2FtFIZcJ4mnd2z73kaoZYkK1ec3Rf9%2BbthbODXANsEgtig89wa28509VHyJPxlxKikaj0qg2etV9MYIcLcyaizKi%2F5kcIRPvvsOZth1rLa6HpLQ%2FIV7TJ9rbJJlphEO22kKlbfa1jscWzXPpx%2FhweK8UDqneQrqjh6Imj2J%2B2uYRs4H036AXQacD

In [3]:
passenger_vol_by_origin_destination = get_lta_response(url = "https://datamall2.mytransport.sg/ltaodataservice/PV/ODBus")
passenger_vol_by_origin_destination

{'odata.metadata': 'http://datamall2.mytransport.sg/ltaodataservice/$metadata#FarecardBatch',
 'value': [{'Link': 'https://ltafarecard.s3.ap-southeast-1.amazonaws.com/202412/origin_destination_bus_202412.zip?X-Amz-Security-Token=IQoJb3JpZ2luX2VjEPv%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaDmFwLXNvdXRoZWFzdC0xIkcwRQIgUjwfdaxQVMg102A3ZFeYBCq9PtV89jWzJlg5Pbi8EvkCIQC%2FIg%2B4I7akgvD%2FDNxqF8Yj220I%2Bqnl4AITXdmgyVAQ%2FyrLBQjk%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F8BEAQaDDM0MDY0NTM4MTMwNCIM2bhaw7cBRWgmb%2BuvKp8F8RJQhxjgM8zdwP6DGrvFeXMM1YuKld6rbOMYok7OoXYQJCbMcuQCmVeNQ1qcrOQaLP1WldWP5kt955bEBqvyBvv0Q8UQvz0IeqxDa%2BM1O6f81KA3CvFh07sO7MczpQFTaSQKQSedyiC6Pv%2F%2Fbd8o2ag2E3A%2BP0HfxsRHOtw6go7gDBgW7jc13xQlypQQrkcdqTv44VZ%2BWZVsWixxMWx3qIexHRrpr34RzzNIBeEB%2BQwmlGJGEFgDeZVbxYBM3V0JWXzoDQ3vgUhl60Vi2E6eJSFOV7lHBoLyMb%2Booi8oxUj5mmi5A1KVMOt%2BdNhI%2BIzRWVoD5wgNms3OArMowLYF%2FX1PRZFwFk9mMxVcl40%2Bzu7%2BByRYZW3TzALxdAgz%2FrG2pT%2FY%2Bw5i70cGNHHQ6%2BTIZ4BaWJYePJioF1BqDpJdxcLjH%2F1z065yYc1%2B2%2BoiTIb4%2FvJfNAwZFJMTyHa9cbH

In [5]:
passenger_vol_by_train_stations = get_lta_response(url = "https://datamall2.mytransport.sg/ltaodataservice/PV/Train")
passenger_vol_by_train_stations

{'odata.metadata': 'http://datamall2.mytransport.sg/ltaodataservice/$metadata#FarecardBatch',
 'value': [{'Link': 'https://ltafarecard.s3.ap-southeast-1.amazonaws.com/202409/transport_node_train_202409.zip?X-Amz-Security-Token=IQoJb3JpZ2luX2VjEND%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaDmFwLXNvdXRoZWFzdC0xIkcwRQIgC9cBLc88e3ICbowXWwvtgIBSvbl27Ab6nWKZ51igGDICIQC9UrmbjUF1kSMpSa9Z3EO4G7YvGrpfWQk4wITCt2dq4SrCBQhZEAQaDDM0MDY0NTM4MTMwNCIMyuRSUDAZk6SdkqitKp8FGBpTcuxYdX50C2yNl564p%2FjHoxSL%2FEG5wsLxMTMdGgEtGemSFMMj2LmRtis2pVge%2BaVzVnmV51RZH8lA%2BLMiIPvAptfGYp9XqJjagW%2B2V%2Bp5HRoNMsSmy1VVphxtwARKDfT2RSwiubk49rbu8hrZ7LOalS0m0t2Z9f2RyMKkjYP4nFaY3M31UdNWxNCAgM0t4%2FCYP%2FDlfNJU67nTzbpvLiXw8H3oEGNP58myfi1pYtA4fOpBFuo9LxD7nI8XxR%2BVQvh%2B1VcvDSv630eemfWRiduZ9stE4jQv6DqJHTTE%2B%2Fy920b3pxLUMJE25nZ207jRb7fl8nwzQdOeODaRZIq2hmAM1s38ke7DPtrYOL2KT2jHM2tiz6TV7VSc3cGDBowV6uKO%2Fk3DX6LM2uVND7JTKXlbcpijmcnfg9qINPCvzt5eT2qSE8%2B29ch3b2%2F6xOx7XF4oadJXJXOL97FMtRatqxCmh7nQIYPWUDXjl5GzlffyGRDy3iZVNxCy7xVNmc7n6Ie1sgBdkfy

In [4]:
estimated_travel_times = get_lta_response(url = "https://datamall2.mytransport.sg/ltaodataservice/EstTravelTimes")
estimated_travel_times

{'odata.metadata': 'http://datamall2.mytransport.sg/ltaodataservice/$metadata#EstTravelTimes',
 'value': [{'Name': 'AYE',
   'Direction': 1,
   'FarEndPoint': 'TUAS CHECKPOINT',
   'StartPoint': 'AYE/MCE INTERCHANGE',
   'EndPoint': 'TELOK BLANGAH RD',
   'EstTime': 2},
  {'Name': 'AYE',
   'Direction': 1,
   'FarEndPoint': 'TUAS CHECKPOINT',
   'StartPoint': 'TELOK BLANGAH RD',
   'EndPoint': 'LOWER DELTA RD',
   'EstTime': 1},
  {'Name': 'AYE',
   'Direction': 1,
   'FarEndPoint': 'TUAS CHECKPOINT',
   'StartPoint': 'LOWER DELTA RD',
   'EndPoint': 'NORMANTON PARK',
   'EstTime': 5},
  {'Name': 'AYE',
   'Direction': 1,
   'FarEndPoint': 'TUAS CHECKPOINT',
   'StartPoint': 'NORMANTON PARK',
   'EndPoint': 'NORTH BUONA VISTA RD',
   'EstTime': 2},
  {'Name': 'AYE',
   'Direction': 1,
   'FarEndPoint': 'TUAS CHECKPOINT',
   'StartPoint': 'NORTH BUONA VISTA RD',
   'EndPoint': 'CLEMENTI RD',
   'EstTime': 2},
  {'Name': 'AYE',
   'Direction': 1,
   'FarEndPoint': 'TUAS CHECKPOINT',
   '

In [7]:
pd.DataFrame(estimated_travel_times['value']).to_csv(r"C:\Users\hypak\OneDrive - Singapore Management University\Documents\Data\SG_LTA\20250113_estimatedTravelTimes.csv",index=False)

In [8]:
travel_speed_bands = get_lta_response(url = "https://datamall2.mytransport.sg/ltaodataservice/v3/TrafficSpeedBands")
travel_speed_bands

{'odata.metadata': 'http://datamall2.mytransport.sg/ltaodataservice/$metadata#TrafficSpeedBands',
 'lastUpdatedTime': '2025-01-13 11:45:00',
 'value': [{'LinkID': '103000000',
   'RoadName': 'KENT ROAD',
   'RoadCategory': 'E',
   'SpeedBand': 4,
   'MinimumSpeed': '30',
   'MaximumSpeed': '39',
   'StartLon': '103.85298052044503',
   'StartLat': '1.3170142376560023',
   'EndLon': '103.85259882242372',
   'EndLat': '1.3166840028663076'},
  {'LinkID': '103000010',
   'RoadName': 'BUCKLEY ROAD',
   'RoadCategory': 'E',
   'SpeedBand': 8,
   'MinimumSpeed': '70',
   'MaximumSpeed': '999',
   'StartLon': '103.84102305136321',
   'StartLat': '1.3166507852203482',
   'EndLon': '103.84022564204443',
   'EndLat': '1.316912438354752'},
  {'LinkID': '103000011',
   'RoadName': 'BUCKLEY ROAD',
   'RoadCategory': 'E',
   'SpeedBand': 3,
   'MinimumSpeed': '20',
   'MaximumSpeed': '29',
   'StartLon': '103.84022564204443',
   'StartLat': '1.316912438354752',
   'EndLon': '103.84102305136321',
   'E

In [9]:
pd.DataFrame(travel_speed_bands['value']).to_csv(r"C:\Users\hypak\OneDrive - Singapore Management University\Documents\Data\SG_LTA\TravelSpeedBands_20250113_11-45-00.csv",index=False)

In [12]:
GeospatialWholeIsland = get_lta_response(url = "https://datamall2.mytransport.sg/ltaodataservice/GeospatialWholeIsland",
                                         params={'ID':'BusStopLocation'})
GeospatialWholeIsland

{'odata.metadata': 'http://datamall2.mytransport.sg/ltaodataservice/$metadata#GeospatialWholeIsland',
 'value': [{'Link': 'https://dmgeospatial.s3.ap-southeast-1.amazonaws.com/BusStopLocation.zip?X-Amz-Security-Token=IQoJb3JpZ2luX2VjEBIaDmFwLXNvdXRoZWFzdC0xIkcwRQIgO7FqMGy0uhq6f2IwHw13Bvt1LJsJVMvYOSkX7e5X0QgCIQCLOhlilsZOakAyKx3GnUntzX18XAiI194hYwvJ3hGv9yrLBQj7%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F8BEAQaDDM0MDY0NTM4MTMwNCIMv0dh0ZdAtkIH9r1zKp8FtOVXjFemxdDMO%2BNjeq6%2BydP33eL9CYu5dMd4DSTOhydleNjsF09bX17kxCi%2BZKLT%2BPkWdsbzRwf1t3%2FJIDtjilIx2CGtRQPEEKYQJD9Kzh%2Bbb7GR7%2BqWx5ySsjN%2BebWBwASArOrVM2r7kkTyWHhLbQ5cUF8XwYx7vT%2Byhlc9k4keaFLLwmd66QV%2BHSIuUTKZectSha4YtQM7hcQH%2BnbEt%2B2eoQpMrxdoxF9J1IVMFMzy96Es0fQzEMb8yX8jkRzYUwGwe1DnaajGg0NUlMhH9u0%2BBsnOR6ME6PC8pTlKgza0hzHwzTgn0WnvaomDYS4tw4wwgPHKJg%2F%2BFatPiMeKx5mA4sdJLWgNi6nhczxxWCu4mynAL%2BowXf6p%2BJZJwaTlcXBKVRIg7e2dCkdqn9fNpL38QRbnETajJIO40uKnk%2BK6%2BxCzGCDNZiTCzIJspJjjXFI4lhV14dCbCNIlHHOJu8fOy6R7%2Bw3jlMVwNrcwqHyhDtU8994e2JJcLUELfxlkHpfFXyWDYh3

In [12]:
carparkAvailability = get_lta_response(url = "https://datamall2.mytransport.sg/ltaodataservice/CarParkAvailabilityv2")
carparkAvailability

{'odata.metadata': 'http://datamall2.mytransport.sg/ltaodataservice/$metadata#CarParkAvailability',
 'value': [{'CarParkID': '1',
   'Area': 'Marina',
   'Development': 'Suntec City',
   'Location': '1.29375 103.85718',
   'AvailableLots': 1575,
   'LotType': 'C',
   'Agency': 'LTA'},
  {'CarParkID': '2',
   'Area': 'Marina',
   'Development': 'Marina Square',
   'Location': '1.29115 103.85728',
   'AvailableLots': 1229,
   'LotType': 'C',
   'Agency': 'LTA'},
  {'CarParkID': '3',
   'Area': 'Marina',
   'Development': 'Raffles City',
   'Location': '1.29382 103.85319',
   'AvailableLots': 464,
   'LotType': 'C',
   'Agency': 'LTA'},
  {'CarParkID': '4',
   'Area': 'Marina',
   'Development': 'The Esplanade',
   'Location': '1.29011 103.85561',
   'AvailableLots': 635,
   'LotType': 'C',
   'Agency': 'LTA'},
  {'CarParkID': '5',
   'Area': 'Marina',
   'Development': 'Millenia Singapore',
   'Location': '1.29251 103.86009',
   'AvailableLots': 653,
   'LotType': 'C',
   'Agency': 'LTA'

In [18]:
import time
from datetime import datetime

time_cum = 0
while time_cum < 24*3600:
    carparkAvailability = get_lta_response(url = "https://datamall2.mytransport.sg/ltaodataservice/CarParkAvailabilityv2")
    df = pd.DataFrame(carparkAvailability['value'])
    now = datetime.now()
    current_time = now.strftime("%Y-%m-%d %H-%M-%S")
    print("Current Time =", current_time)
    df.to_csv(os.path.join(r"C:\Users\hypak\OneDrive - Singapore Management University\Documents\Data\SG_LTA\CarParkAvailability",f'{current_time}.csv'),index=False)
    time.sleep(3600)
    time_cum += 3600
    

Current Time = 2024-11-11 17-55-23
Current Time = 2024-11-11 18-55-23
Current Time = 2024-11-11 19-55-24
Current Time = 2024-11-11 20-55-24
Current Time = 2024-11-11 21-55-24
Current Time = 2024-11-11 22-55-24
Current Time = 2024-11-11 23-55-24
Current Time = 2024-11-12 00-55-24
Current Time = 2024-11-12 01-55-25
Current Time = 2024-11-12 02-55-25
Current Time = 2024-11-12 03-55-25
Current Time = 2024-11-12 04-55-25
Current Time = 2024-11-12 05-55-25
Current Time = 2024-11-12 06-55-25
Current Time = 2024-11-12 07-55-25
Current Time = 2024-11-12 08-55-26
Current Time = 2024-11-12 09-55-26
Current Time = 2024-11-12 10-55-26
Current Time = 2024-11-12 11-55-26
Current Time = 2024-11-12 12-55-26
Current Time = 2024-11-12 13-55-26
Current Time = 2024-11-12 14-55-27
Current Time = 2024-11-12 15-55-27
Current Time = 2024-11-12 16-55-27


# GTFS Data

Current static GTFS obtained from [LTA](https://www.transit.land/feeds/f-w21z-lta)

In [27]:
import pandas as pd
import numpy as np
import math

## shapes.txt

In [24]:
gtfs_df = pd.read_csv(r"C:\Users\hypak\OneDrive - Singapore Management University\Documents\Data\SG_LTA\gtfs-feed-lta\shapes.txt")
gtfs_df = gtfs_df.sort_values(by=['shape_id','shape_pt_sequence'])
gtfs_df_dict = {shape_id:df for shape_id, df in gtfs_df.groupby('shape_id', as_index = False)}
gtfs_df_dict

{'100:SAT:0_shape':               shape_id  shape_pt_lat  shape_pt_lon  shape_pt_sequence
 12741  100:SAT:0_shape      1.350466    103.871690                  1
 12742  100:SAT:0_shape      1.346990    103.872055                  2
 12743  100:SAT:0_shape      1.344833    103.871092                  3
 12744  100:SAT:0_shape      1.341963    103.870969                  4
 12745  100:SAT:0_shape      1.338453    103.870849                  5
 12746  100:SAT:0_shape      1.337778    103.873611                  6
 12747  100:SAT:0_shape      1.334776    103.878312                  7
 12748  100:SAT:0_shape      1.332907    103.878715                  8
 12749  100:SAT:0_shape      1.329303    103.879756                  9
 12750  100:SAT:0_shape      1.326626    103.880544                 10
 12751  100:SAT:0_shape      1.323230    103.881457                 11
 12752  100:SAT:0_shape      1.319823    103.881947                 12
 12753  100:SAT:0_shape      1.316282    103.882022       

In [None]:
def planar_distance(lat1, lon1, lat2, lon2):
    # Conversion factors
    meters_per_degree_lat = 111320  # Approximate meters per degree of latitude
    meters_per_degree_lon = 111320 * math.cos(math.radians(lat1))  # Approximate meters per degree of longitude at given latitude

    # Calculate differences in coordinates
    delta_lat = lat2 - lat1
    delta_lon = lon2 - lon1

    # Convert differences to meters
    delta_lat_meters = delta_lat * meters_per_degree_lat
    delta_lon_meters = delta_lon * meters_per_degree_lon

    # Use Pythagorean theorem to calculate distance
    distance = math.sqrt(delta_lat_meters ** 2 + delta_lon_meters ** 2)
    return distance

def get_shape_dist_traveled(df):
    """ adds the shape_dist_traveled column to df - cummulative distance travelled
    Args:
        df (pd.DataFrame): GTFS dataframe of a unique shape_id. Must have lat and lon columns
    Returns:
        pd.DataFrame: with an appended column 'shape_dist_traveled' which describes the cummulative distance
    """
    coordinates = df.iloc[:,1:3].values
    # print(df.iloc[:,1:3])
    meters_per_degree_lat = 111320  # Approximate meters per degree of latitude
    # meters_per_degree_lon = 111320 * math.cos(math.radians(lat1))  # Approximate meters per degree of longitude at given latitude
    diff_coords = np.diff(coordinates,axis=0)
    distance_multiplier = np.ones(diff_coords.shape) # to perform matrix multiplication later
    distance_multiplier[:,0] = distance_multiplier[:,0]*meters_per_degree_lat
    # Approximate meters per degree of longitude at given latitude
    distance_multiplier[:,1] = distance_multiplier[:,1]*np.cos(np.radians(diff_coords[0,0]))*meters_per_degree_lat
    distance_metres = np.multiply(diff_coords,distance_multiplier)
    distance_metres = np.linalg.norm(distance_metres,axis=1)

    # calculate cummulative distance
    cum_dist = np.cumsum(distance_metres)
    cum_dist = [0] + cum_dist.tolist()
    # lin_dist = [0] + distance_metres.tolist()
    df['shape_dist_traveled'] = cum_dist
    return df

get_shape_dist_traveled(gtfs_df_dict[list(gtfs_df_dict)[-1]])

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled
379,TE.1_shape,1.427145,103.794295,0,0.0
378,TE.1_shape,1.437388,103.787675,1,1357.663428
377,TE.1_shape,1.448646,103.785519,2,2633.678609


In [67]:
shape_dist_traveled = pd.concat([get_shape_dist_traveled(df) for df in gtfs_df_dict.values()])
shape_dist_traveled.to_csv(r"C:\Users\hypak\OneDrive - Singapore Management University\Documents\Data\SG_LTA\gtfs-feed-lta\shapes1.txt",index=False)
shape_dist_traveled

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled
12741,100:SAT:0_shape,1.350466,103.871690,1,0.000000
12742,100:SAT:0_shape,1.346990,103.872055,2,389.132061
12743,100:SAT:0_shape,1.344833,103.871092,3,652.071437
12744,100:SAT:0_shape,1.341963,103.870969,4,971.842723
12745,100:SAT:0_shape,1.338453,103.870849,5,1362.742185
...,...,...,...,...,...
375,TE.0_shape,1.437388,103.787675,2,1276.015180
376,TE.0_shape,1.427145,103.794295,3,2633.678607
379,TE.1_shape,1.427145,103.794295,0,0.000000
378,TE.1_shape,1.437388,103.787675,1,1357.663428


### stops.txt
[GTFS reference](https://gtfs.org/documentation/schedule/reference/#stopstxt)

In [71]:
gtfs_stops = pd.read_csv(r"C:\Users\hypak\OneDrive - Singapore Management University\Documents\Data\SG_LTA\gtfs-feed-lta - Copy\stops.txt")
gtfs_stops['parent_station'] = ''
rearranged_columns = ['stop_id','stop_code','stop_name','stop_lat','stop_lon','stop_url','parent_station','wheelchair_boarding']
gtfs_stops = gtfs_stops[rearranged_columns]
gtfs_stops.to_csv(r"C:\Users\hypak\OneDrive - Singapore Management University\Documents\Data\SG_LTA\gtfs-feed-lta - Copy\stops1.txt")
gtfs_stops

Unnamed: 0,stop_id,stop_code,stop_name,stop_lat,stop_lon,stop_url,parent_station,wheelchair_boarding
0,01012,01012,Hotel Grand Pacific,1.296848,103.852536,https://busrouter.sg/#/stops/01012,,1
1,01013,01013,St. Joseph's Ch,1.297710,103.853225,https://busrouter.sg/#/stops/01013,,1
2,01019,01019,Bras Basah Cplx,1.296990,103.853022,https://busrouter.sg/#/stops/01019,,1
3,01029,01029,Opp Natl Lib,1.296673,103.854414,https://busrouter.sg/#/stops/01029,,1
4,01039,01039,Bugis Cube,1.298208,103.855491,https://busrouter.sg/#/stops/01039,,1
...,...,...,...,...,...,...,...,...
5246,PW0,PW0,PUNGGOL,1.405255,103.902354,https://www.lta.gov.sg/content/ltagov/en/map/t...,,1
5247,SW0,SW0,SENGKANG,1.391330,103.895294,https://www.lta.gov.sg/content/ltagov/en/map/t...,,1
5248,SE0,SE0,SENGKANG,1.391330,103.895294,https://www.lta.gov.sg/content/ltagov/en/map/t...,,1
5249,CG0,CG0,TANAH MERAH,1.327262,103.946513,https://www.lta.gov.sg/content/ltagov/en/map/t...,,1


# Scrap bus service routes

- Some missing route information in the GTFS dataset

In [6]:
from bs4 import BeautifulSoup
import requests
import re

In [17]:

# URL of the webpage to scrape
url = "https://moovitapp.com/index/en-gb/public_transportation-line-67-Singapore_%E6%96%B0%E5%8A%A0%E5%9D%A1-1678-775181-589194-0"

def scrap_moovit_busRoutes(url):
    stop_list = dict()
    # Send an HTTP request to the website
    response = requests.get(url)
    response.text
    # Check if request was successful
    if response.status_code == 200:
        # Parse the HTML content
        soup = BeautifulSoup(response.text, 'html.parser')
        
        # Find all <h3> elements inside the list
        stops = soup.select("ul.stops-list h3")
        
        # Extract and print the text from each <h3>
        for stop in stops:
            busName = stop.text.strip()
            m = re.findall(r'\((.*?)\)',busName)
            stop_list[m[0]] = busName
            # print(m)
            # stop_list.append()
    else:
        print("Failed to retrieve the webpage.")
    return stop_list

scrap_moovit_busRoutes(url)

{'75009': 'Tampines Ctrl 1 - Tampines Int (75009)',
 '76059': 'Tampines Ave 5 - Opp Our Tampines Hub (76059)',
 '76069': 'Tampines Ave 5 - Blk 147 (76069)',
 '75059': 'Tampines Ave 1 - Bef Tampines West Stn (75059)',
 '75069': 'Bedok Reservoir Rd - Blk 960a (75069)',
 '75349': 'Bedok Reservoir Rd - the Clearwater Condo (75349)',
 '84209': 'Bedok Nth Ave 3 - Bedok Resvr Stn Exit B (84209)',
 '84529': 'Bedok Nth Ave 3 - Blk 109 (84529)',
 '84219': 'Bedok Nth Ave 3 - Bet Blks 139/140 (84219)',
 '84199': 'Bedok Nth Ave 3 - Opp Blk 220 Cp (84199)',
 '84049': 'New Upp Changi Rd - Blk 27 (84049)',
 '84039': 'New Upp Changi Rd - Bedok Stn Exit A (84039)',
 '84029': 'New Upp Changi Rd - Opp Blk 32 (84029)',
 '84019': 'New Upp Changi Rd - Opp Chai Chee Ind Pk (84019)',
 '83099': 'Changi Rd - Aft Perpetual Succour CH (83099)',
 '83079': 'Changi Rd - Bef Siglap Plain (83079)',
 '83059': 'Changi Rd - Mjd Kassim (83059)',
 '83049': 'Changi Rd - Bef Lor 110 Changi (83049)',
 '83029': 'Changi Rd - Aft