# Table of Contents
01. Import Libraries
02. Import Data
03. Get Coordinates of Train Stations from OneMap API
04. Get Missing Coordinates Manually
05. Export Data

# 01. Import Libraries

In [1]:
# Import libraries
import asyncio     # the library needed for asynchronous operation
import os
import time        # time library is used to handle time-related tasks
from urllib.parse import quote     # percent-encodes text so that it can be placed safely inside a URL

import aiohttp     # the library that allows asynchronous API calls
import nest_asyncio     # the library that allows nested use of the asyncio event loop, making asyncio compatible with Jupyter notebooks
import numpy as np
import pandas as pd

In [2]:
# Allow nested asyncio calls in Jupyter notebooks
# This has to run before asyncio.run() is called further down in this notebook
nest_asyncio.apply()

In [3]:
# Remove the row limit of the pandas display so that every row of an output is shown in this notebook
pd.set_option('display.max_rows', None)

# 02. Import Data

In [4]:
# Define the project file path
# The folder can be overridden per machine by setting the HDB_PROJECT_PATH environment variable
# before starting the notebook; when the variable is not set, the original folder below is used
path = os.environ.get(
    'HDB_PROJECT_PATH',
    r'C:\Users\saich\Desktop\CareerFoundry\Data Immersion\Achievement 6 Advanced Analytics & Dashboard Design\11-2023 HDB Flat Resale Analysis'
)

In [5]:
# Locate 'train_station (checked).csv' inside the 'Prepared Data' folder,
# then load it with the first CSV column used as the DataFrame index
train_station_file = os.path.join(path, '02 Data', 'Prepared Data', 'train_station (checked).csv')
train_station = pd.read_csv(train_station_file, index_col=0)

In [6]:
train_station.head()

Unnamed: 0,station_code,station_name,line,opening,closure
0,NS1,Jurong East,North-South Line,1990-03-10,
1,NS2,Bukit Batok,North-South Line,1990-03-10,
2,NS3,Bukit Gombak,North-South Line,1990-03-10,
3,NS4,Choa Chu Kang,North-South Line,1990-03-10,
4,NS5,Yew Tee,North-South Line,1996-02-10,


In [7]:
train_station.shape

(204, 5)

# 03. Get Coordinates of Train Stations from OneMap API

### 03.1 Create a column of station name with code for more accurate search

In [8]:
# Build the search label '<station name> MRT Station (<station code>)' for every row
# and store it in a new column called 'station_name_w_code'
station_names = train_station['station_name']
station_codes = train_station['station_code']
train_station['station_name_w_code'] = station_names + ' MRT Station (' + station_codes + ')'

In [9]:
# Stations on the 3 LRT lines are labelled 'LRT Station' instead of 'MRT Station' in 'station_name_w_code'
lrt_lines = ['Bukit Panjang LRT', 'Sengkang LRT', 'Punggol LRT']
is_lrt = train_station['line'].isin(lrt_lines)

# Only the rows selected by the mask are rebuilt; all other rows keep their MRT label
train_station.loc[is_lrt, 'station_name_w_code'] = (
    train_station.loc[is_lrt, 'station_name']
    + ' LRT Station ('
    + train_station.loc[is_lrt, 'station_code']
    + ')'
)

In [10]:
train_station

Unnamed: 0,station_code,station_name,line,opening,closure,station_name_w_code
0,NS1,Jurong East,North-South Line,1990-03-10,,Jurong East MRT Station (NS1)
1,NS2,Bukit Batok,North-South Line,1990-03-10,,Bukit Batok MRT Station (NS2)
2,NS3,Bukit Gombak,North-South Line,1990-03-10,,Bukit Gombak MRT Station (NS3)
3,NS4,Choa Chu Kang,North-South Line,1990-03-10,,Choa Chu Kang MRT Station (NS4)
4,NS5,Yew Tee,North-South Line,1996-02-10,,Yew Tee MRT Station (NS5)
5,NS7,Kranji,North-South Line,1996-02-10,,Kranji MRT Station (NS7)
6,NS8,Marsiling,North-South Line,1996-02-10,,Marsiling MRT Station (NS8)
7,NS9,Woodlands,North-South Line,1996-02-10,,Woodlands MRT Station (NS9)
8,NS10,Admiralty,North-South Line,1996-02-10,,Admiralty MRT Station (NS10)
9,NS11,Sembawang,North-South Line,1996-02-10,,Sembawang MRT Station (NS11)


### 03.2 Use aiohttp to get the coordinates of train stations from the OneMap API

In [11]:
# Collect the search labels of all train stations in the dataset into a plain Python list
train_station_list = list(train_station['station_name_w_code'])

In [12]:
train_station_list

['Jurong East MRT Station (NS1)',
 'Bukit Batok MRT Station (NS2)',
 'Bukit Gombak MRT Station (NS3)',
 'Choa Chu Kang MRT Station (NS4)',
 'Yew Tee MRT Station (NS5)',
 'Kranji MRT Station (NS7)',
 'Marsiling MRT Station (NS8)',
 'Woodlands MRT Station (NS9)',
 'Admiralty MRT Station (NS10)',
 'Sembawang MRT Station (NS11)',
 'Canberra MRT Station (NS12)',
 'Yishun MRT Station (NS13)',
 'Khatib MRT Station (NS14)',
 'Yio Chu Kang MRT Station (NS15)',
 'Ang Mo Kio MRT Station (NS16)',
 'Bishan MRT Station (NS17)',
 'Braddell MRT Station (NS18)',
 'Toa Payoh MRT Station (NS19)',
 'Novena MRT Station (NS20)',
 'Newton MRT Station (NS21)',
 'Orchard MRT Station (NS22)',
 'Somerset MRT Station (NS23)',
 'Dhoby Ghaut MRT Station (NS24)',
 'City Hall MRT Station (NS25)',
 'Raffles Place MRT Station (NS26)',
 'Marina Bay MRT Station (NS27)',
 'Marina South Pier MRT Station (NS28)',
 'Pasir Ris MRT Station (EW1)',
 'Tampines MRT Station (EW2)',
 'Simei MRT Station (EW3)',
 'Tanah Merah MRT Sta

In [13]:
# The URL that connects with OneMap API
# The '{}' placeholder after 'searchVal=' is filled in with the station name through url.format() in get_coordinates
# NOTE(review): returnGeom=Y presumably asks the API to include the coordinates and pageNum=1 the first page of results - confirm against the OneMap API documentation
url = 'https://onemap.sg/api/common/elastic/search?searchVal={}&returnGeom=Y&getAddrDetails=Y&pageNum=1'

# 'start' here indicates the starting time of the retrieval process
# The time.time() function returns the number of seconds passed since epoch (the point where time begins)
# It is taken before the coroutines are defined, so the total time computed at the end of this cell covers the whole cell
start = time.time()

# Define a coroutine with async def
async def get_coordinates(session, station, count):
    """Search one station on the OneMap API and return its coordinates.

    Parameters
    ----------
    session : aiohttp.ClientSession
        The shared client session used to send the GET request.
    station : str
        The search term, e.g. 'Jurong East MRT Station (NS1)'.
    count : int
        Running number of the station; it is only used in the progress print-out.

    Returns
    -------
    tuple
        (station, latitude, longitude). Latitude and longitude are passed on unchanged
        from the first search result. Both are the string 'Not found' when the search
        returns no result.

    Raises
    ------
    aiohttp.ClientResponseError
        If the API answers with an HTTP error status (4xx / 5xx).
    """
    # Percent-encode the station name before inserting it into the query string,
    # otherwise characters such as '&', '#' or '+' in a name would change the meaning of the URL
    request_url = url.format(quote(station, safe=''))

    # 'async with' hands the connection back to the session even if reading the response fails
    async with session.get(request_url) as response:
        # Fail with a clear HTTP error instead of an unrelated KeyError on an error payload
        response.raise_for_status()

        # Parse the JSON response; await is needed since .json() returns an awaitable object
        data = await response.json()

    results = data.get('results') or []
    if data.get('found', 0) > 0 and results:     # At least one item was found
        lat = results[0]['LATITUDE']
        long = results[0]['LONGITUDE']
    else:                                        # Nothing was found
        lat = 'Not found'
        long = 'Not found'
    print('{}) {}, {}, {}'.format(count, station, lat, long))

    # Return the station and the coordinates retrieved
    return station, lat, long
    
# Since main() awaits coroutines, 'async' is added in front of 'def'
# This tells Python that this function can do asynchronous stuff
async def main():
    """Fetch the coordinates of every station in train_station_list and store them in train_station.

    One get_coordinates task is created per station and all tasks share a single client
    session. The returned latitude and longitude are written to the 'latitude' and
    'longitude' columns of train_station for the rows whose 'station_name_w_code' was
    searched. The number of stations that came back as 'Not found' is printed at the end.
    """
    # A client session is created to allow the underlying TCP connection to be reused for all requests
    # The 'async with' context manager closes the client session automatically when all requests are done
    async with aiohttp.ClientSession() as session:

        # Create one task per station with asyncio.create_task(); enumerate() supplies the running number
        # While one task is waiting for its response, the event loop starts or resumes the other tasks
        tasks = [
            asyncio.create_task(get_coordinates(session, station, count))
            for count, station in enumerate(train_station_list, start=1)
        ]

        # Wait for all the tasks to finish; the results come back in the same order as the tasks
        results = await asyncio.gather(*tasks)

    # Each result is a tuple of (station, latitude, longitude) returned by get_coordinates
    lat_by_station = {station: lat for station, lat, _ in results}
    long_by_station = {station: long for station, _, long in results}

    # Write each column once instead of scanning the whole DataFrame for every station
    # Rows whose station was not part of this search are left untouched
    names = train_station['station_name_w_code']
    searched = names.isin(list(lat_by_station))
    train_station.loc[searched, 'latitude'] = names[searched].map(lat_by_station)
    train_station.loc[searched, 'longitude'] = names[searched].map(long_by_station)

    fail = sum(1 for _, lat, _ in results if lat == 'Not found')
    print('A total of {} failed search'.format(fail))

# Run the event loop (the loop of an asynchronous operation) by running main()
# asyncio.run() only returns after main() has finished, so the timestamp below is taken after the retrieval is done
asyncio.run(main())

# 'end' here indicates the ending time of the retrieval process
end = time.time()
# Elapsed time in seconds between the 'start' timestamp taken earlier in this cell and 'end'
total_time = end - start
print('Total {} seconds used'.format(total_time))

23) Dhoby Ghaut MRT Station (NS24), 1.29870132536119, 103.846113677951
3) Bukit Gombak MRT Station (NS3), 1.35861159094192, 103.751790910733
28) Pasir Ris MRT Station (EW1), 1.37304331635804, 103.949284527763
72) Woodleigh MRT Station (NE11), 1.33919004519388, 103.87081830915
8) Woodlands MRT Station (NS9), 1.43681962961519, 103.786066799253
76) Buangkok MRT Station (NE15), 1.38287001971672, 103.893122569706
106) HarbourFront MRT Station (CC29), 1.26538938374901, 103.821530157095
104) Labrador Park MRT Station (CC27), 1.27225417749656, 103.802631578766
101) Kent Ridge MRT Station (CC24), 1.29353349887123, 103.784572738173
107) Bayfront MRT Station (CE1), 1.28187378879209, 103.859079764874
108) Marina Bay MRT Station (CE2), 1.27625146423743, 103.855447156279
16) Bishan MRT Station (NS17), 1.35083898784737, 103.848143964542
19) Novena MRT Station (NS20), 1.32044079120154, 103.843825618748
84) Stadium MRT Station (CC6), 1.3028124684707, 103.875337711089
43) Outram Park MRT Station (EW16),

184) Farmway LRT Station (SW2), 1.39717019591821, 103.889304824196
164) South View LRT Station (BP2), 1.38029828742399, 103.745291799824
171) Bangkit LRT Station (BP9), 1.38002223010088, 103.772647370452
199) Sam Kee LRT Station (PW1), 1.40961268528804, 103.904831550608
176) Ten Mile Junction LRT Station (BP14), Not found, Not found
201) Samudera LRT Station (PW4), 1.41590172160383, 103.902156317163
202) Nibong LRT Station (PW5), 1.41185447744238, 103.900339452055
203) Sumang LRT Station (PW6), 1.40845241943079, 103.898558450395
183) Cheng Lim LRT Station (SW1), 1.39627763344324, 103.893797181731
178) Compassvale LRT Station (SE1), 1.39449304450373, 103.900492450944
150) Upper Thomson MRT Station (TE8), 1.35441643365401, 103.832898468504
165) Keat Hong LRT Station (BP3), 1.3786032506373, 103.749055668925
162) Gardens by the Bay MRT Station (TE22), 1.27848725876303, 103.867454664421
191) Punggol LRT Station (PTC), 1.40519470149606, 103.902411911915
34) Eunos MRT Station (EW7), 1.3197835

The retrieval took a total of 63.6 seconds. <br>
One station failed to get its coordinates.

In [14]:
# List the stations whose latitude still holds the 'Not found' placeholder
not_found = train_station['latitude'] == 'Not found'
train_station.loc[not_found, 'station_name_w_code']

175    Ten Mile Junction LRT Station (BP14)
Name: station_name_w_code, dtype: object

In [15]:
train_station

Unnamed: 0,station_code,station_name,line,opening,closure,station_name_w_code,latitude,longitude
0,NS1,Jurong East,North-South Line,1990-03-10,,Jurong East MRT Station (NS1),1.33315281585758,103.742286332403
1,NS2,Bukit Batok,North-South Line,1990-03-10,,Bukit Batok MRT Station (NS2),1.34903331201636,103.749566478309
2,NS3,Bukit Gombak,North-South Line,1990-03-10,,Bukit Gombak MRT Station (NS3),1.35861159094192,103.751790910733
3,NS4,Choa Chu Kang,North-South Line,1990-03-10,,Choa Chu Kang MRT Station (NS4),1.38536316540225,103.744370779756
4,NS5,Yew Tee,North-South Line,1996-02-10,,Yew Tee MRT Station (NS5),1.39753506936297,103.747405150236
5,NS7,Kranji,North-South Line,1996-02-10,,Kranji MRT Station (NS7),1.42508698073648,103.762137459497
6,NS8,Marsiling,North-South Line,1996-02-10,,Marsiling MRT Station (NS8),1.43252114855026,103.774074641403
7,NS9,Woodlands,North-South Line,1996-02-10,,Woodlands MRT Station (NS9),1.43681962961519,103.786066799253
8,NS10,Admiralty,North-South Line,1996-02-10,,Admiralty MRT Station (NS10),1.44058856161847,103.800990519771
9,NS11,Sembawang,North-South Line,1996-02-10,,Sembawang MRT Station (NS11),1.44905082158502,103.820046140211


# 04. Get Missing Coordinates Manually

As mentioned earlier, the OneMap API did not return coordinates for Ten Mile Junction LRT Station (BP14). For this reason, we obtain its coordinates manually from Google Maps and assign them to the station.

Ten Mile Junction LRT Station (BP14) <br>
Latitude: 1.381846714608845, Longitude: 103.76047814603771

In [16]:
# Select the row of Ten Mile Junction LRT Station (BP14) once, then fill in its manually obtained coordinates
is_ten_mile_junction = train_station['station_name_w_code'] == 'Ten Mile Junction LRT Station (BP14)'
train_station.loc[is_ten_mile_junction, 'latitude'] = 1.381846714608845
train_station.loc[is_ten_mile_junction, 'longitude'] = 103.76047814603771

In [17]:
# Verify the changes
train_station

Unnamed: 0,station_code,station_name,line,opening,closure,station_name_w_code,latitude,longitude
0,NS1,Jurong East,North-South Line,1990-03-10,,Jurong East MRT Station (NS1),1.33315281585758,103.742286332403
1,NS2,Bukit Batok,North-South Line,1990-03-10,,Bukit Batok MRT Station (NS2),1.34903331201636,103.749566478309
2,NS3,Bukit Gombak,North-South Line,1990-03-10,,Bukit Gombak MRT Station (NS3),1.35861159094192,103.751790910733
3,NS4,Choa Chu Kang,North-South Line,1990-03-10,,Choa Chu Kang MRT Station (NS4),1.38536316540225,103.744370779756
4,NS5,Yew Tee,North-South Line,1996-02-10,,Yew Tee MRT Station (NS5),1.39753506936297,103.747405150236
5,NS7,Kranji,North-South Line,1996-02-10,,Kranji MRT Station (NS7),1.42508698073648,103.762137459497
6,NS8,Marsiling,North-South Line,1996-02-10,,Marsiling MRT Station (NS8),1.43252114855026,103.774074641403
7,NS9,Woodlands,North-South Line,1996-02-10,,Woodlands MRT Station (NS9),1.43681962961519,103.786066799253
8,NS10,Admiralty,North-South Line,1996-02-10,,Admiralty MRT Station (NS10),1.44058856161847,103.800990519771
9,NS11,Sembawang,North-South Line,1996-02-10,,Sembawang MRT Station (NS11),1.44905082158502,103.820046140211


In [18]:
train_station.shape

(204, 8)

# 05. Export Data

In [19]:
# Write the DataFrame, including the coordinates, to 'train_station_w_coord.csv' in the 'Prepared Data' folder
output_file = os.path.join(path, '02 Data', 'Prepared Data', 'train_station_w_coord.csv')
train_station.to_csv(output_file)