# Get MTR stations geographical coordinates

In [1]:
import time

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values
# Lift pandas' display truncation so full frames are rendered (no column or row limit).
for option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(option, None)

Function to get geographical coordinates (uses GeoPy)

In [2]:
def get_geo_coordinates(address, geolocator=None):
    """Geocode an address and return its (latitude, longitude).

    Parameters
    ----------
    address : str
        Free-form address passed to the geocoder's ``geocode`` method.
    geolocator : object, optional
        Any object exposing ``geocode(address)``. When omitted, a
        ``Nominatim`` client with user agent ``"mtr_agent"`` is created.
        Passing one in lets callers reuse a single client across many
        lookups.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` of the match, or ``(np.nan, np.nan)``
        when the geocoder returns ``None``.

    Exceptions raised by the geocoder are not caught here and propagate
    to the caller.
    """
    if geolocator is None:
        geolocator = Nominatim(user_agent="mtr_agent")
    location = geolocator.geocode(address)
    # Identity check: ``None`` is the "no match" sentinel returned by geocode.
    if location is None:
        return (np.nan, np.nan)
    return (location.latitude, location.longitude)

Get MTR stations list from https://www.exploremetro.com/blog/hong-kong-mtr-station-names-in-cantonese-jyutping/

In [3]:
# Download the station-name page and parse it.
url = "https://www.exploremetro.com/blog/hong-kong-mtr-station-names-in-cantonese-jyutping/"
page_response = requests.get(url, timeout=5)
# Fail fast on a 4xx/5xx response instead of silently parsing an error page.
page_response.raise_for_status()
soup = BeautifulSoup(page_response.content, "html.parser")

Use the BeautifulSoup library to get the list content

In [4]:
table = soup.find("table")  # first <table> in the page: the list of MTR stations

In [5]:
# Grab the table's <tbody> element and display it for inspection.
table_body = table.find("tbody")
table_body

<tbody>
<tr>
<th>English</th>
<th>Chinese</th>
<th>Jyutping</th>
</tr>
<tr>
<td>Admiralty</td>
<td>金鐘</td>
<td>gam1 zung1</td>
</tr>
<tr>
<td>Airport</td>
<td>機場</td>
<td>gei1 coeng4</td>
</tr>
<tr>
<td>AsiaWorld-Expo</td>
<td>博覽館</td>
<td>bok3 laam5 gun2</td>
</tr>
<tr>
<td>Austin</td>
<td>柯士甸</td>
<td>o1 si6 din1</td>
</tr>
<tr>
<td>Causeway Bay</td>
<td>銅鑼灣</td>
<td>tung4 lo4 waan1</td>
</tr>
<tr>
<td>Central</td>
<td>中環</td>
<td>zung1 waan4</td>
</tr>
<tr>
<td>Chai Wan</td>
<td>柴灣</td>
<td>caai4 waan1</td>
</tr>
<tr>
<td>Che Kung Temple</td>
<td>車公廟</td>
<td>ce1 gung1 miu6</td>
</tr>
<tr>
<td>Cheung Sha Wan</td>
<td>長沙灣</td>
<td>coeng4 saa1 waan1</td>
</tr>
<tr>
<td>Choi Hung</td>
<td>彩虹</td>
<td>coi2 hung4</td>
</tr>
<tr>
<td>City One</td>
<td>第一城</td>
<td>dai6 jat1 sing4</td>
</tr>
<tr>
<td>Diamond Hill</td>
<td>鑽石山</td>
<td>zyun3 sek6 saan1</td>
</tr>
<tr>
<td>Disneyland Resort</td>
<td>迪士尼</td>
<td>dik6 si6 nei4</td>
</tr>
<tr>
<td>East Tsim Sha Tsui</td>
<td>尖東</td>
<td>zim1 d

In [6]:
# we only need the English name of the stations, i.e. the first <td> of each row.
# Reading per row (instead of counting text nodes modulo 3) stays aligned even if
# a cell is empty or contains nested markup; the header row has only <th> cells
# and is skipped because it yields no <td>.
mtr = []
for table_row in table_body.find_all('tr'):
    cells = table_row.find_all('td')
    if not cells:
        continue
    station_name = cells[0].get_text(strip=True)  # plain str, not a NavigableString
    if station_name:
        mtr.append(station_name)
mtr

['Admiralty',
 'Airport',
 'AsiaWorld-Expo',
 'Austin',
 'Causeway Bay',
 'Central',
 'Chai Wan',
 'Che Kung Temple',
 'Cheung Sha Wan',
 'Choi Hung',
 'City One',
 'Diamond Hill',
 'Disneyland Resort',
 'East Tsim Sha Tsui',
 'Fanling',
 'Fo Tan',
 'Fortress Hill',
 'Hang Hau',
 'Heng Fa Chuen',
 'Heng On',
 'Hong Kong',
 'Hung Hom',
 'Jordan',
 'Kam Sheung Road',
 'Kowloon',
 'Kowloon Bay',
 'Kowloon Tong',
 'Kwai Fong',
 'Kwai Hing',
 'Kwun Tong',
 'Lai Chi Kok',
 'Lai King',
 'Lam Tin',
 'Lo Wu',
 'LOHAS Park',
 'Lok Fu',
 'Lok Ma Chau',
 'Long Ping',
 'Ma On Shan',
 'Mei Foo',
 'Mong Kok',
 'Mong Kok East',
 'Nam Cheong',
 'Ngau Tau Kok',
 'North Point',
 'Olympic',
 'Po Lam',
 'Prince Edward',
 'Quarry Bay',
 'Racecourse',
 'Sai Wan Ho',
 'Sha Tin',
 'Sha Tin Wai',
 'Sham Shui Po',
 'Shau Kei Wan',
 'Shek Kip Mei',
 'Shek Mun',
 'Sheung Shui',
 'Sheung Wan',
 'Siu Hong',
 'Sunny Bay',
 'Tai Koo',
 'Tai Po Market',
 'Tai Shui Hang',
 'Tai Wai',
 'Tai Wo',
 'Tai Wo Hau',
 'Tin Hau'

In [7]:
# Pause between lookups so the loop stays within the public Nominatim
# service's request-rate limit.
GEOCODE_DELAY_SECONDS = 1

lat_list = []
long_list = []

for station in mtr:
    addr = station + ', Hong Kong, China' # convert all addresses into coordinates data
    # NOTE(review): the query is the bare station name, so the geocoder may match a
    # like-named place rather than the station itself — verify the coordinates.
    lat, long = get_geo_coordinates(addr)
    lat_list.append(lat)
    long_list.append(long)
    time.sleep(GEOCODE_DELAY_SECONDS)

# create a dataframe of MTR stations locations
mtr_df = pd.DataFrame({'Name': mtr, 'Latitude': lat_list, 'Longitude': long_list})
mtr_df

Unnamed: 0,Name,Latitude,Longitude
0,Admiralty,22.278921,114.164498
1,Airport,22.316139,113.936445
2,AsiaWorld-Expo,22.321266,113.943025
3,Austin,22.305307,114.16662
4,Causeway Bay,22.280208,114.184841
5,Central,22.285239,114.150679
6,Chai Wan,22.265607,114.237964
7,Che Kung Temple,22.374746,114.186186
8,Cheung Sha Wan,22.233079,114.004457
9,Choi Hung,22.334905,114.209141


In [8]:
mtr_df.to_excel(excel_writer='mtr.xlsx')  # export the dataframe to an Excel file for future use