In [1]:
from io import StringIO
import re

from bs4 import BeautifulSoup
import pandas as pd
import requests
# Lift pandas' display limits (None = no limit) so frames are rendered with
# every column and every row instead of being truncated.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
def sent_trim(string):
    """Normalise whitespace and strip decorative characters from a sentence.

    The substitutions are applied in order:

    1. carriage returns and newlines become spaces;
    2. runs of hyphens/underscores become a single space;
    3. runs of spaces collapse to one space;
    4. a leading ``o`` or ``V`` bullet (optionally preceded by spaces) is
       replaced by a space;
    5. leading characters outside the allowed set are removed;
    6. trailing characters outside the allowed set are removed.

    Returns the cleaned string.
    """
    # (pattern, replacement) pairs; order matters because later rules rely on
    # the whitespace normalisation done by the earlier ones.
    substitutions = (
        (r'\r', ' '),
        (r'\n', ' '),
        (r'[-_]+', ' '),
        (r' +', ' '),
        (r'^ *([oV] )', ' '),
        (r'^[^\w.,!?%:/()&@;-]+', ''),
        (r'[^\w.,!?%:/()）。&@;]+$', ''),
    )
    text = string
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text

In [3]:
# Download the Wikipedia page that lists every MTR station.
# The timeout stops the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() surfaces HTTP errors here instead of letting an error
# page be parsed further down as if it were the station list.
html = requests.get('https://en.wikipedia.org/wiki/List_of_MTR_stations', timeout=30)
html.raise_for_status()

In [4]:
# Parse the downloaded page text into a BeautifulSoup tree using the html5lib
# parser backend.
soup = BeautifulSoup(html.text, 'html5lib')

In [5]:
# Drop every <small> element from the parsed page: they hold extra explanatory
# text shown under some stations (e.g. "formerly ...").
for small_tag in soup("small"):
    small_tag.decompose()

In [6]:
# Collect the station tables from the page. A multi-word string passed to
# class_ is compared against the full class attribute value.
# NOTE(review): tables whose class attribute carries extra or reordered classes
# would presumably not match — confirm against the current page markup.
tables = soup.find_all('table', class_='wikitable sortable')

In [7]:
# Parse every collected table into its own DataFrame. The markup is wrapped in
# StringIO because passing literal HTML text to read_html is deprecated in
# recent pandas releases, while file-like input is accepted by all versions.
mtr_stations_df_list = pd.read_html(StringIO(str(tables)))

# Stack the per-table frames, keep one row per (district, station) pair, order
# the rows by district, and rename 'Name' explicitly (rather than overwriting
# the column list positionally) so the mapping cannot silently drift.
mtr_stations_df = (
    pd.concat(mtr_stations_df_list)[['District', 'Name']]
    .drop_duplicates()
    .sort_values('District')
    .reset_index(drop=True)
    .rename(columns={'Name': 'Station'})
)

In [8]:
# Display the de-duplicated District / Station table.
mtr_stations_df

Unnamed: 0,District,Station
0,Central and Western,Hong Kong
1,Central and Western,Admiralty
2,Central and Western,Central
3,Central and Western,Kennedy Town
4,Central and Western,HKU
5,Central and Western,Sai Ying Pun
6,Central and Western,Sheung Wan
7,Eastern,Tin Hau
8,Eastern,North Point
9,Eastern,Quarry Bay


In [9]:
from geopy.geocoders import Nominatim
# Nominatim client shared by all geocoding calls in this notebook.
# geopy's default request timeout is only 1 second, and a single slow response
# would raise and abort the whole per-station geocoding run, so allow a more
# generous timeout.
geolocator = Nominatim(user_agent="tor_explorer", timeout=10)

In [10]:
def get_lat_long(address):
    """Geocode ``address`` within Hong Kong and return its coordinates.

    The query sent to the module-level ``geolocator`` is ``address`` with
    ``", HK"`` appended.

    Parameters
    ----------
    address : str
        Free-text place name or address, without the region suffix.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` of the match, or the sentinel ``(0, 0)``
        when the geocoder returns no result.
    """
    location = geolocator.geocode(f'{address}, HK')
    # geocode() returns None when nothing matches. Test for that directly
    # instead of catching AttributeError, which would also hide unrelated
    # attribute errors behind the "not found" sentinel.
    if location is None:
        return 0, 0
    return location.latitude, location.longitude

In [11]:
def append_lat_long(series):
    """Add ``Latitude`` and ``Longitude`` entries to a station row.

    The row's ``'Station'`` value, suffixed with ``" Station"``, is looked up
    through ``get_lat_long``. The row is modified in place and also returned,
    which is the shape ``DataFrame.apply(..., axis=1)`` expects.
    """
    coordinates = get_lat_long(f"{series['Station']} Station")
    series['Latitude'], series['Longitude'] = coordinates
    return series

In [12]:
# Geocode every station: append_lat_long runs once per row (axis=1), and each
# call issues one lookup through get_lat_long.
# NOTE(review): the recorded output lists 'Central' at (22.350627, 114.184916),
# well away from the other Central and Western rows (~22.28, 114.13-114.17), so
# free-text queries can apparently resolve to the wrong place — verify results.
mtr_stations_df = mtr_stations_df.apply(append_lat_long, axis=1)

In [22]:
# Discard stations that could not be geocoded: get_lat_long reports a failed
# lookup as (0, 0), so a zero latitude marks a missing result. The surviving
# rows keep their original index labels.
mtr_stations_df = mtr_stations_df.loc[mtr_stations_df['Latitude'].ne(0)]

In [23]:
# Display the stations that remain after dropping failed lookups.
mtr_stations_df

Unnamed: 0,District,Station,Latitude,Longitude
0,Central and Western,Hong Kong,22.279328,114.162813
1,Central and Western,Admiralty,22.278381,114.165013
2,Central and Western,Central,22.350627,114.184916
3,Central and Western,Kennedy Town,22.281363,114.127832
4,Central and Western,HKU,22.283976,114.135507
5,Central and Western,Sai Ying Pun,22.286121,114.142086
6,Central and Western,Sheung Wan,22.285899,114.153707
7,Eastern,Tin Hau,22.282709,114.191492
9,Eastern,Quarry Bay,22.287754,114.214932
10,Eastern,Fortress Hill,22.288222,114.193682


In [24]:
# Write the geocoded stations to stations.csv (relative path), leaving the
# DataFrame index out of the file.
mtr_stations_df.to_csv('stations.csv', index=False)

In [14]:
import folium

In [15]:
# Show the coordinates the geocoder returns for the query 'Hong Kong, HK';
# the same lookup is used to centre the map.
get_lat_long('Hong Kong')

(22.2793278, 114.1628131)

In [16]:
# Base map centred on the coordinates the geocoder returns for 'Hong Kong'.
map_hk = folium.Map(location=get_lat_long('Hong Kong'), zoom_start=10.5)

# Add one circle marker per station, each with a "Station, District" popup.
# name=None makes itertuples yield plain tuples in the selected column order.
for lat, lng, district, station in mtr_stations_df[
    ['Latitude', 'Longitude', 'District', 'Station']
].itertuples(index=False, name=None):
    popup = folium.Popup(f'{station}, {district}', parse_html=True)
    # parse_html is a Popup option, not a CircleMarker one, so it is no longer
    # passed to the marker itself.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_hk)

map_hk