In [1]:
# Only run this cell if you have not already installed the geopy and folium libraries
# folium can be used in place of geopandas
# folium documentation can be found at https://python-visualization.github.io/folium/quickstart.html

# ! pip install geopy
# ! pip install folium

# Class Activity - Geographical Clusters Part 1

Datasets are rarely complete and often require pre-processing. Imagine a dataset that has only an address column, with no latitude and longitude columns to represent the data geographically. In that case, you need to convert the addresses into a geographic format. The process of converting addresses to geographic coordinates — latitude and longitude — so that their locations can be mapped is called geocoding.

Geocoding is the computational process of transforming a physical address description to a location on the Earth’s surface (spatial representation in numerical coordinates).

## In this activity, you are required to: 
1. Review the sample code below on performing geocoding in Python using the geopy library
2. Adapt the sample code to create a map visualisation of existing hawker centres in Singapore

In [2]:
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
import matplotlib.pyplot as plt
import folium
from folium.plugins import FastMarkerCluster

### Geocoding a Single Address

In [3]:
# Create a Nominatim (OpenStreetMap) geocoder; user_agent identifies this
# application to the service. geopy's default request timeout is only one
# second, which easily produces timeout errors on slower connections, so a
# longer timeout is set explicitly.
locator = Nominatim(user_agent="myGeocoder", timeout=10)
# Look up a single free-text address; geocode returns None when nothing matches.
location = locator.geocode("lau pa sat, singapore")

In [4]:
# Show the full address returned by the geocoder, followed by its coordinates.
print(location.address)
print(f"Latitude = {location.latitude}, Longitude = {location.longitude}")

Lau Pa Sat, 18, Raffles Quay, Golden Shoe, Downtown Core, Singapore, Central, 048582, Singapore
Latitude = 1.28061665, Longitude = 103.85048020335128


### Geocoding with a pandas DataFrame

In [5]:
# Load the sample address records from addresses.csv (path is relative to the
# notebook's working directory).
df = pd.read_csv("addresses.csv")
# Preview the first rows to check which columns were read.
df.head()

Unnamed: 0,Typ,Nr,Namn,Address1,Address3,Address4,Address5,Telefon
0,Butik,102,Fältöversten,Karlaplan 13,115 20,STOCKHOLM,Stockholms län,08/662 22 89
1,Butik,104,,Nybrogatan 47,114 39,STOCKHOLM,Stockholms län,08/662 50 16
2,Butik,106,Garnisonen,Karlavägen 100 A,115 26,STOCKHOLM,Stockholms län,08/662 64 85
3,Butik,110,,Hötorgshallen,111 57,STOCKHOLM,Stockholms län,08/56849241
4,Butik,113,Sergel,Drottninggatan 45,111 21,STOCKHOLM,Stockholms län,08/21 47 44


In [6]:
# Build one comma-separated query string per row from the four address columns
# and append the country name, so each row can be sent to the geocoder as text.
df['ADDRESS'] = (
    df['Address1'].astype(str)
    + ',' + df['Address3']
    + ',' + df['Address4']
    + ',' + df['Address5']
    + ', Sweden'
)

df.head()

Unnamed: 0,Typ,Nr,Namn,Address1,Address3,Address4,Address5,Telefon,ADDRESS
0,Butik,102,Fältöversten,Karlaplan 13,115 20,STOCKHOLM,Stockholms län,08/662 22 89,"Karlaplan 13,115 20,STOCKHOLM,Stockholms län, ..."
1,Butik,104,,Nybrogatan 47,114 39,STOCKHOLM,Stockholms län,08/662 50 16,"Nybrogatan 47,114 39,STOCKHOLM,Stockholms län,..."
2,Butik,106,Garnisonen,Karlavägen 100 A,115 26,STOCKHOLM,Stockholms län,08/662 64 85,"Karlavägen 100 A,115 26,STOCKHOLM,Stockholms l..."
3,Butik,110,,Hötorgshallen,111 57,STOCKHOLM,Stockholms län,08/56849241,"Hötorgshallen,111 57,STOCKHOLM,Stockholms län,..."
4,Butik,113,Sergel,Drottninggatan 45,111 21,STOCKHOLM,Stockholms län,08/21 47 44,"Drottninggatan 45,111 21,STOCKHOLM,Stockholms ..."


In [7]:
# RateLimiter is already imported in the imports cell at the top of the notebook,
# so it is not imported again here.
# Wrap the geocoder so that consecutive requests are at least one second apart.
geocode = RateLimiter(locator.geocode, min_delay_seconds=1)
# Geocode every address string (one request per row, so this cell is slow).
df['location'] = df['ADDRESS'].apply(geocode)
# Convert each result's point to a plain tuple; rows with no result are stored as None.
df['point'] = df['location'].apply(lambda loc: tuple(loc.point) if loc else None)

RateLimiter caught an error, retrying (0/2 tries). Called with (*('Odengatan 92,113 22,STOCKHOLM,Stockholms län, Sweden',), **{}).
Traceback (most recent call last):
  File "C:\Users\User\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 445, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "C:\Users\User\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 440, in _make_request
    httplib_response = conn.getresponse()
  File "C:\Users\User\anaconda3\lib\http\client.py", line 1347, in getresponse
    response.begin()
  File "C:\Users\User\anaconda3\lib\http\client.py", line 307, in begin
    version, status, reason = self._read_status()
  File "C:\Users\User\anaconda3\lib\http\client.py", line 268, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "C:\Users\User\anaconda3\lib\socket.py", line 669, in readinto
    return self._sock.recv_into(b)
  File "C:\Users\User\anaconda3\lib\ssl.py", l

In [8]:
# Preview the frame, which now also holds the location and point columns.
df.head()

Unnamed: 0,Typ,Nr,Namn,Address1,Address3,Address4,Address5,Telefon,ADDRESS,location,point
0,Butik,102,Fältöversten,Karlaplan 13,115 20,STOCKHOLM,Stockholms län,08/662 22 89,"Karlaplan 13,115 20,STOCKHOLM,Stockholms län, ...","(13, Karlaplan, Östermalm, Östermalms stadsdel...","(59.3388914, 18.0909335, 0.0)"
1,Butik,104,,Nybrogatan 47,114 39,STOCKHOLM,Stockholms län,08/662 50 16,"Nybrogatan 47,114 39,STOCKHOLM,Stockholms län,...","(47, Nybrogatan, Villastaden, Östermalm, Öster...","(59.3372072, 18.0790982, 0.0)"
2,Butik,106,Garnisonen,Karlavägen 100 A,115 26,STOCKHOLM,Stockholms län,08/662 64 85,"Karlavägen 100 A,115 26,STOCKHOLM,Stockholms l...","(Karlavägen, Östermalm, Östermalms stadsdelsom...","(59.3358717, 18.0995103, 0.0)"
3,Butik,110,,Hötorgshallen,111 57,STOCKHOLM,Stockholms län,08/56849241,"Hötorgshallen,111 57,STOCKHOLM,Stockholms län,...","(Hötorgshallen, Hötorget, Klara, Norrmalm, Nor...","(59.3343273, 18.0626039, 0.0)"
4,Butik,113,Sergel,Drottninggatan 45,111 21,STOCKHOLM,Stockholms län,08/21 47 44,"Drottninggatan 45,111 21,STOCKHOLM,Stockholms ...","(Drottninggatan, Klara, Norrmalm, Norrmalms st...","(59.3336068, 18.0616879, 0.0)"


In [9]:
# First component of the point tuple stored in the row labelled 0.
df.loc[0, 'point'][0]

59.3388914

In [10]:
# Split the point column into latitude, longitude and altitude columns.
# Only rows that were actually geocoded are expanded: building the frame
# directly from a list that starts with None makes pandas treat it as a single
# column, and the three-column assignment then fails.
coord_columns = ['latitude', 'longitude', 'altitude']
geocoded_points = df['point'].dropna()
coordinates = pd.DataFrame(
    geocoded_points.tolist(),
    index=geocoded_points.index,
    columns=coord_columns,
)
# Reindexing restores the rows without a point as NaN, so they can still be
# counted and filtered out afterwards.
df[coord_columns] = coordinates.reindex(df.index)
df.head()

Unnamed: 0,Typ,Nr,Namn,Address1,Address3,Address4,Address5,Telefon,ADDRESS,location,point,latitude,longitude,altitude
0,Butik,102,Fältöversten,Karlaplan 13,115 20,STOCKHOLM,Stockholms län,08/662 22 89,"Karlaplan 13,115 20,STOCKHOLM,Stockholms län, ...","(13, Karlaplan, Östermalm, Östermalms stadsdel...","(59.3388914, 18.0909335, 0.0)",59.338891,18.090933,0.0
1,Butik,104,,Nybrogatan 47,114 39,STOCKHOLM,Stockholms län,08/662 50 16,"Nybrogatan 47,114 39,STOCKHOLM,Stockholms län,...","(47, Nybrogatan, Villastaden, Östermalm, Öster...","(59.3372072, 18.0790982, 0.0)",59.337207,18.079098,0.0
2,Butik,106,Garnisonen,Karlavägen 100 A,115 26,STOCKHOLM,Stockholms län,08/662 64 85,"Karlavägen 100 A,115 26,STOCKHOLM,Stockholms l...","(Karlavägen, Östermalm, Östermalms stadsdelsom...","(59.3358717, 18.0995103, 0.0)",59.335872,18.09951,0.0
3,Butik,110,,Hötorgshallen,111 57,STOCKHOLM,Stockholms län,08/56849241,"Hötorgshallen,111 57,STOCKHOLM,Stockholms län,...","(Hötorgshallen, Hötorget, Klara, Norrmalm, Nor...","(59.3343273, 18.0626039, 0.0)",59.334327,18.062604,0.0
4,Butik,113,Sergel,Drottninggatan 45,111 21,STOCKHOLM,Stockholms län,08/21 47 44,"Drottninggatan 45,111 21,STOCKHOLM,Stockholms ...","(Drottninggatan, Klara, Norrmalm, Norrmalms st...","(59.3336068, 18.0616879, 0.0)",59.333607,18.061688,0.0


In [11]:
# List the current column labels.
df.columns

Index(['Typ', 'Nr', 'Namn', 'Address1', 'Address3', 'Address4', 'Address5',
       'Telefon', 'ADDRESS', 'location', 'point', 'latitude', 'longitude',
       'altitude'],
      dtype='object')

In [12]:
# Discard the raw address fields and the intermediate geocoding columns now
# that the coordinates live in their own columns.
df = df.drop(
    columns=[
        'Address1', 'Address3', 'Address4', 'Address5',
        'Telefon', 'ADDRESS', 'location', 'point',
    ],
)

In [13]:
# Preview the frame after the drop to confirm which columns remain.
df.head()

Unnamed: 0,Typ,Nr,Namn,latitude,longitude,altitude
0,Butik,102,Fältöversten,59.338891,18.090933,0.0
1,Butik,104,,59.337207,18.079098,0.0
2,Butik,106,Garnisonen,59.335872,18.09951,0.0
3,Butik,110,,59.334327,18.062604,0.0
4,Butik,113,Sergel,59.333607,18.061688,0.0


In [14]:
# Number of rows whose latitude is missing.
df["latitude"].isna().sum()

2

In [15]:
# Keep only the rows that have a latitude; rows without one cannot be plotted.
df = df.dropna(subset=["latitude"])

In [16]:
# Base map centred on the given [latitude, longitude] pair, using the
# 'cartodbpositron' tile set and an initial zoom level of 12.
map1 = folium.Map(location=[59.338315, 18.089960], zoom_start=12, tiles='cartodbpositron')

In [17]:
# Add one circle marker to the map for each remaining row, then display the map.
for lat, lon in zip(df["latitude"], df["longitude"]):
    folium.CircleMarker(location=[lat, lon]).add_to(map1)
map1

In [18]:
# Save the map to an HTML file named map.html in the working directory.
map1.save("map.html")