# PART 1

### Import libraries

In [1]:
!pip install geocoder
import pandas as pd
import geocoder # import geocoder



### Build a dataframe of Canadian postal codes

In [39]:
# Fixed (oldid) revision of the Wikipedia "List of postal codes of Canada: M" page.
url = 'https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=945633050'
# read_html parses the HTML tables on the page into a list of DataFrames.
data_wiki = pd.read_html(io=url)

In [40]:
# Take the first table parsed from the page and rename 'Postcode' to 'PostalCode'.
# rename() returns a new frame here, so the parsed table stored in data_wiki[0]
# is left untouched and this cell gives the same result every time it is run.
df = data_wiki[0].rename(columns={'Postcode': 'PostalCode'})
df.shape

(287, 3)

In [41]:
df.head(n=5)  # Preview the first five rows of the raw table

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### Clean data

In [42]:
# Keep only the rows whose Borough is assigned. Boolean filtering builds a new
# frame instead of dropping rows in place, so the frame that earlier cells
# displayed is not mutated behind the reader's back.
df = df[df.Borough != 'Not assigned']

In [43]:
df.Borough.value_counts()  # Number of rows per Borough

Etobicoke           45
North York          38
Downtown Toronto    37
Scarborough         37
Central Toronto     17
West Toronto        13
York                 9
East Toronto         7
East York            6
Mississauga          1
Name: Borough, dtype: int64

In [44]:
df[df['Neighbourhood'].eq('Not assigned')]  # List any rows whose Neighbourhood is still 'Not assigned'

Unnamed: 0,PostalCode,Borough,Neighbourhood


In [45]:
# Collapse the table to one row per PostalCode.
#   - Borough: 'first' always yields a scalar string. pd.Series.unique returns a
#     NumPy array per group, and whether that array is unwrapped to a scalar depends
#     on the pandas version; if it is not, later .str operations on Borough fail.
#     NOTE(review): this assumes each PostalCode belongs to exactly one Borough.
#   - Neighbourhood: all names in the group joined into one comma-separated string.
df_joined = df.groupby('PostalCode', as_index=False).agg({'Borough': 'first', 'Neighbourhood': ', '.join})

In [46]:
df_joined[df_joined['PostalCode'] == 'M9V']  # Spot-check M9V: its neighbourhoods should now sit in a single row

Unnamed: 0,PostalCode,Borough,Neighbourhood
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ..."


In [47]:
df_joined.head(n=5)  # Preview the grouped table

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [48]:
df_joined.shape  # (rows, columns) of the table grouped by PostalCode

(103, 3)

### end of part 1

# PART 2

In [49]:
# Start Part 2 from an independent copy of the grouped table. Passing a DataFrame
# to the pd.DataFrame constructor does not copy its data by default, so .copy()
# is used to get a genuinely separate frame.
postalcode_df = df_joined.copy()

In [50]:
!wget -q -O 'geo_data.csv' http://cocl.us/Geospatial_data
print('Data downloaded!')

Data downloaded!


In [51]:
# Load the downloaded coordinates file into a DataFrame.
geo_df = pd.read_csv(filepath_or_buffer='geo_data.csv')

In [52]:
# Give the key column the same name as in the postal-code table, then preview.
geo_df = geo_df.rename(columns={'Postal Code': 'PostalCode'})
geo_df.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [57]:
# Attach Latitude/Longitude to each postal code with an inner join on PostalCode
postalcode_df = df_joined.merge(
    geo_df.loc[:, ['PostalCode', 'Latitude', 'Longitude']],
    how='inner',
    on='PostalCode',
)
postalcode_df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [58]:
postalcode_df.shape  # (rows, columns) of the merged table

(103, 5)

### end of part 2

# PART 3

In [80]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    branca-0.4.0               |             py_0          26 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    certifi-2019.11.28         |   py36h9f0ad1d_1         149 KB  conda-forge
    altair-4.0.1               |             py_0         575 KB  conda-forge
    openssl-1.1.1e             |       h516909a_0         2.1 MB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    ------------------------------------------------------------
                       

In [74]:
# Keep only the rows whose Borough name contains 'Toronto'
toronto_df = postalcode_df.loc[postalcode_df.Borough.str.contains('Toronto')]

In [79]:
toronto_df.head(n=5)  # Preview the Toronto-only subset

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [84]:
# Base map; the coordinates and zoom level are the notebook's chosen starting view.
map_toronto = folium.Map(location=[43.653225, -79.383186], zoom_start=11)

# Appearance shared by every marker drawn below.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle marker per row of toronto_df, with the Borough name as popup text.
for latitude, longitude, borough in zip(toronto_df['Latitude'],
                                        toronto_df['Longitude'],
                                        toronto_df['Borough']):
    marker = folium.CircleMarker(
        [latitude, longitude],
        popup=folium.Popup(borough, parse_html=True),
        **marker_style
    )
    marker.add_to(map_toronto)

map_toronto

### end of part 3