# **Part 1**

## First, import the required libraries

In [0]:
import pandas as pd
import numpy as np

## Read CSV file 
I downloaded the file and uploaded it to this path: '/content/PostCode_Canada.csv'

In [448]:
# Read the postcode / borough / neighbourhood table from the uploaded CSV.
postcode_csv_path = '/content/PostCode_Canada.csv'
df = pd.read_csv(postcode_csv_path)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M9Z,Not assigned,Not assigned
1,M9Y,Not assigned,Not assigned
2,M9X,Not assigned,Not assigned
3,M9W,Etobicoke,Northwest
4,M9V,Etobicoke,Albion Gardens


## Remove rows whose **Borough** is *Not assigned*


In [449]:
# Keep only rows that have a real borough: drop the "Not assigned" placeholder
# (and any missing values), then renumber the rows from 0.
# Filtering into a new frame replaces `df["Borough"].replace(..., inplace=True)`,
# which mutates a temporary column object and leaves `df` untouched when pandas
# copy-on-write is active.
has_borough = df["Borough"].notna() & (df["Borough"] != "Not assigned")
df = df[has_borough].reset_index(drop=True)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M9W,Etobicoke,Northwest
1,M9V,Etobicoke,Albion Gardens
2,M9V,Etobicoke,Beaumond Heights
3,M9V,Etobicoke,Humbergate
4,M9V,Etobicoke,Jamestown


## Next, fill *Not assigned* values in **Neighbourhood** with the value of **Borough**

In [0]:
# Where a row has no neighbourhood name, fall back to its borough name.
# A boolean mask with a single `.loc[rows, column]` assignment writes into `df`
# itself; the chained form `df.loc[i]["Neighbourhood"] = ...` assigns into a
# temporary row object and can leave `df` unchanged.
unassigned = df["Neighbourhood"] == "Not assigned"
df.loc[unassigned, "Neighbourhood"] = df.loc[unassigned, "Borough"]

## Combine rows that share the same Postcode

In [0]:
# Collapse the table to one row per postcode: keep the borough of the first row
# and join all neighbourhood names with ", ".
# `sort=False` keeps postcodes in their order of first appearance, and
# `as_index=False` keeps "Postcode" as a regular column with a fresh 0..n-1 index.
# This replaces a loop that dropped rows while iterating, relied on chained
# assignment (`df.loc[pre_i]["Neighbourhood"] += ...`) and only merged rows that
# happened to be adjacent.
df = df.groupby("Postcode", sort=False, as_index=False).agg(
    {"Borough": "first", "Neighbourhood": ", ".join}
)

## Finally, reindex df and show it


In [452]:
# Renumber the rows consecutively from 0, then preview the result.
df.index = pd.RangeIndex(len(df))
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M9W,Etobicoke,Northwest
1,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ..."
2,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv..."
3,M9P,Etobicoke,Westmount
4,M9N,York,Weston


In [453]:
# Dimensions of the cleaned table as (rows, columns).
df.shape

(103, 3)

# **Part 2**
Because waiting for a response from the geocoder takes too long, I use the Geospatial_Coordinates.csv file instead.

In [454]:
# Load the coordinate table, name its key column like the one in `df`, and
# index it by postcode so coordinates can be looked up by label.
geo_df = (
    pd.read_csv('/content/Geospatial_Coordinates.csv')
    .rename(columns={"Postal Code": "Postcode"})
    .set_index("Postcode")
)
geo_df.head()

Unnamed: 0_level_0,Latitude,Longitude
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.806686,-79.194353
M1C,43.784535,-79.160497
M1E,43.763573,-79.188711
M1G,43.770992,-79.216917
M1H,43.773136,-79.239476


In [455]:
# Add float placeholder columns for the coordinates (all zeros for now).
for coord_col in ("Latitude", "Longitude"):
    df[coord_col] = 0.0
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M9W,Etobicoke,Northwest,0.0,0.0
1,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.0
2,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv...",0.0,0.0
3,M9P,Etobicoke,Westmount,0.0,0.0
4,M9N,York,Weston,0.0,0.0


In [0]:
# Look up each postcode's coordinates in `geo_df` (indexed by Postcode) with one
# vectorized mapping per column instead of a Python-level loop over the rows.
unknown_postcodes = df.loc[~df["Postcode"].isin(geo_df.index), "Postcode"].unique()
if len(unknown_postcodes) > 0:
    # The row-by-row lookup raised KeyError on an unknown postcode; keep failing
    # loudly rather than silently leaving NaN coordinates behind.
    raise KeyError("No coordinates for postcodes: {}".format(list(unknown_postcodes)))
for coord_col in ("Latitude", "Longitude"):
    df[coord_col] = df["Postcode"].map(geo_df[coord_col])

In [457]:
# Preview the first rows, including the Latitude and Longitude columns.
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M9W,Etobicoke,Northwest,43.706748,-79.594054
1,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.739416,-79.588437
2,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv...",43.688905,-79.554724
3,M9P,Etobicoke,Westmount,43.696319,-79.532242
4,M9N,York,Weston,43.706876,-79.518188


# **Part 3**

In [458]:
!pip install folium==0.5.0
!pip install geopy
import folium
from geopy.geocoders import Nominatim



Start Clustering

In [491]:
from sklearn.cluster import KMeans

# Group the postcodes into 8 clusters using only their coordinates.
# k-means starts from random centroids, so `random_state` is fixed to get the
# same labels (and therefore the same marker colours) on every run.
# `n_init` is spelled out because its default differs between scikit-learn
# versions.
kmeans = KMeans(n_clusters=8, n_init=10, random_state=0).fit(df[["Latitude", "Longitude"]])
df["cluster"] = kmeans.labels_
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,cluster,color
0,M9W,Etobicoke,Northwest,43.706748,-79.594054,7,black
1,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.739416,-79.588437,4,green
2,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv...",43.688905,-79.554724,7,black
3,M9P,Etobicoke,Westmount,43.696319,-79.532242,4,green
4,M9N,York,Weston,43.706876,-79.518188,4,green


In [492]:
# Give every cluster label its own marker colour: label k -> colors[k].
colors = ["red", "orange", "yellow", "green", "blue", "violet", "white", "black"]
# Index the palette with the whole label column at once instead of writing one
# cell per row. Array indexing keeps list semantics: a label outside the
# palette still raises IndexError.
df["color"] = np.asarray(colors, dtype=object)[df["cluster"].to_numpy()]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,cluster,color
0,M9W,Etobicoke,Northwest,43.706748,-79.594054,7,black
1,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.739416,-79.588437,4,blue
2,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv...",43.688905,-79.554724,7,black
3,M9P,Etobicoke,Westmount,43.696319,-79.532242,4,blue
4,M9N,York,Weston,43.706876,-79.518188,4,blue


Draw map

In [0]:
# Map centre: Toronto's coordinates, with the latitude shifted north by 0.07
# degrees (presumably to centre the view on the plotted postcodes - confirm).
latitude, longitude = 43.651070+0.07, -79.347015

In [494]:
# Create the base map centred on the chosen coordinates.
toronto_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# Add one circle marker per postcode, coloured by its cluster.
for lat, lng, label, color_ in zip(df["Latitude"], df["Longitude"], df["Postcode"], df["color"]):
    folium.CircleMarker(
        [lat, lng],
        radius=5,  # marker size
        color=color_,
        fill=True,
        popup=label,  # show the postcode when the marker is clicked
        # Fill with the cluster colour as well; a fixed 'blue' fill made every
        # marker look alike apart from its thin outline.
        fill_color=color_,
        fill_opacity=0.6
    ).add_to(toronto_map)

# Display the map as the cell output.
toronto_map