## Clusters

**Import necessary libraries:**

In [1]:
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
import requests
from sklearn.cluster import KMeans

In [2]:
# One-off setup, left commented out: uncomment and run once if folium is not
# installed in the kernel's environment.
# !pip install folium

In [3]:
import folium

**Import dataframe from other notebook:**

In [4]:
# Restore `geo_df` from IPython's %store storage; it must have been saved
# beforehand (with `%store geo_df`) by the notebook that built it.
%store -r geo_df

**Create a map centered around Toronto:**

In [5]:
# Geocode the city name to get the coordinates used to centre the maps below.
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match; fail here with a
# clear message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude

In [18]:
# Base map centred on the geocoded Toronto coordinates; the borough markers
# are added to this object further down.
map_toronto = folium.Map(
    location=[latitude, longitude],
    zoom_start=12,
)

## All boroughs with 'Toronto' in their name:

**Create new dataframe only containing Boroughs that have 'Toronto' in their name:**

In [7]:
# Keep only the rows whose borough name contains 'Toronto'. The .copy() makes
# toronto_df an independent frame, so adding columns below does not touch geo_df.
toronto_df = geo_df[geo_df['Borough'].str.contains('Toronto')].copy()

# Integer label for each borough name. Deliberately not called `dict`, which
# would shadow the built-in for every later cell in the kernel.
borough_to_cluster = {'Central Toronto':0,'Downtown Toronto':1,'East Toronto':2,'West Toronto':3}

# Assign the mapped column in one step. Calling .replace(..., inplace=True) on
# a column selected from the frame is a chained in-place write that is not
# guaranteed to reach toronto_df. Borough names missing from the mapping are
# left unchanged, as .replace() would have left them.
toronto_df['Cluster Labels'] = toronto_df['Borough'].map(
    lambda borough: borough_to_cluster.get(borough, borough)
)

toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,1
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,1
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,1
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,1
19,M4E,East Toronto,The Beaches,43.676357,-79.293031,2


**Adding markers to those boroughs (first import the matplotlib colormap helpers used to color them):**

In [8]:
import matplotlib.cm as cm
import matplotlib.colors as colors

## Cluster the boroughs by name:

**Color coding the unique clusters:**

In [9]:
# Build one hex colour per palette slot by sampling matplotlib's rainbow
# colormap at `kclusters` evenly spaced points in [0, 1].
# NOTE(review): the borough mapping defines four labels (0-3), so one of the
# five colours generated here goes unused -- confirm whether 5 is intentional.
kclusters = 5
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

In [10]:
# Add one circle marker per row of toronto_df to map_toronto, coloured by the
# row's integer 'Cluster Labels' value and labelled "<neighborhood>, <borough>".
for lat, lng, borough, neighborhood, cluster in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['Borough'], toronto_df['Neighborhood'], toronto_df['Cluster Labels']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    # Index the palette directly with the label (0-3). The previous
    # `cluster-2` offset only worked through negative-index wrap-around.
    marker_color = rainbow[cluster]
    # parse_html belongs to folium.Popup (set above), not to CircleMarker,
    # so it is no longer passed to the marker.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_toronto)

In [11]:
# Bare last expression: the notebook renders the folium map as the cell output.
map_toronto

Image: https://github.com/theblindhobo/Coursera_Capstone/blob/master/Screenshot%202020-10-30%20122226.png

## Cluster the neighborhoods into 8 k-means clusters based on their coordinates:

In [13]:
# Cluster on the coordinates only. The feature columns are selected by name
# instead of dropping the others: `drop([...], 1)` passes `axis` positionally,
# which pandas 2.x rejects, and a drop-based selection would feed the 'Labels'
# column written below back into KMeans if this cell were run a second time.
df_cluster = toronto_df[['Latitude', 'Longitude']]

# KMeans is already imported in the first cell. random_state fixes the
# initialisation; n_init is set explicitly because scikit-learn's default
# changed between releases (10 was the long-standing default).
kmeans = KMeans(n_clusters=8, random_state=0, n_init=10).fit(df_cluster)
toronto_df['Labels'] = kmeans.labels_

In [16]:
# Fresh map for the k-means result, centred on the same coordinates.
map_cluster = folium.Map(location=[latitude,longitude], zoom_start=12)

# One colour per k-means label (8 clusters -> 8 entries). Named
# `cluster_colors` rather than `colors` so it does not rebind the
# matplotlib.colors module that the palette cell above relies on.
cluster_colors = ['black','red','green','blue','orange','purple','yellow','darkred']

# Add one circle marker per row of toronto_df, coloured by its 'Labels' value
# and labelled "<neighborhood>, <borough>".
for lat,lng,borough,neighborhood,cluster in zip(toronto_df['Latitude'],toronto_df['Longitude'],toronto_df['Borough'],toronto_df['Neighborhood'],toronto_df['Labels']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    marker_color = cluster_colors[cluster]
    folium.CircleMarker(
        [lat,lng],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_cluster)

In [17]:
# Bare last expression: the notebook renders the k-means map as the cell output.
map_cluster

Image: https://github.com/theblindhobo/Coursera_Capstone/blob/master/Screenshot%202020-10-30%20122241.png