# Exploring Neighborhoods in the City of Toronto Using Segmenting and Clustering

---
### To help you go through the notebook, please find those cells with <font color='red'>RED</font> markdown. They are where I show my answers to the assignments
---

## First, let me load in several modules

In [196]:
import io
import os

import folium
import geopy
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import pgeocode # import geocoder
import requests
from sklearn.cluster import KMeans

## Loading in the table from Wikipedia

In [99]:
# Download the Wikipedia page that lists the "M" (Toronto) postal codes.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response = requests.get(url, timeout=30)
response.raise_for_status()  # fail loudly instead of parsing an HTTP error page

# read_html returns one DataFrame per <table>; the first table is the one used here.
# The markup is wrapped in StringIO because newer pandas deprecates raw HTML strings.
df_list = pd.read_html(io.StringIO(response.text))
postal_raw = df_list[0].copy()
postal_raw.columns = ['Postal code', 'Borough', 'Neighborhood']
postal_raw['Postal code'] = postal_raw['Postal code'].astype(str).str.strip()

# Keep only genuine postal-code rows with an assigned borough.
# The previous version dropped the first remaining row unconditionally, which
# discards a real record whenever pandas has already consumed the header row;
# matching the "M<digit><letter>" pattern removes a stray header row
# without that risk.
is_postal_code = postal_raw['Postal code'].str.match(r'^M\d[A-Z]$')
is_assigned = postal_raw['Borough'] != 'Not assigned'
df_tn_post = postal_raw[is_postal_code & is_assigned].reset_index(drop=True)

### For multiple neighborhoods existing in one postal code area, I combine them into one row with the neighborhoods separated with a comma 

In [100]:
# Turn "A / B / C" into "A, B, C" for every postal code in one vectorized pass.
# The earlier loop wrote through chained indexing (df[col][i] = ...), which
# triggers SettingWithCopyWarning, is not guaranteed to write back, and only
# worked because the index happened to be 0..n-1.
df_tn_post['Neighborhood'] = (
    df_tn_post['Neighborhood']
    .str.replace(r'\s*/\s*', ', ', regex=True)  # slash plus surrounding blanks -> ", "
    .str.strip()                                # trim the outer ends of the whole string
)
        

## <font color='red'> Here is the table for question 1-4</font>

In [101]:
# Preview the first rows of the cleaned postal-code table.
df_tn_post.head(12)

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [102]:
# (rows, columns) of the cleaned postal-code table.
df_tn_post.shape

(103, 3)

## Generating latitude and longitude coordinates for a given postal code

### Here I use a python package called "pgeocode"

In [103]:
# Look up the coordinates of every postal code with pgeocode (Canadian data set).
nomi = pgeocode.Nominatim('ca')
lat = []
lon = []
for ps in df_tn_post['Postal code']:
    geo = nomi.query_postal_code(ps)
    lat.append(geo['latitude'])
    lon.append(geo['longitude'])
    # A failed lookup yields NaN, and `NaN == None` is always False, so the
    # old check never reported anything; pd.isna catches both NaN and None.
    if pd.isna(geo['latitude']):
        print(ps)

df_tn_post['Latitude'] = lat
df_tn_post['Longitude'] = lon
df_tn_post.head(12)

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7545,-79.33
1,M4A,North York,Victoria Village,43.7276,-79.3148
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6555,-79.3626
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.7223,-79.4504
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6641,-79.3889
5,M9A,Etobicoke,Islington Avenue,43.6662,-79.5282
6,M1B,Scarborough,"Malvern, Rouge",43.8113,-79.193
7,M3B,North York,Don Mills,43.745,-79.359
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.7063,-79.3094
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.6572,-79.3783


### "Pgeocode" works pretty well except for one postal code, so I will read in the provided CSV file to acquire the coordinates for this problematic one.

In [104]:
# Rows whose coordinate lookup failed (Latitude is NaN).
# Selecting on the DataFrame column, rather than on the loose `lat` list, keeps
# this cell correct even after later cells reuse the name `lat`.
df_err = df_tn_post[df_tn_post['Latitude'].isna()]
df_err

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
76,M7R,Mississauga,Canada Post Gateway Processing Centre,,


In [105]:
# Print the index label of every row that is still missing coordinates.
for i in df_err.index:
    print(i)

76


In [74]:
# Download the course-provided CSV of postal-code coordinates.
url = "https://cocl.us/Geospatial_data"
response = requests.get(url, timeout=30)
response.raise_for_status()  # do not try to parse an HTTP error page as CSV
df_ps = pd.read_csv(io.StringIO(response.content.decode('utf-8')))
df_ps.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [356]:
# Inspect the rows that need coordinates from the CSV.
# NOTE: this cell used to write a placeholder latitude (1.5) into row 76 as a
# scratch test. Run out of order (after the fill cell below), that placeholder
# replaced the real coordinate, so the write has been removed and the cell is
# now read-only.
df_tn_post.loc[df_err.index, ['Postal code', 'Latitude', 'Longitude']]

In [115]:
# Fill the coordinates pgeocode could not resolve from the course CSV.
# Index the CSV once by postal code (first occurrence wins, as before) instead
# of filtering the whole frame twice per missing code.
csv_coords = (
    df_ps
    .drop_duplicates(subset='Postal Code')
    .set_index('Postal Code')[['Latitude', 'Longitude']]
)
for ps_err, ind in zip(df_err['Postal code'], df_err.index):
    if ps_err not in csv_coords.index:
        # Previously this surfaced as an opaque IndexError from `.values[0]`.
        raise KeyError('Postal code {} is missing from the geospatial CSV'.format(ps_err))
    df_tn_post.at[ind, 'Latitude'] = csv_coords.at[ps_err, 'Latitude']
    df_tn_post.at[ind, 'Longitude'] = csv_coords.at[ps_err, 'Longitude']

# Every column should now report zero missing values.
df_tn_post.isna().sum()

Postal code     0
Borough         0
Neighborhood    0
Latitude        0
Longitude       0
dtype: int64

## <font color='red'> Here is the table with the geographical coordinates of the neighborhoods in Toronto</font>

In [116]:
# Display the postal-code table, now including the Latitude/Longitude columns.
df_tn_post

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.754500,-79.330000
1,M4A,North York,Victoria Village,43.727600,-79.314800
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.655500,-79.362600
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.722300,-79.450400
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.664100,-79.388900
5,M9A,Etobicoke,Islington Avenue,43.666200,-79.528200
6,M1B,Scarborough,"Malvern, Rouge",43.811300,-79.193000
7,M3B,North York,Don Mills,43.745000,-79.359000
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706300,-79.309400
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657200,-79.378300


## Here is a map of all the postal code areas in Toronto

In [357]:
# create map
toronto_map = folium.Map(location=[43.6532, -79.3832], zoom_start=10)

# add one marker per postal code area
# (loop variables are named row_lat/row_lon so they do not overwrite the
#  `lat`/`lon` lists built by the geocoding cell)
for row_lat, row_lon, poi, ps in zip(df_tn_post['Latitude'], df_tn_post['Longitude'],
                                     df_tn_post['Neighborhood'], df_tn_post['Postal code']):
    # parse_html=True escapes the label text (e.g. the apostrophe in
    # "Queen's Park"), matching how the cluster map builds its popups.
    label = folium.Popup(ps + ' : ' + poi, parse_html=True)
    folium.CircleMarker(
        [row_lat, row_lon],
        radius=2,
        popup=label,
        color='darkorange',
        fill=True,
        fill_color='red',
        fill_opacity=0.7).add_to(toronto_map)

toronto_map

## Explore and cluster the neighborhoods in Toronto

### Now, let's get the venue information by first setting up the Foursquare credentials and API version.

In [129]:
# Foursquare credentials are read from the environment so that secrets never
# live in the notebook source or in its saved output.
# SECURITY: the key pair that used to be hard-coded here has been published
# with the notebook and should be revoked/rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')          # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # your Foursquare Secret
VERSION = '20200427' # Foursquare API version

if not CLIENT_ID or not CLIENT_SECRET:
    raise RuntimeError(
        'Set the FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET environment '
        'variables before running this notebook.')

print('Foursquare credentials loaded (values intentionally not displayed).')

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


In [259]:
def getNearbyVenues(names, latitudes, longitudes, radius=600,LIMIT=100):
    """Collect the venues Foursquare's "explore" endpoint returns around each location.

    Parameters
    ----------
    names : iterable
        Label of each location; copied into the 'Neighborhood' column.
    latitudes, longitudes : iterable of float
        Coordinates of each location, aligned with ``names``.
    radius : int, optional
        Value sent as the API ``radius`` parameter.
    LIMIT : int, optional
        Value sent as the API ``limit`` parameter.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but keeps these columns) when no venue is found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status (bad credentials, quota, ...).
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)  # progress indicator: one line per location queried

        # Let requests build and URL-encode the query string.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        response = requests.get(endpoint, params=params, timeout=30)
        # Surface HTTP errors directly instead of a KeyError on the JSON below.
        response.raise_for_status()
        items = response.json()["response"]['groups'][0]['items']

        # keep only the relevant fields of each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue without any category no longer aborts the whole run
                categories[0]['name'] if categories else None))

    # Passing `columns=` up front keeps the schema even when `rows` is empty;
    # assigning `.columns` afterwards raised a length-mismatch error in that case.
    return pd.DataFrame(rows, columns=columns)

### Select only those boroughs that contain the word "Toronto"

In [360]:
# Boroughs whose name contains "Toronto", in order of first appearance.
toronto_borough = [b for b in df_tn_post['Borough'].unique() if 'Toronto' in b]
print(toronto_borough)

# .copy() makes df_tn_bor an independent frame rather than a slice of df_tn_post.
# (A stray `df_tn_bor.head()` whose result was discarded has been removed.)
df_tn_bor = df_tn_post.loc[df_tn_post['Borough'].isin(toronto_borough)].copy()
df_tn_bor.shape

['Downtown Toronto', 'East Toronto', 'West Toronto', 'Central Toronto']


(39, 5)

### Getting the information of venues

In [295]:
# Query venues once per Toronto postal code; the postal code is used as the
# location label, so it ends up in the 'Neighborhood' column of the result.
toronto_venues = getNearbyVenues(
    df_tn_bor['Postal code'],
    df_tn_bor['Latitude'],
    df_tn_bor['Longitude'],
    LIMIT=100,
)



M5A
M7A
M5B
M5C
M4E
M5E
M5G
M6G
M5H
M6H
M5J
M6J
M4K
M5K
M6K
M4L
M5L
M4M
M4N
M5N
M4P
M5P
M6P
M4R
M5R
M6R
M4S
M5S
M6S
M4T
M5T
M4V
M5V
M4W
M5W
M4X
M5X
M4Y
M7Y


In [296]:
# Report the size of the venue table, then preview its first rows.
print(toronto_venues.shape)
toronto_venues.head()

(1996, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M5A,43.6555,-79.3626,Roselle Desserts,43.653447,-79.362017,Bakery
1,M5A,43.6555,-79.3626,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,M5A,43.6555,-79.3626,Figs Breakfast & Lunch,43.655675,-79.364503,Breakfast Spot
3,M5A,43.6555,-79.3626,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
4,M5A,43.6555,-79.3626,The Yoga Lounge,43.655515,-79.364955,Yoga Studio


### Check the number of different venues in this data

In [297]:
# Count rows per venue category (count() tallies the non-null values of each remaining column).
toronto_venues.groupby('Venue Category').count() 

Unnamed: 0_level_0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Accessories Store,1,1,1,1,1,1
American Restaurant,30,30,30,30,30,30
Animal Shelter,1,1,1,1,1,1
Antique Shop,2,2,2,2,2,2
Aquarium,1,1,1,1,1,1
Art Gallery,15,15,15,15,15,15
Art Museum,3,3,3,3,3,3
Arts & Crafts Store,8,8,8,8,8,8
Asian Restaurant,16,16,16,16,16,16
Athletics & Sports,1,1,1,1,1,1


### Check the number of venues in each postal code "neighborhood"

In [298]:
# Count rows per 'Neighborhood' label (the postal code passed to getNearbyVenues).
toronto_venues.groupby('Neighborhood').count() 

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
M4E,11,11,11,11,11,11
M4K,63,63,63,63,63,63
M4L,30,30,30,30,30,30
M4M,32,32,32,32,32,32
M4N,4,4,4,4,4,4
M4P,10,10,10,10,10,10
M4R,24,24,24,24,24,24
M4S,31,31,31,31,31,31
M4T,10,10,10,10,10,10
M4V,8,8,8,8,8,8


In [299]:
# Number of distinct venue categories present in the venue table.
n_categories = len(toronto_venues['Venue Category'].unique())
print('There are {} uniques categories.'.format(n_categories))

There are 235 uniques categories.


## Next, we generate clusters using venue types in each Toronto postal code area

### Using one hot encoding to generate different features for different venue categories

In [300]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# move the column to head of list using index, pop and insert
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 
cols = list(toronto_onehot)
cols.insert(0, cols.pop(cols.index('Neighborhood')))
toronto_onehot = toronto_onehot.ix[:, cols]

toronto_onehot.head()

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  


Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Animal Shelter,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [301]:
# Average the indicator columns per postal code: each value becomes the share
# of that area's venues that fall in the category.
# (A leading `toronto_onehot.shape` whose result was never shown has been removed.)
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Animal Shelter,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,M4E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M4K,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.015873,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.031746
2,M4L,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M4M,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,M4N,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,M4P,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,M4R,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667
7,M4S,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,M4T,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,M4V,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [302]:
# (postal code areas, label column + category columns)
toronto_grouped.shape

(39, 235)

### Printing each area along with the top 5 most common venues

In [303]:
num_top_venues = 5

# For every postal code, print its most frequent venue categories.
for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")

    # Transpose the single matching row so that categories become rows.
    hood_row = toronto_grouped[toronto_grouped['Neighborhood'] == hood]
    freq_table = hood_row.T.reset_index()
    freq_table.columns = ['venue','freq']

    # Skip the first row (the label itself), then cast, round and rank.
    freq_table = (
        freq_table.iloc[1:]
        .astype({'freq': float})
        .round({'freq': 2})
        .sort_values('freq', ascending=False)
        .reset_index(drop=True)
    )
    print(freq_table.head(num_top_venues))
    print('\n')

----M4E----
         venue  freq
0          Pub  0.27
1  Flower Shop  0.09
2  Coffee Shop  0.09
3       Bakery  0.09
4        Trail  0.09


----M4K----
                venue  freq
0    Greek Restaurant  0.13
1         Coffee Shop  0.06
2                 Pub  0.06
3  Italian Restaurant  0.05
4       Grocery Store  0.03


----M4L----
                  venue  freq
0        Sandwich Place  0.10
1  Fast Food Restaurant  0.07
2                  Café  0.07
3         Grocery Store  0.07
4                   Gym  0.07


----M4M----
             venue  freq
0      Coffee Shop  0.09
1             Café  0.06
2  Thai Restaurant  0.03
3          Brewery  0.03
4          Gay Bar  0.03


----M4N----
                  venue  freq
0    Photography Studio  0.25
1  Gym / Fitness Center  0.25
2                  Pool  0.25
3                  Park  0.25
4     Accessories Store  0.00


----M4P----
               venue  freq
0            Dog Run   0.2
1               Park   0.1
2   Department Store   0.1
3  Foo

In [304]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the ``num_top_venues`` largest entries of ``row``.

    The first element of ``row`` is skipped (the caller passes the area label
    there); the remaining values are ranked in descending order and the
    index labels of the leading entries are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

### Showing the top 5 most common venues in each postal code area

In [342]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# column headers: "1st Most Common Venue", "2nd ...", "3rd ...", then "4th", ...
# (an explicit bounds check replaces the bare `except:` that was used as
#  control flow and would have hidden any unrelated error)
columns = ['Neighborhood']
for ind in range(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# rank the categories of every postal code area, then build the table in one
# go instead of filling an empty frame row by row
top_venues = [
    return_most_common_venues(toronto_grouped.iloc[pos, :], num_top_venues)
    for pos in range(toronto_grouped.shape[0])
]
neighborhoods_venues_sorted = pd.DataFrame(top_venues, columns=columns[1:])

# the 'Neighborhood' label of toronto_grouped is the postal code, so name it accordingly
neighborhoods_venues_sorted.insert(0, 'Postal code', toronto_grouped['Neighborhood'].values)
neighborhoods_venues_sorted.head()

Unnamed: 0,Postal code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M4E,Pub,Furniture / Home Store,Bakery,Gastropub,Trail
1,M4K,Greek Restaurant,Coffee Shop,Pub,Italian Restaurant,Yoga Studio
2,M4L,Sandwich Place,Café,Grocery Store,Gym,Fast Food Restaurant
3,M4M,Coffee Shop,Café,Gym,Bar,Sandwich Place
4,M4N,Gym / Fitness Center,Pool,Park,Photography Studio,Creperie


# Clustering Neighborhoods using K-means algorithm

In [343]:
# set number of clusters
kclusters = 8

# features only: drop the label column
# (`columns=` replaces the positional axis argument, which newer pandas rejects)
toronto_grouped_clustering = toronto_grouped.drop(columns='Neighborhood')

# run k-means clustering; n_init is pinned so that the result does not change
# with scikit-learn's version-dependent default, random_state fixes the seed
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for the first rows
kmeans.labels_[0:10]

array([0, 5, 5, 5, 4, 1, 1, 5, 6, 1], dtype=int32)

## Inserting the cluster label into the table

In [344]:
# Attach the cluster label of each postal code as the first column.
# DataFrame.insert raises if the column already exists, so drop a label column
# left over from a previous run to keep this cell re-runnable.
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# work on a copy so later changes to toronto_merged never touch df_tn_bor
toronto_merged = df_tn_bor.copy()

neighborhoods_venues_sorted.head()

Unnamed: 0,Cluster Labels,Postal code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,0,M4E,Pub,Furniture / Home Store,Bakery,Gastropub,Trail
1,5,M4K,Greek Restaurant,Coffee Shop,Pub,Italian Restaurant,Yoga Studio
2,5,M4L,Sandwich Place,Café,Grocery Store,Gym,Fast Food Restaurant
3,5,M4M,Coffee Shop,Café,Gym,Bar,Sandwich Place
4,4,M4N,Gym / Fitness Center,Pool,Park,Photography Studio,Creperie


In [345]:
# Add the cluster label and top venues to each Toronto postal code row.
# Joining from df_tn_bor (not from toronto_merged itself) keeps the cell
# re-runnable: a second self-join would fail on overlapping column names.
toronto_merged = (
    df_tn_bor
    .join(neighborhoods_venues_sorted.set_index('Postal code'), on='Postal code')
    # a postal code with no venues has no cluster; drop it rather than let the
    # integer cast below fail on NaN
    .dropna(subset=['Cluster Labels'])
    .astype({'Cluster Labels': int})
)
toronto_merged.head() # check the last columns!

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6555,-79.3626,1,Coffee Shop,Park,Theater,Restaurant,Breakfast Spot
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6641,-79.3889,1,Coffee Shop,Sushi Restaurant,Yoga Studio,Gym,Café
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.6572,-79.3783,1,Coffee Shop,Clothing Store,Hotel,Japanese Restaurant,Italian Restaurant
15,M5C,Downtown Toronto,St. James Town,43.6513,-79.3756,1,Coffee Shop,Café,Seafood Restaurant,American Restaurant,Gastropub
19,M4E,East Toronto,The Beaches,43.6784,-79.2941,0,Pub,Furniture / Home Store,Bakery,Gastropub,Trail


### Number of postal code areas in a given cluster

In [346]:
# Count the postal code areas in each cluster (count() tallies non-null values per column).
toronto_merged.groupby('Cluster Labels').count()

Unnamed: 0_level_0,Postal code,Borough,Neighborhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
Cluster Labels,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,1,1,1,1,1,1,1,1,1,1
1,16,16,16,16,16,16,16,16,16,16
2,1,1,1,1,1,1,1,1,1,1
3,1,1,1,1,1,1,1,1,1,1
4,1,1,1,1,1,1,1,1,1,1
5,16,16,16,16,16,16,16,16,16,16
6,2,2,2,2,2,2,2,2,2,2
7,1,1,1,1,1,1,1,1,1,1


## <font color='red'> Here is a map of different clustered postal code neighborhoods </font>

In [354]:
# create map
map_clusters = folium.Map(location=[43.6532, -79.3832], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
# (the old `x`/`ys` arrays were only used for their length, which is kclusters)
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# add markers to the map
# (row_lat/row_lon avoid overwriting the `lat`/`lon` lists from the geocoding cell)
for row_lat, row_lon, poi, ps, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'],
                                              toronto_merged['Neighborhood'], toronto_merged['Postal code'],
                                              toronto_merged['Cluster Labels']):
    label = folium.Popup(str(ps+' : '+poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Labels run 0..kclusters-1, so index the palette directly; the former
    # `cluster-1` only worked for cluster 0 through negative-index wraparound.
    cluster_color = rainbow[cluster]
    folium.CircleMarker(
        [row_lat, row_lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.9).add_to(map_clusters)

map_clusters