# Segmenting and Clustering Neighborhoods in Toronto - part 3
#### Explore and cluster the neighborhoods in Toronto
# 
### 1. Import libraries and data

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

# Class object has been created to store the credentials for safekeeping
import credentials_oa

print('Libraries imported.')

Libraries imported.


In [47]:
# Import DataFrame

toronto_data = pd.read_csv("/home/oscar/PythonProjects/Coursera/ibm-datascience/course9_Capstone/Coursera_Capstone/toronto_coords.csv")

toronto_data = toronto_data.reset_index(drop = True)

toronto_data.head()

Unnamed: 0.1,Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,0,M3A,North York,Parkwoods,43.753259,-79.329656
1,1,M4A,North York,Victoria Village,43.725882,-79.315572
2,2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [3]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(toronto_data['Borough'].unique()),
        toronto_data.shape[0]
    )
)

The dataframe has 10 boroughs and 103 neighborhoods.


In [48]:
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [49]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Borough'], toronto_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

## 
### 2. Using Foursquare to retrieve venues information
#### A file with the credentials has been created to import/protect the data

In [50]:
# Access Class file with Credentials
cred = credentials_oa.Credentials
print('Foursquare credentials imported.')

Foursquare credentials imported.


In [51]:
# Define Radius and Limit to get

LIMIT = 100
RADIUS = 500

url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&oauth_token={}&radius={}&limit={}'.format(
        cred.CLIENT_ID, 
        cred.CLIENT_SECRET, 
        latitude, 
        longitude, 
        cred.VERSION,
        cred.ACCESS_TOKEN,
        RADIUS, 
        LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?client_id=OVF5BXCZDMLFWBRVWGI2TEGEODAO53T2SBHISUFUEKOUZFXD&client_secret=ZLDEDVPAK5OJZK1NYCBT2QKLBV5UL0EEAOBN4UUNHD5H4MYS&ll=43.6534817,-79.3839347&v=20180605&oauth_token=HYJIQFEU4WZCU2NWXBL0TFRIX2LWXIJRNVZ1BXKWHPOHD1V3&radius=500&limit=100'

In [52]:
results = requests.get(url).json()
#results

In [53]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [54]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = pd.json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Downtown Toronto,Neighborhood,43.653232,-79.385296
1,Nathan Phillips Square,Plaza,43.65227,-79.383516
2,Chatime 日出茶太,Bubble Tea Shop,43.655542,-79.384684
3,Textile Museum of Canada,Art Museum,43.654396,-79.3865
4,Indigo,Bookstore,43.653515,-79.380696


In [55]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

100 venues were returned by Foursquare.


In [56]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&oauth_token={}&radius={}&limit={}'.format(
            cred.CLIENT_ID, 
            cred.CLIENT_SECRET, 
            latitude, 
            longitude, 
            cred.VERSION,
            cred.ACCESS_TOKEN,
            RADIUS, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [57]:
toronto_venues = getNearbyVenues(names=toronto_data['Neighborhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  )

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue, Humber Valley Village


KeyError: 'groups'

In [None]:
print(toronto_venues.shape)
toronto_venues.head()

In [44]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,100,100,100,100,100,100
"Alderwood, Long Branch",100,100,100,100,100,100
"Bathurst Manor, Wilson Heights, Downsview North",100,100,100,100,100,100
Bayview Village,100,100,100,100,100,100
"Bedford Park, Lawrence Manor East",100,100,100,100,100,100
Berczy Park,100,100,100,100,100,100
"Birch Cliff, Cliffside West",100,100,100,100,100,100
"Brockton, Parkdale Village, Exhibition Place",100,100,100,100,100,100
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",100,100,100,100,100,100
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",100,100,100,100,100,100


In [45]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 62 uniques categories.


In [46]:
toronto_venues.shape

(10300, 7)

# 
### 3. Analyze each Neighborhood

In [13]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Women's Store,American Restaurant,Art Museum,Asian Restaurant,Bakery,Bank,Bar,Bookstore,Breakfast Spot,Bubble Tea Shop,Café,Chinese Restaurant,Clothing Store,Coffee Shop,Comic Shop,Concert Hall,Cosmetics Shop,Department Store,Dessert Shop,Diner,Distribution Center,Electronics Store,Fast Food Restaurant,Food Court,Furniture / Home Store,Garden,Gastropub,General Travel,Greek Restaurant,Gym,Gym / Fitness Center,Hotel,Ice Cream Shop,Italian Restaurant,Japanese Restaurant,Latin American Restaurant,Lingerie Store,Miscellaneous Shop,Modern European Restaurant,Monument / Landmark,Music Venue,Neighborhood,New American Restaurant,Office,Opera House,Pizza Place,Plaza,Poke Place,Ramen Restaurant,Restaurant,Salon / Barbershop,Seafood Restaurant,Shopping Mall,Sporting Goods Shop,Steakhouse,Sushi Restaurant,Taco Place,Tanning Salon,Tea Room,Thai Restaurant,Theater,Vegetarian / Vegan Restaurant
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Parkwoods,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [14]:
toronto_onehot.shape

(10300, 62)

In [15]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighborhood,Women's Store,American Restaurant,Art Museum,Asian Restaurant,Bakery,Bank,Bar,Bookstore,Breakfast Spot,Bubble Tea Shop,Café,Chinese Restaurant,Clothing Store,Coffee Shop,Comic Shop,Concert Hall,Cosmetics Shop,Department Store,Dessert Shop,Diner,Distribution Center,Electronics Store,Fast Food Restaurant,Food Court,Furniture / Home Store,Garden,Gastropub,General Travel,Greek Restaurant,Gym,Gym / Fitness Center,Hotel,Ice Cream Shop,Italian Restaurant,Japanese Restaurant,Latin American Restaurant,Lingerie Store,Miscellaneous Shop,Modern European Restaurant,Monument / Landmark,Music Venue,New American Restaurant,Office,Opera House,Pizza Place,Plaza,Poke Place,Ramen Restaurant,Restaurant,Salon / Barbershop,Seafood Restaurant,Shopping Mall,Sporting Goods Shop,Steakhouse,Sushi Restaurant,Taco Place,Tanning Salon,Tea Room,Thai Restaurant,Theater,Vegetarian / Vegan Restaurant
0,Agincourt,0.01,0.03,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.02,0.02,0.02,0.07,0.03,0.01,0.01,0.03,0.01,0.01,0.01,0.01,0.02,0.02,0.01,0.01,0.02,0.01,0.01,0.01,0.02,0.02,0.03,0.03,0.01,0.03,0.01,0.02,0.01,0.01,0.01,0.01,0.02,0.01,0.01,0.01,0.02,0.01,0.01,0.02,0.01,0.03,0.01,0.01,0.02,0.02,0.01,0.01,0.04,0.01,0.02,0.01
1,"Alderwood, Long Branch",0.01,0.03,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.02,0.02,0.02,0.07,0.03,0.01,0.01,0.03,0.01,0.01,0.01,0.01,0.02,0.02,0.01,0.01,0.02,0.01,0.01,0.01,0.02,0.02,0.03,0.03,0.01,0.03,0.01,0.02,0.01,0.01,0.01,0.01,0.02,0.01,0.01,0.01,0.02,0.01,0.01,0.02,0.01,0.03,0.01,0.01,0.02,0.02,0.01,0.01,0.04,0.01,0.02,0.01
2,"Bathurst Manor, Wilson Heights, Downsview North",0.01,0.03,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.02,0.02,0.02,0.07,0.03,0.01,0.01,0.03,0.01,0.01,0.01,0.01,0.02,0.02,0.01,0.01,0.02,0.01,0.01,0.01,0.02,0.02,0.03,0.03,0.01,0.03,0.01,0.02,0.01,0.01,0.01,0.01,0.02,0.01,0.01,0.01,0.02,0.01,0.01,0.02,0.01,0.03,0.01,0.01,0.02,0.02,0.01,0.01,0.04,0.01,0.02,0.01
3,Bayview Village,0.01,0.03,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.02,0.02,0.02,0.07,0.03,0.01,0.01,0.03,0.01,0.01,0.01,0.01,0.02,0.02,0.01,0.01,0.02,0.01,0.01,0.01,0.02,0.02,0.03,0.03,0.01,0.03,0.01,0.02,0.01,0.01,0.01,0.01,0.02,0.01,0.01,0.01,0.02,0.01,0.01,0.02,0.01,0.03,0.01,0.01,0.02,0.02,0.01,0.01,0.04,0.01,0.02,0.01
4,"Bedford Park, Lawrence Manor East",0.01,0.03,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.02,0.02,0.02,0.07,0.03,0.01,0.01,0.03,0.01,0.01,0.01,0.01,0.02,0.02,0.01,0.01,0.02,0.01,0.01,0.01,0.02,0.02,0.03,0.03,0.01,0.03,0.01,0.02,0.01,0.01,0.01,0.01,0.02,0.01,0.01,0.01,0.02,0.01,0.01,0.02,0.01,0.03,0.01,0.01,0.02,0.02,0.01,0.01,0.04,0.01,0.02,0.01


In [16]:
toronto_grouped.shape

(99, 62)

In [17]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                 venue  freq
0       Clothing Store  0.07
1             Tea Room  0.04
2  American Restaurant  0.03
3                Hotel  0.03
4          Coffee Shop  0.03


----Alderwood, Long Branch----
                 venue  freq
0       Clothing Store  0.07
1             Tea Room  0.04
2  American Restaurant  0.03
3                Hotel  0.03
4          Coffee Shop  0.03


----Bathurst Manor, Wilson Heights, Downsview North----
                 venue  freq
0       Clothing Store  0.07
1             Tea Room  0.04
2  American Restaurant  0.03
3                Hotel  0.03
4          Coffee Shop  0.03


----Bayview Village----
                 venue  freq
0       Clothing Store  0.07
1             Tea Room  0.04
2  American Restaurant  0.03
3                Hotel  0.03
4          Coffee Shop  0.03


----Bedford Park, Lawrence Manor East----
                 venue  freq
0       Clothing Store  0.07
1             Tea Room  0.04
2  American Restaurant  0.03
3        

In [18]:
# Getting the information into a Pandas DataFrame

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [19]:
# Get top 10 venues for each Neighborhood

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Clothing Store,Tea Room,Ice Cream Shop,Cosmetics Shop,American Restaurant,Coffee Shop,Hotel,Japanese Restaurant,Seafood Restaurant,New American Restaurant
1,"Alderwood, Long Branch",Clothing Store,Tea Room,Ice Cream Shop,Cosmetics Shop,American Restaurant,Coffee Shop,Hotel,Japanese Restaurant,Seafood Restaurant,New American Restaurant
2,"Bathurst Manor, Wilson Heights, Downsview North",Clothing Store,Tea Room,Ice Cream Shop,Cosmetics Shop,American Restaurant,Coffee Shop,Hotel,Japanese Restaurant,Seafood Restaurant,New American Restaurant
3,Bayview Village,Clothing Store,Tea Room,Ice Cream Shop,Cosmetics Shop,American Restaurant,Coffee Shop,Hotel,Japanese Restaurant,Seafood Restaurant,New American Restaurant
4,"Bedford Park, Lawrence Manor East",Clothing Store,Tea Room,Ice Cream Shop,Cosmetics Shop,American Restaurant,Coffee Shop,Hotel,Japanese Restaurant,Seafood Restaurant,New American Restaurant


# 
### 5. Cluster Neighborhoods

In [20]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

  import sys


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [21]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster_Labels', kmeans.labels_)

toronto_merged = toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head(10) # check the last columns!

# remove NAN // df["A"] = df["A"].fillna(0)
#df1 = toronto_merged[toronto_merged.isna().any(axis=1)]
#df1


Unnamed: 0.1,Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster_Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,M3A,North York,Parkwoods,43.753259,-79.329656,0,Clothing Store,Tea Room,Ice Cream Shop,Cosmetics Shop,American Restaurant,Coffee Shop,Hotel,Japanese Restaurant,Seafood Restaurant,New American Restaurant
1,1,M4A,North York,Victoria Village,43.725882,-79.315572,0,Clothing Store,Tea Room,Ice Cream Shop,Cosmetics Shop,American Restaurant,Coffee Shop,Hotel,Japanese Restaurant,Seafood Restaurant,New American Restaurant
2,2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0,Clothing Store,Tea Room,Ice Cream Shop,Cosmetics Shop,American Restaurant,Coffee Shop,Hotel,Japanese Restaurant,Seafood Restaurant,New American Restaurant
3,3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,0,Clothing Store,Tea Room,Ice Cream Shop,Cosmetics Shop,American Restaurant,Coffee Shop,Hotel,Japanese Restaurant,Seafood Restaurant,New American Restaurant
4,4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,0,Clothing Store,Tea Room,Ice Cream Shop,Cosmetics Shop,American Restaurant,Coffee Shop,Hotel,Japanese Restaurant,Seafood Restaurant,New American Restaurant
5,5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242,0,Clothing Store,Tea Room,Ice Cream Shop,Cosmetics Shop,American Restaurant,Coffee Shop,Hotel,Japanese Restaurant,Seafood Restaurant,New American Restaurant
6,6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,0,Clothing Store,Tea Room,Ice Cream Shop,Cosmetics Shop,American Restaurant,Coffee Shop,Hotel,Japanese Restaurant,Seafood Restaurant,New American Restaurant
7,7,M3B,North York,Don Mills,43.745906,-79.352188,0,Clothing Store,Tea Room,Ice Cream Shop,Cosmetics Shop,American Restaurant,Coffee Shop,Hotel,Japanese Restaurant,Seafood Restaurant,New American Restaurant
8,8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937,0,Clothing Store,Tea Room,Ice Cream Shop,Cosmetics Shop,American Restaurant,Coffee Shop,Hotel,Japanese Restaurant,Seafood Restaurant,New American Restaurant
9,9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Clothing Store,Tea Room,Ice Cream Shop,Cosmetics Shop,American Restaurant,Coffee Shop,Hotel,Japanese Restaurant,Seafood Restaurant,New American Restaurant


In [22]:
#find_na = toronto_merged.is_NaN.any(axis=1)
#find_na

#is_NaN = toronto_merged.isnull()
#row_has_NaN = is_NaN.any(axis=1)
#rows_with_NaN = toronto_merged[row_has_NaN]
#rows_with_NaN = rows_with_NaN.reset_index(drop=True)
#rows_with_NaN

# Some Neighborhoods shoe 'NA' - Need to remove those Neighborhoods

toronto_merged = toronto_merged.dropna()

toronto_merged['Cluster_Labels'] = toronto_merged.Cluster_Labels.astype(int)


In [23]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster_Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# 
### 6. Examine Clusters

#### Cluster 1

In [24]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

KeyError: 'Cluster Labels'

#### 
#### Cluster 2

In [None]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

#### 
#### Cluster 3

In [None]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

#### 
#### Cluster 4

In [None]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

#### 
#### Cluster 5

In [None]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]