### Coursera Capstone project

Using data from FourSquare to analyze and predict areas with similarities viz., restaurants, schools, coffee shops, supermarket, places of interest.

In [116]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [117]:
print("Hello Capstone Project Course!")

Hello Capstone Project Course!


#### Loading the table from the Wikipedia page into a dataframe and displaying it.

In [118]:
tables = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
df = pd.DataFrame(tables[0],columns=["Postal Code","Borough","Neighborhood"])

df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


#### Remove all the rows where the value of Borough is 'Not Assigned'

In [119]:
df = df[df["Borough"] != "Not assigned"]

In [120]:
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


#### Steps to replace the null values in the Neighborhood column with the Borough names.

In [121]:
for index, row in df.head().iterrows():
    df.iloc[index:2] = row['Neighborhood'] if  (row['Neighborhood'] is None) else row['Borough']
    

#### As some of the postal codes have more than one Neighborhood, we will convert the neighborhood to an array.

In [122]:
df["Neighborhood"] = df["Neighborhood"].str.split(",")

There are no rows with Neighborhood as **NaN**

In [123]:
df.shape

(103, 3)

In [124]:
geo_codes_df = pd.read_csv("Geospatial_Coordinates.csv")

geo_codes_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [125]:
combi_df = pd.merge(df, geo_codes_df, on='Postal Code')

In [126]:
combi_df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,[Parkwoods],43.753259,-79.329656
1,M4A,North York,[Victoria Village],43.725882,-79.315572
2,M5A,Downtown Toronto,"[Regent Park, Harbourfront]",43.65426,-79.360636
3,M6A,North York,"[Lawrence Manor, Lawrence Heights]",43.718518,-79.464763
4,M7A,Downtown Toronto,"[Queen's Park, Ontario Provincial Government]",43.662301,-79.389494


#### Let's include only the rows where the Borough is Toronto

In [127]:
df_Toronto = combi_df[combi_df['Borough'].str.contains("Toronto")].reset_index(drop=True)

print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df_Toronto['Borough'].unique()),
        df_Toronto.shape[0]
    )
)

df_Toronto.head()

The dataframe has 4 boroughs and 39 neighborhoods.


Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"[Regent Park, Harbourfront]",43.65426,-79.360636
1,M7A,Downtown Toronto,"[Queen's Park, Ontario Provincial Government]",43.662301,-79.389494
2,M5B,Downtown Toronto,"[Garden District, Ryerson]",43.657162,-79.378937
3,M5C,Downtown Toronto,[St. James Town],43.651494,-79.375418
4,M4E,East Toronto,[The Beaches],43.676357,-79.293031


In [128]:
from geopy.geocoders import Nominatim

address = 'Toronto'

geolocator = Nominatim(user_agent="Exploring_Canada")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [129]:
import folium
import numpy.random as random
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_Toronto['Latitude'], df_Toronto['Longitude'], df_Toronto['Borough'], df_Toronto['Neighborhood']):
    for every_neighborhood in neighborhood:
        label = '{}, {}'.format(neighborhood, borough)
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            parse_html=False).add_to(map_toronto)  
    
map_toronto


#### Moving forward using the FourSquare API to explore the areas

In [130]:
CLIENT_ID = 'SECAGOPU1RPKJSGUPZL4FAS0GTTGZ5AW2KCVR2LFZ4EQP04H' # your Foursquare ID
CLIENT_SECRET = 'U4DTPVJFLMOE1TO32QFQ2IS10UNLBZDCBSEYQIPKL2XNRYK2' # your Foursquare Secret
VERSION = '20200519' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: SECAGOPU1RPKJSGUPZL4FAS0GTTGZ5AW2KCVR2LFZ4EQP04H
CLIENT_SECRET:U4DTPVJFLMOE1TO32QFQ2IS10UNLBZDCBSEYQIPKL2XNRYK2


In [131]:
df_Toronto

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"[Regent Park, Harbourfront]",43.65426,-79.360636
1,M7A,Downtown Toronto,"[Queen's Park, Ontario Provincial Government]",43.662301,-79.389494
2,M5B,Downtown Toronto,"[Garden District, Ryerson]",43.657162,-79.378937
3,M5C,Downtown Toronto,[St. James Town],43.651494,-79.375418
4,M4E,East Toronto,[The Beaches],43.676357,-79.293031
5,M5E,Downtown Toronto,[Berczy Park],43.644771,-79.373306
6,M5G,Downtown Toronto,[Central Bay Street],43.657952,-79.387383
7,M6G,Downtown Toronto,[Christie],43.669542,-79.422564
8,M5H,Downtown Toronto,"[Richmond, Adelaide, King]",43.650571,-79.384568
9,M6H,West Toronto,"[Dufferin, Dovercourt Village]",43.669005,-79.442259


In [132]:
df_Toronto.loc[0, 'Neighborhood']

['Regent Park', ' Harbourfront']

In [144]:
neighborhood_latitude = df_Toronto.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df_Toronto.loc[0, 'Longitude'] # neighborhood longitude value
borough = df_Toronto.loc[0, 'Borough'] # neighborhood name
post_code = df_Toronto.loc[0, 'Postal Code'] # neighborhood name

neighborhood_names = df_Toronto.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(borough, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

print('Postal code of {} is {} and neighborhood has {} and {}'.format(borough, 
                                                               post_code, 
                                                               neighborhood_names[0],
                                                               neighborhood_names[1]))

Latitude and longitude values of Downtown Toronto are 43.6542599, -79.3606359.
Postal code of Downtown Toronto is M5A and neighborhood has Regent Park and  Harbourfront


In [134]:
LIMIT = 50
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL


'https://api.foursquare.com/v2/venues/explore?&client_id=SECAGOPU1RPKJSGUPZL4FAS0GTTGZ5AW2KCVR2LFZ4EQP04H&client_secret=U4DTPVJFLMOE1TO32QFQ2IS10UNLBZDCBSEYQIPKL2XNRYK2&v=20200519&ll=43.6542599,-79.3606359&radius=500&limit=50'

In [135]:
import requests
from pandas.io.json import json_normalize

results = requests.get(url).json()


#### Borrowing the get_categories from the FourSquare lab

In [136]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [137]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Roselle Desserts,Bakery,43.653447,-79.362017
1,Tandem Coffee,Coffee Shop,43.653559,-79.361809
2,Morning Glory Cafe,Breakfast Spot,43.653947,-79.361149
3,Cooper Koo Family YMCA,Distribution Center,43.653249,-79.358008
4,Body Blitz Spa East,Spa,43.654735,-79.359874


In [138]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

47 venues were returned by Foursquare.


In [162]:
def getNearbyVenues(postcodes, latitudes, longitudes, radius=500):
    
    venues_list=[]
    o =3
    for postcode, lat, lng in zip(postcodes, latitudes, longitudes):
        if o>3:
            return []
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)

        # make the GET request
        json_resp = requests.get(url).json()
#         print(names)
        results = json_resp["response"]['groups'][0]['items']
        
        

            # return only relevant information for each nearby venue
        venues_list.append([(
            postcode, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [164]:
toronto_venues = getNearbyVenues(postcodes=df_Toronto['Postal Code'],
                                   latitudes=df_Toronto['Latitude'],
                                   longitudes=df_Toronto['Longitude']
                                  )


[[('M5A', 43.6542599, -79.3606359, 'Roselle Desserts', 43.653446723052674, -79.3620167174383, 'Bakery'), ('M5A', 43.6542599, -79.3606359, 'Tandem Coffee', 43.65355870959944, -79.36180945913513, 'Coffee Shop'), ('M5A', 43.6542599, -79.3606359, 'Morning Glory Cafe', 43.653946942635294, -79.36114884214422, 'Breakfast Spot'), ('M5A', 43.6542599, -79.3606359, 'Cooper Koo Family YMCA', 43.65324910177244, -79.35800826343677, 'Distribution Center'), ('M5A', 43.6542599, -79.3606359, 'Body Blitz Spa East', 43.65473505045365, -79.35987433132891, 'Spa'), ('M5A', 43.6542599, -79.3606359, 'Impact Kitchen', 43.65636850543279, -79.35697968750694, 'Restaurant'), ('M5A', 43.6542599, -79.3606359, 'Corktown Common', 43.655617799749734, -79.3562113397429, 'Park'), ('M5A', 43.6542599, -79.3606359, 'The Extension Room', 43.65331304337331, -79.35972538072777, 'Gym / Fitness Center'), ('M5A', 43.6542599, -79.3606359, 'The Distillery Historic District', 43.65024435658077, -79.35932278633118, 'Historic Site'), (

In [168]:
toronto_venues.columns = ['Postal Code', 
                  'Latitude', 
                  'Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Venue,Venue Category,Venue Latitude,Venue Longitude
Postal Code,Latitude,Longitude,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
M4E,43.676357,-79.293031,"[Glen Manor Ravine, The Big Carrot Natural Foo...","[Trail, Health Food Store, Pub, Neighborhood]","[43.67682094413784, 43.678879, 43.679181434941...","[-79.29394208780985, -79.297734, -79.297215358..."
M4K,43.679557,-79.352188,"[MenEssentials, Pantheon, Cafe Fiorentina, Dol...","[Cosmetics Shop, Greek Restaurant, Italian Res...","[43.677820068604575, 43.67762124481265, 43.677...","[-79.35126543045044, -79.35143390043564, -79.3..."
M4L,43.668999,-79.315572,"[System Fitness, British Style Fish & Chips, B...","[Gym, Fish & Chips Shop, Ice Cream Shop, Fast ...","[43.667171452103204, 43.66872336908457, 43.667...","[-79.31273345707406, -79.31713935234417, -79.3..."
M4M,43.659526,-79.340923,"[Ed's Real Scoop, The Bone House, Queen Books,...","[Ice Cream Shop, Pet Store, Bookstore, Fish Ma...","[43.660655832455014, 43.66089376125636, 43.660...","[-79.3420187548006, -79.3410974737743, -79.342..."
M4N,43.72802,-79.38879,"[Lawrence Park Ravine, Zodiac Swim School, TTC...","[Park, Swim School, Bus Line]","[43.72696303913755, 43.72853205765438, 43.7280...","[-79.39438246708775, -79.3828602612317, -79.38..."
M4P,43.712751,-79.390197,"[Homeway Restaurant & Brunch, Sherwood Park, S...","[Breakfast Spot, Park, Food & Drink Shop, Depa...","[43.71264120397444, 43.71655100307589, 43.7154...","[-79.39155655199944, -79.38777567141624, -79.3..."
M4R,43.715383,-79.405678,"[Barreworks, Uncle Betty's Diner, Civello Salo...","[Yoga Studio, Diner, Salon / Barbershop, Resta...","[43.71407030751952, 43.7144524368588, 43.71511...","[-79.40010911522093, -79.40009057656115, -79.4..."
M4S,43.704324,-79.38879,"[Jules Cafe Patisserie, Thobors Boulangerie Pa...","[Dessert Shop, Café, Indian Restaurant, Desser...","[43.70413799694304, 43.704513877453266, 43.702...","[-79.38841260442167, -79.38861602551758, -79.3..."
M4T,43.689574,-79.38316,[Loring-Wyle Parkette],[Park],[43.690270427217385],[-79.3834375880377]
M4V,43.686412,-79.400049,"[The Market By Longo’s, LCBO, Daeco Sushi, Mar...","[Supermarket, Liquor Store, Sushi Restaurant, ...","[43.686711, 43.686990631074885, 43.68783769992...","[-79.399536, -79.39923810519545, -79.395652492..."


In [169]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

toronto_onehot.insert(0, "Postal Code", toronto_venues['Postal Code'], True) 
# move neighborhood column to the first column

toronto_onehot

Unnamed: 0,Postal Code,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [200]:
toronto_grouped = toronto_onehot.groupby('Postal Code').sum().reset_index()
toronto_grouped

Unnamed: 0,Postal Code,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,M4E,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,M4K,0,0,0,0,0,0,1,0,0,...,0,0,1,0,0,0,0,0,0,1
2,M4L,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M4M,0,0,0,0,0,0,2,0,0,...,0,0,0,0,0,0,0,1,0,1
4,M4N,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,M4P,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,M4R,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
7,M4S,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
8,M4T,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,M4V,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0


In [201]:
num_top_venues = 5

for hood in toronto_grouped['Postal Code']:
    area = df_Toronto[df_Toronto['Postal Code'] == hood]["Borough"]
    print("----"+area.iloc[0]+"----")
    
    neigh_names = df_Toronto[df_Toronto['Postal Code'] == hood]["Neighborhood"]

    neigh_names = neigh_names.iloc[0]
    num_neighs = len(neigh_names)
    neigh_list = ""
    if num_neighs == 1:
        neigh_list = neigh_names[0]
    else:
        for i in range(0,num_neighs):
            neigh_list = neigh_list + neigh_names[i]
            if(i<num_neighs-1):
                neigh_list = neigh_list + ", "
    
    
    print("Neighborhood - "+neigh_list)
    temp = toronto_grouped[toronto_grouped['Postal Code'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----East Toronto----
Neighborhood - The Beaches
               venue  freq
0  Health Food Store   1.0
1              Trail   1.0
2                Pub   1.0
3       Neighborhood   1.0
4            Airport   0.0


----East Toronto----
Neighborhood - The Danforth West,  Riverdale
                    venue  freq
0        Greek Restaurant   9.0
1      Italian Restaurant   3.0
2             Coffee Shop   3.0
3          Ice Cream Shop   2.0
4  Furniture / Home Store   2.0


----East Toronto----
Neighborhood - India Bazaar,  The Beaches West
                  venue  freq
0  Fast Food Restaurant   2.0
1                  Park   2.0
2        Sandwich Place   2.0
3                   Gym   1.0
4          Intersection   1.0


----East Toronto----
Neighborhood - Studio District
                 venue  freq
0                 Café   4.0
1          Coffee Shop   3.0
2               Bakery   2.0
3              Brewery   2.0
4  American Restaurant   2.0


----Central Toronto----
Neighborhood - Lawrence Pa

                venue  freq
0                Café   3.0
1         Coffee Shop   3.0
2  Italian Restaurant   2.0
3    Sushi Restaurant   2.0
4                 Pub   2.0


----Downtown Toronto----
Neighborhood - Queen's Park,  Ontario Provincial Government
              venue  freq
0       Coffee Shop   8.0
1  Sushi Restaurant   2.0
2       Yoga Studio   1.0
3              Café   1.0
4               Bar   1.0


----East Toronto----
Neighborhood - Business reply mail Processing Centre
                venue  freq
0  Light Rail Station   2.0
1         Yoga Studio   1.0
2       Garden Center   1.0
3          Skate Park   1.0
4                Park   1.0




In [202]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [203]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Postal Code']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Postal Code'] = toronto_grouped['Postal Code']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M4E,Trail,Health Food Store,Neighborhood,Pub,Yoga Studio
1,M4K,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Restaurant
2,M4L,Sandwich Place,Park,Fast Food Restaurant,Sushi Restaurant,Brewery
3,M4M,Café,Coffee Shop,Gastropub,Bakery,Brewery
4,M4N,Park,Swim School,Bus Line,Event Space,Electronics Store


In [205]:
from sklearn.cluster import KMeans

toronto_grouped_clustering = toronto_grouped.drop('Postal Code', 1)

kmeans = KMeans(n_clusters=5, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 4, 0, 3, 0, 0, 0, 3, 0, 0], dtype=int32)

In [206]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df_Toronto

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Postal Code'), on='Postal Code')

toronto_merged.head() # check the last columns!



Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M5A,Downtown Toronto,"[Regent Park, Harbourfront]",43.65426,-79.360636,2,Coffee Shop,Park,Pub,Bakery,Theater
1,M7A,Downtown Toronto,"[Queen's Park, Ontario Provincial Government]",43.662301,-79.389494,2,Coffee Shop,Sushi Restaurant,Yoga Studio,Creperie,Smoothie Shop
2,M5B,Downtown Toronto,"[Garden District, Ryerson]",43.657162,-79.378937,3,Café,Middle Eastern Restaurant,Restaurant,Coffee Shop,Bookstore
3,M5C,Downtown Toronto,[St. James Town],43.651494,-79.375418,1,Café,Coffee Shop,Gastropub,American Restaurant,Cosmetics Shop
4,M4E,East Toronto,[The Beaches],43.676357,-79.293031,0,Trail,Health Food Store,Neighborhood,Pub,Yoga Studio


In [209]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, neighborhood, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(neighborhood) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine Clusters

### Cluster 1

In [218]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[0] + list([1,6,7,8,9,10])]]


Unnamed: 0,Postal Code,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
4,M4E,East Toronto,Trail,Health Food Store,Neighborhood,Pub,Yoga Studio
7,M6G,Downtown Toronto,Grocery Store,Café,Park,Diner,Baby Store
9,M6H,West Toronto,Pharmacy,Bakery,Bank,Pool,Brazilian Restaurant
10,M5J,Downtown Toronto,Coffee Shop,Aquarium,Hotel,Brewery,Café
11,M6J,West Toronto,Bar,Restaurant,Asian Restaurant,Men's Store,Vegetarian / Vegan Restaurant
14,M6K,West Toronto,Café,Coffee Shop,Breakfast Spot,Pet Store,Bar
15,M4L,East Toronto,Sandwich Place,Park,Fast Food Restaurant,Sushi Restaurant,Brewery
18,M4N,Central Toronto,Park,Swim School,Bus Line,Event Space,Electronics Store
19,M5N,Central Toronto,Garden,Yoga Studio,Deli / Bodega,Ethiopian Restaurant,Electronics Store
20,M4P,Central Toronto,Sandwich Place,Breakfast Spot,Dance Studio,Hotel,Food & Drink Shop


### Cluster 2

In [219]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[0] + list([1,6,7,8,9,10])]]


Unnamed: 0,Postal Code,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
3,M5C,Downtown Toronto,Café,Coffee Shop,Gastropub,American Restaurant,Cosmetics Shop
5,M5E,Downtown Toronto,Coffee Shop,Cocktail Bar,Seafood Restaurant,Restaurant,Bakery
8,M5H,Downtown Toronto,Coffee Shop,Café,Seafood Restaurant,Concert Hall,Restaurant
13,M5K,Downtown Toronto,Café,Coffee Shop,Hotel,Seafood Restaurant,Japanese Restaurant
16,M5L,Downtown Toronto,Café,Coffee Shop,Hotel,Gym,American Restaurant
34,M5W,Downtown Toronto,Café,Coffee Shop,Bakery,Beer Bar,Seafood Restaurant
36,M5X,Downtown Toronto,Café,Coffee Shop,Restaurant,Concert Hall,Seafood Restaurant


### Cluster 3

In [220]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[0] + list([1,6,7,8,9,10])]]


Unnamed: 0,Postal Code,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M5A,Downtown Toronto,Coffee Shop,Park,Pub,Bakery,Theater
1,M7A,Downtown Toronto,Coffee Shop,Sushi Restaurant,Yoga Studio,Creperie,Smoothie Shop
6,M5G,Downtown Toronto,Coffee Shop,Italian Restaurant,Sandwich Place,Café,Burger Joint


### Cluster 4

In [221]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[0] + list([1,6,7,8,9,10])]]


Unnamed: 0,Postal Code,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
2,M5B,Downtown Toronto,Café,Middle Eastern Restaurant,Restaurant,Coffee Shop,Bookstore
17,M4M,East Toronto,Café,Coffee Shop,Gastropub,Bakery,Brewery
26,M4S,Central Toronto,Dessert Shop,Sandwich Place,Sushi Restaurant,Café,Gym
27,M5S,Downtown Toronto,Café,Japanese Restaurant,Bar,Italian Restaurant,Bookstore
28,M6S,West Toronto,Café,Coffee Shop,Pub,Pizza Place,Italian Restaurant
30,M5T,Downtown Toronto,Café,Bakery,Coffee Shop,Mexican Restaurant,Vegetarian / Vegan Restaurant
35,M4X,Downtown Toronto,Coffee Shop,Italian Restaurant,Chinese Restaurant,Pizza Place,Café
37,M4Y,Downtown Toronto,Restaurant,Coffee Shop,Sushi Restaurant,Yoga Studio,Men's Store


### Cluster 5

In [222]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[0] + list([1,6,7,8,9,10])]]


Unnamed: 0,Postal Code,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
12,M4K,East Toronto,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Restaurant
