In [1]:
import pandas as pd
import numpy as np
import json
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

### <center> -------------- Part 1 -------------- </center> <center> Read postal codes data from https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M and transform the data into a pandas dataframe </center>

__Step 1: Dataframe consists of PostalCode, Borough and Neighbourhood columns__

In [2]:
# Step 1: imported data from the url: https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M
# Obtained the data of postal codes and transformed it into pandas dataframe, where the dataframe consist of three columns: PostalCode, Borough, and Neighborhood

d= pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
len(d)
df=d[0]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


__Step 2: Ignoring cells with a borough that is Not assigned.__

In [3]:
# Step 2: filtering Borough column having values Not assigned.
df=df[df['Borough']!='Not assigned']
df.head()
df.index=range(len(df))
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor


__Step 3: Replacing a cell which has a borough but a Not assigned neighborhood, then the neighborhood is assigned the same value as the borough.__

In [4]:
# Step 3: If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough. 
df['Neighbourhood'].replace("Not assigned", df["Borough"],inplace=True)

df.head(5)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor


__Step 4: Combining more than one neighborhood in one postal code area.__

In [5]:
# Step 4: More than one neighborhood can exist in one postal code area. 

df_postal = df.groupby(['Postcode', 'Borough'])['Neighbourhood'].apply(', '.join).reset_index()
df_postal.columns = ['Postcode', 'Borough', 'Neighbourhood']
df_postal.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


__Step 5: Printing number of rows in the dataframe__

In [6]:
#print the number of rows of your dataframe.
df_postal.shape

(103, 3)

### <center> -------------- Part 2 -------------- </center>  <center>Creating a new dataframe which shows geographical coordinates of each postal code.</center>

__Step 1: Reading Geospatial_Coordinates.csv file into dataframes__

In [7]:
# Step 1: reading Geospatial_Coordinates.csv file 

df_lat= pd.read_csv(r'C:/Users/Preeti/Desktop/IBM_course/Course 8-Applied Data Science Capstone/Geospatial_Coordinates.csv')
df_lat.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [8]:
# Step 2: renaming the 'Postal code' column in postal dataframe, inorder to merge two dataframes 

df_postal.rename(columns={'Postcode':'Postal Code'}, inplace=True)

__Step 2: Merging two dataframes i.e. Postal code and geospatial coordinates__

In [9]:
# Step 3: merging two dataframes i.e. postal code and geospatial coordinates

df_merge_location= pd.merge(df_postal,df_lat,on='Postal Code')
df_merge_location.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


### <center> -------------- Part 3 -------------- </center> <center> Exploring and clustering neighborhoods of Toronto. </center>

__Step 1: Creating a new dataframe consisting of boroughs that contains the word Toronto__

In [10]:
# Step 1: Creating new df_toronto dataframe from merged dataframe i.e. postal code and geospatial coordinates. 

df1=df_merge_location[df_merge_location['Borough'].str.contains("Toronto")]
df1.index=range(len(df1))
df1.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


__Step 2: Transforming dataframe__  

In [11]:
# Step 2: Splitting combined Neighbourhoods into different rows belonging to same postal code. 
# For example: Postal code M4K neighbourhoods are The Danforth West and Riverdale

new_df = pd.DataFrame(df1.Neighbourhood.str.split(',').tolist(), index=df1['Postal Code']).stack()
new_df = new_df.reset_index([0, 'Postal Code'])
new_df.columns = ['Postal Code', 'Neighbourhood']
df_toronto= pd.merge(new_df,df1,on='Postal Code')
df_toronto.head()

Unnamed: 0,Postal Code,Neighbourhood_x,Borough,Neighbourhood_y,Latitude,Longitude
0,M4E,The Beaches,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,The Danforth West,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4K,Riverdale,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
3,M4L,The Beaches West,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
4,M4L,India Bazaar,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572


In [12]:
# deleting Neighbourhood_y column created after splitting Neighbourhoods
del df_toronto['Neighbourhood_y']

In [13]:
# Renaming column Neighbourhood_x to Neighbourhood

#df_new_merge = df_new_merge[['Postal Code','Borough','Neighbourhood_x','Latitude','Longitude']]
df_toronto.rename(columns={'Neighbourhood_x':'Neighbourhood'}, inplace=True)
df_toronto.head()

Unnamed: 0,Postal Code,Neighbourhood,Borough,Latitude,Longitude
0,M4E,The Beaches,East Toronto,43.676357,-79.293031
1,M4K,The Danforth West,East Toronto,43.679557,-79.352188
2,M4K,Riverdale,East Toronto,43.679557,-79.352188
3,M4L,The Beaches West,East Toronto,43.668999,-79.315572
4,M4L,India Bazaar,East Toronto,43.668999,-79.315572


In [14]:
# deleting Postal code column, not required for further processing
del df_toronto['Postal Code']

In [15]:
# final dataframe created after extracting borough Containing Toronto and its neighbourhood
df_toronto.head()

Unnamed: 0,Neighbourhood,Borough,Latitude,Longitude
0,The Beaches,East Toronto,43.676357,-79.293031
1,The Danforth West,East Toronto,43.679557,-79.352188
2,Riverdale,East Toronto,43.679557,-79.352188
3,The Beaches West,East Toronto,43.668999,-79.315572
4,India Bazaar,East Toronto,43.668999,-79.315572


__Step 3: examining number of boroughs and its associated neighbourhoods__

In [16]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df_toronto['Borough'].unique()),
        df_toronto.shape[0]
    )
)

The dataframe has 4 boroughs and 74 neighborhoods.


__Step 4: Using geopy library to get the latitude and longitude values of Toronto__

In [17]:
address = 'Toronto, ON, Canada'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


__Step 5: Create map of toronto with Neighbourhoods superimposed on top__ 

In [18]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

__Step 6: Simplifying the above map by segmenting and clustering only Downtown Toronto neighbourhoods__

In [19]:
Downtown_toronto_data = df_toronto[df_toronto['Borough'] == 'Downtown Toronto'].reset_index(drop=True)
Downtown_toronto_data.head()

Unnamed: 0,Neighbourhood,Borough,Latitude,Longitude
0,Rosedale,Downtown Toronto,43.679563,-79.377529
1,Cabbagetown,Downtown Toronto,43.667967,-79.367675
2,St. James Town,Downtown Toronto,43.667967,-79.367675
3,Church and Wellesley,Downtown Toronto,43.66586,-79.38316
4,Harbourfront,Downtown Toronto,43.65426,-79.360636


__Step 7: Getting the geographical coordinates of Downtown Toronto__

In [20]:
address = 'Downtown Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Manhattan are 43.6541737, -79.38081164513409.


__Step 8: Visualizing the Downtown Torornto neighbourhoods, just like Toronto neighbourhoods__

In [21]:
map_Downtown_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(Downtown_toronto_data['Latitude'], Downtown_toronto_data['Longitude'], Downtown_toronto_data['Borough'], Downtown_toronto_data['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Downtown_toronto)  
    
map_Downtown_toronto

### Defining Foursquare Credentials

In [22]:
CLIENT_ID = 'PXOLQG4J2VY3Z5BL2MMUGFOETGJZ0ZJ2XJVITBVBKGMSKVZE' # your Foursquare ID
CLIENT_SECRET = '2OR4HXI3NJCY3EOKWSR4VS4DY2WAD15FLZ0NYLLMN20AVXAH' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' +  CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET )

Your credentails:
CLIENT_ID: PXOLQG4J2VY3Z5BL2MMUGFOETGJZ0ZJ2XJVITBVBKGMSKVZE
CLIENT_SECRET:2OR4HXI3NJCY3EOKWSR4VS4DY2WAD15FLZ0NYLLMN20AVXAH


__Exploring the first neighborhood in Downtown Toronto dataframe.__ 

In [23]:
Downtown_toronto_data.loc[0,'Neighbourhood']

'Rosedale'

In [24]:
neighborhood_latitude = Downtown_toronto_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = Downtown_toronto_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = Downtown_toronto_data.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Rosedale are 43.6795626, -79.37752940000001.


__Extracting top 50 venues that are in Rosedale within the rdaius of 500 meters.__

In [25]:
LIMIT = 50
radius = 500

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=PXOLQG4J2VY3Z5BL2MMUGFOETGJZ0ZJ2XJVITBVBKGMSKVZE&client_secret=2OR4HXI3NJCY3EOKWSR4VS4DY2WAD15FLZ0NYLLMN20AVXAH&v=20180605&ll=43.6795626,-79.37752940000001&radius=500&limit=50'

In [26]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e57c50671c428001b9c0edb'},
 'response': {'headerLocation': 'Rosedale',
  'headerFullLocation': 'Rosedale, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 4,
  'suggestedBounds': {'ne': {'lat': 43.6840626045, 'lng': -79.37131878274371},
   'sw': {'lat': 43.675062595499995, 'lng': -79.38374001725632}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4aff2d47f964a520743522e3',
       'name': 'Rosedale Park',
       'location': {'address': '38 Scholfield Ave.',
        'crossStreet': 'at Edgar Ave.',
        'lat': 43.68232820227814,
        'lng': -79.37893434347683,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.68232820227814,
          'lng': -79.37893434347683}],
        'distance': 32

In [27]:
# As we know, all information is in the items key. Borrowing get_category_type function from Foursquare lab. 

def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [28]:
# cleaning the json information and structuring it into pandas dataframe. 

venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  nearby_venues = json_normalize(venues) # flatten JSON


Unnamed: 0,name,categories,lat,lng
0,Rosedale Park,Playground,43.682328,-79.378934
1,Whitney Park,Park,43.682036,-79.373788
2,Alex Murray Parkette,Park,43.6783,-79.382773
3,Milkman's Lane,Trail,43.676352,-79.373842


In [29]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

4 venues were returned by Foursquare.


### Exploring Neighbourhoods in Downtown Toronto

__Step 1: creating a function to repeat the sma eprocess to all neighbourhoods in Downtown Toronto__

In [30]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

__Step 2: Calling getNearbyVenues to create a new dataframe having all neighbourhoods in Downtown Toronto__

In [31]:
Downtown_toronto_venues = getNearbyVenues(names=Downtown_toronto_data['Neighbourhood'],
                                   latitudes=Downtown_toronto_data['Latitude'],
                                   longitudes=Downtown_toronto_data['Longitude']
                                  )

Rosedale
Cabbagetown
 St. James Town
Church and Wellesley
Harbourfront
Ryerson
 Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide
 King
 Richmond
Harbourfront East
 Toronto Islands
 Union Station
Design Exchange
 Toronto Dominion Centre
Commerce Court
 Victoria Hotel
Harbord
 University of Toronto
Chinatown
 Grange Park
 Kensington Market
CN Tower
 Bathurst Quay
 Island airport
 Harbourfront West
 King and Spadina
 Railway Lands
 South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place
 Underground city
Christie
Queen's Park


In [32]:
# checking the size of the Downtown Toronto dataframe
print(Downtown_toronto_venues.shape)
Downtown_toronto_venues.head()

(1475, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Rosedale,43.679563,-79.377529,Rosedale Park,43.682328,-79.378934,Playground
1,Rosedale,43.679563,-79.377529,Whitney Park,43.682036,-79.373788,Park
2,Rosedale,43.679563,-79.377529,Alex Murray Parkette,43.6783,-79.382773,Park
3,Rosedale,43.679563,-79.377529,Milkman's Lane,43.676352,-79.373842,Trail
4,Cabbagetown,43.667967,-79.367675,Butter Chicken Factory,43.667072,-79.369184,Indian Restaurant


__Step 3: Calculating count of venues in each Neighbourhood__

In [33]:
Downtown_toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bathurst Quay,14,14,14,14,14,14
Garden District,50,50,50,50,50,50
Grange Park,50,50,50,50,50,50
Harbourfront West,14,14,14,14,14,14
Island airport,14,14,14,14,14,14
Kensington Market,50,50,50,50,50,50
King,50,50,50,50,50,50
King and Spadina,14,14,14,14,14,14
Railway Lands,14,14,14,14,14,14
Richmond,50,50,50,50,50,50


In [34]:
# finding number of unique categories 
print('There are {} uniques categories.'.format(len(Downtown_toronto_venues['Venue Category'].unique())))

There are 179 uniques categories.


### Analyze Each Neighbourhood

In [35]:
# one hot encoding
Downtown_toronto_onehot = pd.get_dummies(Downtown_toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Downtown_toronto_onehot['Neighborhood'] = Downtown_toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Downtown_toronto_onehot.columns[-1]] + list(Downtown_toronto_onehot.columns[:-1])
Downtown_toronto_onehot = Downtown_toronto_onehot[fixed_columns]

Downtown_toronto_onehot.head()

Unnamed: 0,Yoga Studio,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Thai Restaurant,Theater,Theme Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [36]:
Downtown_toronto_onehot.shape

(1475, 179)

__Grouping rows by Neighborhood and taking the mean of the frequency of occurrence of each category of venues__

In [37]:
Downtown_toronto_grouped = Downtown_toronto_onehot.groupby('Neighborhood').mean().reset_index()
Downtown_toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Thai Restaurant,Theater,Theme Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint
0,Bathurst Quay,0.0,0.071429,0.071429,0.071429,0.142857,0.071429,0.071429,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Garden District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,...,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Grange Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.06,0.02,0.0
3,Harbourfront West,0.0,0.071429,0.071429,0.071429,0.142857,0.071429,0.071429,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Island airport,0.0,0.071429,0.071429,0.071429,0.142857,0.071429,0.071429,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Kensington Market,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.06,0.02,0.0
6,King,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,...,0.02,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0
7,King and Spadina,0.0,0.071429,0.071429,0.071429,0.142857,0.071429,0.071429,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Railway Lands,0.0,0.071429,0.071429,0.071429,0.142857,0.071429,0.071429,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Richmond,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,...,0.02,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0


In [38]:
Downtown_toronto_grouped.shape

(37, 179)

__Printing top 5 most common venues corresponding to each neighbourhood__

In [39]:
num_top_venues = 5

for hood in Downtown_toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = Downtown_toronto_grouped[Downtown_toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

---- Bathurst Quay----
                 venue  freq
0       Airport Lounge  0.14
1          Coffee Shop  0.07
2                  Bar  0.07
3              Airport  0.07
4  Rental Car Location  0.07


---- Garden District----
              venue  freq
0       Coffee Shop  0.08
1              Café  0.06
2  Ramen Restaurant  0.04
3    Clothing Store  0.04
4           Theater  0.04


---- Grange Park----
                           venue  freq
0                            Bar  0.08
1                           Café  0.08
2          Vietnamese Restaurant  0.06
3             Mexican Restaurant  0.06
4  Vegetarian / Vegan Restaurant  0.04


---- Harbourfront West----
                 venue  freq
0       Airport Lounge  0.14
1          Coffee Shop  0.07
2                  Bar  0.07
3              Airport  0.07
4  Rental Car Location  0.07


---- Island airport----
                 venue  freq
0       Airport Lounge  0.14
1          Coffee Shop  0.07
2                  Bar  0.07
3              Air

In [40]:
#function sorting the venue in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [41]:
# displaying top 10 venues for each neighbourhood
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Downtown_toronto_grouped['Neighborhood']

for ind in np.arange(Downtown_toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Downtown_toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bathurst Quay,Airport Lounge,Coffee Shop,Harbor / Marina,Sculpture Garden,Rental Car Location,Bar,Boutique,Boat or Ferry,Airport Gate,Airport
1,Garden District,Coffee Shop,Café,Clothing Store,Bakery,Bookstore,Ramen Restaurant,Theater,Cosmetics Shop,Diner,College Rec Center
2,Grange Park,Café,Bar,Vietnamese Restaurant,Mexican Restaurant,Comfort Food Restaurant,Dessert Shop,Burger Joint,Vegetarian / Vegan Restaurant,Coffee Shop,Bakery
3,Harbourfront West,Airport Lounge,Coffee Shop,Harbor / Marina,Sculpture Garden,Rental Car Location,Bar,Boutique,Boat or Ferry,Airport Gate,Airport
4,Island airport,Airport Lounge,Coffee Shop,Harbor / Marina,Sculpture Garden,Rental Car Location,Bar,Boutique,Boat or Ferry,Airport Gate,Airport


In [42]:
kclusters = 6

Downtown_toronto_grouped_clustering = Downtown_toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Downtown_toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 3, 4, 2, 2, 4, 1, 2, 2, 1])

In [43]:
Downtown_toronto_data.rename(columns={'Neighborhood':'Neighbourhood'})

Unnamed: 0,Neighbourhood,Borough,Latitude,Longitude
0,Rosedale,Downtown Toronto,43.679563,-79.377529
1,Cabbagetown,Downtown Toronto,43.667967,-79.367675
2,St. James Town,Downtown Toronto,43.667967,-79.367675
3,Church and Wellesley,Downtown Toronto,43.66586,-79.38316
4,Harbourfront,Downtown Toronto,43.65426,-79.360636
5,Ryerson,Downtown Toronto,43.657162,-79.378937
6,Garden District,Downtown Toronto,43.657162,-79.378937
7,St. James Town,Downtown Toronto,43.651494,-79.375418
8,Berczy Park,Downtown Toronto,43.644771,-79.373306
9,Central Bay Street,Downtown Toronto,43.657952,-79.387383


In [44]:
# add clustering labels
#neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Downtown_toronto_merged = Downtown_toronto_data
Downtown_toronto_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Downtown_toronto_merged = Downtown_toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

Downtown_toronto_merged.head() # check the last columns!

Unnamed: 0,Neighbourhood,Borough,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Rosedale,Downtown Toronto,43.679563,-79.377529,2,Park,Playground,Trail,Comfort Food Restaurant,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Creperie,Cosmetics Shop
1,Cabbagetown,Downtown Toronto,43.667967,-79.367675,3,Coffee Shop,Park,Restaurant,Bakery,Pizza Place,Italian Restaurant,Pub,Café,American Restaurant,Sandwich Place
2,St. James Town,Downtown Toronto,43.667967,-79.367675,4,Coffee Shop,Park,Restaurant,Bakery,Pizza Place,Italian Restaurant,Pub,Café,American Restaurant,Sandwich Place
3,Church and Wellesley,Downtown Toronto,43.66586,-79.38316,2,Coffee Shop,Gastropub,Restaurant,Sushi Restaurant,Gym,Men's Store,Japanese Restaurant,Gay Bar,Ice Cream Shop,General Entertainment
4,Harbourfront,Downtown Toronto,43.65426,-79.360636,2,Coffee Shop,Park,Pub,Bakery,Breakfast Spot,Café,Theater,Mexican Restaurant,Cosmetics Shop,Restaurant


### Visualizing the clusters

In [45]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Downtown_toronto_merged['Latitude'], Downtown_toronto_merged['Longitude'], Downtown_toronto_merged['Neighbourhood'], Downtown_toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examining clusters

In [46]:
Downtown_toronto_merged.loc[Downtown_toronto_merged['Cluster Labels'] == 0, Downtown_toronto_merged.columns[[0] + list(range(5, Downtown_toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,Design Exchange,Coffee Shop,Café,Restaurant,Japanese Restaurant,Bakery,Gastropub,Deli / Bodega,Bar,Concert Hall,Fried Chicken Joint
24,Kensington Market,Café,Bar,Vietnamese Restaurant,Mexican Restaurant,Comfort Food Restaurant,Dessert Shop,Burger Joint,Vegetarian / Vegan Restaurant,Coffee Shop,Bakery
29,King and Spadina,Airport Lounge,Coffee Shop,Harbor / Marina,Sculpture Garden,Rental Car Location,Bar,Boutique,Boat or Ferry,Airport Gate,Airport


In [47]:
Downtown_toronto_merged.loc[Downtown_toronto_merged['Cluster Labels'] == 1, Downtown_toronto_merged.columns[[0] + list(range(5, Downtown_toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Garden District,Coffee Shop,Café,Clothing Store,Bakery,Bookstore,Ramen Restaurant,Theater,Cosmetics Shop,Diner,College Rec Center
9,Central Bay Street,Coffee Shop,Burger Joint,Italian Restaurant,Japanese Restaurant,Chinese Restaurant,Bubble Tea Shop,Portuguese Restaurant,Department Store,Bar,Sandwich Place
12,Richmond,Café,Steakhouse,American Restaurant,Seafood Restaurant,Asian Restaurant,Sushi Restaurant,Bar,Hotel,Gastropub,Pizza Place
14,Toronto Islands,Coffee Shop,Aquarium,Plaza,Café,Hotel,Park,Ice Cream Shop,IT Services,Bubble Tea Shop,Baseball Stadium
17,Toronto Dominion Centre,Coffee Shop,Café,Restaurant,Japanese Restaurant,Bakery,Gastropub,Deli / Bodega,Bar,Concert Hall,Fried Chicken Joint
18,Commerce Court,Coffee Shop,Café,Gastropub,Restaurant,Hotel,American Restaurant,Seafood Restaurant,Gym,Deli / Bodega,Japanese Restaurant
19,Victoria Hotel,Coffee Shop,Café,Gastropub,Restaurant,Hotel,American Restaurant,Seafood Restaurant,Gym,Deli / Bodega,Japanese Restaurant
26,Bathurst Quay,Airport Lounge,Coffee Shop,Harbor / Marina,Sculpture Garden,Rental Car Location,Bar,Boutique,Boat or Ferry,Airport Gate,Airport
27,Island airport,Airport Lounge,Coffee Shop,Harbor / Marina,Sculpture Garden,Rental Car Location,Bar,Boutique,Boat or Ferry,Airport Gate,Airport
28,Harbourfront West,Airport Lounge,Coffee Shop,Harbor / Marina,Sculpture Garden,Rental Car Location,Bar,Boutique,Boat or Ferry,Airport Gate,Airport


In [48]:
Downtown_toronto_merged.loc[Downtown_toronto_merged['Cluster Labels'] == 2, Downtown_toronto_merged.columns[[0] + list(range(5, Downtown_toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Rosedale,Park,Playground,Trail,Comfort Food Restaurant,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Creperie,Cosmetics Shop
3,Church and Wellesley,Coffee Shop,Gastropub,Restaurant,Sushi Restaurant,Gym,Men's Store,Japanese Restaurant,Gay Bar,Ice Cream Shop,General Entertainment
4,Harbourfront,Coffee Shop,Park,Pub,Bakery,Breakfast Spot,Café,Theater,Mexican Restaurant,Cosmetics Shop,Restaurant
7,St. James Town,Café,Restaurant,Coffee Shop,Park,Hotel,Italian Restaurant,BBQ Joint,Farmers Market,Beer Bar,Gastropub
8,Berczy Park,Coffee Shop,Bakery,Cocktail Bar,Beer Bar,Cheese Shop,Restaurant,Café,Seafood Restaurant,Farmers Market,Breakfast Spot
10,Adelaide,Café,Steakhouse,American Restaurant,Seafood Restaurant,Asian Restaurant,Sushi Restaurant,Bar,Hotel,Gastropub,Pizza Place
20,Harbord,Café,Bar,Restaurant,Japanese Restaurant,Bookstore,Bakery,Italian Restaurant,Beer Store,Sandwich Place,Dessert Shop


In [49]:
Downtown_toronto_merged.loc[Downtown_toronto_merged['Cluster Labels'] == 3, Downtown_toronto_merged.columns[[0] + list(range(5, Downtown_toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Cabbagetown,Coffee Shop,Park,Restaurant,Bakery,Pizza Place,Italian Restaurant,Pub,Café,American Restaurant,Sandwich Place
11,King,Café,Steakhouse,American Restaurant,Seafood Restaurant,Asian Restaurant,Sushi Restaurant,Bar,Hotel,Gastropub,Pizza Place
13,Harbourfront East,Coffee Shop,Aquarium,Plaza,Café,Hotel,Park,Ice Cream Shop,IT Services,Bubble Tea Shop,Baseball Stadium
15,Union Station,Coffee Shop,Aquarium,Plaza,Café,Hotel,Park,Ice Cream Shop,IT Services,Bubble Tea Shop,Baseball Stadium
21,University of Toronto,Café,Bar,Restaurant,Japanese Restaurant,Bookstore,Bakery,Italian Restaurant,Beer Store,Sandwich Place,Dessert Shop
22,Chinatown,Café,Bar,Vietnamese Restaurant,Mexican Restaurant,Comfort Food Restaurant,Dessert Shop,Burger Joint,Vegetarian / Vegan Restaurant,Coffee Shop,Bakery
25,CN Tower,Airport Lounge,Coffee Shop,Harbor / Marina,Sculpture Garden,Rental Car Location,Bar,Boutique,Boat or Ferry,Airport Gate,Airport
30,Railway Lands,Airport Lounge,Coffee Shop,Harbor / Marina,Sculpture Garden,Rental Car Location,Bar,Boutique,Boat or Ferry,Airport Gate,Airport
31,South Niagara,Airport Lounge,Coffee Shop,Harbor / Marina,Sculpture Garden,Rental Car Location,Bar,Boutique,Boat or Ferry,Airport Gate,Airport
32,Stn A PO Boxes 25 The Esplanade,Café,Restaurant,Seafood Restaurant,Coffee Shop,Cocktail Bar,Beer Bar,Italian Restaurant,Farmers Market,Bakery,Japanese Restaurant


In [50]:
Downtown_toronto_merged.loc[Downtown_toronto_merged['Cluster Labels'] == 4, Downtown_toronto_merged.columns[[0] + list(range(5, Downtown_toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,St. James Town,Coffee Shop,Park,Restaurant,Bakery,Pizza Place,Italian Restaurant,Pub,Café,American Restaurant,Sandwich Place
5,Ryerson,Coffee Shop,Café,Clothing Store,Bakery,Bookstore,Ramen Restaurant,Theater,Cosmetics Shop,Diner,College Rec Center
23,Grange Park,Café,Bar,Vietnamese Restaurant,Mexican Restaurant,Comfort Food Restaurant,Dessert Shop,Burger Joint,Vegetarian / Vegan Restaurant,Coffee Shop,Bakery


In [51]:
Downtown_toronto_merged.loc[Downtown_toronto_merged['Cluster Labels'] == 5, Downtown_toronto_merged.columns[[0] + list(range(5, Downtown_toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
33,First Canadian Place,Café,Restaurant,Coffee Shop,Bar,Seafood Restaurant,Beer Bar,Hotel,Gastropub,Deli / Bodega,Concert Hall
