<h1> Segmenting and Clustering the Neighbourhoods in Toronto,Canada </h1>

Parsing data from the wikipeida page

In [2]:
import csv
import requests
from bs4 import BeautifulSoup


def scrape_data(url):

    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.content, 'html.parser')

    table = soup.find_all('table')[0]

    rows = table.select('tbody > tr')

    header = [th.text.rstrip() for th in rows[0].find_all('th')]
    #print(header)
    with open('output.csv', 'w') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(header)
        for row in rows[1:]:
            data = [th.text.rstrip() for th in row.find_all('td')]
            writer.writerow(data)

url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
scrape_data(url)

Converting the parsed data into a dataframe

In [3]:
import pandas as pd
df_canada = pd.read_csv("output.csv")
df_canada.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


Removing rows with not assigned values

In [4]:
df_canada=df_canada[df_canada.Borough != 'Not assigned'].reset_index(drop=True)
df_canada.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [5]:
df_canada.nunique()


Postal Code      103
Borough           11
Neighbourhood     99
dtype: int64

Merging rows with same Postal Code

In [6]:
df_canada_merged=df_canada.groupby('Postal Code')['Neighbourhood'].apply(','.join).reset_index()
pd.set_option("max_colwidth", None)
df_canada_merged

Unnamed: 0,Postal Code,Neighbourhood
0,M1B,"Malvern, Rouge"
1,M1C,"Rouge Hill, Port Union, Highland Creek"
2,M1E,"Guildwood, Morningside, West Hill"
3,M1G,Woburn
4,M1H,Cedarbrae
...,...,...
98,M9N,Weston
99,M9P,Westmount
100,M9R,"Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens"
101,M9V,"South Steeles, Silverstone, Humbergate, Jamestown, Mount Olive, Beaumond Heights, Thistletown, Albion Gardens"


Since all the Postal Codes are distinct there's no need for merging rows

In [7]:
df_canada.shape

(103, 3)

Gathering Location information of each neighbourhood

In [8]:
!wget -q -O 'location_data.csv' 'http://cocl.us/Geospatial_data'
print('Data downloaded!')

with open('location_data.csv','r') as csv_file:
    df_location = pd.read_csv(csv_file)
df_location.head()

Data downloaded!


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Merging the two dataframes

In [9]:
df_canada_loc=df_canada.merge(df_location,how='inner',on='Postal Code',suffixes=('',''),copy=False)
pd.set_option('display.max_rows', None)
df_canada_loc

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


Foursquare Credentials

In [10]:
CLIENT_ID = 'SF4JB4NWBVAV4QYJXQQTSB10N2JAGONIKNSFUSJV1DF2RS1Q' # your Foursquare ID
CLIENT_SECRET = '1SIMXG54UW3FHTNNR5U5C1ZDDTFFOZKUKQMSTRJQD4PQK0E5' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: SF4JB4NWBVAV4QYJXQQTSB10N2JAGONIKNSFUSJV1DF2RS1Q
CLIENT_SECRET:1SIMXG54UW3FHTNNR5U5C1ZDDTFFOZKUKQMSTRJQD4PQK0E5


Getting surrounding venues for postal code M3A

In [11]:
df_canada_loc.loc[0,'Postal Code']

'M3A'

In [12]:
neighborhood_latitude = df_canada_loc.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df_canada_loc.loc[0, 'Longitude'] # neighborhood longitude value

Postal_Code = df_canada_loc.loc[0, 'Postal Code'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(Postal_Code, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of M3A are 43.7532586, -79.3296565.


In [13]:
LIMIT=100

radius=1000

url='https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&limit={}&radius={}'.format(CLIENT_ID,CLIENT_SECRET, neighborhood_latitude,neighborhood_longitude,VERSION,LIMIT,radius)
url

'https://api.foursquare.com/v2/venues/explore?client_id=SF4JB4NWBVAV4QYJXQQTSB10N2JAGONIKNSFUSJV1DF2RS1Q&client_secret=1SIMXG54UW3FHTNNR5U5C1ZDDTFFOZKUKQMSTRJQD4PQK0E5&ll=43.7532586,-79.3296565&v=20180605&limit=100&radius=1000'

In [14]:
import json
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

In [15]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '604a7aa0b915ec4d1bdacc29'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Parkwoods - Donalda',
  'headerFullLocation': 'Parkwoods - Donalda, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 28,
  'suggestedBounds': {'ne': {'lat': 43.762258609000014,
    'lng': -79.31721997969855},
   'sw': {'lat': 43.74425859099999, 'lng': -79.34209302030145}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b8991cbf964a520814232e3',
       'name': "Allwyn's Bakery",
       'location': {'address': '81 Underhill drive',
        'lat': 43.75984035203157,
        'lng': -79.32471879917513,
        'labeledLatLngs': [{'label': 'display'

In [16]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [17]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  app.launch_new_instance()


Unnamed: 0,name,categories,lat,lng
0,Allwyn's Bakery,Caribbean Restaurant,43.75984,-79.324719
1,Tim Hortons,Café,43.760668,-79.326368
2,Brookbanks Park,Park,43.751976,-79.33214
3,A&W,Fast Food Restaurant,43.760643,-79.326865
4,Bruno's valu-mart,Grocery Store,43.746143,-79.32463


In [18]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

28 venues were returned by Foursquare.


Getting surrounding venues for all postal codes in the city of toronto

In [19]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()['response']['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Postal Code', 
                  'Postal Code Latitude', 
                  'Postal Code Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    print(nearby_venues.tail())
    return(nearby_venues)

In [20]:
toronto_venues = getNearbyVenues(names=df_canada_loc['Postal Code'],
                                   latitudes=df_canada_loc['Latitude'],
                                   longitudes=df_canada_loc['Longitude']
                                  )

M3A
M4A
M5A
M6A
M7A
M9A
M1B
M3B
M4B
M5B
M6B
M9B
M1C
M3C
M4C
M5C
M6C
M9C
M1E
M4E
M5E
M6E
M1G
M4G
M5G
M6G
M1H
M2H
M3H
M4H
M5H
M6H
M1J
M2J
M3J
M4J
M5J
M6J
M1K
M2K
M3K
M4K
M5K
M6K
M1L
M2L
M3L
M4L
M5L
M6L
M9L
M1M
M2M
M3M
M4M
M5M
M6M
M9M
M1N
M2N
M3N
M4N
M5N
M6N
M9N
M1P
M2P
M4P
M5P
M6P
M9P
M1R
M2R
M4R
M5R
M6R
M7R
M9R
M1S
M4S
M5S
M6S
M1T
M4T
M5T
M1V
M4V
M5V
M8V
M9V
M1W
M4W
M5W
M8W
M9W
M1X
M4X
M5X
M8X
M4Y
M7Y
M8Y
M8Z
     Postal Code  Postal Code Latitude  Postal Code Longitude  \
4877         M8Z             43.628841             -79.520999   
4878         M8Z             43.628841             -79.520999   
4879         M8Z             43.628841             -79.520999   
4880         M8Z             43.628841             -79.520999   
4881         M8Z             43.628841             -79.520999   

                       Venue  Venue Latitude  Venue Longitude  \
4877                  Mr.Sub       43.636174       -79.520655   
4878         Mandarin Buffet       43.621352       -79.523015   
48

In [21]:
print(toronto_venues.shape)
toronto_venues.head()

(4882, 7)


Unnamed: 0,Postal Code,Postal Code Latitude,Postal Code Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M3A,43.753259,-79.329656,Allwyn's Bakery,43.75984,-79.324719,Caribbean Restaurant
1,M3A,43.753259,-79.329656,Tim Hortons,43.760668,-79.326368,Café
2,M3A,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
3,M3A,43.753259,-79.329656,A&W,43.760643,-79.326865,Fast Food Restaurant
4,M3A,43.753259,-79.329656,Bruno's valu-mart,43.746143,-79.32463,Grocery Store


In [22]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 335 uniques categories.


One hot encoding for different categories

In [23]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Postal Code'] = toronto_venues['Postal Code'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Postal Code,ATM,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,...,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,M3A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M3A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M3A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M3A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M3A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
toronto_grouped = toronto_onehot.groupby('Postal Code').mean().reset_index()
pd.set_option('display.max_rows', None)
toronto_grouped

Unnamed: 0,Postal Code,ATM,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,...,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,M1B,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M1C,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,M1E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M1G,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,M1H,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.03125,0.0
5,M1J,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,M1K,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,M1L,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,M1M,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,M1N,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
toronto_grouped.shape

(102, 336)

In [26]:
num_top_venues = 5

for hood in toronto_grouped['Postal Code']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Postal Code'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----M1B----
                           venue  freq
0           Fast Food Restaurant  0.14
1                          Trail  0.14
2  Paper / Office Supplies Store  0.07
3                            Spa  0.07
4                           Bank  0.07


----M1C----
                venue  freq
0                Park   0.2
1      Breakfast Spot   0.2
2        Burger Joint   0.2
3          Playground   0.2
4  Italian Restaurant   0.2


----M1E----
                  venue  freq
0           Pizza Place  0.17
1                  Bank  0.08
2  Fast Food Restaurant  0.08
3           Coffee Shop  0.08
4        Sandwich Place  0.04


----M1G----
                  venue  freq
0                  Park  0.25
1           Coffee Shop  0.25
2     Indian Restaurant  0.12
3  Fast Food Restaurant  0.12
4    Chinese Restaurant  0.12


----M1H----
               venue  freq
0             Bakery  0.09
1  Indian Restaurant  0.06
2        Gas Station  0.06
3               Bank  0.06
4        Coffee Shop  0.06


----M1

                venue  freq
0         Coffee Shop  0.11
1               Hotel  0.06
2                Café  0.06
3          Restaurant  0.05
4  Seafood Restaurant  0.05


----M5M----
                  venue  freq
0    Italian Restaurant  0.08
1           Coffee Shop  0.08
2  Fast Food Restaurant  0.05
3                  Bank  0.05
4        Sandwich Place  0.05


----M5N----
                venue  freq
0    Sushi Restaurant  0.13
1                Bank  0.09
2  Italian Restaurant  0.09
3         Coffee Shop  0.09
4                Café  0.09


----M5P----
              venue  freq
0              Park  0.08
1       Coffee Shop  0.06
2  Sushi Restaurant  0.06
3              Bank  0.06
4              Café  0.06


----M5R----
                           venue  freq
0                           Café  0.06
1             Italian Restaurant  0.05
2                    Coffee Shop  0.05
3  Vegetarian / Vegan Restaurant  0.04
4                     Restaurant  0.03


----M5S----
                        

In [27]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Top 10 venues for each postal code

In [28]:
import numpy as np
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Postal Code']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Postal Code'] = toronto_grouped['Postal Code']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)
pd.set_option('display.max_rows', None)
neighborhoods_venues_sorted

Unnamed: 0,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Trail,Fast Food Restaurant,Chinese Restaurant,Supermarket,Caribbean Restaurant,Spa,Paper / Office Supplies Store,Park,Coffee Shop,Restaurant
1,M1C,Breakfast Spot,Playground,Park,Burger Joint,Italian Restaurant,Farm,Elementary School,Entertainment Service,Escape Room,Ethiopian Restaurant
2,M1E,Pizza Place,Bank,Coffee Shop,Fast Food Restaurant,Fried Chicken Joint,Sandwich Place,Beer Store,Liquor Store,Supermarket,Discount Store
3,M1G,Park,Coffee Shop,Chinese Restaurant,Mobile Phone Shop,Indian Restaurant,Fast Food Restaurant,Falafel Restaurant,Elementary School,Entertainment Service,Escape Room
4,M1H,Bakery,Gas Station,Coffee Shop,Indian Restaurant,Bank,Pharmacy,Caribbean Restaurant,Fried Chicken Joint,Athletics & Sports,Chinese Restaurant
5,M1J,Convenience Store,Ice Cream Shop,Restaurant,Bowling Alley,Fast Food Restaurant,Grocery Store,Coffee Shop,Japanese Restaurant,Intersection,Sandwich Place
6,M1K,Discount Store,Pizza Place,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Grocery Store,Hockey Arena,Sandwich Place,Asian Restaurant,Light Rail Station
7,M1L,Intersection,Bus Line,Bakery,Coffee Shop,Bank,Gym,Soccer Field,General Entertainment,Restaurant,Trail
8,M1M,Pizza Place,Ice Cream Shop,Beach,Sports Bar,Auto Garage,Park,Restaurant,Filipino Restaurant,Financial or Legal Service,Electronics Store
9,M1N,Park,Gym,General Entertainment,Thai Restaurant,College Stadium,Ice Cream Shop,Café,Skating Rink,Restaurant,Gym Pool


Clustering different postal codes using Kmeans 

In [29]:
from sklearn.cluster import KMeans

kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Postal Code', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 3, 0, 4, 0, 0, 0, 0, 3, 3], dtype=int32)

In [30]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df_canada_loc

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Postal Code'), on='Postal Code')
#toronto_merged['Cluster Labels'].astype(int)
pd.set_option('display.max_rows', None)
toronto_merged.drop([95],axis=0,inplace=True)
toronto_merged.reset_index(inplace=True)
toronto_merged.drop("index",axis=1)
toronto_merged# check the last columns!

Unnamed: 0,index,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,M3A,North York,Parkwoods,43.753259,-79.329656,3.0,Park,Pharmacy,Bus Stop,Shopping Mall,ATM,Shop & Service,Supermarket,Food & Drink Shop,Fast Food Restaurant,Skating Rink
1,1,M4A,North York,Victoria Village,43.725882,-79.315572,0.0,Coffee Shop,Portuguese Restaurant,Boxing Gym,Gym / Fitness Center,Intersection,Golf Course,French Restaurant,Men's Store,Park,Pizza Place
2,2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,1.0,Coffee Shop,Restaurant,Park,Café,Theater,Diner,Breakfast Spot,Sushi Restaurant,Pub,Bakery
3,3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,1.0,Furniture / Home Store,Clothing Store,Coffee Shop,Restaurant,Fast Food Restaurant,Vietnamese Restaurant,Fried Chicken Joint,Sushi Restaurant,Women's Store,Dessert Shop
4,4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,1.0,Coffee Shop,Sushi Restaurant,Park,Café,Hotel,Thai Restaurant,Ramen Restaurant,Japanese Restaurant,Italian Restaurant,Restaurant
5,5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242,3.0,Pharmacy,Grocery Store,Convenience Store,Playground,Bank,Bakery,Shopping Mall,Golf Course,Skating Rink,Café
6,6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,0.0,Trail,Fast Food Restaurant,Chinese Restaurant,Supermarket,Caribbean Restaurant,Spa,Paper / Office Supplies Store,Park,Coffee Shop,Restaurant
7,7,M3B,North York,Don Mills,43.745906,-79.352188,1.0,Pizza Place,Japanese Restaurant,Burger Joint,Coffee Shop,Breakfast Spot,Greek Restaurant,Mobile Phone Shop,Café,Caribbean Restaurant,Liquor Store
8,8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937,0.0,Pizza Place,Brewery,Gym / Fitness Center,Pharmacy,Rock Climbing Spot,Coffee Shop,Café,Fast Food Restaurant,Soccer Stadium,Flea Market
9,9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,1.0,Coffee Shop,Gastropub,Japanese Restaurant,Seafood Restaurant,Café,Theater,Italian Restaurant,Hotel,Pizza Place,Ramen Restaurant


In [31]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
#from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: failed with initial frozen solve. Retrying with flexible solve.
Collecting package metadata (repodata.json): done
Solving environment: failed with initial frozen solve. Retrying with flexible solve.
Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python-3.7-main

  added / updated specs:
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    _libgcc_mutex-0.1          |      conda_forge           3 KB  conda-forge
    _openmp_mutex-4.5          |           1_llvm           5 KB  conda-forge
    _py-xgboost-mutex-2.0      |            cpu_0           8 KB  conda-forge
    _pytorch_select-0.2        |            gpu_0           2 KB
    absl-py-0.12.0             |     pyhd8ed1ab_0          96 KB  conda-forge
    aiohttp-3.7.4              |   py37h5e8e339_0  

  libgfortran4       conda-forge/linux-64::libgfortran4-7.5.0-h14aa051_18
  libglib            conda-forge/linux-64::libglib-2.66.7-h3e27bee_1
  libiconv           conda-forge/linux-64::libiconv-1.16-h516909a_0
  liblapack          conda-forge/linux-64::liblapack-3.9.0-8_mkl
  libllvm11          conda-forge/linux-64::libllvm11-11.1.0-hf817b99_0
  libnghttp2         conda-forge/linux-64::libnghttp2-1.43.0-h812cca2_0
  libssh2            conda-forge/linux-64::libssh2-1.9.0-ha56f1ee_6
  libwebp-base       conda-forge/linux-64::libwebp-base-1.2.0-h7f98852_0
  libxkbcommon       conda-forge/linux-64::libxkbcommon-1.0.3-he3ba5ed_0
  libzopfli          conda-forge/linux-64::libzopfli-1.0.3-h9c3ff4c_0
  llvm-openmp        conda-forge/linux-64::llvm-openmp-11.0.1-h4bd325d_0
  mpc                conda-forge/linux-64::mpc-1.1.0-h04dde30_1009
  mpfr               conda-forge/linux-64::mpfr-4.0.2-he80fd80_1
  mysql-common       conda-forge/linux-64::mysql-common-8.0.23-ha770c72_1
  my

imagecodecs-2021.1.2 | 6.7 MB    | ##################################### | 100% 
lzo-2.10             | 314 KB    | ##################################### | 100% 
beautifulsoup4-4.9.3 | 86 KB     | ##################################### | 100% 
iniconfig-1.1.1      | 8 KB      | ##################################### | 100% 
jeepney-0.6.0        | 32 KB     | ##################################### | 100% 
kiwisolver-1.3.1     | 78 KB     | ##################################### | 100% 
mysql-common-8.0.23  | 1.5 MB    | ##################################### | 100% 
gmp-6.2.1            | 806 KB    | ##################################### | 100% 
seaborn-0.11.1       | 4 KB      | ##################################### | 100% 
pexpect-4.8.0        | 47 KB     | ##################################### | 100% 
python_abi-3.7       | 4 KB      | ##################################### | 100% 
mistune-0.8.4        | 54 KB     | ##################################### | 100% 
pyqt-5.12.3          | 21 KB

charls-2.2.0         | 138 KB    | ##################################### | 100% 
keyring-18.0.0       | 50 KB     | ##################################### | 100% 
ipywidgets-7.6.3     | 101 KB    | ##################################### | 100% 
google-auth-oauthlib | 19 KB     | ##################################### | 100% 
tqdm-4.59.0          | 77 KB     | ##################################### | 100% 
wheel-0.36.2         | 31 KB     | ##################################### | 100% 
lcms2-2.12           | 443 KB    | ##################################### | 100% 
traitlets-5.0.5      | 81 KB     | ##################################### | 100% 
terminado-0.9.2      | 26 KB     | ##################################### | 100% 
async-timeout-3.0.1  | 11 KB     | ##################################### | 100% 
libffi-3.3           | 51 KB     | ##################################### | 100% 
blosc-1.21.0         | 841 KB    | ##################################### | 100% 
tensorflow-estimator | 645 K

openjpeg-2.4.0       | 525 KB    | ##################################### | 100% 
krb5-1.17.2          | 1.4 MB    | ##################################### | 100% 
blinker-1.4          | 13 KB     | ##################################### | 100% 
lz4-c-1.9.3          | 179 KB    | ##################################### | 100% 
pygments-2.8.1       | 736 KB    | ##################################### | 100% 
google-pasta-0.2.0   | 42 KB     | ##################################### | 100% 
openssl-1.1.1j       | 2.1 MB    | ##################################### | 100% 
more-itertools-8.7.0 | 39 KB     | ##################################### | 100% 
readline-8.0         | 281 KB    | ##################################### | 100% 
sleef-3.5.1          | 1.5 MB    | ##################################### | 100% 
pytables-3.6.1       | 1.5 MB    | ##################################### | 100% 
vincent-0.4.4        | 28 KB     | ##################################### | 100% 
h5py-3.1.0           | 1.2 M

done
Libraries imported.


Mapping clustered postal codes

In [32]:
# create map
map_clusters = folium.Map(location=[43.6532, -79.3832], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Postal Code'], toronto_merged['Cluster Labels']):
    temp=int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[temp-1],
        fill=True,
        fill_color=rainbow[temp-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [33]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0.0,toronto_merged.columns[[1] + list(range(7, toronto_merged.shape[1]))]]

Unnamed: 0,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,M4A,Coffee Shop,Portuguese Restaurant,Boxing Gym,Gym / Fitness Center,Intersection,Golf Course,French Restaurant,Men's Store,Park,Pizza Place
6,M1B,Trail,Fast Food Restaurant,Chinese Restaurant,Supermarket,Caribbean Restaurant,Spa,Paper / Office Supplies Store,Park,Coffee Shop,Restaurant
8,M4B,Pizza Place,Brewery,Gym / Fitness Center,Pharmacy,Rock Climbing Spot,Coffee Shop,Café,Fast Food Restaurant,Soccer Stadium,Flea Market
10,M6B,Grocery Store,Fast Food Restaurant,Coffee Shop,Pizza Place,Gas Station,Gym,Bank,Latin American Restaurant,Food Court,Mediterranean Restaurant
14,M4C,Park,Coffee Shop,Sandwich Place,Café,Pizza Place,Athletics & Sports,Pastry Shop,Beer Store,Liquor Store,Skating Rink
16,M6C,Pizza Place,Convenience Store,Coffee Shop,Trail,Soccer Stadium,Middle Eastern Restaurant,Mexican Restaurant,Field,Sandwich Place,Sushi Restaurant
17,M9C,Coffee Shop,Sandwich Place,IT Services,Park,Intersection,Grocery Store,Shopping Mall,College Rec Center,Beer Store,Pet Store
18,M1E,Pizza Place,Bank,Coffee Shop,Fast Food Restaurant,Fried Chicken Joint,Sandwich Place,Beer Store,Liquor Store,Supermarket,Discount Store
21,M6E,Pharmacy,Pizza Place,Park,Falafel Restaurant,Coffee Shop,Japanese Restaurant,Grocery Store,Discount Store,Bus Stop,Fast Food Restaurant
26,M1H,Bakery,Gas Station,Coffee Shop,Indian Restaurant,Bank,Pharmacy,Caribbean Restaurant,Fried Chicken Joint,Athletics & Sports,Chinese Restaurant


In [34]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1.0,toronto_merged.columns[[1] + list(range(7, toronto_merged.shape[1]))]]

Unnamed: 0,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,M5A,Coffee Shop,Restaurant,Park,Café,Theater,Diner,Breakfast Spot,Sushi Restaurant,Pub,Bakery
3,M6A,Furniture / Home Store,Clothing Store,Coffee Shop,Restaurant,Fast Food Restaurant,Vietnamese Restaurant,Fried Chicken Joint,Sushi Restaurant,Women's Store,Dessert Shop
4,M7A,Coffee Shop,Sushi Restaurant,Park,Café,Hotel,Thai Restaurant,Ramen Restaurant,Japanese Restaurant,Italian Restaurant,Restaurant
7,M3B,Pizza Place,Japanese Restaurant,Burger Joint,Coffee Shop,Breakfast Spot,Greek Restaurant,Mobile Phone Shop,Café,Caribbean Restaurant,Liquor Store
9,M5B,Coffee Shop,Gastropub,Japanese Restaurant,Seafood Restaurant,Café,Theater,Italian Restaurant,Hotel,Pizza Place,Ramen Restaurant
13,M3C,Restaurant,Gym,Supermarket,Beer Store,Coffee Shop,Italian Restaurant,Bank,Japanese Restaurant,New American Restaurant,Sushi Restaurant
15,M5C,Coffee Shop,Café,Restaurant,Seafood Restaurant,Italian Restaurant,Bakery,Theater,Gastropub,Plaza,Hotel
19,M4E,Coffee Shop,Pizza Place,Pub,Beach,Japanese Restaurant,Breakfast Spot,Burger Joint,Bakery,Asian Restaurant,Indian Restaurant
20,M5E,Coffee Shop,Café,Hotel,Park,Japanese Restaurant,Restaurant,Bakery,Gym,Beer Bar,Pub
23,M4G,Coffee Shop,Sporting Goods Shop,Grocery Store,Electronics Store,Furniture / Home Store,Bank,Department Store,Sandwich Place,Brewery,Sports Bar


In [35]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2.0,toronto_merged.columns[[1] + list(range(7, toronto_merged.shape[1]))]]

Unnamed: 0,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
94,M9W,Hotel,Drugstore,Coffee Shop,Zoo,Farm,Elementary School,Entertainment Service,Escape Room,Ethiopian Restaurant,Event Space


In [36]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3.0,toronto_merged.columns[[1] + list(range(7, toronto_merged.shape[1]))]]

Unnamed: 0,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,Park,Pharmacy,Bus Stop,Shopping Mall,ATM,Shop & Service,Supermarket,Food & Drink Shop,Fast Food Restaurant,Skating Rink
5,M9A,Pharmacy,Grocery Store,Convenience Store,Playground,Bank,Bakery,Shopping Mall,Golf Course,Skating Rink,Café
11,M9B,Park,Hotel,Pizza Place,Mexican Restaurant,Restaurant,Bank,Clothing Store,Grocery Store,Gym,Theater
12,M1C,Breakfast Spot,Playground,Park,Burger Joint,Italian Restaurant,Farm,Elementary School,Entertainment Service,Escape Room,Ethiopian Restaurant
39,M2K,Bank,Gas Station,Japanese Restaurant,Grocery Store,Café,Chinese Restaurant,Trail,Intersection,Park,Shopping Mall
46,M3L,Park,Bank,Pizza Place,Moving Target,Vietnamese Restaurant,Grocery Store,Zoo,Event Space,Electronics Store,Elementary School
50,M9L,Bakery,Italian Restaurant,Electronics Store,Pizza Place,Shopping Mall,Medical Center,Pharmacy,Bank,Park,Eastern European Restaurant
51,M1M,Pizza Place,Ice Cream Shop,Beach,Sports Bar,Auto Garage,Park,Restaurant,Filipino Restaurant,Financial or Legal Service,Electronics Store
53,M3M,Vietnamese Restaurant,Baseball Field,Restaurant,Zoo,Farm,Electronics Store,Elementary School,Entertainment Service,Escape Room,Ethiopian Restaurant
57,M9M,Convenience Store,Golf Course,Gas Station,Storage Facility,Park,Bakery,Discount Store,Zoo,Event Space,Electronics Store


In [37]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4.0,toronto_merged.columns[[1] + list(range(7, toronto_merged.shape[1]))]]

Unnamed: 0,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,M1G,Park,Coffee Shop,Chinese Restaurant,Mobile Phone Shop,Indian Restaurant,Fast Food Restaurant,Falafel Restaurant,Elementary School,Entertainment Service,Escape Room
45,M2L,Park,Pool,Zoo,Falafel Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Entertainment Service,Escape Room,Ethiopian Restaurant


# Created by Parshwa Shah