# Import of all necessary libraries

In [96]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

# Making request to URL and making BeautifulSoup object for scraping

In [97]:
URL = 'http://en.turkcewiki.org/wiki/List_of_postal_codes_of_Canada:_M'
page = requests.get(URL)

soup = BeautifulSoup(page.content, 'lxml')

# Table tag of data

In [98]:
tble = soup.find('table')
print(len(tble))

2


# Initialize empty lists for all the data

In [99]:
postal_codes=[]
boroughs = []
neighs = []

# Loop through all the cells of table and retrieve all the important information

In [100]:
for neigh in tble.find_all('td'):
  sp = neigh.find('span')
  if(sp.text != 'Not assigned'):
    postal_codes.append(neigh.find('b').text)
    data = sp.text
    split_both = data.split("(")
    hoods = split_both[1].split(")")[0]
    hoods_data = hoods.replace("/",",")
    boroughs.append(split_both[0])
    neighs.append(hoods_data)

# Checking for lengths of lists and discrepancies

In [101]:
print(len(postal_codes))
print(len(boroughs))
print(len(neighs))

103
103
103


# Empty dataframe 

In [102]:
df = pd.DataFrame(columns=['PostalCode','Borough','Neighbourhood'])
df

Unnamed: 0,PostalCode,Borough,Neighbourhood


# Assigning data to columns

In [103]:
df['PostalCode'] =postal_codes
df['Borough'] = boroughs
df['Neighbourhood'] = neighs

df

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
...,...,...,...
98,M8X,Etobicoke,"The Kingsway , Montgomery Road , Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East TorontoBusiness reply mail Processing Cen...,Enclave of M4L
101,M8Y,Etobicoke,"Old Mill South , King's Mill Park , Sunnylea ,..."


# Checking unique postal codes in order to merge same postal code boroughs

In [104]:
len(df['PostalCode'].unique())

103

# Getting data from CSV of geospatial data

In [105]:
from io import StringIO 
url = 'http://cocl.us/Geospatial_data'
s=requests.get(url).content
c=pd.read_csv(StringIO(s.decode('utf-8')))
c

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


# Merge Both the tables

In [106]:
df2= df.merge(c, left_on='PostalCode',right_on = 'Postal Code', how='left')
df2.drop(columns=['Postal Code'],inplace=True)
df2

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway , Montgomery Road , Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East TorontoBusiness reply mail Processing Cen...,Enclave of M4L,43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South , King's Mill Park , Sunnylea ,...",43.636258,-79.498509


# Exmine North York Borough of Toronto

In [107]:
df3 = df2[df2['Borough']=='North York']
df3

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
7,M3B,North York,Don Mills,43.745906,-79.352188
10,M6B,North York,Glencairn,43.709577,-79.445073
13,M3C,North York,Don Mills,43.7259,-79.340923
27,M2H,North York,Hillcrest Village,43.803762,-79.363452
28,M3H,North York,"Bathurst Manor , Wilson Heights , Downsview North",43.754328,-79.442259
33,M2J,North York,"Fairview , Henry Farm , Oriole",43.778517,-79.346556
34,M3J,North York,"Northwood Park , York University",43.76798,-79.487262


# Imports

In [108]:
from geopy.geocoders import Nominatim 
import folium
import json
from pandas.io.json import json_normalize 
import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

# Getting longitude and lattitude of North York

In [109]:
address = 'North York, Ontario'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Manhattan are 43.7543263, -79.44911696639593.


# Generate map of all neighbourhoods in North York

In [110]:
map_north_york = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, lng, label in zip(df3['Latitude'],df3['Longitude'], df3['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_north_york)  
    
map_north_york

# Defining foursquare credentials

In [111]:
CLIENT_ID = 'RFI4U14MUXCU2TS4TCEPPFVL2MVA2DC4VFFZAAX2334YMWF3' 
CLIENT_SECRET = 'NCCPVLPHQ05YAFD1ATX1B0A1AZXPOZ4MICI3V5KRHTYCJP0Ot'
VERSION = '20180605' 
LIMIT = 100 

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: RFI4U14MUXCU2TS4TCEPPFVL2MVA2DC4VFFZAAX2334YMWF3
CLIENT_SECRET:NCCPVLPHQ05YAFD1ATX1B0A1AZXPOZ4MICI3V5KRHTYCJP0Ot


# Getting longitude and lattitude of the first neighbourhood

In [112]:
neighborhood_latitude = df3.loc[0, 'Latitude']
neighborhood_longitude = df3.loc[0, 'Longitude']

neighborhood_name = df3.loc[0, 'Neighbourhood'] 

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Parkwoods are 43.7532586, -79.3296565.


# Making a get request to foursquare API to explore venues near Parkwood

In [113]:
LIMIT =100
radius =500
AUTH = 'IKEMNNFA2KMSR1AEWHKN2HUZEV3HCQB3GNZHGQKJ4EZ3XL3O'
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&oauth_token={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    AUTH,
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

In [114]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '6098ec506d61f1572aca66b7'},
 'notifications': [{'item': {'unreadCount': 0}, 'type': 'notificationTray'}],
 'response': {'groups': [{'items': [{'reasons': {'count': 0,
       'items': [{'reasonName': 'globalInteractionReason',
         'summary': 'This spot is popular',
         'type': 'general'}]},
      'referralId': 'e-0-4e8d9dcdd5fbbbb6b3003c7b-0',
      'venue': {'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/park_',
          'suffix': '.png'},
         'id': '4bf58dd8d48988d163941735',
         'name': 'Park',
         'pluralName': 'Parks',
         'primary': True,
         'shortName': 'Park'}],
       'id': '4e8d9dcdd5fbbbb6b3003c7b',
       'location': {'address': 'Toronto',
        'cc': 'CA',
        'city': 'Toronto',
        'country': 'Canada',
        'distance': 245,
        'formattedAddress': ['Toronto', 'Toronto ON', 'Canada'],
        'labeledLatLngs': [{'label': 'display',
          

# Retrieving categories of venues obtained

In [115]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# Cleaning response and generating Dataframe

In [116]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) 

filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,name,categories,lat,lng
0,Brookbanks Park,Park,43.751976,-79.33214
1,Careful & Reliable Painting,Construction & Landscaping,43.752622,-79.331957
2,649 Variety,Convenience Store,43.754513,-79.331942
3,Towns On The Ravine,Hotel,43.754754,-79.332552
4,Sun Life,Construction & Landscaping,43.75476,-79.332783


In [117]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

10 venues were returned by Foursquare.


# Getting venues near all the neighbourhoods in north york borough

In [118]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    AUTH = 'IKEMNNFA2KMSR1AEWHKN2HUZEV3HCQB3GNZHGQKJ4EZ3XL3O'
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        url = 'https://api.foursquare.com/v2/venues/explore?&oauth_token={}&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            AUTH,
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

# Generating dataframe of all neighbourhoods

In [120]:
north_york_venues = getNearbyVenues(names=df3['Neighbourhood'],
                                   latitudes=df3['Latitude'],
                                   longitudes=df3['Longitude'])
north_york_venues

Parkwoods
Victoria Village
Lawrence Manor , Lawrence Heights
Don Mills
Glencairn
Don Mills
Hillcrest Village
Bathurst Manor , Wilson Heights , Downsview North
Fairview , Henry Farm , Oriole
Northwood Park , York University
Bayview Village
Downsview
York Mills , Silver Hills
Downsview
North Park , Maple Leaf Park , Upwood Park
Humber Summit
Willowdale , Newtonbrook
Downsview
Bedford Park , Lawrence Manor East
Humberlea , Emery
Willowdale
Downsview
York Mills West
Willowdale


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.332140,Park
1,Parkwoods,43.753259,-79.329656,Careful & Reliable Painting,43.752622,-79.331957,Construction & Landscaping
2,Parkwoods,43.753259,-79.329656,649 Variety,43.754513,-79.331942,Convenience Store
3,Parkwoods,43.753259,-79.329656,Towns On The Ravine,43.754754,-79.332552,Hotel
4,Parkwoods,43.753259,-79.329656,Sun Life,43.754760,-79.332783,Construction & Landscaping
...,...,...,...,...,...,...,...
423,Willowdale,43.782736,-79.442259,Golden Touch,43.781478,-79.444905,Spa
424,Willowdale,43.782736,-79.442259,Imperial Health Spa Clinic,43.780436,-79.444456,Spa
425,Willowdale,43.782736,-79.442259,Price Chopper,43.783237,-79.446339,Grocery Store
426,Willowdale,43.782736,-79.442259,Bathurst Village Market,43.784063,-79.445984,Supermarket


In [121]:
north_york_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,Careful & Reliable Painting,43.752622,-79.331957,Construction & Landscaping
2,Parkwoods,43.753259,-79.329656,649 Variety,43.754513,-79.331942,Convenience Store
3,Parkwoods,43.753259,-79.329656,Towns On The Ravine,43.754754,-79.332552,Hotel
4,Parkwoods,43.753259,-79.329656,Sun Life,43.75476,-79.332783,Construction & Landscaping


# Getting count of venues near neighbourhoods

In [124]:
north_york_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor , Wilson Heights , Downsview North",34,34,34,34,34,34
Bayview Village,6,6,6,6,6,6
"Bedford Park , Lawrence Manor East",52,52,52,52,52,52
Don Mills,41,41,41,41,41,41
Downsview,24,24,24,24,24,24
"Fairview , Henry Farm , Oriole",100,100,100,100,100,100
Glencairn,12,12,12,12,12,12
Hillcrest Village,6,6,6,6,6,6
Humber Summit,3,3,3,3,3,3
"Humberlea , Emery",4,4,4,4,4,4


# Unique categories of venues

In [125]:
print('There are {} uniques categories.'.format(len(north_york_venues['Venue Category'].unique())))

There are 135 uniques categories.


# One hot encoding categories for clustering

In [126]:
north_york_onehot = pd.get_dummies(north_york_venues[['Venue Category']], prefix="", prefix_sep="")

north_york_onehot['Neighborhood'] = north_york_venues['Neighborhood'] 

fixed_columns = [north_york_onehot.columns[-1]] + list(north_york_onehot.columns[:-1])
north_york_onehot = north_york_onehot[fixed_columns]

north_york_onehot.head()

Unnamed: 0,Neighborhood,ATM,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,BBQ Joint,Bakery,Bank,Bar,Baseball Field,Beer Store,Bike Shop,Boutique,Breakfast Spot,Bridal Shop,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Bus Stop,Business Center,Business Service,Butcher,Cafeteria,Café,Caribbean Restaurant,Carpet Store,Chinese Restaurant,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,...,Miscellaneous Shop,Mobile Phone Shop,Movie Theater,Noodle House,Optical Shop,Other Repair Shop,Paper / Office Supplies Store,Park,Pastry Shop,Pet Store,Pharmacy,Pizza Place,Plaza,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Record Shop,Rental Car Location,Restaurant,Salon / Barbershop,Sandwich Place,Shoe Store,Shopping Mall,Smoke Shop,Snack Place,Spa,Sporting Goods Shop,Supermarket,Supplement Shop,Sushi Restaurant,Tailor Shop,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Vietnamese Restaurant,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [127]:
north_york_onehot.shape

(428, 136)

# Mean of neighbourhood categories

In [128]:
north_york_grouped = north_york_onehot.groupby('Neighborhood').mean().reset_index()
north_york_grouped

Unnamed: 0,Neighborhood,ATM,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,BBQ Joint,Bakery,Bank,Bar,Baseball Field,Beer Store,Bike Shop,Boutique,Breakfast Spot,Bridal Shop,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Bus Stop,Business Center,Business Service,Butcher,Cafeteria,Café,Caribbean Restaurant,Carpet Store,Chinese Restaurant,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,...,Miscellaneous Shop,Mobile Phone Shop,Movie Theater,Noodle House,Optical Shop,Other Repair Shop,Paper / Office Supplies Store,Park,Pastry Shop,Pet Store,Pharmacy,Pizza Place,Plaza,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Record Shop,Rental Car Location,Restaurant,Salon / Barbershop,Sandwich Place,Shoe Store,Shopping Mall,Smoke Shop,Snack Place,Spa,Sporting Goods Shop,Supermarket,Supplement Shop,Sushi Restaurant,Tailor Shop,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Vietnamese Restaurant,Women's Store,Yoga Studio
0,"Bathurst Manor , Wilson Heights , Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.058824,0.0,0.0,0.029412,0.0,...,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.029412,0.058824,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.029412,0.0,0.029412,0.0,0.0,0.058824,0.0,0.029412,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park , Lawrence Manor East",0.0,0.0,0.0,0.019231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.019231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.019231,0.0,0.019231,0.0,0.0,0.0,0.0,0.0,0.038462,0.019231,0.0,0.0,0.019231,...,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019231,0.019231,0.057692,0.0,0.0,0.0,0.019231,0.0,0.0,0.0,0.038462,0.0,0.038462,0.0,0.0,0.0,0.0,0.096154,0.0,0.0,0.0,0.038462,0.0,0.0,0.019231,0.0,0.0,0.0,0.0,0.019231,0.0
3,Don Mills,0.02439,0.0,0.0,0.0,0.02439,0.0,0.02439,0.02439,0.0,0.0,0.0,0.0,0.0,0.02439,0.02439,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.04878,0.02439,0.0,0.02439,0.0,0.04878,0.04878,0.0,0.02439,0.0,0.0,...,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.073171,0.0,0.02439,0.0,0.0,0.02439,0.0,0.0,0.04878,0.02439,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Downsview,0.0,0.041667,0.041667,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.041667,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,...,0.0,0.083333,0.0,0.0,0.0,0.041667,0.0,0.041667,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Fairview , Henry Farm , Oriole",0.01,0.01,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.02,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.14,0.05,0.0,0.0,0.02,0.05,...,0.01,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.04,0.01,0.01,0.06,0.01,0.0,0.01,0.01,0.03,0.0,0.01,0.0,0.01,0.01,0.0,0.01,0.02,0.01,0.0,0.07,0.01
6,Glencairn,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.083333,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Hillcrest Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Humber Summit,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Humberlea , Emery",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Getting top 5 venues of each neighbourhood and their frequencies

In [129]:
num_top_venues = 5

for hood in north_york_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = north_york_grouped[north_york_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bathurst Manor , Wilson Heights , Downsview North----
            venue  freq
0            Bank  0.06
1  Ice Cream Shop  0.06
2     Coffee Shop  0.06
3        Pharmacy  0.06
4             Spa  0.06


----Bayview Village----
                 venue  freq
0                 Café  0.17
1                  Gym  0.17
2   Chinese Restaurant  0.17
3                  Spa  0.17
4  Japanese Restaurant  0.17


----Bedford Park , Lawrence Manor East----
                venue  freq
0                 Spa  0.10
1  Italian Restaurant  0.06
2         Pizza Place  0.06
3    Sushi Restaurant  0.04
4    Business Service  0.04


----Don Mills----
                 venue  freq
0                  Gym  0.10
1           Restaurant  0.07
2  Sporting Goods Shop  0.05
3       Clothing Store  0.05
4                 Café  0.05


----Downsview----
               venue  freq
0       Home Service  0.08
1  Mobile Phone Shop  0.08
2   Business Service  0.08
3      Shopping Mall  0.08
4      Grocery Store  0.08


----Fai

In [130]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

# Neighbourhoods with their most common 10 venues

In [148]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = north_york_grouped['Neighborhood']

for ind in np.arange(north_york_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(north_york_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor , Wilson Heights , Downsview North",Mobile Phone Shop,Coffee Shop,Pharmacy,Bank,Ice Cream Shop,Spa,Pet Store,Park,Middle Eastern Restaurant,Chinese Restaurant
1,Bayview Village,Gym,Japanese Restaurant,Chinese Restaurant,Café,Spa,Bank,Dog Run,Discount Store,Diner,Dim Sum Restaurant
2,"Bedford Park , Lawrence Manor East",Spa,Pizza Place,Italian Restaurant,Coffee Shop,Boutique,Restaurant,Sandwich Place,Sushi Restaurant,Mobile Phone Shop,Juice Bar
3,Don Mills,Gym,Restaurant,Sporting Goods Shop,Café,Clothing Store,Coffee Shop,ATM,Bus Line,Mobile Phone Shop,Chinese Restaurant
4,Downsview,Grocery Store,Mobile Phone Shop,Shopping Mall,Home Service,Business Service,Gym / Fitness Center,Baseball Field,Korean Restaurant,Electronics Store,Liquor Store


# K-means clustering with 3 clusters

In [149]:
kclusters = 3

north_york_grouped_clustering = north_york_grouped.drop('Neighborhood', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(north_york_grouped_clustering)

kmeans.labels_[0:10] 

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 1], dtype=int32)

# Merge tables

In [150]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

north_york_merged = df3

north_york_merged = north_york_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

north_york_merged.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,1,Construction & Landscaping,Park,Convenience Store,Hotel,Fireworks Store,Bus Stop,BBQ Joint,Food & Drink Shop,Yoga Studio,Dim Sum Restaurant
1,M4A,North York,Victoria Village,43.725882,-79.315572,2,Coffee Shop,Pizza Place,Financial or Legal Service,Portuguese Restaurant,Intersection,Hockey Arena,Bridal Shop,Convenience Store,Cosmetics Shop,Cupcake Shop
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763,2,Clothing Store,Furniture / Home Store,Accessories Store,Home Service,Gift Shop,Coffee Shop,Miscellaneous Shop,Medical Center,Carpet Store,Lighting Store
7,M3B,North York,Don Mills,43.745906,-79.352188,2,Gym,Restaurant,Sporting Goods Shop,Café,Clothing Store,Coffee Shop,ATM,Bus Line,Mobile Phone Shop,Chinese Restaurant
10,M6B,North York,Glencairn,43.709577,-79.445073,2,Pizza Place,Spa,Metro Station,Park,Japanese Restaurant,Pharmacy,Bakery,Sushi Restaurant,Business Service,Asian Restaurant


# map of clusters

In [151]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(north_york_merged['Latitude'], north_york_merged['Longitude'],north_york_merged['Neighbourhood'], north_york_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examining data in each cluster

In [153]:
north_york_merged.loc[north_york_merged['Cluster Labels'] == 0, north_york_merged.columns[[1] + list(range(5, north_york_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
45,North York,0,Park,Martial Arts School,Cafeteria,Yoga Studio,Department Store,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Cupcake Shop


In [154]:
north_york_merged.loc[north_york_merged['Cluster Labels'] == 1, north_york_merged.columns[[1] + list(range(5, north_york_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,1,Construction & Landscaping,Park,Convenience Store,Hotel,Fireworks Store,Bus Stop,BBQ Joint,Food & Drink Shop,Yoga Studio,Dim Sum Restaurant
49,North York,1,Park,Construction & Landscaping,Massage Studio,Bakery,Yoga Studio,Dessert Shop,Dog Run,Discount Store,Diner,Dim Sum Restaurant
57,North York,1,Fabric Shop,Paper / Office Supplies Store,Construction & Landscaping,Baseball Field,Dessert Shop,Electronics Store,Dog Run,Discount Store,Diner,Dim Sum Restaurant
66,North York,1,Electronics Store,Park,Construction & Landscaping,Convenience Store,Flower Shop,Dessert Shop,Dog Run,Discount Store,Diner,Dim Sum Restaurant


In [155]:
north_york_merged.loc[north_york_merged['Cluster Labels'] == 2, north_york_merged.columns[[1] + list(range(5, north_york_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,2,Coffee Shop,Pizza Place,Financial or Legal Service,Portuguese Restaurant,Intersection,Hockey Arena,Bridal Shop,Convenience Store,Cosmetics Shop,Cupcake Shop
3,North York,2,Clothing Store,Furniture / Home Store,Accessories Store,Home Service,Gift Shop,Coffee Shop,Miscellaneous Shop,Medical Center,Carpet Store,Lighting Store
7,North York,2,Gym,Restaurant,Sporting Goods Shop,Café,Clothing Store,Coffee Shop,ATM,Bus Line,Mobile Phone Shop,Chinese Restaurant
10,North York,2,Pizza Place,Spa,Metro Station,Park,Japanese Restaurant,Pharmacy,Bakery,Sushi Restaurant,Business Service,Asian Restaurant
13,North York,2,Gym,Restaurant,Sporting Goods Shop,Café,Clothing Store,Coffee Shop,ATM,Bus Line,Mobile Phone Shop,Chinese Restaurant
27,North York,2,Dog Run,Pool,Fast Food Restaurant,Golf Course,Athletics & Sports,Mediterranean Restaurant,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop
28,North York,2,Mobile Phone Shop,Coffee Shop,Pharmacy,Bank,Ice Cream Shop,Spa,Pet Store,Park,Middle Eastern Restaurant,Chinese Restaurant
33,North York,2,Clothing Store,Women's Store,Shoe Store,Cosmetics Shop,Coffee Shop,Restaurant,Fast Food Restaurant,Sporting Goods Shop,Lingerie Store,Kids Store
34,North York,2,Spa,Furniture / Home Store,Caribbean Restaurant,Metro Station,Coffee Shop,Optical Shop,Massage Studio,Pharmacy,Pizza Place,Falafel Restaurant
39,North York,2,Gym,Japanese Restaurant,Chinese Restaurant,Café,Spa,Bank,Dog Run,Discount Store,Diner,Dim Sum Restaurant
