__Imports and libraries__

In [103]:
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans
import requests
import json
from pandas.io.json import json_normalize

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

!conda install -c conda-forge geopy --yes
import geopy.geocoders

!conda install -c conda-forge folium=0.5.0 --yes
import folium

Solving environment: done

# All requested packages already installed.

Solving environment: done

# All requested packages already installed.



__Access Wikipedia webpage data__

In [40]:
wiki_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
html_text = requests.get(wiki_url).text
soup = BeautifulSoup(html_text)
table = soup.find('table', attrs={'class':'wikitable sortable'})
trs = table.find_all('tr')
rows = list()
for tr in trs:
    td = tr.find_all('td')
    row = [ele.text.strip() for ele in td]
    if row:
        rows.append(row)

__Create dataframe with labeled columns "PostalCode", "Borough" and "Neighborhood" using pandas__

In [41]:
df = pd.DataFrame(rows, columns=['PostalCode', 'Borough', 'Neighborhood'])


__Clean dataframe from values that are not assigned and duplicate__

In [42]:
df = df[df.Borough != 'Not assigned']
df.reset_index(inplace=True, drop=True)

df['Neighborhood'] = df.apply(lambda row: row['Borough'] if row['Neighborhood'] == 'Not assigned' else row['Neighborhood'], axis=1)

df = df.groupby(['PostalCode', 'Borough'])['Neighborhood'].\
    apply(', '.join).to_frame()
df.reset_index(inplace=True)

df.head(12)


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


__Fetch longitude & latiude iinformation__

In [43]:
geographical_data = pd.read_csv('https://cocl.us/Geospatial_data')

print("Dataframe shape:", geographical_data.shape)
geographical_data.head(12)

Dataframe shape: (103, 3)


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


__Integrate longitude & latutude information into dataframe__

In [44]:
df = pd.concat(
    [df.set_index('PostalCode'), geographical_data.set_index('Postal Code')],
    axis=1, join='inner')
df.head(12)

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
M1G,Scarborough,Woburn,43.770992,-79.216917
M1H,Scarborough,Cedarbrae,43.773136,-79.239476
M1J,Scarborough,Scarborough Village,43.744734,-79.239476
M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [45]:
df.reset_index(inplace=True)
df.head(12)

Unnamed: 0,index,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [46]:
df.rename(columns={'index':'PostalCode'}, inplace=True)
df.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


__Fetch longitude & latitude coordinates__

In [47]:
address = 'Toronto, Ontario, Canada'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
longitude = location.longitude
latitude = location.latitude
print('Longitude & latitude coordinates of Toronto are respectively {} and {}.'.format(location.longitude, location.latitude))

Longitude & latitude coordinates of Toronto are respectively -79.387207 and 43.653963.


__Map Toronto__

In [48]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)
    
map_toronto

__Isolate North York borough including neigborhoods in dataframe__

In [50]:
northyork_data = df[df['Borough'] == 'North York']
northyork_data = northyork_data.reset_index(drop=True)
northyork_data.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M2H,North York,Hillcrest Village,43.803762,-79.363452
1,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556
2,M2K,North York,Bayview Village,43.786947,-79.385975
3,M2L,North York,"Silver Hills, York Mills",43.75749,-79.374714
4,M2M,North York,"Newtonbrook, Willowdale",43.789053,-79.408493
5,M2N,North York,Willowdale South,43.77012,-79.408493
6,M2P,North York,York Mills West,43.752758,-79.400049
7,M2R,North York,Willowdale West,43.782736,-79.442259
8,M3A,North York,Parkwoods,43.753259,-79.329656
9,M3B,North York,Don Mills North,43.745906,-79.352188


__Map North York__

In [53]:
address_northyork = 'North York, Toronto, ON, Canada'
latitude_northyork = 43.773077
longitude_northyork = -79.257774
print('Latitude & longitude coordinates of North York are respectively {}, {}.'.format(latitude_northyork, longitude_northyork))

map_northyork = folium.Map(location=[latitude_northyork, longitude_northyork], zoom_start=11.5)

for lat, lng, label in zip(northyork_data['Latitude'], northyork_data['Longitude'], northyork_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius = 10,
        popup = label,
        color ='blue',
        fill = True,
        fill_color = '#3186cc',
        fill_opacity = 0.7).add_to(map_northyork)  
    
map_northyork

Latitude & longitude coordinates of North York are respectively 43.773077, -79.257774.


__Prepare access to Fourquare API__

In [54]:
CLIENT_ID = 'X'
CLIENT_SECRET = 'X'
VERSION = '20200107'
print('Foursquare user info removed for privacy')

Foursquare user info removed for privacy


__Explore 1st neighborhood in North York borough__

In [80]:
northyork_data.loc[0, 'Neighborhood']


'Hillcrest Village'

In [82]:
neighborhood_latitude = northyork_data.loc[0, 'Latitude']
neighborhood_longitude = northyork_data.loc[0, 'Longitude']

neighborhood_name =northyork_data.loc[0, 'Neighborhood']

print('Latitude & longitude coordinates of North Yorks 1st neighborhood (Hillcrest Village) {} are respectively {} and {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude & longitude coordinates of North Yorks 1st neighborhood (Hillcrest Village) Hillcrest Village are respectively 43.8037622 and -79.3634517.


__Eplore Top-100 venues in 1st neighbourhood (Hillcrest Village) within 500 miles radius__

In [83]:
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

In [84]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e149fefaba2970028b2dc44'},
 'response': {'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 5,
  'suggestedBounds': {'ne': {'lat': 43.808262204500004,
    'lng': -79.3572281853783},
   'sw': {'lat': 43.7992621955, 'lng': -79.3696752146217}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4ad9dce6f964a520651b21e3',
       'name': "Eagle's Nest Golf Club",
       'location': {'address': '10000 Dufferin Rd',
        'lat': 43.805454826002794,
        'lng': -79.36418592243415,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.805454826002794,
          'lng': -79.36418592243415}],
        'distance': 197,
        'cc': 'CA',
        'city': 'Toronto

In [85]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [86]:
venues = results['response']['groups'][0]['items']
venues

[{'reasons': {'count': 0,
   'items': [{'summary': 'This spot is popular',
     'type': 'general',
     'reasonName': 'globalInteractionReason'}]},
  'venue': {'id': '4ad9dce6f964a520651b21e3',
   'name': "Eagle's Nest Golf Club",
   'location': {'address': '10000 Dufferin Rd',
    'lat': 43.805454826002794,
    'lng': -79.36418592243415,
    'labeledLatLngs': [{'label': 'display',
      'lat': 43.805454826002794,
      'lng': -79.36418592243415}],
    'distance': 197,
    'cc': 'CA',
    'city': 'Toronto',
    'state': 'ON',
    'country': 'Canada',
    'formattedAddress': ['10000 Dufferin Rd', 'Toronto ON', 'Canada']},
   'categories': [{'id': '4bf58dd8d48988d1e6941735',
     'name': 'Golf Course',
     'pluralName': 'Golf Courses',
     'shortName': 'Golf Course',
     'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/golfcourse_',
      'suffix': '.png'},
     'primary': True}],
   'photos': {'count': 0, 'groups': []}},
  'referralId': 'e-0-4ad9dce6f964a5206

__Clean JSON and structure into pandas dataframe__ 

In [89]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues)

filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Eagle's Nest Golf Club,Golf Course,43.805455,-79.364186
1,New York Fries,Fast Food Restaurant,43.803664,-79.363905
2,AY Jackson Pool,Pool,43.804515,-79.366138
3,Villa Madina,Mediterranean Restaurant,43.801685,-79.363938
4,Duncan Creek Park,Dog Run,43.805539,-79.360695


In [90]:
print('{} venues were returned by Foursquare API.'.format(nearby_venues.shape[0]))

5 venues were returned by Foursquare API.


__Repeat for other neighbourhoods in North York__

In [91]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

__Cretae dataframe of venues in North York__

In [92]:
northyork_venues = getNearbyVenues(names=northyork_data['Neighborhood'],
                                   latitudes=northyork_data['Latitude'],
                                   longitudes=northyork_data['Longitude']
                                  )

Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
Silver Hills, York Mills
Newtonbrook, Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park, Don Mills South
Bathurst Manor, Downsview North, Wilson Heights
Northwood Park, York University
CFB Toronto, Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Bedford Park, Lawrence Manor East
Lawrence Heights, Lawrence Manor
Glencairn
Downsview, North Park, Upwood Park
Humber Summit
Emery, Humberlea


__Check how many venues are in each neighborhood of North York__

In [93]:
northyork_venues.groupby('Neighborhood').count()


Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor, Downsview North, Wilson Heights",20,20,20,20,20,20
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",24,24,24,24,24,24
"CFB Toronto, Downsview East",3,3,3,3,3,3
Don Mills North,4,4,4,4,4,4
Downsview Central,3,3,3,3,3,3
Downsview Northwest,4,4,4,4,4,4
Downsview West,5,5,5,5,5,5
"Downsview, North Park, Upwood Park",4,4,4,4,4,4
"Emery, Humberlea",1,1,1,1,1,1


In [95]:
print('North York has {} uniques categories of venues including:'.format(len(northyork_venues['Venue Category'].unique())))

list(northyork_venues['Venue Category'].unique())

North York has 102 uniques categories of venues including:


['Golf Course',
 'Fast Food Restaurant',
 'Pool',
 'Mediterranean Restaurant',
 'Dog Run',
 'Toy / Game Store',
 'Movie Theater',
 'Burger Joint',
 'Shopping Mall',
 'Bakery',
 'Candy Store',
 'Tea Room',
 'American Restaurant',
 'Electronics Store',
 'Coffee Shop',
 'Department Store',
 'Salon / Barbershop',
 'Smoothie Shop',
 'Clothing Store',
 'Pharmacy',
 'Theater',
 'Liquor Store',
 'Food Court',
 'Bank',
 'Japanese Restaurant',
 'Juice Bar',
 'Restaurant',
 'Cosmetics Shop',
 'Video Game Store',
 'Sporting Goods Shop',
 'Wings Joint',
 'Asian Restaurant',
 'Burrito Place',
 'Deli / Bodega',
 'Gift Shop',
 'Boutique',
 'Supplement Shop',
 "Women's Store",
 'Convenience Store',
 'Mobile Phone Shop',
 'Chinese Restaurant',
 'Dessert Shop',
 'Spa',
 'Bus Station',
 'Baseball Field',
 'Café',
 'Grocery Store',
 'Ramen Restaurant',
 'Steakhouse',
 'Indonesian Restaurant',
 'Arts & Crafts Store',
 'Plaza',
 'Pet Store',
 'Lounge',
 'Ice Cream Shop',
 'Sandwich Place',
 'Sushi Restaurant

__Integrating unique venue categories into dataframe using onehot__

In [97]:
northyork_onehot = pd.get_dummies(data = northyork_venues, drop_first  = False, 
                              prefix = "", prefix_sep = "", columns = ['Venue Category'])
northyork_onehot.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Bakery,Bank,Bar,Baseball Field,Basketball Court,Beer Store,Bike Shop,Boutique,Bridal Shop,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Station,Butcher,Café,Candy Store,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Coffee Shop,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Electronics Store,Empanada Restaurant,Fast Food Restaurant,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gas Station,Gift Shop,Golf Course,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hockey Arena,Home Service,Hotel,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Juice Bar,Liquor Store,Lounge,Massage Studio,Mediterranean Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Movie Theater,Park,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Restaurant,Salon / Barbershop,Sandwich Place,Shopping Mall,Smoothie Shop,Spa,Sporting Goods Shop,Steakhouse,Supermarket,Supplement Shop,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Video Store,Vietnamese Restaurant,Wings Joint,Women's Store
0,Hillcrest Village,43.803762,-79.363452,Eagle's Nest Golf Club,43.805455,-79.364186,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Hillcrest Village,43.803762,-79.363452,New York Fries,43.803664,-79.363905,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Hillcrest Village,43.803762,-79.363452,AY Jackson Pool,43.804515,-79.366138,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Hillcrest Village,43.803762,-79.363452,Villa Madina,43.801685,-79.363938,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Hillcrest Village,43.803762,-79.363452,Duncan Creek Park,43.805539,-79.360695,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


__Selecting restaurant-type venues only out of all venue categories__

In [155]:
northyork_restaurantsonlylist = [
 
 'Neighborhood',
 'Neighborhood Latitude',
 'Neighborhood Longitude',

 'Fast Food Restaurant',
 'Mediterranean Restaurant',
 'Burger Joint',
 'American Restaurant',
 'Coffee Shop',
 'Smoothie Shop',
 'Food Court',
 'Japanese Restaurant',
 'Juice Bar',
 'Restaurant',
 'Wings Joint',
 'Asian Restaurant',
 'Burrito Place',
 'Deli / Bodega',
 'Chinese Restaurant',
 'Dessert Shop',
 'Café',
 'Grocery Store',
 'Ramen Restaurant',
 'Steakhouse',
 'Indonesian Restaurant',
 'Ice Cream Shop',
 'Sandwich Place',
 'Sushi Restaurant',
 'Vietnamese Restaurant',
 'Pizza Place',
 'Middle Eastern Restaurant',
 'Bubble Tea Shop',
 'Food & Drink Shop', 
 'Caribbean Restaurant',
 'Italian Restaurant',
 'Dim Sum Restaurant',
 'Diner',
 'Fried Chicken Joint',
 'Frozen Yogurt Shop',
 'Bar',
 'Miscellaneous Shop',
 'Food Truck',
 'Portuguese Restaurant',
 'French Restaurant',
 'Indian Restaurant',
 'Comfort Food Restaurant',
 'Thai Restaurant',
 'Pub',
 'Greek Restaurant',
 'Empanada Restaurant']

__Updating onehot dataframe based on restaurant-type only venue categories__

In [152]:
northyork_onehot = northyork_onehot[northyork_restaurantsonlylist].drop(
    columns = ['Neighborhood Latitude', 'Neighborhood Longitude']).groupby(
    'Neighborhood').sum()

northyork_onehot.head()

KeyError: "['Neighborhood Longitude', 'Neighborhood', 'Neighborhood Latitude'] not in index"

__Organize neighborhoods in North York into 5 clusters using kmeans__

In [153]:
kmeans = KMeans(n_clusters = 5, random_state = 0).fit(northyork_onehot)

__Find center of each North York cluster__

In [154]:
means_df = pd.DataFrame(kmeans.cluster_centers_)
means_df.columns = northyork_onehot.columns
means_df.index = ['1','2','3','4','5']
means_df['Total'] = means_df.sum(axis = 1)
means_df.sort_values(axis = 0, by = ['Total'], ascending=False)

Unnamed: 0,Fast Food Restaurant,Mediterranean Restaurant,Dog Run,Burger Joint,American Restaurant,Coffee Shop,Smoothie Shop,Food Court,Japanese Restaurant,Juice Bar,Restaurant,Wings Joint,Asian Restaurant,Burrito Place,Deli / Bodega,Chinese Restaurant,Dessert Shop,Café,Grocery Store,Ramen Restaurant,Steakhouse,Indonesian Restaurant,Ice Cream Shop,Sandwich Place,Sushi Restaurant,Vietnamese Restaurant,Pizza Place,Middle Eastern Restaurant,Bubble Tea Shop,Food & Drink Shop,Caribbean Restaurant,Italian Restaurant,Dim Sum Restaurant,Diner,Fried Chicken Joint,Frozen Yogurt Shop,Bar,Miscellaneous Shop,Food Truck,Portuguese Restaurant,French Restaurant,Indian Restaurant,Comfort Food Restaurant,Thai Restaurant,Pub,Greek Restaurant,Empanada Restaurant,Total
3,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,3.0,1.0,1.0,1.0,2.0,3.0,1.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26.0
5,5.0,0.0,0.0,1.0,1.0,5.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0
2,2.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,21.0
4,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.5,0.0,1.0,0.0,1.0,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.5,0.0,0.5,0.5,0.0,0.0,0.0,0.5,0.5,0.5,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.5
1,0.058824,0.058824,0.058824,6.938894e-18,1.387779e-17,0.235294,6.938894e-18,1.387779e-17,0.176471,5.5511150000000004e-17,8.326673e-17,6.938894e-18,5.5511150000000004e-17,6.938894e-18,1.387779e-17,0.058824,6.938894e-18,0.117647,0.235294,5.5511150000000004e-17,6.938894e-18,6.938894e-18,6.938894e-18,1.110223e-16,1.110223e-16,0.058824,0.235294,1.387779e-17,6.938894e-18,0.058824,0.117647,5.5511150000000004e-17,6.938894e-18,6.938894e-18,6.938894e-18,6.938894e-18,0.058824,0.117647,0.058824,0.058824,0.058824,6.938894e-18,6.938894e-18,6.938894e-18,0.058824,6.938894e-18,0.058824,1.941176


In [158]:
print('Clusters rank as follows (based on "Total") based on optimum, starting with the most optimal: 3, 5, 2, 4, 1')

Clusters rank as follows (based on "Total") based on optimum, starting with the most optimal: 3, 5, 2, 4, 1


__Organize North York neighborhoods by cluster to which they belong__

In [126]:
northyork_neighborhood_clusters = pd.DataFrame([northyork_onehot.index, 1 + kmeans.labels_]).T
northyork_neighborhood_clusters.columns = ['Neighborhood', 'Cluster']
northyork_neighborhood_clusters

Unnamed: 0,Neighborhood,Cluster
0,"Bathurst Manor, Downsview North, Wilson Heights",4
1,Bayview Village,1
2,"Bedford Park, Lawrence Manor East",2
3,"CFB Toronto, Downsview East",1
4,Don Mills North,1
5,Downsview Central,1
6,Downsview Northwest,1
7,Downsview West,1
8,"Downsview, North Park, Upwood Park",1
9,"Emery, Humberlea",1


__Isolate most optimal North York neighborhood based on clusters (knowing that C3 is the most optimal cluster)__

In [141]:
northyork_neighborhood_clusters[northyork_neighborhood_clusters['Cluster'] == 3]


Unnamed: 0,Neighborhood,Cluster
19,Willowdale South,3


__Concluding remarks__

In [146]:
print('Given that C3 was determined to be the most optimal cluster, the most optimal location for a food & beverage warehouse within North York borough is the Willowdale South neighbourhood.')

Given that C3 was determined to be the most optimal cluster, the most optimal location for a food & beverage warehouse within North York borough is the Willowdale South neighbourhood.
