# Importing all relevant libraries

In [1]:
import numpy as np 
import pandas as pd 
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import json 
from geopy.geocoders import Nominatim 
import requests 
from pandas.io.json import json_normalize 
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium 
from bs4 import BeautifulSoup
import urllib3
print('Libraries imported.')

Libraries imported.


# Using BeautifulSoup to scrape the webpage for the relevant data, in this case the table of postcodes


In [2]:
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(source)



In [3]:
#Extracting Column Names
table_head = soup.find('tr')
header=table_head.text.split('\n')
header=header[1:4]


# Data extraction and processing

In [4]:
#Extracting Data
data=soup.find('tbody').text.strip().split('\n')

dataf = [data[x:x+5] for x in range(0, len(data),5)]
df=pd.DataFrame(dataf)
df.head()
df=df.drop(df.index[0])
df=df.drop([3,4], axis=1)
df.columns=header
df.head()
df = df[df['Borough']!='Not assigned']
dfn=df

In [5]:
#Data Processing, removing rows where boroughs not assigned 
df = df[df['Borough']!='Not assigned']
df.loc[df['Neighbourhood']=='Not assigned', 'Neighbourhood'] = df[df['Neighbourhood']=='Not assigned']['Borough']

for postcode,g in df.groupby('Postcode'):
    df.loc[df['Postcode']==postcode, 'Neighbourhoods'] = ','.join(g['Neighbourhood'].tolist())
   

 
df=df[['Postcode','Borough','Neighbourhoods']]



df.drop_duplicates('Postcode',keep='first',inplace=True)
df.reset_index(drop=True)

Unnamed: 0,Postcode,Borough,Neighbourhoods
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Queen's Park
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Rouge,Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens,Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson,Garden District"


In [6]:
df.shape

(103, 3)

In [7]:
#Sorting loaded data by postal code

geodf=pd.read_csv('Geospatial_Coordinates.csv')
geodf.shape
geodf=geodf.sort_values(by='Postal Code')


#sorting scraped data by postal code and combining
tst=df.sort_values(by='Postcode')
tst.loc(axis=1)['Latitude']=geodf[['Latitude']].values
tst.loc(axis=1)['Longitude']=geodf[['Longitude']].values

In [8]:
#to be used later in clustering
dft=tst

In [9]:
#final dataframe
tst=tst.reset_index(drop=True)
tst

Unnamed: 0,Postcode,Borough,Neighbourhoods,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848


# Visualizing the postal code layout in Toronto

In [10]:
import folium
latitude=43.6532
longitude=-79.3832
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# plotting coordinates of postal code, with Neighbourhood, Borough as labels in popup
for lat, lng, borough, neighborhood in zip(tst['Latitude'], dft['Longitude'], dft['Borough'], dft['Neighbourhoods']):
    label = 'Neighbourhoods contained:{}Borough: {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_Toronto)  
    
map_Toronto



# Using Foursquare API to get venues around the Post Code

In [11]:
CLIENT_ID = 'QIJAH1HDYSODDWAH4UOHSNJGGNXD1D5WOLU3SFOUWCRRI53T' 
CLIENT_SECRET = '441FOL4XTMQSK4F3YPBRQVR2YN4ESJQL40ALSBCBQMM0U20T' 
VERSION = '20180605'
LIMIT=100

def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)            
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)           
        results = requests.get(url).json()["response"]['groups'][0]['items']        
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Postcode', 
                  'Postcode Latitude', 
                  'Postcode Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']    
    return(nearby_venues)     

In [12]:
#Applying getNearbyVenues to postcodes
toronto_venues = getNearbyVenues(names=dft['Postcode'],latitudes=dft['Latitude'],longitudes=dft['Longitude'])

M1B
M1C
M1E
M1G
M1H
M1J
M1K
M1L
M1M
M1N
M1P
M1R
M1S
M1T
M1V
M1W
M1X
M2H
M2J
M2K
M2L
M2M
M2N
M2P
M2R
M3A
M3B
M3C
M3H
M3J
M3K
M3L
M3M
M3N
M4A
M4B
M4C
M4E
M4G
M4H
M4J
M4K
M4L
M4M
M4N
M4P
M4R
M4S
M4T
M4V
M4W
M4X
M4Y
M5A
M5B
M5C
M5E
M5G
M5H
M5J
M5K
M5L
M5M
M5N
M5P
M5R
M5S
M5T
M5V
M5W
M5X
M6A
M6B
M6C
M6E
M6G
M6H
M6J
M6K
M6L
M6M
M6N
M6P
M6R
M6S
M7A
M7R
M7Y
M8V
M8W
M8X
M8Y
M8Z
M9A
M9B
M9C
M9L
M9M
M9N
M9P
M9R
M9V
M9W


In [13]:
#Kowalski.. Analysis
toronto_venues.groupby('Postcode').count()

Unnamed: 0_level_0,Postcode Latitude,Postcode Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
M1B,15,15,15,15,15,15
M1C,5,5,5,5,5,5
M1E,23,23,23,23,23,23
M1G,9,9,9,9,9,9
M1H,28,28,28,28,28,28
M1J,12,12,12,12,12,12
M1K,27,27,27,27,27,27
M1L,28,28,28,28,28,28
M1M,12,12,12,12,12,12
M1N,13,13,13,13,13,13


In [14]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 327 uniques categories.


In [15]:
#One Hot Encoding for venue categories to be used later
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
toronto_onehot['Postcode'] = toronto_venues['Postcode'] 

In [16]:
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Postcode,ATM,Accessories Store,Afghan Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,Auto Garage,Auto Workshop,Automotive Shop,BBQ Joint,Baby Store,Badminton Court,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Stadium,Beach,Beach Bar,Beer Bar,Beer Store,Belgian Restaurant,Bike Shop,Bistro,Bookstore,Boutique,Bowling Alley,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bridge,Bubble Tea Shop,Buffet,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Café,Cajun / Creole Restaurant,Camera Store,Candy Store,Cantonese Restaurant,Caribbean Restaurant,Castle,Cemetery,Cheese Shop,Chinese Restaurant,Chiropractor,Chocolate Shop,Church,Churrascaria,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,College Quad,College Rec Center,College Stadium,College Theater,Comedy Club,Comfort Food Restaurant,Comic Shop,Community Center,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Coworking Space,Creperie,Cuban Restaurant,Cupcake Shop,Curling Ice,Dance Studio,Deli / Bodega,Dentist's Office,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fireworks Store,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,German Restaurant,Gift Shop,Golf Course,Golf Driving Range,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Hakka Restaurant,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Hockey Arena,Home Service,Hong Kong Restaurant,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hotpot Restaurant,Housing Development,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indie Movie Theater,Indie Theater,Indonesian Restaurant,Intersection,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Karaoke Bar,Kitchen Supply Store,Korean Restaurant,Lake,Latin American Restaurant,Laundry Service,Light Rail Station,Lingerie Store,Liquor Store,Lounge,Mac & Cheese Joint,Malay Restaurant,Market,Massage Studio,Mattress Store,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Museum,Music School,Music Store,Music Venue,Nail Salon,Neighborhood,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Other Repair Shop,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Persian Restaurant,Pet Store,Pharmacy,Pide Place,Pie Shop,Pilates Studio,Pizza Place,Playground,Plaza,Poke Place,Pool,Pool Hall,Portuguese Restaurant,Poutine Place,Print Shop,Pub,Ramen Restaurant,Record Shop,Recreation Center,Rental Car Location,Residential Building (Apartment / Condo),Restaurant,River,Road,Rock Climbing Spot,Rock Club,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,School,Sculpture Garden,Seafood Restaurant,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Ski Area,Ski Chalet,Smoke Shop,Smoothie Shop,Snack Place,Soccer Field,Soccer Stadium,Soup Place,South American Restaurant,Southern / Soul Food Restaurant,Souvlaki Shop,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Sri Lankan Restaurant,Stationery Store,Steakhouse,Storage Facility,Street Art,Supermarket,Supplement Shop,Sushi Restaurant,Syrian Restaurant,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tea Room,Tech Startup,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Tibetan Restaurant,Toy / Game Store,Track,Trail,Train Station,Transportation Service,Tree,Tunnel,Turkish Restaurant,Udon Restaurant,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,M1B,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,M1B,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,M1B,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,M1B,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,M1B,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [17]:
toronto_grouped = toronto_onehot.groupby('Postcode').mean().reset_index()


In [18]:
#Top five venues per Postcode and frequency
num_top_venues = 5

for hood in toronto_grouped['Postcode']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Postcode'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----M1B----
                           venue  freq
0           Fast Food Restaurant  0.20
1                            Gym  0.07
2                    Bus Station  0.07
3        Fruit & Vegetable Store  0.07
4  Paper / Office Supplies Store  0.07


----M1C----
                venue  freq
0      Breakfast Spot   0.2
1                Park   0.2
2        Burger Joint   0.2
3  Italian Restaurant   0.2
4          Playground   0.2


----M1E----
                  venue  freq
0           Pizza Place  0.17
1  Fast Food Restaurant  0.09
2           Coffee Shop  0.09
3        Sandwich Place  0.04
4      Greek Restaurant  0.04


----M1G----
                  venue  freq
0                  Park  0.22
1           Coffee Shop  0.22
2    Chinese Restaurant  0.11
3  Fast Food Restaurant  0.11
4     Indian Restaurant  0.11


----M1H----
               venue  freq
0        Coffee Shop  0.11
1             Bakery  0.11
2  Indian Restaurant  0.07
3           Pharmacy  0.07
4             Lounge  0.04


----M1

In [19]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [20]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

columns = ['Postcode']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))


neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Postcode'] = toronto_grouped['Postcode']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)



In [21]:
neighborhoods_venues_sorted

Unnamed: 0,Postcode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Fast Food Restaurant,Coffee Shop,Fruit & Vegetable Store,Bakery,Paper / Office Supplies Store,Caribbean Restaurant,Sandwich Place,Trail,Auto Workshop,Gym
1,M1C,Breakfast Spot,Park,Burger Joint,Playground,Italian Restaurant,Farmers Market,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space
2,M1E,Pizza Place,Fast Food Restaurant,Coffee Shop,Bank,Smoothie Shop,Fried Chicken Joint,Greek Restaurant,Grocery Store,Shopping Mall,Supermarket
3,M1G,Park,Coffee Shop,Indian Restaurant,Electronics Store,Pharmacy,Fast Food Restaurant,Chinese Restaurant,Filipino Restaurant,Field,Fireworks Store
4,M1H,Bakery,Coffee Shop,Pharmacy,Indian Restaurant,Home Service,Hakka Restaurant,Sporting Goods Shop,Fast Food Restaurant,Music Store,Fried Chicken Joint
5,M1J,Fast Food Restaurant,Sandwich Place,Japanese Restaurant,Convenience Store,Train Station,Grocery Store,Restaurant,Bowling Alley,Coffee Shop,Pizza Place
6,M1K,Discount Store,Coffee Shop,Chinese Restaurant,Grocery Store,Fast Food Restaurant,Burger Joint,Bank,Pharmacy,Asian Restaurant,Department Store
7,M1L,Coffee Shop,Intersection,Convenience Store,Fast Food Restaurant,Bakery,Bank,Trail,Soccer Field,Mexican Restaurant,Metro Station
8,M1M,Pizza Place,Fast Food Restaurant,Beach,Sports Bar,Burger Joint,Park,Hardware Store,Cajun / Creole Restaurant,Zoo,Falafel Restaurant
9,M1N,Park,College Stadium,Skating Rink,Diner,Gym,Auto Garage,Gym Pool,General Entertainment,Café,Asian Restaurant


In [22]:
kclusters = 12
toronto_grouped_clustering = toronto_grouped.drop('Postcode', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)
kmeans.labels_[0:10] 

array([ 8,  3,  7, 11,  1,  8,  1,  1,  7,  9], dtype=int32)

In [23]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)



In [24]:
toronto_merged = dft
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Postcode'), on='Postcode')
toronto_merged.head() 

Unnamed: 0,Postcode,Borough,Neighbourhoods,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,8.0,Fast Food Restaurant,Coffee Shop,Fruit & Vegetable Store,Bakery,Paper / Office Supplies Store,Caribbean Restaurant,Sandwich Place,Trail,Auto Workshop,Gym
28,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,3.0,Breakfast Spot,Park,Burger Joint,Playground,Italian Restaurant,Farmers Market,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space
43,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711,7.0,Pizza Place,Fast Food Restaurant,Coffee Shop,Bank,Smoothie Shop,Fried Chicken Joint,Greek Restaurant,Grocery Store,Shopping Mall,Supermarket
54,M1G,Scarborough,Woburn,43.770992,-79.216917,11.0,Park,Coffee Shop,Indian Restaurant,Electronics Store,Pharmacy,Fast Food Restaurant,Chinese Restaurant,Filipino Restaurant,Field,Fireworks Store
63,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1.0,Bakery,Coffee Shop,Pharmacy,Indian Restaurant,Home Service,Hakka Restaurant,Sporting Goods Shop,Fast Food Restaurant,Music Store,Fried Chicken Joint


In [25]:
toronto_merged[['Cluster Labels']]=toronto_merged[['Cluster Labels']].fillna(0,downcast='infer')
#np.nan_to_num(toronto_merged['Cluster Labels']).astype(int)
toronto_merged

Unnamed: 0,Postcode,Borough,Neighbourhoods,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,8,Fast Food Restaurant,Coffee Shop,Fruit & Vegetable Store,Bakery,Paper / Office Supplies Store,Caribbean Restaurant,Sandwich Place,Trail,Auto Workshop,Gym
28,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,3,Breakfast Spot,Park,Burger Joint,Playground,Italian Restaurant,Farmers Market,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space
43,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711,7,Pizza Place,Fast Food Restaurant,Coffee Shop,Bank,Smoothie Shop,Fried Chicken Joint,Greek Restaurant,Grocery Store,Shopping Mall,Supermarket
54,M1G,Scarborough,Woburn,43.770992,-79.216917,11,Park,Coffee Shop,Indian Restaurant,Electronics Store,Pharmacy,Fast Food Restaurant,Chinese Restaurant,Filipino Restaurant,Field,Fireworks Store
63,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1,Bakery,Coffee Shop,Pharmacy,Indian Restaurant,Home Service,Hakka Restaurant,Sporting Goods Shop,Fast Food Restaurant,Music Store,Fried Chicken Joint
77,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,8,Fast Food Restaurant,Sandwich Place,Japanese Restaurant,Convenience Store,Train Station,Grocery Store,Restaurant,Bowling Alley,Coffee Shop,Pizza Place
92,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029,1,Discount Store,Coffee Shop,Chinese Restaurant,Grocery Store,Fast Food Restaurant,Burger Joint,Bank,Pharmacy,Asian Restaurant,Department Store
108,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577,1,Coffee Shop,Intersection,Convenience Store,Fast Food Restaurant,Bakery,Bank,Trail,Soccer Field,Mexican Restaurant,Metro Station
124,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476,7,Pizza Place,Fast Food Restaurant,Beach,Sports Bar,Burger Joint,Park,Hardware Store,Cajun / Creole Restaurant,Zoo,Falafel Restaurant
141,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848,9,Park,College Stadium,Skating Rink,Diner,Gym,Auto Garage,Gym Pool,General Entertainment,Café,Asian Restaurant


In [26]:
#Creating map with clusters
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)


x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]


markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Postcode'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# We see that cluster 0 has a trend in the top 3 most common values of Pharmacy, Park, or Convenience Store 

In [27]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
247,Scarborough,0,,,,,,,,,,
167,North York,0,Pharmacy,Convenience Store,Pizza Place,Bus Line,Eastern European Restaurant,Bakery,Grocery Store,Discount Store,Coffee Shop,Park
3,North York,0,Park,Convenience Store,Shopping Mall,Pharmacy,Bus Stop,Supermarket,Food & Drink Shop,Cosmetics Shop,Fast Food Restaurant,Café
244,Etobicoke,0,Discount Store,Pharmacy,Convenience Store,Pizza Place,Park,Intersection,Liquor Store,Donut Shop,Skating Rink,Garden Center
11,Etobicoke,0,Pharmacy,Café,Bank,Bakery,Golf Course,Park,Skating Rink,Grocery Store,Convenience Store,Shopping Mall
123,North York,0,Bank,Pharmacy,Bakery,Shopping Mall,Park,Electronics Store,Pizza Place,Empanada Restaurant,Italian Restaurant,Drugstore
139,North York,0,Bakery,Convenience Store,Storage Facility,Park,Discount Store,Intersection,Golf Course,Falafel Restaurant,Dumpling Restaurant,Eastern European Restaurant


# Cluster 1 has a clear trend of coffee shops being consistently in the top 3 

In [28]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
63,Scarborough,1,Bakery,Coffee Shop,Pharmacy,Indian Restaurant,Home Service,Hakka Restaurant,Sporting Goods Shop,Fast Food Restaurant,Music Store,Fried Chicken Joint
92,Scarborough,1,Discount Store,Coffee Shop,Chinese Restaurant,Grocery Store,Fast Food Restaurant,Burger Joint,Bank,Pharmacy,Asian Restaurant,Department Store
108,Scarborough,1,Coffee Shop,Intersection,Convenience Store,Fast Food Restaurant,Bakery,Bank,Trail,Soccer Field,Mexican Restaurant,Metro Station
152,Scarborough,1,Electronics Store,Fast Food Restaurant,Coffee Shop,Burger Joint,Furniture / Home Store,Light Rail Station,Indian Restaurant,Asian Restaurant,Bakery,Automotive Shop
165,Scarborough,1,Pizza Place,Middle Eastern Restaurant,Grocery Store,Burger Joint,Soccer Field,Restaurant,Gas Station,Supermarket,Korean Restaurant,Bar
181,Scarborough,1,Chinese Restaurant,Shopping Mall,Caribbean Restaurant,Bakery,Supermarket,Sandwich Place,Pizza Place,Hong Kong Restaurant,Lounge,Sushi Restaurant
192,Scarborough,1,Pizza Place,Shopping Mall,Fast Food Restaurant,Coffee Shop,Sandwich Place,Italian Restaurant,Bakery,Thai Restaurant,Cantonese Restaurant,Bank
206,Scarborough,1,Chinese Restaurant,Bakery,Pizza Place,Park,Dessert Shop,Bubble Tea Shop,Event Space,Malay Restaurant,Caribbean Restaurant,Noodle House
237,Scarborough,1,Chinese Restaurant,Fast Food Restaurant,Bakery,Coffee Shop,Bank,Gym Pool,Sandwich Place,Tennis Court,Thrift / Vintage Store,Grocery Store
64,North York,1,Pharmacy,Park,Coffee Shop,Pool,Shopping Mall,Chinese Restaurant,Korean Restaurant,Diner,Bank,Sandwich Place


# Cluster 9 has a Cafe as a strong classifier

In [33]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 9, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
141,Scarborough,9,Park,College Stadium,Skating Rink,Diner,Gym,Auto Garage,Gym Pool,General Entertainment,Café,Asian Restaurant
127,North York,9,Korean Restaurant,Café,Middle Eastern Restaurant,Coffee Shop,Dessert Shop,Park,Shopping Mall,Fried Chicken Joint,Sandwich Place,Supermarket
143,North York,9,Coffee Shop,Bubble Tea Shop,Ramen Restaurant,Pizza Place,Japanese Restaurant,Korean Restaurant,Sandwich Place,Fast Food Restaurant,Café,Sushi Restaurant
15,North York,9,Japanese Restaurant,Burger Joint,Pizza Place,Coffee Shop,Bar,Greek Restaurant,Electronics Store,Liquor Store,Baseball Field,Thai Restaurant
32,North York,9,Restaurant,Gym,Coffee Shop,Beer Store,American Restaurant,Asian Restaurant,Supermarket,Japanese Restaurant,Bike Shop,Bank
48,East Toronto,9,Pub,Coffee Shop,Pizza Place,Breakfast Spot,Beach,Japanese Restaurant,Bar,Sandwich Place,Café,Bakery
83,East York,9,Coffee Shop,Café,Greek Restaurant,Park,Ethiopian Restaurant,Bar,Pharmacy,Pizza Place,Convenience Store,Fast Food Restaurant
98,East Toronto,9,Greek Restaurant,Coffee Shop,Pub,Café,Italian Restaurant,Pizza Place,Ice Cream Shop,Fast Food Restaurant,Ramen Restaurant,Furniture / Home Store
114,East Toronto,9,Indian Restaurant,Coffee Shop,Café,Beach,Burrito Place,Pizza Place,Burger Joint,Bakery,Restaurant,Brewery
130,East Toronto,9,Coffee Shop,Bar,Italian Restaurant,Diner,Brewery,Bakery,American Restaurant,Café,Vietnamese Restaurant,French Restaurant
