# TABLE OF CONTENT : 
 1. [Scrapping Wikipedia Web page](#1)<br>
 2. [Adding coordinates of neighborhoods](#2)<br>
 3. [Folium maps](#3)<br>
 4. [Cluster neighborhoods](#4)<br>

In [None]:
#Installing beautifulsoup package
#!pip install beautifulsoup4

In [1]:
#import of libraries necessary to scrap the web page 
import pandas as pd
from bs4 import BeautifulSoup
import requests
import numpy as np
import folium
import matplotlib as plt
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# 1. SCRAPPING WIKIPEDIA WEB PAGE <a id="1"></a>

In [2]:
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
url

'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [3]:
#parsing the web page wikipedia
page_response=requests.get(url,timeout=5)
page_content = BeautifulSoup(page_response.content, 'lxml')

In [4]:
#creation of the data frame containing the table from wikipedia page : 
# varibale with all the tables found in the page 
tables=page_content.find_all('table')
#focus on the table we are interested in : 
table=tables[0].tbody
#creation of our data frame :
df_table=pd.DataFrame(columns=['PostalCode','Borough','Neighborhood'])
#iteration in the table in order to extract content and add it to our dataframe 
for row in table.find_all('tr') : 
    cols=row.find_all('td')
    try : 
        df_table=df_table.append({'PostalCode':cols[0].text,
                                          'Borough': cols[1].text, 
                                          'Neighborhood': cols[2].text, 
                                         }, ignore_index=True)
    except: 
        pass

df_table

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A\n,Not assigned\n,\n
1,M2A\n,Not assigned\n,\n
2,M3A\n,North York\n,Parkwoods\n
3,M4A\n,North York\n,Victoria Village\n
4,M5A\n,Downtown Toronto\n,Regent Park / Harbourfront\n
5,M6A\n,North York\n,Lawrence Manor / Lawrence Heights\n
6,M7A\n,Downtown Toronto\n,Queen's Park / Ontario Provincial Government\n
7,M8A\n,Not assigned\n,\n
8,M9A\n,Etobicoke\n,Islington Avenue\n
9,M1B\n,Scarborough\n,Malvern / Rouge\n


In [5]:
#data frame cleaning by removin the '\n' using  REGEX: 
import re
for index,row in df_table.iterrows():
    for col in df_table.columns:
        row[col]= re.sub(r'\n','',row[col])
df_table

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government
7,M8A,Not assigned,
8,M9A,Etobicoke,Islington Avenue
9,M1B,Scarborough,Malvern / Rouge


In [7]:
#drop rows where no borough were attributed and copy borough neighborhood where no neighborood
for index, row in df_table.iterrows():
    row['Neighborhood']=row['Neighborhood'].replace('/',',')
    if row['Borough']=='Not assigned':
        df_table.drop([index],inplace=True)
    if row['Neighborhood']== '':
        row['Neighborhood']=row['Borough']
df_table.reset_index(inplace=True,drop=True)
df_table.head(15)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern , Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill , Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [8]:
# checking if any null values are stil in the data frame : 
df_clean=df_table.isnull()
for column in df_clean.columns: 
    print(column)
    print(df_clean[column].value_counts())

PostalCode
False    103
Name: PostalCode, dtype: int64
Borough
False    103
Name: Borough, dtype: int64
Neighborhood
False    103
Name: Neighborhood, dtype: int64


In [9]:
df_table.shape

(103, 3)

In [10]:
#export data to csv
import csv
#df_table.to_csv('postal_code.csv')

# 2. Adding coordinates of neighborhoods <a id="2"></a>


In [None]:
# instal of geocoder 
#!pip install geocoder

In [11]:
import geocoder # import geocoder

In [12]:
# initialize your variable to None
lat_lng_coords = None
for index,code in enumerate(df_table['PostalCode']): 
    postal_code=code
    print(index, 'st row to complete')
# loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.geocodefarm('{}, Toronto, Ontario,Canada'.format(postal_code))
        lat_lng_coords = g.latlng
        print(lat_lng_coords)
    df_table.at[index,'latitude'] = lat_lng_coords[0]
    df_table.at[index,'longitude'] = lat_lng_coords[1]
    print(index, 'st row completed')
    lat_lng_coords = None

0 st row to complete
[43.7518806457716, -79.3303604125129]
0 st row completed
1 st row to complete
[43.7304191589716, -79.3128204341299]
1 st row completed
2 st row to complete
[43.6551399230715, -79.362648010213]
2 st row completed
3 st row to complete
[43.7232093811716, -79.4514083861301]
3 st row completed
4 st row to complete
[43.6644897460715, -79.393020629813]
4 st row completed
5 st row to complete
[43.6627693176715, -79.528312683113]
5 st row completed
6 st row to complete
[43.8115310668717, -79.1955184936129]
6 st row completed
7 st row to complete
[43.7492904663716, -79.361686706513]
7 st row completed
8 st row to complete
[43.7079391479716, -79.3115997314129]
8 st row completed
9 st row to complete
[43.6573600769715, -79.37818145713]
9 st row completed
10 st row to complete
[43.7079887390716, -79.448379516613]
10 st row completed
11 st row to complete
[43.6527900697152, -79.554061889613]
11 st row completed
12 st row to complete
[43.7856407165717, -79.1587066650129]
12 st ro

In [13]:
# extraction of the dataframe into a csv file
#df_table.to_csv('postal_code.csv')

# 3. Folium Maps <a id="3"></a>

In [18]:
#reading the previously completed csv with cordinates
df_table=pd.read_csv('postal_code.csv',index_col=0)
df_table

Unnamed: 0,PostalCode,Borough,Neighborhood,latitude,longitude
0,M3A,North York,Parkwoods,43.751881,-79.33036
1,M4A,North York,Victoria Village,43.730419,-79.31282
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65514,-79.362648
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.723209,-79.451408
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.66449,-79.393021
5,M9A,Etobicoke,Islington Avenue,43.662769,-79.528313
6,M1B,Scarborough,"Malvern , Rouge",43.811531,-79.195518
7,M3B,North York,Don Mills,43.74929,-79.361687
8,M4B,East York,"Parkview Hill , Woodbine Gardens",43.707939,-79.3116
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.65736,-79.378181


In [16]:
#checking data with folium maps of neigborhoods
#1st coordinates of Toronto
g2 = geocoder.geocodefarm('Toronto, Ontario,Canada')
latlong= g2.latlng
latitude= latlong[0]
longitude = latlong[1]
print(f'latitude {latitude} & longitude {longitude}')

latitude 43.6486892707151 & longitude -79.385437011713


In [19]:
#folium map : 
toronto_map= folium.Map(location=[latitude, longitude],tiles='OpenStreetMap', zoom_start=10)
for borough,neighborhood, lat,long in zip(df_table['Borough'],df_table['Neighborhood'],df_table['latitude'],df_table['longitude']):
    label=(f'{borough}, {neighborhood}')
    label=folium.Popup(label,parse_html=True)
    folium.CircleMarker(location=[lat,long],
                        popup=label,
                        radius=5, 
                        fill=True,
                        fill_color='#3388ff',
                        fill_opacity=1,
                        parse_html=False).add_to(toronto_map)
    
toronto_map

# 4. Cluster neighborhoods<a id="4"></a>

In [20]:
#import librairies : 
# library to handle JSON files
import json 
 # library to handle requests
import requests 
from sklearn.cluster import KMeans
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [23]:
#GET INFORMATION ON FOURSQUARE API
#FOURSQUARE CREDENTIALS
CLIENT_ID = 'CUF5UKNECLDYIDGLLSJJF32FI1RYQJBR1WRLHIFQEVRSJJO4' 
CLIENT_SECRET = 'SW40I4VDCJKKBFFXLQOFRMBRW0AQ1DWO3ZO23QF3JUXLKWFJ' 
VERSION = '20180605' 

print('Credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

#foursquare url : 
url_base='https://api.foursquare.com/v2/'

Credentials:
CLIENT_ID: CUF5UKNECLDYIDGLLSJJF32FI1RYQJBR1WRLHIFQEVRSJJO4
CLIENT_SECRET:SW40I4VDCJKKBFFXLQOFRMBRW0AQ1DWO3ZO23QF3JUXLKWFJ


In [24]:
Toronto_data=pd.DataFrame(columns=df_table.columns[1:])
for index,row in enumerate(df_table['Borough']):
    if 'Toronto' in row: 
        Toronto_data=Toronto_data.append(df_table.loc[index,'Borough':'longitude'])
Toronto_data.reset_index(inplace=True,drop=True)

In [25]:
Toronto_data

Unnamed: 0,Borough,Neighborhood,latitude,longitude
0,Downtown Toronto,"Regent Park , Harbourfront",43.65514,-79.362648
1,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.66449,-79.393021
2,Downtown Toronto,"Garden District, Ryerson",43.65736,-79.378181
3,Downtown Toronto,St. James Town,43.651428,-79.375572
4,East Toronto,The Beaches,43.677029,-79.295418
5,Downtown Toronto,Berczy Park,43.645309,-79.37368
6,Downtown Toronto,Central Bay Street,43.65609,-79.384933
7,Downtown Toronto,Christie,43.668781,-79.420708
8,Downtown Toronto,"Richmond , Adelaide , King",43.6497,-79.382584
9,West Toronto,"Dufferin , Dovercourt Village",43.665089,-79.438713


In [26]:
# visualisation of Toronto Boroughs : 

borough= folium.Map(location=[latitude,longitude],zoom_start=11)
for lat,long,bo,nei in zip(Toronto_data['latitude'],Toronto_data['longitude'],Toronto_data['Borough'],Toronto_data['Neighborhood']):
    label=(bo +' -\n'+ nei)
    label=folium.Popup(label, parse_html=True)
    folium.CircleMarker(location=[lat,long],
                        popup=label,
                        radius=5,
                        fill=True,
                        fill_color='#3388ff',
                        fill_opacity=1,
                        parse_html=False
                       ).add_to(borough)
borough

In [75]:
# connect to foursquare and get the venues for ONLY TORONTO BOROUGH
RADIUS=600
LIMIT= 100
temp=pd.DataFrame()
only_toronto_venues = pd.DataFrame(columns=['Borough', 'Neighborhood', 'latitude', 'longitude','venu_name','venue_category'])
for  bo, nei,lat,long in zip(Toronto_data['Borough'],Toronto_data['Neighborhood'],Toronto_data['latitude'],Toronto_data['longitude']):
    url=url_base+f'venues/explore?&client_id={CLIENT_ID}&client_secret={CLIENT_SECRET}&v={VERSION}&ll={lat},{long}&radius={RADIUS}&limit={LIMIT}'
    results=requests.get(url).json()    
    for i in range(len(results['response']['groups'][0]['items'])):
        temp.at[i,'Borough']=bo
        temp.at[i,'Neighborhood']=nei
        temp.at[i,'latitude']=lat
        temp.at[i,'longitude']=long
        temp.at[i,'venu_name']=results['response']['groups'][0]['items'][i]['venue']['name']
        temp.at[i,'venue_category']=results['response']['groups'][0]['items'][i]['venue']['categories'][0]['name']
    only_toronto_venues=only_toronto_venues.append(temp,ignore_index=True)
only_toronto_venues.to_csv('only_toronto_venues.csv')
only_toronto_venues

Unnamed: 0,Borough,Neighborhood,latitude,longitude,venu_name,venue_category
0,Downtown Toronto,"Regent Park , Harbourfront",43.65514,-79.362648,Roselle Desserts,Bakery
1,Downtown Toronto,"Regent Park , Harbourfront",43.65514,-79.362648,Tandem Coffee,Coffee Shop
2,Downtown Toronto,"Regent Park , Harbourfront",43.65514,-79.362648,Figs Breakfast & Lunch,Breakfast Spot
3,Downtown Toronto,"Regent Park , Harbourfront",43.65514,-79.362648,Morning Glory Cafe,Breakfast Spot
4,Downtown Toronto,"Regent Park , Harbourfront",43.65514,-79.362648,Cocina Economica,Mexican Restaurant
5,Downtown Toronto,"Regent Park , Harbourfront",43.65514,-79.362648,The Yoga Lounge,Yoga Studio
6,Downtown Toronto,"Regent Park , Harbourfront",43.65514,-79.362648,Body Blitz Spa East,Spa
7,Downtown Toronto,"Regent Park , Harbourfront",43.65514,-79.362648,Rooster Coffee,Coffee Shop
8,Downtown Toronto,"Regent Park , Harbourfront",43.65514,-79.362648,Starbucks,Coffee Shop
9,Downtown Toronto,"Regent Park , Harbourfront",43.65514,-79.362648,Berkeley Church,Event Space


In [None]:
#read data with venues from previously created csv
only_toronto_venues= pd.read_csv('only_toronto_venues.csv')

In [76]:
# group by neighborhoods the most venues category 
dummies=pd.get_dummies(only_toronto_venues[['venue_category']], prefix="", prefix_sep="")
dummies['Neighborhood']=only_toronto_venues['Neighborhood']
#getting last column at the bigining (neigborhoods):
ordered_columns=[dummies.columns[-1]]+ list(dummies.columns[:-1])
dummies=dummies[ordered_columns]
dummies
#calculate the average venue categories per neighborhoods
only_toronto_grouped=dummies.groupby('Neighborhood').mean().reset_index()
only_toronto_grouped.head()

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,American Restaurant,Animal Shelter,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Stadium,Basketball Stadium,Beach,Beach Bar,Beer Bar,Beer Store,Belgian Restaurant,Bike Shop,Bistro,Board Shop,Boat or Ferry,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Bus Stop,Business Service,Butcher,Café,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chiropractor,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Gym,College Rec Center,College Theater,Colombian Restaurant,Comedy Club,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Creperie,Cuban Restaurant,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Gas Station,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Harbor / Marina,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Karaoke Bar,Korean Restaurant,Lake,Latin American Restaurant,Library,Light Rail Station,Lingerie Store,Liquor Store,Lounge,Market,Martial Arts Dojo,Massage Studio,Mattress Store,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Movie Theater,Museum,Music School,Music Store,Music Venue,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Pakistani Restaurant,Park,Pastry Shop,Performing Arts Venue,Peruvian Restaurant,Pet Store,Pharmacy,Pilates Studio,Pizza Place,Playground,Plaza,Poke Place,Pool Hall,Portuguese Restaurant,Poutine Place,Pub,Ramen Restaurant,Record Shop,Residential Building (Apartment / Condo),Restaurant,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shoe Store,Shop & Service,Shopping Mall,Skating Rink,Smoke Shop,Snack Place,Soccer Field,Soup Place,Souvlaki Shop,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Sri Lankan Restaurant,Stationery Store,Steakhouse,Strip Club,Supermarket,Sushi Restaurant,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,Berczy Park,0.01,0.0,0.01,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.03,0.0,0.0,0.0,0.01,0.01,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.02,0.0,0.0,0.01,0.01,0.03,0.09,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.02,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.04,0.03,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.04,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Brockton , Parkdale Village , Exhibition Place",0.0,0.016129,0.0,0.0,0.0,0.0,0.048387,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.056452,0.0,0.0,0.0,0.016129,0.008065,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.064516,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.024194,0.080645,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.032258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.016129,0.0,0.0,0.0,0.0,0.008065,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.008065,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.008065,0.016129,0.0,0.016129,0.0,0.0,0.024194,0.008065,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.008065,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.048387,0.0,0.0,0.0,0.0,0.024194,0.0,0.0,0.008065,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.024194,0.0,0.0,0.0,0.0,0.0,0.0,0.008065,0.0,0.0,0.016129,0.0,0.016129,0.0,0.0,0.0,0.0,0.024194,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Business reply mail Processing CentrE,0.01,0.0,0.04,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.09,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.01,0.01,0.0,0.01,0.02,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.03,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.03,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0
3,"CN Tower , King and Spadina , Railway Lands , ...",0.009434,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.018868,0.037736,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037736,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.037736,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.113208,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.018868,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037736,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.009434,0.0,0.0,0.0,0.0,0.0,0.056604,0.037736,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009434,0.0,0.0,0.0,0.018868,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04717,0.0,0.0,0.018868,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.009434,0.0,0.0,0.028302,0.0,0.0,0.0,0.0,0.056604,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037736,0.018868,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018868,0.018868,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.010989,0.005495,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010989,0.021978,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010989,0.0,0.0,0.021978,0.0,0.016484,0.0,0.021978,0.010989,0.0,0.0,0.0,0.0,0.043956,0.0,0.0,0.0,0.0,0.0,0.0,0.06044,0.010989,0.153846,0.0,0.0,0.010989,0.0,0.0,0.0,0.0,0.005495,0.0,0.0,0.021978,0.0,0.0,0.0,0.0,0.0,0.010989,0.0,0.032967,0.010989,0.0,0.0,0.0,0.010989,0.0,0.0,0.0,0.0,0.0,0.010989,0.0,0.0,0.010989,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010989,0.021978,0.0,0.0,0.010989,0.0,0.0,0.0,0.005495,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010989,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027473,0.0,0.0,0.0,0.010989,0.0,0.032967,0.010989,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010989,0.016484,0.0,0.0,0.005495,0.0,0.0,0.032967,0.0,0.0,0.0,0.010989,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010989,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016484,0.0,0.0,0.0,0.0,0.010989,0.016484,0.0,0.0,0.016484,0.0,0.0,0.021978,0.0,0.032967,0.0,0.0,0.0,0.010989,0.0,0.010989,0.0,0.0,0.0,0.0,0.0,0.0,0.010989,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005495,0.0,0.0,0.0,0.010989,0.010989,0.005495,0.0,0.0,0.021978,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010989,0.0,0.010989,0.0,0.0,0.0,0.010989


In [None]:
only_toronto_grouped.shape

In [77]:
# keep the 10 most present venues per neighborhoods
#sort the venues :
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)

    return row_categories_sorted.index.values[0:num_top_venues] 


In [78]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = only_toronto_grouped['Neighborhood']

for ind in np.arange(only_toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(only_toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Café,Hotel,Seafood Restaurant,Italian Restaurant,Restaurant,Bakery,Japanese Restaurant,Cocktail Bar,Beer Bar
1,"Brockton , Parkdale Village , Exhibition Place",Coffee Shop,Café,Bar,Restaurant,Art Gallery,Event Space,Bakery,Gift Shop,Vegetarian / Vegan Restaurant,Italian Restaurant
2,Business reply mail Processing CentrE,Coffee Shop,Café,Hotel,American Restaurant,Restaurant,Tea Room,Sushi Restaurant,Thai Restaurant,Breakfast Spot,Seafood Restaurant
3,"CN Tower , King and Spadina , Railway Lands , ...",Coffee Shop,Sandwich Place,Italian Restaurant,Park,Café,Spa,Grocery Store,Japanese Restaurant,Bank,Brewery
4,Central Bay Street,Coffee Shop,Clothing Store,Café,Movie Theater,Sandwich Place,Italian Restaurant,Diner,Hotel,Theater,Burger Joint


In [109]:
# apply clustering algorithm on neighborhoods based on most common categories 
k=5

only_toronto_grouped_clustering = only_toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
#we 
kmeans = KMeans(init = "k-means++",n_clusters=k, n_init=12, random_state=1).fit(only_toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [110]:
# add clustering labels
try : 
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
except: 
    neighborhoods_venues_sorted.drop(columns='Cluster Labels',inplace=True)
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = Toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Borough,Neighborhood,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,"Regent Park , Harbourfront",43.65514,-79.362648,0,Coffee Shop,Restaurant,Park,Pub,Bakery,Grocery Store,Theater,Café,Playground,Dance Studio
1,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.66449,-79.393021,0,Coffee Shop,Clothing Store,Boutique,Café,Park,Sushi Restaurant,Steakhouse,Museum,College Arts Building,Beer Bar
2,Downtown Toronto,"Garden District, Ryerson",43.65736,-79.378181,0,Coffee Shop,Clothing Store,Falafel Restaurant,Japanese Restaurant,Bubble Tea Shop,Restaurant,Café,Hotel,Fried Chicken Joint,Italian Restaurant
3,Downtown Toronto,St. James Town,43.651428,-79.375572,0,Café,Coffee Shop,Seafood Restaurant,Italian Restaurant,Theater,American Restaurant,Gastropub,Hotel,Bakery,Cosmetics Shop
4,East Toronto,The Beaches,43.677029,-79.295418,0,Bakery,Health Food Store,Church,Park,Pub,Trail,Event Space,Dumpling Restaurant,Eastern European Restaurant,Electronics Store


In [111]:
#we select the rows corresponding to cluster 0
#we select only the columns correspondig to neighborhood column(1) and most common venues (from column 5 to the last one)
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Regent Park , Harbourfront",Coffee Shop,Restaurant,Park,Pub,Bakery,Grocery Store,Theater,Café,Playground,Dance Studio
1,"Queen's Park , Ontario Provincial Government",Coffee Shop,Clothing Store,Boutique,Café,Park,Sushi Restaurant,Steakhouse,Museum,College Arts Building,Beer Bar
2,"Garden District, Ryerson",Coffee Shop,Clothing Store,Falafel Restaurant,Japanese Restaurant,Bubble Tea Shop,Restaurant,Café,Hotel,Fried Chicken Joint,Italian Restaurant
3,St. James Town,Café,Coffee Shop,Seafood Restaurant,Italian Restaurant,Theater,American Restaurant,Gastropub,Hotel,Bakery,Cosmetics Shop
4,The Beaches,Bakery,Health Food Store,Church,Park,Pub,Trail,Event Space,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
5,Berczy Park,Coffee Shop,Café,Hotel,Seafood Restaurant,Italian Restaurant,Restaurant,Bakery,Japanese Restaurant,Cocktail Bar,Beer Bar
6,Central Bay Street,Coffee Shop,Clothing Store,Café,Movie Theater,Sandwich Place,Italian Restaurant,Diner,Hotel,Theater,Burger Joint
7,Christie,Café,Grocery Store,Playground,Italian Restaurant,Candy Store,Nightclub,Restaurant,Diner,Baby Store,Coffee Shop
8,"Richmond , Adelaide , King",Coffee Shop,Café,Hotel,Restaurant,Cosmetics Shop,Bar,Thai Restaurant,Japanese Restaurant,Shopping Mall,Gift Shop
9,"Dufferin , Dovercourt Village",Park,Bakery,Café,Grocery Store,Brewery,Diner,Smoke Shop,Bus Line,Music Venue,Coffee Shop


In [112]:
#we select the rows corresponding to cluster 1
#we select only the columns correspondig to neighborhood column(1) and most common venues (from column 5 to the last one)
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Lawrence Park,Park,Business Service,Women's Store,Dumpling Restaurant,Flea Market,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm


In [113]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,Forest Hill North & West,Playground,Park,Doner Restaurant,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
33,Rosedale,Park,Playground,Candy Store,Gym / Fitness Center,Grocery Store,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Falafel Restaurant


In [114]:
toronto_merged.loc[toronto_merged['Cluster Labels'] ==3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,Roselawn,Pet Store,Garden,Women's Store,Donut Shop,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant


In [115]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
29,"Moore Park , Summerhill East",Playground,Convenience Store,Gym,Doner Restaurant,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant


In [116]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(k)
ys = k
colors_array = cm.rainbow(np.linspace(0, 1, ys))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['latitude'], toronto_merged['longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels'].astype(int)):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Code to loop dataframe et merged neighborhoods with similar postal codes

In [None]:
df_test=df_table[['PostalCode','Borough','Neighborhood']].copy()
#1st we create 2 new rows with identical postcode values in order to test our code  : 
df_test=df_test.append(pd.DataFrame({'PostalCode':['M3A','M3A'],'Borough':['North York','North York'],'Neighborhood':['Pierre','Sarah']}))
df_test.reset_index(inplace=True,drop=True)

In [None]:
# we sort the dataframe in order to have identical post code successive to each other : 
df_test=df_test.sort_values(by=['PostalCode'])
df_test.reset_index(inplace=True,drop=True)
# loop to find identical postcode and then merged neighborhoods: 
for index, value in enumerate(df_test['PostalCode']):
    try : 
        i=1
        if index!= len(df_test['PostalCode']) :
            while value == df_test.loc[index+i, 'PostalCode']:  
                df_test.at[index,'Neighborhood']+= ', '+ (df_test.at[index+i,'Neighborhood'])
                #once merged we drop the merged row 
                df_test.drop([index+i],inplace=True)
                i+=1     
    except : 
        print('fin du dataframe')
                       

## Coordinates from csv geospatial 

In [None]:
# coordinates from csv geospatial
df_table.columns
df_2=pd.read_csv('Geospatial_Coordinates.csv')
df_2.rename(columns={'Postal Code': 'PostalCode'},inplace=True)
df_3=pd.merge(df_table[['PostalCode', 'Borough', 'Neighborhood']],df_2, on='PostalCode',how='inner')

In [None]:
#folium map : 
toronto_map= folium.Map(location=[latitude, longitude],tiles='OpenStreetMap', zoom_start=12)
for borough,neighborhood, lat,long in zip(df_3['Borough'],df_3['Neighborhood'],df_3['Latitude'],df_3['Longitude']):
    label=(f'{borough}, {neighborhood}')
    label=folium.Popup(label,parse_html=True)
    folium.CircleMarker(location=[lat,long],
                        popup=label,
                        radius=5, 
                        fill=True,
                        fill_color='#3388ff',
                        fill_opacity=1).add_to(toronto_map)
    
toronto_map