# TABLE OF CONTENT : 
 1. [Scrapping Wikipedia Web page](#1)<br>
 2. [Adding coordinates of neighborhoods](#2)<br>
 3. [Folium maps](#3)<br>
 4. [Cluster neighborhoods](#4)<br>

In [1]:
#Installing beautifulsoup package
#!pip install beautifulsoup4

In [32]:
#import of libraries necessary to scrap the web page 
import pandas as pd
from bs4 import BeautifulSoup
import requests
import numpy as np
import folium
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# 1. SCRAPPING WIKIPEDIA WEB PAGE <a id="1"></a>

In [2]:
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
url

'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [3]:
#parsing the web page wikipedia
page_response=requests.get(url,timeout=5)
page_content = BeautifulSoup(page_response.content, 'lxml')

In [4]:
#creation of the data frame containing the table from wikipedia page : 
# varibale with all the tables found in the page 
tables=page_content.find_all('table')
#focus on the table we are interested in : 
table=tables[0].tbody
#creation of our data frame :
df_table=pd.DataFrame(columns=['PostalCode','Borough','Neighborhood'])
#iteration in the table in order to extract content and add it to our dataframe 
for row in table.find_all('tr') : 
    cols=row.find_all('td')
    try : 
        df_table=df_table.append({'PostalCode':cols[0].text,
                                          'Borough': cols[1].text, 
                                          'Neighborhood': cols[2].text, 
                                         }, ignore_index=True)
    except: 
        pass

df_table

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A\n,Not assigned\n,\n
1,M2A\n,Not assigned\n,\n
2,M3A\n,North York\n,Parkwoods\n
3,M4A\n,North York\n,Victoria Village\n
4,M5A\n,Downtown Toronto\n,Regent Park / Harbourfront\n
...,...,...,...
175,M5Z\n,Not assigned\n,\n
176,M6Z\n,Not assigned\n,\n
177,M7Z\n,Not assigned\n,\n
178,M8Z\n,Etobicoke\n,Mimico NW / The Queensway West / South of Bloo...


In [5]:
#data frame cleaning by removin the '\n' using  REGEX: 
import re
for index,row in df_table.iterrows():
    for col in df_table.columns:
        row[col]= re.sub(r'\n','',row[col])
df_table

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
...,...,...,...
175,M5Z,Not assigned,
176,M6Z,Not assigned,
177,M7Z,Not assigned,
178,M8Z,Etobicoke,Mimico NW / The Queensway West / South of Bloo...


In [7]:
#drop rows where no borough were attributed and copy borough neighborhood where no neighborood
for index, row in df_table.iterrows():
    row['Neighborhood']=row['Neighborhood'].replace('/',',')
    if row['Borough']=='Not assigned':
        df_table.drop([index],inplace=True)
    if row['Neighborhood']== '':
        row['Neighborhood']=row['Borough']
df_table.reset_index(inplace=True,drop=True)
df_table.head(15)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern , Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill , Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [8]:
# checking if any null values are stil in the data frame : 
df_clean=df_table.isnull()
for column in df_clean.columns: 
    print(column)
    print(df_clean[column].value_counts())

PostalCode
False    103
Name: PostalCode, dtype: int64
Borough
False    103
Name: Borough, dtype: int64
Neighborhood
False    103
Name: Neighborhood, dtype: int64


In [9]:
df_table.shape

(103, 3)

In [134]:
#export data to csv
import csv
#df_table.to_csv('postal_code.csv')

# 2. Adding coordinates of neighborhoods <a id="2"></a>


In [11]:
# instal of geocoder 
#!pip install geocoder

In [2]:
import geocoder # import geocoder

In [15]:
# initialize your variable to None
lat_lng_coords = None
for index,code in enumerate(df_table['PostalCode']): 
    postal_code=code
    print(index, 'st row to complete')
# loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.geocodefarm('{}, Toronto, Ontario,Canada'.format(postal_code))
        lat_lng_coords = g.latlng
        print(lat_lng_coords)
    df_table.at[index,'latitude'] = lat_lng_coords[0]
    df_table.at[index,'longitude'] = lat_lng_coords[1]
    print(index, 'st row completed')
    lat_lng_coords = None

0 st row to complete
[43.7518806457716, -79.3303604125129]
0 st row completed
1 st row to complete
[43.7304191589716, -79.3128204341299]
1 st row completed
2 st row to complete
None
None
[43.6551399230715, -79.362648010213]
2 st row completed
3 st row to complete
[43.7232093811716, -79.4514083861301]
3 st row completed
4 st row to complete
None
[43.6644897460715, -79.393020629813]
4 st row completed
5 st row to complete
[43.6627693176715, -79.528312683113]
5 st row completed
6 st row to complete


KeyboardInterrupt: 

In [253]:
# extraction of the dataframe into a csv file
#df_table.to_csv('postal_code.csv')

# 3. Folium Maps <a id="3"></a>

In [3]:
#reading the previously completed csv with cordinates
df_table=pd.read_csv('postal_code.csv',index_col=0)
df_table

Unnamed: 0,PostalCode,Borough,Neighborhood,latitude,longitude
0,M3A,North York,Parkwoods,43.751881,-79.330360
1,M4A,North York,Victoria Village,43.730419,-79.312820
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.655140,-79.362648
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.723209,-79.451408
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.664490,-79.393021
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway , Montgomery Road , Old Mill North",43.653690,-79.511124
99,M4Y,Downtown Toronto,Church and Wellesley,43.666592,-79.381302
100,M7Y,East Toronto,Business reply mail Processing CentrE,43.648689,-79.385437
101,M8Y,Etobicoke,"Old Mill South , King's Mill Park , Sunnylea ,...",43.632881,-79.489548


In [4]:
#checking data with folium maps of neigborhoods
#1st coordinates of Toronto
g2 = geocoder.geocodefarm('Toronto, Ontario,Canada')
latlong= g2.latlng
latitude= latlong[0]
longitude = latlong[1]
print(f'latitude {latitude} & longitude {longitude}')

latitude 43.6486892707151 & longitude -79.385437011713


In [5]:
#folium map : 
toronto_map= folium.Map(location=[latitude, longitude],tiles='OpenStreetMap', zoom_start=10)
for borough,neighborhood, lat,long in zip(df_table['Borough'],df_table['Neighborhood'],df_table['latitude'],df_table['longitude']):
    label=(f'{borough}, {neighborhood}')
    label=folium.Popup(label,parse_html=True)
    folium.CircleMarker(location=[lat,long],
                        popup=label,
                        radius=5, 
                        fill=True,
                        fill_color='#3388ff',
                        fill_opacity=1,
                        parse_html=False).add_to(toronto_map)
    
toronto_map

# 4. Cluster neighborhoods<a id="4"></a>

In [9]:
#import librairies : 
# library to handle JSON files
import json 
 # library to handle requests
import requests 
from sklearn.cluster import KMeans
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [10]:
#GET INFORMATION ON FOURSQUARE API
#FOURSQUARE CREDENTIALS
CLIENT_ID = 'CUF5UKNECLDYIDGLLSJJF32FI1RYQJBR1WRLHIFQEVRSJJO4' 
CLIENT_SECRET = 'SW40I4VDCJKKBFFXLQOFRMBRW0AQ1DWO3ZO23QF3JUXLKWFJ' 
VERSION = '20180605' 

print('Credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

#foursquare url : 
url_base='https://api.foursquare.com/v2/'

Credentails:
CLIENT_ID: CUF5UKNECLDYIDGLLSJJF32FI1RYQJBR1WRLHIFQEVRSJJO4
CLIENT_SECRET:SW40I4VDCJKKBFFXLQOFRMBRW0AQ1DWO3ZO23QF3JUXLKWFJ


In [11]:
Toronto_data=pd.DataFrame(columns=df_table.columns[1:])

In [12]:
for index,row in enumerate(df_table['Borough']):
    if 'Toronto' in row: 
        Toronto_data=Toronto_data.append(df_table.loc[index,'Borough':'longitude'])
Toronto_data.reset_index(inplace=True,drop=True)

In [13]:
Toronto_data

Unnamed: 0,Borough,Neighborhood,latitude,longitude
0,Downtown Toronto,"Regent Park , Harbourfront",43.65514,-79.362648
1,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.66449,-79.393021
2,Downtown Toronto,"Garden District, Ryerson",43.65736,-79.378181
3,Downtown Toronto,St. James Town,43.651428,-79.375572
4,East Toronto,The Beaches,43.677029,-79.295418
5,Downtown Toronto,Berczy Park,43.645309,-79.37368
6,Downtown Toronto,Central Bay Street,43.65609,-79.384933
7,Downtown Toronto,Christie,43.668781,-79.420708
8,Downtown Toronto,"Richmond , Adelaide , King",43.6497,-79.382584
9,West Toronto,"Dufferin , Dovercourt Village",43.665089,-79.438713


In [14]:
# visualisation of Toronto Boroughs : 

borough= folium.Map(location=[latitude,longitude],zoom_start=11)
for lat,long,bo,nei in zip(Toronto_data['latitude'],Toronto_data['longitude'],Toronto_data['Borough'],Toronto_data['Neighborhood']):
    label=(bo +' -\n'+ nei)
    label=folium.Popup(label, parse_html=True)
    folium.CircleMarker(location=[lat,long],
                        popup=label,
                        radius=5,
                        fill=True,
                        fill_color='#3388ff',
                        fill_opacity=1,
                        parse_html=False
                       ).add_to(borough)
borough

In [27]:
# connect to foursquare and get the venues for all neigborhoods
RADIUS=500
LIMIT= 100
temp=pd.DataFrame()
all_toronto_venues = pd.DataFrame(columns=['PostalCode', 'Borough', 'Neighborhood', 'latitude', 'longitude','venu_name','venue_category'])
for code, bo, nei,lat,long in zip(df_table['PostalCode'],df_table['Borough'],df_table['Neighborhood'],df_table['latitude'],df_table['longitude']):
    url=url_base+f'venues/explore?&client_id={CLIENT_ID}&client_secret={CLIENT_SECRET}&v={VERSION}&ll={lat},{long}&radius={RADIUS}&limit={LIMIT}'
    results=requests.get(url).json()    
    for i in range(len(results['response']['groups'][0]['items'])):
        temp.at[i,'PostalCode']=code
        temp.at[i,'Borough']=bo
        temp.at[i,'Neighborhood']=nei
        temp.at[i,'latitude']=lat
        temp.at[i,'longitude']=long
        temp.at[i,'venu_name']=results['response']['groups'][0]['items'][i]['venue']['name']
        temp.at[i,'venue_category']=results['response']['groups'][0]['items'][i]['venue']['categories'][0]['name']
    all_toronto_venues=all_toronto_venues.append(temp,ignore_index=True)
all_toronto_venues

Unnamed: 0,PostalCode,Borough,Neighborhood,latitude,longitude,venu_name,venue_category
0,M3A,North York,Parkwoods,43.751881,-79.330360,Brookbanks Park,Park
1,M3A,North York,Parkwoods,43.751881,-79.330360,PetSmart,Pet Store
2,M3A,North York,Parkwoods,43.751881,-79.330360,Variety Store,Food & Drink Shop
3,M4A,North York,Victoria Village,43.730419,-79.312820,Memories of Africa,Grocery Store
4,M4A,North York,Victoria Village,43.730419,-79.312820,Eglinton Ave E & Sloane Ave/Bermondsey Rd,Intersection
...,...,...,...,...,...,...,...
9749,M7Y,East Toronto,Business reply mail Processing CentrE,43.648689,-79.385437,Mr Burrito,Burrito Place
9750,M7Y,East Toronto,Business reply mail Processing CentrE,43.648689,-79.385437,Rexall Pharma Plus,Pharmacy
9751,M7Y,East Toronto,Business reply mail Processing CentrE,43.648689,-79.385437,Ematei 絵馬亭,Japanese Restaurant
9752,M7Y,East Toronto,Business reply mail Processing CentrE,43.648689,-79.385437,Meli Baklava & Chocolate Bar,Chocolate Shop


In [33]:
all_toronto_venues

Unnamed: 0,PostalCode,Borough,Neighborhood,latitude,longitude,venu_name,venue_category
0,M3A,North York,Parkwoods,43.751881,-79.33036,Brookbanks Park,Park
1,M3A,North York,Parkwoods,43.751881,-79.33036,PetSmart,Pet Store
2,M3A,North York,Parkwoods,43.751881,-79.33036,Variety Store,Food & Drink Shop
3,M4A,North York,Victoria Village,43.730419,-79.31282,Memories of Africa,Grocery Store
4,M4A,North York,Victoria Village,43.730419,-79.31282,Eglinton Ave E & Sloane Ave/Bermondsey Rd,Intersection
5,M3A,North York,Parkwoods,43.751881,-79.33036,Variety Store,Food & Drink Shop
6,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65514,-79.362648,Roselle Desserts,Bakery
7,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65514,-79.362648,Tandem Coffee,Coffee Shop
8,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65514,-79.362648,Figs Breakfast & Lunch,Breakfast Spot
9,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65514,-79.362648,Morning Glory Cafe,Breakfast Spot


In [48]:
# group by neighborhoods the most venues category 
dummies=pd.get_dummies(all_toronto_venues[['venue_category']], prefix="", prefix_sep="")
dummies['Neighborhood']=all_toronto_venues['Neighborhood']
#getting last column at the bigining (neigborhoods):
ordered_columns=[dummies.columns[-1]]+ list(dummies.columns[:-1])
dummies=dummies[ordered_columns]
dummies
all_toronto_grouped=dummies.groupby('Neighborhood').mean().reset_index()
all_toronto_grouped.head()

Unnamed: 0,Neighborhood,Yoga Studio,ATM,Accessories Store,Adult Boutique,Airport,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,Auto Garage,BBQ Joint,Baby Store,Badminton Court,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Stadium,Beer Bar,Beer Store,Belgian Restaurant,Bike Shop,Bistro,Boat or Ferry,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Bridge,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Café,Candy Store,Caribbean Restaurant,Carpet Store,Cheese Shop,Chinese Restaurant,Chiropractor,Chocolate Shop,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Gym,College Rec Center,College Stadium,College Theater,Colombian Restaurant,Comfort Food Restaurant,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Creperie,Cuban Restaurant,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Gas Station,Gastropub,Gay Bar,General Entertainment,General Travel,Gift Shop,Gluten-free Restaurant,Golf Course,Golf Driving Range,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Hardware Store,Hawaiian Restaurant,Health Food Store,Historic Site,History Museum,Hobby Shop,Hockey Arena,Home Service,Hookah Bar,Hotel,Hotel Bar,Housing Development,IT Services,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Kitchen Supply Store,Korean Restaurant,Lake,Latin American Restaurant,Leather Goods Store,Light Rail Station,Lingerie Store,Liquor Store,Lounge,Luggage Store,Market,Martial Arts Dojo,Massage Studio,Mattress Store,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Museum,Music School,Music Store,Music Venue,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Park,Performing Arts Venue,Persian Restaurant,Peruvian Restaurant,Pet Store,Pharmacy,Pilates Studio,Pizza Place,Platform,Playground,Plaza,Poke Place,Pool,Portuguese Restaurant,Poutine Place,Pub,Ramen Restaurant,Record Shop,Rental Car Location,Rental Service,Residential Building (Apartment / Condo),Restaurant,Rock Climbing Spot,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Sculpture Garden,Seafood Restaurant,Shoe Repair,Shoe Store,Shopping Mall,Skating Rink,Smoke Shop,Snack Place,Soccer Field,Soup Place,Souvlaki Shop,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Stadium,Stationery Store,Steakhouse,Storage Facility,Strip Club,Supermarket,Supplement Shop,Sushi Restaurant,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood , Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Bedford Park , Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.064516,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.112903,0.0,0.0,0.0,0.0,0.0,0.0,0.064516,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.048387,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.129032,0.0,0.0,0.0,0.064516,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.064516,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.064516,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.064516,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.129032,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.064516,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.064516,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Berczy Park,0.013841,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010381,0.0,0.0,0.0,0.0,0.0,0.0,0.013841,0.0,0.0,0.013841,0.024221,0.0,0.0,0.0,0.010381,0.020761,0.0,0.0,0.0,0.010381,0.0,0.0,0.0,0.0,0.034602,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.024221,0.0,0.0,0.0,0.020761,0.0,0.0,0.0,0.0,0.013841,0.027682,0.083045,0.0,0.0,0.0,0.0,0.0,0.0,0.010381,0.0,0.00346,0.0,0.0,0.013841,0.0,0.010381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013841,0.0,0.0,0.0,0.0,0.0,0.013841,0.0,0.0,0.0,0.0,0.0,0.020761,0.034602,0.0,0.0,0.010381,0.0,0.0,0.0,0.0,0.0,0.010381,0.010381,0.010381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013841,0.013841,0.013841,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027682,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038062,0.013841,0.010381,0.0,0.013841,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00346,0.048443,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013841,0.0,0.0,0.0,0.013841,0.0,0.0,0.034602,0.00346,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00346,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034602,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.051903,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055363,0.0,0.0,0.034602,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013841,0.0,0.0,0.0,0.0,0.034602,0.0,0.0,0.0,0.0,0.0,0.0,0.010381,0.0,0.0,0.0,0.0,0.0,0.010381,0.0,0.0,0.0,0.0,0.0,0.0,0.00346,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [51]:
# keep the 10 most present venues per neighborhoods
#sort the venues :
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)

    return row_categories_sorted.index.values[0:num_top_venues] 


In [52]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = all_toronto_grouped['Neighborhood']

for ind in np.arange(all_toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(all_toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Badminton Court,Sushi Restaurant,Park,Skating Rink,Shopping Mall,Pool,Discount Store,Supermarket,Eastern European Restaurant,Dog Run
1,"Alderwood , Long Branch",Pizza Place,Gym,Sandwich Place,Convenience Store,Pub,Farm,Farmers Market,Falafel Restaurant,Event Space,Discount Store
2,Bayview Village,Trail,Construction & Landscaping,Park,Golf Driving Range,Event Space,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant
3,"Bedford Park , Lawrence Manor East",Italian Restaurant,Sandwich Place,Coffee Shop,Liquor Store,Sports Club,Butcher,Juice Bar,Sushi Restaurant,Pub,Comfort Food Restaurant
4,Berczy Park,Coffee Shop,Seafood Restaurant,Restaurant,Lounge,Italian Restaurant,Steakhouse,Portuguese Restaurant,Fast Food Restaurant,Breakfast Spot,Movie Theater


In [102]:
# apply clustering algorithm on neighborhoods based on most common categories 
k=5

all_toronto_grouped_clustering = all_toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=k, n_init=15, random_state=0).fit(all_toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 0, 1, 1, 1, 1, 1, 1, 1])

In [103]:
# add clustering labels
try : 
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
except: 
    neighborhoods_venues_sorted.drop(columns='Cluster Labels',inplace=True)
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df_table

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.751881,-79.33036,3.0,Food & Drink Shop,Park,Pet Store,Event Space,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Women's Store
1,M4A,North York,Victoria Village,43.730419,-79.31282,1.0,Intersection,Grocery Store,Women's Store,Event Space,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Falafel Restaurant
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65514,-79.362648,1.0,Coffee Shop,Breakfast Spot,Yoga Studio,Bakery,Health Food Store,Italian Restaurant,Food Truck,Event Space,Electronics Store,Mexican Restaurant
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.723209,-79.451408,1.0,Clothing Store,Bookstore,Women's Store,Toy / Game Store,Furniture / Home Store,Men's Store,Restaurant,Pharmacy,American Restaurant,Café
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.66449,-79.393021,1.0,Coffee Shop,Café,Music Store,Pub,Hotel Bar,Convenience Store,Sandwich Place,Salad Place,Clothing Store,Chinese Restaurant


In [104]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
26,Scarborough,0.0,Trail,Playground,Women's Store,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space
35,East York,0.0,Playground,Music Venue,Park,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store
39,North York,0.0,Trail,Construction & Landscaping,Park,Golf Driving Range,Event Space,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant
73,Central Toronto,0.0,Playground,Gym Pool,Park,Garden,Farmers Market,Farm,Falafel Restaurant,Event Space,Discount Store,Fast Food Restaurant
83,Central Toronto,0.0,Trail,Playground,Gym,Park,Ethiopian Restaurant,Distribution Center,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant
91,Downtown Toronto,0.0,Playground,Tennis Court,Building,Park,Women's Store,Electronics Store,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant


In [105]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,1.0,Intersection,Grocery Store,Women's Store,Event Space,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Falafel Restaurant
2,Downtown Toronto,1.0,Coffee Shop,Breakfast Spot,Yoga Studio,Bakery,Health Food Store,Italian Restaurant,Food Truck,Event Space,Electronics Store,Mexican Restaurant
3,North York,1.0,Clothing Store,Bookstore,Women's Store,Toy / Game Store,Furniture / Home Store,Men's Store,Restaurant,Pharmacy,American Restaurant,Café
4,Downtown Toronto,1.0,Coffee Shop,Café,Music Store,Pub,Hotel Bar,Convenience Store,Sandwich Place,Salad Place,Clothing Store,Chinese Restaurant
5,Etobicoke,1.0,Pharmacy,Skating Rink,Shopping Mall,Grocery Store,Bank,Café,Park,Falafel Restaurant,Farm,Dog Run
7,North York,1.0,Coffee Shop,Intersection,Spa,Soccer Field,Gym,Park,Gas Station,Burger Joint,Supermarket,Beer Store
8,East York,1.0,Pizza Place,Fast Food Restaurant,Brewery,Athletics & Sports,Pharmacy,Rock Climbing Spot,Café,Breakfast Spot,Bank,Gastropub
9,Downtown Toronto,1.0,Coffee Shop,Clothing Store,Middle Eastern Restaurant,Restaurant,Lingerie Store,Bar,Hotel,Falafel Restaurant,Greek Restaurant,Fried Chicken Joint
10,North York,1.0,Pizza Place,Asian Restaurant,Fast Food Restaurant,Sushi Restaurant,Gas Station,Rental Car Location,Mediterranean Restaurant,Ice Cream Shop,Grocery Store,Japanese Restaurant
13,North York,1.0,Coffee Shop,Intersection,Spa,Soccer Field,Gym,Park,Gas Station,Burger Joint,Supermarket,Beer Store


In [106]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Etobicoke,2.0,Movie Theater,Women's Store,Comic Shop,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space


In [107]:
toronto_merged.loc[toronto_merged['Cluster Labels'] ==3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,3.0,Food & Drink Shop,Park,Pet Store,Event Space,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Women's Store
16,York,3.0,Trail,Field,Hockey Arena,Grocery Store,Park,Ethiopian Restaurant,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant
18,Scarborough,3.0,Construction & Landscaping,Gym / Fitness Center,Park,Women's Store,Event Space,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant
49,North York,3.0,Park,Bakery,Event Space,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Women's Store,Distribution Center
61,Central Toronto,3.0,Park,Dim Sum Restaurant,Event Space,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Women's Store,Distribution Center
68,Central Toronto,3.0,Restaurant,Park,Women's Store,Ethiopian Restaurant,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Space
69,West Toronto,3.0,Residential Building (Apartment / Condo),Park,Women's Store,Ethiopian Restaurant,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Space
88,Etobicoke,3.0,Park,Yoga Studio,ATM,Tennis Court,Skating Rink,Grocery Store,Electronics Store,Dog Run,Donut Shop,Dumpling Restaurant
98,Etobicoke,3.0,Park,Pool,Event Space,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Women's Store


In [108]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Scarborough,4.0,History Museum,Bar,Dog Run,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant


In [109]:
# initialize the plot with the specified dimensions.
fig = plt.figure(figsize=(15, 10))

# colors uses a color map, which will produce an array of colors based on
# the number of labels. We use set(k_means_labels) to get the
# unique labels.
colors = plt.cm.Spectral(np.linspace(0, 1, len(set(k_means_labels))))

# create a plot
ax = fig.add_subplot(1, 1, 1)

# loop through the data and plot the datapoints and centroids.
# k will range from 0-3, which will match the number of clusters in the dataset.
for k, col in zip(range(len([[4,4], [-2, -1], [2, -3], [1, 1]])), colors):

    # create a list of all datapoints, where the datapoitns that are 
    # in the cluster (ex. cluster 0) are labeled as true, else they are
    # labeled as false.
    my_members = (k_means_labels == k)
    
    # define the centroid, or cluster center.
    cluster_center = k_means_cluster_centers[k]
    
    # plot the datapoints with color col.
    ax.plot(X[my_members, 0], X[my_members, 1], 'w', markerfacecolor=col, marker='.')
    
    # plot the centroids with specified color, but with a darker outline
    ax.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,  markeredgecolor='k', markersize=6)

# title of the plot
ax.set_title('KMeans')

# remove x-axis ticks
ax.set_xticks(())

# remove y-axis ticks
ax.set_yticks(())

# show the plot
plt.show()

Unnamed: 0,PostalCode,Borough,Neighborhood,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.751881,-79.33036,3.0,Food & Drink Shop,Park,Pet Store,Event Space,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Women's Store
1,M4A,North York,Victoria Village,43.730419,-79.31282,1.0,Intersection,Grocery Store,Women's Store,Event Space,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Falafel Restaurant
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65514,-79.362648,1.0,Coffee Shop,Breakfast Spot,Yoga Studio,Bakery,Health Food Store,Italian Restaurant,Food Truck,Event Space,Electronics Store,Mexican Restaurant
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.723209,-79.451408,1.0,Clothing Store,Bookstore,Women's Store,Toy / Game Store,Furniture / Home Store,Men's Store,Restaurant,Pharmacy,American Restaurant,Café
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.66449,-79.393021,1.0,Coffee Shop,Café,Music Store,Pub,Hotel Bar,Convenience Store,Sandwich Place,Salad Place,Clothing Store,Chinese Restaurant


# Code pour boucler sur le dataframe et fusionner les neighborhoods ayant les mêmes code postals 

In [242]:
df_test=df_table[['PostalCode','Borough','Neighborhood']].copy()
#ajout de deux lignes supplémentaires avec même code postal pour l'exercice : 
df_test=df_test.append(pd.DataFrame({'PostalCode':['M3A','M3A'],'Borough':['North York','North York'],'Neighborhood':['Pierre','Sarah']}))
df_test.reset_index(inplace=True,drop=True)

In [243]:
#on trie les valeurs pour que les codes postaux identiques soient à la suite dans le data frame : 
df_test=df_test.sort_values(by=['PostalCode'])
df_test.reset_index(inplace=True,drop=True)
# Boucle pour trouver les codes postals identiques et fusionner les neigborhoods: 
for index, value in enumerate(df_test['PostalCode']):
    try : 
        i=1
        if index!= len(df_test['PostalCode']) :
            while value == df_test.loc[index+i, 'PostalCode']:  
                df_test.at[index,'Neighborhood']+= ', '+ (df_test.at[index+i,'Neighborhood'])
                df_test.drop([index+i],inplace=True)
                i+=1     
    except : 
        print('fin du dataframe')
                       

fin du dataframe
fin du dataframe


In [244]:
df_test[df_test['PostalCode']=='M3A']

Unnamed: 0,PostalCode,Borough,Neighborhood
25,M3A,North York,"Pierre, Parkwoods, Sarah"


In [210]:
df_test

Unnamed: 0,PostalCode,Borough,Neighborhood
6,M1B,Scarborough,"Malvern , Rouge"
12,M1C,Scarborough,"Rouge Hill , Port Union , Highland Creek"
18,M1E,Scarborough,"Guildwood , Morningside , West Hill"
22,M1G,Scarborough,Woburn
26,M1H,Scarborough,Cedarbrae
...,...,...,...
64,M9N,York,Weston
70,M9P,Etobicoke,Westmount
77,M9R,Etobicoke,"Kingsview Village , St. Phillips , Martin Grov..."
89,M9V,Etobicoke,"South Steeles , Silverstone , Humbergate , Jam..."


In [92]:
# coordinates from csv geospatial
df_table.columns
df_2=pd.read_csv('Geospatial_Coordinates.csv')
df_2.rename(columns={'Postal Code': 'PostalCode'},inplace=True)
df_3=pd.merge(df_table[['PostalCode', 'Borough', 'Neighborhood']],df_2, on='PostalCode',how='inner')

In [94]:
#folium map : 
toronto_map= folium.Map(location=[latitude, longitude],tiles='OpenStreetMap', zoom_start=12)
for borough,neighborhood, lat,long in zip(df_3['Borough'],df_3['Neighborhood'],df_3['Latitude'],df_3['Longitude']):
    label=(f'{borough}, {neighborhood}')
    label=folium.Popup(label,parse_html=True)
    folium.CircleMarker(location=[lat,long],
                        popup=label,
                        radius=5, 
                        fill=True,
                        fill_color='#3388ff',
                        fill_opacity=1).add_to(toronto_map)
    
toronto_map