# Toronto

In [73]:
import pandas as pd
import numpy as np

In [74]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
data = pd.read_html(url)[0]

In [75]:
data.rename(columns={'Postcode': 'PostalCode'}, inplace=True)  
ix = data['Borough'] == 'Not assigned'
data.drop(data[ix].index, inplace=True)

In [76]:
data.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


In [77]:
data['PostalCode'].nunique(), len(data), data['PostalCode'].nunique() == len(data)

(103, 210, False)

In [78]:
post_neighbour_dic = {}
for _, row in data.iterrows():
    if row['PostalCode'] in post_neighbour_dic:
        post_neighbour_dic[row['PostalCode']].append(row['Neighbourhood']) 
    else:
        post_neighbour_dic[row['PostalCode']] = [row['Neighbourhood']] 

In [79]:
post_neighbour_dic['M9B']

['Cloverdale',
 'Islington',
 'Martin Grove',
 'Princess Gardens',
 'West Deane Park']

In [80]:
data['Neighbourhood'] = data['PostalCode'].apply(lambda x: ','.join(post_neighbour_dic[x]))

data.drop_duplicates(inplace=True) 

In [81]:
data.sample(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood
143,M4N,Central Toronto,Lawrence Park
4,M5A,Downtown Toronto,Harbourfront
81,M4J,East York,East Toronto
10,M1B,Scarborough,"Rouge,Malvern"
48,M6E,York,Caledonia-Fairbanks
61,M1H,Scarborough,Cedarbrae
98,M5K,Downtown Toronto,"Design Exchange,Toronto Dominion Centre"
235,M1W,Scarborough,L'Amoreaux West
239,M5W,Downtown Toronto,Stn A PO Boxes 25 The Esplanade
173,M7R,Mississauga,Canada Post Gateway Processing Centre


In [82]:
sum(data['Neighbourhood'] == 'Not assigned')

1

In [83]:
for _, row in data.iterrows():
    if row['Neighbourhood'] == 'Not assigned': row['Neighbourhood'] = row['Borough']

In [84]:
sum(data['Neighbourhood'] == 'Not assigned')

0

In [85]:
data[data['Borough'].str.contains("Queen")]

Unnamed: 0,PostalCode,Borough,Neighbourhood
7,M7A,Queen's Park,Queen's Park


In [86]:
data.shape

(103, 3)

In [87]:
!pip install pgeocode
import pgeocode

nomi = pgeocode.Nominatim('ca') 



In [88]:
data['Latitude'] = data['PostalCode'].apply(lambda x: nomi.query_postal_code(x))['latitude']
data['Longitude'] = data['PostalCode'].apply(lambda x: nomi.query_postal_code(x))['longitude']

In [89]:
data.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
2,M3A,North York,Parkwoods,43.7545,-79.33
3,M4A,North York,Victoria Village,43.7276,-79.3148
4,M5A,Downtown Toronto,Harbourfront,43.6555,-79.3626
5,M6A,North York,"Lawrence Heights,Lawrence Manor",43.7223,-79.4504
7,M7A,Queen's Park,Queen's Park,43.6641,-79.3889


In [90]:
data_toronto = data[data['Borough'].str.contains('Toronto')].reset_index(drop=True)
data_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,Harbourfront,43.6555,-79.3626
1,M9A,Downtown Toronto,Queen's Park,43.6662,-79.5282
2,M5B,Downtown Toronto,"Ryerson,Garden District",43.6572,-79.3783
3,M5C,Downtown Toronto,St. James Town,43.6513,-79.3756
4,M4E,East Toronto,The Beaches,43.6784,-79.2941


In [91]:
data_toronto['Borough'].unique()

array(['Downtown Toronto', 'East Toronto', 'West Toronto',
       'Central Toronto'], dtype=object)

In [92]:
data_toronto['Neighbourhood'].nunique()

39

In [93]:
import json 
import requests 

from sklearn.cluster import KMeans 

!pip install folium
import folium



In [94]:
latitude, longitude = data_toronto.loc[0, 'Latitude'], data_toronto.loc[0, 'Longitude']
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

for lat, lng, borough, neighborhood in zip(data_toronto['Latitude'], data_toronto['Longitude'], 
                                            data_toronto['Borough'], data_toronto['Neighbourhood']):
    label = f"{neighborhood}, {borough}"
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='black',
        fill=True,
        fill_color='#1E90FF',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

# New York

In [95]:
url2 = "https://www.health.ny.gov/statistics/cancer/registry/appendix/neighborhoods.htm"
data2 = pd.read_html(url2)[0]

In [96]:
from itertools import chain

# return list from series of comma-separated strings
def chainer(s):
    return list(chain.from_iterable(s.str.split(',')))

# calculate lengths of splits
lens = data2['ZIP Codes'].str.split(',').map(len)

# create new dataframe, repeating or chaining as appropriate
data3 = pd.DataFrame({'Borough': np.repeat(data2['Borough'], lens),
                    'Neighborhood': np.repeat(data2['Neighborhood'], lens),
                    'ZIP Codes': chainer(data2['ZIP Codes'])})

data3.head()

Unnamed: 0,Borough,Neighborhood,ZIP Codes
0,Bronx,Central Bronx,10453
0,Bronx,Central Bronx,10457
0,Bronx,Central Bronx,10460
1,Bronx,Bronx Park and Fordham,10458
1,Bronx,Bronx Park and Fordham,10467


In [97]:
data3['ZIP Codes'].nunique(), len(data3), data3['ZIP Codes'].nunique() == len(data3)

(178, 178, True)

In [98]:
post_neighbour_dic2 = {}
for _, row2 in data3.iterrows():
    if row2['ZIP Codes'] in post_neighbour_dic2:
        post_neighbour_dic2[row2['ZIP Codes']].append(row2['Neighborhood']) 
    else:
        post_neighbour_dic2[row2['ZIP Codes']] = [row2['Neighborhood']]

In [99]:
data3.drop_duplicates(inplace=True)

In [100]:
data3.sample(10)

Unnamed: 0,Borough,Neighborhood,ZIP Codes
21,Manhattan,Gramercy Park and Murray Hill,10017
34,Queens,Rockaways,11691
23,Manhattan,Lower Manhattan,10004
29,Queens,North Queens,11359
24,Manhattan,Lower East Side,10003
31,Queens,Jamaica,11412
12,Brooklyn,Northwest Brooklyn,11215
29,Queens,North Queens,11360
22,Manhattan,Greenwich Village and Soho,10012
0,Bronx,Central Bronx,10460


In [101]:
sum(data3['Neighborhood'] == 'Not assigned')

0

In [102]:
nomi2 = pgeocode.Nominatim('us')

In [103]:
data3['Latitude'] = data3['ZIP Codes'].apply(lambda x: nomi2.query_postal_code(x))['latitude']
data3['Longitude'] = data3['ZIP Codes'].apply(lambda x: nomi2.query_postal_code(x))['longitude']

In [104]:
data3.head()

Unnamed: 0,Borough,Neighborhood,ZIP Codes,Latitude,Longitude
0,Bronx,Central Bronx,10453,40.852,-73.9129
0,Bronx,Central Bronx,10457,,
0,Bronx,Central Bronx,10460,,
1,Bronx,Bronx Park and Fordham,10458,40.8633,-73.8895
1,Bronx,Bronx Park and Fordham,10467,,


In [105]:
data3['Borough'].unique()

array(['Bronx', 'Brooklyn', 'Manhattan', 'Queens', 'Staten Island'],
      dtype=object)

In [106]:
data3['Neighborhood'].nunique()

42

In [107]:
data3 = data3.dropna()

In [108]:
latitude, longitude = data3.loc[0, 'Latitude'], data3.loc[0, 'Longitude']
map_ny = folium.Map(location=[latitude, longitude], zoom_start=12)

for lat, lng, borough, neighborhood in zip(data3['Latitude'], data3['Longitude'], 
                                            data3['Borough'], data3['Neighborhood']):
    label = f"{neighborhood}, {borough}"
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='black',
        fill=True,
        fill_color='#1E90FF',
        fill_opacity=0.7,
        parse_html=False).add_to(map_ny)  
    
map_ny

# Paris

In [109]:
url3 = "https://www.worldpostalcodes.org/en/france/arrondissement/map-of-postal-codes-in-paris"
data4 = pd.read_html(url3)[0]

In [110]:
data4 = pd.DataFrame({'PostalCodes':data4.values.ravel()})
data4 = data4.dropna()
data4 = data4.astype({'PostalCodes': int})

data4.head()

Unnamed: 0,PostalCodes
0,75001
1,75002
2,75003
3,75004
4,75005


In [118]:
data4 = data4.assign(Borough = 'Paris')
data4 = data4.assign(Neighborhood = 'Paris')

In [119]:
data4.head()

Unnamed: 0,PostalCodes,Borough,Neighborhood
0,75001,Paris,Paris
1,75002,Paris,Paris
2,75003,Paris,Paris
3,75004,Paris,Paris
4,75005,Paris,Paris


In [120]:
data4['PostalCodes'].nunique(), len(data4), data4['PostalCodes'].nunique() == len(data4)

(21, 21, True)

In [121]:
data4.drop_duplicates(inplace=True)

In [122]:
data4.sample(10)

Unnamed: 0,PostalCodes,Borough,Neighborhood
1,75002,Paris,Paris
14,75015,Paris,Paris
3,75004,Paris,Paris
2,75003,Paris,Paris
11,75012,Paris,Paris
4,75005,Paris,Paris
18,75019,Paris,Paris
19,75020,Paris,Paris
16,75017,Paris,Paris
5,75006,Paris,Paris


In [123]:
nomi3 = pgeocode.Nominatim('fr')

In [124]:
data4['Latitude'] = data4['PostalCodes'].apply(lambda x: nomi3.query_postal_code(x))['latitude']
data4['Longitude'] = data4['PostalCodes'].apply(lambda x: nomi3.query_postal_code(x))['longitude']

In [125]:
data4.head()

Unnamed: 0,PostalCodes,Borough,Neighborhood,Latitude,Longitude
0,75001,Paris,Paris,48.8592,2.34525
1,75002,Paris,Paris,48.8655,2.3457
2,75003,Paris,Paris,48.8637,2.35515
3,75004,Paris,Paris,48.8601,2.34975
4,75005,Paris,Paris,48.8448,2.34795


In [372]:
latitude, longitude = data4.loc[0, 'Latitude'], data4.loc[0, 'Longitude']
map_paris = folium.Map(location=[latitude, longitude], zoom_start=12)

for lat, lng, borough, neighborhood in zip(data4['Latitude'], data4['Longitude'], 
                                            data4['Borough'], data4['Neighborhood']):
    label = f"{neighborhood}, {borough}"
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='black',
        fill=True,
        fill_color='#1E90FF',
        fill_opacity=0.7,
        parse_html=False).add_to(map_paris)  
    
map_paris

# London

In [357]:
urlE = "https://en.wikipedia.org/wiki/E_postcode_area"
dataE = pd.read_html(urlE)[1]

urlEC = "https://en.wikipedia.org/wiki/EC_postcode_area"
dataEC = pd.read_html(urlEC)[1]

urlN = "https://en.wikipedia.org/wiki/N_postcode_area"
dataN = pd.read_html(urlN)[1]

urlNW = "https://en.wikipedia.org/wiki/NW_postcode_area"
dataNW = pd.read_html(urlNW)[1]

urlSE = "https://en.wikipedia.org/wiki/SE_postcode_area"
dataSE = pd.read_html(urlSE)[1]

urlSW = "https://en.wikipedia.org/wiki/SW_postcode_area"
dataSW = pd.read_html(urlSW)[1]

urlW = "https://en.wikipedia.org/wiki/W_postcode_area"
dataW = pd.read_html(urlW)[1]

urlWC = "https://en.wikipedia.org/wiki/WC_postcode_area"
dataWC = pd.read_html(urlWC)[1]


data5 = dataE.append([dataEC, dataN, dataNW, dataSE, dataSW, dataW, dataWC])

data5.head()

Unnamed: 0,Postcode district,Post town,Coverage,Local authority area
0,E1,LONDON,"Eastern head district: Aldgate (part), Bishops...","Tower Hamlets, Hackney, City of London"
1,E1W,LONDON,"Wapping, St Katharine Docks, Stepney (part), S...",Tower Hamlets
2,E2,LONDON,"Bethnal Green district: Bethnal Green, Haggers...","Tower Hamlets, Hackney"
3,E3,LONDON,"Bow district: Bow, Bow Common, Bromley-by-Bow,...","Tower Hamlets, Newham"
4,E4,LONDON,"Chingford district: Chingford, Sewardstone, Hi...","Waltham Forest, Enfield, Epping Forest (Essex)"


In [358]:
data5['Postcode district'].nunique(), len(data5), data5['Postcode district'].nunique() == len(data5)

(184, 184, True)

In [359]:
data5.drop_duplicates(inplace=True)

In [360]:
data5.sample(10)

Unnamed: 0,Postcode district,Post town,Coverage,Local authority area
11,WC2H,LONDON,"Leicester Square, St. Giles","Camden, Westminster"
24,W13,LONDON,"West Ealing district: West Ealing, Northfields...",Ealing
10,SE10,LONDON,"Greenwich district: Greenwich, Maze Hill, Gree...","Greenwich, Lewisham"
9,WC2B,LONDON,"Drury Lane, Kingsway, Aldwych","Camden, Westminster"
5,E5,LONDON,"Clapton district: Leyton (Part), Upper Clapton...","Hackney, Waltham Forest"
24,N81,LONDON,Electoral Reform Services,non-geographic
4,SE4,LONDON,"Brockley district: Brockley, Crofton Park",Lewisham
23,SE23,LONDON,"Forest Hill district: Forest Hill, Honor Oak, ...","Lewisham, Southwark"
1,E1W,LONDON,"Wapping, St Katharine Docks, Stepney (part), S...",Tower Hamlets
10,E10,LONDON,"Leyton district: Leyton, Temple Mills, Hackney...","Waltham Forest, Hackney"


In [361]:
nomi4 = pgeocode.Nominatim('gb')

In [362]:
data5['Latitude'] = data5['Postcode district'].apply(lambda x: nomi4.query_postal_code(x))['latitude']
data5['Longitude'] = data5['Postcode district'].apply(lambda x: nomi4.query_postal_code(x))['longitude']

In [363]:
data5.head()

Unnamed: 0,Postcode district,Post town,Coverage,Local authority area,Latitude,Longitude
0,E1,LONDON,"Eastern head district: Aldgate (part), Bishops...","Tower Hamlets, Hackney, City of London",51.5051,-0.061773
1,E1W,LONDON,"Wapping, St Katharine Docks, Stepney (part), S...",Tower Hamlets,,
2,E2,LONDON,"Bethnal Green district: Bethnal Green, Haggers...","Tower Hamlets, Hackney",51.55,-0.055525
3,E3,LONDON,"Bow district: Bow, Bow Common, Bromley-by-Bow,...","Tower Hamlets, Newham",51.525,-0.026571
4,E4,LONDON,"Chingford district: Chingford, Sewardstone, Hi...","Waltham Forest, Enfield, Epping Forest (Essex)",51.6303,0.0005


In [364]:
data5 = data5.dropna()

In [365]:
data5.head()

Unnamed: 0,Postcode district,Post town,Coverage,Local authority area,Latitude,Longitude
0,E1,LONDON,"Eastern head district: Aldgate (part), Bishops...","Tower Hamlets, Hackney, City of London",51.5051,-0.061773
2,E2,LONDON,"Bethnal Green district: Bethnal Green, Haggers...","Tower Hamlets, Hackney",51.55,-0.055525
3,E3,LONDON,"Bow district: Bow, Bow Common, Bromley-by-Bow,...","Tower Hamlets, Newham",51.525,-0.026571
4,E4,LONDON,"Chingford district: Chingford, Sewardstone, Hi...","Waltham Forest, Enfield, Epping Forest (Essex)",51.6303,0.0005
5,E5,LONDON,"Clapton district: Leyton (Part), Upper Clapton...","Hackney, Waltham Forest",51.564,-0.054517


In [366]:
data5 = data5.assign(Borough = 'London')
data5 = data5.assign(Neighborhood = 'London')
data5 = data5.drop(['Coverage'], axis=1)

data5.head()

Unnamed: 0,Postcode district,Post town,Local authority area,Latitude,Longitude,Borough,Neighborhood
0,E1,LONDON,"Tower Hamlets, Hackney, City of London",51.5051,-0.061773,London,London
2,E2,LONDON,"Tower Hamlets, Hackney",51.55,-0.055525,London,London
3,E3,LONDON,"Tower Hamlets, Newham",51.525,-0.026571,London,London
4,E4,LONDON,"Waltham Forest, Enfield, Epping Forest (Essex)",51.6303,0.0005,London,London
5,E5,LONDON,"Hackney, Waltham Forest",51.564,-0.054517,London,London


In [367]:
data5 = data5.drop(['Local authority area'], axis=1)

data5.head()

Unnamed: 0,Postcode district,Post town,Latitude,Longitude,Borough,Neighborhood
0,E1,LONDON,51.5051,-0.061773,London,London
2,E2,LONDON,51.55,-0.055525,London,London
3,E3,LONDON,51.525,-0.026571,London,London
4,E4,LONDON,51.6303,0.0005,London,London
5,E5,LONDON,51.564,-0.054517,London,London


In [368]:
data5 = data5.drop(['Post town'], axis=1)

data5.head()

Unnamed: 0,Postcode district,Latitude,Longitude,Borough,Neighborhood
0,E1,51.5051,-0.061773,London,London
2,E2,51.55,-0.055525,London,London
3,E3,51.525,-0.026571,London,London
4,E4,51.6303,0.0005,London,London
5,E5,51.564,-0.054517,London,London


In [370]:
data_london = data5[data5['Borough'].str.contains('London')].reset_index(drop=True)

In [371]:
latitude, longitude = data_london.loc[0, 'Latitude'], data_london.loc[0, 'Longitude']
map_london = folium.Map(location=[latitude, longitude], zoom_start=12)

for lat, lng, borough, neighborhood in zip(data_london['Latitude'], data_london['Longitude'], 
                                            data_london['Borough'], data_london['Neighborhood']):
    label = f"{neighborhood}, {borough}"
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='black',
        fill=True,
        fill_color='#1E90FF',
        fill_opacity=0.7,
        parse_html=False).add_to(map_london)  
    
map_london