# Neighborhoods in Toronto

## Part 1

## Scrape data form wikipedia page

In [1]:
import pandas as pd

In [9]:
df = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]

#### Filter out rows with 'Not assigned' Boroughs

In [14]:
df = df[df['Borough'] != 'Not assigned']

In [16]:
c = df[df['Neighbourhood'] == 'Not assigned']

In [17]:
c

Unnamed: 0,Postcode,Borough,Neighbourhood


#### There are no rows with 'Not assigned' neighbourhoods. Hence no further processing is required to address it.

In [18]:
g = df.groupby("Postcode")

In [46]:
#create empty dataframe
new_df = pd.DataFrame(columns=['PostalCode', 'Borough', 'Neighborhood'])

# iterate over group by
for postcode in g.groups:
    l = g.groups[postcode]
    n = ''
    b = ''
    for e in l:
        n +=  (df.loc[e]['Neighbourhood'] + ',')
        b = df.loc[e]['Borough']
    # append to dataframe
    new_df = new_df.append({'PostalCode' : postcode, 'Borough' : b, 'Neighborhood' : n}, ignore_index=True)

In [49]:
new_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge,Malvern,"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union,"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill,"
3,M1G,Scarborough,"Woburn,"
4,M1H,Scarborough,"Cedarbrae,"


In [47]:
new_df.shape

(103, 3)

# Part 2

In [51]:
geo_coords = pd.read_csv('https://cocl.us/Geospatial_data')

In [52]:
geo_coords.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [53]:
geo_coords.shape

(103, 3)

In [56]:
geo_coords.rename(columns={'Postal Code' : 'PostalCode'}, inplace = True)

## merge above two dataframes on PostalCode

In [57]:
m = pd.merge(new_df, geo_coords, on='PostalCode')

In [58]:
m.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern,",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union,",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill,",43.763573,-79.188711
3,M1G,Scarborough,"Woburn,",43.770992,-79.216917
4,M1H,Scarborough,"Cedarbrae,",43.773136,-79.239476


In [59]:
m.shape

(103, 5)

# Part 3

## Clustering Neighbourhoods

In [66]:
CLIENT_ID = 'HK3ZHLLVPB3RNF33MU3N120GPO5LEU3OVHEMJBCFMDGXCK5L'
CLIENT_SECRET = 'VEGOVTKT2AKY1OVUCNYHZMFQA0P2EQ2H2YI0IDT1CEJYGM0F'
VERSION = '20200312'
LIMIT = 100
radius = 500
import requests

### function to get nearby venues

In [63]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [71]:
toronto_venues = getNearbyVenues(names=m['Neighborhood'], latitudes=m['Latitude'], longitudes=m['Longitude'])

Rouge,Malvern,
Highland Creek,Rouge Hill,Port Union,
Guildwood,Morningside,West Hill,
Woburn,
Cedarbrae,
Scarborough Village,
East Birchmount Park,Ionview,Kennedy Park,
Clairlea,Golden Mile,Oakridge,
Cliffcrest,Cliffside,Scarborough Village West,
Birch Cliff,Cliffside West,
Dorset Park,Scarborough Town Centre,Wexford Heights,
Maryvale,Wexford,
Agincourt,
Clarks Corners,Sullivan,Tam O'Shanter,
Agincourt North,L'Amoreaux East,Milliken,Steeles East,
L'Amoreaux West,
Upper Rouge,
Hillcrest Village,
Fairview,Henry Farm,Oriole,
Bayview Village,
Silver Hills,York Mills,
Newtonbrook,Willowdale,
Willowdale South,
York Mills West,
Willowdale West,
Parkwoods,
Don Mills North,
Flemingdon Park,Don Mills South,
Bathurst Manor,Downsview North,Wilson Heights,
Northwood Park,York University,
CFB Toronto,Downsview East,
Downsview West,
Downsview Central,
Downsview Northwest,
Victoria Village,
Woodbine Gardens,Parkview Hill,
Woodbine Heights,
The Beaches,
Leaside,
Thorncliffe Park,
East Toronto,
The Danf

In [72]:
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge,Malvern,",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,"Rouge,Malvern,",43.806686,-79.194353,Interprovincial Group,43.80563,-79.200378,Print Shop
2,"Highland Creek,Rouge Hill,Port Union,",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
3,"Guildwood,Morningside,West Hill,",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
4,"Guildwood,Morningside,West Hill,",43.763573,-79.188711,Marina Spa,43.766,-79.191,Spa


In [73]:
toronto_venues.shape

(2214, 7)

In [74]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide,King,Richmond,",100,100,100,100,100,100
"Agincourt North,L'Amoreaux East,Milliken,Steeles East,",3,3,3,3,3,3
"Agincourt,",4,4,4,4,4,4
"Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown,",12,12,12,12,12,12
"Alderwood,Long Branch,",10,10,10,10,10,10
"Bathurst Manor,Downsview North,Wilson Heights,",19,19,19,19,19,19
"Bayview Village,",4,4,4,4,4,4
"Bedford Park,Lawrence Manor East,",25,25,25,25,25,25
"Berczy Park,",55,55,55,55,55,55
"Birch Cliff,Cliffside West,",4,4,4,4,4,4


In [75]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 267 uniques categories.


In [76]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [77]:
toronto_onehot.shape

(2214, 267)

In [78]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,"Adelaide,King,Richmond,",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.020000,0.00,0.000000,0.000000,0.000000,0.010000,0.000000,0.000000,0.01
1,"Agincourt North,L'Amoreaux East,Milliken,Steel...",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00
2,"Agincourt,",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00
4,"Alderwood,Long Branch,",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00
5,"Bathurst Manor,Downsview North,Wilson Heights,",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.00,0.052632,0.000000,0.000000,0.000000,0.000000,0.000000,0.00
6,"Bayview Village,",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00
7,"Bedford Park,Lawrence Manor East,",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00
8,"Berczy Park,",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.018182,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00
9,"Birch Cliff,Cliffside West,",0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00


In [79]:
toronto_grouped.shape

(101, 267)

In [81]:
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 10

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 6, 0, 7, 7, 0, 0, 0, 0, 0], dtype=int32)

In [87]:
m1 = pd.merge(m, toronto_grouped, on='Neighborhood', how='inner')

In [88]:
m1.shape

(101, 271)

In [89]:
m1['Cluster Labels'] = kmeans.labels_

In [90]:
m1.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Cluster Labels
0,M1B,Scarborough,"Rouge,Malvern,",43.806686,-79.194353,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union,",43.784535,-79.160497,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6
2,M1E,Scarborough,"Guildwood,Morningside,West Hill,",43.763573,-79.188711,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,M1G,Scarborough,"Woburn,",43.770992,-79.216917,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7
4,M1H,Scarborough,"Cedarbrae,",43.773136,-79.239476,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7


In [92]:
!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2019.11.28         |   py36h9f0ad1d_1         149 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    altair-4.0.1               |             py_0         575 KB  conda-forge
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    branca-0.4.0               |             py_0          26 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    ------------------------------------------------------------
                       

In [95]:
!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.21.0               |             py_0          58 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          92 KB

The following NEW packages will be INSTALLED:

    geographiclib: 1.50-py_0   conda-forge
    geopy:         1.21.0-py_0 conda-forge


Downloading and Extracting Packages
geopy-1.21.0         | 58 KB     | ##################################### | 100% 
geographiclib-1.50   | 34 KB     | ##################################### | 100% 
Preparing transaction: done
Verifying transaction: done
Executing transaction: done


In [96]:
address = 'Toronto, Canada'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

  app.launch_new_instance()


The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [99]:
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(m1['Latitude'], m1['Longitude'], m1['Neighborhood'], m1['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters