# Segmenting and Clustering Neighborhoods in Toronto

### Solomin Oleg 
### 18 April 2020

In [1]:
# Wikipedia page "List of postal codes of Canada: M" - the source of the raw table
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

In [2]:
# importing pandas library
import pandas as pd

In [3]:
# Download the page and parse every HTML table on it;
# read_html returns a list of DataFrames.
raw_data = pd.read_html(io=url)

In [4]:
# Keep the first table parsed from the page as the working dataframe.
df = raw_data[0]

In [5]:
# Preview the first five rows of the parsed table.
df.head(n=5)

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


In [6]:
# The raw table contains postal codes without a borough (the literal string
# "Not assigned") and rows with a missing (NaN) Neighborhood.
# Filter the "Not assigned" boroughs explicitly instead of relying on those rows
# also having a NaN somewhere, then drop any remaining rows with missing values.
# The original index is kept on purpose (no reset_index).
has_borough = df['Borough'] != 'Not assigned'
df_adj = df[has_borough].dropna(axis=0)
df_adj.head()

Unnamed: 0,Postal code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [7]:
# Show the (rows, columns) shape of the cleaned dataset.

df_adj.shape

(103, 3)

In [8]:
# Location of the CSV file that holds the coordinates
url_coord = 'http://cocl.us/Geospatial_data'

# Load the coordinates into a dataframe
coord = pd.read_csv(url_coord)

# Rename 'Postal Code' to 'Postal code' so that both dataframes share
# the same key column name when they are merged later.
coord_adj = coord.rename({'Postal Code': 'Postal code'}, axis='columns')

In [9]:
# Show the (rows, columns) shape of the coordinate dataframe.
coord_adj.shape

(103, 3)

In [10]:
# Preview the first five rows of the coordinate dataframe.
coord_adj.head(n=5)

Unnamed: 0,Postal code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [11]:
# Join the cleaned postal-code table with the coordinates on the shared
# 'Postal code' column (inner join, which is the pandas default).
result = df_adj.merge(coord_adj, how='inner', on='Postal code')

In [12]:
# Preview the first five rows of the merged dataframe.
result.head(n=5)

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636
3,M6A,North York,Lawrence Manor / Lawrence Heights,43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494


## York borough

In [23]:
# Keep only the rows whose Borough name contains "York",
# then renumber the rows from zero.
is_york = result['Borough'].str.contains('York')
york_data = result.loc[is_york].reset_index(drop=True)
york_data.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M6A,North York,Lawrence Manor / Lawrence Heights,43.718518,-79.464763
3,M3B,North York,Don Mills,43.745906,-79.352188
4,M4B,East York,Parkview Hill / Woodbine Gardens,43.706397,-79.309937


In [26]:
import folium
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

In [27]:
# Look up the coordinates of the address with the Nominatim geocoder.
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)

# geocode() returns None when no match is found; fail with a clear message
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [30]:
# Build a folium map centred on the latitude/longitude values and
# mark every neighbourhood listed in york_data.
map_york = folium.Map(location=[latitude, longitude], zoom_start=10)

# Options shared by every marker.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle per row; the popup carries the neighbourhood name.
for lat, lng, label in zip(york_data['Latitude'], york_data['Longitude'], york_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([lat, lng], popup=label, **marker_style).add_to(map_york)

map_york

## THE END