#### Importing Required Libraries

In [6]:
import numpy as np
import pandas as pd
import json
import folium
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans


#### Scraping the table from the Wikipedia page into a pandas DataFrame

In [17]:
# read_html returns one DataFrame per <table> found on the page; header=0
# makes the first row of each table its column header.
toronto_table = pd.read_html(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    header=0,
)
# The first table on the page is the one used as the postal-code listing.
toronto_df = toronto_table[0]
print(toronto_df.head(), toronto_df.shape, sep='\n')

  Postal Code           Borough               Neighborhood
0         M1A      Not assigned               Not assigned
1         M2A      Not assigned               Not assigned
2         M3A        North York                  Parkwoods
3         M4A        North York           Victoria Village
4         M5A  Downtown Toronto  Regent Park, Harbourfront
(180, 3)


#### Renaming Columns as required by guidelines

In [18]:
# Replace the spaced header with the single-token name 'PostalCode', which
# later cells use as the grouping and merge key.
toronto_df = toronto_df.rename({'Postal Code': 'PostalCode'}, axis='columns')
toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


#### Doing a general EDA to know how many Not Assigned Boroughs are present in the data

In [19]:
# Number of rows per distinct Borough value, including the 'Not assigned' placeholder.
toronto_df['Borough'].value_counts()

Not assigned        77
North York          24
Downtown Toronto    19
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
East Toronto         5
York                 5
East York            5
Mississauga          1
Name: Borough, dtype: int64

#### Dropping rows where Borough is "Not assigned" and resetting the index

In [20]:
# Keep only rows whose Borough is a real value, then renumber the index
# from 0 so it no longer carries gaps from the removed rows.
toronto_df = toronto_df.loc[
    toronto_df['Borough'].ne('Not assigned')
].reset_index(drop=True)
print(toronto_df.shape, toronto_df.head(), sep='\n')

(103, 3)
  PostalCode           Borough                                 Neighborhood
0        M3A        North York                                    Parkwoods
1        M4A        North York                             Victoria Village
2        M5A  Downtown Toronto                    Regent Park, Harbourfront
3        M6A        North York             Lawrence Manor, Lawrence Heights
4        M7A  Downtown Toronto  Queen's Park, Ontario Provincial Government


#### Joining Neighborhoods in the same row which have same Postal Code and Borough

In [21]:
# Collapse rows that share a postal code and borough into a single row whose
# Neighborhood value lists every neighbourhood of that group.
# ', ' (comma + space) matches the separator already used inside the scraped
# Neighborhood cells (e.g. 'Malvern, Rouge'), so joined values stay uniform.
toronto_df = (
    toronto_df
    .groupby(['PostalCode', 'Borough'])['Neighborhood']
    .agg(', '.join)
    .reset_index()
)
# Preview only the first rows instead of rendering the whole frame.
toronto_df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


#### Printing the shape of Filtered/ Clean DataFrame having Toronto Neighborhood Data

In [29]:
# (row count, column count) of the cleaned frame.
toronto_df.shape

(103, 3)

#### Reading Geospatial Data CSV to get latitude and longitude of neighborhoods in cleaned toronto dataframe

In [31]:
# Load the coordinate lookup table and align its key column name with
# toronto_df ('PostalCode') so the two frames can be merged on it.
geospatial_df = (
    pd.read_csv('https://cocl.us/Geospatial_data')
    .rename(columns={'Postal Code': 'PostalCode'})
)
geospatial_df.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### Joining geospatial dataframe with toronto neighborhood dataframe on the key Postal Code

In [34]:
# Left join: every row of toronto_df is kept and gains the Latitude/Longitude
# of its postal code; a code absent from geospatial_df would yield NaN there.
joined_df = toronto_df.merge(geospatial_df, how='left', on='PostalCode')
print(joined_df.head(), joined_df.shape, sep='\n')

  PostalCode      Borough                            Neighborhood   Latitude  \
0        M1B  Scarborough                          Malvern, Rouge  43.806686   
1        M1C  Scarborough  Rouge Hill, Port Union, Highland Creek  43.784535   
2        M1E  Scarborough       Guildwood, Morningside, West Hill  43.763573   
3        M1G  Scarborough                                  Woburn  43.770992   
4        M1H  Scarborough                               Cedarbrae  43.773136   

   Longitude  
0 -79.194353  
1 -79.160497  
2 -79.188711  
3 -79.216917  
4 -79.239476  
(103, 5)


#### Doing a general EDA for the Toronto Data

In [36]:
# Summary line: number of distinct Borough values and number of rows in the
# merged frame (one row per postal code after the grouping step).
print('Toronto has {} boroughs and {} neighborhoods.'.format(
    joined_df['Borough'].nunique(dropna=False),
    len(joined_df),
))

Toronto has 10 boroughs and 103 neighborhoods.


#### Using GeoLocator Library to find coordinates for Toronto, CA

In [37]:
address = 'Toronto, CA'

# Nominatim's usage policy requires callers to identify themselves. geopy
# emits a deprecation warning when user_agent is omitted and geopy >= 2.0
# raises ConfigurationError, so pass an explicit application name.
geolocator = Nominatim(user_agent='toronto_neighborhood_explorer')
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when the service finds no match; fail with a
    # clear message instead of an AttributeError on the next line.
    raise ValueError('Could not geocode address: {!r}'.format(address))

# Map centre used by the folium cells below.
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto, Canada are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto, Canada are 43.6534817, -79.3839347.


  app.launch_new_instance()


In [38]:
# Alias, not a copy: neighbor and joined_df refer to the same DataFrame object.
neighbor = joined_df

### Creating Map of Toronto

In [41]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# The coordinates come from a left merge, so a postal code missing from the
# geospatial table would carry NaN here; folium rejects NaN locations, so
# such rows are left off the map instead of aborting the cell.
neighbor_located = neighbor.dropna(subset=['Latitude', 'Longitude'])

# add one circle marker per row, with a "<neighborhood>, <borough>" popup
for lat, lng, borough, neighborhood in zip(
        neighbor_located['Latitude'],
        neighbor_located['Longitude'],
        neighbor_located['Borough'],
        neighbor_located['Neighborhood']):
    # parse_html=True makes folium escape the label text before embedding it
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    # parse_html belongs to Popup only; it is not a CircleMarker option,
    # so it is no longer passed here.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='orange',
        fill=True,
        fill_color='black',
        fill_opacity=0.7,
    ).add_to(map_toronto)

map_toronto

#### Finding Boroughs where Toronto is present in the Borough name

In [45]:
# Rows per borough in the merged frame (neighbor is the same object as joined_df).
neighbor['Borough'].value_counts()

North York          24
Downtown Toronto    19
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
East Toronto         5
East York            5
York                 5
Mississauga          1
Name: Borough, dtype: int64

#### We will be doing Cluster analysis in 4 regions: Downtown Toronto, Central Toronto, West Toronto, East Toronto

In [48]:
# Restrict the analysis to the four boroughs named below and renumber the
# index from 0 for the resulting subset.
df = neighbor.loc[
    neighbor['Borough'].isin(
        ['Downtown Toronto', 'Central Toronto', 'West Toronto', 'East Toronto']
    )
].reset_index(drop=True)
print(df.head(10), df.shape, sep='\n')

  PostalCode          Borough  \
0        M4E     East Toronto   
1        M4K     East Toronto   
2        M4L     East Toronto   
3        M4M     East Toronto   
4        M4N  Central Toronto   
5        M4P  Central Toronto   
6        M4R  Central Toronto   
7        M4S  Central Toronto   
8        M4T  Central Toronto   
9        M4V  Central Toronto   

                                        Neighborhood   Latitude  Longitude  
0                                        The Beaches  43.676357 -79.293031  
1                       The Danforth West, Riverdale  43.679557 -79.352188  
2                     India Bazaar, The Beaches West  43.668999 -79.315572  
3                                    Studio District  43.659526 -79.340923  
4                                      Lawrence Park  43.728020 -79.388790  
5                                   Davisville North  43.712751 -79.390197  
6                  North Toronto West, Lawrence Park  43.715383 -79.405678  
7                   

#### Visualizing 4 Areas that we are considering

In [50]:
# create map of Toronto using latitude and longitude values
toronto_map = folium.Map(location=[latitude, longitude], zoom_start=12)

# folium rejects NaN locations, so rows without coordinates are left off
# the map instead of aborting the cell.
df_located = df.dropna(subset=['Latitude', 'Longitude'])

# add one circle marker per row, with a "<neighborhood>, <borough>" popup
for lat, lng, borough, neighborhood in zip(
        df_located['Latitude'],
        df_located['Longitude'],
        df_located['Borough'],
        df_located['Neighborhood']):
    # parse_html=True makes folium escape the label text before embedding it
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    # parse_html belongs to Popup only; it is not a CircleMarker option,
    # so it is no longer passed here.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='orange',
        fill=True,
        fill_color='black',
        fill_opacity=0.7,
    ).add_to(toronto_map)

toronto_map