# Applied Data Science Capstone
#### (This notebook will be mainly used for the capstone project)

* You might need to install some libraries to run this notebook. Uncomment any of these lines if you haven't installed yet

In [None]:
#GEOCODER
#!conda install -c conda-forge/label/cf202003 geocoder
#print('Geocoder installed successfully')

#FOLIUM
#!conda install -c conda-forge folium=0.5.0 --yes
#print('Folium installed successfully')

In [10]:
import pandas as pd
import numpy as np
import geocoder
import folium

## Getting Canada's Postal Codes

In [38]:
#Reading the Wikipedia table
link = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
CanadaPC = pd.read_html(link)[0]

#CLEANING THE TABLE
# Filters Not Assigend Borough
df_clean = CanadaPC.loc[(CanadaPC['Borough'] != 'Not assigned')] 
# If there's no Neighbohood assigns Borough
df_clean.Neighborhood.fillna(df_clean.Borough, inplace=True)           
# Resets the index of the DF
df_clean = df_clean.reset_index(drop=True)                             
# Replaces '/' separator to ',' separtor for multiple neighborhoods
df_clean['Neighborhood'].replace(to_replace ='/', value = ',', regex = True,inplace=True)


#Showing df
df_clean.head(20)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._update_inplace(new_data)


Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern , Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill , Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [13]:
df_clean.shape

(103, 3)

## Getting Neighborhood Locations

#### Testing one Postal Code to check geocoder

In [31]:
# initialize your variable to None
lat_lng_coords = None

postal_code = 'M5G'
# loop until you get the coordinates
while(lat_lng_coords is None):
    g = geocoder.arcgis('{}, Toronto, Ontario'.format(postal_code))
    lat_lng_coords = g.latlng

latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1]

print(latitude)
print(longitude)

43.65607218800005
-79.38565318999997


#### Getting coords for every Postal Code

In [36]:
neighborhoods = pd.DataFrame(columns=['Postal_Code', 'Latitude', 'Longitude'])

for i in range(len(df)) :
    
    lat_lng_coords = None

    postal_code = df.loc[i, "Postal code"]
    
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = g.latlng

    neighborhood_lat = lat_lng_coords[0]
    neighborhood_lon = lat_lng_coords[1]
    
    neighborhoods = neighborhoods.append({'Postal_Code': postal_code,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

neighborhoods.head()

Unnamed: 0,Postal_Code,Latitude,Longitude
0,M3A,43.752935,-79.335641
1,M4A,43.728102,-79.31189
2,M5A,43.650964,-79.353041
3,M6A,43.723265,-79.451211
4,M7A,43.66179,-79.38939


In [41]:
neighborhoods.shape

(103, 3)

In [39]:
df = df_clean.set_index('Postal code').join(neighborhoods.set_index('Postal_Code'))

In [45]:
df.reset_index()
df.head(20)

Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude
Postal code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M3A,North York,Parkwoods,43.752935,-79.335641
M4A,North York,Victoria Village,43.728102,-79.31189
M5A,Downtown Toronto,"Regent Park , Harbourfront",43.650964,-79.353041
M6A,North York,"Lawrence Manor , Lawrence Heights",43.723265,-79.451211
M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.66179,-79.38939
M9A,Etobicoke,Islington Avenue,43.667481,-79.528953
M1B,Scarborough,"Malvern , Rouge",43.808626,-79.189913
M3B,North York,Don Mills,43.7489,-79.35722
M4B,East York,"Parkview Hill , Woodbine Gardens",43.707193,-79.311529
M5B,Downtown Toronto,"Garden District, Ryerson",43.657491,-79.377529


### Obtaining Toronto Neighborhoods only

In [54]:
df.Borough.unique()

array(['North York', 'Downtown Toronto', 'Etobicoke', 'Scarborough',
       'East York', 'York', 'East Toronto', 'West Toronto',
       'Central Toronto', 'Mississauga'], dtype=object)

In [50]:
toronto_data = df[df['Borough'].str.contains('Toronto', regex=False)]
print(toronto_data.shape)
toronto_data.head(10)

(39, 4)


Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude
Postal code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M5A,Downtown Toronto,"Regent Park , Harbourfront",43.650964,-79.353041
M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.66179,-79.38939
M5B,Downtown Toronto,"Garden District, Ryerson",43.657491,-79.377529
M5C,Downtown Toronto,St. James Town,43.651734,-79.375554
M4E,East Toronto,The Beaches,43.678148,-79.295349
M5E,Downtown Toronto,Berczy Park,43.645196,-79.373855
M5G,Downtown Toronto,Central Bay Street,43.656072,-79.385653
M6G,Downtown Toronto,Christie,43.668602,-79.420387
M5H,Downtown Toronto,"Richmond , Adelaide , King",43.650542,-79.384116
M6H,West Toronto,"Dufferin , Dovercourt Village",43.66491,-79.438664


## Map of Toronto Neighborhoods

In [55]:
#First we need to obtain Toronto's coords
g = geocoder.arcgis('Toronto, ON')
lat_lng_coords = g.latlng

latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1]

print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.648690000000045, -79.38543999999996.


In [56]:
# create map of Manhattan using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto