### This Notebook will be used for the neighbourhood analysis capstone project for coursera

In [1]:
import pandas as pd
import numpy as np

#### Reading the contents from a wikipedia page

In [14]:
table=pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")

#### Retrieving the contents of the table to a dataframe

In [15]:
df=table[0]

#### Setting Postal Code as the index of the dataframe

In [16]:
df.set_index(df["Postal Code"], inplace=True)

In [17]:
df.drop("Postal Code", axis=1, inplace=True)

In [18]:
df

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1A,Not assigned,Not assigned
M2A,Not assigned,Not assigned
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...
M5Z,Not assigned,Not assigned
M6Z,Not assigned,Not assigned
M7Z,Not assigned,Not assigned
M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


#### Removing any boroughs which are not assigned

In [19]:
df=df[df.Borough!="Not assigned"]
df

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Regent Park, Harbourfront"
M6A,North York,"Lawrence Manor, Lawrence Heights"
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...
M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
M4Y,Downtown Toronto,Church and Wellesley
M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


#### Checking for unassigned neighbourhoods with assigned boroughs

In [20]:
df[df.Neighbourhood=="Not assigned"]

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1


In [21]:
df.shape

(103, 2)

#### Getting the Geographical location data of every neighbourhood in every borough

In [22]:
ll_data=pd.read_csv("C:/Users/Srinivasa Rao/Downloads/Geospatial_Coordinates.csv", index_col="Postal Code")
ll_data

Unnamed: 0_level_0,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.806686,-79.194353
M1C,43.784535,-79.160497
M1E,43.763573,-79.188711
M1G,43.770992,-79.216917
M1H,43.773136,-79.239476
...,...,...
M9N,43.706876,-79.518188
M9P,43.696319,-79.532242
M9R,43.688905,-79.554724
M9V,43.739416,-79.588437


#### Assigning the latitudes and longitudes to their respective locations

In [23]:
df["Latitude"]=ll_data["Latitude"]
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Latitude"]=ll_data["Latitude"]


Unnamed: 0_level_0,Borough,Neighbourhood,Latitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
M3A,North York,Parkwoods,43.753259
M4A,North York,Victoria Village,43.725882
M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260
M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301
...,...,...,...
M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654
M4Y,Downtown Toronto,Church and Wellesley,43.665860
M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744
M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258


In [24]:
df["Longitude"]=ll_data["Longitude"]
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Longitude"]=ll_data["Longitude"]


Unnamed: 0_level_0,Borough,Neighbourhood,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M3A,North York,Parkwoods,43.753259,-79.329656
M4A,North York,Victoria Village,43.725882,-79.315572
M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...
M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


In [25]:
from geopy.geocoders import Nominatim 

import requests
from pandas.io.json import json_normalize


import matplotlib.cm as cm
import matplotlib.colors as colors


from sklearn.cluster import KMeans


import folium
print('Libraries imported.')

Libraries imported.


#### Obtaining the location of Toronto, Canada to create a map

In [26]:
address="Toronto, Canada"
geolocator=Nominatim(user_agent="Toronto_Locator")
location=geolocator.geocode(address)
longitude=location.longitude
latitude=location.latitude
print("Goegraphical co-ordinates of Toronto:({},{})".format(longitude,latitude))

Goegraphical co-ordinates of Toronto:(-79.3839347,43.6534817)


#### Generating the map of Canada with each location marked with a circle

In [32]:
map_toronto=folium.Map(location=[latitude,longitude], zoom_level=10)

for lat, long, borough, neighbourhood in zip(df["Latitude"], df["Longitude"], df["Borough"], df["Neighbourhood"]):
    label='{},{}'.format(neighbourhood, borough)
    label=folium.Popup(label,parse_html=True)
    folium.CircleMarker(
    [lat, long],
    radius=3,
    popup=label,
    color='blue',
    fill=True,
    fill_colour='#3186cc',
    fill_opacity=0.7,
    parse_html=False).add_to(map_toronto)
    
map_toronto

#### Generating a clustered map to cluster nearby locations

In [37]:
from folium import plugins

map_toronto=folium.Map(location=[latitude,longitude], zoom_level=10)

clusters=plugins.MarkerCluster().add_to(map_toronto)

for lat, long, borough, neighbourhood in zip(df["Latitude"], df["Longitude"], df["Borough"], df["Neighbourhood"]):
    label='{},{}'.format(neighbourhood, borough)
    label=folium.Popup(label,parse_html=True)
    folium.Marker(
    location=[lat, long],
    popup=label).add_to(clusters)
    
map_toronto
