# PART 1

## Import required packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Storing the url and reading data from the url

In [2]:
html = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
table = pd.read_html(html)

## Just checking the type of the data

In [3]:
type(table[0])

pandas.core.frame.DataFrame

## Copying the contents of the table to variable data

In [4]:
data = table[0].copy()

## Original table from wikipedia

In [5]:
data

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


## Removing the rows which who's borough is not assigned

In [6]:
data = data[data.Borough != 'Not assigned']

In [7]:
data

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


## Finding out the rows who's Neighbourhood is not assigned

In [8]:
data[data.Neighbourhood == 'Not assigned'] ## No rows

Unnamed: 0,Postal Code,Borough,Neighbourhood


## Using the. shapre metho to print number of rows of the dataframe

In [9]:
data.shape[0]

103

# PART 2

## Load the geo codes data

In [10]:
geo_codes = pd.read_csv('http://cocl.us/Geospatial_data')

## Store the postal code values from the original data

In [11]:
post_code = data['Postal Code']

## Store the latitude and longitude values for corresponding postal codes

In [12]:
latitude = []
longitude = []
for pcd in post_code:
    i = 0
    for pcg in geo_codes['Postal Code']:
        if pcd == pcg:
            #print('i = ', i)
            latitude.append(geo_codes['Latitude'][i])
            longitude.append(geo_codes['Longitude'][i])
        i += 1

## Adding the columns longitude and latitude to the earlier created data frame

In [13]:
data['Latitude'] = latitude
data['Longitude'] = longitude

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


## Display the data

In [14]:
data

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
2,M3A,North York,Parkwoods,43.753259,-79.329656
3,M4A,North York,Victoria Village,43.725882,-79.315572
4,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
5,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
165,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


# PART 3

## Importing necessary libraries

In [15]:
import json

from geopy.geocoders import Nominatim 

import requests 
from pandas.io.json import json_normalize 

import matplotlib.cm as cm
import matplotlib.colors as colors


from sklearn.cluster import KMeans

import folium # map rendering library


In [16]:
column_names = ['Borough', 'Neighbourhood', 'Latitude', 'Longitude'] 

In [17]:
neighborhoods = data[column_names]

In [18]:
neighborhoods.reset_index(drop = True, inplace = True)

In [19]:
neighborhoods.head()

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,North York,Parkwoods,43.753259,-79.329656
1,North York,Victoria Village,43.725882,-79.315572
2,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [20]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighborhoods['Borough'].unique()),
        neighborhoods.shape[0]
    )
)

The dataframe has 10 boroughs and 103 neighborhoods.


## Finding the latitude and longitude values for toronto

In [21]:
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


## Finding the boroughs with toronto

In [22]:
toronto_data = neighborhoods[neighborhoods['Borough'].str.contains('Toronto',regex=False)]

## Creating map

In [23]:
map_toronto = folium.Map(location=[43.6534817, -79.3839347], zoom_start=10)

In [24]:
for lat, lng, borough, neighborhood in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Borough'], toronto_data['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)

In [25]:
map_toronto

## Using K-Means clustering to cluster the neighbourhoods in toronto

In [26]:
## Importing libraries
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors

In [27]:
k = 6
cluster = toronto_data[['Latitude', 'Longitude']]
kmeans = KMeans(n_clusters = k, random_state = 0).fit(cluster)
kmeans.labels_
toronto_data.insert(0, 'Cluster Labels', kmeans.labels_)

In [28]:
toronto_data.head()

Unnamed: 0,Cluster Labels,Borough,Neighbourhood,Latitude,Longitude
2,0,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,2,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,2,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,2,Downtown Toronto,St. James Town,43.651494,-79.375418
19,4,East Toronto,The Beaches,43.676357,-79.293031


In [29]:
map_clust = folium.Map(location=[43.651070,-79.347015],zoom_start=10)


x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]


markers_colors = []
for lat, lon, neighbourhood, cluster in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighbourhood'], toronto_data['Cluster Labels']):
    label = folium.Popup(' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clust)
       
map_clust