# Getting location of the neighborhoods

# (Please go to the end to see the code for this part)

## A) Loading the required table from Wikipedia

In [2]:
#importing Required Packages

import pandas as pd

In [3]:
# Parse every HTML table found on the Wikipedia page; read_html returns them as a list.
POSTAL_CODES_URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
tables = pd.read_html(POSTAL_CODES_URL)

In [4]:
# Number of tables that were parsed from the page
len(tables)

3

In [7]:
# Keep the first parsed table as the working DataFrame
df=tables[0]

In [8]:
# Preview the first rows of the loaded table
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [9]:
# (rows, columns) of df at this point
df.shape

(180, 3)

## B) Removing "Not assigned" Borough

In [12]:
# Keep only the rows whose Borough is something other than the "Not assigned" placeholder
has_borough = df['Borough'].ne("Not assigned")
df = df[has_borough]

In [13]:
# Preview the first rows after filtering on Borough
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


## C) Checking for Postal Codes that appear in more than one row

In [17]:
# Summary of each column; if `count` equals `unique` for Postal Code,
# no postal code appears in more than one row.
df.describe()

Unnamed: 0,Postal Code,Borough,Neighborhood
count,103,103,103
unique,103,10,99
top,M2M,North York,Downsview
freq,1,24,4


We see that every postal code is unique (`count` equals `unique` for the Postal Code column). Hence we don't need to combine rows that share a postal code but list different neighborhoods.

## D) Checking for Neighborhoods that are "Not assigned" even though the Borough has a value

In [15]:
# Show any rows that still carry the "Not assigned" placeholder as their Neighborhood
unassigned_neighborhood = df['Neighborhood'].eq("Not assigned")
df[unassigned_neighborhood]

Unnamed: 0,Postal Code,Borough,Neighborhood


We find no rows where the Neighborhood is "Not assigned" while the Borough has a value.  
So we don't need to copy the borough name into the neighborhood column for any row.

## E) Checking the number of rows in the dataframe

In [22]:
# Report how many rows the table currently has
row_count = len(df)
print("There are {} no of rows present".format(row_count))

There are 103 no of rows present


#### Hence we notice that there are 103 rows present.

## Resetting Index

In [42]:
# Filtering left gaps in the row labels; renumber them 0..n-1 and discard the old labels
df=df.reset_index(drop=True)

# Q2) Getting the location of Neighborhoods

In [30]:
#Importing Required Package
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim

We create empty latitude and longitude lists and then append the coordinates of each location to them, one entry per row.

In [68]:
# Geocode every row of df into parallel `latitude` / `longitude` lists
# (one entry per row, in row order).
geolocater = Nominatim(user_agent="toronto_agent")

# The public Nominatim service allows at most one request per second, so wrap
# geocode() in geopy's RateLimiter. With its default settings the limiter also
# retries failed requests and returns None if they keep failing, which the
# None handling below already covers.
geocode = RateLimiter(geolocater.geocode, min_delay_seconds=1)

latitude = []
longitude = []

# Iterate over the column values directly so the loop does not depend on the
# DataFrame having a 0..n-1 index.
for neighborhood, borough in zip(df["Neighborhood"], df["Borough"]):
    # Try the most specific query first ...
    location = geocode(neighborhood + "," + borough)

    # ... and fall back to the borough alone when nothing matched.
    if location is None:
        location = geocode(borough)

    if location is None:
        # geocode() returns None when there is no match. Record NaN so the
        # lists stay aligned with df instead of raising AttributeError.
        latitude.append(float("nan"))
        longitude.append(float("nan"))
    else:
        latitude.append(location.latitude)
        longitude.append(location.longitude)


In [71]:
# Attach the geocoded coordinates to the table as two new columns
coordinate_columns = {"Latitude": latitude, "Longitude": longitude}
for column_name, column_values in coordinate_columns.items():
    df[column_name] = column_values

In [72]:
# Preview the table with the new Latitude / Longitude columns
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M4A,North York,Victoria Village,43.732658,-79.311189
1,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654174,-79.380812
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.754326,-79.449117
3,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.654174,-79.380812
4,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.668499,-79.53397
