# **Toronto Neighborhoods**

## PART 1

In [19]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

Initialize BeautifulSoup and download text from Wikipedia

In [20]:
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(source,'lxml')

Alter the data and convert it to dataframe

In [21]:
# Initialize variables
first = []
pc = []
bo = []
ne = []

# Find the table and scrape content
table = soup.find('table', class_='wikitable sortable')
assigned = table.find_all('td')

for row in table.find_all('td'):
    first.append(row.text)
    
# Iterate through data and save it to 3 separate lists
rows = len(first)//3

for i in range(0,rows,3):
    pc.append(first[i])
for i in range(1,rows,3):
    bo.append(first[i])
for i in range(2,rows,3):
    ne.append(first[i].strip())

# Initialize dataframe and fill it with content
df = pd.DataFrame()
df['Postcode'] = pc
df['Borough'] = bo
df['Neighbourhood'] = ne

# Drop rows where 'Borough' is not assigned
df = df[df.Borough != 'Not assigned']

# Group Neighbourhoods with same Postcode
df = df.groupby(['Postcode','Borough'])['Neighbourhood'].apply(lambda x: ', '.join(x.astype(str).unique())).reset_index()

# Assign name to Neighbourhood if 'Not assigned'
count = range(df.shape[0])

for i in count:
    if (df['Neighbourhood'][i] == 'Not assigned'):
        df['Neighbourhood'][i] = df['Borough'][i]

df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [22]:
df.shape

(41, 3)

---------------------------------------------------

## PART 2

Get coordinates using Geocoder

In [5]:
import geocoder # import geocoder
import numpy as np

In [None]:
# initialize your variable to None
lat_lng_coords = None

# Get random Postal Code from list
rnum = np.random.randint(low=0, high=40, size=1)
postal_code = df['Postcode'][rnum]
print('Your postal code is: '+ postal_code)

# loop until you get the coordinates
while(lat_lng_coords is None):
  g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
  lat_lng_coords = g.latlng

latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1]

print(latitude, longitude)

16    Your postal code is: M4A
Name: Postcode, dtype: object


**Doesn't work too well, so I'm gonna use the .csv**

In [23]:
df_loc = pd.read_csv('https://cocl.us/Geospatial_data')

print('Data downloaded and read into a dataframe!')

Data downloaded and read into a dataframe!


In [30]:
df_combined = df.merge(df_loc,left_on='Postcode',right_on='Postal Code',how='left')
df_combined = df_combined.drop(['Postal Code'], axis=1)
df_combined

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M2H,North York,Hillcrest Village,43.803762,-79.363452
8,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556
9,M2K,North York,Bayview Village,43.786947,-79.385975


In [18]:
numbers = [1, 2, 3]
letters = ["A", "B", "C"]

for numbers_value, letters_value in zip(numbers, letters):
    print(numbers_value, letters_value)

1 A
2 B
3 C
