# City of Toronto Data
### Using Wikipedia, I scraped data to explore, segment, and cluster the neighborhoods in the city of Toronto

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np


In [2]:
# URL of the Wikipedia page listing Canadian postal codes that start with "M"
wiki_link = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# Download the page. Reuse wiki_link instead of repeating the URL literal, and
# set a timeout because requests waits indefinitely by default.
wiki_page = requests.get(wiki_link, timeout=30)
# Fail here on an HTTP error (4xx/5xx) instead of parsing an error page later.
wiki_page.raise_for_status()

#### Downloading and Extracting Data From Wikipedia
##### Assumptions: Wikipedia data is correct


In [3]:
# The response body (HTML) as a string
page = wiki_page.text

# Locate the opening tag of the postal-code table. str.find returns -1 when the
# marker is absent, which would silently produce a wrong slice, so fail loudly.
beginning = page.find('<table class="wikitable sortable">')
if beginning == -1:
    raise ValueError("Could not find the opening tag of the postal-code table")

# Look for the closing tag only after the opening tag, so that an earlier
# table on the page cannot make `end` land before `beginning`.
end = page.find('</tbody></table>', beginning)
if end == -1:
    raise ValueError("Could not find the closing tag of the postal-code table")

# Table markup from the opening tag up to (not including) the closing tags
wiki_table_text = page[beginning:end]

In [4]:
# Parse the table HTML and collect the cell text of every row
soup = BeautifulSoup(wiki_table_text, "html.parser")
table_rows = soup.find_all('tr')

res = []
for tr in table_rows:
    td = tr.find_all('td')
    # Keep empty cells in place so values stay aligned with their columns;
    # the loop variable is no longer shadowed inside the comprehension.
    row = [cell.text.strip() for cell in td]
    # Rows without any <td> (e.g. a header row) or with only blank cells
    # carry no data and are skipped.
    if any(row):
        res.append(row)

# Build the raw table with one row per scraped table row
df = pd.DataFrame(res, columns=["Postcode", "Borough", "Neighbourhood"])
# Drop rows with value 'Not assigned' in the Borough column
df_filtered = df.query("Borough != 'Not assigned'")


#### Cleaning the Data
##### Assumptions: Rows with an unassigned borough are dropped; an unassigned neighbourhood is replaced with its borough name

In [5]:
#group by Postcode and Borough to make neighbourhood a list
def create_list(neighbourhood_list):
    """Combine neighbourhood names into a single comma-separated string.

    Args:
        neighbourhood_list: Iterable of neighbourhood name strings.

    Returns:
        The names joined with ', ' in iteration order; an empty string when
        the iterable is empty.
    """
    # str.join avoids repeated string concatenation and the manual trimming
    # of the trailing separator.
    return ', '.join(neighbourhood_list)

# One row per (Postcode, Borough) pair, with that pair's neighbourhood names
# combined by create_list.
df_new = (
    df_filtered
    .groupby(['Postcode', 'Borough'])
    .agg({'Neighbourhood': create_list})
)

In [6]:
# Move the group keys (Postcode, Borough) from the index back into columns.
# Guarded so that re-running this cell is a no-op instead of adding a
# spurious 'index' column.
if 'Postcode' not in df_new.columns:
    df_new = df_new.reset_index()

In [7]:
# Where the neighbourhood is 'Not assigned', fall back to the borough name.
neighbourhood_missing = df_new['Neighbourhood'] == 'Not assigned'
df_new['Neighbourhood'] = df_new['Neighbourhood'].mask(neighbourhood_missing, df_new['Borough'])
df_new

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [8]:
# Sanity check: (number of rows, number of columns) of the cleaned table
df_new.shape

(103, 3)

In [9]:
# import sys

# !{sys.executable} -m pip install geocoder

In [10]:
# import geocoder 

# def get_coords(row):
#     Postcode = row['Postcode']
#     # initialize your variable to None
#     lat_lng_coords = None

#     # loop until you get the coordinates
#     while(lat_lng_coords is None):
#         g = geocoder.google('{}, Toronto, Ontario'.format(Postcode))
#         lat_lng_coords = g.latlng

#     latitude = lat_lng_coords[0]
#     longitude = lat_lng_coords[1]
#     return {latitude:lat_lng_coords[0], longitude:lat_lng_coords[1]}

# df_new['Latitude'] = df_new.apply(get_coords, axis=1)
# df_new

In [21]:
# Load the geospatial coordinates CSV from the working directory
coords_data = pd.read_csv(filepath_or_buffer="Geospatial_Coordinates.csv")
# Show the first five rows of the loaded table
coords_data.head(n=5)

FileNotFoundError: File b'Geospatial_Coordinates.csv' does not exist

In [22]:
# The code was removed by Watson Studio for sharing.

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [34]:
# Align the coordinate table's key column name with df_new's 'Postcode'
df_data = df_data_1.rename(columns={'Postal Code': 'Postcode'})
# Left-join the coordinates onto the neighbourhood table, matching on Postcode
df_all = (
    df_new
    .set_index('Postcode')
    .join(df_data.set_index('Postcode'), how='left')
)
df_all


Unnamed: 0_level_0,Borough,Neighbourhood,Latitude,Longitude
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
M1G,Scarborough,Woburn,43.770992,-79.216917
M1H,Scarborough,Cedarbrae,43.773136,-79.239476
M1J,Scarborough,Scarborough Village,43.744734,-79.239476
M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848
