# **Week 3 capstone assignment Part 2**

# Sementing and Clustering Neighborhoods in Toronto

#### Preprocessing from earlier notebook

In [46]:
import pandas as pd
import requests
import bs4

res = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = bs4.BeautifulSoup(res.text,"lxml")

wiki_table = soup.find("table",{"class":"wikitable sortable"})
rows = wiki_table.find_all("tr")

postal_codes = []
boroughs = []
neighborhoods = []

for row in rows:
    cells = row.find_all("td")
    if len(cells) > 0:
        postal_codes.append(cells[0].text.strip())
        boroughs.append(cells[1].text.strip())
        neighborhoods.append(cells[2].text.strip())

# zip lists so each row has one of each value
combined_list = list(zip(postal_codes, boroughs, neighborhoods))
column_names = ['PostalCode', 'Borough', 'Neighborhood']

df = pd.DataFrame(combined_list, columns=column_names)

df = df[df.Borough != 'Not assigned']
df.reset_index(drop=True)

df

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


---

---

# Create dataframe like

<img src="week3_img2.png">

In [47]:
# Organize the dataframe a little
# Sort by PostalCode column and reset index count to 0

df.sort_values(by=['PostalCode'])
df.reset_index(drop=True, inplace=True)

df['Latitude'] = ""
df['Longitude'] = ""
df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,,
1,M4A,North York,Victoria Village,,
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",,
3,M6A,North York,"Lawrence Manor, Lawrence Heights",,
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",,
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",,
99,M4Y,Downtown Toronto,Church and Wellesley,,
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",,
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",,


**NOTE: Geocoder requests don't seem to be working, will continue with CSV provided.**

In [3]:
# NOTE: DO NOT USE
# import geocoder

# for index, row in df.iterrows():
#     lat_lng_coords = None
#     postal_code = row['PostalCode']

#     # loop until you get the coordinates
#     while(lat_lng_coords is None):
#       g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
#       lat_lng_coords = g.latlng

#     df.loc[index, 'Latitude'] = lat_lng_coords[0]
#     df.loc[index, 'Longitude'] = lat_lng_coords[1]
    
# df

### Read in CSV

In [40]:
filename = 'Geospatial_Coordinates.csv'
csv_df = pd.read_csv(filename)
csv_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### For PostalCode in DataFrame, add Lat/Lon values from csv

In [49]:
for index, row in df.iterrows():
    postal_code = row['PostalCode']
    csv_row = csv_df.loc[csv_df['Postal Code'] == postal_code]
    
    # Assign first row [0] column Lat/Lon from csv_row to df row @ index on column Lat/Lon
    df.loc[index, 'Latitude'] = csv_row.iloc[0]['Latitude']
    df.loc[index, 'Longitude'] = csv_row.iloc[0]['Longitude']

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7533,-79.3297
1,M4A,North York,Victoria Village,43.7259,-79.3156
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6543,-79.3606
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.7185,-79.4648
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6623,-79.3895


### Final DataFrame

In [60]:
df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7533,-79.3297
1,M4A,North York,Victoria Village,43.7259,-79.3156
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6543,-79.3606
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.7185,-79.4648
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6623,-79.3895
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.6537,-79.5069
99,M4Y,Downtown Toronto,Church and Wellesley,43.6659,-79.3832
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.6627,-79.3216
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.6363,-79.4985
