# <h1><center>Segmenting and Clustering Neighborhoods in Toronto</center></h1>

<p>First of all we will import <i>pandas</i> library for scraping and wrangling data.

In [55]:
import pandas as pd # library for data analsysis

<p>Now we will use <i>pandas</i> read_html() method to read list of postal codes of canada</p>

In [56]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# Extract tables
dfs = pd.read_html(url)

# Get first table                                                                                                           
df = dfs[0]

<p>Our orignial data frame shows (180,3) shape dimension</p>

In [57]:
df.shape

(180, 3)

<p>We will only process the cells that have an <b>assigned borough</b> and we will ignore cells with a borough that is <b>Not assigned</b>.</p>

In [58]:
df = df[df['Borough']!='Not assigned']

<p>Now we will reset_index and display head</p>

In [59]:
df.reset_index(drop=True, inplace=True)
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


<p>Displaying unique Postal Codes</p>

In [60]:
df['Postal Code'].unique()

array(['M3A', 'M4A', 'M5A', 'M6A', 'M7A', 'M9A', 'M1B', 'M3B', 'M4B',
       'M5B', 'M6B', 'M9B', 'M1C', 'M3C', 'M4C', 'M5C', 'M6C', 'M9C',
       'M1E', 'M4E', 'M5E', 'M6E', 'M1G', 'M4G', 'M5G', 'M6G', 'M1H',
       'M2H', 'M3H', 'M4H', 'M5H', 'M6H', 'M1J', 'M2J', 'M3J', 'M4J',
       'M5J', 'M6J', 'M1K', 'M2K', 'M3K', 'M4K', 'M5K', 'M6K', 'M1L',
       'M2L', 'M3L', 'M4L', 'M5L', 'M6L', 'M9L', 'M1M', 'M2M', 'M3M',
       'M4M', 'M5M', 'M6M', 'M9M', 'M1N', 'M2N', 'M3N', 'M4N', 'M5N',
       'M6N', 'M9N', 'M1P', 'M2P', 'M4P', 'M5P', 'M6P', 'M9P', 'M1R',
       'M2R', 'M4R', 'M5R', 'M6R', 'M7R', 'M9R', 'M1S', 'M4S', 'M5S',
       'M6S', 'M1T', 'M4T', 'M5T', 'M1V', 'M4V', 'M5V', 'M8V', 'M9V',
       'M1W', 'M4W', 'M5W', 'M8W', 'M9W', 'M1X', 'M4X', 'M5X', 'M8X',
       'M4Y', 'M7Y', 'M8Y', 'M8Z'], dtype=object)

<p>Finally the shape of our refined dataframe</p>

In [61]:
df.shape

(103, 3)

# Geographical coordinates of the neighborhoods in the Toronto

In [119]:
# define the dataframe columns
column_names = ['PostalCode','Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)
neighborhoods

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude


In [120]:
filename = "https://cocl.us/Geospatial_data"
dt = pd.read_csv(filename)
dt.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [121]:
for index, row in df.iterrows():
    postal_code = row['Postal Code']
    borough = row['Borough']
    neighborhood = row['Neighborhood']
    
    lat = dt[dt['Postal Code']==postal_code].Latitude.to_string()
    lon = dt[dt['Postal Code']==postal_code].Longitude.to_string()
    
    neighborhoods = neighborhoods.append({'PostalCode':postal_code,
                                          'Borough': borough,
                                          'Neighborhood': neighborhood,
                                          'Latitude': lat,
                                          'Longitude': lon}, ignore_index=True)

In [124]:
neighborhoods.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,25 43.753259,25 -79.329656
1,M4A,North York,Victoria Village,34 43.725882,34 -79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",53 43.65426,53 -79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",71 43.718518,71 -79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",85 43.662301,85 -79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",93 43.667856,93 -79.532242
6,M1B,Scarborough,"Malvern, Rouge",0 43.806686,0 -79.194353
7,M3B,North York,Don Mills,26 43.745906,26 -79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",35 43.706397,35 -79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",54 43.657162,54 -79.378937
