# Segmenting and Clustering Neighborhoods in Toronto - Part 3: Clustering

## 1. Having explored Toronto's neighborhoods in part 2, we complete the task by clustering them.

In [51]:
import pandas as pd

## 2. Import longitude and latitude information for each postal code.

In [52]:
# Download the postal-code -> (latitude, longitude) lookup table as a DataFrame
# and preview its first rows.
geo_csv_url = "http://cocl.us/Geospatial_data/"
latlong = pd.read_csv(geo_csv_url)
latlong.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [53]:
# Rename only the "Postal Code" column to "PostalCode" (the key used when merging
# with the borough/neighbourhood table). An explicit mapping touches just that
# label, whereas overwriting `.columns` positionally would silently mislabel the
# data if the CSV's column order ever changed.
latlong = latlong.rename(columns={'Postal Code': 'PostalCode'})
latlong.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## 3. Make sure the 'new' dataframe's shape is the same as that described by the question

In [54]:
# Display the (rows, columns) shape of the coordinates table for a manual check.
latlong.shape

(103, 3)

In [55]:
# Scrape the Wikipedia table of Toronto ("M") postal codes and reduce it to one
# row per postal code with its borough and a comma-separated neighbourhood list.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# keep_default_na=False keeps the literal text "Not assigned" (and any "NA"-like
# strings) as strings instead of converting them to NaN.
table = pd.read_html(url, header=0, keep_default_na=False)
tdf = table[0]  # NOTE(review): assumes the postal-code table is the first table on the page
tdf.columns = ['PostalCode', 'Borough', 'Neighborhood']

# Drop postal codes that have no borough assigned.
tdf = tdf.query('Borough != "Not assigned"').reset_index(drop=True)

# Collapse duplicate postal codes. `.unique()` removes repeats while keeping the
# order of first appearance; joining a `set` instead yields an arbitrary order
# that can differ between kernel restarts (string hashing is randomised).
tdf1 = tdf.groupby('PostalCode', as_index=False).agg(
    lambda x: ', '.join(x.dropna().unique())
)

# Where the neighbourhood is still "Not assigned", fall back to the borough name.
unassigned = tdf1['Neighborhood'] == 'Not assigned'
tdf1.loc[unassigned, 'Neighborhood'] = tdf1.loc[unassigned, 'Borough']
tdf1.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Highland Creek, Port Union"
2,M1E,Scarborough,"Morningside, West Hill, Guildwood"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


## 4. We merge the tables, with Postal Code, Borough, Neighborhood, Latitude, and Longitude as column headings.

In [56]:
# Attach coordinates to each postal code. A right join keeps every row of the
# coordinates table, matching on the shared "PostalCode" column.
tdf2 = tdf1.merge(latlong, how="right", on="PostalCode")
tdf2.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Highland Creek, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Morningside, West Hill, Guildwood",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


## 5. Now, we'll take a closer look at Toronto.

### I could not import Nominatim, despite my best efforts, so I am jumping straight to hard-coded latitude and longitude values.

In [57]:
import folium

# Base map centred on the hard-coded latitude/longitude pair below.
map_Toronto = folium.Map(location=[43.761539, -79.411079], zoom_start=11)

# Draw one blue circle marker per row of tdf2; clicking a marker shows a
# "<neighbourhood>, <borough>" popup.
marker_rows = tdf2[['Latitude', 'Longitude', 'Borough', 'Neighborhood']]
for lat, lng, bor, nei in marker_rows.itertuples(index=False, name=None):
    label = folium.Popup('{}, {}'.format(nei, bor), parse_html=True)
    marker = folium.CircleMarker(
        location=[lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    )
    marker.add_to(map_Toronto)

# Last expression of the cell: render the map inline.
map_Toronto

#### Define Foursquare Credentials and Version

In [48]:
import os

# Foursquare credentials and API version.
#
# The client ID and secret are read from the environment variables
# FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET instead of being hard-coded,
# so they never end up in the saved notebook or its rendered output.
# NOTE(review): the keys that were previously committed in this cell should be
# treated as compromised and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # your Foursquare Secret
VERSION = '20180605'  # Foursquare API version

# Report only whether the credentials were found; never echo their values.
if CLIENT_ID and CLIENT_SECRET:
    print('Foursquare credentials loaded from the environment.')
else:
    print('Foursquare credentials missing: set FOURSQUARE_CLIENT_ID and '
          'FOURSQUARE_CLIENT_SECRET before calling the API.')

Your credentials:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>
