# Question 3
### Explore and cluster Toronto neighborhood data

In [185]:
#recreate dataframe from parts 1 and 2
import pandas as pd
# Postal codes, boroughs and neighborhoods scraped from Wikipedia;
# rows whose borough is 'Not assigned' are discarded.
toronto_codes = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]
toronto_codes = toronto_codes[toronto_codes['Borough'] != 'Not assigned']

# Latitude/longitude per postal code.
latlong = pd.read_csv('https://cocl.us/Geospatial_data').sort_values(by = 'Postal Code')

# Join on the postal code itself. Assigning latlong's columns directly would
# align rows by index label, which is only correct while both sources happen
# to list the codes in the same order. Note the key column is spelled
# differently in the two sources ('Postal code' vs 'Postal Code').
toronto_codes = (
    toronto_codes
    .merge(latlong[['Postal Code', 'Latitude', 'Longitude']],
           left_on = 'Postal code', right_on = 'Postal Code', how = 'left')
    .drop(columns = 'Postal Code')
    .sort_values(by = 'Postal code')
    .reset_index(drop = True)
)

In [186]:
# Preview the first rows to check that Latitude/Longitude were attached.
toronto_codes.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,Malvern / Rouge,43.806686,-79.194353
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek,43.784535,-79.160497
2,M1E,Scarborough,Guildwood / Morningside / West Hill,43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### Let's plot a map with all the postal codes labeled

In [5]:
#install folium
!conda install -c conda-forge folium=0.5.0 --yes
import folium

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    branca-0.4.0               |             py_0          26 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    certifi-2020.4.5.1         |   py36h9f0ad1d_0         151 KB  conda-forge
    ca-certificates-2020.4.5.1 |       hecc5488_0         146 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    ------------------------------------------------------------
                       

In [187]:
# Base map centred on the coordinates used for Toronto throughout this notebook.
toronto_map = folium.Map(location = [43.6532,-79.3832], zoom_start = 11)

# One circle marker per postal-code row; the popup reads "<neighborhood>, in <borough>".
for lat, lng, bor, neigh in zip(toronto_codes['Latitude'],toronto_codes['Longitude'],toronto_codes['Borough'],toronto_codes['Neighborhood']):
    # parse_html belongs to Popup (it escapes the label text); it is not a
    # CircleMarker option, so it is only passed here.
    label = folium.Popup('{}, in {}'.format(neigh, bor), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=4,
        popup=label,
        color='green',
        fill=True,
        fill_opacity=0.5).add_to(toronto_map)

toronto_map

## Make use of Foursquare

In [7]:
# The code was removed by Watson Studio for sharing.

In [12]:
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
import numpy as np

### I'm going to cluster neighborhoods based on the most common kind of restaurant

In [127]:
#modify the function from the lab to search for restaurants within ~half a mile of each neighborhood center
def getNearbyRestaurants(names, latitudes, longitudes, radius=800, LIMIT = 50):
    """Search Foursquare for restaurants around each neighborhood center.

    Relies on the notebook-level globals CLIENT_ID, CLIENT_SECRET and VERSION
    for authentication.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Neighborhood names and the coordinates to search around; they are
        consumed in parallel, one API request per element.
    radius : int, default 800
        Search radius sent to the API (800 is roughly half a mile in metres).
    LIMIT : int, default 50
        Maximum number of venues requested per neighborhood.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with columns 'Neighborhood', 'Restaurant'
        and 'Restaurant Category'. The category is NaN when the venue carries
        no category. An empty input yields an empty frame with these columns.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    column_names = ['Neighborhood', 'Restaurant', 'Restaurant Category']

    restaurant_list = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)  # progress indicator: one line per neighborhood queried

        # Let requests build and escape the query string instead of
        # formatting credentials and coordinates into the URL by hand.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/search',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
                'query': 'restaurant',
            },
            timeout=30,
        )
        response.raise_for_status()
        venues = response.json()['response']['venues']

        # keep only the venue name and its first listed category
        for venue in venues:
            categories = venue.get('categories') or []
            category = categories[0].get('name', np.nan) if categories else np.nan
            restaurant_list.append((name, venue['name'], category))

    # Passing the column names to the constructor keeps the schema intact even
    # when no venue was collected (assigning .columns afterwards would raise).
    nearby_restaurants = pd.DataFrame(restaurant_list, columns=column_names)

    return(nearby_restaurants)

In [129]:
# Collect restaurants for every postal-code row (one API request per row;
# each neighborhood name is printed as it is processed).
toronto_restaurants = getNearbyRestaurants(
    names=toronto_codes['Neighborhood'],
    latitudes=toronto_codes['Latitude'],
    longitudes=toronto_codes['Longitude'],
)


Malvern / Rouge
Rouge Hill / Port Union / Highland Creek
Guildwood / Morningside / West Hill
Woburn
Cedarbrae
Scarborough Village
Kennedy Park / Ionview / East Birchmount Park
Golden Mile / Clairlea / Oakridge
Cliffside / Cliffcrest / Scarborough Village West
Birch Cliff / Cliffside West
Dorset Park / Wexford Heights / Scarborough Town Centre
Wexford / Maryvale
Agincourt
Clarks Corners / Tam O'Shanter / Sullivan
Milliken / Agincourt North / Steeles East / L'Amoreaux East
Steeles West / L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview / Henry Farm / Oriole
Bayview Village
York Mills / Silver Hills
Willowdale / Newtonbrook
Willowdale
York Mills West
Willowdale
Parkwoods
Don Mills
Don Mills
Bathurst Manor / Wilson Heights / Downsview North
Northwood Park / York University
Downsview
Downsview
Downsview
Downsview
Victoria Village
Parkview Hill / Woodbine Gardens
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West / Riverdale
India Bazaar / The Beaches 

NameError: name 'toronot_restaurants' is not defined

In [130]:
# (rows, columns) first — one row per venue returned by the search — then a preview.
print(toronto_restaurants.shape)
toronto_restaurants.head()


(1078, 3)


Unnamed: 0,Neighborhood,Restaurant,Restaurant Category
0,Rouge Hill / Port Union / Highland Creek,Ted's Restaurant,Breakfast Spot
1,Guildwood / Morningside / West Hill,Wonder Season Chinese Restaurant,
2,Guildwood / Morningside / West Hill,Mahar Restaurant,
3,Guildwood / Morningside / West Hill,Peking Garden Restaurant,Chinese Restaurant
4,Guildwood / Morningside / West Hill,McDonald's,Fast Food Restaurant


In [131]:
# Take a real copy: plain assignment only creates a second name for the same
# DataFrame, so any later in-place change would alter the "backup" as well.
toronto_restaurants_backup = toronto_restaurants.copy() #in case I mess up the dataframe

In [133]:
#replace NaN values with "Uncategorized" (which I realize I should have just done in the original function)
# Assign the filled column back explicitly: calling fillna(inplace=True) on a
# column selected from the frame acts on an intermediate object and is not
# guaranteed to update the frame itself (deprecated chained in-place pattern).
toronto_restaurants['Restaurant Category'] = toronto_restaurants['Restaurant Category'].fillna('Uncategorized')
toronto_restaurants.head()

Unnamed: 0,Neighborhood,Restaurant,Restaurant Category
0,Rouge Hill / Port Union / Highland Creek,Ted's Restaurant,Breakfast Spot
1,Guildwood / Morningside / West Hill,Wonder Season Chinese Restaurant,Uncategorized
2,Guildwood / Morningside / West Hill,Mahar Restaurant,Uncategorized
3,Guildwood / Morningside / West Hill,Peking Garden Restaurant,Chinese Restaurant
4,Guildwood / Morningside / West Hill,McDonald's,Fast Food Restaurant


In [143]:
# One indicator column per restaurant category; the empty prefix/separator
# keeps the bare category name as the column name.
toronto_onehot = pd.get_dummies(
    toronto_restaurants[['Restaurant Category']],
    prefix="",
    prefix_sep="",
)
toronto_onehot['Neighborhood'] = toronto_restaurants['Neighborhood']

# Averaging the indicators within a neighborhood gives, per category, the
# fraction of that neighborhood's restaurants falling in the category.
by_neighborhood = toronto_onehot.groupby('Neighborhood')
toronto_grouped = by_neighborhood.mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,African Restaurant,American Restaurant,Argentinian Restaurant,Asian Restaurant,Bar,Beer Bar,Bistro,Breakfast Spot,Brewery,...,Szechuan Restaurant,Thai Restaurant,Theme Restaurant,Tibetan Restaurant,Turkish Restaurant,Uncategorized,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Wings Joint
0,Agincourt,0.000000,0.083333,0.0,0.000000,0.000000,0.00000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.00,0.000000,0.000000,0.083333,0.000000,0.000000,0.00000,0.0
1,Alderwood / Long Branch,0.000000,0.000000,0.0,0.000000,0.000000,0.00000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.0
2,Bathurst Manor / Wilson Heights / Downsview North,0.000000,0.000000,0.0,0.000000,0.200000,0.00000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.2
3,Bedford Park / Lawrence Manor East,0.000000,0.000000,0.0,0.000000,0.000000,0.00000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.00,0.000000,0.000000,0.166667,0.000000,0.000000,0.00000,0.0
4,Berczy Park,0.000000,0.047619,0.0,0.047619,0.023810,0.02381,0.000000,0.023810,0.0,...,0.000000,0.023810,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.02381,0.0
5,Birch Cliff / Cliffside West,0.000000,0.000000,0.0,0.500000,0.000000,0.00000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.00,0.000000,0.000000,0.500000,0.000000,0.000000,0.00000,0.0
6,Brockton / Parkdale Village / Exhibition Place,0.000000,0.000000,0.0,0.000000,0.083333,0.00000,0.000000,0.083333,0.0,...,0.000000,0.083333,0.00,0.083333,0.000000,0.000000,0.000000,0.083333,0.00000,0.0
7,Business reply mail Processing CentrE,0.000000,0.000000,0.0,0.000000,0.000000,0.00000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.0
8,CN Tower / King and Spadina / Railway Lands / ...,0.000000,0.000000,0.0,0.000000,1.000000,0.00000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.0
9,Caledonia-Fairbanks,0.000000,0.200000,0.0,0.000000,0.200000,0.00000,0.000000,0.000000,0.0,...,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.0


In [189]:
#k-means clustering
from sklearn.cluster import KMeans

k = 5

# Cluster on the category frequencies only. Dropping by keyword works on
# current pandas (the positional axis argument was removed), and also
# discarding any earlier 'Cluster Labels' column keeps old labels from being
# fed back in as a feature when this cell is re-run.
cluster_features = toronto_grouped.drop(columns=['Cluster Labels', 'Neighborhood'], errors='ignore')
kmeans = KMeans(n_clusters=k, random_state=0).fit(cluster_features)

# insert() refuses to overwrite an existing column, so remove a stale label
# column first; this makes the cell safe to execute more than once.
if 'Cluster Labels' in toronto_grouped.columns:
    toronto_grouped = toronto_grouped.drop(columns='Cluster Labels')
toronto_grouped.insert(0, 'Cluster Labels', kmeans.labels_)
toronto_grouped.head()

Unnamed: 0,Cluster Labels,Neighborhood,African Restaurant,American Restaurant,Argentinian Restaurant,Asian Restaurant,Bar,Beer Bar,Bistro,Breakfast Spot,...,Szechuan Restaurant,Thai Restaurant,Theme Restaurant,Tibetan Restaurant,Turkish Restaurant,Uncategorized,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Wings Joint
0,2,Agincourt,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0
1,3,Alderwood / Long Branch,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3,Bathurst Manor / Wilson Heights / Downsview North,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2
3,3,Bedford Park / Lawrence Manor East,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0
4,3,Berczy Park,0.0,0.047619,0.0,0.047619,0.02381,0.02381,0.0,0.02381,...,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0


In [161]:
# Keep only the label and the neighborhood name, then bring back postal code,
# borough and coordinates. The left join keeps every clustered neighborhood;
# a name listed under several postal codes produces one row per code.
df1 = toronto_grouped.loc[:, ['Cluster Labels', 'Neighborhood']]
toronto_neighborhoods_clustered = pd.merge(
    df1,
    toronto_codes,
    on='Neighborhood',
    how='left',
).drop_duplicates()
toronto_neighborhoods_clustered

Unnamed: 0,Cluster Labels,Neighborhood,Postal code,Borough,Latitude,Longitude
0,2,Agincourt,M1S,Scarborough,43.794200,-79.262029
1,3,Alderwood / Long Branch,M8W,Etobicoke,43.602414,-79.543484
2,3,Bathurst Manor / Wilson Heights / Downsview North,M3H,North York,43.754328,-79.442259
3,3,Bedford Park / Lawrence Manor East,M5M,North York,43.733283,-79.419750
4,3,Berczy Park,M5E,Downtown Toronto,43.644771,-79.373306
5,3,Birch Cliff / Cliffside West,M1N,Scarborough,43.692657,-79.264848
6,3,Brockton / Parkdale Village / Exhibition Place,M6K,West Toronto,43.636847,-79.428191
7,3,Business reply mail Processing CentrE,M7Y,East Toronto,43.662744,-79.321558
8,3,CN Tower / King and Spadina / Railway Lands / ...,M5V,Downtown Toronto,43.628947,-79.394420
9,3,Caledonia-Fairbanks,M6E,York,43.689026,-79.453512


In [191]:
#plot map with clustered neighborhoods
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
toronto_map_clusters = folium.Map(location=[43.6532,-79.3832], zoom_start = 11)

# set color scheme for the clusters (position i is the color of cluster label i)
col = ['green', 'blue', 'red', 'yellow','black']

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_neighborhoods_clustered['Latitude'], toronto_neighborhoods_clustered['Longitude'], toronto_neighborhoods_clustered['Neighborhood'], toronto_neighborhoods_clustered['Cluster Labels']):
    # KMeans labels start at 0, so index the palette with the label itself.
    # (Using cluster-1 made label 0 wrap around to the last color.) The modulo
    # keeps the lookup valid if there are ever more clusters than colors.
    marker_color = col[int(cluster) % len(col)]
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=4,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.5).add_to(toronto_map_clusters)

toronto_map_clusters

In [167]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest values in a row, skipping its first entry.

    The first element of ``row`` is ignored (callers pass rows whose first
    entry is the neighborhood name); the remaining entries are ranked from
    highest to lowest and the index labels of the first ``num_top_venues``
    are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [172]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in range(num_top_venues):
    # 1st/2nd/3rd use the listed suffixes, everything after that uses 'th'
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# Rank on a view without the cluster labels, leaving toronto_grouped itself
# untouched. return_most_common_venues skips only the first entry of a row, so
# 'Neighborhood' must be the leading column and the label column must not be
# among the ranked values. errors='ignore' lets this run whether or not the
# clustering cell has already added the labels.
venue_frequencies = toronto_grouped.drop(columns=['Cluster Labels'], errors='ignore')

# create a new dataframe: neighborhood name followed by its top categories
top_venue_rows = [
    [row['Neighborhood']] + list(return_most_common_venues(row, num_top_venues))
    for _, row in venue_frequencies.iterrows()
]
toronto_restaurants_sorted = pd.DataFrame(top_venue_rows, columns=columns, index=venue_frequencies.index)

toronto_restaurants_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Agincourt,Chinese Restaurant,Sandwich Place,American Restaurant,Indian Restaurant,Uncategorized
1,Alderwood / Long Branch,Korean Restaurant,Pizza Place,Wings Joint,German Restaurant,Falafel Restaurant
2,Bathurst Manor / Wilson Heights / Downsview North,Middle Eastern Restaurant,Bar,Sandwich Place,Wings Joint,Indian Restaurant
3,Bedford Park / Lawrence Manor East,Restaurant,Italian Restaurant,Uncategorized,Filipino Restaurant,Sushi Restaurant
4,Berczy Park,Restaurant,Fast Food Restaurant,Japanese Restaurant,American Restaurant,Italian Restaurant


In [173]:
# Attach the k-means label of each neighborhood as the first column. The labels
# are matched by position, so the rows must be in the same order as the frame
# the model was fitted on. insert() raises if the column already exists, so a
# previous copy is removed first to keep the cell re-runnable.
if 'Cluster Labels' in toronto_restaurants_sorted.columns:
    toronto_restaurants_sorted = toronto_restaurants_sorted.drop(columns='Cluster Labels')
toronto_restaurants_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [174]:
# Preview: cluster label, neighborhood and its most common restaurant categories.
toronto_restaurants_sorted.head()

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,2,Agincourt,Chinese Restaurant,Sandwich Place,American Restaurant,Indian Restaurant,Uncategorized
1,3,Alderwood / Long Branch,Korean Restaurant,Pizza Place,Wings Joint,German Restaurant,Falafel Restaurant
2,3,Bathurst Manor / Wilson Heights / Downsview North,Middle Eastern Restaurant,Bar,Sandwich Place,Wings Joint,Indian Restaurant
3,3,Bedford Park / Lawrence Manor East,Restaurant,Italian Restaurant,Uncategorized,Filipino Restaurant,Sushi Restaurant
4,3,Berczy Park,Restaurant,Fast Food Restaurant,Japanese Restaurant,American Restaurant,Italian Restaurant


In [177]:
# Neighborhoods assigned to cluster 0 and their most common restaurant categories.
is_cluster_0 = toronto_restaurants_sorted['Cluster Labels'] == 0
toronto_restaurants_sorted[is_cluster_0]

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
26,0,Eringate / Bloordale Gardens / Old Burnhamthor...,Restaurant,Wings Joint,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant
37,0,Humber Summit,Restaurant,Wings Joint,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant
77,0,Victoria Village,Restaurant,Wings Joint,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant
83,0,Woburn,Restaurant,Wings Joint,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant


In [178]:
# Neighborhoods assigned to cluster 1 and their most common restaurant categories.
is_cluster_1 = toronto_restaurants_sorted['Cluster Labels'] == 1
toronto_restaurants_sorted[is_cluster_1]

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
27,1,Fairview / Henry Farm / Oriole,Japanese Restaurant,Middle Eastern Restaurant,Wings Joint,German Restaurant,Falafel Restaurant
57,1,Rosedale,Japanese Restaurant,Wings Joint,German Restaurant,Falafel Restaurant,Fast Food Restaurant


In [180]:
# Neighborhoods assigned to cluster 2 and their most common restaurant categories.
is_cluster_2 = toronto_restaurants_sorted['Cluster Labels'] == 2
toronto_restaurants_sorted[is_cluster_2]

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,2,Agincourt,Chinese Restaurant,Sandwich Place,American Restaurant,Indian Restaurant,Uncategorized
12,2,Central Bay Street,Chinese Restaurant,Restaurant,Indian Restaurant,Dim Sum Restaurant,Diner
15,2,Clarks Corners / Tam O'Shanter / Sullivan,Chinese Restaurant,Caribbean Restaurant,Wings Joint,German Restaurant,Fast Food Restaurant
16,2,Cliffside / Cliffcrest / Scarborough Village West,Chinese Restaurant,Wings Joint,Event Space,Fast Food Restaurant,Filipino Restaurant
20,2,Del Ray / Mount Dennis / Keelsdale and Silvert...,Chinese Restaurant,Wings Joint,Event Space,Fast Food Restaurant,Filipino Restaurant
22,2,Dorset Park / Wexford Heights / Scarborough To...,Chinese Restaurant,Indian Restaurant,Middle Eastern Restaurant,Wings Joint,German Restaurant
23,2,Downsview,Chinese Restaurant,Latin American Restaurant,Restaurant,Filipino Restaurant,Turkish Restaurant
36,2,Hillcrest Village,Japanese Restaurant,Sandwich Place,Chinese Restaurant,Wings Joint,General Entertainment
38,2,Humewood-Cedarvale,Breakfast Spot,Middle Eastern Restaurant,Chinese Restaurant,Wings Joint,German Restaurant
41,2,Kensington Market / Chinatown / Grange Park,Chinese Restaurant,Vietnamese Restaurant,Dim Sum Restaurant,Korean Restaurant,Asian Restaurant


In [181]:
# Neighborhoods assigned to cluster 3 and their most common restaurant categories.
is_cluster_3 = toronto_restaurants_sorted['Cluster Labels'] == 3
toronto_restaurants_sorted[is_cluster_3]

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,3,Alderwood / Long Branch,Korean Restaurant,Pizza Place,Wings Joint,German Restaurant,Falafel Restaurant
2,3,Bathurst Manor / Wilson Heights / Downsview North,Middle Eastern Restaurant,Bar,Sandwich Place,Wings Joint,Indian Restaurant
3,3,Bedford Park / Lawrence Manor East,Restaurant,Italian Restaurant,Uncategorized,Filipino Restaurant,Sushi Restaurant
4,3,Berczy Park,Restaurant,Fast Food Restaurant,Japanese Restaurant,American Restaurant,Italian Restaurant
5,3,Birch Cliff / Cliffside West,Uncategorized,Asian Restaurant,Wings Joint,German Restaurant,Falafel Restaurant
6,3,Brockton / Parkdale Village / Exhibition Place,Japanese Restaurant,Thai Restaurant,Indian Restaurant,Fast Food Restaurant,Diner
7,3,Business reply mail Processing CentrE,Falafel Restaurant,Italian Restaurant,Wings Joint,German Restaurant,Fast Food Restaurant
8,3,CN Tower / King and Spadina / Railway Lands / ...,Bar,Wings Joint,Greek Restaurant,Fast Food Restaurant,Filipino Restaurant
9,3,Caledonia-Fairbanks,Spanish Restaurant,American Restaurant,Bar,Latin American Restaurant,Middle Eastern Restaurant
10,3,Canada Post Gateway Processing Centre,Indian Restaurant,Uncategorized,Turkish Restaurant,Restaurant,Chinese Restaurant


In [183]:
# Neighborhoods assigned to cluster 4 and their most common restaurant categories.
is_cluster_4 = toronto_restaurants_sorted['Cluster Labels'] == 4
toronto_restaurants_sorted[is_cluster_4]

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
44,4,Leaside,Indian Restaurant,Sushi Restaurant,Wings Joint,German Restaurant,Falafel Restaurant
73,4,The Kingsway / Montgomery Road / Old Mill North,Sushi Restaurant,Wings Joint,German Restaurant,Falafel Restaurant,Fast Food Restaurant
85,4,York Mills West,Diner,Sushi Restaurant,Wings Joint,German Restaurant,Falafel Restaurant
