# The Battle of Neighborhood: An Italian adventure in Toronto

Before starting the exam, let's import the libraries

In [392]:
import requests # library to handle requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np # library to handle data in a vectorized manner
import requests
# Matplotlib and associated plotting modules
import matplotlib as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
%matplotlib inline

import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# import k-means from clustering stage
from sklearn.cluster import KMeans
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API la
import folium # map rendering library

print('Libraries imported.')

Solving environment: done


  current version: 4.5.11
  latest version: 4.7.10

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.

Solving environment: done


  current version: 4.5.11
  latest version: 4.7.10

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.

Libraries imported.


# Web Scraping: let's get the data from Wikipedia

Let's get the data from Wikipedia!

In [393]:
postalcode_url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(postalcode_url,'lxml')

Let's generate the Table by doing some web scraping

In [394]:
My_table = soup.find('table',{'class':'wikitable sortable'})
#select all the cells
cells = My_table.find_all('td')

#Generate empty lists for Postalcode, Borough and Neighborhood, and then populate them with for cycles 
post= []
for i in range(0, len(cells), 3):
      post.append(cells[i].text.strip())
bor = []
for j in range(1, len(cells), 3):
    bor.append(cells[j].text.strip())
nei = []
for k in range(2, len(cells), 3):
    nei.append(cells[k].text.strip())

df = pd.DataFrame(data=[post, bor, nei]).transpose() #Need to do that to have the same table, so we transpose it 
df.columns = ['Postalcode', 'Borough', 'Neighborhood']
df #let's have a look at our table

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


Let's Exclude from the table all the cells with no assigned Borough, generating a new dataframe

In [395]:
df1= df[df.Borough != 'Not assigned']
df1

Unnamed: 0,Postalcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


Combine into one row the neighboorhoods separated with a comma

In [396]:
df2=df1.groupby(['Postalcode', 'Borough'])['Neighborhood'].apply(','.join).reset_index()
df2

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park"
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge"
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff,Cliffside West"


When the Neighborhood is not assigned, we make it equal to the Borough

In [397]:
df2.loc[df2['Neighborhood'] == 'Not assigned', ['Neighborhood']] = df2['Borough']
df2

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park"
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge"
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff,Cliffside West"


In the last cell of your notebook, use the .shape method to print the number of rows of your dataframe(required by exercise 1)

In [398]:
df2.shape

(103, 3)

# Exercise 2

Let's import the data from the csv file

In [399]:
!wget -q -O 'Toronto_long_lat_data.csv'  http://cocl.us/Geospatial_data

Let's read the data and create a pandas dataframe

In [400]:
df3 = pd.read_csv('Toronto_long_lat_data.csv')
df3.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Let's change the name of the columns. This step is vital since we later merge two tables based on a column name 

In [401]:
df3.columns=['Postalcode','Latitude','Longitude']
df3

Unnamed: 0,Postalcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [402]:
df3.shape #same number of row of before, so we can merge the two tables

(103, 3)

Let's merge the tables, creating the table required by exercise 2

In [403]:
df4= pd.merge(df2,df3, on='Postalcode')
df4

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848


# Exercise 3

Use geopy library to get the latitude and longitude values of Toronto.

In [404]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="Toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [405]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df4['Latitude'], df4['Longitude'], df4['Borough'], df4['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

Let's get the list of borough of our next analysis

In [406]:
df4['Borough'].unique()

array(['Scarborough', 'North York', 'East York', 'East Toronto',
       'Central Toronto', 'Downtown Toronto', 'York', 'West Toronto',
       "Queen's Park", 'Mississauga', 'Etobicoke'], dtype=object)

## Downtown Toronto analysis

Segment and cluster the neighborhoods in Downtown Toronto. So let's slice the original dataframe and create a new dataframe of Downtown Toronto 

In [407]:
downtown_toronto = df4[df4['Borough'] == 'Downtown Toronto'].reset_index(drop=True)
downtown_toronto.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529
1,M4X,Downtown Toronto,"Cabbagetown,St. James Town",43.667967,-79.367675
2,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316
3,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.65426,-79.360636
4,M5B,Downtown Toronto,"Ryerson,Garden District",43.657162,-79.378937


Foursquare Credentials and version

In [410]:
CLIENT_ID = 'DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL' # your Foursquare ID
CLIENT_SECRET = 'G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL
CLIENT_SECRET:G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP


Let's explore the first neighborhood in our dataframe: get the n

In [411]:
downtown_toronto.loc[0, 'Neighborhood']

'Rosedale'

In [412]:
neighborhood_latitude = downtown_toronto.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = downtown_toronto.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = downtown_toronto.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Rosedale are 43.6795626, -79.37752940000001.


Now, let's get the top 100 venues that are in Rosedale within a radius of 500 meters.

Let's create the GET request URL. Name your URL **url**.

In [413]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605&ll=43.6795626,-79.37752940000001&radius=500&limit=100'

Send the GET request and examine the resutls

In [414]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c678abf7dde002c468acc'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Rosedale',
  'headerFullLocation': 'Rosedale, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 5,
  'suggestedBounds': {'ne': {'lat': 43.6840626045, 'lng': -79.37131878274371},
   'sw': {'lat': 43.675062595499995, 'lng': -79.38374001725632}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bae2150f964a520df873be3',
       'name': 'Mooredale House',
       'location': {'address': '146 Crescent Rd.',
        'crossStreet': 'btwn. Lamport Ave. and Mt. Pleasant Rd.',
        'lat': 43.678630645646535,
        'lng': -79.38009142511322,
        'lab

Let's borrow the **get_category_type** function from the Foursquare lab.

In [415]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [416]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Mooredale House,Building,43.678631,-79.380091
1,Rosedale Park,Playground,43.682328,-79.378934
2,Whitney Park,Park,43.682036,-79.373788
3,Alex Murray Parkette,Park,43.6783,-79.382773
4,Milkman's Lane,Trail,43.676352,-79.373842


Let's determine how many venus are returned by Foursquare


In [417]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

5 venues were returned by Foursquare.


#### Let's create a function to repeat the same process to all the neighborhoods in Downtown Toronto

In [418]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Now let's write the code to run the above function on each neighborhood and create a new dataframe called *downtown_toronto_venues*.

In [419]:

downtown_toronto_venues = getNearbyVenues(names=downtown_toronto['Neighborhood'],
                                   latitudes=downtown_toronto['Latitude'],
                                   longitudes=downtown_toronto['Longitude']
                                  )


Rosedale
Cabbagetown,St. James Town
Church and Wellesley
Harbourfront,Regent Park
Ryerson,Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide,King,Richmond
Harbourfront East,Toronto Islands,Union Station
Design Exchange,Toronto Dominion Centre
Commerce Court,Victoria Hotel
Harbord,University of Toronto
Chinatown,Grange Park,Kensington Market
CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place,Underground city
Christie


#### Let's check the size of the resulting dataframe

In [420]:
print(downtown_toronto_venues.shape)
downtown_toronto_venues.head()

(1284, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Rosedale,43.679563,-79.377529,Mooredale House,43.678631,-79.380091,Building
1,Rosedale,43.679563,-79.377529,Rosedale Park,43.682328,-79.378934,Playground
2,Rosedale,43.679563,-79.377529,Whitney Park,43.682036,-79.373788,Park
3,Rosedale,43.679563,-79.377529,Alex Murray Parkette,43.6783,-79.382773,Park
4,Rosedale,43.679563,-79.377529,Milkman's Lane,43.676352,-79.373842,Trail


Let's check how many venues were returned for each neighborhood

In [421]:
downtown_toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide,King,Richmond",100,100,100,100,100,100
Berczy Park,56,56,56,56,56,56
"CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara",16,16,16,16,16,16
"Cabbagetown,St. James Town",45,45,45,45,45,45
Central Bay Street,84,84,84,84,84,84
"Chinatown,Grange Park,Kensington Market",100,100,100,100,100,100
Christie,16,16,16,16,16,16
Church and Wellesley,84,84,84,84,84,84
"Commerce Court,Victoria Hotel",100,100,100,100,100,100
"Design Exchange,Toronto Dominion Centre",100,100,100,100,100,100


#### Let's find out how many unique categories can be curated from all the returned venues

In [422]:
print('There are {} uniques categories.'.format(len(downtown_toronto_venues['Venue Category'].unique())))

There are 203 uniques categories.


#### Analyze Each Neighborhood

In [423]:
# one hot encoding
downtown_toronto_onehot = pd.get_dummies(downtown_toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
downtown_toronto_onehot['Neighborhood'] = downtown_toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [downtown_toronto_onehot.columns[-1]] + list(downtown_toronto_onehot.columns[:-1])
downtown_toronto_onehot = downtown_toronto_onehot[fixed_columns]

downtown_toronto_onehot.head()

Unnamed: 0,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0


In [424]:
downtown_toronto_onehot.shape

(1284, 203)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [425]:
downtown_toronto_grouped= downtown_toronto_onehot.groupby('Neighborhood').mean().reset_index()
downtown_toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint
0,"Adelaide,King,Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,...,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0
2,"CN Tower,Bathurst Quay,Island airport,Harbourf...",0.0,0.0,0.0625,0.0625,0.0625,0.125,0.1875,0.125,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Cabbagetown,St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,...,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.011905,0.0
5,"Chinatown,Grange Park,Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.01,0.0,0.0,0.0,0.06,0.0,0.04,0.01,0.0
6,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Church and Wellesley,0.011905,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,...,0.011905,0.0,0.0,0.0,0.0,0.0,0.011905,0.011905,0.0,0.011905
8,"Commerce Court,Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,...,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0
9,"Design Exchange,Toronto Dominion Centre",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,...,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0


#### Let's print each neighborhood along with the top 5 most common venues

In [426]:
num_top_venues = 5

for hood in downtown_toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = downtown_toronto_grouped[downtown_toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide,King,Richmond----
             venue  freq
0      Coffee Shop  0.07
1             Café  0.05
2              Bar  0.04
3  Thai Restaurant  0.04
4       Steakhouse  0.04


----Berczy Park----
          venue  freq
0   Coffee Shop  0.11
1  Cocktail Bar  0.05
2          Café  0.04
3    Steakhouse  0.04
4      Beer Bar  0.04


----CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara----
              venue  freq
0   Airport Service  0.19
1    Airport Lounge  0.12
2  Airport Terminal  0.12
3   Harbor / Marina  0.06
4       Coffee Shop  0.06


----Cabbagetown,St. James Town----
                venue  freq
0          Restaurant  0.07
1         Coffee Shop  0.07
2  Italian Restaurant  0.04
3              Market  0.04
4                 Pub  0.04


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.14
1  Italian Restaurant  0.06
2                Café  0.05
3      Ice Cream Shop  0.05
4        Burger Joint  0

### Put the data in a Pandas Dataframe

First, let's write a function to sort the venues in descending order.

In [427]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [428]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = downtown_toronto_grouped['Neighborhood']

for ind in np.arange(downtown_toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(downtown_toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,King,Richmond",Coffee Shop,Café,Steakhouse,Bar,Thai Restaurant,Burger Joint,Asian Restaurant,Cosmetics Shop,American Restaurant,Hotel
1,Berczy Park,Coffee Shop,Cocktail Bar,Cheese Shop,Bakery,Seafood Restaurant,Farmers Market,Beer Bar,Steakhouse,Café,Italian Restaurant
2,"CN Tower,Bathurst Quay,Island airport,Harbourf...",Airport Service,Airport Terminal,Airport Lounge,Boat or Ferry,Sculpture Garden,Coffee Shop,Boutique,Bar,Airport Gate,Airport Food Court
3,"Cabbagetown,St. James Town",Coffee Shop,Restaurant,Market,Pub,Italian Restaurant,Bakery,Pizza Place,Café,Breakfast Spot,Butcher
4,Central Bay Street,Coffee Shop,Italian Restaurant,Ice Cream Shop,Café,Sandwich Place,Burger Joint,Chinese Restaurant,Indian Restaurant,Sushi Restaurant,Middle Eastern Restaurant


### Cluster Neighborhoods

Run *k*-means to cluster the neighborhood into 5 clusters.

In [429]:
# set number of clusters
kclusters = 5

downtown_toronto_grouped_clustering = downtown_toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(downtown_toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 3, 0, 0, 2, 4, 0, 0, 0], dtype=int32)

In [430]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

downtown_toronto_merged = downtown_toronto

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
downtown_toronto_merged = downtown_toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

downtown_toronto_merged.head() # check the last columns!

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,1,Park,Playground,Trail,Building,Dance Studio,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Dive Bar
1,M4X,Downtown Toronto,"Cabbagetown,St. James Town",43.667967,-79.367675,0,Coffee Shop,Restaurant,Market,Pub,Italian Restaurant,Bakery,Pizza Place,Café,Breakfast Spot,Butcher
2,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,0,Coffee Shop,Japanese Restaurant,Gay Bar,Sushi Restaurant,Restaurant,Café,Gastropub,Fast Food Restaurant,Hotel,Pub
3,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.65426,-79.360636,0,Coffee Shop,Park,Café,Pub,Bakery,Theater,Mexican Restaurant,Restaurant,Breakfast Spot,Gym / Fitness Center
4,M5B,Downtown Toronto,"Ryerson,Garden District",43.657162,-79.378937,0,Coffee Shop,Clothing Store,Cosmetics Shop,Fast Food Restaurant,Café,Middle Eastern Restaurant,Restaurant,Diner,Ice Cream Shop,Italian Restaurant


#### Visualize the results

In [431]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(downtown_toronto_merged['Latitude'], downtown_toronto_merged['Longitude'], downtown_toronto_merged['Neighborhood'], downtown_toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### Examine Clusters

In [432]:
#CLuster 1
downtown_toronto_merged.loc[downtown_toronto_merged['Cluster Labels'] == 0, downtown_toronto_merged.columns[[1] + list(range(5, downtown_toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Downtown Toronto,0,Coffee Shop,Restaurant,Market,Pub,Italian Restaurant,Bakery,Pizza Place,Café,Breakfast Spot,Butcher
2,Downtown Toronto,0,Coffee Shop,Japanese Restaurant,Gay Bar,Sushi Restaurant,Restaurant,Café,Gastropub,Fast Food Restaurant,Hotel,Pub
3,Downtown Toronto,0,Coffee Shop,Park,Café,Pub,Bakery,Theater,Mexican Restaurant,Restaurant,Breakfast Spot,Gym / Fitness Center
4,Downtown Toronto,0,Coffee Shop,Clothing Store,Cosmetics Shop,Fast Food Restaurant,Café,Middle Eastern Restaurant,Restaurant,Diner,Ice Cream Shop,Italian Restaurant
5,Downtown Toronto,0,Coffee Shop,Hotel,Café,Restaurant,Italian Restaurant,Cocktail Bar,Cosmetics Shop,Breakfast Spot,Beer Bar,Gastropub
6,Downtown Toronto,0,Coffee Shop,Cocktail Bar,Cheese Shop,Bakery,Seafood Restaurant,Farmers Market,Beer Bar,Steakhouse,Café,Italian Restaurant
7,Downtown Toronto,0,Coffee Shop,Italian Restaurant,Ice Cream Shop,Café,Sandwich Place,Burger Joint,Chinese Restaurant,Indian Restaurant,Sushi Restaurant,Middle Eastern Restaurant
8,Downtown Toronto,0,Coffee Shop,Café,Steakhouse,Bar,Thai Restaurant,Burger Joint,Asian Restaurant,Cosmetics Shop,American Restaurant,Hotel
9,Downtown Toronto,0,Coffee Shop,Hotel,Aquarium,Café,Italian Restaurant,Sporting Goods Shop,Brewery,Pizza Place,Scenic Lookout,Bakery
10,Downtown Toronto,0,Coffee Shop,Café,Hotel,Restaurant,Italian Restaurant,Gastropub,Bar,Gym,Bakery,American Restaurant


In [433]:
#CLuster 2
downtown_toronto_merged.loc[downtown_toronto_merged['Cluster Labels'] == 1, downtown_toronto_merged.columns[[1] + list(range(5, downtown_toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,1,Park,Playground,Trail,Building,Dance Studio,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Dive Bar


In [434]:
#CLuster 3
downtown_toronto_merged.loc[downtown_toronto_merged['Cluster Labels'] == 2, downtown_toronto_merged.columns[[1] + list(range(5, downtown_toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Downtown Toronto,2,Café,Bookstore,Bar,Restaurant,Japanese Restaurant,Bakery,Italian Restaurant,Beer Bar,College Gym,Sandwich Place
13,Downtown Toronto,2,Café,Vegetarian / Vegan Restaurant,Chinese Restaurant,Bakery,Vietnamese Restaurant,Bar,Mexican Restaurant,Dumpling Restaurant,Coffee Shop,Comfort Food Restaurant


In [435]:
#CLuster 4
downtown_toronto_merged.loc[downtown_toronto_merged['Cluster Labels'] == 3, downtown_toronto_merged.columns[[1] + list(range(5, downtown_toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Downtown Toronto,3,Airport Service,Airport Terminal,Airport Lounge,Boat or Ferry,Sculpture Garden,Coffee Shop,Boutique,Bar,Airport Gate,Airport Food Court


In [436]:
#CLuster 5
downtown_toronto_merged.loc[downtown_toronto_merged['Cluster Labels'] == 4, downtown_toronto_merged.columns[[1] + list(range(5, downtown_toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,Downtown Toronto,4,Café,Grocery Store,Park,Baby Store,Coffee Shop,Italian Restaurant,Athletics & Sports,Restaurant,Nightclub,Convenience Store


## Scarborough Analysis

In [437]:
Scarborough = df4[df4['Borough'] == 'Scarborough'].reset_index(drop=True)
Scarborough.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


Let's explore the first neighborhood in our dataframe: get the n

In [440]:
Scarborough.loc[0, 'Neighborhood']

'Rouge,Malvern'

In [441]:
neighborhood_latitude = Scarborough.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = Scarborough.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = Scarborough.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Rouge,Malvern are 43.806686299999996, -79.19435340000001.


Now, let's get the top 100 venues that are in Rouge, Malvern within a radius of 500 meters.

Let's create the GET request URL. Name your URL **url**.

In [442]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605&ll=43.806686299999996,-79.19435340000001&radius=500&limit=100'

Send the GET request and examine the resutls

In [443]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c6799697e3a00253cef97'},
  'headerLocation': 'Malvern',
  'headerFullLocation': 'Malvern, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 1,
  'suggestedBounds': {'ne': {'lat': 43.8111863045, 'lng': -79.18812958073042},
   'sw': {'lat': 43.80218629549999, 'lng': -79.2005772192696}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bb6b9446edc76b0d771311c',
       'name': "Wendy's",
       'location': {'crossStreet': 'Morningside & Sheppard',
        'lat': 43.80744841934756,
        'lng': -79.19905558052072,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.80744841934756,
          'lng': -79.19905558052072}],
        'distance': 387,
        'cc': 'CA',
        'city': 'Toronto',
    

Let's borrow the **get_category_type** function from the Foursquare lab.

In [444]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [445]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Wendy's,Fast Food Restaurant,43.807448,-79.199056


Let's determine how many venus are returned by Foursquare


In [446]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

1 venues were returned by Foursquare.


#### Let's create a function to repeat the same process to all the neighborhoods in Scarborough

In [447]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Now let's write the code to run the above function on each neighborhood and create a new dataframe called *downtown_toronto_venues*.

In [448]:

Scarborough_venues = getNearbyVenues(names=Scarborough['Neighborhood'],
                                   latitudes=Scarborough['Latitude'],
                                   longitudes=Scarborough['Longitude']
                                  )


Rouge,Malvern
Highland Creek,Rouge Hill,Port Union
Guildwood,Morningside,West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park,Ionview,Kennedy Park
Clairlea,Golden Mile,Oakridge
Cliffcrest,Cliffside,Scarborough Village West
Birch Cliff,Cliffside West
Dorset Park,Scarborough Town Centre,Wexford Heights
Maryvale,Wexford
Agincourt
Clarks Corners,Sullivan,Tam O'Shanter
Agincourt North,L'Amoreaux East,Milliken,Steeles East
L'Amoreaux West
Upper Rouge


#### Let's check the size of the resulting dataframe

In [449]:
print(Scarborough_venues.shape)
Scarborough_venues.head()

(89, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge,Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,Scarborough Historical Society,43.788755,-79.162438,History Museum
3,"Guildwood,Morningside,West Hill",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
4,"Guildwood,Morningside,West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store


Let's check how many venues were returned for each neighborhood

In [450]:
Scarborough_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
"Agincourt North,L'Amoreaux East,Milliken,Steeles East",2,2,2,2,2,2
"Birch Cliff,Cliffside West",4,4,4,4,4,4
Cedarbrae,7,7,7,7,7,7
"Clairlea,Golden Mile,Oakridge",10,10,10,10,10,10
"Clarks Corners,Sullivan,Tam O'Shanter",11,11,11,11,11,11
"Cliffcrest,Cliffside,Scarborough Village West",2,2,2,2,2,2
"Dorset Park,Scarborough Town Centre,Wexford Heights",7,7,7,7,7,7
"East Birchmount Park,Ionview,Kennedy Park",4,4,4,4,4,4
"Guildwood,Morningside,West Hill",8,8,8,8,8,8


#### Let's find out how many unique categories can be curated from all the returned venues

In [451]:
print('There are {} uniques categories.'.format(len(Scarborough_venues['Venue Category'].unique())))

There are 55 uniques categories.


#### Analyze Each Neighborhood

In [452]:
# one hot encoding
Scarborough_onehot = pd.get_dummies(Scarborough_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Scarborough_onehot['Neighborhood'] = Scarborough_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Scarborough_onehot.columns[-1]] + list(Scarborough_onehot.columns[:-1])
Scarborough_onehot = Scarborough_onehot[fixed_columns]

Scarborough_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Athletics & Sports,Auto Garage,Bakery,Bank,Bar,Breakfast Spot,Bubble Tea Shop,Bus Line,...,Playground,Rental Car Location,Sandwich Place,Shopping Mall,Skating Rink,Soccer Field,Spa,Thai Restaurant,Thrift / Vintage Store,Vietnamese Restaurant
0,"Rouge,Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Highland Creek,Rouge Hill,Port Union",0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Highland Creek,Rouge Hill,Port Union",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Guildwood,Morningside,West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Guildwood,Morningside,West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [453]:
Scarborough_onehot.shape

(89, 56)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [454]:
Scarough_grouped= Scarborough_onehot.groupby('Neighborhood').mean().reset_index()
Scarough_grouped

Unnamed: 0,Neighborhood,American Restaurant,Athletics & Sports,Auto Garage,Bakery,Bank,Bar,Breakfast Spot,Bubble Tea Shop,Bus Line,...,Playground,Rental Car Location,Sandwich Place,Shopping Mall,Skating Rink,Soccer Field,Spa,Thai Restaurant,Thrift / Vintage Store,Vietnamese Restaurant
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,...,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0
1,"Agincourt North,L'Amoreaux East,Milliken,Steel...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Birch Cliff,Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0
3,Cedarbrae,0.0,0.142857,0.0,0.142857,0.142857,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0
4,"Clairlea,Golden Mile,Oakridge",0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.2,...,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0
5,"Clarks Corners,Sullivan,Tam O'Shanter",0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.090909,0.0,0.0
6,"Cliffcrest,Cliffside,Scarborough Village West",0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Dorset Park,Scarborough Town Centre,Wexford He...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857
8,"East Birchmount Park,Ionview,Kennedy Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Guildwood,Morningside,West Hill",0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,...,0.0,0.125,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0


#### Let's print each neighborhood along with the top 5 most common venues

In [455]:
num_top_venues = 5

for hood in Scarough_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = Scarough_grouped[Scarough_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                    venue  freq
0                  Lounge  0.25
1          Breakfast Spot  0.25
2            Skating Rink  0.25
3  Furniture / Home Store  0.25
4     American Restaurant  0.00


----Agincourt North,L'Amoreaux East,Milliken,Steeles East----
                       venue  freq
0                       Park   0.5
1                 Playground   0.5
2        American Restaurant   0.0
3          Korean Restaurant   0.0
4  Latin American Restaurant   0.0


----Birch Cliff,Cliffside West----
                   venue  freq
0        College Stadium  0.25
1           Skating Rink  0.25
2  General Entertainment  0.25
3                   Café  0.25
4                   Park  0.00


----Cedarbrae----
                  venue  freq
0  Caribbean Restaurant  0.14
1   Fried Chicken Joint  0.14
2                Bakery  0.14
3                  Bank  0.14
4       Thai Restaurant  0.14


----Clairlea,Golden Mile,Oakridge----
          venue  freq
0        Bakery   0.2
1      Bu

### Put the data in a Pandas Dataframe

First, let's write a function to sort the venues in descending order.

In [456]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [457]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Scarough_grouped['Neighborhood']

for ind in np.arange(Scarough_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Scarough_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Skating Rink,Furniture / Home Store,Breakfast Spot,Lounge,Vietnamese Restaurant,Coffee Shop,Hakka Restaurant,Grocery Store,General Entertainment,Fried Chicken Joint
1,"Agincourt North,L'Amoreaux East,Milliken,Steel...",Playground,Park,Vietnamese Restaurant,Chinese Restaurant,Hakka Restaurant,Grocery Store,General Entertainment,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant
2,"Birch Cliff,Cliffside West",Skating Rink,General Entertainment,Café,College Stadium,Vietnamese Restaurant,Coffee Shop,History Museum,Hakka Restaurant,Grocery Store,Furniture / Home Store
3,Cedarbrae,Thai Restaurant,Athletics & Sports,Bakery,Bank,Hakka Restaurant,Fried Chicken Joint,Caribbean Restaurant,Vietnamese Restaurant,Department Store,History Museum
4,"Clairlea,Golden Mile,Oakridge",Bus Line,Bakery,Intersection,Metro Station,Soccer Field,Park,Bus Station,Fast Food Restaurant,Discount Store,Electronics Store


### Cluster Neighborhoods

Run *k*-means to cluster the neighborhood into 5 clusters.

In [458]:
# set number of clusters
kclusters = 5

Scarough_grouped_clustering = Scarough_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Scarough_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 3, 1, 1, 1, 1, 2, 1, 1, 1], dtype=int32)

In [459]:
# add clustering labels


neighborhoods_venues_sorted.insert(0, 'Cluster Labelstwo', kmeans.labels_)
Scarough_grouped_merged =Scarborough

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Scarough_grouped_merged = Scarough_grouped_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Scarough_grouped_merged.head() # check the last columns!

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,0.0,Fast Food Restaurant,Vietnamese Restaurant,Coffee Shop,History Museum,Hakka Restaurant,Grocery Store,General Entertainment,Furniture / Home Store,Fried Chicken Joint,Electronics Store
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,4.0,History Museum,Bar,Vietnamese Restaurant,Coffee Shop,Hakka Restaurant,Grocery Store,General Entertainment,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711,1.0,Intersection,Breakfast Spot,Medical Center,Electronics Store,Pizza Place,Rental Car Location,Spa,Mexican Restaurant,Fried Chicken Joint,Fast Food Restaurant
3,M1G,Scarborough,Woburn,43.770992,-79.216917,1.0,Coffee Shop,Insurance Office,Korean Restaurant,History Museum,Hakka Restaurant,Grocery Store,General Entertainment,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1.0,Thai Restaurant,Athletics & Sports,Bakery,Bank,Hakka Restaurant,Fried Chicken Joint,Caribbean Restaurant,Vietnamese Restaurant,Department Store,History Museum


#### Examine Clusters

In [460]:
#CLuster 1
Scarough_grouped_merged.loc[Scarough_grouped_merged['Cluster Labelstwo'] == 0, Scarough_grouped_merged.columns[[1] + list(range(5, Scarough_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,0.0,Fast Food Restaurant,Vietnamese Restaurant,Coffee Shop,History Museum,Hakka Restaurant,Grocery Store,General Entertainment,Furniture / Home Store,Fried Chicken Joint,Electronics Store


In [461]:
#CLuster 2
Scarough_grouped_merged.loc[Scarough_grouped_merged['Cluster Labelstwo'] == 1, Scarough_grouped_merged.columns[[1] + list(range(5, Scarough_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Scarborough,1.0,Intersection,Breakfast Spot,Medical Center,Electronics Store,Pizza Place,Rental Car Location,Spa,Mexican Restaurant,Fried Chicken Joint,Fast Food Restaurant
3,Scarborough,1.0,Coffee Shop,Insurance Office,Korean Restaurant,History Museum,Hakka Restaurant,Grocery Store,General Entertainment,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant
4,Scarborough,1.0,Thai Restaurant,Athletics & Sports,Bakery,Bank,Hakka Restaurant,Fried Chicken Joint,Caribbean Restaurant,Vietnamese Restaurant,Department Store,History Museum
6,Scarborough,1.0,Coffee Shop,Playground,Discount Store,Department Store,Vietnamese Restaurant,History Museum,Hakka Restaurant,Grocery Store,General Entertainment,Furniture / Home Store
7,Scarborough,1.0,Bus Line,Bakery,Intersection,Metro Station,Soccer Field,Park,Bus Station,Fast Food Restaurant,Discount Store,Electronics Store
9,Scarborough,1.0,Skating Rink,General Entertainment,Café,College Stadium,Vietnamese Restaurant,Coffee Shop,History Museum,Hakka Restaurant,Grocery Store,Furniture / Home Store
10,Scarborough,1.0,Indian Restaurant,Vietnamese Restaurant,Pet Store,Latin American Restaurant,Light Rail Station,Chinese Restaurant,Bar,Discount Store,Hakka Restaurant,Grocery Store
11,Scarborough,1.0,Middle Eastern Restaurant,Auto Garage,Bakery,Sandwich Place,Breakfast Spot,Vietnamese Restaurant,College Stadium,Hakka Restaurant,Grocery Store,General Entertainment
12,Scarborough,1.0,Skating Rink,Furniture / Home Store,Breakfast Spot,Lounge,Vietnamese Restaurant,Coffee Shop,Hakka Restaurant,Grocery Store,General Entertainment,Fried Chicken Joint
13,Scarborough,1.0,Pizza Place,Fast Food Restaurant,Thai Restaurant,Italian Restaurant,Shopping Mall,Bank,Fried Chicken Joint,Chinese Restaurant,Pharmacy,Noodle House


In [462]:
#CLuster 3
Scarough_grouped_merged.loc[Scarough_grouped_merged['Cluster Labelstwo'] == 2, Scarough_grouped_merged.columns[[1] + list(range(5, Scarough_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Scarborough,2.0,American Restaurant,Motel,Insurance Office,History Museum,Hakka Restaurant,Grocery Store,General Entertainment,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant


In [463]:
#CLuster 4
Scarough_grouped_merged.loc[Scarough_grouped_merged['Cluster Labelstwo'] == 3, Scarough_grouped_merged.columns[[1] + list(range(5, Scarough_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Scarborough,3.0,Grocery Store,Playground,Vietnamese Restaurant,Coffee Shop,History Museum,Hakka Restaurant,General Entertainment,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant
14,Scarborough,3.0,Playground,Park,Vietnamese Restaurant,Chinese Restaurant,Hakka Restaurant,Grocery Store,General Entertainment,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant


In [464]:
#CLuster 5
Scarough_grouped_merged.loc[Scarough_grouped_merged['Cluster Labelstwo'] == 3, Scarough_grouped_merged.columns[[1] + list(range(5, Scarough_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Scarborough,3.0,Grocery Store,Playground,Vietnamese Restaurant,Coffee Shop,History Museum,Hakka Restaurant,General Entertainment,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant
14,Scarborough,3.0,Playground,Park,Vietnamese Restaurant,Chinese Restaurant,Hakka Restaurant,Grocery Store,General Entertainment,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant


## North York Analysis

In [465]:
NY = df4[df4['Borough'] == 'North York'].reset_index(drop=True)
NY.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M2H,North York,Hillcrest Village,43.803762,-79.363452
1,M2J,North York,"Fairview,Henry Farm,Oriole",43.778517,-79.346556
2,M2K,North York,Bayview Village,43.786947,-79.385975
3,M2L,North York,"Silver Hills,York Mills",43.75749,-79.374714
4,M2M,North York,"Newtonbrook,Willowdale",43.789053,-79.408493


Let's explore the first neighborhood in our dataframe: get the n

In [468]:
NY.loc[0, 'Neighborhood']

'Hillcrest Village'

In [469]:
neighborhood_latitude = NY.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = NY.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = NY.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Hillcrest Village are 43.8037622, -79.3634517.


Now, let's get the top 100 venues that are in Hillcrest within a radius of 500 meters.

Let's create the GET request URL. Name your URL **url**.

In [470]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605&ll=43.8037622,-79.3634517&radius=500&limit=100'

Send the GET request and examine the resutls

In [471]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c67a2e92f3500251c69da'},
 'response': {'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 5,
  'suggestedBounds': {'ne': {'lat': 43.808262204500004,
    'lng': -79.3572281853783},
   'sw': {'lat': 43.7992621955, 'lng': -79.3696752146217}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4ad9dce6f964a520651b21e3',
       'name': "Eagle's Nest Golf Club",
       'location': {'address': '10000 Dufferin Rd',
        'lat': 43.805454826002794,
        'lng': -79.36418592243415,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.805454826002794,
          'lng': -79.36418592243415}],
        'distance': 197,
        'cc': 'CA',
        'city': 'Toronto

Let's borrow the **get_category_type** function from the Foursquare lab.

In [472]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [473]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Eagle's Nest Golf Club,Golf Course,43.805455,-79.364186
1,AY Jackson Pool,Pool,43.804515,-79.366138
2,Villa Madina,Mediterranean Restaurant,43.801685,-79.363938
3,Duncan Creek Park,Dog Run,43.805539,-79.360695
4,A.Y. Jackson Secondary School Track,Athletics & Sports,43.805068,-79.366677


Let's determine how many venus are returned by Foursquare


In [474]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

5 venues were returned by Foursquare.


#### Let's create a function to repeat the same process to all the neighborhoods in NY

In [475]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Now let's write the code to run the above function on each neighborhood and create a new dataframe called *NY_venues*.

In [476]:

NY_venues = getNearbyVenues(names=NY['Neighborhood'],
                                   latitudes=NY['Latitude'],
                                   longitudes=NY['Longitude']
                                  )


Hillcrest Village
Fairview,Henry Farm,Oriole
Bayview Village
Silver Hills,York Mills
Newtonbrook,Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park,Don Mills South
Bathurst Manor,Downsview North,Wilson Heights
Northwood Park,York University
CFB Toronto,Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Bedford Park,Lawrence Manor East
Lawrence Heights,Lawrence Manor
Glencairn
Downsview,North Park,Upwood Park
Humber Summit
Emery,Humberlea


#### Let's check the size of the resulting dataframe

In [477]:
print(NY_venues.shape)
NY_venues.head()

(240, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Hillcrest Village,43.803762,-79.363452,Eagle's Nest Golf Club,43.805455,-79.364186,Golf Course
1,Hillcrest Village,43.803762,-79.363452,AY Jackson Pool,43.804515,-79.366138,Pool
2,Hillcrest Village,43.803762,-79.363452,Villa Madina,43.801685,-79.363938,Mediterranean Restaurant
3,Hillcrest Village,43.803762,-79.363452,Duncan Creek Park,43.805539,-79.360695,Dog Run
4,Hillcrest Village,43.803762,-79.363452,A.Y. Jackson Secondary School Track,43.805068,-79.366677,Athletics & Sports


Let's check how many venues were returned for each neighborhood

In [478]:
NY_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor,Downsview North,Wilson Heights",18,18,18,18,18,18
Bayview Village,4,4,4,4,4,4
"Bedford Park,Lawrence Manor East",23,23,23,23,23,23
"CFB Toronto,Downsview East",3,3,3,3,3,3
Don Mills North,4,4,4,4,4,4
Downsview Central,4,4,4,4,4,4
Downsview Northwest,5,5,5,5,5,5
Downsview West,6,6,6,6,6,6
"Downsview,North Park,Upwood Park",3,3,3,3,3,3
"Emery,Humberlea",1,1,1,1,1,1


#### Let's find out how many unique categories can be curated from all the returned venues

In [479]:
print('There are {} uniques categories.'.format(len(NY_venues['Venue Category'].unique())))

There are 107 uniques categories.


#### Analyze Each Neighborhood

In [480]:
# one hot encoding
NY_onehot = pd.get_dummies(NY_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
NY_onehot['Neighborhood'] = NY_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [NY_onehot.columns[-1]] + list(NY_onehot.columns[:-1])
NY_onehot = NY_onehot[fixed_columns]

NY_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,...,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Video Store,Vietnamese Restaurant,Wings Joint,Women's Store
0,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Hillcrest Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Hillcrest Village,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [481]:
NY_onehot.shape

(240, 108)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [482]:
NY_grouped= NY_onehot.groupby('Neighborhood').mean().reset_index()
NY_grouped

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,...,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Video Store,Vietnamese Restaurant,Wings Joint,Women's Store
0,"Bathurst Manor,Downsview North,Wilson Heights",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,...,0.055556,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park,Lawrence Manor East",0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,...,0.043478,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CFB Toronto,Downsview East",0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Don Mills North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Downsview Central,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Downsview Northwest,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Downsview West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Downsview,North Park,Upwood Park",0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Emery,Humberlea",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Let's print each neighborhood along with the top 5 most common venues

In [483]:
num_top_venues = 5

for hood in NY_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = NY_grouped[NY_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bathurst Manor,Downsview North,Wilson Heights----
                  venue  freq
0           Coffee Shop  0.11
1      Sushi Restaurant  0.06
2   Fried Chicken Joint  0.06
3  Fast Food Restaurant  0.06
4        Sandwich Place  0.06


----Bayview Village----
                 venue  freq
0   Chinese Restaurant  0.25
1                 Bank  0.25
2                 Café  0.25
3  Japanese Restaurant  0.25
4    Accessories Store  0.00


----Bedford Park,Lawrence Manor East----
                venue  freq
0  Italian Restaurant  0.09
1         Coffee Shop  0.09
2    Greek Restaurant  0.04
3             Butcher  0.04
4       Grocery Store  0.04


----CFB Toronto,Downsview East----
                   venue  freq
0             Playground  0.33
1                   Park  0.33
2                Airport  0.33
3  Vietnamese Restaurant  0.00
4              Juice Bar  0.00


----Don Mills North----
                  venue  freq
0  Gym / Fitness Center  0.25
1  Caribbean Restaurant  0.25
2               

### Put the data in a Pandas Dataframe

First, let's write a function to sort the venues in descending order.

In [770]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [771]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = NY_grouped['Neighborhood']

for ind in np.arange(NY_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(NY_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor,Downsview North,Wilson Heights",Coffee Shop,Pharmacy,Shopping Mall,Frozen Yogurt Shop,Fast Food Restaurant,Diner,Deli / Bodega,Middle Eastern Restaurant,Pizza Place,Bridal Shop
1,Bayview Village,Japanese Restaurant,Chinese Restaurant,Café,Bank,Electronics Store,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop
2,"Bedford Park,Lawrence Manor East",Italian Restaurant,Coffee Shop,Greek Restaurant,Sandwich Place,Fast Food Restaurant,Grocery Store,Indian Restaurant,Japanese Restaurant,Juice Bar,Liquor Store
3,"CFB Toronto,Downsview East",Airport,Playground,Park,Women's Store,Discount Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega
4,Don Mills North,Caribbean Restaurant,Gym / Fitness Center,Café,Japanese Restaurant,Women's Store,Dog Run,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega


### Cluster Neighborhoods

Run *k*-means to cluster the neighborhood into 5 clusters.

In [772]:
# set number of clusters
kclusters = 5

NY_grouped_clustering = NY_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(NY_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1], dtype=int32)

In [487]:
# add clustering labels


neighborhoods_venues_sorted.insert(0, 'Cluster Labelthree', kmeans.labels_)
NY_grouped_merged =NY

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
NY_grouped_merged = NY_grouped_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

NY_grouped_merged.head() # check the last columns!

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labelthree,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M2H,North York,Hillcrest Village,43.803762,-79.363452,0,Golf Course,Dog Run,Pool,Athletics & Sports,Mediterranean Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store
1,M2J,North York,"Fairview,Henry Farm,Oriole",43.778517,-79.346556,0,Clothing Store,Fast Food Restaurant,Coffee Shop,Asian Restaurant,Food Court,Cosmetics Shop,Japanese Restaurant,Bus Station,Bakery,Women's Store
2,M2K,North York,Bayview Village,43.786947,-79.385975,0,Japanese Restaurant,Chinese Restaurant,Café,Bank,Electronics Store,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop
3,M2L,North York,"Silver Hills,York Mills",43.75749,-79.374714,4,,,,,,,,,,
4,M2M,North York,"Newtonbrook,Willowdale",43.789053,-79.408493,2,,,,,,,,,,


#### Examine Clusters

In [773]:
#CLuster 1
NY_grouped_merged.loc[NY_grouped_merged['Cluster Labelthree'] == 0, NY_grouped_merged.columns[[1] + list(range(5, NY_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelthree,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,0,Golf Course,Dog Run,Pool,Athletics & Sports,Mediterranean Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store
1,North York,0,Clothing Store,Fast Food Restaurant,Coffee Shop,Asian Restaurant,Food Court,Cosmetics Shop,Japanese Restaurant,Bus Station,Bakery,Women's Store
2,North York,0,Japanese Restaurant,Chinese Restaurant,Café,Bank,Electronics Store,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop
5,North York,0,,,,,,,,,,
6,North York,0,,,,,,,,,,
7,North York,0,,,,,,,,,,
8,North York,0,,,,,,,,,,
9,North York,0,Caribbean Restaurant,Gym / Fitness Center,Café,Japanese Restaurant,Women's Store,Dog Run,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega
10,North York,0,Beer Store,Coffee Shop,Grocery Store,Gym,Italian Restaurant,Japanese Restaurant,Restaurant,Dim Sum Restaurant,Shopping Mall,Sporting Goods Shop
11,North York,0,Coffee Shop,Pharmacy,Shopping Mall,Frozen Yogurt Shop,Fast Food Restaurant,Diner,Deli / Bodega,Middle Eastern Restaurant,Pizza Place,Bridal Shop


In [774]:
#CLuster 2
NY_grouped_merged.loc[NY_grouped_merged['Cluster Labelthree'] == 1, NY_grouped_merged.columns[[1] + list(range(5, NY_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelthree,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
23,North York,1,Baseball Field,Women's Store,Electronics Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop


In [775]:
#CLuster 3
NY_grouped_merged.loc[NY_grouped_merged['Cluster Labelthree'] == 2, NY_grouped_merged.columns[[1] + list(range(5, NY_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelthree,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,North York,2,,,,,,,,,,


In [776]:
#CLuster 4
NY_grouped_merged.loc[NY_grouped_merged['Cluster Labelthree'] == 3, NY_grouped_merged.columns[[1] + list(range(5, NY_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelthree,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,North York,3,Empanada Restaurant,Women's Store,Dog Run,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop


In [777]:
#CLuster 5
NY_grouped_merged.loc[NY_grouped_merged['Cluster Labelthree'] == 4, NY_grouped_merged.columns[[1] + list(range(5, NY_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelthree,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,North York,4,,,,,,,,,,


## East York Analysis

In [493]:
EY = df4[df4['Borough'] == 'East York'].reset_index(drop=True)
EY.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M4B,East York,"Woodbine Gardens,Parkview Hill",43.706397,-79.309937
1,M4C,East York,Woodbine Heights,43.695344,-79.318389
2,M4G,East York,Leaside,43.70906,-79.363452
3,M4H,East York,Thorncliffe Park,43.705369,-79.349372
4,M4J,East York,East Toronto,43.685347,-79.338106


Let's explore the first neighborhood in our dataframe: get the n

In [496]:
EY.loc[0, 'Neighborhood']

'Woodbine Gardens,Parkview Hill'

In [497]:
neighborhood_latitude = EY.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = EY.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = EY.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Woodbine Gardens,Parkview Hill are 43.7063972, -79.309937.


Now, let's get the top 100 venues that are in Woodbine Gardens,Parkview Hill within a radius of 500 meters.

Let's create the GET request URL. Name your URL **url**.

In [498]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605&ll=43.7063972,-79.309937&radius=500&limit=100'

Send the GET request and examine the resutls

In [499]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c67ad018cbb0039bf447d'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': "O'Connor - Parkview",
  'headerFullLocation': "O'Connor - Parkview, Toronto",
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 12,
  'suggestedBounds': {'ne': {'lat': 43.7108972045, 'lng': -79.30372360313615},
   'sw': {'lat': 43.701897195499996, 'lng': -79.31615039686386}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b5a3842f964a52023b528e3',
       'name': 'Jawny Bakers',
       'location': {'address': "804 O'Connor Dr",
        'crossStreet': 'St Clair E',
        'lat': 43.705782646822,
        'lng': -79.31291304477831,
        'labeledLatLngs':

Let's borrow the **get_category_type** function from the Foursquare lab.

In [500]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [501]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Jawny Bakers,Gastropub,43.705783,-79.312913
1,East York Gymnastics,Gym / Fitness Center,43.710654,-79.309279
2,Shoppers Drug Mart,Pharmacy,43.705892,-79.31241
3,TD Canada Trust,Bank,43.70574,-79.31227
4,Pizza Pizza,Pizza Place,43.705159,-79.31313


Let's determine how many venus are returned by Foursquare


In [502]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

12 venues were returned by Foursquare.


#### Let's create a function to repeat the same process to all the neighborhoods in EY

In [503]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Now let's write the code to run the above function on each neighborhood and create a new dataframe called *EY venues*.

In [504]:

EY_venues = getNearbyVenues(names=EY['Neighborhood'],
                                   latitudes=EY['Latitude'],
                                   longitudes=EY['Longitude']
                                  )


Woodbine Gardens,Parkview Hill
Woodbine Heights
Leaside
Thorncliffe Park
East Toronto


#### Let's check the size of the resulting dataframe

In [505]:
print(EY_venues.shape)
EY_venues.head()

(75, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Woodbine Gardens,Parkview Hill",43.706397,-79.309937,Jawny Bakers,43.705783,-79.312913,Gastropub
1,"Woodbine Gardens,Parkview Hill",43.706397,-79.309937,East York Gymnastics,43.710654,-79.309279,Gym / Fitness Center
2,"Woodbine Gardens,Parkview Hill",43.706397,-79.309937,Shoppers Drug Mart,43.705892,-79.31241,Pharmacy
3,"Woodbine Gardens,Parkview Hill",43.706397,-79.309937,TD Canada Trust,43.70574,-79.31227,Bank
4,"Woodbine Gardens,Parkview Hill",43.706397,-79.309937,Pizza Pizza,43.705159,-79.31313,Pizza Place


Let's check how many venues were returned for each neighborhood

In [506]:
EY_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
East Toronto,4,4,4,4,4,4
Leaside,34,34,34,34,34,34
Thorncliffe Park,16,16,16,16,16,16
"Woodbine Gardens,Parkview Hill",12,12,12,12,12,12
Woodbine Heights,9,9,9,9,9,9


#### Let's find out how many unique categories can be curated from all the returned venues

In [507]:
print('There are {} uniques categories.'.format(len(EY_venues['Venue Category'].unique())))

There are 45 uniques categories.


#### Analyze Each Neighborhood

In [508]:
# one hot encoding
EY_onehot = pd.get_dummies(EY_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
EY_onehot['Neighborhood'] = EY_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [EY_onehot.columns[-1]] + list(EY_onehot.columns[:-1])
EY_onehot = EY_onehot[fixed_columns]

EY_onehot.head()

Unnamed: 0,Neighborhood,Asian Restaurant,Athletics & Sports,Bagel Shop,Bank,Beer Store,Bike Shop,Breakfast Spot,Brewery,Burger Joint,...,Shopping Mall,Skating Rink,Smoothie Shop,Sporting Goods Shop,Sports Bar,Supermarket,Sushi Restaurant,Video Store,Warehouse Store,Yoga Studio
0,"Woodbine Gardens,Parkview Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Woodbine Gardens,Parkview Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Woodbine Gardens,Parkview Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Woodbine Gardens,Parkview Hill",0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Woodbine Gardens,Parkview Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [509]:
EY_onehot.shape

(75, 46)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [510]:
EY_grouped= EY_onehot.groupby('Neighborhood').mean().reset_index()
EY_grouped

Unnamed: 0,Neighborhood,Asian Restaurant,Athletics & Sports,Bagel Shop,Bank,Beer Store,Bike Shop,Breakfast Spot,Brewery,Burger Joint,...,Shopping Mall,Skating Rink,Smoothie Shop,Sporting Goods Shop,Sports Bar,Supermarket,Sushi Restaurant,Video Store,Warehouse Store,Yoga Studio
0,East Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Leaside,0.0,0.0,0.029412,0.029412,0.029412,0.029412,0.029412,0.029412,0.058824,...,0.029412,0.0,0.029412,0.088235,0.029412,0.029412,0.058824,0.0,0.0,0.0
2,Thorncliffe Park,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.125,...,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0625,0.0625
3,"Woodbine Gardens,Parkview Hill",0.0,0.083333,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Woodbine Heights,0.111111,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,...,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0


#### Let's print each neighborhood along with the top 5 most common venues

In [511]:
num_top_venues = 5

for hood in EY_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = EY_grouped[EY_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----East Toronto----
               venue  freq
0               Park  0.50
1        Pizza Place  0.25
2  Convenience Store  0.25
3   Asian Restaurant  0.00
4         Restaurant  0.00


----Leaside----
                    venue  freq
0             Coffee Shop  0.12
1     Sporting Goods Shop  0.09
2            Burger Joint  0.06
3  Furniture / Home Store  0.06
4        Sushi Restaurant  0.06


----Thorncliffe Park----
               venue  freq
0       Burger Joint  0.12
1  Indian Restaurant  0.12
2      Grocery Store  0.06
3        Coffee Shop  0.06
4    Warehouse Store  0.06


----Woodbine Gardens,Parkview Hill----
                  venue  freq
0  Fast Food Restaurant  0.17
1           Pizza Place  0.17
2  Gym / Fitness Center  0.08
3                  Bank  0.08
4    Athletics & Sports  0.08


----Woodbine Heights----
              venue  freq
0  Asian Restaurant  0.11
1    Cosmetics Shop  0.11
2       Video Store  0.11
3        Beer Store  0.11
4              Park  0.11




### Put the data in a Pandas Dataframe

First, let's write a function to sort the venues in descending order.

In [512]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [513]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = EY_grouped['Neighborhood']

for ind in np.arange(EY_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(EY_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East Toronto,Park,Pizza Place,Convenience Store,Yoga Studio,Fast Food Restaurant,Electronics Store,Dessert Shop,Curling Ice,Cosmetics Shop,Coffee Shop
1,Leaside,Coffee Shop,Sporting Goods Shop,Sushi Restaurant,Furniture / Home Store,Burger Joint,Mexican Restaurant,Dessert Shop,Electronics Store,Clothing Store,Fish & Chips Shop
2,Thorncliffe Park,Burger Joint,Indian Restaurant,Yoga Studio,Pizza Place,Bank,Coffee Shop,Warehouse Store,Gym,Liquor Store,Park
3,"Woodbine Gardens,Parkview Hill",Pizza Place,Fast Food Restaurant,Athletics & Sports,Gastropub,Bank,Gym / Fitness Center,Intersection,Café,Pharmacy,Pet Store
4,Woodbine Heights,Asian Restaurant,Skating Rink,Curling Ice,Cosmetics Shop,Park,Pharmacy,Bus Stop,Video Store,Beer Store,Bike Shop


### Cluster Neighborhoods

Run *k*-means to cluster the neighborhood into 5 clusters.

In [514]:
# set number of clusters
kclusters = 5

EY_grouped_clustering = EY_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(EY_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 3, 1, 4, 0], dtype=int32)

In [515]:
# add clustering labels


neighborhoods_venues_sorted.insert(0, 'Cluster Labelfour', kmeans.labels_)
EY_grouped_merged =EY

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
EY_grouped_merged = EY_grouped_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

EY_grouped_merged.head() # check the last columns!

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labelfour,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4B,East York,"Woodbine Gardens,Parkview Hill",43.706397,-79.309937,4,Pizza Place,Fast Food Restaurant,Athletics & Sports,Gastropub,Bank,Gym / Fitness Center,Intersection,Café,Pharmacy,Pet Store
1,M4C,East York,Woodbine Heights,43.695344,-79.318389,0,Asian Restaurant,Skating Rink,Curling Ice,Cosmetics Shop,Park,Pharmacy,Bus Stop,Video Store,Beer Store,Bike Shop
2,M4G,East York,Leaside,43.70906,-79.363452,3,Coffee Shop,Sporting Goods Shop,Sushi Restaurant,Furniture / Home Store,Burger Joint,Mexican Restaurant,Dessert Shop,Electronics Store,Clothing Store,Fish & Chips Shop
3,M4H,East York,Thorncliffe Park,43.705369,-79.349372,1,Burger Joint,Indian Restaurant,Yoga Studio,Pizza Place,Bank,Coffee Shop,Warehouse Store,Gym,Liquor Store,Park
4,M4J,East York,East Toronto,43.685347,-79.338106,2,Park,Pizza Place,Convenience Store,Yoga Studio,Fast Food Restaurant,Electronics Store,Dessert Shop,Curling Ice,Cosmetics Shop,Coffee Shop


#### Visualize the results

#### Examine Clusters

In [516]:
#CLuster 1
EY_grouped_merged.loc[EY_grouped_merged['Cluster Labelfour'] == 0, EY_grouped_merged.columns[[1] + list(range(5, EY_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelfour,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,East York,0,Asian Restaurant,Skating Rink,Curling Ice,Cosmetics Shop,Park,Pharmacy,Bus Stop,Video Store,Beer Store,Bike Shop


In [517]:
#CLuster 2
EY_grouped_merged.loc[EY_grouped_merged['Cluster Labelfour'] == 1, EY_grouped_merged.columns[[1] + list(range(5, EY_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelfour,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,East York,1,Burger Joint,Indian Restaurant,Yoga Studio,Pizza Place,Bank,Coffee Shop,Warehouse Store,Gym,Liquor Store,Park


In [518]:
#CLuster 3
EY_grouped_merged.loc[EY_grouped_merged['Cluster Labelfour'] == 2, EY_grouped_merged.columns[[1] + list(range(5, EY_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelfour,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,East York,2,Park,Pizza Place,Convenience Store,Yoga Studio,Fast Food Restaurant,Electronics Store,Dessert Shop,Curling Ice,Cosmetics Shop,Coffee Shop


In [519]:
#CLuster 4
EY_grouped_merged.loc[EY_grouped_merged['Cluster Labelfour'] == 3, EY_grouped_merged.columns[[1] + list(range(5, EY_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelfour,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,East York,3,Coffee Shop,Sporting Goods Shop,Sushi Restaurant,Furniture / Home Store,Burger Joint,Mexican Restaurant,Dessert Shop,Electronics Store,Clothing Store,Fish & Chips Shop


In [520]:
#CLuster 5
EY_grouped_merged.loc[EY_grouped_merged['Cluster Labelfour'] == 4, EY_grouped_merged.columns[[1] + list(range(5, EY_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelfour,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East York,4,Pizza Place,Fast Food Restaurant,Athletics & Sports,Gastropub,Bank,Gym / Fitness Center,Intersection,Café,Pharmacy,Pet Store


## East Toronto Analysis

In [521]:
ET = df4[df4['Borough'] == 'East Toronto'].reset_index(drop=True)
ET.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West,Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West,India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558


Let's explore the first neighborhood in our dataframe: get the n

In [524]:
ET.loc[0, 'Neighborhood']

'The Beaches'

In [525]:
neighborhood_latitude = ET.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = ET.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = ET.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of The Beaches are 43.67635739999999, -79.2930312.


Now, let's get the top 100 venues that are in The beaches within a radius of 500 meters.

Let's create the GET request URL. Name your URL **url**.

In [526]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605&ll=43.67635739999999,-79.2930312&radius=500&limit=100'

Send the GET request and examine the resutls

In [527]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c67b14651320025e9ebc1'},
 'response': {'headerLocation': 'The Beaches',
  'headerFullLocation': 'The Beaches, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 4,
  'suggestedBounds': {'ne': {'lat': 43.680857404499996,
    'lng': -79.28682091449052},
   'sw': {'lat': 43.67185739549999, 'lng': -79.29924148550948}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bd461bc77b29c74a07d9282',
       'name': 'Glen Manor Ravine',
       'location': {'address': 'Glen Manor',
        'crossStreet': 'Queen St.',
        'lat': 43.67682094413784,
        'lng': -79.29394208780985,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.67682094413784,
          'lng': -79.29394208780985}],
        'distanc

Let's borrow the **get_category_type** function from the Foursquare lab.

In [528]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [529]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Glen Manor Ravine,Trail,43.676821,-79.293942
1,The Big Carrot Natural Food Market,Health Food Store,43.678879,-79.297734
2,Grover Pub and Grub,Pub,43.679181,-79.297215
3,Upper Beaches,Neighborhood,43.680563,-79.292869


Let's determine how many venus are returned by Foursquare


In [530]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

4 venues were returned by Foursquare.


#### Let's create a function to repeat the same process to all the neighborhoods in ET

In [531]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Now let's write the code to run the above function on each neighborhood and create a new dataframe called *EY venues*.

In [532]:

ET_venues = getNearbyVenues(names=ET['Neighborhood'],
                                   latitudes=ET['Latitude'],
                                   longitudes=ET['Longitude']
                                  )


The Beaches
The Danforth West,Riverdale
The Beaches West,India Bazaar
Studio District
Business Reply Mail Processing Centre 969 Eastern


#### Let's check the size of the resulting dataframe

In [533]:
print(ET_venues.shape)
ET_venues.head()

(124, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,"The Danforth West,Riverdale",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant


Let's check how many venues were returned for each neighborhood

In [534]:
ET_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Business Reply Mail Processing Centre 969 Eastern,18,18,18,18,18,18
Studio District,41,41,41,41,41,41
The Beaches,4,4,4,4,4,4
"The Beaches West,India Bazaar",19,19,19,19,19,19
"The Danforth West,Riverdale",42,42,42,42,42,42


#### Let's find out how many unique categories can be curated from all the returned venues

In [535]:
print('There are {} uniques categories.'.format(len(EY_venues['Venue Category'].unique())))

There are 45 uniques categories.


#### Analyze Each Neighborhood

In [536]:
# one hot encoding
ET_onehot = pd.get_dummies(ET_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
ET_onehot['Neighborhood'] = ET_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [ET_onehot.columns[-1]] + list(ET_onehot.columns[:-1])
ET_onehot = ET_onehot[fixed_columns]

ET_onehot.head()

Unnamed: 0,Yoga Studio,American Restaurant,Auto Workshop,Bakery,Bank,Bar,Board Shop,Bookstore,Brewery,Bubble Tea Shop,...,Skate Park,Smoke Shop,Spa,Sports Bar,Stationery Store,Steakhouse,Sushi Restaurant,Thai Restaurant,Thrift / Vintage Store,Trail
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [537]:
ET_onehot.shape

(124, 69)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [538]:
ET_grouped= ET_onehot.groupby('Neighborhood').mean().reset_index()
ET_grouped

Unnamed: 0,Neighborhood,Yoga Studio,American Restaurant,Auto Workshop,Bakery,Bank,Bar,Board Shop,Bookstore,Brewery,...,Skate Park,Smoke Shop,Spa,Sports Bar,Stationery Store,Steakhouse,Sushi Restaurant,Thai Restaurant,Thrift / Vintage Store,Trail
0,Business Reply Mail Processing Centre 969 Eastern,0.055556,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.055556,...,0.055556,0.055556,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Studio District,0.02439,0.04878,0.0,0.04878,0.02439,0.02439,0.0,0.02439,0.02439,...,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.02439,0.02439,0.0
2,The Beaches,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25
3,"The Beaches West,India Bazaar",0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.052632,...,0.0,0.0,0.0,0.0,0.0,0.052632,0.052632,0.0,0.0,0.0
4,"The Danforth West,Riverdale",0.02381,0.02381,0.0,0.02381,0.0,0.0,0.0,0.02381,0.02381,...,0.0,0.0,0.02381,0.02381,0.0,0.0,0.0,0.0,0.0,0.02381


#### Let's print each neighborhood along with the top 5 most common venues

In [539]:
num_top_venues = 5

for hood in ET_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = ET_grouped[ET_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Business Reply Mail Processing Centre 969 Eastern----
                venue  freq
0  Light Rail Station  0.11
1         Yoga Studio  0.06
2             Butcher  0.06
3       Garden Center  0.06
4              Garden  0.06


----Studio District----
                 venue  freq
0                 Café  0.10
1          Coffee Shop  0.07
2            Gastropub  0.05
3               Bakery  0.05
4  American Restaurant  0.05


----The Beaches----
                       venue  freq
0                      Trail  0.25
1                        Pub  0.25
2          Health Food Store  0.25
3         Italian Restaurant  0.00
4  Middle Eastern Restaurant  0.00


----The Beaches West,India Bazaar----
                venue  freq
0         Pizza Place  0.11
1                 Pub  0.05
2      Sandwich Place  0.05
3      Ice Cream Shop  0.05
4  Italian Restaurant  0.05


----The Danforth West,Riverdale----
                    venue  freq
0        Greek Restaurant  0.21
1             Coffee Shop  0.10


### Put the data in a Pandas Dataframe

First, let's write a function to sort the venues in descending order.

In [540]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [541]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = ET_grouped['Neighborhood']

for ind in np.arange(ET.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(ET_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Business Reply Mail Processing Centre 969 Eastern,Light Rail Station,Garden Center,Pizza Place,Auto Workshop,Brewery,Burrito Place,Butcher,Comic Shop,Farmers Market,Fast Food Restaurant
1,Studio District,Café,Coffee Shop,Gastropub,American Restaurant,Bakery,Italian Restaurant,Cheese Shop,Diner,Latin American Restaurant,Thrift / Vintage Store
2,The Beaches,Trail,Pub,Health Food Store,Coworking Space,Coffee Shop,Comfort Food Restaurant,Comic Shop,Convenience Store,Cosmetics Shop,Dessert Shop
3,"The Beaches West,India Bazaar",Pizza Place,Liquor Store,Pub,Fish & Chips Shop,Gym,Ice Cream Shop,Italian Restaurant,Burrito Place,Burger Joint,Movie Theater
4,"The Danforth West,Riverdale",Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Furniture / Home Store,Trail,Diner,Indian Restaurant,Grocery Store,Fruit & Vegetable Store


### Cluster Neighborhoods

Run *k*-means to cluster the neighborhood into 5 clusters.

In [542]:
# set number of clusters
kclusters = 5

ET_grouped_clustering = ET_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(ET_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 4, 1, 2, 0], dtype=int32)

In [543]:
# add clustering labels


neighborhoods_venues_sorted.insert(0, 'Cluster Labelfive', kmeans.labels_)
ET_grouped_merged =ET

# merge et grouped with et to add latitude/longitude for each neighborhood
ET_grouped_merged = ET_grouped_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

ET_grouped_merged.head() # check the last columns!

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labelfive,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,1,Trail,Pub,Health Food Store,Coworking Space,Coffee Shop,Comfort Food Restaurant,Comic Shop,Convenience Store,Cosmetics Shop,Dessert Shop
1,M4K,East Toronto,"The Danforth West,Riverdale",43.679557,-79.352188,0,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Furniture / Home Store,Trail,Diner,Indian Restaurant,Grocery Store,Fruit & Vegetable Store
2,M4L,East Toronto,"The Beaches West,India Bazaar",43.668999,-79.315572,2,Pizza Place,Liquor Store,Pub,Fish & Chips Shop,Gym,Ice Cream Shop,Italian Restaurant,Burrito Place,Burger Joint,Movie Theater
3,M4M,East Toronto,Studio District,43.659526,-79.340923,4,Café,Coffee Shop,Gastropub,American Restaurant,Bakery,Italian Restaurant,Cheese Shop,Diner,Latin American Restaurant,Thrift / Vintage Store
4,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558,3,Light Rail Station,Garden Center,Pizza Place,Auto Workshop,Brewery,Burrito Place,Butcher,Comic Shop,Farmers Market,Fast Food Restaurant


#### Visualize the results

#### Examine Clusters

In [544]:
#CLuster 1
ET_grouped_merged.loc[ET_grouped_merged['Cluster Labelfive'] == 0, ET_grouped_merged.columns[[1] + list(range(5, ET_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelfive,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,East Toronto,0,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Furniture / Home Store,Trail,Diner,Indian Restaurant,Grocery Store,Fruit & Vegetable Store


In [545]:
#CLuster 2
ET_grouped_merged.loc[ET_grouped_merged['Cluster Labelfive'] == 1, ET_grouped_merged.columns[[1] + list(range(5, ET_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelfive,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East Toronto,1,Trail,Pub,Health Food Store,Coworking Space,Coffee Shop,Comfort Food Restaurant,Comic Shop,Convenience Store,Cosmetics Shop,Dessert Shop


In [546]:
#CLuster 3
EY_grouped_merged.loc[EY_grouped_merged['Cluster Labelfour'] == 2, EY_grouped_merged.columns[[1] + list(range(5, EY_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelfour,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,East York,2,Park,Pizza Place,Convenience Store,Yoga Studio,Fast Food Restaurant,Electronics Store,Dessert Shop,Curling Ice,Cosmetics Shop,Coffee Shop


In [547]:
#CLuster 4
ET_grouped_merged.loc[ET_grouped_merged['Cluster Labelfive'] == 3, ET_grouped_merged.columns[[1] + list(range(5, ET_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelfive,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,East Toronto,3,Light Rail Station,Garden Center,Pizza Place,Auto Workshop,Brewery,Burrito Place,Butcher,Comic Shop,Farmers Market,Fast Food Restaurant


In [548]:
#CLuster 5
ET_grouped_merged.loc[ET_grouped_merged['Cluster Labelfive'] == 5, ET_grouped_merged.columns[[1] + list(range(5, ET_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelfive,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


## West Toronto Analysis

In [610]:
WT = df4[df4['Borough'] == 'West Toronto'].reset_index(drop=True)
WT.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M6H,West Toronto,"Dovercourt Village,Dufferin",43.669005,-79.442259
1,M6J,West Toronto,"Little Portugal,Trinity",43.647927,-79.41975
2,M6K,West Toronto,"Brockton,Exhibition Place,Parkdale Village",43.636847,-79.428191
3,M6P,West Toronto,"High Park,The Junction South",43.661608,-79.464763
4,M6R,West Toronto,"Parkdale,Roncesvalles",43.64896,-79.456325


Let's explore the first neighborhood in our dataframe: get the n

In [615]:
WT.loc[0, 'Neighborhood']

'Dovercourt Village,Dufferin'

In [616]:
neighborhood_latitude = WT.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = WT.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = WT.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Dovercourt Village,Dufferin are 43.66900510000001, -79.4422593.


Now, let's get the top 100 venues that are in 'Dovercourt Village,Dufferin' within a radius of 500 meters.

Let's create the GET request URL. Name your URL **url**.

In [617]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605&ll=43.66900510000001,-79.4422593&radius=500&limit=100'

Send the GET request and examine the resutls

In [618]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c6bc4db1d810039b4ca51'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Davenport',
  'headerFullLocation': 'Davenport, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 18,
  'suggestedBounds': {'ne': {'lat': 43.67350510450001,
    'lng': -79.43604977526607},
   'sw': {'lat': 43.664505095500004, 'lng': -79.44846882473394}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5753753b498eeb535c53aed5',
       'name': 'The Greater Good Bar',
       'location': {'address': '229 Geary St',
        'crossStreet': 'at Dufferin St',
        'lat': 43.669409,
        'lng': -79.439267,
        'labeledLatLngs': [{'label': 'disp

Let's borrow the **get_category_type** function from the Foursquare lab.

In [619]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [620]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,The Greater Good Bar,Bar,43.669409,-79.439267
1,Parallel,Middle Eastern Restaurant,43.669516,-79.438728
2,Happy Bakery & Pastries,Bakery,43.66705,-79.441791
3,Planet Fitness Toronto Galleria,Gym / Fitness Center,43.667588,-79.442574
4,FreshCo,Supermarket,43.667918,-79.440754


Let's determine how many venus are returned by Foursquare


In [621]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

18 venues were returned by Foursquare.


#### Let's create a function to repeat the same process to all the neighborhoods in WT

In [622]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Now let's write the code to run the above function on each neighborhood and create a new dataframe called *wt_venues*.

In [625]:

WT_venues = getNearbyVenues(names=WT['Neighborhood'],
                                   latitudes=WT['Latitude'],
                                   longitudes=WT['Longitude']
                                  )


Dovercourt Village,Dufferin
Little Portugal,Trinity
Brockton,Exhibition Place,Parkdale Village
High Park,The Junction South
Parkdale,Roncesvalles
Runnymede,Swansea


#### Let's check the size of the resulting dataframe

In [626]:
print(WT_venues.shape)
WT_venues.head()

(177, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Dovercourt Village,Dufferin",43.669005,-79.442259,The Greater Good Bar,43.669409,-79.439267,Bar
1,"Dovercourt Village,Dufferin",43.669005,-79.442259,Parallel,43.669516,-79.438728,Middle Eastern Restaurant
2,"Dovercourt Village,Dufferin",43.669005,-79.442259,Happy Bakery & Pastries,43.66705,-79.441791,Bakery
3,"Dovercourt Village,Dufferin",43.669005,-79.442259,Planet Fitness Toronto Galleria,43.667588,-79.442574,Gym / Fitness Center
4,"Dovercourt Village,Dufferin",43.669005,-79.442259,FreshCo,43.667918,-79.440754,Supermarket


Let's check how many venues were returned for each neighborhood

In [627]:
WT_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Brockton,Exhibition Place,Parkdale Village",23,23,23,23,23,23
"Dovercourt Village,Dufferin",18,18,18,18,18,18
"High Park,The Junction South",22,22,22,22,22,22
"Little Portugal,Trinity",64,64,64,64,64,64
"Parkdale,Roncesvalles",15,15,15,15,15,15
"Runnymede,Swansea",35,35,35,35,35,35


#### Let's find out how many unique categories can be curated from all the returned venues

In [628]:
print('There are {} uniques categories.'.format(len(Scarborough_venues['Venue Category'].unique())))

There are 55 uniques categories.


#### Analyze Each Neighborhood

In [629]:
# one hot encoding
WT_onehot = pd.get_dummies(WT_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
WT_onehot['Neighborhood'] = WT_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [WT_onehot.columns[-1]] + list(WT_onehot.columns[:-1])
WT_onehot =WT_onehot[fixed_columns]

WT_onehot.head()

Unnamed: 0,Neighborhood,Art Gallery,Arts & Crafts Store,Asian Restaurant,Bagel Shop,Bakery,Bank,Bar,Bistro,Bookstore,...,Supermarket,Sushi Restaurant,Tapas Restaurant,Tea Room,Thai Restaurant,Theater,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,"Dovercourt Village,Dufferin",0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Dovercourt Village,Dufferin",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Dovercourt Village,Dufferin",0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Dovercourt Village,Dufferin",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Dovercourt Village,Dufferin",0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0


In [630]:
WT_onehot.shape

(177, 90)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [631]:
WT_grouped= WT_onehot.groupby('Neighborhood').mean().reset_index()
WT_grouped

Unnamed: 0,Neighborhood,Art Gallery,Arts & Crafts Store,Asian Restaurant,Bagel Shop,Bakery,Bank,Bar,Bistro,Bookstore,...,Supermarket,Sushi Restaurant,Tapas Restaurant,Tea Room,Thai Restaurant,Theater,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,"Brockton,Exhibition Place,Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478
1,"Dovercourt Village,Dufferin",0.0,0.0,0.0,0.0,0.111111,0.055556,0.055556,0.0,0.0,...,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"High Park,The Junction South",0.0,0.045455,0.0,0.0,0.045455,0.0,0.090909,0.0,0.045455,...,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0
3,"Little Portugal,Trinity",0.015625,0.0,0.046875,0.015625,0.03125,0.0,0.109375,0.015625,0.0,...,0.0,0.0,0.015625,0.0,0.0,0.015625,0.015625,0.03125,0.015625,0.015625
4,"Parkdale,Roncesvalles",0.0,0.0,0.0,0.0,0.0,0.066667,0.066667,0.0,0.066667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Runnymede,Swansea",0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.028571,...,0.0,0.057143,0.0,0.028571,0.0,0.0,0.028571,0.0,0.0,0.0


#### Let's print each neighborhood along with the top 5 most common venues

In [632]:
num_top_venues = 5

for hood in WT_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = WT_grouped[WT_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Brockton,Exhibition Place,Parkdale Village----
            venue  freq
0  Breakfast Spot  0.09
1            Café  0.09
2     Coffee Shop  0.09
3     Yoga Studio  0.04
4    Climbing Gym  0.04


----Dovercourt Village,Dufferin----
         venue  freq
0     Pharmacy  0.11
1  Supermarket  0.11
2       Bakery  0.11
3  Music Venue  0.06
4         Park  0.06


----High Park,The Junction South----
                       venue  freq
0                        Bar  0.09
1         Mexican Restaurant  0.09
2                       Café  0.09
3  Cajun / Creole Restaurant  0.05
4                      Diner  0.05


----Little Portugal,Trinity----
              venue  freq
0               Bar  0.11
1       Coffee Shop  0.06
2  Asian Restaurant  0.05
3       Pizza Place  0.03
4        Restaurant  0.03


----Parkdale,Roncesvalles----
            venue  freq
0  Breakfast Spot  0.13
1       Gift Shop  0.13
2     Coffee Shop  0.07
3   Movie Theater  0.07
4      Restaurant  0.07


----Runnymede,Swansea---

### Put the data in a Pandas Dataframe

First, let's write a function to sort the venues in descending order.

In [633]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [634]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = WT_grouped['Neighborhood']

for ind in np.arange(WT_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(WT_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Brockton,Exhibition Place,Parkdale Village",Coffee Shop,Breakfast Spot,Café,Yoga Studio,Sandwich Place,Grocery Store,Gym,Furniture / Home Store,Intersection,Italian Restaurant
1,"Dovercourt Village,Dufferin",Supermarket,Bakery,Pharmacy,Liquor Store,Fast Food Restaurant,Café,Smoke Shop,Brewery,Discount Store,Music Venue
2,"High Park,The Junction South",Mexican Restaurant,Café,Bar,Grocery Store,Fried Chicken Joint,Park,Fast Food Restaurant,Flea Market,Cajun / Creole Restaurant,Diner
3,"Little Portugal,Trinity",Bar,Coffee Shop,Asian Restaurant,Café,New American Restaurant,French Restaurant,Restaurant,Men's Store,Cocktail Bar,Pizza Place
4,"Parkdale,Roncesvalles",Gift Shop,Breakfast Spot,Dog Run,Dessert Shop,Movie Theater,Italian Restaurant,Bank,Bar,Eastern European Restaurant,Bookstore


### Cluster Neighborhoods

Run *k*-means to cluster the neighborhood into 5 clusters.

In [635]:
# set number of clusters
kclusters = 5

WT_grouped_clustering = WT_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(WT_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 2, 4, 1, 0, 1], dtype=int32)

In [636]:
# add clustering labels


neighborhoods_venues_sorted.insert(0, 'Cluster Labelseight', kmeans.labels_)
WT_grouped_merged =WT

# merge WT_grouped with toronto_data to add latitude/longitude for each neighborhood
WT_grouped_merged = WT_grouped_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

WT_grouped_merged.head() # check the last columns!

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labelseight,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M6H,West Toronto,"Dovercourt Village,Dufferin",43.669005,-79.442259,2,Supermarket,Bakery,Pharmacy,Liquor Store,Fast Food Restaurant,Café,Smoke Shop,Brewery,Discount Store,Music Venue
1,M6J,West Toronto,"Little Portugal,Trinity",43.647927,-79.41975,1,Bar,Coffee Shop,Asian Restaurant,Café,New American Restaurant,French Restaurant,Restaurant,Men's Store,Cocktail Bar,Pizza Place
2,M6K,West Toronto,"Brockton,Exhibition Place,Parkdale Village",43.636847,-79.428191,3,Coffee Shop,Breakfast Spot,Café,Yoga Studio,Sandwich Place,Grocery Store,Gym,Furniture / Home Store,Intersection,Italian Restaurant
3,M6P,West Toronto,"High Park,The Junction South",43.661608,-79.464763,4,Mexican Restaurant,Café,Bar,Grocery Store,Fried Chicken Joint,Park,Fast Food Restaurant,Flea Market,Cajun / Creole Restaurant,Diner
4,M6R,West Toronto,"Parkdale,Roncesvalles",43.64896,-79.456325,0,Gift Shop,Breakfast Spot,Dog Run,Dessert Shop,Movie Theater,Italian Restaurant,Bank,Bar,Eastern European Restaurant,Bookstore


#### Examine Clusters

In [638]:
#CLuster 1
WT_grouped_merged.loc[WT_grouped_merged['Cluster Labelseight'] == 0, WT_grouped_merged.columns[[1] + list(range(5, WT_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelseight,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,West Toronto,0,Gift Shop,Breakfast Spot,Dog Run,Dessert Shop,Movie Theater,Italian Restaurant,Bank,Bar,Eastern European Restaurant,Bookstore


In [639]:
#CLuster 2
WT_grouped_merged.loc[WT_grouped_merged['Cluster Labelseight'] == 1, WT_grouped_merged.columns[[1] + list(range(5, WT_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelseight,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,West Toronto,1,Bar,Coffee Shop,Asian Restaurant,Café,New American Restaurant,French Restaurant,Restaurant,Men's Store,Cocktail Bar,Pizza Place
5,West Toronto,1,Pizza Place,Café,Italian Restaurant,Coffee Shop,Sushi Restaurant,Electronics Store,Pharmacy,Dessert Shop,Diner,Latin American Restaurant


In [640]:
#CLuster 3
WT_grouped_merged.loc[WT_grouped_merged['Cluster Labelseight'] == 2, WT_grouped_merged.columns[[1] + list(range(5, WT_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelseight,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,West Toronto,2,Supermarket,Bakery,Pharmacy,Liquor Store,Fast Food Restaurant,Café,Smoke Shop,Brewery,Discount Store,Music Venue


In [641]:
#CLuster 4
WT_grouped_merged.loc[WT_grouped_merged['Cluster Labelseight'] == 3, WT_grouped_merged.columns[[1] + list(range(5, WT_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelseight,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,West Toronto,3,Coffee Shop,Breakfast Spot,Café,Yoga Studio,Sandwich Place,Grocery Store,Gym,Furniture / Home Store,Intersection,Italian Restaurant


In [642]:
#CLuster 5
WT_grouped_merged.loc[WT_grouped_merged['Cluster Labelseight'] == 4, WT_grouped_merged.columns[[1] + list(range(5, WT_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelseight,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,West Toronto,4,Mexican Restaurant,Café,Bar,Grocery Store,Fried Chicken Joint,Park,Fast Food Restaurant,Flea Market,Cajun / Creole Restaurant,Diner


## Queen's Park Analysis

In [646]:
QP = df4[df4['Borough'] == "Queen's Park"].reset_index(drop=True)
QP

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M7A,Queen's Park,Queen's Park,43.662301,-79.389494


Let's explore the first neighborhood in our dataframe: get the n

In [650]:
QP.loc[0, 'Neighborhood']

"Queen's Park"

In [651]:
neighborhood_latitude = QP.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = QP.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = QP.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Queen's Park are 43.6623015, -79.3894938.


Now, let's get the top 100 venues that are in QP within a radius of 500 meters.

Let's create the GET request URL. Name your URL **url**.

In [652]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605&ll=43.6623015,-79.3894938&radius=500&limit=100'

Send the GET request and examine the resutls

In [653]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c6f716c37db00256791d2'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Bay Street Corridor',
  'headerFullLocation': 'Bay Street Corridor, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 38,
  'suggestedBounds': {'ne': {'lat': 43.6668015045, 'lng': -79.38328496866619},
   'sw': {'lat': 43.657801495499996, 'lng': -79.3957026313338}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b9d206bf964a520e69136e3',
       'name': "Queen's Park",
       'location': {'address': 'University Ave.',
        'crossStreet': 'at Wellesley Ave.',
        'lat': 43.66394609897775,
        'lng': -79.39217952520835,
        'labeledL

Let's borrow the **get_category_type** function from the Foursquare lab.

In [654]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [655]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Queen's Park,Park,43.663946,-79.39218
1,Mercatto,Italian Restaurant,43.660391,-79.387664
2,Nando's Flame-Grilled Chicken,Portuguese Restaurant,43.661617,-79.386095
3,Coffee Island,Coffee Shop,43.664271,-79.386972
4,Coffee Public,Coffee Shop,43.660763,-79.386184


Let's determine how many venus are returned by Foursquare


In [656]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

38 venues were returned by Foursquare.


#### Let's create a function to repeat the same process to all the neighborhoods in QP

In [657]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Now let's write the code to run the above function on each neighborhood and create a new dataframe called *qp_venues*.

In [658]:

QP_venues = getNearbyVenues(names=QP['Neighborhood'],
                                   latitudes=QP['Latitude'],
                                   longitudes=QP['Longitude']
                                  )


Queen's Park


#### Let's check the size of the resulting dataframe

In [659]:
print(QP_venues.shape)
QP_venues.head()

(38, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Queen's Park,43.662301,-79.389494,Queen's Park,43.663946,-79.39218,Park
1,Queen's Park,43.662301,-79.389494,Mercatto,43.660391,-79.387664,Italian Restaurant
2,Queen's Park,43.662301,-79.389494,Nando's Flame-Grilled Chicken,43.661617,-79.386095,Portuguese Restaurant
3,Queen's Park,43.662301,-79.389494,Coffee Island,43.664271,-79.386972,Coffee Shop
4,Queen's Park,43.662301,-79.389494,Coffee Public,43.660763,-79.386184,Coffee Shop


Let's check how many venues were returned for each neighborhood

In [660]:
QP_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Queen's Park,38,38,38,38,38,38


#### Let's find out how many unique categories can be curated from all the returned venues

In [661]:
print('There are {} uniques categories.'.format(len(QP_venues['Venue Category'].unique())))

There are 28 uniques categories.


#### Analyze Each Neighborhood

In [662]:
# one hot encoding
QP_onehot = pd.get_dummies(QP_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
QP_onehot['Neighborhood'] = QP_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [QP_onehot.columns[-1]] + list(QP_onehot.columns[:-1])
QP_onehot =QP_onehot[fixed_columns]

QP_onehot.head()

Unnamed: 0,Neighborhood,Art Gallery,Arts & Crafts Store,Bar,Burger Joint,Burrito Place,Café,Chinese Restaurant,Coffee Shop,College Auditorium,...,Park,Portuguese Restaurant,Sandwich Place,Seafood Restaurant,Smoothie Shop,Sushi Restaurant,Theater,Vegetarian / Vegan Restaurant,Wings Joint,Yoga Studio
0,Queen's Park,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,Queen's Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Queen's Park,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
3,Queen's Park,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
4,Queen's Park,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [663]:
QP_onehot.shape

(38, 29)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [664]:
QP_grouped= QP_onehot.groupby('Neighborhood').mean().reset_index()
QP_grouped

Unnamed: 0,Neighborhood,Art Gallery,Arts & Crafts Store,Bar,Burger Joint,Burrito Place,Café,Chinese Restaurant,Coffee Shop,College Auditorium,...,Park,Portuguese Restaurant,Sandwich Place,Seafood Restaurant,Smoothie Shop,Sushi Restaurant,Theater,Vegetarian / Vegan Restaurant,Wings Joint,Yoga Studio
0,Queen's Park,0.026316,0.026316,0.026316,0.026316,0.026316,0.026316,0.026316,0.236842,0.026316,...,0.052632,0.026316,0.026316,0.026316,0.026316,0.026316,0.026316,0.026316,0.026316,0.026316


#### Let's print each neighborhood along with the top 5 most common venues

In [668]:
num_top_venues = 5

for hood in QP_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = QP_grouped[QP_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Queen's Park----
                venue  freq
0         Coffee Shop  0.24
1                Park  0.05
2                 Gym  0.05
3         Art Gallery  0.03
4  Italian Restaurant  0.03




## Missisauga Analysis

In [778]:
MS = df4[df4['Borough'] == 'Missisauga'].reset_index(drop=True)
MS.head()
#No data for the Borough

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude


### A. Let's explore Famiglia Baldassarre --

In [780]:
venue_id = '590390fc35811b1afa825dcc' # ID of Famiglia Baldassarre
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
url

'https://api.foursquare.com/v2/venues/590390fc35811b1afa825dcc?client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605'

#### Send GET request for result

In [781]:
result = requests.get(url).json()
print(result['response']['venue'].keys())
result['response']['venue']

dict_keys(['id', 'name', 'contact', 'location', 'canonicalUrl', 'categories', 'verified', 'stats', 'url', 'price', 'likes', 'dislike', 'ok', 'allowMenuUrlEdit', 'beenHere', 'specials', 'photos', 'reasons', 'description', 'hereNow', 'createdAt', 'tips', 'shortUrl', 'timeZone', 'listed', 'hours', 'pageUpdates', 'inbox', 'attributes', 'bestPhoto', 'colors'])


{'id': '590390fc35811b1afa825dcc',
 'name': 'Famiglia Baldassarre',
 'contact': {'phone': '6472935395',
  'formattedPhone': '(647) 293-5395',
  'twitter': 'fambaldassarre',
  'instagram': 'famiglia_baldassarre',
  'facebook': '1517083265279594',
  'facebookName': 'Pasta Fresca Baldassarre'},
 'location': {'address': '122 Geary Ave',
  'lat': 43.670388,
  'lng': -79.434999,
  'labeledLatLngs': [{'label': 'display',
    'lat': 43.670388,
    'lng': -79.434999}],
  'postalCode': 'M6H 4H1',
  'cc': 'CA',
  'city': 'Toronto',
  'state': 'ON',
  'country': 'Canada',
  'formattedAddress': ['122 Geary Ave', 'Toronto ON M6H 4H1', 'Canada']},
 'canonicalUrl': 'https://foursquare.com/v/famiglia-baldassarre/590390fc35811b1afa825dcc',
 'categories': [{'id': '4bf58dd8d48988d110941735',
   'name': 'Italian Restaurant',
   'pluralName': 'Italian Restaurants',
   'shortName': 'Italian',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/italian_',
    'suffix': '.png'},
   'primary': Tr

### C. Get the number of tips

In [783]:
result['response']['venue']['tips']['count']

1

### D. Get the venue's tips
> `https://api.foursquare.com/v2/venues/`**VENUE_ID**`/tips?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&v=`**VERSION**`&limit=`**LIMIT**

#### Create URL and send GET request. Make sure to set limit to get all tips

In [784]:
## Famiglia Baldassarre
limit = 15 # set limit to be greater than or equal to the total number of tips
url = 'https://api.foursquare.com/v2/venues/{}/tips?client_id={}&client_secret={}&v={}&limit={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION, limit)

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c88808ad62e00398a2c0a'},
 'response': {'tips': {'count': 1,
   'items': [{'id': '590cdc868194fc3de954e58d',
     'createdAt': 1494015110,
     'text': 'In addition to selling fresh pasta for take home, Baldassarre also offers lunch Tuesday-Friday using their pasta and gelato.',
     'type': 'user',
     'canonicalUrl': 'https://foursquare.com/item/590cdc868194fc3de954e58d',
     'photo': {'id': '590cce2959fe5c461e16ab34',
      'createdAt': 1494011433,
      'source': {'name': 'Swarm for iOS', 'url': 'https://www.swarmapp.com'},
      'prefix': 'https://fastly.4sqi.net/img/general/',
      'suffix': '/510739_H2henYX98ZNXWFW6NgqTSggpn1QgXKouBUcyerN-Uro.jpg',
      'width': 1440,
      'height': 1920,
      'visibility': 'public'},
     'photourl': 'https://fastly.4sqi.net/img/general/original/510739_H2henYX98ZNXWFW6NgqTSggpn1QgXKouBUcyerN-Uro.jpg',
     'lang': 'en',
     'likes': {'count': 0, 'groups': []},
     'logView': True,
     'agreeCount'

#### Get tips and list of associated features

In [785]:
tips = results['response']['tips']['items']

tip = results['response']['tips']['items'][0]
tip.keys()

dict_keys(['id', 'createdAt', 'text', 'type', 'canonicalUrl', 'photo', 'photourl', 'lang', 'likes', 'logView', 'agreeCount', 'disagreeCount', 'todo', 'user', 'authorInteractionType'])

#### Format column width and display all tips

In [787]:
pd.set_option('display.max_colwidth', -1)

tips_df = json_normalize(tips) # json normalize tips

# columns to keep
filtered_columns = ['text', 'agreeCount', 'disagreeCount', 'id', 'user.firstName', 'user.lastName', 'user.gender', 'user.id']
tips_filtered = tips_df.loc[:, filtered_columns]

# display tips
tips_filtered

Unnamed: 0,text,agreeCount,disagreeCount,id,user.firstName,user.lastName,user.gender,user.id
0,"In addition to selling fresh pasta for take home, Baldassarre also offers lunch Tuesday-Friday using their pasta and gelato.",2,0,590cdc868194fc3de954e58d,Jordan,Yee,male,510739


### A. Let's explore Famiglia Baldassarre --

In [780]:
venue_id = '590390fc35811b1afa825dcc' # ID of Famiglia Baldassarre
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
url

'https://api.foursquare.com/v2/venues/590390fc35811b1afa825dcc?client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605'

#### Send GET request for result

In [781]:
result = requests.get(url).json()
print(result['response']['venue'].keys())
result['response']['venue']

dict_keys(['id', 'name', 'contact', 'location', 'canonicalUrl', 'categories', 'verified', 'stats', 'url', 'price', 'likes', 'dislike', 'ok', 'allowMenuUrlEdit', 'beenHere', 'specials', 'photos', 'reasons', 'description', 'hereNow', 'createdAt', 'tips', 'shortUrl', 'timeZone', 'listed', 'hours', 'pageUpdates', 'inbox', 'attributes', 'bestPhoto', 'colors'])


{'id': '590390fc35811b1afa825dcc',
 'name': 'Famiglia Baldassarre',
 'contact': {'phone': '6472935395',
  'formattedPhone': '(647) 293-5395',
  'twitter': 'fambaldassarre',
  'instagram': 'famiglia_baldassarre',
  'facebook': '1517083265279594',
  'facebookName': 'Pasta Fresca Baldassarre'},
 'location': {'address': '122 Geary Ave',
  'lat': 43.670388,
  'lng': -79.434999,
  'labeledLatLngs': [{'label': 'display',
    'lat': 43.670388,
    'lng': -79.434999}],
  'postalCode': 'M6H 4H1',
  'cc': 'CA',
  'city': 'Toronto',
  'state': 'ON',
  'country': 'Canada',
  'formattedAddress': ['122 Geary Ave', 'Toronto ON M6H 4H1', 'Canada']},
 'canonicalUrl': 'https://foursquare.com/v/famiglia-baldassarre/590390fc35811b1afa825dcc',
 'categories': [{'id': '4bf58dd8d48988d110941735',
   'name': 'Italian Restaurant',
   'pluralName': 'Italian Restaurants',
   'shortName': 'Italian',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/italian_',
    'suffix': '.png'},
   'primary': Tr

### C. Get the number of tips

In [783]:
result['response']['venue']['tips']['count']

1

### D. Get the venue's tips
> `https://api.foursquare.com/v2/venues/`**VENUE_ID**`/tips?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&v=`**VERSION**`&limit=`**LIMIT**

#### Create URL and send GET request. Make sure to set limit to get all tips

In [784]:
## Famiglia Baldassarre
limit = 15 # set limit to be greater than or equal to the total number of tips
url = 'https://api.foursquare.com/v2/venues/{}/tips?client_id={}&client_secret={}&v={}&limit={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION, limit)

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c88808ad62e00398a2c0a'},
 'response': {'tips': {'count': 1,
   'items': [{'id': '590cdc868194fc3de954e58d',
     'createdAt': 1494015110,
     'text': 'In addition to selling fresh pasta for take home, Baldassarre also offers lunch Tuesday-Friday using their pasta and gelato.',
     'type': 'user',
     'canonicalUrl': 'https://foursquare.com/item/590cdc868194fc3de954e58d',
     'photo': {'id': '590cce2959fe5c461e16ab34',
      'createdAt': 1494011433,
      'source': {'name': 'Swarm for iOS', 'url': 'https://www.swarmapp.com'},
      'prefix': 'https://fastly.4sqi.net/img/general/',
      'suffix': '/510739_H2henYX98ZNXWFW6NgqTSggpn1QgXKouBUcyerN-Uro.jpg',
      'width': 1440,
      'height': 1920,
      'visibility': 'public'},
     'photourl': 'https://fastly.4sqi.net/img/general/original/510739_H2henYX98ZNXWFW6NgqTSggpn1QgXKouBUcyerN-Uro.jpg',
     'lang': 'en',
     'likes': {'count': 0, 'groups': []},
     'logView': True,
     'agreeCount'

#### Get tips and list of associated features

In [785]:
tips = results['response']['tips']['items']

tip = results['response']['tips']['items'][0]
tip.keys()

dict_keys(['id', 'createdAt', 'text', 'type', 'canonicalUrl', 'photo', 'photourl', 'lang', 'likes', 'logView', 'agreeCount', 'disagreeCount', 'todo', 'user', 'authorInteractionType'])

#### Format column width and display all tips

In [786]:
pd.set_option('display.max_colwidth', -1)

tips_df = json_normalize(tips) # json normalize tips

# columns to keep
filtered_columns = ['text', 'agreeCount', 'disagreeCount', 'id', 'user.firstName', 'user.lastName', 'user.gender', 'user.id']
tips_filtered = tips_df.loc[:, filtered_columns]

# display tips
tips_filtered

Unnamed: 0,text,agreeCount,disagreeCount,id,user.firstName,user.lastName,user.gender,user.id
0,"In addition to selling fresh pasta for take home, Baldassarre also offers lunch Tuesday-Friday using their pasta and gelato.",2,0,590cdc868194fc3de954e58d,Jordan,Yee,male,510739


## Central Toronto Analysis

In [706]:
CT = df4[df4['Borough'] == 'Central Toronto'].reset_index(drop=True)
CT.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
1,M4P,Central Toronto,Davisville North,43.712751,-79.390197
2,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
3,M4S,Central Toronto,Davisville,43.704324,-79.38879
4,M4T,Central Toronto,"Moore Park,Summerhill East",43.689574,-79.38316


Geographical coordinates of Central Toronto

Let's explore the first neighborhood in our dataframe: get the n

In [708]:
CT.loc[0, 'Neighborhood']

'Lawrence Park'

In [709]:
neighborhood_latitude = CT.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = CT.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = CT.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Lawrence Park are 43.7280205, -79.3887901.


Now, let's get the top 100 venues that are in Lawrence Park within a radius of 500 meters.

Let's create the GET request URL. Name your URL **url**.

In [710]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605&ll=43.7280205,-79.3887901&radius=500&limit=100'

Send the GET request and examine the resutls

In [711]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c790bd9a6e600383ca6e6'},
 'response': {'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 4,
  'suggestedBounds': {'ne': {'lat': 43.7325205045, 'lng': -79.3825744605273},
   'sw': {'lat': 43.7235204955, 'lng': -79.3950057394727}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '50e6da19e4b0d8a78a0e9794',
       'name': 'Lawrence Park Ravine',
       'location': {'address': '3055 Yonge Street',
        'crossStreet': 'Lawrence Avenue East',
        'lat': 43.72696303913755,
        'lng': -79.39438246708775,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.72696303913755,
          'lng': -79.39438246708775}],
        'distance': 465,
        'c

Let's borrow the **get_category_type** function from the Foursquare lab.

In [712]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [713]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Lawrence Park Ravine,Park,43.726963,-79.394382
1,Dim Sum Deluxe,Dim Sum Restaurant,43.726953,-79.39426
2,Zodiac Swim School,Swim School,43.728532,-79.38286
3,TTC Bus #162 - Lawrence-Donway,Bus Line,43.728026,-79.382805


Let's determine how many venus are returned by Foursquare


In [714]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

4 venues were returned by Foursquare.


#### Let's create a function to repeat the same process to all the neighborhoods in CT

In [715]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Now let's write the code to run the above function on each neighborhood and create a new dataframe called *CT_venues*.

In [718]:

CT_venues = getNearbyVenues(names=CT['Neighborhood'],
                                   latitudes=CT['Latitude'],
                                   longitudes=CT['Longitude']
                                  )


Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park,Summerhill East
Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West
Roselawn
Forest Hill North,Forest Hill West
The Annex,North Midtown,Yorkville


#### Let's check the size of the resulting dataframe

In [719]:
print(CT_venues.shape)
CT_venues.head()

(119, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Lawrence Park,43.72802,-79.38879,Lawrence Park Ravine,43.726963,-79.394382,Park
1,Lawrence Park,43.72802,-79.38879,Dim Sum Deluxe,43.726953,-79.39426,Dim Sum Restaurant
2,Lawrence Park,43.72802,-79.38879,Zodiac Swim School,43.728532,-79.38286,Swim School
3,Lawrence Park,43.72802,-79.38879,TTC Bus #162 - Lawrence-Donway,43.728026,-79.382805,Bus Line
4,Davisville North,43.712751,-79.390197,Sherwood Park,43.716551,-79.387776,Park


Let's check how many venues were returned for each neighborhood

In [720]:
CT_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Davisville,37,37,37,37,37,37
Davisville North,8,8,8,8,8,8
"Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West",15,15,15,15,15,15
"Forest Hill North,Forest Hill West",4,4,4,4,4,4
Lawrence Park,4,4,4,4,4,4
"Moore Park,Summerhill East",5,5,5,5,5,5
North Toronto West,19,19,19,19,19,19
Roselawn,3,3,3,3,3,3
"The Annex,North Midtown,Yorkville",24,24,24,24,24,24


#### Let's find out how many unique categories can be curated from all the returned venues

In [721]:
print('There are {} uniques categories.'.format(len(EC_venues['Venue Category'].unique())))

There are 40 uniques categories.


#### Analyze Each Neighborhood

In [722]:
# one hot encoding
CT_onehot = pd.get_dummies(CT_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
CT_onehot['Neighborhood'] = CT_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [CT_onehot.columns[-1]] + list(CT_onehot.columns[:-1])
CT_onehot = CT_onehot[fixed_columns]

CT_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,BBQ Joint,Bagel Shop,Bowling Alley,Breakfast Spot,Brewery,Burger Joint,Bus Line,Café,...,Supermarket,Sushi Restaurant,Swim School,Tennis Court,Thai Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoga Studio
0,Lawrence Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Lawrence Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Lawrence Park,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
3,Lawrence Park,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
4,Davisville North,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [723]:
CT_onehot.shape

(119, 68)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [724]:
CT_grouped= CT_onehot.groupby('Neighborhood').mean().reset_index()
CT_grouped

Unnamed: 0,Neighborhood,American Restaurant,BBQ Joint,Bagel Shop,Bowling Alley,Breakfast Spot,Brewery,Burger Joint,Bus Line,Café,...,Supermarket,Sushi Restaurant,Swim School,Tennis Court,Thai Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoga Studio
0,Davisville,0.0,0.0,0.0,0.027027,0.0,0.027027,0.0,0.0,0.054054,...,0.0,0.054054,0.0,0.0,0.054054,0.027027,0.0,0.0,0.0,0.0
1,Davisville North,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Deer Park,Forest Hill SE,Rathnelly,South Hill,...",0.066667,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,...,0.066667,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0
3,"Forest Hill North,Forest Hill West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0
4,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,...,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Moore Park,Summerhill East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.2,0.0,0.0,0.2,0.0,0.0,0.0
6,North Toronto West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632
7,Roselawn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"The Annex,North Midtown,Yorkville",0.041667,0.041667,0.0,0.0,0.0,0.0,0.041667,0.0,0.125,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0


#### Let's print each neighborhood along with the top 5 most common venues

In [725]:
num_top_venues = 5

for hood in CT_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = CT_grouped[CT_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Davisville----
             venue  freq
0     Dessert Shop  0.08
1   Sandwich Place  0.08
2  Thai Restaurant  0.05
3      Coffee Shop  0.05
4      Pizza Place  0.05


----Davisville North----
            venue  freq
0  Sandwich Place  0.12
1           Hotel  0.12
2   Grocery Store  0.12
3             Gym  0.12
4  Clothing Store  0.12


----Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West----
                 venue  freq
0                  Pub  0.13
1          Coffee Shop  0.13
2  American Restaurant  0.07
3  Fried Chicken Joint  0.07
4          Pizza Place  0.07


----Forest Hill North,Forest Hill West----
                 venue  freq
0        Jewelry Store  0.25
1                Trail  0.25
2   Mexican Restaurant  0.25
3     Sushi Restaurant  0.25
4  American Restaurant  0.00


----Lawrence Park----
                venue  freq
0         Swim School  0.25
1  Dim Sum Restaurant  0.25
2            Bus Line  0.25
3                Park  0.25
4            Pharmacy  0.00


-

### Put the data in a Pandas Dataframe

First, let's write a function to sort the venues in descending order.

In [726]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [727]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = CT_grouped['Neighborhood']

for ind in np.arange(CT_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(CT_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Davisville,Dessert Shop,Sandwich Place,Thai Restaurant,Pizza Place,Sushi Restaurant,Café,Italian Restaurant,Coffee Shop,Indoor Play Area,Restaurant
1,Davisville North,Clothing Store,Hotel,Sandwich Place,Breakfast Spot,Gym,Grocery Store,Food & Drink Shop,Park,Yoga Studio,Flower Shop
2,"Deer Park,Forest Hill SE,Rathnelly,South Hill,...",Pub,Coffee Shop,Sports Bar,Vietnamese Restaurant,Fried Chicken Joint,Light Rail Station,Liquor Store,Pizza Place,Restaurant,American Restaurant
3,"Forest Hill North,Forest Hill West",Mexican Restaurant,Trail,Jewelry Store,Sushi Restaurant,Yoga Studio,Farmers Market,Flower Shop,Food & Drink Shop,Fried Chicken Joint,Garden
4,Lawrence Park,Bus Line,Park,Swim School,Dim Sum Restaurant,Gourmet Shop,Farmers Market,Flower Shop,Food & Drink Shop,Fried Chicken Joint,Garden


### Cluster Neighborhoods

Run *k*-means to cluster the neighborhood into 5 clusters.

In [728]:
# set number of clusters
kclusters = 5

CT_grouped_clustering = CT_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(CT_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 3, 4, 2, 0, 1, 0], dtype=int32)

In [729]:
# add clustering labels


neighborhoods_venues_sorted.insert(0, 'Cluster Labelstwo', kmeans.labels_)
CT_grouped_merged =CT

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
CT_grouped_merged = CT_grouped_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

CT_grouped_merged.head() # check the last columns!

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,4,Bus Line,Park,Swim School,Dim Sum Restaurant,Gourmet Shop,Farmers Market,Flower Shop,Food & Drink Shop,Fried Chicken Joint,Garden
1,M4P,Central Toronto,Davisville North,43.712751,-79.390197,0,Clothing Store,Hotel,Sandwich Place,Breakfast Spot,Gym,Grocery Store,Food & Drink Shop,Park,Yoga Studio,Flower Shop
2,M4R,Central Toronto,North Toronto West,43.715383,-79.405678,0,Coffee Shop,Clothing Store,Sporting Goods Shop,Yoga Studio,Spa,Mexican Restaurant,Miscellaneous Shop,Park,Diner,Dessert Shop
3,M4S,Central Toronto,Davisville,43.704324,-79.38879,0,Dessert Shop,Sandwich Place,Thai Restaurant,Pizza Place,Sushi Restaurant,Café,Italian Restaurant,Coffee Shop,Indoor Play Area,Restaurant
4,M4T,Central Toronto,"Moore Park,Summerhill East",43.689574,-79.38316,2,Summer Camp,Tennis Court,Restaurant,Playground,Trail,Gym / Fitness Center,Gym,Grocery Store,Greek Restaurant,Gourmet Shop


#### Examine Clusters

In [730]:
#CLuster 1
CT_grouped_merged.loc[CT_grouped_merged['Cluster Labelstwo'] == 0, CT_grouped_merged.columns[[1] + list(range(5, CT_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Central Toronto,0,Clothing Store,Hotel,Sandwich Place,Breakfast Spot,Gym,Grocery Store,Food & Drink Shop,Park,Yoga Studio,Flower Shop
2,Central Toronto,0,Coffee Shop,Clothing Store,Sporting Goods Shop,Yoga Studio,Spa,Mexican Restaurant,Miscellaneous Shop,Park,Diner,Dessert Shop
3,Central Toronto,0,Dessert Shop,Sandwich Place,Thai Restaurant,Pizza Place,Sushi Restaurant,Café,Italian Restaurant,Coffee Shop,Indoor Play Area,Restaurant
5,Central Toronto,0,Pub,Coffee Shop,Sports Bar,Vietnamese Restaurant,Fried Chicken Joint,Light Rail Station,Liquor Store,Pizza Place,Restaurant,American Restaurant
8,Central Toronto,0,Coffee Shop,Sandwich Place,Café,Pizza Place,American Restaurant,Indian Restaurant,Jewish Restaurant,Liquor Store,Flower Shop,Park


In [731]:
#CLuster 2
CT_grouped_merged.loc[CT_grouped_merged['Cluster Labelstwo'] == 1, CT_grouped_merged.columns[[1] + list(range(5, CT_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Central Toronto,1,Home Service,Pool,Garden,Yoga Studio,Gourmet Shop,Farmers Market,Flower Shop,Food & Drink Shop,Fried Chicken Joint,Greek Restaurant


In [732]:
#CLuster 3
CT_grouped_merged.loc[CT_grouped_merged['Cluster Labelstwo'] == 2, CT_grouped_merged.columns[[1] + list(range(5, CT_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Central Toronto,2,Summer Camp,Tennis Court,Restaurant,Playground,Trail,Gym / Fitness Center,Gym,Grocery Store,Greek Restaurant,Gourmet Shop


In [733]:
#CLuster 4
CT_grouped_merged.loc[CT_grouped_merged['Cluster Labelstwo'] == 3, CT_grouped_merged.columns[[1] + list(range(5, CT_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Central Toronto,3,Mexican Restaurant,Trail,Jewelry Store,Sushi Restaurant,Yoga Studio,Farmers Market,Flower Shop,Food & Drink Shop,Fried Chicken Joint,Garden


In [734]:
#CLuster 5
CT_grouped_merged.loc[CT_grouped_merged['Cluster Labelstwo'] == 4, CT_grouped_merged.columns[[1] + list(range(5, CT_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,4,Bus Line,Park,Swim School,Dim Sum Restaurant,Gourmet Shop,Farmers Market,Flower Shop,Food & Drink Shop,Fried Chicken Joint,Garden


## Etobicoke Analysis

In [673]:
EC = df4[df4['Borough'] == 'Etobicoke'].reset_index(drop=True)
EC.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M8V,Etobicoke,"Humber Bay Shores,Mimico South,New Toronto",43.605647,-79.501321
1,M8W,Etobicoke,"Alderwood,Long Branch",43.602414,-79.543484
2,M8X,Etobicoke,"The Kingsway,Montgomery Road,Old Mill North",43.653654,-79.506944
3,M8Y,Etobicoke,"Humber Bay,King's Mill Park,Kingsway Park Sout...",43.636258,-79.498509
4,M8Z,Etobicoke,"Kingsway Park South West,Mimico NW,The Queensw...",43.628841,-79.520999


Let's explore the first neighborhood in our dataframe: get the n

In [678]:
EC.loc[0, 'Neighborhood']

'Humber Bay Shores,Mimico South,New Toronto'

In [679]:
neighborhood_latitude = EC.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = EC.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = EC.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Humber Bay Shores,Mimico South,New Toronto are 43.6056466, -79.50132070000001.


Now, let's get the top 100 venues that are in Humber Bay Shores,Mimico South,New Toronto within a radius of 500 meters.

Let's create the GET request URL. Name your URL **url**.

In [680]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605&ll=43.6056466,-79.50132070000001&radius=500&limit=100'

Send the GET request and examine the resutls

In [681]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c758b2b274a003954798b'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 18,
  'suggestedBounds': {'ne': {'lat': 43.6101466045, 'lng': -79.49511771930959},
   'sw': {'lat': 43.6011465955, 'lng': -79.50752368069043}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b119977f964a520488023e3',
       'name': 'LCBO',
       'location': {'address': '2762 Lake Shore Blvd W',
        'crossStreet': 'btwn 1st & 2nd St',
        'lat': 43.60228082768786,
        'lng': -79.4993016827402,
        'labeledLatLngs': [{'label': 'display',
          'lat':

Let's borrow the **get_category_type** function from the Foursquare lab.

In [682]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [683]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,LCBO,Liquor Store,43.602281,-79.499302
1,Domino's Pizza,Pizza Place,43.601676,-79.500908
2,New Toronto Fish & Chips,Restaurant,43.601849,-79.503281
3,Delicia Bakery & Pastry,Bakery,43.601403,-79.503012
4,Lucky Dice Restaurant,Café,43.601392,-79.503056


Let's determine how many venus are returned by Foursquare


In [684]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

18 venues were returned by Foursquare.


#### Let's create a function to repeat the same process to all the neighborhoods in EC

In [685]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Now let's write the code to run the above function on each neighborhood and create a new dataframe called *EC_venues*.

In [686]:

EC_venues = getNearbyVenues(names=EC['Neighborhood'],
                                   latitudes=EC['Latitude'],
                                   longitudes=EC['Longitude']
                                  )


Humber Bay Shores,Mimico South,New Toronto
Alderwood,Long Branch
The Kingsway,Montgomery Road,Old Mill North
Humber Bay,King's Mill Park,Kingsway Park South East,Mimico NE,Old Mill South,The Queensway East,Royal York South East,Sunnylea
Kingsway Park South West,Mimico NW,The Queensway West,Royal York South West,South of Bloor
Islington Avenue
Cloverdale,Islington,Martin Grove,Princess Gardens,West Deane Park
Bloordale Gardens,Eringate,Markland Wood,Old Burnhamthorpe
Westmount
Kingsview Village,Martin Grove Gardens,Richview Gardens,St. Phillips
Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown
Northwest


#### Let's check the size of the resulting dataframe

In [687]:
print(EC_venues.shape)
EC_venues.head()

(74, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Humber Bay Shores,Mimico South,New Toronto",43.605647,-79.501321,LCBO,43.602281,-79.499302,Liquor Store
1,"Humber Bay Shores,Mimico South,New Toronto",43.605647,-79.501321,Domino's Pizza,43.601676,-79.500908,Pizza Place
2,"Humber Bay Shores,Mimico South,New Toronto",43.605647,-79.501321,New Toronto Fish & Chips,43.601849,-79.503281,Restaurant
3,"Humber Bay Shores,Mimico South,New Toronto",43.605647,-79.501321,Delicia Bakery & Pastry,43.601403,-79.503012,Bakery
4,"Humber Bay Shores,Mimico South,New Toronto",43.605647,-79.501321,Lucky Dice Restaurant,43.601392,-79.503056,Café


Let's check how many venues were returned for each neighborhood

In [688]:
EC_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown",9,9,9,9,9,9
"Alderwood,Long Branch",9,9,9,9,9,9
"Bloordale Gardens,Eringate,Markland Wood,Old Burnhamthorpe",7,7,7,7,7,7
"Cloverdale,Islington,Martin Grove,Princess Gardens,West Deane Park",1,1,1,1,1,1
"Humber Bay Shores,Mimico South,New Toronto",18,18,18,18,18,18
"Humber Bay,King's Mill Park,Kingsway Park South East,Mimico NE,Old Mill South,The Queensway East,Royal York South East,Sunnylea",2,2,2,2,2,2
"Kingsview Village,Martin Grove Gardens,Richview Gardens,St. Phillips",4,4,4,4,4,4
"Kingsway Park South West,Mimico NW,The Queensway West,Royal York South West,South of Bloor",13,13,13,13,13,13
Northwest,2,2,2,2,2,2
"The Kingsway,Montgomery Road,Old Mill North",2,2,2,2,2,2


#### Let's find out how many unique categories can be curated from all the returned venues

In [689]:
print('There are {} uniques categories.'.format(len(EC_venues['Venue Category'].unique())))

There are 40 uniques categories.


#### Analyze Each Neighborhood

In [690]:
# one hot encoding
EC_onehot = pd.get_dummies(EC_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
EC_onehot['Neighborhood'] = EC_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [EC_onehot.columns[-1]] + list(EC_onehot.columns[:-1])
EC_onehot = EC_onehot[fixed_columns]

EC_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Bakery,Bank,Baseball Field,Beer Store,Burger Joint,Burrito Place,Bus Line,Café,...,Pool,Pub,Rental Car Location,Restaurant,River,Sandwich Place,Seafood Restaurant,Skating Rink,Supplement Shop,Wings Joint
0,"Humber Bay Shores,Mimico South,New Toronto",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Humber Bay Shores,Mimico South,New Toronto",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Humber Bay Shores,Mimico South,New Toronto",0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
3,"Humber Bay Shores,Mimico South,New Toronto",0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Humber Bay Shores,Mimico South,New Toronto",0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [691]:
EC_onehot.shape

(74, 41)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [692]:
EC_grouped= EC_onehot.groupby('Neighborhood').mean().reset_index()
EC_grouped

Unnamed: 0,Neighborhood,American Restaurant,Bakery,Bank,Baseball Field,Beer Store,Burger Joint,Burrito Place,Bus Line,Café,...,Pool,Pub,Rental Car Location,Restaurant,River,Sandwich Place,Seafood Restaurant,Skating Rink,Supplement Shop,Wings Joint
0,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0
1,"Alderwood,Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.111111,0.111111,0.0,0.0,0.0,0.111111,0.0,0.111111,0.0,0.0
2,"Bloordale Gardens,Eringate,Markland Wood,Old B...",0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.142857,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Cloverdale,Islington,Martin Grove,Princess Gar...",0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Humber Bay Shores,Mimico South,New Toronto",0.055556,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,...,0.0,0.0,0.0,0.055556,0.0,0.055556,0.055556,0.0,0.0,0.0
5,"Humber Bay,King's Mill Park,Kingsway Park Sout...",0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,...,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Kingsview Village,Martin Grove Gardens,Richvie...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Kingsway Park South West,Mimico NW,The Queensw...",0.0,0.076923,0.0,0.0,0.0,0.076923,0.076923,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.076923,0.076923
8,Northwest,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"The Kingsway,Montgomery Road,Old Mill North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0


#### Let's print each neighborhood along with the top 5 most common venues

In [694]:
num_top_venues = 5

for hood in EC_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = EC_grouped[EC_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown----
                 venue  freq
0        Grocery Store  0.22
1          Pizza Place  0.11
2           Beer Store  0.11
3       Sandwich Place  0.11
4  Fried Chicken Joint  0.11


----Alderwood,Long Branch----
          venue  freq
0   Pizza Place  0.22
1   Coffee Shop  0.11
2      Pharmacy  0.11
3  Skating Rink  0.11
4           Gym  0.11


----Bloordale Gardens,Eringate,Markland Wood,Old Burnhamthorpe----
               venue  freq
0  Convenience Store  0.14
1         Beer Store  0.14
2           Pharmacy  0.14
3               Café  0.14
4        Pizza Place  0.14


----Cloverdale,Islington,Martin Grove,Princess Gardens,West Deane Park----
                       venue  freq
0                       Bank   1.0
1        American Restaurant   0.0
2                       Pool   0.0
3         Mexican Restaurant   0.0
4  Middle Eastern Restaurant   0.0


----Humber Bay Shores,Mimico So

### Put the data in a Pandas Dataframe

First, let's write a function to sort the venues in descending order.

In [695]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [696]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = EC_grouped['Neighborhood']

for ind in np.arange(EC_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(EC_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",Grocery Store,Pizza Place,Beer Store,Fast Food Restaurant,Coffee Shop,Pharmacy,Fried Chicken Joint,Sandwich Place,Burger Joint,Burrito Place
1,"Alderwood,Long Branch",Pizza Place,Pharmacy,Skating Rink,Gym,Sandwich Place,Pub,Pool,Coffee Shop,Bus Line,Drugstore
2,"Bloordale Gardens,Eringate,Markland Wood,Old B...",Pizza Place,Liquor Store,Beer Store,Convenience Store,Café,Coffee Shop,Pharmacy,Chinese Restaurant,Flower Shop,Fast Food Restaurant
3,"Cloverdale,Islington,Martin Grove,Princess Gar...",Bank,Wings Joint,Coffee Shop,Grocery Store,Fried Chicken Joint,Flower Shop,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store
4,"Humber Bay Shores,Mimico South,New Toronto",Café,American Restaurant,Pharmacy,Flower Shop,Fried Chicken Joint,Gym,Coffee Shop,Hobby Shop,Liquor Store,Mexican Restaurant


### Cluster Neighborhoods

Run *k*-means to cluster the neighborhood into 5 clusters.

In [697]:
# set number of clusters
kclusters = 5

EC_grouped_clustering = EC_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(EC_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 1, 0, 3, 0, 0, 2, 4], dtype=int32)

In [699]:
# add clustering labels


neighborhoods_venues_sorted.insert(0, 'Cluster Labelstwo', kmeans.labels_)
EC_grouped_merged =EC

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
EC_grouped_merged = EC_grouped_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

EC_grouped_merged.head() # check the last columns!

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M8V,Etobicoke,"Humber Bay Shores,Mimico South,New Toronto",43.605647,-79.501321,0.0,Café,American Restaurant,Pharmacy,Flower Shop,Fried Chicken Joint,Gym,Coffee Shop,Hobby Shop,Liquor Store,Mexican Restaurant
1,M8W,Etobicoke,"Alderwood,Long Branch",43.602414,-79.543484,0.0,Pizza Place,Pharmacy,Skating Rink,Gym,Sandwich Place,Pub,Pool,Coffee Shop,Bus Line,Drugstore
2,M8X,Etobicoke,"The Kingsway,Montgomery Road,Old Mill North",43.653654,-79.506944,4.0,River,Park,Wings Joint,Chinese Restaurant,Fried Chicken Joint,Flower Shop,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store
3,M8Y,Etobicoke,"Humber Bay,King's Mill Park,Kingsway Park Sout...",43.636258,-79.498509,3.0,Baseball Field,Pool,Wings Joint,Chinese Restaurant,Fried Chicken Joint,Flower Shop,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store
4,M8Z,Etobicoke,"Kingsway Park South West,Mimico NW,The Queensw...",43.628841,-79.520999,0.0,Wings Joint,Supplement Shop,Bakery,Burger Joint,Burrito Place,Convenience Store,Discount Store,Fast Food Restaurant,Flower Shop,Grocery Store


#### Examine Clusters

In [700]:
#CLuster 1
EC_grouped_merged.loc[EC_grouped_merged['Cluster Labelstwo'] == 0, EC_grouped_merged.columns[[1] + list(range(5, EC_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Etobicoke,0.0,Café,American Restaurant,Pharmacy,Flower Shop,Fried Chicken Joint,Gym,Coffee Shop,Hobby Shop,Liquor Store,Mexican Restaurant
1,Etobicoke,0.0,Pizza Place,Pharmacy,Skating Rink,Gym,Sandwich Place,Pub,Pool,Coffee Shop,Bus Line,Drugstore
4,Etobicoke,0.0,Wings Joint,Supplement Shop,Bakery,Burger Joint,Burrito Place,Convenience Store,Discount Store,Fast Food Restaurant,Flower Shop,Grocery Store
7,Etobicoke,0.0,Pizza Place,Liquor Store,Beer Store,Convenience Store,Café,Coffee Shop,Pharmacy,Chinese Restaurant,Flower Shop,Fast Food Restaurant
8,Etobicoke,0.0,Pizza Place,Chinese Restaurant,Sandwich Place,Intersection,Middle Eastern Restaurant,Coffee Shop,Wings Joint,Flower Shop,Fast Food Restaurant,Drugstore
9,Etobicoke,0.0,Pizza Place,Mobile Phone Shop,Bus Line,Park,Chinese Restaurant,Flower Shop,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store
10,Etobicoke,0.0,Grocery Store,Pizza Place,Beer Store,Fast Food Restaurant,Coffee Shop,Pharmacy,Fried Chicken Joint,Sandwich Place,Burger Joint,Burrito Place


In [701]:
#CLuster 2
EC_grouped_merged.loc[EC_grouped_merged['Cluster Labelstwo'] == 1, EC_grouped_merged.columns[[1] + list(range(5, EC_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Etobicoke,1.0,Bank,Wings Joint,Coffee Shop,Grocery Store,Fried Chicken Joint,Flower Shop,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store


In [702]:
#CLuster 3
EC_grouped_merged.loc[EC_grouped_merged['Cluster Labelstwo'] == 2, EC_grouped_merged.columns[[1] + list(range(5, EC_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Etobicoke,2.0,Drugstore,Rental Car Location,Wings Joint,Chinese Restaurant,Fried Chicken Joint,Flower Shop,Fast Food Restaurant,Discount Store,Convenience Store,Coffee Shop


In [703]:
#CLuster 4
EC_grouped_merged.loc[EC_grouped_merged['Cluster Labelstwo'] == 3, EC_grouped_merged.columns[[1] + list(range(5, EC_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Etobicoke,3.0,Baseball Field,Pool,Wings Joint,Chinese Restaurant,Fried Chicken Joint,Flower Shop,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store


In [705]:
#CLuster 5
EC_grouped_merged.loc[EC_grouped_merged['Cluster Labelstwo'] == 4, EC_grouped_merged.columns[[1] + list(range(5, EC_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Etobicoke,4.0,River,Park,Wings Joint,Chinese Restaurant,Fried Chicken Joint,Flower Shop,Fast Food Restaurant,Drugstore,Discount Store,Convenience Store


## Central Toronto Analysis

In [706]:
CT = df4[df4['Borough'] == 'Central Toronto'].reset_index(drop=True)
CT.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
1,M4P,Central Toronto,Davisville North,43.712751,-79.390197
2,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
3,M4S,Central Toronto,Davisville,43.704324,-79.38879
4,M4T,Central Toronto,"Moore Park,Summerhill East",43.689574,-79.38316


Geographical coordinates of Central Toronto

Let's explore the first neighborhood in our dataframe: get the n

In [708]:
CT.loc[0, 'Neighborhood']

'Lawrence Park'

In [709]:
neighborhood_latitude = CT.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = CT.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = CT.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Lawrence Park are 43.7280205, -79.3887901.


Now, let's get the top 100 venues that are in Lawrence Park within a radius of 500 meters.

Let's create the GET request URL. Name your URL **url**.

In [710]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605&ll=43.7280205,-79.3887901&radius=500&limit=100'

Send the GET request and examine the resutls

In [711]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c790bd9a6e600383ca6e6'},
 'response': {'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 4,
  'suggestedBounds': {'ne': {'lat': 43.7325205045, 'lng': -79.3825744605273},
   'sw': {'lat': 43.7235204955, 'lng': -79.3950057394727}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '50e6da19e4b0d8a78a0e9794',
       'name': 'Lawrence Park Ravine',
       'location': {'address': '3055 Yonge Street',
        'crossStreet': 'Lawrence Avenue East',
        'lat': 43.72696303913755,
        'lng': -79.39438246708775,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.72696303913755,
          'lng': -79.39438246708775}],
        'distance': 465,
        'c

Let's borrow the **get_category_type** function from the Foursquare lab.

In [712]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [713]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Lawrence Park Ravine,Park,43.726963,-79.394382
1,Dim Sum Deluxe,Dim Sum Restaurant,43.726953,-79.39426
2,Zodiac Swim School,Swim School,43.728532,-79.38286
3,TTC Bus #162 - Lawrence-Donway,Bus Line,43.728026,-79.382805


Let's determine how many venus are returned by Foursquare


In [714]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

4 venues were returned by Foursquare.


#### Let's create a function to repeat the same process to all the neighborhoods in CT

In [715]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Now let's write the code to run the above function on each neighborhood and create a new dataframe called *CT_venues*.

In [718]:

CT_venues = getNearbyVenues(names=CT['Neighborhood'],
                                   latitudes=CT['Latitude'],
                                   longitudes=CT['Longitude']
                                  )


Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park,Summerhill East
Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West
Roselawn
Forest Hill North,Forest Hill West
The Annex,North Midtown,Yorkville


#### Let's check the size of the resulting dataframe

In [719]:
print(CT_venues.shape)
CT_venues.head()

(119, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Lawrence Park,43.72802,-79.38879,Lawrence Park Ravine,43.726963,-79.394382,Park
1,Lawrence Park,43.72802,-79.38879,Dim Sum Deluxe,43.726953,-79.39426,Dim Sum Restaurant
2,Lawrence Park,43.72802,-79.38879,Zodiac Swim School,43.728532,-79.38286,Swim School
3,Lawrence Park,43.72802,-79.38879,TTC Bus #162 - Lawrence-Donway,43.728026,-79.382805,Bus Line
4,Davisville North,43.712751,-79.390197,Sherwood Park,43.716551,-79.387776,Park


Let's check how many venues were returned for each neighborhood

In [720]:
CT_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Davisville,37,37,37,37,37,37
Davisville North,8,8,8,8,8,8
"Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West",15,15,15,15,15,15
"Forest Hill North,Forest Hill West",4,4,4,4,4,4
Lawrence Park,4,4,4,4,4,4
"Moore Park,Summerhill East",5,5,5,5,5,5
North Toronto West,19,19,19,19,19,19
Roselawn,3,3,3,3,3,3
"The Annex,North Midtown,Yorkville",24,24,24,24,24,24


#### Let's find out how many unique categories can be curated from all the returned venues

In [721]:
print('There are {} uniques categories.'.format(len(EC_venues['Venue Category'].unique())))

There are 40 uniques categories.


#### Analyze Each Neighborhood

In [722]:
# one hot encoding
CT_onehot = pd.get_dummies(CT_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
CT_onehot['Neighborhood'] = CT_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [CT_onehot.columns[-1]] + list(CT_onehot.columns[:-1])
CT_onehot = CT_onehot[fixed_columns]

CT_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,BBQ Joint,Bagel Shop,Bowling Alley,Breakfast Spot,Brewery,Burger Joint,Bus Line,Café,...,Supermarket,Sushi Restaurant,Swim School,Tennis Court,Thai Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoga Studio
0,Lawrence Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Lawrence Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Lawrence Park,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
3,Lawrence Park,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
4,Davisville North,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [723]:
CT_onehot.shape

(119, 68)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [724]:
CT_grouped= CT_onehot.groupby('Neighborhood').mean().reset_index()
CT_grouped

Unnamed: 0,Neighborhood,American Restaurant,BBQ Joint,Bagel Shop,Bowling Alley,Breakfast Spot,Brewery,Burger Joint,Bus Line,Café,...,Supermarket,Sushi Restaurant,Swim School,Tennis Court,Thai Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoga Studio
0,Davisville,0.0,0.0,0.0,0.027027,0.0,0.027027,0.0,0.0,0.054054,...,0.0,0.054054,0.0,0.0,0.054054,0.027027,0.0,0.0,0.0,0.0
1,Davisville North,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Deer Park,Forest Hill SE,Rathnelly,South Hill,...",0.066667,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,...,0.066667,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0
3,"Forest Hill North,Forest Hill West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0
4,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,...,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Moore Park,Summerhill East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.2,0.0,0.0,0.2,0.0,0.0,0.0
6,North Toronto West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632
7,Roselawn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"The Annex,North Midtown,Yorkville",0.041667,0.041667,0.0,0.0,0.0,0.0,0.041667,0.0,0.125,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0


#### Let's print each neighborhood along with the top 5 most common venues

In [725]:
num_top_venues = 5

for hood in CT_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = CT_grouped[CT_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Davisville----
             venue  freq
0     Dessert Shop  0.08
1   Sandwich Place  0.08
2  Thai Restaurant  0.05
3      Coffee Shop  0.05
4      Pizza Place  0.05


----Davisville North----
            venue  freq
0  Sandwich Place  0.12
1           Hotel  0.12
2   Grocery Store  0.12
3             Gym  0.12
4  Clothing Store  0.12


----Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West----
                 venue  freq
0                  Pub  0.13
1          Coffee Shop  0.13
2  American Restaurant  0.07
3  Fried Chicken Joint  0.07
4          Pizza Place  0.07


----Forest Hill North,Forest Hill West----
                 venue  freq
0        Jewelry Store  0.25
1                Trail  0.25
2   Mexican Restaurant  0.25
3     Sushi Restaurant  0.25
4  American Restaurant  0.00


----Lawrence Park----
                venue  freq
0         Swim School  0.25
1  Dim Sum Restaurant  0.25
2            Bus Line  0.25
3                Park  0.25
4            Pharmacy  0.00


-

### Put the data in a Pandas Dataframe

First, let's write a function to sort the venues in descending order.

In [726]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [727]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = CT_grouped['Neighborhood']

for ind in np.arange(CT_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(CT_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Davisville,Dessert Shop,Sandwich Place,Thai Restaurant,Pizza Place,Sushi Restaurant,Café,Italian Restaurant,Coffee Shop,Indoor Play Area,Restaurant
1,Davisville North,Clothing Store,Hotel,Sandwich Place,Breakfast Spot,Gym,Grocery Store,Food & Drink Shop,Park,Yoga Studio,Flower Shop
2,"Deer Park,Forest Hill SE,Rathnelly,South Hill,...",Pub,Coffee Shop,Sports Bar,Vietnamese Restaurant,Fried Chicken Joint,Light Rail Station,Liquor Store,Pizza Place,Restaurant,American Restaurant
3,"Forest Hill North,Forest Hill West",Mexican Restaurant,Trail,Jewelry Store,Sushi Restaurant,Yoga Studio,Farmers Market,Flower Shop,Food & Drink Shop,Fried Chicken Joint,Garden
4,Lawrence Park,Bus Line,Park,Swim School,Dim Sum Restaurant,Gourmet Shop,Farmers Market,Flower Shop,Food & Drink Shop,Fried Chicken Joint,Garden


### Cluster Neighborhoods

Run *k*-means to cluster the neighborhood into 5 clusters.

In [728]:
# set number of clusters
kclusters = 5

CT_grouped_clustering = CT_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(CT_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 3, 4, 2, 0, 1, 0], dtype=int32)

In [729]:
# add clustering labels


neighborhoods_venues_sorted.insert(0, 'Cluster Labelstwo', kmeans.labels_)
CT_grouped_merged =CT

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
CT_grouped_merged = CT_grouped_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

CT_grouped_merged.head() # check the last columns!

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,4,Bus Line,Park,Swim School,Dim Sum Restaurant,Gourmet Shop,Farmers Market,Flower Shop,Food & Drink Shop,Fried Chicken Joint,Garden
1,M4P,Central Toronto,Davisville North,43.712751,-79.390197,0,Clothing Store,Hotel,Sandwich Place,Breakfast Spot,Gym,Grocery Store,Food & Drink Shop,Park,Yoga Studio,Flower Shop
2,M4R,Central Toronto,North Toronto West,43.715383,-79.405678,0,Coffee Shop,Clothing Store,Sporting Goods Shop,Yoga Studio,Spa,Mexican Restaurant,Miscellaneous Shop,Park,Diner,Dessert Shop
3,M4S,Central Toronto,Davisville,43.704324,-79.38879,0,Dessert Shop,Sandwich Place,Thai Restaurant,Pizza Place,Sushi Restaurant,Café,Italian Restaurant,Coffee Shop,Indoor Play Area,Restaurant
4,M4T,Central Toronto,"Moore Park,Summerhill East",43.689574,-79.38316,2,Summer Camp,Tennis Court,Restaurant,Playground,Trail,Gym / Fitness Center,Gym,Grocery Store,Greek Restaurant,Gourmet Shop


#### Examine Clusters

In [730]:
#CLuster 1
CT_grouped_merged.loc[CT_grouped_merged['Cluster Labelstwo'] == 0, CT_grouped_merged.columns[[1] + list(range(5, CT_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Central Toronto,0,Clothing Store,Hotel,Sandwich Place,Breakfast Spot,Gym,Grocery Store,Food & Drink Shop,Park,Yoga Studio,Flower Shop
2,Central Toronto,0,Coffee Shop,Clothing Store,Sporting Goods Shop,Yoga Studio,Spa,Mexican Restaurant,Miscellaneous Shop,Park,Diner,Dessert Shop
3,Central Toronto,0,Dessert Shop,Sandwich Place,Thai Restaurant,Pizza Place,Sushi Restaurant,Café,Italian Restaurant,Coffee Shop,Indoor Play Area,Restaurant
5,Central Toronto,0,Pub,Coffee Shop,Sports Bar,Vietnamese Restaurant,Fried Chicken Joint,Light Rail Station,Liquor Store,Pizza Place,Restaurant,American Restaurant
8,Central Toronto,0,Coffee Shop,Sandwich Place,Café,Pizza Place,American Restaurant,Indian Restaurant,Jewish Restaurant,Liquor Store,Flower Shop,Park


In [731]:
#CLuster 2
CT_grouped_merged.loc[CT_grouped_merged['Cluster Labelstwo'] == 1, CT_grouped_merged.columns[[1] + list(range(5, CT_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Central Toronto,1,Home Service,Pool,Garden,Yoga Studio,Gourmet Shop,Farmers Market,Flower Shop,Food & Drink Shop,Fried Chicken Joint,Greek Restaurant


In [732]:
#CLuster 3
CT_grouped_merged.loc[CT_grouped_merged['Cluster Labelstwo'] == 2, CT_grouped_merged.columns[[1] + list(range(5, CT_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Central Toronto,2,Summer Camp,Tennis Court,Restaurant,Playground,Trail,Gym / Fitness Center,Gym,Grocery Store,Greek Restaurant,Gourmet Shop


In [733]:
#CLuster 4
CT_grouped_merged.loc[CT_grouped_merged['Cluster Labelstwo'] == 3, CT_grouped_merged.columns[[1] + list(range(5, CT_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Central Toronto,3,Mexican Restaurant,Trail,Jewelry Store,Sushi Restaurant,Yoga Studio,Farmers Market,Flower Shop,Food & Drink Shop,Fried Chicken Joint,Garden


In [735]:
#CLuster 5
CT_grouped_merged.loc[CT_grouped_merged['Cluster Labelstwo'] == 4, CT_grouped_merged.columns[[1] + list(range(5, CT_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,4,Bus Line,Park,Swim School,Dim Sum Restaurant,Gourmet Shop,Farmers Market,Flower Shop,Food & Drink Shop,Fried Chicken Joint,Garden


## York Analysis

In [736]:
Y = df4[df4['Borough'] == 'York'].reset_index(drop=True)
Y.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M6C,York,Humewood-Cedarvale,43.693781,-79.428191
1,M6E,York,Caledonia-Fairbanks,43.689026,-79.453512
2,M6M,York,"Del Ray,Keelesdale,Mount Dennis,Silverthorn",43.691116,-79.476013
3,M6N,York,"The Junction North,Runnymede",43.673185,-79.487262
4,M9N,York,Weston,43.706876,-79.518188


Geographical coordinates of York

Let's explore the first neighborhood in our dataframe: get the n

In [737]:
Y.loc[0, 'Neighborhood']

'Humewood-Cedarvale'

In [738]:
neighborhood_latitude = Y.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = Y.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = Y.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Humewood-Cedarvale are 43.6937813, -79.42819140000002.


Now, let's get the top 100 venues that are in Humewood-Cedarvale within a radius of 500 meters.

Let's create the GET request URL. Name your URL **url**.

In [743]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605&ll=43.6937813,-79.42819140000002&radius=500&limit=100'

Send the GET request and examine the resutls

In [744]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c7d6286bc49002c983e9b'},
 'response': {'headerLocation': 'Cedarvale',
  'headerFullLocation': 'Cedarvale, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 4,
  'suggestedBounds': {'ne': {'lat': 43.6982813045, 'lng': -79.4219793104081},
   'sw': {'lat': 43.689281295499995, 'lng': -79.43440348959193}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b0afc19f964a520212b23e3',
       'name': 'Cedarvale Park',
       'location': {'address': '100 glen cedar',
        'crossStreet': 'Strathearn',
        'lat': 43.692534923091934,
        'lng': -79.42870527613704,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.692534923091934,
          'lng': -79.42870527613704}],
        'distance': 144,


Let's borrow the **get_category_type** function from the Foursquare lab.

In [745]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [746]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Cedarvale Park,Field,43.692535,-79.428705
1,Phil White Arena,Hockey Arena,43.691303,-79.431761
2,Cedarvale Ravine,Trail,43.690188,-79.426106
3,Prince's Parkette,Park,43.697385,-79.424704


Let's determine how many venus are returned by Foursquare


In [747]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

4 venues were returned by Foursquare.


#### Let's create a function to repeat the same process to all the neighborhoods in Y

In [748]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Now let's write the code to run the above function on each neighborhood and create a new dataframe called *Y_venues*.

In [749]:

Y_venues = getNearbyVenues(names=Y['Neighborhood'],
                                   latitudes=Y['Latitude'],
                                   longitudes=Y['Longitude']
                                  )


Humewood-Cedarvale
Caledonia-Fairbanks
Del Ray,Keelesdale,Mount Dennis,Silverthorn
The Junction North,Runnymede
Weston


#### Let's check the size of the resulting dataframe

In [750]:
print(Y_venues.shape)
Y_venues.head()

(19, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Humewood-Cedarvale,43.693781,-79.428191,Cedarvale Park,43.692535,-79.428705,Field
1,Humewood-Cedarvale,43.693781,-79.428191,Phil White Arena,43.691303,-79.431761,Hockey Arena
2,Humewood-Cedarvale,43.693781,-79.428191,Cedarvale Ravine,43.690188,-79.426106,Trail
3,Humewood-Cedarvale,43.693781,-79.428191,Prince's Parkette,43.697385,-79.424704,Park
4,Caledonia-Fairbanks,43.689026,-79.453512,Shoppers Drug Mart,43.690651,-79.45631,Pharmacy


Let's check how many venues were returned for each neighborhood

In [751]:
Y_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Caledonia-Fairbanks,6,6,6,6,6,6
"Del Ray,Keelesdale,Mount Dennis,Silverthorn",5,5,5,5,5,5
Humewood-Cedarvale,4,4,4,4,4,4
"The Junction North,Runnymede",4,4,4,4,4,4


#### Let's find out how many unique categories can be curated from all the returned venues

In [752]:
print('There are {} uniques categories.'.format(len(EC_venues['Venue Category'].unique())))

There are 40 uniques categories.


#### Analyze Each Neighborhood

In [753]:
# one hot encoding
Y_onehot = pd.get_dummies(Y_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Y_onehot['Neighborhood'] = Y_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Y_onehot.columns[-1]] + list(Y_onehot.columns[:-1])
Y_onehot = Y_onehot[fixed_columns]

Y_onehot.head()

Unnamed: 0,Neighborhood,Bar,Bus Line,Convenience Store,Discount Store,Fast Food Restaurant,Field,Grocery Store,Hockey Arena,Market,Park,Pharmacy,Pizza Place,Sandwich Place,Skating Rink,Trail,Women's Store
0,Humewood-Cedarvale,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,Humewood-Cedarvale,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
2,Humewood-Cedarvale,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
3,Humewood-Cedarvale,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
4,Caledonia-Fairbanks,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0


In [754]:
CT_onehot.shape

(119, 68)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [756]:
Y_grouped= Y_onehot.groupby('Neighborhood').mean().reset_index()
Y_grouped

Unnamed: 0,Neighborhood,Bar,Bus Line,Convenience Store,Discount Store,Fast Food Restaurant,Field,Grocery Store,Hockey Arena,Market,Park,Pharmacy,Pizza Place,Sandwich Place,Skating Rink,Trail,Women's Store
0,Caledonia-Fairbanks,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.166667,0.333333,0.166667,0.0,0.0,0.0,0.0,0.166667
1,"Del Ray,Keelesdale,Mount Dennis,Silverthorn",0.2,0.0,0.2,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.2,0.0,0.0
2,Humewood-Cedarvale,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.25,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0
3,"The Junction North,Runnymede",0.0,0.25,0.25,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0


#### Let's print each neighborhood along with the top 5 most common venues

In [757]:
num_top_venues = 5

for hood in Y_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = Y_grouped[Y_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Caledonia-Fairbanks----
                  venue  freq
0                  Park  0.33
1  Fast Food Restaurant  0.17
2                Market  0.17
3              Pharmacy  0.17
4         Women's Store  0.17


----Del Ray,Keelesdale,Mount Dennis,Silverthorn----
               venue  freq
0                Bar   0.2
1  Convenience Store   0.2
2     Discount Store   0.2
3     Sandwich Place   0.2
4       Skating Rink   0.2


----Humewood-Cedarvale----
          venue  freq
0         Field  0.25
1  Hockey Arena  0.25
2          Park  0.25
3         Trail  0.25
4           Bar  0.00


----The Junction North,Runnymede----
               venue  freq
0           Bus Line  0.25
1  Convenience Store  0.25
2      Grocery Store  0.25
3        Pizza Place  0.25
4                Bar  0.00




### Put the data in a Pandas Dataframe

First, let's write a function to sort the venues in descending order.

In [758]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [759]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Y_grouped['Neighborhood']

for ind in np.arange(Y_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Y_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Caledonia-Fairbanks,Park,Women's Store,Pharmacy,Market,Fast Food Restaurant,Trail,Skating Rink,Sandwich Place,Pizza Place,Hockey Arena
1,"Del Ray,Keelesdale,Mount Dennis,Silverthorn",Skating Rink,Sandwich Place,Discount Store,Convenience Store,Bar,Women's Store,Trail,Pizza Place,Pharmacy,Park
2,Humewood-Cedarvale,Trail,Park,Hockey Arena,Field,Women's Store,Skating Rink,Sandwich Place,Pizza Place,Pharmacy,Market
3,"The Junction North,Runnymede",Pizza Place,Grocery Store,Convenience Store,Bus Line,Women's Store,Trail,Skating Rink,Sandwich Place,Pharmacy,Park


### Cluster Neighborhoods

Run *k*-means to cluster the neighborhood into 5 clusters.

In [761]:
# set number of clusters
kclusters = 4

Y_grouped_clustering = Y_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Y_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 2, 3, 1], dtype=int32)

In [764]:
# add clustering labels


neighborhoods_venues_sorted.insert(0, 'Cluster Labelsnine', kmeans.labels_)
Y_grouped_merged =Y

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Y_grouped_merged = Y_grouped_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Y_grouped_merged.head() # check the last columns!

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labelsnine,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M6C,York,Humewood-Cedarvale,43.693781,-79.428191,3.0,3.0,Trail,Park,Hockey Arena,Field,Women's Store,Skating Rink,Sandwich Place,Pizza Place,Pharmacy,Market
1,M6E,York,Caledonia-Fairbanks,43.689026,-79.453512,0.0,0.0,Park,Women's Store,Pharmacy,Market,Fast Food Restaurant,Trail,Skating Rink,Sandwich Place,Pizza Place,Hockey Arena
2,M6M,York,"Del Ray,Keelesdale,Mount Dennis,Silverthorn",43.691116,-79.476013,2.0,2.0,Skating Rink,Sandwich Place,Discount Store,Convenience Store,Bar,Women's Store,Trail,Pizza Place,Pharmacy,Park
3,M6N,York,"The Junction North,Runnymede",43.673185,-79.487262,1.0,1.0,Pizza Place,Grocery Store,Convenience Store,Bus Line,Women's Store,Trail,Skating Rink,Sandwich Place,Pharmacy,Park
4,M9N,York,Weston,43.706876,-79.518188,,,,,,,,,,,,


#### Examine Clusters

In [765]:
#CLuster 1
Y_grouped_merged.loc[Y_grouped_merged['Cluster Labelstwo'] == 0, Y_grouped_merged.columns[[1] + list(range(5, Y_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelsnine,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,York,0.0,0.0,Park,Women's Store,Pharmacy,Market,Fast Food Restaurant,Trail,Skating Rink,Sandwich Place,Pizza Place,Hockey Arena


In [766]:
#CLuster 2
Y_grouped_merged.loc[Y_grouped_merged['Cluster Labelstwo'] == 1, Y_grouped_merged.columns[[1] + list(range(5, Y_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelsnine,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,York,1.0,1.0,Pizza Place,Grocery Store,Convenience Store,Bus Line,Women's Store,Trail,Skating Rink,Sandwich Place,Pharmacy,Park


In [768]:
#CLuster 3
Y_grouped_merged.loc[Y_grouped_merged['Cluster Labelstwo'] == 2, Y_grouped_merged.columns[[1] + list(range(5, Y_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelsnine,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,York,2.0,2.0,Skating Rink,Sandwich Place,Discount Store,Convenience Store,Bar,Women's Store,Trail,Pizza Place,Pharmacy,Park


In [779]:
#CLuster 4
Y_grouped_merged.loc[Y_grouped_merged['Cluster Labelstwo'] == 3, Y_grouped_merged.columns[[1] + list(range(5, Y_grouped_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labelsnine,Cluster Labelstwo,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,York,3.0,3.0,Trail,Park,Hockey Arena,Field,Women's Store,Skating Rink,Sandwich Place,Pizza Place,Pharmacy,Market


# Best Restaurants according to Gambero Rosso

Here's the best italian restaurants in Toronto according to Gambero rosso:
- Famiglia Baldassarre
- Giulietta
- Il covo
- Terroni Bar Centrale 
- Speducci Mercatto
- Pizzeria Defina
- Pizza e pazzi
- La Bettola di Terroni
- Ardo restaurant
- L' unità

For each restaurant, we get the Tips from Foursquare

### A. Let's explore Famiglia Baldassarre --

In [780]:
venue_id = '590390fc35811b1afa825dcc' # ID of Famiglia Baldassarre
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
url

'https://api.foursquare.com/v2/venues/590390fc35811b1afa825dcc?client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605'

#### Send GET request for result

In [781]:
result = requests.get(url).json()
print(result['response']['venue'].keys())
result['response']['venue']

dict_keys(['id', 'name', 'contact', 'location', 'canonicalUrl', 'categories', 'verified', 'stats', 'url', 'price', 'likes', 'dislike', 'ok', 'allowMenuUrlEdit', 'beenHere', 'specials', 'photos', 'reasons', 'description', 'hereNow', 'createdAt', 'tips', 'shortUrl', 'timeZone', 'listed', 'hours', 'pageUpdates', 'inbox', 'attributes', 'bestPhoto', 'colors'])


{'id': '590390fc35811b1afa825dcc',
 'name': 'Famiglia Baldassarre',
 'contact': {'phone': '6472935395',
  'formattedPhone': '(647) 293-5395',
  'twitter': 'fambaldassarre',
  'instagram': 'famiglia_baldassarre',
  'facebook': '1517083265279594',
  'facebookName': 'Pasta Fresca Baldassarre'},
 'location': {'address': '122 Geary Ave',
  'lat': 43.670388,
  'lng': -79.434999,
  'labeledLatLngs': [{'label': 'display',
    'lat': 43.670388,
    'lng': -79.434999}],
  'postalCode': 'M6H 4H1',
  'cc': 'CA',
  'city': 'Toronto',
  'state': 'ON',
  'country': 'Canada',
  'formattedAddress': ['122 Geary Ave', 'Toronto ON M6H 4H1', 'Canada']},
 'canonicalUrl': 'https://foursquare.com/v/famiglia-baldassarre/590390fc35811b1afa825dcc',
 'categories': [{'id': '4bf58dd8d48988d110941735',
   'name': 'Italian Restaurant',
   'pluralName': 'Italian Restaurants',
   'shortName': 'Italian',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/italian_',
    'suffix': '.png'},
   'primary': Tr

### C. Get the number of tips

In [783]:
result['response']['venue']['tips']['count']

1

### D. Get the venue's tips
> `https://api.foursquare.com/v2/venues/`**VENUE_ID**`/tips?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&v=`**VERSION**`&limit=`**LIMIT**

#### Create URL and send GET request. Make sure to set limit to get all tips

In [784]:
## Famiglia Baldassarre
limit = 15 # set limit to be greater than or equal to the total number of tips
url = 'https://api.foursquare.com/v2/venues/{}/tips?client_id={}&client_secret={}&v={}&limit={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION, limit)

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c88808ad62e00398a2c0a'},
 'response': {'tips': {'count': 1,
   'items': [{'id': '590cdc868194fc3de954e58d',
     'createdAt': 1494015110,
     'text': 'In addition to selling fresh pasta for take home, Baldassarre also offers lunch Tuesday-Friday using their pasta and gelato.',
     'type': 'user',
     'canonicalUrl': 'https://foursquare.com/item/590cdc868194fc3de954e58d',
     'photo': {'id': '590cce2959fe5c461e16ab34',
      'createdAt': 1494011433,
      'source': {'name': 'Swarm for iOS', 'url': 'https://www.swarmapp.com'},
      'prefix': 'https://fastly.4sqi.net/img/general/',
      'suffix': '/510739_H2henYX98ZNXWFW6NgqTSggpn1QgXKouBUcyerN-Uro.jpg',
      'width': 1440,
      'height': 1920,
      'visibility': 'public'},
     'photourl': 'https://fastly.4sqi.net/img/general/original/510739_H2henYX98ZNXWFW6NgqTSggpn1QgXKouBUcyerN-Uro.jpg',
     'lang': 'en',
     'likes': {'count': 0, 'groups': []},
     'logView': True,
     'agreeCount'

#### Get tips and list of associated features

In [789]:
tips = results['response']['tips']['items']

tip = results['response']['tips']['items'][0]
tip.keys()

dict_keys(['id', 'createdAt', 'text', 'type', 'canonicalUrl', 'photo', 'photourl', 'lang', 'likes', 'logView', 'agreeCount', 'disagreeCount', 'todo', 'user', 'authorInteractionType'])

#### Format column width and display all tips

In [790]:
pd.set_option('display.max_colwidth', -1)

tips_df = json_normalize(tips) # json normalize tips

# columns to keep
filtered_columns = ['text', 'agreeCount', 'disagreeCount', 'id', 'user.firstName', 'user.lastName', 'user.gender', 'user.id']
tips_filtered = tips_df.loc[:, filtered_columns]

# display tips
tips_filtered

Unnamed: 0,text,agreeCount,disagreeCount,id,user.firstName,user.lastName,user.gender,user.id
0,"In addition to selling fresh pasta for take home, Baldassarre also offers lunch Tuesday-Friday using their pasta and gelato.",2,0,590cdc868194fc3de954e58d,Jordan,Yee,male,510739


# Best Restaurants according to Gambero Rosso

Here's the best italian restaurants in Toronto according to Gambero rosso:
- Famiglia Baldassarre
- Giulietta
- Il covo
- Terroni Bar Centrale 
- Speducci Mercatto
- Pizzeria Defina
- Pizza e pazzi
- La Bettola di Terroni
- Ardo restaurant
- L' unità

For each restaurant, we get the Tips from Foursquare

### A. Let's explore Giulietta --

In [791]:
venue_id = '5a4fe20a286fda0166f6b594' # ID of Famiglia Baldassarre
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
url

'https://api.foursquare.com/v2/venues/5a4fe20a286fda0166f6b594?client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605'

#### Send GET request for result

In [792]:
result = requests.get(url).json()
print(result['response']['venue'].keys())
result['response']['venue']

dict_keys(['id', 'name', 'contact', 'location', 'canonicalUrl', 'categories', 'verified', 'stats', 'url', 'price', 'likes', 'dislike', 'ok', 'rating', 'ratingColor', 'ratingSignals', 'menu', 'allowMenuUrlEdit', 'beenHere', 'specials', 'photos', 'reasons', 'hereNow', 'createdAt', 'tips', 'shortUrl', 'timeZone', 'listed', 'pageUpdates', 'inbox', 'attributes', 'bestPhoto', 'colors'])


{'id': '5a4fe20a286fda0166f6b594',
 'name': 'Giulietta',
 'contact': {'twitter': 'giulietta_972',
  'instagram': 'giulietta972',
  'facebook': '668685959994560',
  'facebookUsername': 'giulietta972',
  'facebookName': 'Giulietta'},
 'location': {'address': '972 College St',
  'lat': 43.653481,
  'lng': -79.42769,
  'labeledLatLngs': [{'label': 'display', 'lat': 43.653481, 'lng': -79.42769}],
  'postalCode': 'M6H 1A5',
  'cc': 'CA',
  'city': 'Toronto',
  'state': 'ON',
  'country': 'Canada',
  'formattedAddress': ['972 College St', 'Toronto ON M6H 1A5', 'Canada']},
 'canonicalUrl': 'https://foursquare.com/v/giulietta/5a4fe20a286fda0166f6b594',
 'categories': [{'id': '4bf58dd8d48988d110941735',
   'name': 'Italian Restaurant',
   'pluralName': 'Italian Restaurants',
   'shortName': 'Italian',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/italian_',
    'suffix': '.png'},
   'primary': True}],
 'verified': False,
 'stats': {'tipCount': 4},
 'url': 'http://giu.ca',
 '

### C. Get the number of tips

In [793]:
result['response']['venue']['tips']['count']

4

### D. Get the venue's tips
> `https://api.foursquare.com/v2/venues/`**VENUE_ID**`/tips?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&v=`**VERSION**`&limit=`**LIMIT**

#### Create URL and send GET request. Make sure to set limit to get all tips

In [794]:
## Giulietta
limit = 15 # set limit to be greater than or equal to the total number of tips
url = 'https://api.foursquare.com/v2/venues/{}/tips?client_id={}&client_secret={}&v={}&limit={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION, limit)

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c8a0f8ad62e0039971d65'},
 'response': {'tips': {'count': 4,
   'items': [{'id': '5b38f7cef427de002ce2e6e0',
     'createdAt': 1530460110,
     'text': 'So good! The service was impeccable, great spritz cocktail menu and the food is outstanding. Try the Fennel salade, tartare and truffle braised goat.',
     'type': 'user',
     'canonicalUrl': 'https://foursquare.com/item/5b38f7cef427de002ce2e6e0',
     'photo': {'id': '5b38f7d16adbf5002c471ae5',
      'createdAt': 1530460113,
      'source': {'name': 'Foursquare for iOS',
       'url': 'https://foursquare.com/download/#/iphone'},
      'prefix': 'https://fastly.4sqi.net/img/general/',
      'suffix': '/382136557_K4XqVOhpUFcTw02mYJ7wLmTDkVHVtXZ9HEmtzcUVTdA.jpg',
      'width': 1920,
      'height': 1440,
      'visibility': 'public'},
     'photourl': 'https://fastly.4sqi.net/img/general/original/382136557_K4XqVOhpUFcTw02mYJ7wLmTDkVHVtXZ9HEmtzcUVTdA.jpg',
     'lang': 'en',
     'likes': {'count'

#### Get tips and list of associated features

In [795]:
tips = results['response']['tips']['items']

tip = results['response']['tips']['items'][0]
tip.keys()

dict_keys(['id', 'createdAt', 'text', 'type', 'canonicalUrl', 'photo', 'photourl', 'lang', 'likes', 'logView', 'agreeCount', 'disagreeCount', 'lastVoteText', 'lastUpvoteTimestamp', 'todo', 'user', 'authorInteractionType'])

#### Format column width and display all tips

In [797]:
pd.set_option('display.max_colwidth', -1)

tips_df = json_normalize(tips) # json normalize tips

# columns to keep
filtered_columns = ['text', 'agreeCount', 'disagreeCount', 'id', 'user.firstName', 'user.lastName', 'user.gender', 'user.id']
tips_filtered = tips_df.loc[:, filtered_columns]

# display tips
tips_filtered

Unnamed: 0,text,agreeCount,disagreeCount,id,user.firstName,user.lastName,user.gender,user.id
0,"So good! The service was impeccable, great spritz cocktail menu and the food is outstanding. Try the Fennel salade, tartare and truffle braised goat.",2,0,5b38f7cef427de002ce2e6e0,Dounia,,female,382136557


### A. Let's explore Il covo --

In [798]:
venue_id = '5a4fe253a0215b4eadc86bdd' # ID of il covo
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
url

'https://api.foursquare.com/v2/venues/5a4fe253a0215b4eadc86bdd?client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605'

#### Send GET request for result

In [799]:
result = requests.get(url).json()
print(result['response']['venue'].keys())
result['response']['venue']

dict_keys(['id', 'name', 'contact', 'location', 'canonicalUrl', 'categories', 'verified', 'stats', 'url', 'price', 'likes', 'dislike', 'ok', 'menu', 'allowMenuUrlEdit', 'beenHere', 'specials', 'photos', 'reasons', 'hereNow', 'createdAt', 'tips', 'shortUrl', 'timeZone', 'listed', 'pageUpdates', 'inbox', 'attributes', 'bestPhoto', 'colors'])


{'id': '5a4fe253a0215b4eadc86bdd',
 'name': 'Il Covo',
 'contact': {'phone': '4165307585',
  'formattedPhone': '(416) 530-7585',
  'instagram': 'ilcovo.to'},
 'location': {'address': '585 College St',
  'lat': 43.655161,
  'lng': -79.413314,
  'labeledLatLngs': [{'label': 'display',
    'lat': 43.655161,
    'lng': -79.413314}],
  'postalCode': 'M6G 1B2',
  'cc': 'CA',
  'city': 'Toronto',
  'state': 'ON',
  'country': 'Canada',
  'formattedAddress': ['585 College St', 'Toronto ON M6G 1B2', 'Canada']},
 'canonicalUrl': 'https://foursquare.com/v/il-covo/5a4fe253a0215b4eadc86bdd',
 'categories': [{'id': '4bf58dd8d48988d110941735',
   'name': 'Italian Restaurant',
   'pluralName': 'Italian Restaurants',
   'shortName': 'Italian',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/italian_',
    'suffix': '.png'},
   'primary': True},
  {'id': '4bf58dd8d48988d123941735',
   'name': 'Wine Bar',
   'pluralName': 'Wine Bars',
   'shortName': 'Wine Bar',
   'icon': {'prefix': '

### C. Get the number of tips

In [800]:
result['response']['venue']['tips']['count']

2

### D. Get the venue's tips
> `https://api.foursquare.com/v2/venues/`**VENUE_ID**`/tips?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&v=`**VERSION**`&limit=`**LIMIT**

#### Create URL and send GET request. Make sure to set limit to get all tips

In [801]:
## Il covo
limit = 15 # set limit to be greater than or equal to the total number of tips
url = 'https://api.foursquare.com/v2/venues/{}/tips?client_id={}&client_secret={}&v={}&limit={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION, limit)

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c8b886f0aa2002ccebcfd'},
 'response': {'tips': {'count': 2,
   'items': [{'id': '5ac8bfa142d8c203d2ee34dd',
     'createdAt': 1523105697,
     'text': 'Fresh, creative food & a brilliant wine program to match. Presentation top notch and the service perfection. Make a reso. Aim for 3-5 dishes per person. The beef in liquorice, clams with lemonquat...',
     'type': 'user',
     'canonicalUrl': 'https://foursquare.com/item/5ac8bfa142d8c203d2ee34dd',
     'photo': {'id': '5ac8bfa367e5f23fbeb9c10f',
      'createdAt': 1523105699,
      'source': {'name': 'Foursquare for iOS',
       'url': 'https://foursquare.com/download/#/iphone'},
      'prefix': 'https://fastly.4sqi.net/img/general/',
      'suffix': '/146215510_1BYWfn1i1IEa723AILxHYQ_gA6-KOysQaCidLCv6a00.jpg',
      'width': 1440,
      'height': 1440,
      'visibility': 'public'},
     'photourl': 'https://fastly.4sqi.net/img/general/original/146215510_1BYWfn1i1IEa723AILxHYQ_gA6-KOysQaCidLCv6a

#### Get tips and list of associated features

In [802]:
tips = results['response']['tips']['items']

tip = results['response']['tips']['items'][0]
tip.keys()

dict_keys(['id', 'createdAt', 'text', 'type', 'canonicalUrl', 'photo', 'photourl', 'lang', 'likes', 'logView', 'agreeCount', 'disagreeCount', 'todo', 'user', 'authorInteractionType'])

#### Format column width and display all tips

In [806]:
pd.set_option('display.max_colwidth', -1)

tips_df = json_normalize(tips) # json normalize tips

# columns to keep
filtered_columns = ['text', 'agreeCount', 'disagreeCount', 'id', 'user.firstName', 'user.lastName', 'user.gender', 'user.id']
tips_filtered = tips_df.loc[:, filtered_columns]

# display tips
tips_filtered

Unnamed: 0,text,agreeCount,disagreeCount,id,user.firstName,user.lastName,user.gender,user.id
0,"Fresh, creative food & a brilliant wine program to match. Presentation top notch and the service perfection. Make a reso. Aim for 3-5 dishes per person. The beef in liquorice, clams with lemonquat...",1,0,5ac8bfa142d8c203d2ee34dd,Xx,Xx,none,146215510


### A. Let's explore terroni bar centrale --

In [808]:
venue_id = '4d407afca799ba7a9bd9d160' # ID of il terroni bar centrale
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
url

'https://api.foursquare.com/v2/venues/4d407afca799ba7a9bd9d160?client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605'

#### Send GET request for result

In [809]:
result = requests.get(url).json()
print(result['response']['venue'].keys())
result['response']['venue']

dict_keys(['id', 'name', 'contact', 'location', 'canonicalUrl', 'categories', 'verified', 'stats', 'url', 'price', 'hasMenu', 'likes', 'dislike', 'ok', 'rating', 'ratingColor', 'ratingSignals', 'menu', 'allowMenuUrlEdit', 'beenHere', 'specials', 'photos', 'reasons', 'hereNow', 'createdAt', 'tips', 'shortUrl', 'timeZone', 'listed', 'popular', 'pageUpdates', 'inbox', 'attributes', 'bestPhoto', 'colors'])


{'id': '4d407afca799ba7a9bd9d160',
 'name': 'Bar Centrale by Terroni',
 'contact': {'phone': '4169254020',
  'formattedPhone': '(416) 925-4020',
  'twitter': 'terronito'},
 'location': {'address': '1095 Yonge St.',
  'crossStreet': 'at Price St.',
  'lat': 43.67990178371223,
  'lng': -79.39053550112058,
  'labeledLatLngs': [{'label': 'display',
    'lat': 43.67990178371223,
    'lng': -79.39053550112058}],
  'postalCode': 'M4W 2L7',
  'cc': 'CA',
  'city': 'Toronto',
  'state': 'ON',
  'country': 'Canada',
  'formattedAddress': ['1095 Yonge St. (at Price St.)',
   'Toronto ON M4W 2L7',
   'Canada']},
 'canonicalUrl': 'https://foursquare.com/v/bar-centrale-by-terroni/4d407afca799ba7a9bd9d160',
 'categories': [{'id': '4bf58dd8d48988d110941735',
   'name': 'Italian Restaurant',
   'pluralName': 'Italian Restaurants',
   'shortName': 'Italian',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/italian_',
    'suffix': '.png'},
   'primary': True},
  {'id': '4bf58dd8d48988d

### C. Get the number of tips

In [810]:
result['response']['venue']['tips']['count']

34

### D. Get the venue's tips
> `https://api.foursquare.com/v2/venues/`**VENUE_ID**`/tips?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&v=`**VERSION**`&limit=`**LIMIT**

#### Create URL and send GET request. Make sure to set limit to get all tips

In [811]:
## Terroni Bar centrale
limit = 15 # set limit to be greater than or equal to the total number of tips
url = 'https://api.foursquare.com/v2/venues/{}/tips?client_id={}&client_secret={}&v={}&limit={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION, limit)

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c8c40697e3a0025718a5f'},
 'response': {'tips': {'count': 34,
   'items': [{'id': '54cd3bc4498e679b6483b687',
     'createdAt': 1422736324,
     'text': 'I love the vibe on the main floor. Great light streaming in. A very Italian feel. Must try the spicy tuna tartar and any and all pastas. To see more pics have a look at my instagram @Joannasable',
     'type': 'user',
     'canonicalUrl': 'https://foursquare.com/item/54cd3bc4498e679b6483b687',
     'photo': {'id': '54cd3bc6498e90f73e91c75a',
      'createdAt': 1422736326,
      'source': {'name': 'Foursquare for iOS',
       'url': 'https://foursquare.com/download/#/iphone'},
      'prefix': 'https://fastly.4sqi.net/img/general/',
      'suffix': '/81374730_AWKochOeEgnWcFw5zihq2hsAsUJNG5GGLhWJUJOA424.jpg',
      'width': 1439,
      'height': 1920,
      'visibility': 'public'},
     'photourl': 'https://fastly.4sqi.net/img/general/original/81374730_AWKochOeEgnWcFw5zihq2hsAsUJNG5GGLhWJUJOA424.jpg

#### Get tips and list of associated features

In [812]:
tips = results['response']['tips']['items']

tip = results['response']['tips']['items'][0]
tip.keys()

dict_keys(['id', 'createdAt', 'text', 'type', 'canonicalUrl', 'photo', 'photourl', 'lang', 'likes', 'logView', 'agreeCount', 'disagreeCount', 'todo', 'user', 'authorInteractionType'])

#### Format column width and display all tips

In [818]:
pd.set_option('display.max_colwidth', -1)

tips_df = json_normalize(tips) # json normalize tips

# columns to keep
filtered_columns = ['text', 'agreeCount', 'disagreeCount', 'id', 'user.firstName', 'user.lastName', 'user.gender', 'user.id']
tips_filtered = tips_df.loc[:, filtered_columns]

# display tips
tips_filtered

Unnamed: 0,text,agreeCount,disagreeCount,id,user.firstName,user.lastName,user.gender,user.id
0,I love the vibe on the main floor. Great light streaming in. A very Italian feel. Must try the spicy tuna tartar and any and all pastas. To see more pics have a look at my instagram @Joannasable,0,0,54cd3bc4498e679b6483b687,Joanna,Sable,female,81374730


### A. Let's explore Speducci Mercatto --

In [821]:
venue_id = '53ecb865498e33680fdb6306' # ID of Speducci Mercatto
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
url

'https://api.foursquare.com/v2/venues/53ecb865498e33680fdb6306?client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605'

#### Send GET request for result

In [822]:
result = requests.get(url).json()
print(result['response']['venue'].keys())
result['response']['venue']

dict_keys(['id', 'name', 'contact', 'location', 'canonicalUrl', 'categories', 'verified', 'stats', 'price', 'likes', 'dislike', 'ok', 'rating', 'ratingColor', 'ratingSignals', 'allowMenuUrlEdit', 'beenHere', 'specials', 'photos', 'reasons', 'hereNow', 'createdAt', 'tips', 'shortUrl', 'timeZone', 'listed', 'pageUpdates', 'inbox', 'attributes', 'bestPhoto', 'colors'])


{'id': '53ecb865498e33680fdb6306',
 'name': 'Speducci',
 'contact': {},
 'location': {'address': '46 Milford Avenue',
  'lat': 43.70472653273358,
  'lng': -79.47285763431462,
  'labeledLatLngs': [{'label': 'display',
    'lat': 43.70472653273358,
    'lng': -79.47285763431462}],
  'cc': 'CA',
  'city': 'Toronto',
  'state': 'ON',
  'country': 'Canada',
  'formattedAddress': ['46 Milford Avenue', 'Toronto ON', 'Canada']},
 'canonicalUrl': 'https://foursquare.com/v/speducci/53ecb865498e33680fdb6306',
 'categories': [{'id': '4bf58dd8d48988d110941735',
   'name': 'Italian Restaurant',
   'pluralName': 'Italian Restaurants',
   'shortName': 'Italian',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/italian_',
    'suffix': '.png'},
   'primary': True}],
 'verified': False,
 'stats': {'tipCount': 3},
 'price': {'tier': 2, 'message': 'Moderate', 'currency': '$'},
 'likes': {'count': 7,
  'groups': [{'type': 'others',
    'count': 7,
    'items': [{'id': '83830',
      'firs

### C. Get the number of tips

In [823]:
result['response']['venue']['tips']['count']

3

### D. Get the venue's tips
> `https://api.foursquare.com/v2/venues/`**VENUE_ID**`/tips?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&v=`**VERSION**`&limit=`**LIMIT**

#### Create URL and send GET request. Make sure to set limit to get all tips

In [824]:
#Speducci mercatto
limit = 15 # set limit to be greater than or equal to the total number of tips
url = 'https://api.foursquare.com/v2/venues/{}/tips?client_id={}&client_secret={}&v={}&limit={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION, limit)

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c8d526bdee60039024494'},
 'response': {'tips': {'count': 3,
   'items': [{'id': '59c3a1141ffed76478ebf064',
     'createdAt': 1505992980,
     'text': 'They have wild boar salami and a range of deli choices.',
     'type': 'user',
     'canonicalUrl': 'https://foursquare.com/item/59c3a1141ffed76478ebf064',
     'lang': 'en',
     'likes': {'count': 0, 'groups': []},
     'logView': True,
     'agreeCount': 0,
     'disagreeCount': 0,
     'todo': {'count': 0},
     'user': {'id': '3502173',
      'firstName': 'Alina',
      'lastName': 'D',
      'gender': 'female',
      'photo': {'prefix': 'https://fastly.4sqi.net/img/user/',
       'suffix': '/MIOVW2H3SPJ2MI5L.jpg'}},
     'authorInteractionType': 'meh'}]}}}

#### Get tips and list of associated features

In [825]:
tips = results['response']['tips']['items']

tip = results['response']['tips']['items'][0]
tip.keys()

dict_keys(['id', 'createdAt', 'text', 'type', 'canonicalUrl', 'lang', 'likes', 'logView', 'agreeCount', 'disagreeCount', 'todo', 'user', 'authorInteractionType'])

#### Format column width and display all tips

In [826]:
pd.set_option('display.max_colwidth', -1)

tips_df = json_normalize(tips) # json normalize tips

# columns to keep
filtered_columns = ['text', 'agreeCount', 'disagreeCount', 'id', 'user.firstName', 'user.lastName', 'user.gender', 'user.id']
tips_filtered = tips_df.loc[:, filtered_columns]

# display tips
tips_filtered

Unnamed: 0,text,agreeCount,disagreeCount,id,user.firstName,user.lastName,user.gender,user.id
0,They have wild boar salami and a range of deli choices.,0,0,59c3a1141ffed76478ebf064,Alina,D,female,3502173


### A. Let's explore Pizzeria Defina --

In [827]:
venue_id = '4da70d0b90a00b5184013400' # ID of Speducci Mercatto
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
url

'https://api.foursquare.com/v2/venues/4da70d0b90a00b5184013400?client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605'

#### Send GET request for result

In [828]:
result = requests.get(url).json()
print(result['response']['venue'].keys())
result['response']['venue']

dict_keys(['id', 'name', 'contact', 'location', 'canonicalUrl', 'categories', 'verified', 'stats', 'url', 'price', 'hasMenu', 'likes', 'dislike', 'ok', 'rating', 'ratingColor', 'ratingSignals', 'menu', 'allowMenuUrlEdit', 'beenHere', 'specials', 'photos', 'venuePage', 'reasons', 'page', 'hereNow', 'createdAt', 'tips', 'shortUrl', 'timeZone', 'listed', 'popular', 'pageUpdates', 'inbox', 'attributes', 'bestPhoto', 'colors'])


{'id': '4da70d0b90a00b5184013400',
 'name': 'Pizzeria Defina',
 'contact': {'phone': '4165344414', 'formattedPhone': '(416) 534-4414'},
 'location': {'address': '321 Roncesvalles Avenue',
  'lat': 43.648722638277675,
  'lng': -79.44985455915354,
  'labeledLatLngs': [{'label': 'display',
    'lat': 43.648722638277675,
    'lng': -79.44985455915354}],
  'cc': 'CA',
  'city': 'Toronto',
  'state': 'ON',
  'country': 'Canada',
  'formattedAddress': ['321 Roncesvalles Avenue', 'Toronto ON', 'Canada']},
 'canonicalUrl': 'https://foursquare.com/v/pizzeria-defina/4da70d0b90a00b5184013400',
 'categories': [{'id': '4bf58dd8d48988d1ca941735',
   'name': 'Pizza Place',
   'pluralName': 'Pizza Places',
   'shortName': 'Pizza',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/pizza_',
    'suffix': '.png'},
   'primary': True},
  {'id': '4bf58dd8d48988d110941735',
   'name': 'Italian Restaurant',
   'pluralName': 'Italian Restaurants',
   'shortName': 'Italian',
   'icon': {'prefix

### C. Get the number of tips

In [829]:
result['response']['venue']['tips']['count']

25

### D. Get the venue's tips
> `https://api.foursquare.com/v2/venues/`**VENUE_ID**`/tips?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&v=`**VERSION**`&limit=`**LIMIT**

#### Create URL and send GET request. Make sure to set limit to get all tips

In [830]:
#Pizzeria Defina
limit = 15 # set limit to be greater than or equal to the total number of tips
url = 'https://api.foursquare.com/v2/venues/{}/tips?client_id={}&client_secret={}&v={}&limit={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION, limit)

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c8de7933c010023e8cde3'},
 'response': {'tips': {'count': 25,
   'items': [{'id': '5cce5468340a58002cd95d3d',
     'createdAt': 1557025896,
     'text': 'This place ticks all the right boxes, friendly staff, great food, good vibes and good atmosphere. Sure to go again soon and take friends with 🤗',
     'type': 'user',
     'canonicalUrl': 'https://foursquare.com/item/5cce5468340a58002cd95d3d',
     'lang': 'en',
     'likes': {'count': 0, 'groups': []},
     'logView': True,
     'agreeCount': 1,
     'disagreeCount': 0,
     'lastVoteText': 'Upvoted May 5',
     'lastUpvoteTimestamp': 1557060378,
     'todo': {'count': 0},
     'user': {'id': '383107199',
      'firstName': 'Aydın',
      'lastName': 'Sarucan',
      'gender': 'male',
      'photo': {'prefix': 'https://fastly.4sqi.net/img/user/',
       'suffix': '/383107199_k6uV-0Jh_-6kfIS06CIewTxcCW6OQMmKGWB-VDv2ZTXcc0ZKZ2F1e1MtaT_2pCCjLmnSFUV1X.jpg'}},
     'authorInteractionType': 'liked'}]}

#### Get tips and list of associated features

In [831]:
tips = results['response']['tips']['items']

tip = results['response']['tips']['items'][0]
tip.keys()

dict_keys(['id', 'createdAt', 'text', 'type', 'canonicalUrl', 'lang', 'likes', 'logView', 'agreeCount', 'disagreeCount', 'lastVoteText', 'lastUpvoteTimestamp', 'todo', 'user', 'authorInteractionType'])

#### Format column width and display all tips

In [833]:
pd.set_option('display.max_colwidth', -1)

tips_df = json_normalize(tips) # json normalize tips

# columns to keep
filtered_columns = ['text', 'agreeCount', 'disagreeCount', 'id', 'user.firstName', 'user.lastName', 'user.gender', 'user.id']
tips_filtered = tips_df.loc[:, filtered_columns]

# display tips
tips_filtered

Unnamed: 0,text,agreeCount,disagreeCount,id,user.firstName,user.lastName,user.gender,user.id
0,"This place ticks all the right boxes, friendly staff, great food, good vibes and good atmosphere. Sure to go again soon and take friends with 🤗",1,0,5cce5468340a58002cd95d3d,Aydın,Sarucan,male,383107199


### A. Let's explore Pizza e pazzi --

In [836]:
venue_id = '4dd5dc2445ddced820612749' # ID of Pizza e Pazzi
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
url

'https://api.foursquare.com/v2/venues/4dd5dc2445ddced820612749?client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605'

#### Send GET request for result

In [837]:
result = requests.get(url).json()
print(result['response']['venue'].keys())
result['response']['venue']

dict_keys(['id', 'name', 'contact', 'location', 'canonicalUrl', 'categories', 'verified', 'stats', 'url', 'price', 'hasMenu', 'likes', 'dislike', 'ok', 'rating', 'ratingColor', 'ratingSignals', 'menu', 'allowMenuUrlEdit', 'beenHere', 'specials', 'photos', 'venuePage', 'reasons', 'description', 'storeId', 'page', 'hereNow', 'createdAt', 'tips', 'shortUrl', 'timeZone', 'listed', 'hours', 'popular', 'pageUpdates', 'inbox', 'attributes', 'bestPhoto', 'colors'])


{'id': '4dd5dc2445ddced820612749',
 'name': 'Pizza e Pazzi',
 'contact': {'phone': '4166519999',
  'formattedPhone': '(416) 651-9999',
  'twitter': 'pizzaepazzi',
  'instagram': 'pizza_e_pazzi'},
 'location': {'address': '1182 St. Clair Ave. W',
  'lat': 43.67782147238314,
  'lng': -79.44372401169342,
  'labeledLatLngs': [{'label': 'display',
    'lat': 43.67782147238314,
    'lng': -79.44372401169342}],
  'postalCode': 'M6E 1B4',
  'cc': 'CA',
  'city': 'Toronto',
  'state': 'ON',
  'country': 'Canada',
  'formattedAddress': ['1182 St. Clair Ave. W',
   'Toronto ON M6E 1B4',
   'Canada']},
 'canonicalUrl': 'https://foursquare.com/v/pizza-e-pazzi/4dd5dc2445ddced820612749',
 'categories': [{'id': '4bf58dd8d48988d110941735',
   'name': 'Italian Restaurant',
   'pluralName': 'Italian Restaurants',
   'shortName': 'Italian',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/italian_',
    'suffix': '.png'},
   'primary': True},
  {'id': '4bf58dd8d48988d1ca941735',
   'name

### C. Get the number of tips

In [838]:
result['response']['venue']['tips']['count']

31

### D. Get the venue's tips
> `https://api.foursquare.com/v2/venues/`**VENUE_ID**`/tips?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&v=`**VERSION**`&limit=`**LIMIT**

#### Create URL and send GET request. Make sure to set limit to get all tips

In [839]:
#Pizza e pazzi
limit = 15 # set limit to be greater than or equal to the total number of tips
url = 'https://api.foursquare.com/v2/venues/{}/tips?client_id={}&client_secret={}&v={}&limit={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION, limit)

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c8ed67a1a610025056ce9'},
 'response': {'tips': {'count': 31,
   'items': [{'id': '590bf77935811b21e3bf0f94',
     'createdAt': 1493956473,
     'text': 'Making Every Sunday Family Special !**Join us for Sunday Lunch 3 Course set menu !**Sundays are for relaxing, so let us do the cooking !',
     'type': 'merchant_special',
     'finePrint': 'Free Dessert for kids !',
     'canonicalUrl': 'https://foursquare.com/item/590bf77935811b21e3bf0f94',
     'photo': {'id': '58bcc8495c1ba26a238f1b4f',
      'createdAt': 1488767049,
      'source': {'name': 'Foursquare Web', 'url': 'https://foursquare.com'},
      'prefix': 'https://fastly.4sqi.net/img/general/',
      'suffix': '/407415870_empvl23nd97dL2aVOtdUSRWoHslhQJOP0_cFJaiCw_g.jpg',
      'width': 4608,
      'height': 2592,
      'visibility': 'public'},
     'photourl': 'https://fastly.4sqi.net/img/general/original/407415870_empvl23nd97dL2aVOtdUSRWoHslhQJOP0_cFJaiCw_g.jpg',
     'lang': 'en',
     '

#### Get tips and list of associated features

In [840]:
tips = results['response']['tips']['items']

tip = results['response']['tips']['items'][0]
tip.keys()

dict_keys(['id', 'createdAt', 'text', 'type', 'finePrint', 'canonicalUrl', 'photo', 'photourl', 'lang', 'likes', 'logView', 'startAt', 'agreeCount', 'disagreeCount', 'todo', 'user'])

#### Format column width and display all tips

In [842]:
pd.set_option('display.max_colwidth', -1)

tips_df = json_normalize(tips) # json normalize tips

# columns to keep
filtered_columns = ['text', 'agreeCount', 'disagreeCount', 'id', 'user.firstName', 'user.lastName', 'user.gender', 'user.id']
tips_filtered = tips_df.loc[:, filtered_columns]

# display tips
tips_filtered

Unnamed: 0,text,agreeCount,disagreeCount,id,user.firstName,user.lastName,user.gender,user.id
0,"Making Every Sunday Family Special !**Join us for Sunday Lunch 3 Course set menu !**Sundays are for relaxing, so let us do the cooking !",0,0,590bf77935811b21e3bf0f94,Pizza e Pazzi,,none,407415870


### A. Let's explore La Bettola di Terroni --

In [843]:
venue_id = '4c251062db519521621d2c3a' # ID of La bettola di terroni
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
url

'https://api.foursquare.com/v2/venues/4c251062db519521621d2c3a?client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605'

#### Send GET request for result

In [844]:
result = requests.get(url).json()
print(result['response']['venue'].keys())
result['response']['venue']

dict_keys(['id', 'name', 'contact', 'location', 'canonicalUrl', 'categories', 'verified', 'stats', 'url', 'price', 'hasMenu', 'likes', 'dislike', 'ok', 'rating', 'ratingColor', 'ratingSignals', 'menu', 'allowMenuUrlEdit', 'beenHere', 'specials', 'photos', 'reasons', 'hereNow', 'createdAt', 'tips', 'shortUrl', 'timeZone', 'listed', 'popular', 'pageUpdates', 'inbox', 'attributes', 'bestPhoto', 'colors'])


{'id': '4c251062db519521621d2c3a',
 'name': 'La Bettola Di Terroni',
 'contact': {'phone': '4165041992',
  'formattedPhone': '(416) 504-1992',
  'twitter': 'osteria_bettola'},
 'location': {'address': '106 Victoria St',
  'lat': 43.65199330592565,
  'lng': -79.37805579614488,
  'labeledLatLngs': [{'label': 'display',
    'lat': 43.65199330592565,
    'lng': -79.37805579614488}],
  'postalCode': 'M5C 2B4',
  'cc': 'CA',
  'city': 'Toronto',
  'state': 'ON',
  'country': 'Canada',
  'formattedAddress': ['106 Victoria St', 'Toronto ON M5C 2B4', 'Canada']},
 'canonicalUrl': 'https://foursquare.com/v/la-bettola-di-terroni/4c251062db519521621d2c3a',
 'categories': [{'id': '4bf58dd8d48988d110941735',
   'name': 'Italian Restaurant',
   'pluralName': 'Italian Restaurants',
   'shortName': 'Italian',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/italian_',
    'suffix': '.png'},
   'primary': True}],
 'verified': False,
 'stats': {'tipCount': 36},
 'url': 'http://labettola.

### C. Get the number of tips

In [845]:
result['response']['venue']['tips']['count']

36

### D. Get the venue's tips
> `https://api.foursquare.com/v2/venues/`**VENUE_ID**`/tips?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&v=`**VERSION**`&limit=`**LIMIT**

#### Create URL and send GET request. Make sure to set limit to get all tips

In [846]:
#La bettola di Terroni
limit = 15 # set limit to be greater than or equal to the total number of tips
url = 'https://api.foursquare.com/v2/venues/{}/tips?client_id={}&client_secret={}&v={}&limit={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION, limit)

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c8f9cdb1d810039e2f99e'},
 'response': {'tips': {'count': 36,
   'items': [{'id': '57d5d9b6498ed33ac1e6a2f9',
     'createdAt': 1473632694,
     'text': 'Consistently great food. The classic fungi salad is a must and always a pizza. The rest can change. Do go, a Toronto institution',
     'type': 'user',
     'canonicalUrl': 'https://foursquare.com/item/57d5d9b6498ed33ac1e6a2f9',
     'photo': {'id': '57d5d9b6498ed33ac1e6a2f8',
      'createdAt': 1473632694,
      'source': {'name': 'Instagram', 'url': 'http://instagram.com'},
      'prefix': 'https://fastly.4sqi.net/img/general/',
      'suffix': '/81374730_nzftmb6mppgWZ1ntKdHCwbu2UMS6iIunjpwLf_btX4Y.jpg',
      'width': 1080,
      'height': 1080,
      'visibility': 'public'},
     'photourl': 'https://fastly.4sqi.net/img/general/original/81374730_nzftmb6mppgWZ1ntKdHCwbu2UMS6iIunjpwLf_btX4Y.jpg',
     'lang': 'en',
     'likes': {'count': 0, 'groups': []},
     'logView': True,
     'agreeCount

#### Get tips and list of associated features

In [847]:
tips = results['response']['tips']['items']

tip = results['response']['tips']['items'][0]
tip.keys()

dict_keys(['id', 'createdAt', 'text', 'type', 'canonicalUrl', 'photo', 'photourl', 'lang', 'likes', 'logView', 'agreeCount', 'disagreeCount', 'todo', 'user', 'authorInteractionType'])

#### Format column width and display all tips

In [848]:
pd.set_option('display.max_colwidth', -1)

tips_df = json_normalize(tips) # json normalize tips

# columns to keep
filtered_columns = ['text', 'agreeCount', 'disagreeCount', 'id', 'user.firstName', 'user.lastName', 'user.gender', 'user.id']
tips_filtered = tips_df.loc[:, filtered_columns]

# display tips
tips_filtered

Unnamed: 0,text,agreeCount,disagreeCount,id,user.firstName,user.lastName,user.gender,user.id
0,"Consistently great food. The classic fungi salad is a must and always a pizza. The rest can change. Do go, a Toronto institution",1,0,57d5d9b6498ed33ac1e6a2f9,Joanna,Sable,female,81374730


### A. Let's explore Ardo Restaurant --

In [849]:
venue_id = '56d8dff7498eb4e5e661e78d' # ID of Ardo Restaurant
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
url

'https://api.foursquare.com/v2/venues/56d8dff7498eb4e5e661e78d?client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605'

#### Send GET request for result

In [850]:
result = requests.get(url).json()
print(result['response']['venue'].keys())
result['response']['venue']

dict_keys(['id', 'name', 'contact', 'location', 'canonicalUrl', 'categories', 'verified', 'stats', 'url', 'price', 'likes', 'dislike', 'ok', 'rating', 'ratingColor', 'ratingSignals', 'allowMenuUrlEdit', 'beenHere', 'specials', 'photos', 'reasons', 'hereNow', 'createdAt', 'tips', 'shortUrl', 'timeZone', 'listed', 'pageUpdates', 'inbox', 'attributes', 'bestPhoto', 'colors'])


{'id': '56d8dff7498eb4e5e661e78d',
 'name': 'Ardo',
 'contact': {'twitter': 'ardorestaurant',
  'facebook': '992305787529920',
  'facebookUsername': 'ardorestaurant',
  'facebookName': 'ARDO Restaurant'},
 'location': {'address': '243 King St E',
  'crossStreet': 'at Lower Sherbourne St',
  'lat': 43.6512011278776,
  'lng': -79.36835027595798,
  'labeledLatLngs': [{'label': 'display',
    'lat': 43.6512011278776,
    'lng': -79.36835027595798}],
  'postalCode': 'M5A 1J9',
  'cc': 'CA',
  'city': 'Toronto',
  'state': 'ON',
  'country': 'Canada',
  'formattedAddress': ['243 King St E (at Lower Sherbourne St)',
   'Toronto ON M5A 1J9',
   'Canada']},
 'canonicalUrl': 'https://foursquare.com/v/ardo/56d8dff7498eb4e5e661e78d',
 'categories': [{'id': '4bf58dd8d48988d110941735',
   'name': 'Italian Restaurant',
   'pluralName': 'Italian Restaurants',
   'shortName': 'Italian',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/italian_',
    'suffix': '.png'},
   'primary': Tr

### C. Get the number of tips

In [851]:
result['response']['venue']['tips']['count']

8

### D. Get the venue's tips
> `https://api.foursquare.com/v2/venues/`**VENUE_ID**`/tips?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&v=`**VERSION**`&limit=`**LIMIT**

#### Create URL and send GET request. Make sure to set limit to get all tips

In [852]:
#Ardo Restaurant
limit = 15 # set limit to be greater than or equal to the total number of tips
url = 'https://api.foursquare.com/v2/venues/{}/tips?client_id={}&client_secret={}&v={}&limit={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION, limit)

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c9045f8953d0025990595'},
 'response': {'tips': {'count': 8,
   'items': [{'id': '5c3e8382ab42d9002c740a4c',
     'createdAt': 1547600770,
     'text': 'Had the achiugge (grilled anchovies on crostini) and ARDO salsiccia (perfect tomatoes and amount of spice).Extremely attentive and friendly service. Owner/chef and waiters were exceptional.',
     'type': 'user',
     'canonicalUrl': 'https://foursquare.com/item/5c3e8382ab42d9002c740a4c',
     'lang': 'en',
     'likes': {'count': 0, 'groups': []},
     'logView': True,
     'agreeCount': 0,
     'disagreeCount': 0,
     'todo': {'count': 0},
     'user': {'id': '167268074',
      'firstName': 'Tracy',
      'lastName': 'Zhao',
      'gender': 'female',
      'photo': {'prefix': 'https://fastly.4sqi.net/img/user/',
       'suffix': '/167268074-SUXBARJKNLWTSDXM.jpg'}},
     'authorInteractionType': 'liked'}]}}}

#### Get tips and list of associated features

In [853]:
tips = results['response']['tips']['items']

tip = results['response']['tips']['items'][0]
tip.keys()

dict_keys(['id', 'createdAt', 'text', 'type', 'canonicalUrl', 'lang', 'likes', 'logView', 'agreeCount', 'disagreeCount', 'todo', 'user', 'authorInteractionType'])

#### Format column width and display all tips

In [854]:
pd.set_option('display.max_colwidth', -1)

tips_df = json_normalize(tips) # json normalize tips

# columns to keep
filtered_columns = ['text', 'agreeCount', 'disagreeCount', 'id', 'user.firstName', 'user.lastName', 'user.gender', 'user.id']
tips_filtered = tips_df.loc[:, filtered_columns]

# display tips
tips_filtered

Unnamed: 0,text,agreeCount,disagreeCount,id,user.firstName,user.lastName,user.gender,user.id
0,Had the achiugge (grilled anchovies on crostini) and ARDO salsiccia (perfect tomatoes and amount of spice).Extremely attentive and friendly service. Owner/chef and waiters were exceptional.,0,0,5c3e8382ab42d9002c740a4c,Tracy,Zhao,female,167268074


### A. Let's explore L'unità--

In [855]:
venue_id = '4ad4c05cf964a520f1f520e3' # ID of L'unità
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
url

'https://api.foursquare.com/v2/venues/4ad4c05cf964a520f1f520e3?client_id=DQ1DDA4QGNWZ3AXGNDJAWKCL0TWAA25XZ0JIFZ1P2ILHVEAL&client_secret=G3WSZBQQS41HO3YXCFI5RIVAB5GETLHYDI5LKF4GKNIEKUOP&v=20180605'

#### Send GET request for result

In [856]:
result = requests.get(url).json()
print(result['response']['venue'].keys())
result['response']['venue']

dict_keys(['id', 'name', 'contact', 'location', 'canonicalUrl', 'categories', 'verified', 'stats', 'url', 'price', 'hasMenu', 'likes', 'dislike', 'ok', 'rating', 'ratingColor', 'ratingSignals', 'menu', 'allowMenuUrlEdit', 'beenHere', 'specials', 'photos', 'reasons', 'description', 'hereNow', 'createdAt', 'tips', 'shortUrl', 'timeZone', 'listed', 'hours', 'popular', 'pageUpdates', 'inbox', 'attributes', 'bestPhoto', 'colors'])


{'id': '4ad4c05cf964a520f1f520e3',
 'name': "L'Unita Restaurant",
 'contact': {'phone': '4169648686',
  'formattedPhone': '(416) 964-8686',
  'twitter': 'lunita_enoteca'},
 'location': {'address': '134 Avenue Rd.',
  'crossStreet': 'Davenport Rd',
  'lat': 43.674387404879205,
  'lng': -79.39648826123504,
  'labeledLatLngs': [{'label': 'display',
    'lat': 43.674387404879205,
    'lng': -79.39648826123504}],
  'postalCode': 'M5R 2H6',
  'cc': 'CA',
  'city': 'Toronto',
  'state': 'ON',
  'country': 'Canada',
  'formattedAddress': ['134 Avenue Rd. (Davenport Rd)',
   'Toronto ON M5R 2H6',
   'Canada']},
 'canonicalUrl': 'https://foursquare.com/v/lunita-restaurant/4ad4c05cf964a520f1f520e3',
 'categories': [{'id': '4bf58dd8d48988d110941735',
   'name': 'Italian Restaurant',
   'pluralName': 'Italian Restaurants',
   'shortName': 'Italian',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/italian_',
    'suffix': '.png'},
   'primary': True}],
 'verified': False,
 'stats'

### C. Get the number of tips

In [857]:
result['response']['venue']['tips']['count']

14

### D. Get the venue's tips
> `https://api.foursquare.com/v2/venues/`**VENUE_ID**`/tips?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&v=`**VERSION**`&limit=`**LIMIT**

#### Create URL and send GET request. Make sure to set limit to get all tips

In [858]:
#l' unità
limit = 15 # set limit to be greater than or equal to the total number of tips
url = 'https://api.foursquare.com/v2/venues/{}/tips?client_id={}&client_secret={}&v={}&limit={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION, limit)

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d3c90ebbcbf7a003903bcb7'},
 'response': {'tips': {'count': 14,
   'items': [{'id': '547ff094498e38761bfb42de',
     'createdAt': 1417670804,
     'text': 'Amazing food and service. It has a really friendly atmosphere but can get loud with the hum of conversation. Great caesar salad and amazing pork loin with seasonal veggies. Nice wine list.',
     'type': 'user',
     'canonicalUrl': 'https://foursquare.com/item/547ff094498e38761bfb42de',
     'lang': 'en',
     'likes': {'count': 0, 'groups': []},
     'logView': True,
     'agreeCount': 0,
     'disagreeCount': 0,
     'todo': {'count': 0},
     'user': {'id': '3875010',
      'firstName': 'Denise',
      'gender': 'female',
      'photo': {'prefix': 'https://fastly.4sqi.net/img/user/',
       'suffix': '/1OPPKBK2FI5EAH0A.jpg'}},
     'authorInteractionType': 'liked'}]}}}

#### Get tips and list of associated features

In [859]:
tips = results['response']['tips']['items']

tip = results['response']['tips']['items'][0]
tip.keys()

dict_keys(['id', 'createdAt', 'text', 'type', 'canonicalUrl', 'lang', 'likes', 'logView', 'agreeCount', 'disagreeCount', 'todo', 'user', 'authorInteractionType'])

#### Format column width and display all tips

In [860]:
pd.set_option('display.max_colwidth', -1)

tips_df = json_normalize(tips) # json normalize tips

# columns to keep
filtered_columns = ['text', 'agreeCount', 'disagreeCount', 'id', 'user.firstName', 'user.lastName', 'user.gender', 'user.id']
tips_filtered = tips_df.loc[:, filtered_columns]

# display tips
tips_filtered

Unnamed: 0,text,agreeCount,disagreeCount,id,user.firstName,user.lastName,user.gender,user.id
0,Amazing food and service. It has a really friendly atmosphere but can get loud with the hum of conversation. Great caesar salad and amazing pork loin with seasonal veggies. Nice wine list.,0,0,547ff094498e38761bfb42de,Denise,,female,3875010
