In [1]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json
import requests
from pandas.io.json import json_normalize

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: failed with initial frozen solve. Retrying with flexible solve.
Collecting package metadata (repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /Users/ibrahimramadan/opt/anaconda3

  added / updated specs:
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    altair-4.1.0               |             py_1         614 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         687 KB

The following NEW packages will be INSTALLED:

  altair             conda-forge/noarch::altair-4.1.0-py_1
  vincent            conda-forge/noarc

# First Download the NYC dataset

## 5 Boroughs and 306 neighborhoods

In [2]:
!wget -q -O 'newyork_data.json' https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs/newyork_data.json
print('Data downloaded!')

Data downloaded!


In [3]:
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)

newyork_data

{'type': 'FeatureCollection',
 'totalFeatures': 306,
 'features': [{'type': 'Feature',
   'id': 'nyu_2451_34572.1',
   'geometry': {'type': 'Point',
    'coordinates': [-73.84720052054902, 40.89470517661]},
   'geometry_name': 'geom',
   'properties': {'name': 'Wakefield',
    'stacked': 1,
    'annoline1': 'Wakefield',
    'annoline2': None,
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.84720052054902,
     40.89470517661,
     -73.84720052054902,
     40.89470517661]}},
  {'type': 'Feature',
   'id': 'nyu_2451_34572.2',
   'geometry': {'type': 'Point',
    'coordinates': [-73.82993910812398, 40.87429419303012]},
   'geometry_name': 'geom',
   'properties': {'name': 'Co-op City',
    'stacked': 2,
    'annoline1': 'Co-op',
    'annoline2': 'City',
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.82993910812398,
     40.87429419303012,
     -73.82993910812398,
     40.87429419303012]}},
  {'type': 'Feature',
 

In [4]:
neighborhoods_data = newyork_data['features']

In [5]:
neighborhoods_data[0]

{'type': 'Feature',
 'id': 'nyu_2451_34572.1',
 'geometry': {'type': 'Point',
  'coordinates': [-73.84720052054902, 40.89470517661]},
 'geometry_name': 'geom',
 'properties': {'name': 'Wakefield',
  'stacked': 1,
  'annoline1': 'Wakefield',
  'annoline2': None,
  'annoline3': None,
  'annoangle': 0.0,
  'borough': 'Bronx',
  'bbox': [-73.84720052054902,
   40.89470517661,
   -73.84720052054902,
   40.89470517661]}}

## Import json data into a pandas dataframe

In [6]:
columnss = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']

neighborhoods = pd.DataFrame(columns=columnss)

In [7]:
# Won't run an empty dataframe
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude


In [8]:
# Fill the dataframe
for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [9]:
# Examine the dataframe
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [10]:
print("Boroughs: {}".format(len(neighborhoods["Borough"].unique())))
print("Neighborhoods: {}".format(neighborhoods.shape[0]))

Boroughs: 5
Neighborhoods: 306


# Visualizing NYC

In [11]:
lat_nyc = 40.7128
long_nyc = -74.0060

map_newyork = folium.Map(location=[lat_nyc, long_nyc], zoom_start=10)

# Markers on the map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

# Now lets get the data for Toronto

In [12]:
# import beautiful soup to scrape the data out of the website
#from bs4 import BeautifulSoup
print(5)

5


### Scrape the webpage contents off the sites.

In [13]:
from bs4 import BeautifulSoup

In [14]:
scrape_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M' 
page = requests.get(scrape_url)
soup = BeautifulSoup(page.content, 'html.parser') 
print(soup)

<!DOCTYPE html>

<html class="client-nojs" dir="ltr" lang="en">
<head>
<meta charset="utf-8"/>
<title>List of postal codes of Canada: M - Wikipedia</title>
<script>document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"YDeAj8sayba4-hwRD6ixBwAAAIc","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":1008658788,"wgRevisionId":1008658788,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Short description is different from Wikidata","Communications in Ontario","Postal 

In [15]:
data_table = str(soup.table)
print(data_table)


<table class="wikitable sortable">
<tbody><tr>
<th>Postal Code
</th>
<th>Borough
</th>
<th>Neighbourhood
</th></tr>
<tr>
<td>M1A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M2A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3A
</td>
<td>North York
</td>
<td>Parkwoods
</td></tr>
<tr>
<td>M4A
</td>
<td>North York
</td>
<td>Victoria Village
</td></tr>
<tr>
<td>M5A
</td>
<td>Downtown Toronto
</td>
<td>Regent Park, Harbourfront
</td></tr>
<tr>
<td>M6A
</td>
<td>North York
</td>
<td>Lawrence Manor, Lawrence Heights
</td></tr>
<tr>
<td>M7A
</td>
<td>Downtown Toronto
</td>
<td>Queen's Park, Ontario Provincial Government
</td></tr>
<tr>
<td>M8A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M9A
</td>
<td>Etobicoke
</td>
<td>Islington Avenue, Humber Valley Village
</td></tr>
<tr>
<td>M1B
</td>
<td>Scarborough
</td>
<td>Malvern, Rouge
</td></tr>
<tr>
<td>M2B
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3B
</td>
<td

### Convert the object into a dataframe

In [16]:
df_list = pd.read_html(data_table)
df = df_list[0]
df.columns = df.columns.str.replace(' ', '_') 
df.head()

Unnamed: 0,Postal_Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [17]:
# Ignore cells with a borough that is Not assigned
df = df[df.Borough != 'Not assigned']

# Combine neighborhood with similar postal codes
df = df.groupby(['Postal_Code','Borough'], sort=False).agg(', '.join)
df.reset_index(inplace=True)

# Repace Not assigned neighnourhood with the borough.
df['Neighbourhood'] = np.where(df['Neighbourhood'] == 'Not assigned',df['Borough'], df['Neighbourhood'])
df

Unnamed: 0,Postal_Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [18]:
df.head()

Unnamed: 0,Postal_Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [19]:
# Get geospactial data
df_geo = pd.read_csv('https://cocl.us/Geospatial_data')
df_geo.columns = df_geo.columns.str.replace(' ', '_') 
df_geo.head()

Unnamed: 0,Postal_Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [20]:
# Merge the datasets
df_merge = pd.merge(df, df_geo, on='Postal_Code')
df_merge.head()

Unnamed: 0,Postal_Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [21]:
# Only the Toronto rows
df_toronto = df_merge[df_merge['Borough'].str.contains('Toronto')]

In [22]:
#  Remove the Postal Code column and rename neighbourhood to neighborhood
df_toronto.drop(['Postal_Code'], axis=1, inplace=True)
df_toronto.rename(columns={"Neighbourhood": "Neighborhood"}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [23]:
df_toronto.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
2,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,Downtown Toronto,St. James Town,43.651494,-79.375418
19,East Toronto,The Beaches,43.676357,-79.293031


### Visualize Toronto

In [24]:
latitude_toronto = 43.651070
longitude_toronto = -79.347015
# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[latitude_toronto, longitude_toronto], zoom_start=11)

# add markers to map
for lat, lng, bor, neigh in zip(df_toronto['Latitude'],df_toronto['Longitude'],df_toronto['Borough'],df_toronto['Neighborhood']):
    label = '{}, {}'.format(neigh, bor)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

### Define foursquare Credentials

In [25]:
CLIENT_ID = 'TFK2VE5CVJIFIPPZBIXRTT2HO3HTSEJXVVUFOQKPPJ5EB1OC' # your Foursquare ID
CLIENT_SECRET = 'LYBACES0OZDIALSEPAHMBCNYA20AT1BBO5LF5Y2WCPYRCGKO' # your Foursquare Secret
ACCESS_TOKEN = 'AF4C3P1WNAOBLTGWV1BX33CYBFUDNYJW0A2HZRMOIM2MBDVN'
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: TFK2VE5CVJIFIPPZBIXRTT2HO3HTSEJXVVUFOQKPPJ5EB1OC
CLIENT_SECRET:LYBACES0OZDIALSEPAHMBCNYA20AT1BBO5LF5Y2WCPYRCGKO


In [26]:
# Define a function that gets nearby venues
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

# Retrieve Venues Nearby

In [27]:
toronto_venues = getNearbyVenues(df_toronto['Neighborhood'], df_toronto['Latitude'], df_toronto['Longitude'])
toronto_venues.head()

Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
The Danforth West, Riverdale
Toronto Dominion Centre, Design Exchange
Brockton, Parkdale Village, Exhibition Place
India Bazaar, The Beaches West
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West, Forest Hill Road Park
High Park, The Junction South
North Toronto West, Lawrence Park
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
University of Toronto, Harbord
Runnymede, Swansea
Moore Park, Summerhill East
Kensington Market, Chinatown, Grange Park
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
R

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant


In [28]:
# Check the size
print(toronto_venues.shape)

(1599, 7)


In [29]:
nyc_venues = getNearbyVenues(neighborhoods['Neighborhood'], neighborhoods['Latitude'], neighborhoods['Longitude'])
nyc_venues.head()

Wakefield
Co-op City
Eastchester
Fieldston
Riverdale
Kingsbridge
Marble Hill
Woodlawn
Norwood
Williamsbridge
Baychester
Pelham Parkway
City Island
Bedford Park
University Heights
Morris Heights
Fordham
East Tremont
West Farms
High  Bridge
Melrose
Mott Haven
Port Morris
Longwood
Hunts Point
Morrisania
Soundview
Clason Point
Throgs Neck
Country Club
Parkchester
Westchester Square
Van Nest
Morris Park
Belmont
Spuyten Duyvil
North Riverdale
Pelham Bay
Schuylerville
Edgewater Park
Castle Hill
Olinville
Pelham Gardens
Concourse
Unionport
Edenwald
Bay Ridge
Bensonhurst
Sunset Park
Greenpoint
Gravesend
Brighton Beach
Sheepshead Bay
Manhattan Terrace
Flatbush
Crown Heights
East Flatbush
Kensington
Windsor Terrace
Prospect Heights
Brownsville
Williamsburg
Bushwick
Bedford Stuyvesant
Brooklyn Heights
Cobble Hill
Carroll Gardens
Red Hook
Gowanus
Fort Greene
Park Slope
Cypress Hills
East New York
Starrett City
Canarsie
Flatlands
Mill Island
Manhattan Beach
Coney Island
Bath Beach
Borough Park
Dyker

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Wakefield,40.894705,-73.847201,Lollipops Gelato,40.894123,-73.845892,Dessert Shop
1,Wakefield,40.894705,-73.847201,Rite Aid,40.896649,-73.844846,Pharmacy
2,Wakefield,40.894705,-73.847201,Walgreens,40.896528,-73.8447,Pharmacy
3,Wakefield,40.894705,-73.847201,Carvel Ice Cream,40.890487,-73.848568,Ice Cream Shop
4,Wakefield,40.894705,-73.847201,Subway,40.890468,-73.849152,Sandwich Place


In [30]:
print(nyc_venues.shape)

(10078, 7)


## Lets check how many Venues Per neighborhood
### Also I want to see which neighborhoods have the most venues

In [31]:
toronto_venues.groupby('Neighborhood').count().sort_values(by="Neighborhood Latitude", ascending=False)

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Toronto Dominion Centre, Design Exchange",100,100,100,100,100,100
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
"First Canadian Place, Underground city",100,100,100,100,100,100
"Garden District, Ryerson",100,100,100,100,100,100
"Harbourfront East, Union Station, Toronto Islands",100,100,100,100,100,100
Stn A PO Boxes,99,99,99,99,99,99
"Richmond, Adelaide, King",94,94,94,94,94,94
St. James Town,82,82,82,82,82,82
Church and Wellesley,77,77,77,77,77,77
Central Bay Street,65,65,65,65,65,65


In [32]:
nyc_venues.groupby('Neighborhood').count().sort_values(by="Neighborhood Latitude", ascending=False)

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Murray Hill,147,147,147,147,147,147
Chelsea,107,107,107,107,107,107
Lenox Hill,100,100,100,100,100,100
Little Italy,100,100,100,100,100,100
Chinatown,100,100,100,100,100,100
Civic Center,100,100,100,100,100,100
Clinton,100,100,100,100,100,100
Downtown,100,100,100,100,100,100
East Village,100,100,100,100,100,100
Financial District,100,100,100,100,100,100


### How many unique categories for each city?

In [33]:
print("NYC: {}".format(len(nyc_venues['Venue Category'].unique())))
print("Toronto: {}".format(len(toronto_venues['Venue Category'].unique())))

NYC: 437
Toronto: 232


### Lets Concatenate the two dataframes

In [34]:
#first lets add a new column called city
nyc_venues["City"] = "NYC"
toronto_venues["City"] = "Toronto"

dataframes = [nyc_venues, toronto_venues]
df_conc = pd.concat(dataframes)

In [35]:
print(df_conc.head())

  Neighborhood  Neighborhood Latitude  Neighborhood Longitude  \
0    Wakefield              40.894705              -73.847201   
1    Wakefield              40.894705              -73.847201   
2    Wakefield              40.894705              -73.847201   
3    Wakefield              40.894705              -73.847201   
4    Wakefield              40.894705              -73.847201   

              Venue  Venue Latitude  Venue Longitude  Venue Category City  
0  Lollipops Gelato       40.894123       -73.845892    Dessert Shop  NYC  
1          Rite Aid       40.896649       -73.844846        Pharmacy  NYC  
2         Walgreens       40.896528       -73.844700        Pharmacy  NYC  
3  Carvel Ice Cream       40.890487       -73.848568  Ice Cream Shop  NYC  
4            Subway       40.890468       -73.849152  Sandwich Place  NYC  


In [36]:
# check the size
print(df_conc.shape)

(11677, 8)


# Lets Analyze each neighborhood

In [37]:
# One hot encoding
cities_onehot = pd.get_dummies(df_conc[["Venue Category"]], prefix="", prefix_sep="")

# Re-add neighborhood and city column back
cities_onehot["Neighborhood"] = df_conc["Neighborhood"]
cities_onehot["City"] = df_conc["City"]
print(cities_onehot.head())

   Accessories Store  Adult Boutique  Afghan Restaurant  African Restaurant  \
0                  0               0                  0                   0   
1                  0               0                  0                   0   
2                  0               0                  0                   0   
3                  0               0                  0                   0   
4                  0               0                  0                   0   

   Airport  Airport Food Court  Airport Gate  Airport Lounge  Airport Service  \
0        0                   0             0               0                0   
1        0                   0             0               0                0   
2        0                   0             0               0                0   
3        0                   0             0               0                0   
4        0                   0             0               0                0   

   Airport Terminal  American Restaura

In [38]:
# Add city to the front
fixed_columns = [cities_onehot.columns[-1]] + list(cities_onehot.columns[:-1])
cities_onehot = cities_onehot[fixed_columns]

print(cities_onehot.head())

  City  Accessories Store  Adult Boutique  Afghan Restaurant  \
0  NYC                  0               0                  0   
1  NYC                  0               0                  0   
2  NYC                  0               0                  0   
3  NYC                  0               0                  0   
4  NYC                  0               0                  0   

   African Restaurant  Airport  Airport Food Court  Airport Gate  \
0                   0        0                   0             0   
1                   0        0                   0             0   
2                   0        0                   0             0   
3                   0        0                   0             0   
4                   0        0                   0             0   

   Airport Lounge  Airport Service  Airport Terminal  American Restaurant  \
0               0                0                 0                    0   
1               0                0                 0

In [78]:
# Add Neighborhoods to the front
temp = cities_onehot['Neighborhood']
cities_onehot .drop(labels=['Neighborhood'], axis=1,inplace = True)
cities_onehot.insert(0, 'Neighborhood', temp)

cities_onehot.head()

Unnamed: 0,Neighborhood,City,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Arcade,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,Auditorium,Australian Restaurant,Austrian Restaurant,Auto Garage,Auto Workshop,Automotive Shop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Basketball Stadium,Bath House,Beach,Beach Bar,Bed & Breakfast,Beer Bar,Beer Garden,Beer Store,Belgian Restaurant,Big Box Store,Bike Rental / Bike Share,Bike Shop,Bike Trail,Bistro,Board Shop,Boat or Ferry,Bookstore,Border Crossing,Boutique,Bowling Alley,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bridge,Bubble Tea Shop,Buffet,Building,Burger Joint,Burmese Restaurant,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Cafeteria,Café,Cajun / Creole Restaurant,Camera Store,Campground,Candy Store,Cantonese Restaurant,Caribbean Restaurant,Caucasian Restaurant,Check Cashing Service,Cheese Shop,Child Care Service,Chinese Restaurant,Chocolate Shop,Christmas Market,Church,Circus,Climbing Gym,Clothing Store,Club House,Cocktail Bar,Coffee Shop,College Academic Building,College Arts Building,College Auditorium,College Basketball Court,College Bookstore,College Cafeteria,College Gym,College Rec Center,Colombian Restaurant,Comedy Club,Comfort Food Restaurant,Comic Shop,Community Center,Concert Hall,Construction & Landscaping,Convenience Store,Cooking School,Cosmetics Shop,Coworking Space,Creperie,Cuban Restaurant,Cultural Center,Cupcake Shop,Cycle Studio,Czech Restaurant,Dance Studio,Daycare,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distillery,Distribution Center,Dive Bar,Doctor's Office,Dog Run,Doner Restaurant,Donut Shop,Dosa Place,Drugstore,Dry Cleaner,Dumpling Restaurant,Duty-free Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Escape Room,Ethiopian Restaurant,Event Service,Event Space,Exhibit,Eye Doctor,Factory,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Stand,Food Truck,Fountain,Frame Store,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Gluten-free Restaurant,Go Kart Track,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Halal Restaurant,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Heliport,Herbs & Spices Store,High School,Hill,Himalayan Restaurant,Historic Site,History Museum,Hobby Shop,Home Service,Hookah Bar,Hospital,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hotel Pool,Hotpot Restaurant,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Indonesian Restaurant,Indoor Play Area,Insurance Office,Intersection,Irish Pub,Israeli Restaurant,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Karaoke Bar,Kebab Restaurant,Kids Store,Kitchen Supply Store,Korean Restaurant,Kosher Restaurant,Lake,Latin American Restaurant,Laundromat,Laundry Service,Lawyer,Leather Goods Store,Lebanese Restaurant,Library,Light Rail Station,Lingerie Store,Liquor Store,Locksmith,Lounge,Luggage Store,Malay Restaurant,Market,Martial Arts School,Massage Studio,Mattress Store,Medical Center,Medical Supply Store,Mediterranean Restaurant,Memorial Site,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Mini Golf,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Motel,Motorcycle Shop,Movie Theater,Moving Target,Multiplex,Museum,Music School,Music Store,Music Venue,Nail Salon,New American Restaurant,Newsstand,Nightclub,Nightlife Spot,Non-Profit,Noodle House,North Indian Restaurant,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Other Nightlife,Other Repair Shop,Outdoor Gym,Outdoor Sculpture,Outdoors & Recreation,Outlet Mall,Outlet Store,Paella Restaurant,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pedestrian Plaza,Performing Arts Venue,Persian Restaurant,Peruvian Restaurant,Pet Café,Pet Service,Pet Store,Pharmacy,Photography Studio,Physical Therapist,Piano Bar,Pie Shop,Pier,Piercing Parlor,Pilates Studio,Pizza Place,Plane,Platform,Playground,Plaza,Poke Place,Polish Restaurant,Pool,Pool Hall,Portuguese Restaurant,Post Office,Poutine Place,Print Shop,Pub,Public Art,Puerto Rican Restaurant,Racetrack,Ramen Restaurant,Record Shop,Recording Studio,Recreation Center,Rental Car Location,Rental Service,Residential Building (Apartment / Condo),Resort,Rest Area,Restaurant,River,Road,Rock Climbing Spot,Rock Club,Roller Rink,Romanian Restaurant,Roof Deck,Russian Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,School,Sculpture Garden,Seafood Restaurant,Shabu-Shabu Restaurant,Shanghai Restaurant,Shipping Store,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Ski Area,Smoke Shop,Smoothie Shop,Snack Place,Soba Restaurant,Soccer Field,Social Club,Soup Place,South American Restaurant,South Indian Restaurant,Southern / Soul Food Restaurant,Souvlaki Shop,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Sri Lankan Restaurant,Stables,Stadium,State / Provincial Park,Stationery Store,Steakhouse,Storage Facility,Street Art,Strip Club,Supermarket,Supplement Shop,Surf Spot,Sushi Restaurant,Swim School,Swiss Restaurant,Szechuan Restaurant,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tattoo Parlor,Tea Room,Tech Startup,Tennis Court,Tennis Stadium,Tex-Mex Restaurant,Thai Restaurant,Theater,Theme Park,Theme Park Ride / Attraction,Theme Restaurant,Thrift / Vintage Store,Tibetan Restaurant,Tiki Bar,Toll Plaza,Tourist Information Center,Toy / Game Store,Track,Trail,Train,Train Station,Turkish Restaurant,Udon Restaurant,Used Bookstore,Vape Store,Varenyky restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Volleyball Court,Warehouse Store,Waste Facility,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Wakefield,NYC,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Wakefield,NYC,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Wakefield,NYC,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Wakefield,NYC,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Wakefield,NYC,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [40]:
print(cities_onehot.shape)

(11677, 462)


In [41]:
# Groupby neighborhoods by getting the mean
cities_grouped = cities_onehot.groupby("Neighborhood").mean().reset_index()
print(cities_grouped.head())

    Neighborhood  Accessories Store  Adult Boutique  Afghan Restaurant  \
0       Allerton                0.0             0.0                0.0   
1       Annadale                0.0             0.0                0.0   
2  Arden Heights                0.0             0.0                0.0   
3      Arlington                0.0             0.0                0.0   
4       Arrochar                0.0             0.0                0.0   

   African Restaurant  Airport  Airport Food Court  Airport Gate  \
0                 0.0      0.0                 0.0           0.0   
1                 0.0      0.0                 0.0           0.0   
2                 0.0      0.0                 0.0           0.0   
3                 0.0      0.0                 0.0           0.0   
4                 0.0      0.0                 0.0           0.0   

   Airport Lounge  Airport Service  Airport Terminal  American Restaurant  \
0             0.0              0.0               0.0             0.00

In [76]:
# Group by respective cities to see venues
city_group = cities_onehot.groupby("City").mean().reset_index()
city_group.head()

Unnamed: 0,City,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Arcade,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,Auditorium,Australian Restaurant,Austrian Restaurant,Auto Garage,Auto Workshop,Automotive Shop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Basketball Stadium,Bath House,Beach,Beach Bar,Bed & Breakfast,Beer Bar,Beer Garden,Beer Store,Belgian Restaurant,Big Box Store,Bike Rental / Bike Share,Bike Shop,Bike Trail,Bistro,Board Shop,Boat or Ferry,Bookstore,Border Crossing,Boutique,Bowling Alley,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bridge,Bubble Tea Shop,Buffet,Building,Burger Joint,Burmese Restaurant,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Cafeteria,Café,Cajun / Creole Restaurant,Camera Store,Campground,Candy Store,Cantonese Restaurant,Caribbean Restaurant,Caucasian Restaurant,Check Cashing Service,Cheese Shop,Child Care Service,Chinese Restaurant,Chocolate Shop,Christmas Market,Church,Circus,Climbing Gym,Clothing Store,Club House,Cocktail Bar,Coffee Shop,College Academic Building,College Arts Building,College Auditorium,College Basketball Court,College Bookstore,College Cafeteria,College Gym,College Rec Center,Colombian Restaurant,Comedy Club,Comfort Food Restaurant,Comic Shop,Community Center,Concert Hall,Construction & Landscaping,Convenience Store,Cooking School,Cosmetics Shop,Coworking Space,Creperie,Cuban Restaurant,Cultural Center,Cupcake Shop,Cycle Studio,Czech Restaurant,Dance Studio,Daycare,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distillery,Distribution Center,Dive Bar,Doctor's Office,Dog Run,Doner Restaurant,Donut Shop,Dosa Place,Drugstore,Dry Cleaner,Dumpling Restaurant,Duty-free Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Escape Room,Ethiopian Restaurant,Event Service,Event Space,Exhibit,Eye Doctor,Factory,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Stand,Food Truck,Fountain,Frame Store,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Gluten-free Restaurant,Go Kart Track,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Halal Restaurant,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Heliport,Herbs & Spices Store,High School,Hill,Himalayan Restaurant,Historic Site,History Museum,Hobby Shop,Home Service,Hookah Bar,Hospital,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hotel Pool,Hotpot Restaurant,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Indonesian Restaurant,Indoor Play Area,Insurance Office,Intersection,Irish Pub,Israeli Restaurant,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Karaoke Bar,Kebab Restaurant,Kids Store,Kitchen Supply Store,Korean Restaurant,Kosher Restaurant,Lake,Latin American Restaurant,Laundromat,Laundry Service,Lawyer,Leather Goods Store,Lebanese Restaurant,Library,Light Rail Station,Lingerie Store,Liquor Store,Locksmith,Lounge,Luggage Store,Malay Restaurant,Market,Martial Arts School,Massage Studio,Mattress Store,Medical Center,Medical Supply Store,Mediterranean Restaurant,Memorial Site,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Mini Golf,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Motel,Motorcycle Shop,Movie Theater,Moving Target,Multiplex,Museum,Music School,Music Store,Music Venue,Nail Salon,New American Restaurant,Newsstand,Nightclub,Nightlife Spot,Non-Profit,Noodle House,North Indian Restaurant,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Other Nightlife,Other Repair Shop,Outdoor Gym,Outdoor Sculpture,Outdoors & Recreation,Outlet Mall,Outlet Store,Paella Restaurant,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pedestrian Plaza,Performing Arts Venue,Persian Restaurant,Peruvian Restaurant,Pet Café,Pet Service,Pet Store,Pharmacy,Photography Studio,Physical Therapist,Piano Bar,Pie Shop,Pier,Piercing Parlor,Pilates Studio,Pizza Place,Plane,Platform,Playground,Plaza,Poke Place,Polish Restaurant,Pool,Pool Hall,Portuguese Restaurant,Post Office,Poutine Place,Print Shop,Pub,Public Art,Puerto Rican Restaurant,Racetrack,Ramen Restaurant,Record Shop,Recording Studio,Recreation Center,Rental Car Location,Rental Service,Residential Building (Apartment / Condo),Resort,Rest Area,Restaurant,River,Road,Rock Climbing Spot,Rock Club,Roller Rink,Romanian Restaurant,Roof Deck,Russian Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,School,Sculpture Garden,Seafood Restaurant,Shabu-Shabu Restaurant,Shanghai Restaurant,Shipping Store,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Ski Area,Smoke Shop,Smoothie Shop,Snack Place,Soba Restaurant,Soccer Field,Social Club,Soup Place,South American Restaurant,South Indian Restaurant,Southern / Soul Food Restaurant,Souvlaki Shop,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Sri Lankan Restaurant,Stables,Stadium,State / Provincial Park,Stationery Store,Steakhouse,Storage Facility,Street Art,Strip Club,Supermarket,Supplement Shop,Surf Spot,Sushi Restaurant,Swim School,Swiss Restaurant,Szechuan Restaurant,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tattoo Parlor,Tea Room,Tech Startup,Tennis Court,Tennis Stadium,Tex-Mex Restaurant,Thai Restaurant,Theater,Theme Park,Theme Park Ride / Attraction,Theme Restaurant,Thrift / Vintage Store,Tibetan Restaurant,Tiki Bar,Toll Plaza,Tourist Information Center,Toy / Game Store,Track,Trail,Train,Train Station,Turkish Restaurant,Udon Restaurant,Used Bookstore,Vape Store,Varenyky restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Volleyball Court,Warehouse Store,Waste Facility,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,NYC,0.001191,0.000298,0.000298,0.000496,0.0,0.0,0.0,0.0,0.0,9.9e-05,0.016769,0.000496,0.0,0.000198,0.000794,0.000893,0.004763,0.000595,0.002084,0.000198,0.004763,0.001885,9.9e-05,0.000397,0.000198,9.9e-05,0.000198,0.000397,0.00258,9.9e-05,0.011312,0.02312,0.014487,0.022127,0.001985,0.000298,0.001191,0.0,9.9e-05,0.004465,9.9e-05,0.0,0.001488,0.001588,0.000496,0.0,0.000496,0.000695,0.000198,0.000397,0.000695,0.000695,0.001488,0.004763,9.9e-05,0.003671,0.000893,0.001488,0.000595,0.003671,0.001488,0.000198,0.000397,0.003274,0.000397,0.000695,0.008037,9.9e-05,0.000893,0.000794,0.00774,0.00635,0.000695,0.000695,0.000198,0.017563,0.000992,9.9e-05,9.9e-05,0.001389,0.000397,0.008137,9.9e-05,0.000496,0.000893,9.9e-05,0.021731,0.000397,9.9e-05,0.000198,9.9e-05,0.000397,0.008633,0.000298,0.009327,0.03076,0.000298,9.9e-05,0.0,9.9e-05,9.9e-05,0.000298,0.0,0.0,0.000198,0.000496,0.000298,9.9e-05,0.000198,0.000992,0.00129,0.004763,9.9e-05,0.008633,0.000198,0.000893,0.001885,9.9e-05,0.000695,0.002183,9.9e-05,0.002878,9.9e-05,0.027486,0.002381,0.000298,0.006251,0.000893,0.01022,0.004961,0.000198,0.0,0.00129,0.000298,0.002282,0.0,0.015578,9.9e-05,9.9e-05,0.000695,0.000893,9.9e-05,0.00129,0.001687,0.000397,9.9e-05,0.000198,0.0,0.000298,0.000198,0.00129,0.000794,9.9e-05,0.000198,0.001488,0.000397,0.002282,0.008732,0.000198,0.000992,0.000397,0.001191,0.000496,0.00258,0.002977,0.002679,0.001191,0.000496,0.004564,0.000298,9.9e-05,0.006847,0.007541,0.001091,0.000695,0.004564,0.000198,0.001191,0.000595,0.002381,0.001588,0.000298,0.000397,0.0,0.000595,0.003076,9.9e-05,9.9e-05,0.000298,0.003969,0.003771,0.019151,0.012106,0.012304,9.9e-05,0.000298,0.000298,0.000893,0.000695,0.000496,0.001885,0.000992,0.000198,9.9e-05,9.9e-05,9.9e-05,0.000298,0.000893,0.001191,0.000595,0.001191,0.00129,0.0,0.000397,0.000397,0.010022,0.001786,9.9e-05,0.001191,0.000198,0.012304,0.00645,0.001091,0.000893,9.9e-05,9.9e-05,9.9e-05,0.002183,0.000397,0.000298,0.030562,0.000298,0.009129,0.000794,0.001488,0.000397,0.005259,0.001191,0.000298,0.001885,0.000198,0.00774,0.000397,0.000198,0.008137,0.000695,0.000397,0.000298,9.9e-05,9.9e-05,0.000298,0.0,0.000893,0.007938,9.9e-05,0.004068,9.9e-05,0.000496,0.001687,0.002183,0.000695,0.001389,9.9e-05,0.000198,0.004564,0.000397,0.001488,0.002282,0.017365,0.003572,9.9e-05,0.001091,0.006152,0.000198,9.9e-05,0.000794,0.000198,0.000198,0.000298,0.000992,0.000992,9.9e-05,0.001389,0.000198,0.000496,0.001687,0.001588,0.003473,0.000298,0.001588,9.9e-05,0.000198,0.001389,0.000298,0.000397,0.000298,0.001786,0.000695,0.000397,9.9e-05,0.000198,9.9e-05,0.000298,0.000298,9.9e-05,0.000198,0.000198,9.9e-05,0.001191,0.017265,0.000298,0.00258,0.000198,0.002381,0.000298,0.000198,0.004167,0.016769,9.9e-05,9.9e-05,0.000198,0.000397,0.000496,0.000198,0.001191,0.044453,0.0,9.9e-05,0.008335,0.00387,0.000695,0.000496,0.001091,0.000397,0.0,0.000298,0.0,9.9e-05,0.004366,0.000198,9.9e-05,9.9e-05,0.00258,0.001191,9.9e-05,9.9e-05,0.001488,0.000298,0.000992,0.000198,0.000198,0.00774,0.000198,9.9e-05,9.9e-05,0.000794,9.9e-05,9.9e-05,0.001091,0.000695,0.000298,0.00258,0.004068,0.01796,9.9e-05,0.002977,0.000794,0.000198,0.007938,9.9e-05,0.000893,0.001985,0.003274,0.000198,0.00129,9.9e-05,0.000397,0.000397,9.9e-05,0.00129,0.000198,0.001191,9.9e-05,0.000397,0.000198,0.000298,0.002084,0.000198,0.00129,9.9e-05,0.009228,0.006152,0.000893,0.002679,0.00129,0.000496,9.9e-05,9.9e-05,0.0,9.9e-05,9.9e-05,0.003671,0.000595,0.000198,9.9e-05,0.010121,0.002183,0.000496,0.011014,0.0,9.9e-05,0.000595,0.002878,0.000496,0.000496,0.000198,0.001488,0.000595,0.001588,9.9e-05,0.00129,0.000298,9.9e-05,0.007938,0.003175,9.9e-05,0.000397,0.0,0.001985,0.000198,0.000298,0.000198,0.000198,0.001588,9.9e-05,0.00129,9.9e-05,0.001687,0.001588,0.000198,0.000397,0.000198,9.9e-05,0.003969,9.9e-05,9.9e-05,0.002084,0.001191,0.003671,9.9e-05,0.000198,9.9e-05,0.000198,0.000198,0.000397,0.006053,0.00903,0.001091,0.002778,0.007045
1,Toronto,0.0,0.0,0.0,0.0,0.000625,0.000625,0.000625,0.001251,0.001876,0.000625,0.013133,0.001876,0.003127,0.0,0.0,0.0,0.007505,0.001251,0.001251,0.0,0.008755,0.000625,0.0,0.0,0.0,0.0,0.000625,0.0,0.001251,0.000625,0.002502,0.022514,0.007505,0.015635,0.0,0.001251,0.0,0.002502,0.0,0.000625,0.0,0.000625,0.010632,0.0,0.002502,0.001251,0.0,0.000625,0.0,0.0,0.003127,0.0,0.000625,0.010006,0.0,0.000625,0.0,0.0,0.001251,0.010632,0.007505,0.0,0.0,0.005003,0.0,0.0,0.006254,0.0,0.006254,0.000625,0.0,0.0,0.0,0.000625,0.0,0.052533,0.000625,0.0,0.0,0.000625,0.0,0.003127,0.0,0.0,0.003752,0.0,0.004378,0.000625,0.0,0.000625,0.0,0.000625,0.013133,0.0,0.009381,0.092558,0.0,0.000625,0.000625,0.0,0.0,0.000625,0.000625,0.000625,0.001251,0.0,0.003752,0.001876,0.0,0.005629,0.0,0.001876,0.0,0.006254,0.000625,0.005003,0.001251,0.0,0.001251,0.0,0.0,0.001251,0.0,0.008755,0.005629,0.0,0.007505,0.0,0.009381,0.001876,0.0,0.002502,0.0,0.0,0.001251,0.000625,0.001251,0.0,0.0,0.0,0.000625,0.0,0.001876,0.003752,0.0,0.0,0.0,0.000625,0.000625,0.0,0.001251,0.0,0.0,0.0,0.001251,0.0,0.006879,0.008755,0.0,0.000625,0.001876,0.003127,0.000625,0.000625,0.0,0.001251,0.002502,0.0,0.001876,0.002502,0.0,0.006254,0.003752,0.000625,0.000625,0.003752,0.001251,0.001876,0.000625,0.001251,0.010632,0.002502,0.001251,0.001251,0.000625,0.003752,0.002502,0.0,0.0,0.003127,0.008755,0.009381,0.015635,0.006254,0.0,0.0,0.0,0.000625,0.0,0.0,0.000625,0.001251,0.0,0.0,0.0,0.0,0.0,0.000625,0.001876,0.0,0.000625,0.000625,0.000625,0.000625,0.0,0.021889,0.001251,0.0,0.0,0.001251,0.005003,0.004378,0.001251,0.0,0.0,0.0,0.0,0.000625,0.001251,0.0,0.025641,0.0,0.021263,0.003752,0.001251,0.0,0.005629,0.0,0.0,0.0,0.0,0.001876,0.0,0.001251,0.003127,0.0,0.0,0.0,0.0,0.0,0.0,0.001876,0.002502,0.004378,0.0,0.006254,0.0,0.0,0.000625,0.000625,0.000625,0.0,0.0,0.0,0.002502,0.0,0.002502,0.0,0.006879,0.005003,0.0,0.001876,0.0,0.001876,0.001251,0.001876,0.001251,0.0,0.0,0.003752,0.0,0.0,0.003127,0.0,0.0,0.003127,0.0,0.004378,0.0,0.001876,0.0,0.0,0.000625,0.0,0.001876,0.000625,0.000625,0.000625,0.000625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022514,0.0,0.002502,0.0,0.0,0.0,0.0,0.003127,0.007505,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015009,0.000625,0.0,0.001876,0.003752,0.002502,0.0,0.001251,0.0,0.000625,0.0,0.001876,0.0,0.013133,0.0,0.0,0.0,0.002502,0.001251,0.0,0.0,0.000625,0.0,0.0,0.0,0.0,0.033146,0.0,0.0,0.0,0.0,0.0,0.0,0.001251,0.0,0.000625,0.007505,0.004378,0.015009,0.0,0.002502,0.0,0.001251,0.015635,0.0,0.0,0.0,0.001876,0.0,0.003127,0.0,0.000625,0.000625,0.0,0.001876,0.001876,0.000625,0.0,0.0,0.0,0.001251,0.0,0.0,0.0,0.0,0.003127,0.0,0.002502,0.006254,0.001251,0.0,0.0,0.0,0.000625,0.0,0.000625,0.008755,0.0,0.0,0.000625,0.001876,0.0,0.0,0.014384,0.000625,0.0,0.0,0.000625,0.003127,0.000625,0.000625,0.0,0.0,0.003752,0.0,0.0,0.0,0.0,0.011882,0.00813,0.0,0.0,0.000625,0.0,0.000625,0.0,0.0,0.0,0.000625,0.0,0.002502,0.0,0.001876,0.0,0.0,0.0,0.0,0.0,0.011257,0.0,0.0,0.000625,0.0,0.003752,0.0,0.0,0.0,0.0,0.0,0.0,0.005629,0.000625,0.0,0.0,0.00813


### Lets see top 10 venues for each city and Neighborhood

In [43]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [44]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['City']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
city_venues_sorted = pd.DataFrame(columns=columns)
city_venues_sorted['City'] = city_group['City']

for ind in np.arange(city_group.shape[0]):
    city_venues_sorted.iloc[ind, 1:] = return_most_common_venues(city_group.iloc[ind, :], num_top_venues)

city_venues_sorted.head()

Unnamed: 0,City,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,NYC,Pizza Place,Coffee Shop,Italian Restaurant,Deli / Bodega,Bakery,Bar,Chinese Restaurant,Grocery Store,Sandwich Place,Café
1,Toronto,Coffee Shop,Café,Restaurant,Italian Restaurant,Park,Bakery,Hotel,Japanese Restaurant,Gym,Bar


In [45]:
# Now Lets do neighborhoods
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = cities_grouped['Neighborhood']

for ind in np.arange(cities_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(cities_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Allerton,Pizza Place,Supermarket,Spa,Deli / Bodega,Chinese Restaurant,Grocery Store,Martial Arts School,Mexican Restaurant,Electronics Store,Gas Station
1,Annadale,Pizza Place,Bakery,Liquor Store,Train Station,Diner,American Restaurant,Restaurant,Dance Studio,Food,Park
2,Arden Heights,Pharmacy,Coffee Shop,Pool,Hotel,Pizza Place,Yoga Studio,Factory,Empanada Restaurant,English Restaurant,Entertainment Service
3,Arlington,Intersection,Construction & Landscaping,Playground,American Restaurant,Deli / Bodega,Boat or Ferry,Bus Stop,Event Space,Event Service,Fast Food Restaurant
4,Arrochar,Italian Restaurant,Deli / Bodega,Pizza Place,Bus Stop,Cosmetics Shop,Middle Eastern Restaurant,Pharmacy,Taco Place,Sandwich Place,Supermarket


# Clustering the Neighborhoods

In [46]:
k = 5

cities_grouped_clustering = cities_grouped.drop('Neighborhood', 1)

kmeans = KMeans(n_clusters=k, random_state=0).fit(cities_grouped_clustering)

kmeans.labels_[0:10] 

array([0, 0, 0, 4, 0, 2, 2, 0, 2, 0], dtype=int32)

In [47]:
# Combine Tornonto and NYC orginial data
neighborhoods.insert(0, "City", "NYC")
df_toronto.insert(0, "City", "Toronto")
frames = [neighborhoods, df_toronto]
temp_conc = pd.concat(frames)

In [48]:
temp_conc.head()

Unnamed: 0,City,Borough,Neighborhood,Latitude,Longitude
0,NYC,Bronx,Wakefield,40.894705,-73.847201
1,NYC,Bronx,Co-op City,40.874294,-73.829939
2,NYC,Bronx,Eastchester,40.887556,-73.827806
3,NYC,Bronx,Fieldston,40.895437,-73.905643
4,NYC,Bronx,Riverdale,40.890834,-73.912585


In [49]:
# Clustering Labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

cities_merged = temp_conc

cities_merged = cities_merged.join(neighborhoods_venues_sorted.set_index("Neighborhood"), on="Neighborhood")

cities_merged.head()

Unnamed: 0,City,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,NYC,Bronx,Wakefield,40.894705,-73.847201,0,Pharmacy,Deli / Bodega,Dessert Shop,Laundromat,Caribbean Restaurant,Food,Ice Cream Shop,Sandwich Place,Donut Shop,Eye Doctor
1,NYC,Bronx,Co-op City,40.874294,-73.829939,0,Bus Station,Bagel Shop,Fast Food Restaurant,Bar,Basketball Court,Grocery Store,Liquor Store,Pharmacy,Salon / Barbershop,Park
2,NYC,Bronx,Eastchester,40.887556,-73.827806,0,Caribbean Restaurant,Diner,Deli / Bodega,Bus Station,Seafood Restaurant,Metro Station,Automotive Shop,Donut Shop,Fast Food Restaurant,Chinese Restaurant
3,NYC,Bronx,Fieldston,40.895437,-73.905643,0,Music Venue,River,Plaza,Bus Station,Food Court,Food & Drink Shop,English Restaurant,Entertainment Service,Escape Room,Ethiopian Restaurant
4,NYC,Bronx,Riverdale,40.890834,-73.912585,0,Park,Medical Supply Store,Bus Station,Bank,Baseball Field,Gym,Plaza,Yoga Studio,Farm,Ethiopian Restaurant


### Visualize by Cluster

In [50]:
# Split into two data frames one for nyc
nyc_cluster = cities_merged[cities_merged["City"] == "NYC"]
nyc_cluster.head()

Unnamed: 0,City,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,NYC,Bronx,Wakefield,40.894705,-73.847201,0,Pharmacy,Deli / Bodega,Dessert Shop,Laundromat,Caribbean Restaurant,Food,Ice Cream Shop,Sandwich Place,Donut Shop,Eye Doctor
1,NYC,Bronx,Co-op City,40.874294,-73.829939,0,Bus Station,Bagel Shop,Fast Food Restaurant,Bar,Basketball Court,Grocery Store,Liquor Store,Pharmacy,Salon / Barbershop,Park
2,NYC,Bronx,Eastchester,40.887556,-73.827806,0,Caribbean Restaurant,Diner,Deli / Bodega,Bus Station,Seafood Restaurant,Metro Station,Automotive Shop,Donut Shop,Fast Food Restaurant,Chinese Restaurant
3,NYC,Bronx,Fieldston,40.895437,-73.905643,0,Music Venue,River,Plaza,Bus Station,Food Court,Food & Drink Shop,English Restaurant,Entertainment Service,Escape Room,Ethiopian Restaurant
4,NYC,Bronx,Riverdale,40.890834,-73.912585,0,Park,Medical Supply Store,Bus Station,Bank,Baseball Field,Gym,Plaza,Yoga Studio,Farm,Ethiopian Restaurant


In [51]:
# One for toronto
toronto_cluster = cities_merged[cities_merged["City"] == "Toronto"]
toronto_cluster.head()

Unnamed: 0,City,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Toronto,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,2,Coffee Shop,Park,Bakery,Pub,Theater,Breakfast Spot,Café,Yoga Studio,Farmers Market,Gym / Fitness Center
4,Toronto,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,2,Coffee Shop,Diner,Sushi Restaurant,Yoga Studio,Café,Theater,Gym,Beer Bar,Mexican Restaurant,Burrito Place
9,Toronto,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,2,Clothing Store,Coffee Shop,Café,Italian Restaurant,Japanese Restaurant,Middle Eastern Restaurant,Hotel,Bubble Tea Shop,Cosmetics Shop,Electronics Store
15,Toronto,Downtown Toronto,St. James Town,43.651494,-79.375418,2,Café,Coffee Shop,American Restaurant,Cocktail Bar,Gastropub,Lingerie Store,Farmers Market,Clothing Store,Park,Gym
19,Toronto,East Toronto,The Beaches,43.676357,-79.293031,2,Health Food Store,Pub,Trail,Asian Restaurant,Farm,Empanada Restaurant,English Restaurant,Entertainment Service,Escape Room,Ethiopian Restaurant


In [52]:
# One for toronto
toronto_cluster = cities_merged[cities_merged["City"] == "Toronto"]
toronto_cluster.head()

Unnamed: 0,City,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Toronto,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,2,Coffee Shop,Park,Bakery,Pub,Theater,Breakfast Spot,Café,Yoga Studio,Farmers Market,Gym / Fitness Center
4,Toronto,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,2,Coffee Shop,Diner,Sushi Restaurant,Yoga Studio,Café,Theater,Gym,Beer Bar,Mexican Restaurant,Burrito Place
9,Toronto,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,2,Clothing Store,Coffee Shop,Café,Italian Restaurant,Japanese Restaurant,Middle Eastern Restaurant,Hotel,Bubble Tea Shop,Cosmetics Shop,Electronics Store
15,Toronto,Downtown Toronto,St. James Town,43.651494,-79.375418,2,Café,Coffee Shop,American Restaurant,Cocktail Bar,Gastropub,Lingerie Store,Farmers Market,Clothing Store,Park,Gym
19,Toronto,East Toronto,The Beaches,43.676357,-79.293031,2,Health Food Store,Pub,Trail,Asian Restaurant,Farm,Empanada Restaurant,English Restaurant,Entertainment Service,Escape Room,Ethiopian Restaurant


In [53]:
# create map for NYC
map_clusters = folium.Map(location=[lat_nyc, long_nyc], zoom_start=11)

# set color scheme for the clusters
x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(cities_merged['Latitude'], cities_merged['Longitude'], cities_merged['Neighborhood'], cities_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [54]:
# create map for toronto
map_clusters = folium.Map(location=[latitude_toronto, longitude_toronto], zoom_start=11)

# set color scheme for the clusters
x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_cluster['Latitude'], toronto_cluster['Longitude'], toronto_cluster['Neighborhood'], toronto_cluster['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Look at cluster one

In [74]:
nyc_cluster[nyc_cluster["Cluster Labels"] == 0]

Unnamed: 0,City,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,NYC,Bronx,Wakefield,40.894705,-73.847201,0,Pharmacy,Deli / Bodega,Dessert Shop,Laundromat,Caribbean Restaurant,Food,Ice Cream Shop,Sandwich Place,Donut Shop,Eye Doctor
1,NYC,Bronx,Co-op City,40.874294,-73.829939,0,Bus Station,Bagel Shop,Fast Food Restaurant,Bar,Basketball Court,Grocery Store,Liquor Store,Pharmacy,Salon / Barbershop,Park
2,NYC,Bronx,Eastchester,40.887556,-73.827806,0,Caribbean Restaurant,Diner,Deli / Bodega,Bus Station,Seafood Restaurant,Metro Station,Automotive Shop,Donut Shop,Fast Food Restaurant,Chinese Restaurant
3,NYC,Bronx,Fieldston,40.895437,-73.905643,0,Music Venue,River,Plaza,Bus Station,Food Court,Food & Drink Shop,English Restaurant,Entertainment Service,Escape Room,Ethiopian Restaurant
4,NYC,Bronx,Riverdale,40.890834,-73.912585,0,Park,Medical Supply Store,Bus Station,Bank,Baseball Field,Gym,Plaza,Yoga Studio,Farm,Ethiopian Restaurant
5,NYC,Bronx,Kingsbridge,40.881687,-73.902818,0,Pizza Place,Bar,Latin American Restaurant,Mexican Restaurant,Sandwich Place,Donut Shop,Supermarket,Fried Chicken Joint,Spanish Restaurant,Fast Food Restaurant
7,NYC,Bronx,Woodlawn,40.898273,-73.867315,0,Pub,Deli / Bodega,Playground,Pizza Place,Food & Drink Shop,Food Truck,Auto Workshop,Liquor Store,Bakery,Bar
8,NYC,Bronx,Norwood,40.877224,-73.879391,0,Pizza Place,Park,Bank,Deli / Bodega,Bus Station,Pharmacy,Chinese Restaurant,Cosmetics Shop,Restaurant,Liquor Store
10,NYC,Bronx,Baychester,40.866858,-73.835798,0,Donut Shop,Fast Food Restaurant,Sandwich Place,Pet Store,Laundromat,Doctor's Office,Mattress Store,Mexican Restaurant,Pizza Place,Discount Store
11,NYC,Bronx,Pelham Parkway,40.857413,-73.854756,0,Italian Restaurant,Frozen Yogurt Shop,Pizza Place,Bakery,Café,Gym / Fitness Center,Performing Arts Venue,Bank,Metro Station,Mexican Restaurant


In [73]:
toronto_cluster[toronto_cluster["Cluster Labels"] == 0]

Unnamed: 0,City,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
31,Toronto,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259,0,Bakery,Pharmacy,Brewery,Supermarket,Park,Grocery Store,Middle Eastern Restaurant,Bar,Café,Bank
91,Toronto,Downtown Toronto,Rosedale,43.679563,-79.377529,0,Park,Bus Station,Accessories Store,Supermarket,Caribbean Restaurant,Sandwich Place,Pharmacy,Trail,Playground,Chinese Restaurant


### Cluster 2

In [72]:
nyc_cluster[nyc_cluster["Cluster Labels"] == 1]

Unnamed: 0,City,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
257,NYC,Staten Island,Howland Hook,40.638433,-74.186223,1,Pier,Yoga Studio,Farm,Empanada Restaurant,English Restaurant,Entertainment Service,Escape Room,Ethiopian Restaurant,Event Service,Event Space


In [71]:
toronto_cluster[toronto_cluster["Cluster Labels"] == 1]

Unnamed: 0,City,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


### Cluster 3

In [70]:
nyc_cluster[nyc_cluster["Cluster Labels"] == 2]

Unnamed: 0,City,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,NYC,Manhattan,Marble Hill,40.876551,-73.91066,2,Discount Store,Gym,Sandwich Place,Coffee Shop,Pizza Place,Tennis Stadium,Shopping Mall,Seafood Restaurant,Donut Shop,Pharmacy
9,NYC,Bronx,Williamsbridge,40.881039,-73.857446,2,Caribbean Restaurant,Soup Place,Nightclub,Dance Studio,Bar,Metro Station,Business Service,Farmers Market,Ethiopian Restaurant,Event Service
12,NYC,Bronx,City Island,40.847247,-73.786488,2,Seafood Restaurant,Thrift / Vintage Store,Bank,Arts & Crafts Store,Music Venue,French Restaurant,Café,Smoke Shop,Boat or Ferry,Grocery Store
22,NYC,Bronx,Port Morris,40.801664,-73.913221,2,Furniture / Home Store,Distillery,Brewery,Peruvian Restaurant,Grocery Store,Donut Shop,Bar,Restaurant,Baseball Field,Storage Facility
28,NYC,Bronx,Throgs Neck,40.815109,-73.81635,2,Pizza Place,Deli / Bodega,Coffee Shop,Asian Restaurant,Home Service,Bar,American Restaurant,Chinese Restaurant,Italian Restaurant,Sports Bar
29,NYC,Bronx,Country Club,40.844246,-73.824099,2,Sandwich Place,Playground,Weight Loss Center,Athletics & Sports,Farm,Empanada Restaurant,English Restaurant,Entertainment Service,Escape Room,Ethiopian Restaurant
39,NYC,Bronx,Edgewater Park,40.821986,-73.813885,2,Italian Restaurant,Coffee Shop,Deli / Bodega,Pizza Place,Pub,American Restaurant,Park,Liquor Store,Donut Shop,Asian Restaurant
46,NYC,Brooklyn,Bay Ridge,40.625801,-74.030621,2,Spa,American Restaurant,Pizza Place,Bagel Shop,Italian Restaurant,Greek Restaurant,Bar,Chinese Restaurant,Grocery Store,Café
49,NYC,Brooklyn,Greenpoint,40.730201,-73.954241,2,Bar,Coffee Shop,Pizza Place,Cocktail Bar,Grocery Store,Yoga Studio,French Restaurant,Deli / Bodega,Record Shop,Mexican Restaurant
51,NYC,Brooklyn,Brighton Beach,40.576825,-73.965094,2,Eastern European Restaurant,Russian Restaurant,Restaurant,Pharmacy,Mobile Phone Shop,Sushi Restaurant,Bank,Gourmet Shop,Beach,Fried Chicken Joint


In [69]:
toronto_cluster[toronto_cluster["Cluster Labels"] == 2]

Unnamed: 0,City,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Toronto,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,2,Coffee Shop,Park,Bakery,Pub,Theater,Breakfast Spot,Café,Yoga Studio,Farmers Market,Gym / Fitness Center
4,Toronto,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,2,Coffee Shop,Diner,Sushi Restaurant,Yoga Studio,Café,Theater,Gym,Beer Bar,Mexican Restaurant,Burrito Place
9,Toronto,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,2,Clothing Store,Coffee Shop,Café,Italian Restaurant,Japanese Restaurant,Middle Eastern Restaurant,Hotel,Bubble Tea Shop,Cosmetics Shop,Electronics Store
15,Toronto,Downtown Toronto,St. James Town,43.651494,-79.375418,2,Café,Coffee Shop,American Restaurant,Cocktail Bar,Gastropub,Lingerie Store,Farmers Market,Clothing Store,Park,Gym
19,Toronto,East Toronto,The Beaches,43.676357,-79.293031,2,Health Food Store,Pub,Trail,Asian Restaurant,Farm,Empanada Restaurant,English Restaurant,Entertainment Service,Escape Room,Ethiopian Restaurant
20,Toronto,Downtown Toronto,Berczy Park,43.644771,-79.373306,2,Coffee Shop,Bakery,Cocktail Bar,Farmers Market,Restaurant,Seafood Restaurant,Cheese Shop,Beer Bar,Pharmacy,Gourmet Shop
24,Toronto,Downtown Toronto,Central Bay Street,43.657952,-79.387383,2,Coffee Shop,Sandwich Place,Café,Italian Restaurant,Japanese Restaurant,Bubble Tea Shop,Salad Place,Burger Joint,Thai Restaurant,Yoga Studio
25,Toronto,Downtown Toronto,Christie,43.669542,-79.422564,2,Grocery Store,Café,Park,Restaurant,Coffee Shop,Athletics & Sports,Candy Store,Baby Store,Nightclub,Italian Restaurant
30,Toronto,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568,2,Coffee Shop,Café,Restaurant,Bakery,Clothing Store,Gym,Thai Restaurant,Deli / Bodega,Hotel,Burrito Place
36,Toronto,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.640816,-79.381752,2,Coffee Shop,Aquarium,Café,Hotel,Brewery,Italian Restaurant,Scenic Lookout,Fried Chicken Joint,Restaurant,Baseball Stadium


### Cluster 4

In [68]:
nyc_cluster[nyc_cluster["Cluster Labels"] == 3]

Unnamed: 0,City,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
27,NYC,Bronx,Clason Point,40.806551,-73.854144,3,Park,Bus Stop,Grocery Store,Pool,Boat or Ferry,South American Restaurant,Convenience Store,Ethiopian Restaurant,Farmers Market,Event Space
192,NYC,Queens,Somerville,40.597711,-73.796648,3,Park,Yoga Studio,Electronics Store,English Restaurant,Entertainment Service,Escape Room,Ethiopian Restaurant,Event Service,Event Space,Exhibit
203,NYC,Staten Island,Todt Hill,40.597069,-74.111329,3,Trail,Park,Yoga Studio,Farm,English Restaurant,Entertainment Service,Escape Room,Ethiopian Restaurant,Event Service,Event Space


In [67]:
toronto_cluster[toronto_cluster["Cluster Labels"] == 3]

Unnamed: 0,City,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
61,Toronto,Central Toronto,Lawrence Park,43.72802,-79.38879,3,Park,Swim School,Bus Line,Yoga Studio,Farmers Market,Entertainment Service,Escape Room,Ethiopian Restaurant,Event Service,Event Space
83,Toronto,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,3,Playground,Park,Yoga Studio,English Restaurant,Entertainment Service,Escape Room,Ethiopian Restaurant,Event Service,Event Space,Exhibit


### Cluster 5

In [66]:
nyc_cluster[nyc_cluster["Cluster Labels"] == 4]

Unnamed: 0,City,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
76,NYC,Brooklyn,Mill Island,40.606336,-73.908186,4,Locksmith,Pool,Yoga Studio,Farmers Market,English Restaurant,Entertainment Service,Escape Room,Ethiopian Restaurant,Event Service,Event Space
77,NYC,Brooklyn,Manhattan Beach,40.577914,-73.943537,4,Café,Harbor / Marina,Ice Cream Shop,Playground,Bus Stop,Sandwich Place,Beach,Pizza Place,Food,Event Service
85,NYC,Brooklyn,Sea Gate,40.576375,-74.007873,4,Spa,American Restaurant,Bus Station,Beach,Home Service,Sports Club,Ethiopian Restaurant,Event Service,Event Space,Farm
150,NYC,Queens,Whitestone,40.781291,-73.814202,4,Dance Studio,Bubble Tea Shop,Deli / Bodega,Candy Store,Yoga Studio,Fast Food Restaurant,Escape Room,Ethiopian Restaurant,Event Service,Event Space
159,NYC,Queens,Briarwood,40.710935,-73.811748,4,Deli / Bodega,Arts & Crafts Store,Playground,Gym,Bus Station,Indian Restaurant,Coffee Shop,Eye Doctor,Falafel Restaurant,Factory
172,NYC,Queens,Breezy Point,40.557401,-73.925512,4,Beach,Trail,Monument / Landmark,Bus Stop,Yoga Studio,Fast Food Restaurant,Escape Room,Ethiopian Restaurant,Event Service,Event Space
178,NYC,Queens,Rockaway Beach,40.582802,-73.822361,4,Beach,Latin American Restaurant,Deli / Bodega,Bagel Shop,Fast Food Restaurant,Ice Cream Shop,BBQ Joint,Pizza Place,Arepa Restaurant,Board Shop
179,NYC,Queens,Neponsit,40.572037,-73.857547,4,Beach,Yoga Studio,Fast Food Restaurant,Entertainment Service,Escape Room,Ethiopian Restaurant,Event Service,Event Space,Exhibit,Eye Doctor
190,NYC,Queens,Belle Harbor,40.576156,-73.854018,4,Beach,Pub,Spa,Restaurant,Boutique,Mexican Restaurant,Chinese Restaurant,Bakery,Pharmacy,Donut Shop
193,NYC,Queens,Brookville,40.660003,-73.751753,4,Deli / Bodega,Home Service,Yoga Studio,Farmers Market,English Restaurant,Entertainment Service,Escape Room,Ethiopian Restaurant,Event Service,Event Space


In [64]:
print(toronto_cluster[toronto_cluster["Cluster Labels"] == 5])

Empty DataFrame
Columns: [City, Borough, Neighborhood, Latitude, Longitude, Cluster Labels, 1st Most Common Venue, 2nd Most Common Venue, 3rd Most Common Venue, 4th Most Common Venue, 5th Most Common Venue, 6th Most Common Venue, 7th Most Common Venue, 8th Most Common Venue, 9th Most Common Venue, 10th Most Common Venue]
Index: []
