# predicting brooklyn food price

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!pip install geopy 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!pip install folium==0.5.0
import folium # map rendering library



In [2]:
with open("nyu_2451_34572-geojson.json") as datafile:
  NYdata = json.load(datafile)

In [3]:
NYdata=NYdata['features']

In [4]:
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)

In [5]:
for data in NYdata:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [6]:
neighborhoods['Borough'].unique()

array(['Bronx', 'Manhattan', 'Brooklyn', 'Queens', 'Staten Island'],
      dtype=object)

In [7]:
def label_borough (row):
    if row['Borough'] == 'Bronx' :
      return 2
    if row['Borough'] == 'Manhattan' :
      return 1
    if row['Borough'] == 'Brooklyn' :
      return 3
    if row['Borough'] == 'Queens' :
      return 4
    if row['Borough'] == 'Staten Island' :
      return 5
    return 'Other'

In [8]:
neighborhoods['Borough ID']=neighborhoods.apply (lambda row: label_borough(row), axis=1)
neighborhoods

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Borough ID
0,Bronx,Wakefield,40.894705,-73.847201,2
1,Bronx,Co-op City,40.874294,-73.829939,2
2,Bronx,Eastchester,40.887556,-73.827806,2
3,Bronx,Fieldston,40.895437,-73.905643,2
4,Bronx,Riverdale,40.890834,-73.912585,2
5,Bronx,Kingsbridge,40.881687,-73.902818,2
6,Manhattan,Marble Hill,40.876551,-73.91066,1
7,Bronx,Woodlawn,40.898273,-73.867315,2
8,Bronx,Norwood,40.877224,-73.879391,2
9,Bronx,Williamsbridge,40.881039,-73.857446,2


In [9]:
address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


In [10]:
colors_array = cm.rainbow(np.linspace(0, 1, 6))
rainbow = [colors.rgb2hex(i) for i in colors_array]

### different colors represent different boroughs

In [11]:
# create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood, ID in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood'], neighborhoods['Borough ID']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=rainbow[ID],
        fill=True,
        fill_color=rainbow[ID],
        fill_opacity=0.4,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

In [12]:
CLIENT_ID = 'JLOLYTL3R3JH1JD3ODLANEZCG53BGLPV4ISOWFQ54XLTVQTD' # your Foursquare ID
CLIENT_SECRET = 'LSKYXTJROVH3QIOAPPRSYKZIVFVP4YGIVNVYPGVTMYY5WQVY' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

### define a function that gets venue details depending on different price ranges

In [13]:
def getNearbyVenues(names, latitudes, longitudes, price_range, radius=500,LIMIT=50):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&section={}&price={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT,
            'food',
            price_range)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

## Brooklyn neighbourhood food venues data

In [14]:
Brooklyn=neighborhoods[neighborhoods['Borough']== 'Brooklyn'].reset_index(drop=True)

In [15]:
Brooklyn.shape

(70, 5)

### first, find venues in the cheapest price range

In [16]:
venues = getNearbyVenues(names=Brooklyn['Neighborhood'],
                                   latitudes=Brooklyn['Latitude'],
                                   longitudes=Brooklyn['Longitude'],
                                   price_range=1
                                  )

Bay Ridge
Bensonhurst
Sunset Park
Greenpoint
Gravesend
Brighton Beach
Sheepshead Bay
Manhattan Terrace
Flatbush
Crown Heights
East Flatbush
Kensington
Windsor Terrace
Prospect Heights
Brownsville
Williamsburg
Bushwick
Bedford Stuyvesant
Brooklyn Heights
Cobble Hill
Carroll Gardens
Red Hook
Gowanus
Fort Greene
Park Slope
Cypress Hills
East New York
Starrett City
Canarsie
Flatlands
Mill Island
Manhattan Beach
Coney Island
Bath Beach
Borough Park
Dyker Heights
Gerritsen Beach
Marine Park
Clinton Hill
Sea Gate
Downtown
Boerum Hill
Prospect Lefferts Gardens
Ocean Hill
City Line
Bergen Beach
Midwood
Prospect Park South
Georgetown
East Williamsburg
North Side
South Side
Ocean Parkway
Fort Hamilton
Ditmas Park
Wingate
Rugby
Remsen Village
New Lots
Paerdegat Basin
Mill Basin
Fulton Ferry
Vinegar Hill
Weeksville
Broadway Junction
Dumbo
Homecrest
Highland Park
Madison
Erasmus


In [17]:
venues.drop_duplicates(inplace=True)

In [18]:
venues['Venue Category'].unique()

array(['Bagel Shop', 'Sandwich Place', 'Thai Restaurant',
       'Chinese Restaurant', 'Fast Food Restaurant', 'Donut Shop',
       'Mexican Restaurant', 'Bakery', 'Deli / Bodega', 'Gastropub',
       'Pizza Place', 'Breakfast Spot', 'Caribbean Restaurant', 'Diner',
       'Restaurant', 'Taco Place', 'Café', 'Korean Restaurant',
       'Falafel Restaurant', 'Fried Chicken Joint',
       'Middle Eastern Restaurant', 'Burger Joint',
       'Latin American Restaurant', 'Hot Dog Joint', 'Food Truck',
       'Seafood Restaurant', 'Vietnamese Restaurant',
       'Italian Restaurant', 'Asian Restaurant',
       'Southern / Soul Food Restaurant', 'Burrito Place',
       'Cuban Restaurant', 'Dumpling Restaurant', 'Indian Restaurant',
       'Food', 'American Restaurant', 'Tibetan Restaurant'], dtype=object)

In [19]:
# display the venues on a map
# create map of New York using latitude and longitude values
map_venues = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood, name in zip(venues['Venue Latitude'], venues['Venue Longitude'], venues['Neighborhood'], venues['Venue']):
    label = '{}, {}'.format(neighborhood, name)
    label = folium.Popup(label, parse_html=True)
    
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.4,
        parse_html=False).add_to(map_venues)  
    
map_venues

In [20]:
venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Bay Ridge,40.625801,-74.030621,Bagel Boy,40.627896,-74.029335,Bagel Shop
1,Bay Ridge,40.625801,-74.030621,Cinnabon,40.623156,-74.031459,Sandwich Place
2,Bay Ridge,40.625801,-74.030621,My Thai Restaurant,40.62916,-74.028689,Thai Restaurant
3,Bay Ridge,40.625801,-74.030621,Pearl of China,40.624631,-74.030301,Chinese Restaurant
4,Bay Ridge,40.625801,-74.030621,McDonald's,40.622159,-74.027731,Fast Food Restaurant


### add a price range column to this dataframe

In [21]:
venues['Price Range']=1

In [22]:
venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Price Range
0,Bay Ridge,40.625801,-74.030621,Bagel Boy,40.627896,-74.029335,Bagel Shop,1
1,Bay Ridge,40.625801,-74.030621,Cinnabon,40.623156,-74.031459,Sandwich Place,1
2,Bay Ridge,40.625801,-74.030621,My Thai Restaurant,40.62916,-74.028689,Thai Restaurant,1
3,Bay Ridge,40.625801,-74.030621,Pearl of China,40.624631,-74.030301,Chinese Restaurant,1
4,Bay Ridge,40.625801,-74.030621,McDonald's,40.622159,-74.027731,Fast Food Restaurant,1


### now write a loop to get the other price ranges, and combine the dataframes into one

In [23]:
venues_priced=pd.DataFrame()

for i in range (4):
    venues = getNearbyVenues(names=Brooklyn['Neighborhood'],
                                   latitudes=Brooklyn['Latitude'],
                                   longitudes=Brooklyn['Longitude'],
                                   price_range=i+1
                                  )
    venues.drop_duplicates(inplace=True)
    venues['Price Range']=i+1
    venues_priced=venues_priced.append(venues,ignore_index=True)

Bay Ridge
Bensonhurst
Sunset Park
Greenpoint
Gravesend
Brighton Beach
Sheepshead Bay
Manhattan Terrace
Flatbush
Crown Heights
East Flatbush
Kensington
Windsor Terrace
Prospect Heights
Brownsville
Williamsburg
Bushwick
Bedford Stuyvesant
Brooklyn Heights
Cobble Hill
Carroll Gardens
Red Hook
Gowanus
Fort Greene
Park Slope
Cypress Hills
East New York
Starrett City
Canarsie
Flatlands
Mill Island
Manhattan Beach
Coney Island
Bath Beach
Borough Park
Dyker Heights
Gerritsen Beach
Marine Park
Clinton Hill
Sea Gate
Downtown
Boerum Hill
Prospect Lefferts Gardens
Ocean Hill
City Line
Bergen Beach
Midwood
Prospect Park South
Georgetown
East Williamsburg
North Side
South Side
Ocean Parkway
Fort Hamilton
Ditmas Park
Wingate
Rugby
Remsen Village
New Lots
Paerdegat Basin
Mill Basin
Fulton Ferry
Vinegar Hill
Weeksville
Broadway Junction
Dumbo
Homecrest
Highland Park
Madison
Erasmus
Bay Ridge
Bensonhurst
Sunset Park
Greenpoint
Gravesend
Brighton Beach
Sheepshead Bay
Manhattan Terrace
Flatbush
Crown Heig

In [24]:
venues_priced.tail()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Price Range
741,South Side,40.710861,-73.958001,Peter Luger Steak House,40.709958,-73.962291,Steakhouse,4
742,South Side,40.710861,-73.958001,Maison Premiere,40.714239,-73.961512,Seafood Restaurant,4
743,Fort Hamilton,40.614768,-74.031979,Embers Steakhouse,40.616648,-74.033815,Steakhouse,4
744,New Lots,40.662744,-73.885118,Piggys Restaurant,40.666164,-73.882599,Spanish Restaurant,4
745,Fulton Ferry,40.703281,-73.995508,The River Café,40.703754,-73.994834,American Restaurant,4


In [25]:
colors_array2 = cm.rainbow(np.linspace(0, 1, 4))
rainbow2 = [colors.rgb2hex(i) for i in colors_array2]

In [26]:
# plot the different price range on a map

map_venues_priced = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood, name, price in zip(venues_priced['Venue Latitude'], venues_priced['Venue Longitude'], venues_priced['Neighborhood'], venues_priced['Venue'], venues_priced['Price Range']):
    label = '{}, {}'.format(neighborhood, name)
    label = folium.Popup(label, parse_html=True)
    
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=rainbow2[price-1],
        fill=True,
        fill_color=rainbow2[price-1],
        fill_opacity=0.4,
        parse_html=False).add_to(map_venues_priced)  
    
map_venues_priced

## as initial examination, use k means to cluster the restaurants into 4 groups, based on the neighbourhood, category and price range

In [27]:
venues_onehot=pd.get_dummies(venues_priced[['Venue Category','Neighborhood']],prefix="", prefix_sep="")
venues_onehot['Price Range']=venues_priced['Price Range']
venues_onehot['Venue']=venues_priced['Venue']
fixed_columns = [venues_onehot.columns[-1]] + list(venues_onehot.columns[:-1])
venues_onehot = venues_onehot[fixed_columns]

venues_onehot.head()

Unnamed: 0,Venue,African Restaurant,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,Breakfast Spot,Burger Joint,Burrito Place,Café,Cajun / Creole Restaurant,Caribbean Restaurant,Chinese Restaurant,Creperie,Cuban Restaurant,Deli / Bodega,Dim Sum Restaurant,Diner,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Food,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Gastropub,German Restaurant,Greek Restaurant,Hot Dog Joint,Indian Restaurant,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,New American Restaurant,Peruvian Restaurant,Pizza Place,Polish Restaurant,Ramen Restaurant,Restaurant,Russian Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Shanghai Restaurant,South American Restaurant,Southern / Soul Food Restaurant,Spanish Restaurant,Steakhouse,Sushi Restaurant,Taco Place,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Tibetan Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wings Joint,Bath Beach,Bay Ridge,Bedford Stuyvesant,Bensonhurst,Boerum Hill,Borough Park,Brighton Beach,Broadway Junction,Brooklyn Heights,Brownsville,Bushwick,Carroll Gardens,City Line,Clinton Hill,Cobble Hill,Coney Island,Crown Heights,Cypress Hills,Ditmas Park,Downtown,Dumbo,East Flatbush,East New York,East Williamsburg,Erasmus,Flatbush,Flatlands,Fort Greene,Fort Hamilton,Fulton Ferry,Georgetown,Gerritsen Beach,Gowanus,Gravesend,Greenpoint,Highland Park,Homecrest,Kensington,Madison,Manhattan Terrace,Midwood,Mill Basin,New Lots,North Side,Ocean Hill,Park Slope,Prospect Heights,Prospect Lefferts Gardens,Prospect Park South,Red Hook,Remsen Village,Rugby,Sheepshead Bay,South Side,Starrett City,Sunset Park,Vinegar Hill,Weeksville,Williamsburg,Windsor Terrace,Wingate,Price Range
0,Bagel Boy,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1,Cinnabon,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,My Thai Restaurant,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3,Pearl of China,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4,McDonald's,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


In [28]:
kclusters = 4

venues_clustering = venues_onehot.drop('Venue', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(venues_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 1, 2, 1, 2, 2, 2, 1, 1], dtype=int32)

In [29]:
venues_priced.insert(0, 'Cluster Labels', kmeans.labels_)
venues_merged=venues_priced.merge(venues_onehot,how='outer')
venues_merged.drop(['Neighborhood','Neighborhood Latitude','Neighborhood Longitude','Venue Category'],1,inplace=True)
venues_merged.head(10)

Unnamed: 0,Cluster Labels,Venue,Venue Latitude,Venue Longitude,Price Range,African Restaurant,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,Breakfast Spot,Burger Joint,Burrito Place,Café,Cajun / Creole Restaurant,Caribbean Restaurant,Chinese Restaurant,Creperie,Cuban Restaurant,Deli / Bodega,Dim Sum Restaurant,Diner,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Food,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Gastropub,German Restaurant,Greek Restaurant,Hot Dog Joint,Indian Restaurant,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,New American Restaurant,Peruvian Restaurant,Pizza Place,Polish Restaurant,Ramen Restaurant,Restaurant,Russian Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Shanghai Restaurant,South American Restaurant,Southern / Soul Food Restaurant,Spanish Restaurant,Steakhouse,Sushi Restaurant,Taco Place,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Tibetan Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wings Joint,Bath Beach,Bay Ridge,Bedford Stuyvesant,Bensonhurst,Boerum Hill,Borough Park,Brighton Beach,Broadway Junction,Brooklyn Heights,Brownsville,Bushwick,Carroll Gardens,City Line,Clinton Hill,Cobble Hill,Coney Island,Crown Heights,Cypress Hills,Ditmas Park,Downtown,Dumbo,East Flatbush,East New York,East Williamsburg,Erasmus,Flatbush,Flatlands,Fort Greene,Fort Hamilton,Fulton Ferry,Georgetown,Gerritsen Beach,Gowanus,Gravesend,Greenpoint,Highland Park,Homecrest,Kensington,Madison,Manhattan Terrace,Midwood,Mill Basin,New Lots,North Side,Ocean Hill,Park Slope,Prospect Heights,Prospect Lefferts Gardens,Prospect Park South,Red Hook,Remsen Village,Rugby,Sheepshead Bay,South Side,Starrett City,Sunset Park,Vinegar Hill,Weeksville,Williamsburg,Windsor Terrace,Wingate
0,1,Bagel Boy,40.627896,-74.029335,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,Cinnabon,40.623156,-74.031459,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1,My Thai Restaurant,40.62916,-74.028689,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,2,Pearl of China,40.624631,-74.030301,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,1,McDonald's,40.622159,-74.027731,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,1,McDonald's,40.622159,-74.027731,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,1,McDonald's,40.598078,-73.961428,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,1,McDonald's,40.598078,-73.961428,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,2,Taste Of China,40.608834,-73.994117,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,2,Panda Chinese Restaurant,40.611388,-73.990784,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### visualise the resulting cluster

In [30]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
#x = np.arange(kclusters)
#ys = [i + x + (i*x)**2 for i in range(kclusters)]
#colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
#rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(venues_priced['Venue Latitude'], venues_priced['Venue Longitude'], venues_priced['Neighborhood'], venues_priced['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow2[cluster],
        fill=True,
        fill_color=rainbow2[cluster],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### now examine each cluster, grouped by the price range(s) present in the cluster

In [31]:
for i in range(kclusters):
    venues_grouped=venues_merged[venues_merged['Cluster Labels']==i].drop('Venue',1).groupby('Price Range').mean().reset_index()
    display(venues_grouped.sort_values(by=0,axis=1,ascending=False))

Unnamed: 0,Venue Latitude,Price Range,North Side,South Side,Italian Restaurant,American Restaurant,Pizza Place,Mexican Restaurant,Fort Greene,Cobble Hill,Clinton Hill,Brooklyn Heights,Japanese Restaurant,Thai Restaurant,Diner,Dumbo,Downtown,Carroll Gardens,French Restaurant,Prospect Heights,Park Slope,Greenpoint,Boerum Hill,Bay Ridge,Middle Eastern Restaurant,Sushi Restaurant,Mill Basin,Seafood Restaurant,Caribbean Restaurant,Restaurant,Indian Restaurant,Fulton Ferry,Korean Restaurant,Sandwich Place,Burger Joint,Latin American Restaurant,BBQ Joint,Asian Restaurant,Georgetown,Fort Hamilton,Vegetarian / Vegan Restaurant,Brighton Beach,New American Restaurant,East Williamsburg,Breakfast Spot,Fried Chicken Joint,Windsor Terrace,Sheepshead Bay,Wings Joint,Ditmas Park,Fast Food Restaurant,Deli / Bodega,Vietnamese Restaurant,Red Hook,Bakery,Gowanus,Dim Sum Restaurant,Homecrest,Café,Arepa Restaurant,South American Restaurant,Creperie,Food Truck,Williamsburg,Vinegar Hill,Spanish Restaurant,Taco Place,Kensington,Ramen Restaurant,Turkish Restaurant,Bushwick,Sunset Park,Argentinian Restaurant,City Line,Borough Park,Cuban Restaurant,Flatbush,Eastern European Restaurant,Greek Restaurant,Ethiopian Restaurant,Gravesend,Bagel Shop,Bath Beach,Prospect Park South,Mediterranean Restaurant,Polish Restaurant,Russian Restaurant,Bensonhurst,Southern / Soul Food Restaurant,Remsen Village,Erasmus,Midwood,Prospect Lefferts Gardens,Rugby,Highland Park,Gerritsen Beach,African Restaurant,Burrito Place,East New York,Tapas Restaurant,Cajun / Creole Restaurant,Falafel Restaurant,Food Court,Gastropub,German Restaurant,Israeli Restaurant,Shanghai Restaurant,Taiwanese Restaurant,Salad Place,Brownsville,Coney Island,East Flatbush,Cypress Hills,Crown Heights,Wingate,Bedford Stuyvesant,New Lots,Manhattan Terrace,Madison,Chinese Restaurant,Weeksville,Donut Shop,Dumpling Restaurant,Starrett City,Food,Flatlands,Steakhouse,Tibetan Restaurant,Hot Dog Joint,Broadway Junction,Peruvian Restaurant,Ocean Hill,Cluster Labels,Venue Longitude
0,40.682067,2,0.130952,0.114286,0.097619,0.092857,0.066667,0.059524,0.054762,0.054762,0.054762,0.054762,0.040476,0.040476,0.035714,0.035714,0.035714,0.033333,0.033333,0.033333,0.033333,0.030952,0.02619,0.02619,0.02381,0.02381,0.02381,0.02381,0.02381,0.021429,0.021429,0.021429,0.021429,0.021429,0.019048,0.019048,0.019048,0.016667,0.016667,0.016667,0.016667,0.014286,0.014286,0.014286,0.014286,0.014286,0.014286,0.014286,0.011905,0.011905,0.011905,0.011905,0.011905,0.009524,0.009524,0.009524,0.009524,0.009524,0.009524,0.009524,0.009524,0.009524,0.009524,0.009524,0.007143,0.007143,0.007143,0.007143,0.007143,0.007143,0.004762,0.004762,0.004762,0.004762,0.004762,0.004762,0.004762,0.004762,0.004762,0.004762,0.004762,0.004762,0.004762,0.004762,0.004762,0.004762,0.004762,0.004762,0.004762,0.004762,0.002381,0.002381,0.002381,0.002381,0.002381,0.002381,0.002381,0.002381,0.002381,0.002381,0.002381,0.002381,0.002381,0.002381,0.002381,0.002381,0.002381,0.002381,0.002381,0.002381,0.002381,0.002381,0.002381,0.002381,0.002381,0.002381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,-73.971106


Unnamed: 0,Venue Latitude,Price Range,Cluster Labels,Donut Shop,Fort Hamilton,Boerum Hill,Prospect Park South,Cypress Hills,Ditmas Park,Georgetown,Broadway Junction,City Line,Manhattan Terrace,Clinton Hill,Downtown,Pizza Place,Brooklyn Heights,East Williamsburg,Bath Beach,Fast Food Restaurant,Carroll Gardens,Homecrest,Brighton Beach,Flatbush,Windsor Terrace,Starrett City,Ocean Hill,Gowanus,Gravesend,Weeksville,Bagel Shop,Mill Basin,Burger Joint,Fried Chicken Joint,North Side,Bakery,Middle Eastern Restaurant,Dumbo,South Side,Mexican Restaurant,Caribbean Restaurant,Latin American Restaurant,Fulton Ferry,Fort Greene,Williamsburg,Deli / Bodega,Diner,Park Slope,Flatlands,Food Truck,Erasmus,Prospect Heights,Dumpling Restaurant,Prospect Lefferts Gardens,Greenpoint,Falafel Restaurant,New Lots,Sunset Park,Sandwich Place,Cobble Hill,Café,American Restaurant,Bushwick,Vinegar Hill,East New York,Brownsville,Rugby,Remsen Village,Bay Ridge,Wingate,Burrito Place,Taco Place,Asian Restaurant,Thai Restaurant,Bensonhurst,Red Hook,Restaurant,Kensington,Sheepshead Bay,Hot Dog Joint,Food,Breakfast Spot,Indian Restaurant,Vietnamese Restaurant,Cuban Restaurant,Gerritsen Beach,Italian Restaurant,Madison,Korean Restaurant,Gastropub,Highland Park,Southern / Soul Food Restaurant,Coney Island,Seafood Restaurant,Tibetan Restaurant,African Restaurant,Arepa Restaurant,Argentinian Restaurant,BBQ Joint,Cajun / Creole Restaurant,Chinese Restaurant,Dim Sum Restaurant,French Restaurant,Midwood,Food Court,Ethiopian Restaurant,Eastern European Restaurant,Creperie,Vegetarian / Vegan Restaurant,German Restaurant,Greek Restaurant,Wings Joint,Turkish Restaurant,Bedford Stuyvesant,Tapas Restaurant,Borough Park,Taiwanese Restaurant,Sushi Restaurant,Steakhouse,Spanish Restaurant,South American Restaurant,Shanghai Restaurant,Crown Heights,Salad Place,Russian Restaurant,Ramen Restaurant,East Flatbush,Polish Restaurant,Peruvian Restaurant,New American Restaurant,Mediterranean Restaurant,Japanese Restaurant,Israeli Restaurant,Venue Longitude
0,40.65692,1,1,0.764552,0.05363,0.052322,0.051014,0.05036,0.049052,0.048398,0.045128,0.045128,0.044474,0.034663,0.033355,0.032701,0.032701,0.030085,0.029431,0.026815,0.025507,0.025507,0.024853,0.024853,0.024199,0.023545,0.022891,0.022891,0.022891,0.022891,0.022237,0.022237,0.019621,0.018313,0.014388,0.012426,0.012426,0.011118,0.010464,0.009156,0.009156,0.007848,0.007194,0.007194,0.00654,0.00654,0.00654,0.005886,0.005886,0.005886,0.005886,0.005232,0.005232,0.004578,0.004578,0.004578,0.003924,0.003924,0.003924,0.003924,0.003924,0.003924,0.003924,0.003924,0.00327,0.00327,0.00327,0.00327,0.00327,0.00327,0.00327,0.00327,0.00327,0.002616,0.001962,0.001962,0.001962,0.001962,0.001308,0.001308,0.001308,0.001308,0.000654,0.000654,0.000654,0.000654,0.000654,0.000654,0.000654,0.000654,0.000654,0.000654,0.000654,0.000654,0.000654,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-73.95248


Unnamed: 0,Venue Latitude,Cluster Labels,Chinese Restaurant,Price Range,Gravesend,Clinton Hill,Bensonhurst,Greenpoint,City Line,Brooklyn Heights,Fort Hamilton,Mill Basin,Windsor Terrace,Crown Heights,Erasmus,Starrett City,Carroll Gardens,Kensington,Bushwick,Bay Ridge,Bedford Stuyvesant,Bath Beach,Ocean Hill,Cobble Hill,Park Slope,Prospect Lefferts Gardens,Boerum Hill,East Flatbush,Ditmas Park,Coney Island,Cypress Hills,Downtown,Williamsburg,Brownsville,Broadway Junction,Brighton Beach,Borough Park,Wings Joint,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Dumbo,Fort Greene,East New York,East Williamsburg,Vinegar Hill,Sunset Park,South Side,Sheepshead Bay,Rugby,Remsen Village,Red Hook,Prospect Park South,Prospect Heights,North Side,New Lots,Midwood,Manhattan Terrace,Madison,Homecrest,Highland Park,Gowanus,Gerritsen Beach,Georgetown,Turkish Restaurant,Weeksville,Flatlands,Flatbush,Fulton Ferry,Thai Restaurant,Tibetan Restaurant,Tapas Restaurant,French Restaurant,Food Truck,Food Court,Food,Fast Food Restaurant,Falafel Restaurant,Ethiopian Restaurant,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Diner,Dim Sum Restaurant,Deli / Bodega,Cuban Restaurant,Creperie,Caribbean Restaurant,Cajun / Creole Restaurant,Café,Burrito Place,Burger Joint,Breakfast Spot,Bakery,Bagel Shop,BBQ Joint,Asian Restaurant,Argentinian Restaurant,Arepa Restaurant,American Restaurant,African Restaurant,Fried Chicken Joint,Gastropub,German Restaurant,Greek Restaurant,Taiwanese Restaurant,Taco Place,Sushi Restaurant,Steakhouse,Spanish Restaurant,Southern / Soul Food Restaurant,South American Restaurant,Shanghai Restaurant,Seafood Restaurant,Sandwich Place,Salad Place,Russian Restaurant,Restaurant,Ramen Restaurant,Polish Restaurant,Pizza Place,Peruvian Restaurant,New American Restaurant,Middle Eastern Restaurant,Mexican Restaurant,Mediterranean Restaurant,Latin American Restaurant,Korean Restaurant,Japanese Restaurant,Italian Restaurant,Israeli Restaurant,Indian Restaurant,Hot Dog Joint,Wingate,Venue Longitude
0,40.651555,2,1.0,1,0.153846,0.076923,0.076923,0.051282,0.051282,0.051282,0.051282,0.051282,0.051282,0.025641,0.025641,0.025641,0.025641,0.025641,0.025641,0.025641,0.025641,0.025641,0.025641,0.025641,0.025641,0.025641,0.025641,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-73.964384
1,40.649802,2,1.0,2,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.111111,0.111111,0.111111,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-73.969498


Unnamed: 0,Venue Latitude,Price Range,Cluster Labels,American Restaurant,Italian Restaurant,South Side,North Side,Japanese Restaurant,Bay Ridge,Tapas Restaurant,Williamsburg,Fulton Ferry,Fort Greene,French Restaurant,Seafood Restaurant,New American Restaurant,Gowanus,Mexican Restaurant,Mill Basin,Dumbo,Cobble Hill,Pizza Place,Taco Place,Clinton Hill,Boerum Hill,South American Restaurant,Carroll Gardens,Park Slope,Breakfast Spot,Russian Restaurant,Brooklyn Heights,Fort Hamilton,Sheepshead Bay,Mediterranean Restaurant,Deli / Bodega,Restaurant,Georgetown,Madison,Chinese Restaurant,Turkish Restaurant,Downtown,Erasmus,East Williamsburg,Red Hook,Spanish Restaurant,Polish Restaurant,Steakhouse,Peruvian Restaurant,Latin American Restaurant,Prospect Heights,Vinegar Hill,German Restaurant,Gastropub,Borough Park,Brighton Beach,Greenpoint,Homecrest,Crown Heights,Coney Island,Cypress Hills,Starrett City,Sunset Park,Ditmas Park,City Line,Windsor Terrace,Manhattan Terrace,Rugby,Ocean Hill,Kensington,Highland Park,Midwood,Gravesend,New Lots,Gerritsen Beach,Bushwick,Remsen Village,Weeksville,Prospect Lefferts Gardens,Flatlands,Prospect Park South,East New York,East Flatbush,Flatbush,Thai Restaurant,Brownsville,Broadway Junction,Falafel Restaurant,Ethiopian Restaurant,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Diner,Dim Sum Restaurant,Cuban Restaurant,Creperie,Caribbean Restaurant,Cajun / Creole Restaurant,Café,Burrito Place,Burger Joint,Bakery,Bagel Shop,BBQ Joint,Asian Restaurant,Argentinian Restaurant,Arepa Restaurant,African Restaurant,Fast Food Restaurant,Food,Food Court,Food Truck,Bensonhurst,Bedford Stuyvesant,Bath Beach,Wings Joint,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Tibetan Restaurant,Taiwanese Restaurant,Sushi Restaurant,Southern / Soul Food Restaurant,Shanghai Restaurant,Sandwich Place,Salad Place,Ramen Restaurant,Middle Eastern Restaurant,Korean Restaurant,Israeli Restaurant,Indian Restaurant,Hot Dog Joint,Greek Restaurant,Fried Chicken Joint,Wingate,Venue Longitude
0,40.679232,3,3,0.168067,0.159664,0.134454,0.109244,0.109244,0.07563,0.07563,0.058824,0.058824,0.058824,0.05042,0.05042,0.042017,0.042017,0.042017,0.042017,0.042017,0.042017,0.033613,0.033613,0.033613,0.033613,0.033613,0.033613,0.033613,0.033613,0.02521,0.02521,0.02521,0.02521,0.016807,0.016807,0.016807,0.016807,0.016807,0.016807,0.016807,0.016807,0.008403,0.008403,0.008403,0.008403,0.008403,0.008403,0.008403,0.008403,0.008403,0.008403,0.008403,0.008403,0.008403,0.008403,0.008403,0.008403,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-73.975865
1,40.687775,4,3,0.058824,0.058824,0.205882,0.176471,0.0,0.0,0.0,0.147059,0.029412,0.0,0.0,0.117647,0.029412,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.029412,0.0,0.0,0.0,0.029412,0.029412,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.117647,0.0,0.147059,0.0,0.294118,0.029412,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.029412,0.0,0.029412,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.029412,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,-73.950209


### some interesting observations can be made:
clusters are related to price ranges: cluster 0 is lower mid range, cluster 1 is mid range, cluster 2 is low range and cluster 3 is high.

cluster 1 contains only italian resturants.

cluster 2 has many donut shops.

### try a different clustering algorithm

In [32]:
from sklearn.cluster import SpectralClustering
from sklearn.cluster import AgglomerativeClustering

#sc = (n_clusters=kclusters, assign_labels="discretize",random_state=0).fit(venues_clustering)
ac=AgglomerativeClustering(n_clusters=kclusters).fit(venues_clustering)
#sc.labels_[0:10]
ac.labels_[0:10]

array([0, 0, 0, 0, 3, 3, 3, 3, 3, 3])

In [33]:
venues_priced['Cluster Labels']=ac.labels_

In [34]:
venues_priced.head()

Unnamed: 0,Cluster Labels,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Price Range
0,0,Bay Ridge,40.625801,-74.030621,Bagel Boy,40.627896,-74.029335,Bagel Shop,1
1,0,Bay Ridge,40.625801,-74.030621,Cinnabon,40.623156,-74.031459,Sandwich Place,1
2,0,Bay Ridge,40.625801,-74.030621,My Thai Restaurant,40.62916,-74.028689,Thai Restaurant,1
3,0,Bay Ridge,40.625801,-74.030621,Pearl of China,40.624631,-74.030301,Chinese Restaurant,1
4,3,Bay Ridge,40.625801,-74.030621,McDonald's,40.622159,-74.027731,Fast Food Restaurant,1


In [35]:
venues_merged=venues_priced.merge(venues_onehot,how='outer')
venues_merged.drop(['Neighborhood','Neighborhood Latitude','Neighborhood Longitude','Venue Category'],1,inplace=True)
venues_merged.head(10)

Unnamed: 0,Cluster Labels,Venue,Venue Latitude,Venue Longitude,Price Range,African Restaurant,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,Breakfast Spot,Burger Joint,Burrito Place,Café,Cajun / Creole Restaurant,Caribbean Restaurant,Chinese Restaurant,Creperie,Cuban Restaurant,Deli / Bodega,Dim Sum Restaurant,Diner,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Food,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Gastropub,German Restaurant,Greek Restaurant,Hot Dog Joint,Indian Restaurant,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,New American Restaurant,Peruvian Restaurant,Pizza Place,Polish Restaurant,Ramen Restaurant,Restaurant,Russian Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Shanghai Restaurant,South American Restaurant,Southern / Soul Food Restaurant,Spanish Restaurant,Steakhouse,Sushi Restaurant,Taco Place,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Tibetan Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wings Joint,Bath Beach,Bay Ridge,Bedford Stuyvesant,Bensonhurst,Boerum Hill,Borough Park,Brighton Beach,Broadway Junction,Brooklyn Heights,Brownsville,Bushwick,Carroll Gardens,City Line,Clinton Hill,Cobble Hill,Coney Island,Crown Heights,Cypress Hills,Ditmas Park,Downtown,Dumbo,East Flatbush,East New York,East Williamsburg,Erasmus,Flatbush,Flatlands,Fort Greene,Fort Hamilton,Fulton Ferry,Georgetown,Gerritsen Beach,Gowanus,Gravesend,Greenpoint,Highland Park,Homecrest,Kensington,Madison,Manhattan Terrace,Midwood,Mill Basin,New Lots,North Side,Ocean Hill,Park Slope,Prospect Heights,Prospect Lefferts Gardens,Prospect Park South,Red Hook,Remsen Village,Rugby,Sheepshead Bay,South Side,Starrett City,Sunset Park,Vinegar Hill,Weeksville,Williamsburg,Windsor Terrace,Wingate
0,0,Bagel Boy,40.627896,-74.029335,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,Cinnabon,40.623156,-74.031459,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,My Thai Restaurant,40.62916,-74.028689,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,Pearl of China,40.624631,-74.030301,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,3,McDonald's,40.622159,-74.027731,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,3,McDonald's,40.622159,-74.027731,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,3,McDonald's,40.598078,-73.961428,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,3,McDonald's,40.598078,-73.961428,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,3,Taste Of China,40.608834,-73.994117,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,3,Panda Chinese Restaurant,40.611388,-73.990784,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [36]:
for i in range(kclusters):
    venues_grouped=venues_merged[venues_merged['Cluster Labels']==i].drop('Venue',1).groupby('Price Range').mean().reset_index()
    display(venues_grouped.sort_values(by=0,axis=1,ascending=False))

Unnamed: 0,Venue Latitude,Price Range,South Side,American Restaurant,North Side,Williamsburg,Taco Place,Bay Ridge,Burger Joint,Middle Eastern Restaurant,Latin American Restaurant,Mexican Restaurant,Dumpling Restaurant,Sandwich Place,Chinese Restaurant,Georgetown,Kensington,Fort Hamilton,Thai Restaurant,Bagel Shop,Creperie,Ditmas Park,Fulton Ferry,BBQ Joint,Fort Greene,Flatlands,Flatbush,Erasmus,East Williamsburg,East New York,East Flatbush,Dumbo,Downtown,Cypress Hills,Gerritsen Beach,Crown Heights,Coney Island,Cobble Hill,Clinton Hill,City Line,Carroll Gardens,Bushwick,Brownsville,Brooklyn Heights,Broadway Junction,Brighton Beach,Asian Restaurant,Gowanus,Boerum Hill,Gravesend,Windsor Terrace,Weeksville,Vinegar Hill,Sunset Park,Starrett City,African Restaurant,Sheepshead Bay,Rugby,Remsen Village,Red Hook,Prospect Park South,Prospect Lefferts Gardens,Prospect Heights,Park Slope,Ocean Hill,Arepa Restaurant,New Lots,Mill Basin,Midwood,Manhattan Terrace,Madison,Argentinian Restaurant,Homecrest,Highland Park,Greenpoint,Borough Park,Bensonhurst,Cuban Restaurant,Bedford Stuyvesant,Mediterranean Restaurant,Cajun / Creole Restaurant,Korean Restaurant,Japanese Restaurant,Italian Restaurant,Israeli Restaurant,Indian Restaurant,Hot Dog Joint,Greek Restaurant,German Restaurant,Gastropub,Fried Chicken Joint,French Restaurant,Food Truck,Food Court,Food,Fast Food Restaurant,Falafel Restaurant,Ethiopian Restaurant,Eastern European Restaurant,Caribbean Restaurant,Donut Shop,Diner,Dim Sum Restaurant,Deli / Bodega,Café,Burrito Place,New American Restaurant,Peruvian Restaurant,Bath Beach,Wings Joint,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Turkish Restaurant,Tibetan Restaurant,Cluster Labels,Tapas Restaurant,Taiwanese Restaurant,Bakery,Sushi Restaurant,Steakhouse,Spanish Restaurant,Southern / Soul Food Restaurant,South American Restaurant,Shanghai Restaurant,Seafood Restaurant,Breakfast Spot,Salad Place,Russian Restaurant,Restaurant,Ramen Restaurant,Polish Restaurant,Pizza Place,Wingate,Venue Longitude
0,40.689538,1,0.333333,0.222222,0.222222,0.185185,0.185185,0.148148,0.111111,0.111111,0.074074,0.074074,0.074074,0.037037,0.037037,0.037037,0.037037,0.037037,0.037037,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-73.970632
1,40.678689,2,0.188034,0.115385,0.209402,0.008547,0.008547,0.042735,0.012821,0.012821,0.025641,0.034188,0.0,0.0,0.021368,0.029915,0.004274,0.017094,0.042735,0.0,0.017094,0.0,0.012821,0.029915,0.059829,0.0,0.0,0.0,0.0,0.004274,0.0,0.012821,0.012821,0.0,0.004274,0.0,0.0,0.094017,0.025641,0.008547,0.029915,0.0,0.004274,0.008547,0.0,0.025641,0.021368,0.021368,0.0,0.008547,0.008547,0.0,0.0,0.008547,0.0,0.004274,0.021368,0.0,0.0,0.017094,0.0,0.0,0.017094,0.0,0.0,0.017094,0.0,0.047009,0.0,0.0,0.0,0.008547,0.021368,0.004274,0.008547,0.0,0.008547,0.008547,0.0,0.008547,0.0,0.029915,0.064103,0.128205,0.004274,0.0,0.0,0.004274,0.004274,0.004274,0.008547,0.025641,0.0,0.0,0.0,0.0,0.0,0.008547,0.008547,0.0,0.0,0.021368,0.017094,0.004274,0.008547,0.004274,0.004274,0.0,0.0,0.0,0.021368,0.008547,0.008547,0.0,0,0.004274,0.0,0.008547,0.042735,0.0,0.012821,0.004274,0.017094,0.0,0.038462,0.017094,0.004274,0.008547,0.008547,0.004274,0.004274,0.047009,0.004274,-73.967665
2,40.677185,3,0.169231,0.076923,0.169231,0.046154,0.0,0.138462,0.0,0.0,0.0,0.015385,0.0,0.0,0.030769,0.030769,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.061538,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030769,0.0,0.0,0.0,0.0,0.061538,0.015385,0.0,0.046154,0.0,0.0,0.0,0.0,0.0,0.0,0.061538,0.046154,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.015385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.015385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.123077,0.230769,0.0,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015385,0.0,0.0,0.030769,0.015385,0.0,0.0,0.0,0.0,0.030769,0.0,0,0.092308,0.0,0.0,0.0,0.0,0.015385,0.0,0.061538,0.0,0.092308,0.061538,0.0,0.0,0.0,0.0,0.015385,0.061538,0.0,-73.974653
3,40.682311,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-73.995911


Unnamed: 0,Venue Latitude,Price Range,Cluster Labels,Donut Shop,Pizza Place,Boerum Hill,Downtown,Brooklyn Heights,Clinton Hill,Ditmas Park,Prospect Park South,Georgetown,Broadway Junction,Fort Hamilton,Cypress Hills,Manhattan Terrace,City Line,Windsor Terrace,Burger Joint,Chinese Restaurant,Bakery,Carroll Gardens,Bagel Shop,Dumbo,Flatbush,Greenpoint,Bath Beach,Prospect Heights,Park Slope,Caribbean Restaurant,Gravesend,Diner,Deli / Bodega,Middle Eastern Restaurant,Homecrest,Weeksville,Brighton Beach,Starrett City,Mexican Restaurant,Ocean Hill,East Williamsburg,Mill Basin,Gowanus,Fulton Ferry,Dumpling Restaurant,Latin American Restaurant,Burrito Place,Fort Greene,Café,Cobble Hill,Vinegar Hill,Fast Food Restaurant,Falafel Restaurant,North Side,Erasmus,Prospect Lefferts Gardens,Thai Restaurant,Fried Chicken Joint,Food Truck,Breakfast Spot,East New York,Rugby,Hot Dog Joint,Korean Restaurant,Brownsville,Cuban Restaurant,New Lots,Vietnamese Restaurant,Gastropub,Tibetan Restaurant,Sandwich Place,Southern / Soul Food Restaurant,South Side,Kensington,Williamsburg,French Restaurant,African Restaurant,Gerritsen Beach,Food Court,Food,Ethiopian Restaurant,Highland Park,American Restaurant,Eastern European Restaurant,Dim Sum Restaurant,Madison,Remsen Village,Sunset Park,Midwood,Arepa Restaurant,Creperie,Flatlands,Sheepshead Bay,BBQ Joint,Asian Restaurant,Argentinian Restaurant,Red Hook,Cajun / Creole Restaurant,Seafood Restaurant,German Restaurant,Turkish Restaurant,Bensonhurst,Bedford Stuyvesant,Bay Ridge,Salad Place,Wings Joint,Vegetarian / Vegan Restaurant,Tapas Restaurant,Greek Restaurant,Taiwanese Restaurant,Taco Place,Sushi Restaurant,Steakhouse,Spanish Restaurant,South American Restaurant,Russian Restaurant,Borough Park,Restaurant,Ramen Restaurant,Shanghai Restaurant,East Flatbush,Indian Restaurant,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Crown Heights,Coney Island,Mediterranean Restaurant,New American Restaurant,Peruvian Restaurant,Bushwick,Polish Restaurant,Wingate,Venue Longitude
0,40.675057,1,1,0.60177,0.070796,0.067847,0.058997,0.058997,0.053097,0.047198,0.044248,0.041298,0.038348,0.038348,0.038348,0.035398,0.035398,0.032448,0.032448,0.029499,0.029499,0.029499,0.026549,0.023599,0.023599,0.023599,0.023599,0.023599,0.023599,0.020649,0.020649,0.020649,0.020649,0.020649,0.017699,0.017699,0.017699,0.017699,0.017699,0.017699,0.017699,0.017699,0.017699,0.014749,0.011799,0.011799,0.011799,0.011799,0.00885,0.00885,0.00885,0.00885,0.00885,0.0059,0.0059,0.0059,0.0059,0.0059,0.0059,0.0059,0.00295,0.00295,0.00295,0.00295,0.00295,0.00295,0.00295,0.00295,0.00295,0.00295,0.00295,0.00295,0.00295,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-73.977113
1,40.685726,2,1,0.0,0.098684,0.065789,0.072368,0.131579,0.111842,0.032895,0.013158,0.0,0.0,0.013158,0.0,0.0,0.0,0.026316,0.019737,0.006579,0.013158,0.046053,0.013158,0.059211,0.013158,0.072368,0.0,0.065789,0.098684,0.065789,0.0,0.052632,0.026316,0.046053,0.0,0.0,0.0,0.0,0.098684,0.0,0.0,0.0,0.0,0.019737,0.0,0.013158,0.0,0.052632,0.013158,0.006579,0.013158,0.0,0.006579,0.026316,0.006579,0.0,0.032895,0.0,0.013158,0.013158,0.0,0.006579,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.046053,0.006579,0.006579,0.0,0.0,0.032895,0.0,0.0,0.006579,0.0,0.0,0.0,0.078947,0.0,0.0,0.0,0.0,0.0,0.006579,0.0,0.0,0.0,0.006579,0.0,0.013158,0.0,0.0,0.006579,0.006579,0.0,0.0,0.0,0.0,0.0,0.0,0.032895,0.019737,0.0,0.006579,0.0,0.006579,0.0,0.0,0.0,0.0,0.0,0.006579,0.032895,0.013158,0.006579,0.006579,0.039474,0.0,0.065789,0.013158,0.006579,0.006579,0.0,0.026316,0.0,0.0,0.006579,0.0,-73.977473
2,40.691317,3,1,0.0,0.0,0.0,0.0,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-73.981708


Unnamed: 0,Venue Latitude,Price Range,Cluster Labels,French Restaurant,Korean Restaurant,South Side,Diner,Williamsburg,Boerum Hill,Downtown,Fort Greene,Erasmus,East Williamsburg,Flatbush,Coney Island,East Flatbush,Dumbo,Ditmas Park,Cypress Hills,Crown Heights,East New York,Carroll Gardens,Cobble Hill,Clinton Hill,City Line,Bushwick,Brownsville,Brooklyn Heights,Broadway Junction,Brighton Beach,Borough Park,Bensonhurst,Bedford Stuyvesant,Bay Ridge,Bath Beach,Wings Joint,Flatlands,Georgetown,Fort Hamilton,Ocean Hill,Windsor Terrace,Weeksville,Vinegar Hill,Sunset Park,Starrett City,Sheepshead Bay,Rugby,Remsen Village,Red Hook,Prospect Park South,Prospect Lefferts Gardens,Prospect Heights,Park Slope,North Side,Fulton Ferry,New Lots,Mill Basin,Midwood,Manhattan Terrace,Madison,Kensington,Homecrest,Highland Park,Greenpoint,Gravesend,Gowanus,Gerritsen Beach,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Thai Restaurant,Turkish Restaurant,Chinese Restaurant,Food Truck,Food Court,Food,Fast Food Restaurant,Falafel Restaurant,Ethiopian Restaurant,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Dim Sum Restaurant,Deli / Bodega,Cuban Restaurant,Creperie,Caribbean Restaurant,Tibetan Restaurant,Cajun / Creole Restaurant,Café,Burrito Place,Burger Joint,Breakfast Spot,Bakery,Bagel Shop,BBQ Joint,Asian Restaurant,Argentinian Restaurant,Arepa Restaurant,American Restaurant,African Restaurant,Fried Chicken Joint,Gastropub,German Restaurant,Greek Restaurant,Tapas Restaurant,Taiwanese Restaurant,Taco Place,Sushi Restaurant,Steakhouse,Spanish Restaurant,Southern / Soul Food Restaurant,South American Restaurant,Shanghai Restaurant,Seafood Restaurant,Sandwich Place,Salad Place,Russian Restaurant,Restaurant,Ramen Restaurant,Polish Restaurant,Pizza Place,Peruvian Restaurant,New American Restaurant,Middle Eastern Restaurant,Mexican Restaurant,Mediterranean Restaurant,Latin American Restaurant,Japanese Restaurant,Italian Restaurant,Israeli Restaurant,Indian Restaurant,Hot Dog Joint,Wingate,Venue Longitude
0,40.699175,2,2,0.428571,0.285714,0.285714,0.285714,0.285714,0.142857,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-73.968394
1,40.679509,3,2,0.136364,0.0,0.113636,0.0,0.090909,0.022727,0.0,0.045455,0.0,0.022727,0.0,0.0,0.0,0.068182,0.0,0.0,0.0,0.0,0.022727,0.022727,0.045455,0.0,0.0,0.0,0.022727,0.0,0.022727,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068182,0.0,0.0,0.0,0.022727,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.022727,0.068182,0.045455,0.113636,0.0,0.0,0.0,0.0,0.022727,0.0,0.022727,0.0,0.022727,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.227273,0.0,0.0,0.0,0.0,0.0,0.068182,0.0,0.090909,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068182,0.045455,0.0,0.0,0.0,0.0,0.068182,0.0,0.090909,0.022727,0.022727,0.113636,0.022727,0.0,0.0,0.0,0.0,-73.976326
2,40.687941,4,2,0.0,0.0,0.212121,0.0,0.151515,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.030303,0.030303,0.060606,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.030303,0.030303,0.0,0.0,0.0,0.0,0.0,0.030303,0.030303,0.181818,0.030303,0.060606,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.060606,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.151515,0.121212,0.0,0.0,0.0,0.121212,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.030303,0.0,0.30303,0.0,0.030303,0.0,0.0,0.0,0.0,-73.948824


Unnamed: 0,Venue Latitude,Cluster Labels,Price Range,Donut Shop,Fort Hamilton,Cypress Hills,Prospect Park South,City Line,Ditmas Park,Georgetown,Boerum Hill,Manhattan Terrace,Broadway Junction,East Williamsburg,Clinton Hill,Fast Food Restaurant,Bath Beach,Gravesend,Homecrest,Brooklyn Heights,Brighton Beach,Starrett City,Downtown,Flatbush,Mill Basin,Ocean Hill,Carroll Gardens,Gowanus,Weeksville,Windsor Terrace,Chinese Restaurant,Pizza Place,Fried Chicken Joint,Bagel Shop,Burger Joint,North Side,Middle Eastern Restaurant,Bakery,Dumbo,Flatlands,Erasmus,Caribbean Restaurant,Food Truck,Fort Greene,Bushwick,Bensonhurst,Fulton Ferry,Mexican Restaurant,Latin American Restaurant,Sunset Park,South Side,Prospect Lefferts Gardens,Williamsburg,New Lots,Remsen Village,Wingate,Asian Restaurant,Sandwich Place,Brownsville,Rugby,Falafel Restaurant,Cobble Hill,East New York,Restaurant,Diner,Café,Vinegar Hill,Deli / Bodega,Kensington,Red Hook,Dumpling Restaurant,Park Slope,Bay Ridge,Sheepshead Bay,Food,East Flatbush,Italian Restaurant,Madison,Seafood Restaurant,Hot Dog Joint,Indian Restaurant,Highland Park,Greenpoint,Crown Heights,Coney Island,Thai Restaurant,Gerritsen Beach,Burrito Place,Bedford Stuyvesant,Cuban Restaurant,Shanghai Restaurant,African Restaurant,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Dim Sum Restaurant,BBQ Joint,South American Restaurant,Midwood,Breakfast Spot,Prospect Heights,Creperie,Cajun / Creole Restaurant,Sushi Restaurant,Eastern European Restaurant,Ethiopian Restaurant,Taiwanese Restaurant,Tapas Restaurant,Salad Place,Tibetan Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wings Joint,Russian Restaurant,Ramen Restaurant,Borough Park,Polish Restaurant,Peruvian Restaurant,New American Restaurant,Steakhouse,Mediterranean Restaurant,Korean Restaurant,Japanese Restaurant,Spanish Restaurant,Israeli Restaurant,Greek Restaurant,German Restaurant,Gastropub,Taco Place,Food Court,Southern / Soul Food Restaurant,French Restaurant,Venue Longitude
0,40.650898,3,1,0.802829,0.058236,0.053245,0.052413,0.049085,0.049085,0.049085,0.048253,0.046589,0.046589,0.033278,0.031614,0.031614,0.031614,0.028286,0.027454,0.026622,0.026622,0.02579,0.02579,0.024958,0.024958,0.024958,0.024958,0.024126,0.024126,0.023295,0.023295,0.021631,0.021631,0.019967,0.013311,0.011647,0.007488,0.007488,0.007488,0.007488,0.006656,0.005824,0.005824,0.005824,0.005824,0.004992,0.004992,0.004992,0.004992,0.004992,0.004992,0.004992,0.00416,0.00416,0.00416,0.00416,0.00416,0.003328,0.003328,0.003328,0.003328,0.003328,0.003328,0.002496,0.002496,0.002496,0.002496,0.002496,0.002496,0.002496,0.001664,0.001664,0.001664,0.001664,0.001664,0.000832,0.000832,0.000832,0.000832,0.000832,0.000832,0.000832,0.000832,0.000832,0.000832,0.000832,0.000832,0.000832,0.000832,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-73.945511
1,40.677175,3,2,0.0,0.027778,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.138889,0.055556,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.055556,0.111111,0.0,0.055556,0.055556,0.0,0.0,0.083333,0.0,0.0,0.0,0.055556,0.0,0.083333,0.027778,0.083333,0.055556,0.0,0.0,0.055556,0.027778,0.0,0.0,0.055556,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.027778,0.0,0.083333,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.027778,0.055556,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-73.966712


In [105]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
#x = np.arange(kclusters)
#ys = [i + x + (i*x)**2 for i in range(kclusters)]
#colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
#rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(venues_priced['Venue Latitude'], venues_priced['Venue Longitude'], venues_priced['Neighborhood'], venues_priced['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow2[cluster],
        fill=True,
        fill_color=rainbow2[cluster],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

other clustering methods results in clusters that are less interpretable.

## Now try use different classification algorithms on the dataset to predict prices.

In [38]:
#first we need to prep the dataset into a training set and a test set.
from sklearn.model_selection import train_test_split
X=venues_merged.drop(['Cluster Labels','Venue'],1)
y=venues_merged['Price Range']
X=X.drop('Price Range',1)
train_x, test_x, train_y, test_y = train_test_split(X,y,test_size=0.2,stratify=y)

In [39]:
train_x.head()

Unnamed: 0,Venue Latitude,Venue Longitude,African Restaurant,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,Breakfast Spot,Burger Joint,Burrito Place,Café,Cajun / Creole Restaurant,Caribbean Restaurant,Chinese Restaurant,Creperie,Cuban Restaurant,Deli / Bodega,Dim Sum Restaurant,Diner,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Food,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Gastropub,German Restaurant,Greek Restaurant,Hot Dog Joint,Indian Restaurant,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,New American Restaurant,Peruvian Restaurant,Pizza Place,Polish Restaurant,Ramen Restaurant,Restaurant,Russian Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Shanghai Restaurant,South American Restaurant,Southern / Soul Food Restaurant,Spanish Restaurant,Steakhouse,Sushi Restaurant,Taco Place,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Tibetan Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wings Joint,Bath Beach,Bay Ridge,Bedford Stuyvesant,Bensonhurst,Boerum Hill,Borough Park,Brighton Beach,Broadway Junction,Brooklyn Heights,Brownsville,Bushwick,Carroll Gardens,City Line,Clinton Hill,Cobble Hill,Coney Island,Crown Heights,Cypress Hills,Ditmas Park,Downtown,Dumbo,East Flatbush,East New York,East Williamsburg,Erasmus,Flatbush,Flatlands,Fort Greene,Fort Hamilton,Fulton Ferry,Georgetown,Gerritsen Beach,Gowanus,Gravesend,Greenpoint,Highland Park,Homecrest,Kensington,Madison,Manhattan Terrace,Midwood,Mill Basin,New Lots,North Side,Ocean Hill,Park Slope,Prospect Heights,Prospect Lefferts Gardens,Prospect Park South,Red Hook,Remsen Village,Rugby,Sheepshead Bay,South Side,Starrett City,Sunset Park,Vinegar Hill,Weeksville,Williamsburg,Windsor Terrace,Wingate
1913,40.711248,-73.961445,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
1401,40.576163,-73.990963,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1292,40.708515,-73.958186,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
456,40.5981,-73.997225,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1474,40.64488,-73.958007,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [45]:
# the data is not normalized, especially the location data
from sklearn.preprocessing import StandardScaler
# use a pipeline 
from sklearn.pipeline import make_pipeline
# use a grid search also
from sklearn.model_selection import GridSearchCV
# import the potential classifiers 
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

pipe=make_pipeline(StandardScaler(),SVC())

parameters=[{'svc__kernel': ['rbf'], 'svc__gamma': ['scale',1e-3, 1e-4],'svc__C': [1, 10, 100, 1000]}]

clf=GridSearchCV(pipe,parameters,cv=5).fit(train_x,train_y)


In [46]:
clf.best_params_

{'svc__C': 1, 'svc__gamma': 0.001, 'svc__kernel': 'rbf'}

In [47]:
clf.best_score_

0.838953488372093

In [50]:
# now repeat the same for other classifiers
# decision tree
pipe=make_pipeline(StandardScaler(),DecisionTreeClassifier())

parameters=[{'decisiontreeclassifier__criterion': ['entropy','gini'], 'decisiontreeclassifier__max_depth': [6,8,10,12,14,None]}]

clf=GridSearchCV(pipe,parameters,cv=5).fit(train_x,train_y)

print('the best parameters are:', clf.best_params_)
print('the best score is:', clf.best_score_)

the best parameters are: {'decisiontreeclassifier__criterion': 'gini', 'decisiontreeclassifier__max_depth': None}
the best score is: 0.8697674418604653


In [54]:
# now repeat the same for other classifiers
# k nearest neighbors
pipe=make_pipeline(StandardScaler(),KNeighborsClassifier())

parameters=[{'kneighborsclassifier__n_neighbors': [14,15,16], 'kneighborsclassifier__p': [1,2], 'kneighborsclassifier__weights':['uniform','distance']}]

clf=GridSearchCV(pipe,parameters,cv=5).fit(train_x,train_y)

print('the best parameters are:', clf.best_params_)
print('the best score is:', clf.best_score_)

the best parameters are: {'kneighborsclassifier__n_neighbors': 16, 'kneighborsclassifier__p': 2, 'kneighborsclassifier__weights': 'distance'}
the best score is: 0.8488372093023256


In [56]:
# now repeat the same for other classifiers
# multi layer perceptron
pipe=make_pipeline(StandardScaler(),MLPClassifier())

parameters=[{'mlpclassifier__solver': ['adam'], 'mlpclassifier__alpha': [1e-3,1e-4,1e-5], 'mlpclassifier__learning_rate':['constant'], 'mlpclassifier__max_iter':[1000]}]

clf=GridSearchCV(pipe,parameters,cv=5).fit(train_x,train_y)

print('the best parameters are:', clf.best_params_)
print('the best score is:', clf.best_score_)

the best parameters are: {'mlpclassifier__alpha': 0.001, 'mlpclassifier__learning_rate': 'constant', 'mlpclassifier__max_iter': 1000, 'mlpclassifier__solver': 'adam'}
the best score is: 0.8401162790697674


In [71]:
# now intialise the different classifiers based on their optimal parameters and pass through the test set
from sklearn.metrics import accuracy_score

svc=SVC(C= 1, gamma= 0.001, kernel= 'rbf').fit(train_x,train_y)
y_pred_svc=svc.predict(test_x)
acc_svc=accuracy_score(test_y,y_pred_svc)
acc_svc

0.727906976744186

In [70]:
knn=KNeighborsClassifier(n_neighbors=16, p= 2, weights='distance').fit(train_x,train_y)
y_pred_knn=knn.predict(test_x)
acc_knn=accuracy_score(test_y,y_pred_knn)
acc_knn

0.8767441860465116

In [72]:
dt=DecisionTreeClassifier().fit(train_x,train_y)
y_pred_dt=dt.predict(test_x)
acc_dt=accuracy_score(test_y,y_pred_dt)
acc_dt

0.872093023255814

In [73]:
mlp=MLPClassifier(alpha=0.001,max_iter=1000).fit(train_x,train_y)
y_pred_mlp=mlp.predict(test_x)
acc_mlp=accuracy_score(test_y,y_pred_mlp)
acc_mlp

0.8488372093023255

In [74]:
# make a dataframe to store the test results.

results=pd.DataFrame({'classifier':['SVC','KNN','DecisionTree','MLP'],'test accuracy':[acc_svc,acc_knn,acc_dt,acc_mlp]})



In [75]:
results

Unnamed: 0,classifier,test accuracy
0,SVC,0.727907
1,KNN,0.876744
2,DecisionTree,0.872093
3,MLP,0.848837


KNN and decision tree both performed well.