# <font color =blue> <center>__Capstone Project Final Assignment Notebook__</font></center>

## Notebook information

The following notebook contains the development and analysisis of the desired location to open an experience center of Spanish wine and food.

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Libraries imported.


#### Import the NY Neighborhood data

In [2]:
#Downlad the data

!wget -q -O 'nyu_2451_34572-geojson.json' https://cocl.us/new_york_dataset
print('Data downloaded!')

#Explore the infodmration in .json 

with open('nyu_2451_34572-geojson.json') as json_data:
    newyork_data = json.load(json_data)
    
newyork_data

/bin/sh: wget: command not found
Data downloaded!


{'type': 'FeatureCollection',
 'totalFeatures': 306,
 'features': [{'type': 'Feature',
   'id': 'nyu_2451_34572.1',
   'geometry': {'type': 'Point',
    'coordinates': [-73.84720052054902, 40.89470517661]},
   'geometry_name': 'geom',
   'properties': {'name': 'Wakefield',
    'stacked': 1,
    'annoline1': 'Wakefield',
    'annoline2': None,
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.84720052054902,
     40.89470517661,
     -73.84720052054902,
     40.89470517661]}},
  {'type': 'Feature',
   'id': 'nyu_2451_34572.2',
   'geometry': {'type': 'Point',
    'coordinates': [-73.82993910812398, 40.87429419303012]},
   'geometry_name': 'geom',
   'properties': {'name': 'Co-op City',
    'stacked': 2,
    'annoline1': 'Co-op',
    'annoline2': 'City',
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.82993910812398,
     40.87429419303012,
     -73.82993910812398,
     40.87429419303012]}},
  {'type': 'Feature',
 

In [3]:
# Transform .json into a Pandas DF

neighborhoods_data = newyork_data['features']

# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)

for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)
    
neighborhoods.head()    

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


### Create a Map of New York with the neighborhoods downloaded previously

This is usefull in order to show in the report the different neighborhoods in NY from which I'll select the desired location.

In [4]:
# Using geopy to get the latitude and longitude values of NY

address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

# create map of New York using latitude and longitude values using the Folium library
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


#### Using Foursquare data to explore neighborhoods in the dataframe

First the Foursquare credentials must be defined in order to import the data based on the coordinates defined above (coordinates of each neighborhood).

This will return the list of all the venues for each neighborhood which will be filtered to show only the needed ones:

- Wine Shops
- Wine Bars
- Paella Restaurants
- Spanish Restaurants
- Tapas Restaurant


In [5]:
#Define Foursquare Credentianls and Version
CLIENT_ID = 'SDKWCH31F0XWILJ4Y4ZRPZVE5APPBKXJIU05LSPFX45OPHUR' # your Foursquare ID
CLIENT_SECRET = 'BA2XABVGHNGK44QTAJ4XB1N1E2J5WZRTFGNRIRTCSJKEQITC' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)


#Try first with the first neighborhood

neighborhoods.loc[0, 'Neighborhood']

neighborhood_latitude = neighborhoods.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = neighborhoods.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = neighborhoods.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))


LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

url

results = requests.get(url).json()
results

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']
    

venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

Your credentails:
CLIENT_ID: SDKWCH31F0XWILJ4Y4ZRPZVE5APPBKXJIU05LSPFX45OPHUR
CLIENT_SECRET:BA2XABVGHNGK44QTAJ4XB1N1E2J5WZRTFGNRIRTCSJKEQITC
Latitude and longitude values of Wakefield are 40.89470517661, -73.84720052054902.
9 venues were returned by Foursquare.




In [6]:
# Create a function to explore all neighborhoods in NY City

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

ny_venues = getNearbyVenues(names=neighborhoods['Neighborhood'],
                                   latitudes=neighborhoods['Latitude'],
                                   longitudes=neighborhoods['Longitude']
                                  )



Wakefield
Co-op City
Eastchester
Fieldston
Riverdale
Kingsbridge
Marble Hill
Woodlawn
Norwood
Williamsbridge
Baychester
Pelham Parkway
City Island
Bedford Park
University Heights
Morris Heights
Fordham
East Tremont
West Farms
High  Bridge
Melrose
Mott Haven
Port Morris
Longwood
Hunts Point
Morrisania
Soundview
Clason Point
Throgs Neck
Country Club
Parkchester
Westchester Square
Van Nest
Morris Park
Belmont
Spuyten Duyvil
North Riverdale
Pelham Bay
Schuylerville
Edgewater Park
Castle Hill
Olinville
Pelham Gardens
Concourse
Unionport
Edenwald
Bay Ridge
Bensonhurst
Sunset Park
Greenpoint
Gravesend
Brighton Beach
Sheepshead Bay
Manhattan Terrace
Flatbush
Crown Heights
East Flatbush
Kensington
Windsor Terrace
Prospect Heights
Brownsville
Williamsburg
Bushwick
Bedford Stuyvesant
Brooklyn Heights
Cobble Hill
Carroll Gardens
Red Hook
Gowanus
Fort Greene
Park Slope
Cypress Hills
East New York
Starrett City
Canarsie
Flatlands
Mill Island
Manhattan Beach
Coney Island
Bath Beach
Borough Park
Dyker

In [7]:
print(ny_venues.shape)
ny_venues.head()

(10145, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Wakefield,40.894705,-73.847201,Lollipops Gelato,40.894123,-73.845892,Dessert Shop
1,Wakefield,40.894705,-73.847201,Rite Aid,40.896649,-73.844846,Pharmacy
2,Wakefield,40.894705,-73.847201,Carvel Ice Cream,40.890487,-73.848568,Ice Cream Shop
3,Wakefield,40.894705,-73.847201,Walgreens,40.896528,-73.8447,Pharmacy
4,Wakefield,40.894705,-73.847201,Dunkin',40.890459,-73.849089,Donut Shop


In [8]:
### Venues per neighborhoods in NY City

ny_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Allerton,32,32,32,32,32,32
Annadale,13,13,13,13,13,13
Arden Heights,5,5,5,5,5,5
Arlington,4,4,4,4,4,4
Arrochar,19,19,19,19,19,19
Arverne,18,18,18,18,18,18
Astoria,100,100,100,100,100,100
Astoria Heights,12,12,12,12,12,12
Auburndale,19,19,19,19,19,19
Bath Beach,50,50,50,50,50,50


In [9]:
# Analyze each neighborhood

# one hot encoding
newyork_onehot = pd.get_dummies(ny_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
newyork_onehot['Neighborhood'] = ny_venues['Neighborhood'] 

# move neighborhood column to the first column
col_name = "Neighborhood"
first_col = newyork_onehot.pop(col_name)
newyork_onehot.insert(0, col_name, first_col)

newyork_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Acupuncturist,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Antique Shop,Arcade,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,Auditorium,Australian Restaurant,Austrian Restaurant,Automotive Shop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Bath House,Beach,Beach Bar,Bed & Breakfast,Beer Bar,Beer Garden,Beer Store,Big Box Store,Bike Rental / Bike Share,Bike Shop,Bike Trail,Bistro,Board Shop,Boat or Ferry,Bookstore,Boutique,Bowling Alley,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bridge,Bubble Tea Shop,Buffet,Building,Burger Joint,Burmese Restaurant,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Cafeteria,Café,Cajun / Creole Restaurant,Cambodian Restaurant,Camera Store,Campground,Candy Store,Cantonese Restaurant,Caribbean Restaurant,Caucasian Restaurant,Cha Chaan Teng,Check Cashing Service,Cheese Shop,Child Care Service,Chinese Restaurant,Chocolate Shop,Church,Circus,Climbing Gym,Clothing Store,Club House,Cocktail Bar,Coffee Shop,College Academic Building,College Arts Building,College Basketball Court,College Bookstore,College Cafeteria,College Stadium,College Theater,Colombian Restaurant,Comedy Club,Comfort Food Restaurant,Community Center,Concert Hall,Construction & Landscaping,Convenience Store,Cooking School,Cosmetics Shop,Coworking Space,Creperie,Cuban Restaurant,Cupcake Shop,Cycle Studio,Czech Restaurant,Dance Studio,Daycare,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distillery,Dive Bar,Doctor's Office,Dog Run,Donut Shop,Dosa Place,Drugstore,Dry Cleaner,Dumpling Restaurant,Duty-free Shop,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Service,Event Space,Exhibit,Factory,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Film Studio,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Stand,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,German Restaurant,Gift Shop,Gluten-free Restaurant,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Halal Restaurant,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Heliport,Herbs & Spices Store,High School,Himalayan Restaurant,Historic Site,History Museum,Hobby Shop,Home Service,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hotel Pool,Hotpot Restaurant,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Intersection,Irish Pub,Israeli Restaurant,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Karaoke Bar,Kebab Restaurant,Kids Store,Kitchen Supply Store,Korean Restaurant,Kosher Restaurant,Lake,Latin American Restaurant,Laundromat,Laundry Service,Lawyer,Lebanese Restaurant,Library,Lingerie Store,Liquor Store,Locksmith,Lounge,Malay Restaurant,Market,Martial Arts Dojo,Massage Studio,Mattress Store,Medical Center,Mediterranean Restaurant,Memorial Site,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Mini Golf,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Motel,Motorcycle Shop,Movie Theater,Moving Target,Multiplex,Museum,Music School,Music Store,Music Venue,Nail Salon,New American Restaurant,Newsstand,Nightclub,Non-Profit,Noodle House,North Indian Restaurant,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Other Nightlife,Other Repair Shop,Outdoor Sculpture,Outdoors & Recreation,Outlet Mall,Outlet Store,Paella Restaurant,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pedestrian Plaza,Performing Arts Venue,Perfume Shop,Persian Restaurant,Peruvian Restaurant,Pet Café,Pet Service,Pet Store,Pharmacy,Photography Studio,Physical Therapist,Piano Bar,Pie Shop,Pier,Piercing Parlor,Pilates Studio,Pizza Place,Platform,Playground,Plaza,Poke Place,Polish Restaurant,Pool,Pool Hall,Post Office,Print Shop,Pub,Public Art,Racetrack,Ramen Restaurant,Record Shop,Recording Studio,Recreation Center,Rental Car Location,Rental Service,Residential Building (Apartment / Condo),Resort,Rest Area,Restaurant,River,Rock Climbing Spot,Rock Club,Roller Rink,Romanian Restaurant,Roof Deck,Russian Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,School,Sculpture Garden,Seafood Restaurant,Shabu-Shabu Restaurant,Shanghai Restaurant,Shipping Store,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Ski Area,Smoke Shop,Smoothie Shop,Snack Place,Soba Restaurant,Soccer Field,Social Club,Soup Place,South American Restaurant,South Indian Restaurant,Southern / Soul Food Restaurant,Souvlaki Shop,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Sri Lankan Restaurant,Stadium,State / Provincial Park,Steakhouse,Storage Facility,Street Art,Strip Club,Supermarket,Supplement Shop,Surf Spot,Sushi Restaurant,Swiss Restaurant,Szechuan Restaurant,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tattoo Parlor,Tea Room,Tennis Court,Tennis Stadium,Tex-Mex Restaurant,Thai Restaurant,Theater,Theme Park,Theme Park Ride / Attraction,Theme Restaurant,Thrift / Vintage Store,Tibetan Restaurant,Tiki Bar,Toll Plaza,Tourist Information Center,Toy / Game Store,Track,Trail,Train,Train Station,Turkish Restaurant,Udon Restaurant,Used Bookstore,Vape Store,Varenyky restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Volleyball Court,Warehouse Store,Waste Facility,Waterfront,Weight Loss Center,Well,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yemeni Restaurant,Yoga Studio
0,Wakefield,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Wakefield,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Wakefield,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Wakefield,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Wakefield,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [10]:
newyork_onehot.shape

(10145, 426)

In [11]:
ny_spanish_venue = newyork_onehot.filter(['Neighborhood', 'Wine Bar', 'Wine Shop', 'Tapas Restaurant', 'Spanish Restaurant', 'Paella Restaurant'])

ny_spanish_venue.head()

Unnamed: 0,Neighborhood,Wine Bar,Wine Shop,Tapas Restaurant,Spanish Restaurant,Paella Restaurant
0,Wakefield,0,0,0,0,0
1,Wakefield,0,0,0,0,0
2,Wakefield,0,0,0,0,0
3,Wakefield,0,0,0,0,0
4,Wakefield,0,0,0,0,0


In [12]:
ny_spa_grouped = ny_spanish_venue.groupby('Neighborhood').mean().reset_index()

ny_spa_grouped

Unnamed: 0,Neighborhood,Wine Bar,Wine Shop,Tapas Restaurant,Spanish Restaurant,Paella Restaurant
0,Allerton,0.0,0.0,0.0,0.03125,0.0
1,Annadale,0.0,0.0,0.0,0.0,0.0
2,Arden Heights,0.0,0.0,0.0,0.0,0.0
3,Arlington,0.0,0.0,0.0,0.0,0.0
4,Arrochar,0.0,0.0,0.0,0.0,0.0
5,Arverne,0.0,0.055556,0.0,0.0,0.0
6,Astoria,0.0,0.01,0.0,0.0,0.0
7,Astoria Heights,0.0,0.0,0.0,0.0,0.0
8,Auburndale,0.0,0.0,0.0,0.0,0.0
9,Bath Beach,0.0,0.0,0.0,0.02,0.0


In [13]:
#Print the neighborhoods with the top 5 most common venues

num_top_venues = 5

for hood in ny_spa_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = ny_spa_grouped[ny_spa_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')



----Allerton----
                venue  freq
0  Spanish Restaurant  0.03
1            Wine Bar  0.00
2           Wine Shop  0.00
3    Tapas Restaurant  0.00
4   Paella Restaurant  0.00


----Annadale----
                venue  freq
0            Wine Bar   0.0
1           Wine Shop   0.0
2    Tapas Restaurant   0.0
3  Spanish Restaurant   0.0
4   Paella Restaurant   0.0


----Arden Heights----
                venue  freq
0            Wine Bar   0.0
1           Wine Shop   0.0
2    Tapas Restaurant   0.0
3  Spanish Restaurant   0.0
4   Paella Restaurant   0.0


----Arlington----
                venue  freq
0            Wine Bar   0.0
1           Wine Shop   0.0
2    Tapas Restaurant   0.0
3  Spanish Restaurant   0.0
4   Paella Restaurant   0.0


----Arrochar----
                venue  freq
0            Wine Bar   0.0
1           Wine Shop   0.0
2    Tapas Restaurant   0.0
3  Spanish Restaurant   0.0
4   Paella Restaurant   0.0


----Arverne----
                venue  freq
0           Win

In [14]:
#Orginize the data in decending order and create a Pandas DF

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [15]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = ny_spa_grouped['Neighborhood']

for ind in np.arange(ny_spa_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(ny_spa_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Allerton,Spanish Restaurant,Paella Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
1,Annadale,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
2,Arden Heights,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
3,Arlington,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
4,Arrochar,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
5,Arverne,Wine Shop,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Bar
6,Astoria,Wine Shop,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Bar
7,Astoria Heights,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
8,Auburndale,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
9,Bath Beach,Spanish Restaurant,Paella Restaurant,Tapas Restaurant,Wine Shop,Wine Bar


In [16]:
### Cluster neighborhoods in order to analyze them

# set number of clusters
kclusters = 5

ny_grouped_clustering = ny_spa_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(ny_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 


array([2, 0, 0, 0, 0, 4, 0, 0, 0, 2], dtype=int32)

In [17]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

ny_spa_merged = neighborhoods

# merge ny_spa_merged with neighborhoods to add latitude/longitude for each neighborhood
ny_spa_merged = ny_spa_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')


In [18]:
ny_spa_merged

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Bronx,Wakefield,40.894705,-73.847201,0.0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
1,Bronx,Co-op City,40.874294,-73.829939,0.0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
2,Bronx,Eastchester,40.887556,-73.827806,0.0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
3,Bronx,Fieldston,40.895437,-73.905643,0.0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
4,Bronx,Riverdale,40.890834,-73.912585,0.0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
5,Bronx,Kingsbridge,40.881687,-73.902818,2.0,Spanish Restaurant,Paella Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
6,Manhattan,Marble Hill,40.876551,-73.91066,0.0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
7,Bronx,Woodlawn,40.898273,-73.867315,0.0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
8,Bronx,Norwood,40.877224,-73.879391,2.0,Spanish Restaurant,Paella Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
9,Bronx,Williamsbridge,40.881039,-73.857446,0.0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar


In [21]:
# As Folium can display values only in INT type, I checked that "Clustes Labels are of type Int"

ny_spa_merged.dtypes

Borough                   object
Neighborhood              object
Latitude                 float64
Longitude                float64
Cluster Labels           float64
1st Most Common Venue     object
2nd Most Common Venue     object
3rd Most Common Venue     object
4th Most Common Venue     object
5th Most Common Venue     object
dtype: object

In [27]:
# After some digging I've found that Cluster Lablels is float64 due to some NaN values, which will be removed.

ny_spa_merged_noNaN = ny_spa_merged.dropna()

In [28]:
# This is the DF after the NaN cleaning

ny_spa_merged_noNaN

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Bronx,Wakefield,40.894705,-73.847201,0.0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
1,Bronx,Co-op City,40.874294,-73.829939,0.0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
2,Bronx,Eastchester,40.887556,-73.827806,0.0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
3,Bronx,Fieldston,40.895437,-73.905643,0.0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
4,Bronx,Riverdale,40.890834,-73.912585,0.0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
5,Bronx,Kingsbridge,40.881687,-73.902818,2.0,Spanish Restaurant,Paella Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
6,Manhattan,Marble Hill,40.876551,-73.91066,0.0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
7,Bronx,Woodlawn,40.898273,-73.867315,0.0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
8,Bronx,Norwood,40.877224,-73.879391,2.0,Spanish Restaurant,Paella Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
9,Bronx,Williamsbridge,40.881039,-73.857446,0.0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar


In [31]:
# Now that there are no NaN values, the Cluster Lables column type can be changed to type int
ny_spa_merged_noNaN[['Cluster Labels']] = ny_spa_merged_noNaN[['Cluster Labels']].astype(int)

# This is the DF proceseed ant ready to be displayed on Folium

ny_spa_merged_noNaN

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Bronx,Wakefield,40.894705,-73.847201,0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
1,Bronx,Co-op City,40.874294,-73.829939,0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
2,Bronx,Eastchester,40.887556,-73.827806,0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
3,Bronx,Fieldston,40.895437,-73.905643,0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
4,Bronx,Riverdale,40.890834,-73.912585,0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
5,Bronx,Kingsbridge,40.881687,-73.902818,2,Spanish Restaurant,Paella Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
6,Manhattan,Marble Hill,40.876551,-73.91066,0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
7,Bronx,Woodlawn,40.898273,-73.867315,0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
8,Bronx,Norwood,40.877224,-73.879391,2,Spanish Restaurant,Paella Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
9,Bronx,Williamsbridge,40.881039,-73.857446,0,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar


In [44]:
# visualize the clusters

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(ny_spa_merged_noNaN['Latitude'], ny_spa_merged_noNaN['Longitude'], ny_spa_merged_noNaN['Neighborhood'], ny_spa_merged_noNaN['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster + 1), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine the Clusters

In [48]:
#Cluster 1

ny_cluster_1 = ny_spa_merged_noNaN.loc[ny_spa_merged_noNaN['Cluster Labels'] == 0, ny_spa_merged_noNaN.columns[[0, 1] + list(range(5, ny_spa_merged_noNaN.shape[1]))]]

ny_cluster_1.head()

Unnamed: 0,Borough,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Bronx,Wakefield,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
1,Bronx,Co-op City,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
2,Bronx,Eastchester,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
3,Bronx,Fieldston,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
4,Bronx,Riverdale,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar


In [49]:
#Cluster 2

ny_cluster_2 = ny_spa_merged_noNaN.loc[ny_spa_merged_noNaN['Cluster Labels'] == 1, ny_spa_merged_noNaN.columns[[0, 1] + list(range(5, ny_spa_merged_noNaN.shape[1]))]]

ny_cluster_2.head()

Unnamed: 0,Borough,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
15,Bronx,Morris Heights,Spanish Restaurant,Paella Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
17,Bronx,East Tremont,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
19,Bronx,High Bridge,Spanish Restaurant,Paella Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
21,Bronx,Mott Haven,Spanish Restaurant,Paella Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
22,Bronx,Port Morris,Spanish Restaurant,Paella Restaurant,Tapas Restaurant,Wine Shop,Wine Bar


In [50]:
#Cluster 3

ny_cluster_3 = ny_spa_merged_noNaN.loc[ny_spa_merged_noNaN['Cluster Labels'] == 2, ny_spa_merged_noNaN.columns[[0, 1] + list(range(5, ny_spa_merged_noNaN.shape[1]))]]

ny_cluster_3.head()

Unnamed: 0,Borough,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
5,Bronx,Kingsbridge,Spanish Restaurant,Paella Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
8,Bronx,Norwood,Spanish Restaurant,Paella Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
10,Bronx,Baychester,Spanish Restaurant,Paella Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
12,Bronx,City Island,Spanish Restaurant,Paella Restaurant,Tapas Restaurant,Wine Shop,Wine Bar
13,Bronx,Bedford Park,Spanish Restaurant,Paella Restaurant,Tapas Restaurant,Wine Shop,Wine Bar


In [51]:
#Cluster 4

ny_cluster_4 = ny_spa_merged_noNaN.loc[ny_spa_merged_noNaN['Cluster Labels'] == 3, ny_spa_merged_noNaN.columns[[0, 1] + list(range(5, ny_spa_merged_noNaN.shape[1]))]]

ny_cluster_4.head()

Unnamed: 0,Borough,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
61,Brooklyn,Williamsburg,Tapas Restaurant,Wine Bar,Paella Restaurant,Spanish Restaurant,Wine Shop
63,Brooklyn,Bedford Stuyvesant,Wine Shop,Wine Bar,Paella Restaurant,Spanish Restaurant,Tapas Restaurant
69,Brooklyn,Fort Greene,Wine Shop,Wine Bar,Tapas Restaurant,Paella Restaurant,Spanish Restaurant
96,Brooklyn,North Side,Wine Bar,Wine Shop,Paella Restaurant,Spanish Restaurant,Tapas Restaurant
97,Brooklyn,South Side,Wine Bar,Tapas Restaurant,Paella Restaurant,Spanish Restaurant,Wine Shop


In [52]:
#Cluster 5

ny_cluster_5 = ny_spa_merged_noNaN.loc[ny_spa_merged_noNaN['Cluster Labels'] == 4, ny_spa_merged_noNaN.columns[[0, 1] + list(range(5, ny_spa_merged_noNaN.shape[1]))]]

ny_cluster_5.head()

Unnamed: 0,Borough,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
56,Brooklyn,East Flatbush,Wine Shop,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Bar
58,Brooklyn,Windsor Terrace,Wine Shop,Paella Restaurant,Spanish Restaurant,Tapas Restaurant,Wine Bar
59,Brooklyn,Prospect Heights,Wine Shop,Wine Bar,Paella Restaurant,Spanish Restaurant,Tapas Restaurant
64,Brooklyn,Brooklyn Heights,Wine Shop,Wine Bar,Paella Restaurant,Spanish Restaurant,Tapas Restaurant
65,Brooklyn,Cobble Hill,Wine Shop,Spanish Restaurant,Wine Bar,Paella Restaurant,Tapas Restaurant
