# A Recommender System for a Client to Help Him Decide Where to Open Which Kind of Restaurant

### First lets import required libraries.

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.18.1               |             py_0          51 KB  conda-forge
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          84 KB

The following NEW packages will be INSTALLED:

    geographiclib: 1.49-py_0   conda-forge
    geopy:         1.18.1-py_0 conda-forge


Downloading and Extracting Packages
geopy-1.18.1         | 51 KB     | ##################################### | 100% 
geographiclib-1.49   | 32 KB     | ##################################### | 100% 
Preparing transaction: done
Verifying transaction: done
Executing transaction: done
Solving environme

## JSON Data file is taken from https://geo.nyu.edu/catalog/nyu_2451_34572

In [2]:
with open('city.json') as json_data:
    nycity_data = json.load(json_data)

In [3]:
neigh_data = nycity_data['features']

### Making a dataframe

In [4]:
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

neigh = pd.DataFrame(columns=column_names)

In [5]:
for data in neigh_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neigh = neigh.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [6]:
neigh.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


## Choosing Bronx from all the data

In [7]:
bronx_data = neigh[neigh['Borough'] == 'Bronx'].reset_index(drop=True)
bronx_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


## Using geopy library to get the latitude and longitude values of Bronx.

In [8]:
address = 'Bronx, NY'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Bronx are {}, {}.'.format(latitude, longitude))

  This is separate from the ipykernel package so we can avoid doing imports until


The geograpical coordinate of Bronx are 40.85048545, -73.8404035580209.


In [9]:
map_bronx = folium.Map(location=[latitude, longitude], zoom_start=11)


for lat, lng, label in zip(bronx_data['Latitude'], bronx_data['Longitude'], bronx_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_bronx)  
    
map_bronx

## Defining Foursquare Credentials and Version

In [10]:
CLIENT_ID = 'C0VJET0NBDTYHKE30JU1BE42KF0MHEJSYWKM4KNVLUZDQ4DG' 
CLIENT_SECRET = 'HAQX5G43GGY0GTAMSL1ACMVASKWZLGMWCPKZISOL4KZR3X2S' 
VERSION = '20180605'

In [11]:
bronx_data.loc[0, 'Neighborhood']

'Wakefield'

In [12]:
neighborhood_latitude = bronx_data.loc[0, 'Latitude'] 
neighborhood_longitude = bronx_data.loc[0, 'Longitude'] 

neighborhood_name = bronx_data.loc[0, 'Neighborhood'] 

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Wakefield are 40.89470517661, -73.84720052054902.


### Setting parameters: limit=200, radius=1000

In [13]:
LIMIT = 200 

radius = 1000 

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url 

'https://api.foursquare.com/v2/venues/explore?&client_id=C0VJET0NBDTYHKE30JU1BE42KF0MHEJSYWKM4KNVLUZDQ4DG&client_secret=HAQX5G43GGY0GTAMSL1ACMVASKWZLGMWCPKZISOL4KZR3X2S&v=20180605&ll=40.89470517661,-73.84720052054902&radius=1000&limit=200'

In [14]:
results = requests.get(url).json()

### Getting all neighborhood venues

In [15]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [16]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) 


filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]


nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)


nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Lollipops Gelato,Dessert Shop,40.894123,-73.845892
1,Jackie's West Indian Bakery,Caribbean Restaurant,40.889283,-73.84331
2,Ripe Kitchen & Bar,Caribbean Restaurant,40.898152,-73.838875
3,Rite Aid,Pharmacy,40.896521,-73.84468
4,Ali's Roti Shop,Caribbean Restaurant,40.894036,-73.856935


In [17]:
def getNearbyVenues(names, latitudes, longitudes, radius=5000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
       
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
      
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
       
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [18]:
bronx_venues = getNearbyVenues(names=bronx_data['Neighborhood'],
                                   latitudes=bronx_data['Latitude'],
                                   longitudes=bronx_data['Longitude']
                                  )

Wakefield
Co-op City
Eastchester
Fieldston
Riverdale
Kingsbridge
Woodlawn
Norwood
Williamsbridge
Baychester
Pelham Parkway
City Island
Bedford Park
University Heights
Morris Heights
Fordham
East Tremont
West Farms
High  Bridge
Melrose
Mott Haven
Port Morris
Longwood
Hunts Point
Morrisania
Soundview
Clason Point
Throgs Neck
Country Club
Parkchester
Westchester Square
Van Nest
Morris Park
Belmont
Spuyten Duyvil
North Riverdale
Pelham Bay
Schuylerville
Edgewater Park
Castle Hill
Olinville
Pelham Gardens
Concourse
Unionport
Edenwald
Claremont Village
Concourse Village
Mount Eden
Mount Hope
Bronxdale
Allerton
Kingsbridge Heights


### Summary Information about Neighborhoods inside Bronx

In [19]:
bronx_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Allerton,100,100,100,100,100,100
Baychester,100,100,100,100,100,100
Bedford Park,100,100,100,100,100,100
Belmont,100,100,100,100,100,100
Bronxdale,100,100,100,100,100,100
Castle Hill,100,100,100,100,100,100
City Island,100,100,100,100,100,100
Claremont Village,100,100,100,100,100,100
Clason Point,100,100,100,100,100,100
Co-op City,100,100,100,100,100,100


### One-hot Encoding the "categroies" Column 

In [20]:
bronx_onehot = pd.get_dummies(bronx_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
bronx_onehot['Neighborhood'] = bronx_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [bronx_onehot.columns[-1]] + list(bronx_onehot.columns[:-1])
bronx_onehot = bronx_onehot[fixed_columns]

bronx_onehot.head()

Unnamed: 0,Neighborhood,African Restaurant,Airport Lounge,American Restaurant,Arepa Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Beach,Beer Bar,Beer Garden,Big Box Store,Bistro,Bookstore,Botanical Garden,Brazilian Restaurant,Breakfast Spot,Brewery,Bridge,Burger Joint,Café,Cajun / Creole Restaurant,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Community Center,Convenience Store,Cosmetics Shop,Cuban Restaurant,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Distillery,Dog Run,Donut Shop,Dumpling Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Field,Fish Market,Food & Drink Shop,Food Truck,Fountain,French Restaurant,Frozen Yogurt Shop,Furniture / Home Store,Garden,Gift Shop,Golf Course,Golf Driving Range,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hardware Store,Historic Site,History Museum,Hotel,Ice Cream Shop,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Jazz Club,Juice Bar,Kitchen Supply Store,Korean Restaurant,Latin American Restaurant,Library,Lighthouse,Lingerie Store,Liquor Store,Lounge,Mac & Cheese Joint,Market,Martial Arts Dojo,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Monument / Landmark,Motorcycle Shop,Museum,Music Venue,New American Restaurant,Other Nightlife,Outdoor Sculpture,Paella Restaurant,Park,Peruvian Restaurant,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Pool,Pub,Public Art,Ramen Restaurant,Restaurant,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,School,Seafood Restaurant,Shoe Store,Shopping Mall,Smoke Shop,Soccer Field,Soup Place,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Speakeasy,Sports Bar,Stables,State / Provincial Park,Steakhouse,Street Art,Supermarket,Sushi Restaurant,Taco Place,Tapas Restaurant,Tattoo Parlor,Tennis Court,Thai Restaurant,Theater,Theme Park,Track,Track Stadium,Trail,Turkish Restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Veterinarian,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,Wakefield,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Wakefield,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Wakefield,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Wakefield,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Wakefield,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [21]:
print('There are {} uniques categories.'.format(len(bronx_venues['Venue Category'].unique())))

print('Here is the list of different categories:')
list(bronx_venues['Venue Category'].unique())

There are 165 uniques categories.
Here is the list of different categories:


['Dessert Shop',
 'Caribbean Restaurant',
 'Bakery',
 'Burger Joint',
 'Pizza Place',
 'Pub',
 'Bar',
 'Deli / Bodega',
 'Grocery Store',
 'Seafood Restaurant',
 'Italian Restaurant',
 'Café',
 'Ice Cream Shop',
 'Gym',
 'Bagel Shop',
 'Trail',
 'Brewery',
 'Pharmacy',
 'Supermarket',
 'Diner',
 'Soup Place',
 'Kitchen Supply Store',
 'Wine Shop',
 'Furniture / Home Store',
 'Fast Food Restaurant',
 'Brazilian Restaurant',
 'Dumpling Restaurant',
 'Pet Store',
 'Breakfast Spot',
 'Sports Bar',
 'Liquor Store',
 'Spa',
 'Park',
 'Mexican Restaurant',
 'Asian Restaurant',
 'Arts & Crafts Store',
 'Donut Shop',
 'Plaza',
 'Cajun / Creole Restaurant',
 'Coffee Shop',
 'Smoke Shop',
 'Spanish Restaurant',
 'Botanical Garden',
 'Stables',
 'Historic Site',
 'Beer Bar',
 'Martial Arts Dojo',
 'Discount Store',
 'Beer Garden',
 'Frozen Yogurt Shop',
 'Electronics Store',
 'Shopping Mall',
 'Gym / Fitness Center',
 'Golf Course',
 'BBQ Joint',
 'Shoe Store',
 'Clothing Store',
 'Hotel',
 'Golf 

### Manually selecting restarurants/diners from venue category

In [22]:
important_list_of_features = [
 
  
 'Neighborhood',
 'Caribbean Restaurant',

 'Burger Joint',

 'Italian Restaurant',

 'Diner',

 'Fast Food Restaurant',
 
 'Brazilian Restaurant',
 'Dumpling Restaurant',

 'Mexican Restaurant',
 'Asian Restaurant',
 'American Restaurant',
 
 'Cajun / Creole Restaurant',
 
 'Spanish Restaurant',

 'BBQ Joint',
 
 'Arepa Restaurant',
 'Sushi Restaurant',

 'Cuban Restaurant',
 
 'Japanese Restaurant',
 
 'New American Restaurant',
 'Chinese Restaurant',

 'Tapas Restaurant',
 'Restaurant',
 'Greek Restaurant',
 'Latin American Restaurant',
 
 'Steakhouse',
 
 'Venezuelan Restaurant',
 
 'Mediterranean Restaurant',
 
 'French Restaurant',

 'Thai Restaurant',

 'Ethiopian Restaurant',

 'Paella Restaurant',
 
 'African Restaurant',

 'Southern / Soul Food Restaurant',

 'Peruvian Restaurant',

 'Taco Place',
 'Indian Restaurant',

 'Turkish Restaurant',
 'Ramen Restaurant',


 'Vietnamese Restaurant',
 'Korean Restaurant',
 ]

In [32]:
bronx_onehot = bronx_onehot[important_list_of_features]

bronx_onehot.head()

Unnamed: 0,Neighborhood,Caribbean Restaurant,Burger Joint,Italian Restaurant,Diner,Fast Food Restaurant,Brazilian Restaurant,Dumpling Restaurant,Mexican Restaurant,Asian Restaurant,American Restaurant,Cajun / Creole Restaurant,Spanish Restaurant,BBQ Joint,Arepa Restaurant,Sushi Restaurant,Cuban Restaurant,Japanese Restaurant,New American Restaurant,Chinese Restaurant,Tapas Restaurant,Restaurant,Greek Restaurant,Latin American Restaurant,Steakhouse,Venezuelan Restaurant,Mediterranean Restaurant,French Restaurant,Thai Restaurant,Ethiopian Restaurant,Paella Restaurant,African Restaurant,Southern / Soul Food Restaurant,Peruvian Restaurant,Taco Place,Indian Restaurant,Turkish Restaurant,Ramen Restaurant,Vietnamese Restaurant,Korean Restaurant
0,Wakefield,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Wakefield,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Wakefield,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Wakefield,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Wakefield,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


#### In order to analyze what king of restaurants are common, we have to group rows by neighborhood and by taking the mean of the frequency of occurrence of each restaurants/diners.

In [33]:
bronx_grouped = bronx_onehot.groupby('Neighborhood').mean().reset_index()
bronx_grouped

Unnamed: 0,Neighborhood,Caribbean Restaurant,Burger Joint,Italian Restaurant,Diner,Fast Food Restaurant,Brazilian Restaurant,Dumpling Restaurant,Mexican Restaurant,Asian Restaurant,American Restaurant,Cajun / Creole Restaurant,Spanish Restaurant,BBQ Joint,Arepa Restaurant,Sushi Restaurant,Cuban Restaurant,Japanese Restaurant,New American Restaurant,Chinese Restaurant,Tapas Restaurant,Restaurant,Greek Restaurant,Latin American Restaurant,Steakhouse,Venezuelan Restaurant,Mediterranean Restaurant,French Restaurant,Thai Restaurant,Ethiopian Restaurant,Paella Restaurant,African Restaurant,Southern / Soul Food Restaurant,Peruvian Restaurant,Taco Place,Indian Restaurant,Turkish Restaurant,Ramen Restaurant,Vietnamese Restaurant,Korean Restaurant
0,Allerton,0.03,0.0,0.1,0.01,0.01,0.0,0.0,0.03,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Baychester,0.08,0.0,0.05,0.04,0.02,0.0,0.01,0.04,0.01,0.0,0.0,0.01,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Bedford Park,0.02,0.02,0.12,0.03,0.01,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Belmont,0.02,0.0,0.14,0.03,0.01,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Bronxdale,0.01,0.0,0.15,0.01,0.01,0.0,0.0,0.04,0.0,0.0,0.0,0.02,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Castle Hill,0.0,0.01,0.04,0.06,0.01,0.0,0.0,0.05,0.01,0.05,0.0,0.04,0.0,0.01,0.0,0.01,0.01,0.0,0.01,0.0,0.02,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0
6,City Island,0.0,0.0,0.05,0.05,0.0,0.0,0.0,0.03,0.01,0.06,0.0,0.01,0.01,0.0,0.01,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Claremont Village,0.0,0.01,0.11,0.0,0.01,0.0,0.0,0.06,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0
8,Clason Point,0.0,0.02,0.03,0.04,0.01,0.0,0.0,0.06,0.01,0.04,0.0,0.04,0.0,0.01,0.0,0.01,0.01,0.0,0.01,0.0,0.01,0.01,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01
9,Co-op City,0.08,0.02,0.05,0.03,0.03,0.0,0.01,0.04,0.02,0.0,0.0,0.01,0.01,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Now to time see each neighborhood along with the top 5 most common restaurant types

In [57]:
num_top_venues = 5

for hood in bronx_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = bronx_grouped[bronx_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Allerton----
                  venue  freq
0    Italian Restaurant  0.10
1  Caribbean Restaurant  0.03
2    Mexican Restaurant  0.03
3      Sushi Restaurant  0.01
4      Arepa Restaurant  0.01


----Baychester----
                  venue  freq
0  Caribbean Restaurant  0.08
1    Italian Restaurant  0.05
2                 Diner  0.04
3    Mexican Restaurant  0.04
4  Fast Food Restaurant  0.02


----Bedford Park----
                  venue  freq
0    Italian Restaurant  0.12
1    Mexican Restaurant  0.04
2                 Diner  0.03
3  Caribbean Restaurant  0.02
4          Burger Joint  0.02


----Belmont----
                  venue  freq
0    Italian Restaurant  0.14
1                 Diner  0.03
2    Mexican Restaurant  0.03
3  Caribbean Restaurant  0.02
4  Fast Food Restaurant  0.01


----Bronxdale----
                  venue  freq
0    Italian Restaurant  0.15
1    Mexican Restaurant  0.04
2    Spanish Restaurant  0.02
3  Caribbean Restaurant  0.01
4                 Diner  0.01



### Making this a Dataframe

In [35]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

### Displaying the top 5 restaurant types for each neighborhood.

In [36]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Common Restaurant/Diner'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Common Restaurant/Diner'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = bronx_grouped['Neighborhood']

for ind in np.arange(bronx_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(bronx_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Common Restaurant/Diner,2nd Common Restaurant/Diner,3rd Common Restaurant/Diner,4th Common Restaurant/Diner,5th Common Restaurant/Diner
0,Allerton,Italian Restaurant,Caribbean Restaurant,Mexican Restaurant,Spanish Restaurant,Diner
1,Baychester,Caribbean Restaurant,Italian Restaurant,Diner,Mexican Restaurant,Fast Food Restaurant
2,Bedford Park,Italian Restaurant,Mexican Restaurant,Diner,Caribbean Restaurant,Burger Joint
3,Belmont,Italian Restaurant,Diner,Mexican Restaurant,Caribbean Restaurant,Fast Food Restaurant
4,Bronxdale,Italian Restaurant,Mexican Restaurant,Spanish Restaurant,Caribbean Restaurant,Diner
5,Castle Hill,Diner,American Restaurant,Mexican Restaurant,Spanish Restaurant,Italian Restaurant
6,City Island,American Restaurant,Italian Restaurant,Diner,Mexican Restaurant,Thai Restaurant
7,Claremont Village,Italian Restaurant,Mexican Restaurant,Asian Restaurant,Venezuelan Restaurant,Burger Joint
8,Clason Point,Mexican Restaurant,Latin American Restaurant,Diner,American Restaurant,Spanish Restaurant
9,Co-op City,Caribbean Restaurant,Italian Restaurant,Mexican Restaurant,Fast Food Restaurant,Diner


## Clustering Neighborhoods

### I will Run *k*-means to cluster the neighborhood into 5 clusters.

In [37]:
kclusters = 5

bronx_grouped_clustering = bronx_grouped.drop('Neighborhood', 1)


kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(bronx_grouped_clustering)


kmeans.labels_[0:10] 

array([1, 2, 1, 1, 1, 0, 0, 1, 0, 2], dtype=int32)

### Creating a dataframe that includes the cluster as well as the top 5 restaurant types for each neighborhood.

In [38]:
bronx_merged = bronx_data

bronx_merged['Cluster Labels'] = kmeans.labels_

bronx_merged = bronx_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

bronx_merged.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Common Restaurant/Diner,2nd Common Restaurant/Diner,3rd Common Restaurant/Diner,4th Common Restaurant/Diner,5th Common Restaurant/Diner
0,Bronx,Wakefield,40.894705,-73.847201,1,Caribbean Restaurant,Burger Joint,Italian Restaurant,Fast Food Restaurant,Diner
1,Bronx,Co-op City,40.874294,-73.829939,2,Caribbean Restaurant,Italian Restaurant,Mexican Restaurant,Fast Food Restaurant,Diner
2,Bronx,Eastchester,40.887556,-73.827806,1,Caribbean Restaurant,Italian Restaurant,Fast Food Restaurant,Burger Joint,Diner
3,Bronx,Fieldston,40.895437,-73.905643,1,Mexican Restaurant,Diner,Burger Joint,Latin American Restaurant,Italian Restaurant
4,Bronx,Riverdale,40.890834,-73.912585,1,Mexican Restaurant,Diner,Latin American Restaurant,Burger Joint,Japanese Restaurant


### Visulizing Clusters

In [39]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]


markers_colors = []
for lat, lon, poi, cluster in zip(bronx_merged['Latitude'], bronx_merged['Longitude'], bronx_merged['Neighborhood'], bronx_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examining Clusters

Cluster-1

In [40]:
bronx_merged.loc[bronx_merged['Cluster Labels'] == 0, bronx_merged.columns[[1] + list(range(5, bronx_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Common Restaurant/Diner,2nd Common Restaurant/Diner,3rd Common Restaurant/Diner,4th Common Restaurant/Diner,5th Common Restaurant/Diner
5,Kingsbridge,Diner,Mexican Restaurant,Latin American Restaurant,Burger Joint,Italian Restaurant
6,Woodlawn,Caribbean Restaurant,Burger Joint,Italian Restaurant,Diner,Brazilian Restaurant
8,Williamsbridge,Caribbean Restaurant,Italian Restaurant,Diner,Fast Food Restaurant,Burger Joint
12,Bedford Park,Italian Restaurant,Mexican Restaurant,Diner,Caribbean Restaurant,Burger Joint
16,East Tremont,Italian Restaurant,Mexican Restaurant,Diner,American Restaurant,Spanish Restaurant
26,Clason Point,Mexican Restaurant,Latin American Restaurant,Diner,American Restaurant,Spanish Restaurant
34,Spuyten Duyvil,Mexican Restaurant,Diner,Latin American Restaurant,Burger Joint,Tapas Restaurant
35,North Riverdale,Mexican Restaurant,Italian Restaurant,Caribbean Restaurant,Burger Joint,Diner
40,Olinville,Italian Restaurant,Caribbean Restaurant,Diner,Mexican Restaurant,Spanish Restaurant
41,Pelham Gardens,Italian Restaurant,Caribbean Restaurant,Diner,Mexican Restaurant,Fast Food Restaurant


Cluster-2

In [41]:
bronx_merged.loc[bronx_merged['Cluster Labels'] == 1, bronx_merged.columns[[1] + list(range(5, bronx_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Common Restaurant/Diner,2nd Common Restaurant/Diner,3rd Common Restaurant/Diner,4th Common Restaurant/Diner,5th Common Restaurant/Diner
0,Wakefield,Caribbean Restaurant,Burger Joint,Italian Restaurant,Fast Food Restaurant,Diner
2,Eastchester,Caribbean Restaurant,Italian Restaurant,Fast Food Restaurant,Burger Joint,Diner
3,Fieldston,Mexican Restaurant,Diner,Burger Joint,Latin American Restaurant,Italian Restaurant
4,Riverdale,Mexican Restaurant,Diner,Latin American Restaurant,Burger Joint,Japanese Restaurant
7,Norwood,Italian Restaurant,Caribbean Restaurant,Diner,Mexican Restaurant,Burger Joint
13,University Heights,Italian Restaurant,Mexican Restaurant,Latin American Restaurant,Caribbean Restaurant,Diner
18,High Bridge,Mexican Restaurant,Italian Restaurant,Southern / Soul Food Restaurant,Latin American Restaurant,Ethiopian Restaurant
25,Soundview,Italian Restaurant,Mexican Restaurant,American Restaurant,Spanish Restaurant,Diner
29,Parkchester,Italian Restaurant,Mexican Restaurant,Spanish Restaurant,Diner,American Restaurant
30,Westchester Square,Italian Restaurant,Mexican Restaurant,American Restaurant,Diner,Spanish Restaurant


Cluster-3

In [42]:
bronx_merged.loc[bronx_merged['Cluster Labels'] == 2, bronx_merged.columns[[1] + list(range(5, bronx_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Common Restaurant/Diner,2nd Common Restaurant/Diner,3rd Common Restaurant/Diner,4th Common Restaurant/Diner,5th Common Restaurant/Diner
1,Co-op City,Caribbean Restaurant,Italian Restaurant,Mexican Restaurant,Fast Food Restaurant,Diner
9,Baychester,Caribbean Restaurant,Italian Restaurant,Diner,Mexican Restaurant,Fast Food Restaurant
14,Morris Heights,Italian Restaurant,Mexican Restaurant,Latin American Restaurant,Tapas Restaurant,Restaurant
15,Fordham,Italian Restaurant,Mexican Restaurant,Latin American Restaurant,Diner,Caribbean Restaurant
32,Morris Park,Italian Restaurant,Spanish Restaurant,Mexican Restaurant,Diner,American Restaurant
36,Pelham Bay,Mexican Restaurant,Italian Restaurant,Spanish Restaurant,American Restaurant,Diner
47,Mount Eden,Italian Restaurant,Mexican Restaurant,Latin American Restaurant,Tapas Restaurant,Restaurant
50,Allerton,Italian Restaurant,Caribbean Restaurant,Mexican Restaurant,Spanish Restaurant,Diner
51,Kingsbridge Heights,Italian Restaurant,Mexican Restaurant,Diner,Latin American Restaurant,Burger Joint


Cluster-4

In [43]:
bronx_merged.loc[bronx_merged['Cluster Labels'] == 3, bronx_merged.columns[[1] + list(range(5, bronx_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Common Restaurant/Diner,2nd Common Restaurant/Diner,3rd Common Restaurant/Diner,4th Common Restaurant/Diner,5th Common Restaurant/Diner
17,West Farms,Italian Restaurant,Spanish Restaurant,Mexican Restaurant,Latin American Restaurant,Diner
20,Mott Haven,Italian Restaurant,Mexican Restaurant,American Restaurant,Southern / Soul Food Restaurant,French Restaurant
21,Port Morris,Italian Restaurant,Greek Restaurant,American Restaurant,Mexican Restaurant,Ramen Restaurant
22,Longwood,Italian Restaurant,Mexican Restaurant,French Restaurant,Southern / Soul Food Restaurant,Greek Restaurant
31,Van Nest,Italian Restaurant,Spanish Restaurant,Mexican Restaurant,Diner,Latin American Restaurant
39,Castle Hill,Diner,American Restaurant,Mexican Restaurant,Spanish Restaurant,Italian Restaurant
42,Concourse,Italian Restaurant,Mexican Restaurant,Restaurant,Latin American Restaurant,Tapas Restaurant


Cluster-5

In [44]:
bronx_merged.loc[bronx_merged['Cluster Labels'] == 4, bronx_merged.columns[[1] + list(range(5, bronx_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Common Restaurant/Diner,2nd Common Restaurant/Diner,3rd Common Restaurant/Diner,4th Common Restaurant/Diner,5th Common Restaurant/Diner
10,Pelham Parkway,Italian Restaurant,Mexican Restaurant,Diner,Caribbean Restaurant,Spanish Restaurant
11,City Island,American Restaurant,Italian Restaurant,Diner,Mexican Restaurant,Thai Restaurant
19,Melrose,Italian Restaurant,Mexican Restaurant,French Restaurant,Ethiopian Restaurant,Southern / Soul Food Restaurant
23,Hunts Point,Mexican Restaurant,Greek Restaurant,Spanish Restaurant,Burger Joint,Diner
24,Morrisania,Italian Restaurant,Mexican Restaurant,Southern / Soul Food Restaurant,African Restaurant,Ethiopian Restaurant
27,Throgs Neck,Diner,Mexican Restaurant,Italian Restaurant,American Restaurant,Latin American Restaurant
28,Country Club,Italian Restaurant,Diner,Mexican Restaurant,American Restaurant,Spanish Restaurant
38,Edgewater Park,Diner,Mexican Restaurant,Italian Restaurant,American Restaurant,Spanish Restaurant


### We have the data about common restaurant types. Now we have to calculate the number of restaurants in each neighborhood.

In [47]:
bronx_onehot['Total'] = bronx_onehot.sum(axis=1)
bronx_onehot.head()

Unnamed: 0,Neighborhood,Caribbean Restaurant,Burger Joint,Italian Restaurant,Diner,Fast Food Restaurant,Brazilian Restaurant,Dumpling Restaurant,Mexican Restaurant,Asian Restaurant,American Restaurant,Cajun / Creole Restaurant,Spanish Restaurant,BBQ Joint,Arepa Restaurant,Sushi Restaurant,Cuban Restaurant,Japanese Restaurant,New American Restaurant,Chinese Restaurant,Tapas Restaurant,Restaurant,Greek Restaurant,Latin American Restaurant,Steakhouse,Venezuelan Restaurant,Mediterranean Restaurant,French Restaurant,Thai Restaurant,Ethiopian Restaurant,Paella Restaurant,African Restaurant,Southern / Soul Food Restaurant,Peruvian Restaurant,Taco Place,Indian Restaurant,Turkish Restaurant,Ramen Restaurant,Vietnamese Restaurant,Korean Restaurant,Total
0,Wakefield,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Wakefield,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2
2,Wakefield,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2
3,Wakefield,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Wakefield,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2


In [50]:
final = bronx_onehot [['Neighborhood','Total']].groupby('Neighborhood').sum()
final.head()

Unnamed: 0_level_0,Total
Neighborhood,Unnamed: 1_level_1
Allerton,42
Baychester,60
Bedford Park,54
Belmont,52
Bronxdale,56


### Finding the neighborhood that has the least number of restaurants.

In [54]:
final = final.sort_values(['Total'], ascending=[True])
final.head()

Unnamed: 0_level_0,Total
Neighborhood,Unnamed: 1_level_1
Allerton,42
Port Morris,42
Morris Heights,44
Mount Eden,46
Edenwald,46


## Conclusion= Our client wants to open his restaurant in a neighborhood that has the least number of restaurants. Allerton and Port Morris have the least number of restaurants. 

## In Allerton most common restaurants are: Italian Restaurant, Caribbean Restaurant, Mexican Restaurant, Spanish Restaurant Diner.

## In Port Morris most common restaurants are: Italian Restaurant, Greek Restaurant, American Restaurant, Mexican Restaurant Ramen Restaurant.

# So considering the data above he should open his Mexican Restaurant in Port Morris.