## The best neighborhood to open a coffee shop in Brooklyn, New York

In [3]:
! pip install geopy
! pip install folium

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/a4/f0/44e69d50519880287cc41e7c8a6acc58daa9a9acf5f6afc52bcc70f69a6d/folium-0.11.0-py2.py3-none-any.whl (93kB)
[K     |████████████████████████████████| 102kB 9.3MB/s ta 0:00:011
Collecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/13/fb/9eacc24ba3216510c6b59a4ea1cd53d87f25ba76237d7f4393abeaf4c94e/branca-0.4.1-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0


In [4]:
# Importing required libraries.
import json
import os

import numpy as np
import pandas as pd
import requests
from pandas.io.json import json_normalize


import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors

from geopy.geocoders import Nominatim

import folium

from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

%matplotlib inline

We are going to use the publicly available New York neighborhood data provided by the NYU data repository.

In [5]:
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
print('Data downloaded')    

with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)

neighborhoods_data = newyork_data['features']

# Convert the JSON features into a pandas DataFrame.
# Each feature provides the borough and neighborhood name under 'properties'
# and a [longitude, latitude] pair under 'geometry' -> 'coordinates'.
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']

# Collect every row first and build the frame once: growing a DataFrame with
# `DataFrame.append` inside a loop copies the whole frame on each iteration,
# and `append` no longer exists in pandas 2.x.
neighborhood_rows = [
    {
        'Borough': data['properties']['borough'],
        'Neighborhood': data['properties']['name'],
        'Latitude': data['geometry']['coordinates'][1],
        'Longitude': data['geometry']['coordinates'][0],
    }
    for data in neighborhoods_data
]

# Passing `columns` fixes the column order and keeps the headers even when
# the feature list is empty.
neighborhoods = pd.DataFrame(neighborhood_rows, columns=column_names)

neighborhoods.head()

Data downloaded


Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [6]:
# Keep only the neighborhoods that belong to the Brooklyn borough,
# renumbering the rows from 0.
is_brooklyn = neighborhoods['Borough'] == 'Brooklyn'
brooklyn_data = neighborhoods.loc[is_brooklyn].reset_index(drop=True)

In [7]:
# Look up the coordinates of Brooklyn; they are used as the centre of every map
# below and as the search origin for the venue query.
address = 'Brooklyn, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
if location is None:
    # No match from the geocoder: fail here with a clear message instead of
    # an AttributeError on the next line.
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
# The address geocoded above is Brooklyn, so the message must say Brooklyn.
print('The geograpical coordinate of Brooklyn are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Manhattan are 40.6501038, -73.9495823.


In [35]:
# Create a map of Brooklyn centred on the geocoded latitude and longitude.
map_brooklyn = folium.Map(location=[latitude, longitude], zoom_start=11)

# Add one circle marker per Brooklyn neighborhood; clicking a marker shows the
# neighborhood name.
for lat, lng, label in zip(brooklyn_data['Latitude'], brooklyn_data['Longitude'], brooklyn_data['Neighborhood']):
    # parse_html is an option of the popup (not of the marker), so it is set here only.
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_brooklyn)

map_brooklyn

Now we are going to use the Foursquare API to get the neighborhood details.

In [9]:
# Credentials
# Read the Foursquare keys from the environment so they never have to be typed
# into (and saved with) the notebook. The '#' placeholder is kept as a fallback
# for readers who prefer to paste their keys in manually.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '#') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '#') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# Never echo the secret itself: cell outputs are stored in the notebook file
# and get shared with it. Show only a mask of the same length.
print('CLIENT_SECRET:' + '*' * len(CLIENT_SECRET))

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


In [11]:
# Search Foursquare for coffee shops around the Brooklyn centre point.
search_query = 'Coffee Shops'
radius = 40000  # sent as the API's `radius` parameter (presumably metres - confirm against the Foursquare docs)
LIMIT = 100     # sent as the API's `limit` parameter
url = 'https://api.foursquare.com/v2/venues/search'

# Let requests build and escape the query string (the search text contains a
# space) instead of interpolating raw values into the URL by hand.
params = {
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
    'll': '{},{}'.format(latitude, longitude),
    'v': VERSION,
    'query': search_query,
    'radius': radius,
    'limit': LIMIT,
}

# A timeout keeps the cell from hanging forever; raise_for_status surfaces
# HTTP errors (bad credentials, quota exceeded) here rather than as a
# confusing KeyError on the lookup below.
http_response = requests.get(url, params=params, timeout=30)
http_response.raise_for_status()
resp = http_response.json()

# Flatten the nested venue records into one row per venue.
venues = resp['response']['venues']
dataframe = json_normalize(venues)
dataframe.head()

Unnamed: 0,categories,delivery.id,delivery.provider.icon.name,delivery.provider.icon.prefix,delivery.provider.icon.sizes,delivery.provider.name,delivery.url,hasPerk,id,location.address,...,location.formattedAddress,location.labeledLatLngs,location.lat,location.lng,location.neighborhood,location.postalCode,location.state,name,referralId,venuePage.id
0,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",2082742.0,/delivery_provider_seamless_20180129.png,https://fastly.4sqi.net/img/general/cap/,"[40, 50]",seamless,https://www.seamless.com/menu/plg-coffee-house...,False,5389f295498e264f18c5d652,499 Rogers Ave,...,"[499 Rogers Ave (Midwood St), Brooklyn, NY 112...","[{'label': 'display', 'lat': 40.66000670620193...",40.660007,-73.953362,,11225,NY,PLG Coffee House and Tavern,v-1593626733,
1,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",,,,,,,False,5b89aecc8c812a002ca7f2af,78 Rockwell Pl,...,"[78 Rockwell Pl, New York, NY 11217, United St...","[{'label': 'display', 'lat': 40.687751, 'lng':...",40.687751,-73.979387,,11217,NY,Coffee Project NY,v-1593626733,
2,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",,,,,,,False,5cf821d8250cab002cfbf4c4,Rogers Av,...,"[Rogers Av (Midwood St), Brooklyn, NY 11225, U...","[{'label': 'display', 'lat': 40.659725, 'lng':...",40.659725,-73.95328,,11225,NY,Coffee Rx,v-1593626733,
3,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",,,,,,,False,59b2c702f96b2c7bd7416992,,...,"[New York, NY 11210, United States]","[{'label': 'display', 'lat': 40.616927, 'lng':...",40.616927,-73.944795,,11210,NY,Coffee Redefined,v-1593626733,
4,"[{'id': '4bf58dd8d48988d147941735', 'name': 'D...",403612.0,/delivery_provider_seamless_20180129.png,https://fastly.4sqi.net/img/general/cap/,"[40, 50]",seamless,https://www.seamless.com/menu/park-side-coffee...,False,4c41a4bbaf052d7f079d7d79,188 Parkside Ave,...,"[188 Parkside Ave (at Ocean Ave.), Brooklyn, N...","[{'label': 'display', 'lat': 40.65508454943389...",40.655085,-73.961502,,11226,NY,Parkside Coffee Shop and Diner,v-1593626733,


In [12]:
# Keep the venue name, its categories, every location.* column and the venue id.
location_columns = [col for col in dataframe.columns if col.startswith('location')]
filtered_columns = ['name', 'categories'] + location_columns + ['id']
filtered_df = dataframe.loc[:, filtered_columns]

def get_category_type(row):
    """Return the name of a venue's first listed category.

    Parameters
    ----------
    row : mapping-like (for example a DataFrame row)
        Must provide a ``'categories'`` entry or, failing that, a
        ``'venue.categories'`` entry holding a list of category dicts.

    Returns
    -------
    The ``'name'`` value of the first category, or ``None`` when the category
    list is empty or missing (``None``/NaN).

    Raises
    ------
    KeyError
        If the row has neither ``'categories'`` nor ``'venue.categories'``.
    """
    # Catch only the "key not present" case; a bare `except` would also hide
    # unrelated errors (including KeyboardInterrupt).
    try:
        categories_list = row['categories']
    except KeyError:
        categories_list = row['venue.categories']

    # A missing value (None or NaN) has no len(); treat it like an empty list.
    try:
        has_categories = len(categories_list) > 0
    except TypeError:
        has_categories = False

    if not has_categories:
        return None
    return categories_list[0]['name']

# Replace each venue's list of category dicts with the name of its first category.
filtered_df['categories'] = filtered_df.apply(get_category_type, axis=1)

# Keep only the last dotted component of every column name
# (for example 'location.lat' becomes 'lat').
filtered_df.columns = [column.rsplit('.', 1)[-1] for column in filtered_df.columns]
filtered_df.head()

Unnamed: 0,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,neighborhood,postalCode,state,id
0,PLG Coffee House and Tavern,Café,499 Rogers Ave,US,Brooklyn,United States,Midwood St,1147,"[499 Rogers Ave (Midwood St), Brooklyn, NY 112...","[{'label': 'display', 'lat': 40.66000670620193...",40.660007,-73.953362,,11225,NY,5389f295498e264f18c5d652
1,Coffee Project NY,Coffee Shop,78 Rockwell Pl,US,New York,United States,,4888,"[78 Rockwell Pl, New York, NY 11217, United St...","[{'label': 'display', 'lat': 40.687751, 'lng':...",40.687751,-73.979387,,11217,NY,5b89aecc8c812a002ca7f2af
2,Coffee Rx,Coffee Shop,Rogers Av,US,Brooklyn,United States,Midwood St,1115,"[Rogers Av (Midwood St), Brooklyn, NY 11225, U...","[{'label': 'display', 'lat': 40.659725, 'lng':...",40.659725,-73.95328,,11225,NY,5cf821d8250cab002cfbf4c4
3,Coffee Redefined,Coffee Shop,,US,New York,United States,,3715,"[New York, NY 11210, United States]","[{'label': 'display', 'lat': 40.616927, 'lng':...",40.616927,-73.944795,,11210,NY,59b2c702f96b2c7bd7416992
4,Parkside Coffee Shop and Diner,Diner,188 Parkside Ave,US,Brooklyn,United States,at Ocean Ave.,1149,"[188 Parkside Ave (at Ocean Ave.), Brooklyn, N...","[{'label': 'display', 'lat': 40.65508454943389...",40.655085,-73.961502,,11226,NY,4c41a4bbaf052d7f079d7d79


In [13]:
# Keep only the venues whose city field is exactly 'Brooklyn', renumbering rows from 0.
in_brooklyn = filtered_df['city'] == 'Brooklyn'
filtered_df = filtered_df.loc[in_brooklyn].reset_index(drop=True)
filtered_df.head()

Unnamed: 0,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,neighborhood,postalCode,state,id
0,PLG Coffee House and Tavern,Café,499 Rogers Ave,US,Brooklyn,United States,Midwood St,1147,"[499 Rogers Ave (Midwood St), Brooklyn, NY 112...","[{'label': 'display', 'lat': 40.66000670620193...",40.660007,-73.953362,,11225,NY,5389f295498e264f18c5d652
1,Coffee Rx,Coffee Shop,Rogers Av,US,Brooklyn,United States,Midwood St,1115,"[Rogers Av (Midwood St), Brooklyn, NY 11225, U...","[{'label': 'display', 'lat': 40.659725, 'lng':...",40.659725,-73.95328,,11225,NY,5cf821d8250cab002cfbf4c4
2,Parkside Coffee Shop and Diner,Diner,188 Parkside Ave,US,Brooklyn,United States,at Ocean Ave.,1149,"[188 Parkside Ave (at Ocean Ave.), Brooklyn, N...","[{'label': 'display', 'lat': 40.65508454943389...",40.655085,-73.961502,,11226,NY,4c41a4bbaf052d7f079d7d79
3,Ciao Bella Coffee,Coffee Shop,284 Clarkson Ave.,US,Brooklyn,United States,Nostrand Ave.,583,"[284 Clarkson Ave. (Nostrand Ave.), Brooklyn, ...","[{'label': 'display', 'lat': 40.65530776977539...",40.655308,-73.950455,,11226,NY,5e665bb9acf4e7000855c560
4,Caoba Restaurant/ Coffee Shop,Cuban Restaurant,762 Snediker Ave,US,Brooklyn,United States,,4431,"[762 Snediker Ave, Brooklyn, NY 11207, United ...","[{'label': 'display', 'lat': 40.65608819435355...",40.656088,-73.897702,East New York,11207,NY,542da088498e60c8dafa5b06


Let's plot the coffee shops on the Brooklyn map to get a better picture.

In [34]:
# Map centred on Brooklyn with one marker per coffee-shop venue;
# the popup of each marker shows the venue's category.
cafe_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# Appearance shared by every marker.
marker_style = dict(
    radius=5,
    color='red',
    fill=True,
    fill_color='yellow',
    fill_opacity=1.0,
)

venue_points = zip(filtered_df['lat'], filtered_df['lng'], filtered_df['categories'])
for venue_lat, venue_lng, category in venue_points:
    marker = folium.CircleMarker([venue_lat, venue_lng], popup=category, **marker_style)
    marker.add_to(cafe_map)

cafe_map

In [15]:
from sklearn.preprocessing import StandardScaler

## Let's cluster the neighborhood to get more insights

In [16]:
# Feature matrix for clustering: the latitude and longitude of every venue.
# The columns are selected by name and cast to float, so the cell keeps working
# if the column order of filtered_df changes and never has to go through a
# mixed-dtype object array.
X = filtered_df[['lat', 'lng']].astype(float).values
# Replace any NaN with 0.0 so the scaler and the clustering step receive finite numbers.
X = np.nan_to_num(X)
# Standardise both features to zero mean and unit variance.
clust_data = StandardScaler().fit_transform(X)
clust_data



array([[-0.79061557,  0.25946817],
       [-0.80150969,  0.26269084],
       [-0.98096479, -0.06066852],
       [-0.97233243,  0.37380894],
       [-0.94215192,  2.4485711 ],
       [ 0.02618661, -1.4381804 ],
       [-1.13153579,  0.52856048],
       [-1.30668088, -0.42302471],
       [-1.73599838, -2.59987951],
       [ 0.32835771, -1.26570105],
       [-0.97425281, -0.00533332],
       [-0.38094782, -0.72756402],
       [-1.37226844,  0.45155268],
       [ 1.38093968,  0.01687942],
       [ 0.52215726, -0.07753311],
       [ 0.36097244, -0.21324547],
       [ 1.32546177,  0.11470907],
       [ 1.45256006,  0.03088084],
       [ 0.02224999, -0.66966285],
       [-0.56538815,  1.96550257],
       [ 0.12151236, -1.39020711],
       [ 1.64823534,  0.61226415],
       [ 1.34487272,  0.60838543],
       [-0.2820876 , -0.38431241],
       [ 0.50432948, -0.23734699],
       [ 1.03845517,  0.23959905],
       [ 0.89765107,  1.66707907],
       [ 1.26279262, -0.08729233]])

In [17]:
# Number of clusters to form.
k = 3

# random_state is fixed so the cluster assignment is the same on every run.
kmeans = KMeans(n_clusters=k, init='k-means++', random_state=4).fit(clust_data)

# One cluster label per venue, in row order.
kmeans.labels_

array([2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 2, 0, 0, 0, 0, 0, 1, 2, 1, 0,
       0, 1, 0, 0, 0, 0], dtype=int32)

In [18]:
# Attach each venue's cluster label to the venue table as a new column.
labels = kmeans.labels_
filtered_df = filtered_df.assign(**{'Cluster labels': labels})
filtered_df.head()

Unnamed: 0,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,neighborhood,postalCode,state,id,Cluster labels
0,PLG Coffee House and Tavern,Café,499 Rogers Ave,US,Brooklyn,United States,Midwood St,1147,"[499 Rogers Ave (Midwood St), Brooklyn, NY 112...","[{'label': 'display', 'lat': 40.66000670620193...",40.660007,-73.953362,,11225,NY,5389f295498e264f18c5d652,2
1,Coffee Rx,Coffee Shop,Rogers Av,US,Brooklyn,United States,Midwood St,1115,"[Rogers Av (Midwood St), Brooklyn, NY 11225, U...","[{'label': 'display', 'lat': 40.659725, 'lng':...",40.659725,-73.95328,,11225,NY,5cf821d8250cab002cfbf4c4,2
2,Parkside Coffee Shop and Diner,Diner,188 Parkside Ave,US,Brooklyn,United States,at Ocean Ave.,1149,"[188 Parkside Ave (at Ocean Ave.), Brooklyn, N...","[{'label': 'display', 'lat': 40.65508454943389...",40.655085,-73.961502,,11226,NY,4c41a4bbaf052d7f079d7d79,2
3,Ciao Bella Coffee,Coffee Shop,284 Clarkson Ave.,US,Brooklyn,United States,Nostrand Ave.,583,"[284 Clarkson Ave. (Nostrand Ave.), Brooklyn, ...","[{'label': 'display', 'lat': 40.65530776977539...",40.655308,-73.950455,,11226,NY,5e665bb9acf4e7000855c560,2
4,Caoba Restaurant/ Coffee Shop,Cuban Restaurant,762 Snediker Ave,US,Brooklyn,United States,,4431,"[762 Snediker Ave, Brooklyn, NY 11207, United ...","[{'label': 'display', 'lat': 40.65608819435355...",40.656088,-73.897702,East New York,11207,NY,542da088498e60c8dafa5b06,2


#### Let's see the clustered neighborhoods on the map

In [33]:
# Map centred on Brooklyn with every coffee shop coloured by its cluster.
cluster_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# One distinct colour per cluster, taken at k evenly spaced points of the rainbow colormap.
colors_array = cm.rainbow(np.linspace(0, 1, k))
rainbow = [colors.rgb2hex(i) for i in colors_array]

for lat, lng, cluster in zip(filtered_df['lat'], filtered_df['lng'], filtered_df['Cluster labels']):
    # The cluster labels start at 0, so the label itself is the colour index.
    # (Indexing with `cluster - 1` only worked because -1 wraps around to the
    # last colour.)
    cluster = int(cluster)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color=rainbow[cluster],
        popup=str(cluster),
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7,
    ).add_to(cluster_map)

cluster_map

In the above map we can see that the coffee shops are distributed all across the borough of Brooklyn.

Cluster 0 : Williamsburg

Cluster 1 : Carroll Gardens

Cluster 2 : East New York

### Finding the number of coffee shops in each cluster

In [30]:
# Count the venues that fall into each cluster.
venues_by_cluster = filtered_df.groupby('Cluster labels')
venues_by_cluster['Cluster labels'].count()

Cluster labels
0    11
1     7
2    10
Name: Cluster labels, dtype: int64

## Findings

The best option would be to open the coffee shop in the cluster 1 area (i.e. Carroll Gardens), as this cluster has the fewest coffee shops. The number of coffee shops there is lower than in the other clusters, but not so low as to suggest that customer demand is very weak. Fewer coffee shops also means the area will be less competitive for the shop owner than the other clusters.