In [2]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis

In [3]:
# Lift pandas' display limits so rendered dataframes are never truncated.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [4]:
import json 
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

In [5]:
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [6]:
# Download the New York neighborhoods dataset with requests instead of a quiet
# shell `wget`: a failed download now raises instead of being followed by a
# misleading success message, and the cell no longer needs wget installed.
dataset_response = requests.get('https://cocl.us/new_york_dataset', timeout=60)
dataset_response.raise_for_status()
with open('newyork_data.json', 'wb') as dataset_file:
    dataset_file.write(dataset_response.content)
print('Data downloaded!')

Data downloaded!


In [7]:
# Parse the downloaded file. The encoding is given explicitly so the result
# does not depend on the platform's default text encoding.
with open('newyork_data.json', encoding='utf-8') as json_data:
    newyork_data = json.load(json_data)

In [8]:
# Keep only the 'features' entry; the loop further down reads one
# neighborhood (borough, name, coordinates) from each of its items.
neighborhoods_data = newyork_data['features']

In [9]:
# column layout of the neighborhoods dataframe
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe (empty, with only the column headers set)
neighborhoods = pd.DataFrame(columns=column_names)

In [10]:
# Collect one record per feature and build the dataframe in a single step.
# Growing a dataframe with DataFrame.append inside a loop copies the whole
# frame on every iteration, and DataFrame.append was removed in pandas 2.0.
neighborhood_records = []
for data in neighborhoods_data:
    properties = data['properties']

    # coordinates are stored as [longitude, latitude]
    neighborhood_latlon = data['geometry']['coordinates']

    neighborhood_records.append({'Borough': properties['borough'],
                                 'Neighborhood': properties['name'],
                                 'Latitude': neighborhood_latlon[1],
                                 'Longitude': neighborhood_latlon[0]})

neighborhoods = pd.DataFrame(neighborhood_records, columns=column_names)

In [11]:
# Preview the first rows to check that the dataframe was filled as expected.
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [12]:
# Report the number of distinct boroughs and the number of neighborhood rows.
borough_count = len(neighborhoods['Borough'].unique())
neighborhood_count = neighborhoods.shape[0]
print('The dataframe has {} boroughs and {} neighborhoods.'.format(borough_count, neighborhood_count))

The dataframe has 5 boroughs and 306 neighborhoods.


In [13]:
# Nominatim is used below but was never imported anywhere in this notebook,
# so the cell failed with a NameError on a fresh kernel.
from geopy.geocoders import Nominatim

address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() yields None when the address cannot be resolved; fail with a clear
# message instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


In [14]:
# Base map centred on the geocoded New York City coordinates.
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per neighborhood; the popup reads "<neighborhood>, <borough>".
neighborhood_rows = zip(
    neighborhoods['Latitude'],
    neighborhoods['Longitude'],
    neighborhoods['Borough'],
    neighborhoods['Neighborhood'],
)
for lat, lng, borough, neighborhood in neighborhood_rows:
    popup = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_newyork)

map_newyork

In [15]:
# Keep only the Brooklyn rows and renumber them from zero.
brooklyn_data = (
    neighborhoods
    .loc[neighborhoods['Borough'].eq('Brooklyn')]
    .reset_index(drop=True)
)
brooklyn_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Brooklyn,Bay Ridge,40.625801,-74.030621
1,Brooklyn,Bensonhurst,40.611009,-73.99518
2,Brooklyn,Sunset Park,40.645103,-74.010316
3,Brooklyn,Greenpoint,40.730201,-73.954241
4,Brooklyn,Gravesend,40.59526,-73.973471


In [16]:
# Nominatim is used below but is not imported by any import cell of this
# notebook; importing it here keeps the cell runnable on a fresh kernel.
from geopy.geocoders import Nominatim

address = 'Brooklyn, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() yields None when the address cannot be resolved; fail with a clear
# message instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))
# NOTE: latitude/longitude now hold the Brooklyn coordinates; the map cells
# below rely on these values.
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Brooklyn are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Brooklyn are 40.6501038, -73.9495823.


In [17]:
# create map of Brooklyn using latitude and longitude values
map_brooklyn = folium.Map(location=[latitude, longitude], zoom_start=11)

# add one circle marker per Brooklyn neighborhood, labelled with its name
brooklyn_points = zip(
    brooklyn_data['Latitude'],
    brooklyn_data['Longitude'],
    brooklyn_data['Neighborhood'],
)
for lat, lng, hood in brooklyn_points:
    popup = folium.Popup(hood, parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_brooklyn)

map_brooklyn

In [18]:
import os

# Credentials must not be stored in (or printed by) the notebook: read them from
# the environment. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before
# starting the kernel; a missing variable raises KeyError naming it.
CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID'] # your Foursquare ID
CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET'] # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30
# Confirm the load without echoing the secret values into the saved output.
print('Foursquare credentials loaded from the environment.')

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


In [19]:
import urllib
def _searchVenues(lat, lng, radius, categoryIds, limit):
    """Run one Foursquare venue search and return the raw list of venue dicts.

    Reads CLIENT_ID, CLIENT_SECRET, VERSION and (when ``limit`` is None) LIMIT
    from the notebook's globals at call time.

    Raises:
        requests.RequestException: network failure or HTTP error status.
        KeyError: the JSON payload has no ``response``/``venues`` entry.
        ValueError: the body is not valid JSON.
    """
    params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
        'll': '{},{}'.format(lat, lng),
        'radius': radius,
        'limit': LIMIT if limit is None else limit,
    }
    if categoryIds != '':
        params['categoryId'] = categoryIds

    # Let requests build and escape the query string instead of formatting it by hand.
    response = requests.get('https://api.foursquare.com/v2/venues/search',
                            params=params, timeout=30)
    response.raise_for_status()
    return response.json()['response']['venues']


def getNearbyVenues(names, latitudes, longitudes, radius=50, categoryIds='', limit=None):
    """Collect the venues Foursquare reports around each neighborhood.

    Args:
        names: iterable of neighborhood names.
        latitudes: iterable of neighborhood latitudes, parallel to ``names``.
        longitudes: iterable of neighborhood longitudes, parallel to ``names``.
        radius: search radius sent to the API as the ``radius`` parameter.
        categoryIds: value for the API's ``categoryId`` filter; '' disables it.
        limit: maximum venues per request; None uses the global LIMIT.

    Returns:
        pandas.DataFrame with one row per venue that has at least one category
        and the columns 'Neighborhood', 'Neighborhood Latitude',
        'Neighborhood Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude',
        'Venue Category'. The frame is empty (but has these columns) when
        nothing is found.

    A neighborhood whose request fails is reported and skipped; previously any
    failure was swallowed and surfaced as an UnboundLocalError at the return.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        try:
            venues = _searchVenues(lat, lng, radius, categoryIds, limit)
        except (requests.RequestException, KeyError, ValueError) as err:
            print('Venue search failed for {}: {!r}'.format(name, err))
            continue

        for v in venues:
            # Venues without a usable first category are skipped.
            try:
                category = v['categories'][0]['name']
            except (KeyError, IndexError, TypeError):
                continue

            rows.append((
                name,
                lat,
                lng,
                v['name'],
                v['location']['lat'],
                v['location']['lng'],
                category,
            ))

    # Passing the columns here keeps the schema intact even when rows is empty.
    nearby_venues = pd.DataFrame(rows, columns=columns)
    return(nearby_venues)

In [20]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 50 # define radius

# `radius` was defined here but never handed to getNearbyVenues, so the call
# silently relied on the function's own default; pass it explicitly.
# NOTE(review): the category id presumably selects Foursquare's top-level
# 'Food' category - confirm against the Foursquare category list.
brooklyn_venues_food = getNearbyVenues(names=brooklyn_data['Neighborhood'],
                                       latitudes=brooklyn_data['Latitude'],
                                       longitudes=brooklyn_data['Longitude'],
                                       radius=radius,
                                       categoryIds='4d4b7105d754a06374d81259')

In [21]:
# Print the (rows, columns) shape, then preview the first food venues.
print(brooklyn_venues_food.shape)
brooklyn_venues_food.head()

(210, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Bay Ridge,40.625801,-74.030621,RED OAK Restaurant & Bar & Hookah Lounge,40.625447,-74.030246,Hookah Bar
1,Bay Ridge,40.625801,-74.030621,Georgian Dream Cafe and Bakery,40.625586,-74.030196,Caucasian Restaurant
2,Bay Ridge,40.625801,-74.030621,Spartan Souvlaki,40.625511,-74.030202,Greek Restaurant
3,Bay Ridge,40.625801,-74.030621,Blue Door,40.625208,-74.030353,Greek Restaurant
4,Bay Ridge,40.625801,-74.030621,Vela Tapas Bar,40.625463,-74.030161,Spanish Restaurant


In [22]:
# Per neighborhood, count the non-null entries in each remaining column.
brooklyn_venues_food.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bath Beach,4,4,4,4,4,4
Bay Ridge,7,7,7,7,7,7
Brighton Beach,5,5,5,5,5,5
Brooklyn Heights,1,1,1,1,1,1
Brownsville,1,1,1,1,1,1
Bushwick,7,7,7,7,7,7
Carroll Gardens,11,11,11,11,11,11
City Line,3,3,3,3,3,3
Clinton Hill,11,11,11,11,11,11
Cobble Hill,4,4,4,4,4,4


In [23]:
def addToMap(df, color, existingMap):
    """Draw one circle marker per row of ``df`` onto ``existingMap``.

    ``df`` must provide the columns 'Venue Latitude', 'Venue Longitude',
    'Neighborhood', 'Venue' and 'Venue Category'. Each marker uses ``color``
    for both outline and fill, and its popup reads
    "<venue> (<category>) - <neighborhood>". Nothing is returned; the map is
    modified in place.
    """
    needed = ['Venue Latitude', 'Venue Longitude', 'Neighborhood', 'Venue', 'Venue Category']
    for venue_lat, venue_lng, hood, venue_name, category in zip(*[df[col] for col in needed]):
        popup_text = '{} ({}) - {}'.format(venue_name, category, hood)
        popup = folium.Popup(popup_text, parse_html=True)
        marker = folium.CircleMarker(
            [venue_lat, venue_lng],
            radius=5,
            popup=popup,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7,
        )
        marker.add_to(existingMap)

In [24]:
# Map centred on the current latitude/longitude (set by the Brooklyn geocoding
# cell), with every food venue drawn as a red marker.
map_brooklyn_food = folium.Map(location=[latitude, longitude], zoom_start=12)
addToMap(brooklyn_venues_food, 'red', map_brooklyn_food)
map_brooklyn_food

In [25]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 50 # define radius

# `radius` was defined here but never handed to getNearbyVenues, so the call
# silently relied on the function's own default; pass it explicitly.
# NOTE(review): the category id presumably selects Foursquare's top-level
# 'Shop & Service' category - confirm against the Foursquare category list.
brooklyn_venues_shop = getNearbyVenues(names=brooklyn_data['Neighborhood'],
                                       latitudes=brooklyn_data['Latitude'],
                                       longitudes=brooklyn_data['Longitude'],
                                       radius=radius,
                                       categoryIds='4d4b7105d754a06378d81259')

In [26]:
# (rows, columns) of the shop venue table.
brooklyn_venues_shop.shape

(294, 7)

In [27]:
# Map centred on the current latitude/longitude (set by the Brooklyn geocoding
# cell), with every shop venue drawn as a red marker.
map_brooklyn_shop = folium.Map(location=[latitude, longitude], zoom_start=12)
addToMap(brooklyn_venues_shop, 'red', map_brooklyn_shop)
map_brooklyn_shop

In [28]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 50 # define radius

# `radius` was defined here but never handed to getNearbyVenues, so the call
# silently relied on the function's own default; pass it explicitly.
# NOTE(review): this id looks like Foursquare's 'College & University'
# category, while the resulting counts are later reported under 'Office' -
# confirm which category is intended.
brooklyn_venues_col = getNearbyVenues(names=brooklyn_data['Neighborhood'],
                                      latitudes=brooklyn_data['Latitude'],
                                      longitudes=brooklyn_data['Longitude'],
                                      radius=radius,
                                      categoryIds='4d4b7105d754a06372d81259')

In [29]:
# (rows, columns) of the third venue table.
brooklyn_venues_col.shape

(9, 7)

In [30]:
# Map centred on the current latitude/longitude (set by the Brooklyn geocoding
# cell), with every venue of the third category drawn as a red marker.
map_brooklyn_col = folium.Map(location=[latitude, longitude], zoom_start=12)
addToMap(brooklyn_venues_col, 'red', map_brooklyn_col)
map_brooklyn_col

In [31]:
def addColumn(startDf, columnTitle, dataDf):
    """Add a per-neighborhood venue count to ``startDf`` in place.

    Args:
        startDf: dataframe with a 'Neighborhood' column; receives the new column.
        columnTitle: name of the column to create (or overwrite).
        dataDf: venue table with 'Neighborhood' and 'Venue' columns.

    The new column holds, as floats, the number of non-null 'Venue' entries
    that ``dataDf`` has for each neighborhood, and 0 for neighborhoods that do
    not occur in ``dataDf``. Nothing is returned.

    Raises:
        KeyError: ``dataDf`` lacks the 'Neighborhood' or 'Venue' column. (A bare
            ``except`` used to turn this, and any other error, into zeros.)
    """
    venue_counts = dataDf.groupby('Neighborhood')['Venue'].count()

    # Neighborhoods missing from venue_counts map to NaN and become 0.
    startDf[columnTitle] = (
        startDf['Neighborhood'].map(venue_counts).fillna(0).astype(float)
    )

In [32]:
# Work on a copy so brooklyn_data itself stays unchanged.
brook_data = brooklyn_data.copy()

# Attach one venue-count column per category (addColumn modifies brook_data in place).
addColumn(brook_data, 'Food Joints', brooklyn_venues_food)
addColumn(brook_data, 'Shops & Services', brooklyn_venues_shop)
# NOTE(review): the '_col' venue table is labelled 'Office' here - confirm that
# the category queried for it really represents offices.
addColumn(brook_data, 'Office', brooklyn_venues_col)
brook_data

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Food Joints,Shops & Services,Office
0,Brooklyn,Bay Ridge,40.625801,-74.030621,7.0,10.0,0.0
1,Brooklyn,Bensonhurst,40.611009,-73.99518,0.0,0.0,0.0
2,Brooklyn,Sunset Park,40.645103,-74.010316,6.0,12.0,0.0
3,Brooklyn,Greenpoint,40.730201,-73.954241,15.0,25.0,0.0
4,Brooklyn,Gravesend,40.59526,-73.973471,0.0,1.0,0.0
5,Brooklyn,Brighton Beach,40.576825,-73.965094,5.0,15.0,0.0
6,Brooklyn,Sheepshead Bay,40.58689,-73.943186,0.0,0.0,0.0
7,Brooklyn,Manhattan Terrace,40.614433,-73.957438,0.0,1.0,0.0
8,Brooklyn,Flatbush,40.636326,-73.958401,0.0,0.0,0.0
9,Brooklyn,Crown Heights,40.670829,-73.943291,0.0,2.0,0.0


In [35]:
# negative weight, because Abeledata is a restaurant and thus wants to avoid competition as much as possible
weight_food = -1

# positive weight, because shop owners and workers are good customers
weight_shop = 1

# positive weight, because employees are even better customers
weight_office = 2

In [34]:
# Start the scoring table from the neighborhood names only (independent copy).
brook_weight = brook_data[['Neighborhood']].copy()

In [36]:
# Score = weighted sum of the three venue counts; highest score first.
brook_weight['Score'] = (
    brook_data['Food Joints'] * weight_food
    + brook_data['Shops & Services'] * weight_shop
    + brook_data['Office'] * weight_office
)
brook_weight = brook_weight.sort_values(by=['Score'], ascending=False)
brook_weight

Unnamed: 0,Neighborhood,Score
24,Park Slope,18.0
3,Greenpoint,10.0
5,Brighton Beach,10.0
44,City Line,8.0
37,Marine Park,8.0
17,Bedford Stuyvesant,7.0
66,Homecrest,6.0
10,East Flatbush,6.0
51,South Side,6.0
2,Sunset Park,6.0


In [37]:
# Take the winner from the scoring table instead of a hard-coded name: the
# previous filter used 'Midtown', which is not the top-ranked neighborhood in
# brook_weight, so the result map did not show the actual winner.
winner_name = brook_weight.loc[brook_weight['Score'].idxmax(), 'Neighborhood']
brook_winner = brooklyn_data[brooklyn_data['Neighborhood'] == winner_name]

# Centre the map on the winning neighborhood so its markers are in view at this zoom level.
map_brook_result = folium.Map(
    location=[brook_winner['Latitude'].iloc[0], brook_winner['Longitude'].iloc[0]],
    zoom_start=14)

# Small blue marker for the neighborhood's own reference point.
for lat, lng, local in zip(brook_winner['Latitude'], brook_winner['Longitude'], brook_winner['Neighborhood']):
    label = '{}'.format(local)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=2,
        popup=label,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.7).add_to(map_brook_result)

# Venues around the winner, colour-coded by category table.
addToMap(brooklyn_venues_food[brooklyn_venues_food['Neighborhood'] == winner_name], 'red', map_brook_result)
addToMap(brooklyn_venues_shop[brooklyn_venues_shop['Neighborhood'] == winner_name], 'green', map_brook_result)
addToMap(brooklyn_venues_col[brooklyn_venues_col['Neighborhood'] == winner_name], 'gold', map_brook_result)

map_brook_result