## Importing the required libraries

In [46]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

import matplotlib.pyplot as plt # plotting library
# backend for rendering plots within the browser
%matplotlib inline 

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

print('Libraries imported.')

Libraries imported.


## Downloading New York Data

In [None]:
# Download the New York neighbourhood dataset and save it as 'newyork_data.json'
# in the working directory (-q suppresses wget's progress output).
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
# NOTE: this message is printed unconditionally; it does not verify that wget succeeded.
print('Data downloaded!')

## Data cleansing, munging, wrangling to get the required data

In [2]:
# Parse the downloaded JSON file into a Python object; the `with` block
# closes the file handle as soon as parsing is done.
with open('newyork_data.json') as source_file:
    nydata = json.load(source_file)

In [3]:
# define the dataframe columns
# Column schema of the neighbourhood table: one row per neighbourhood with the
# borough it belongs to and its coordinates.
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# Start from an empty frame that already carries the schema above.
neighbourhoods = pd.DataFrame(columns=column_names)

In [4]:
# The 'features' entry of the loaded JSON holds the per-neighbourhood records
# that are iterated in the next cell.
neighbourhoods_data = nydata['features']

In [5]:
# Collect one plain dict per neighbourhood and build the DataFrame in a single
# call. Growing a DataFrame row by row with `DataFrame.append` is quadratic and
# the method no longer exists in pandas 2.x. Rebuilding the frame from scratch
# also makes this cell idempotent: re-running it does not duplicate rows.
neighbourhood_records = []
for data in neighbourhoods_data:
    properties = data['properties']
    borough = properties['borough']
    neighborhood_name = properties['name']

    # The coordinate pair is read as [longitude, latitude].
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]

    neighbourhood_records.append({'Borough': borough,
                                  'Neighborhood': neighborhood_name,
                                  'Latitude': neighborhood_lat,
                                  'Longitude': neighborhood_lon})

# `columns=` fixes the column order and keeps the schema even when there are no records.
neighbourhoods = pd.DataFrame(neighbourhood_records, columns=column_names)

In [6]:
# Preview the first rows of the assembled neighbourhood table.
neighbourhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [65]:
# List the distinct borough names present in the table.
neighbourhoods['Borough'].unique()

array(['Bronx', 'Manhattan', 'Brooklyn', 'Queens', 'Staten Island'],
      dtype=object)

## Analyzing data of Brooklyn

In [8]:
# Restrict the table to Brooklyn. reset_index() renumbers the rows from 0 and
# keeps the original row position in an 'index' column.
brooklyn_data = neighbourhoods.loc[neighbourhoods['Borough'].eq('Brooklyn')].reset_index()
brooklyn_data.head()

Unnamed: 0,index,Borough,Neighborhood,Latitude,Longitude
0,46,Brooklyn,Bay Ridge,40.625801,-74.030621
1,47,Brooklyn,Bensonhurst,40.611009,-73.99518
2,48,Brooklyn,Sunset Park,40.645103,-74.010316
3,49,Brooklyn,Greenpoint,40.730201,-73.954241
4,50,Brooklyn,Gravesend,40.59526,-73.973471


## Finding the latitude and longitude values of Brooklyn

In [9]:
# Resolve the borough's centre coordinates; they are used later to centre the map.
address = 'Brooklyn, NY'

geolocator = Nominatim(user_agent="brook_explorer")
# An explicit timeout avoids spurious failures from the short library default.
location = geolocator.geocode(address, timeout=10)
if location is None:
    # geocode() returns None when the service finds no match; fail with a clear
    # message instead of an AttributeError on the next line.
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Brooklyn are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Brooklyn are 40.6501038, -73.9495823.


In [10]:
import os

# Foursquare credentials are read from the environment so that no secret is
# stored in the notebook source. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the key pair that was previously hardcoded here should be
# treated as compromised and revoked.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

# Report only whether each credential is present; never echo the values into
# the saved cell output.
print('Your credentails:')
print('CLIENT_ID: ' + ('<set>' if CLIENT_ID else '<missing>'))
print('CLIENT_SECRET:' + ('<set>' if CLIENT_SECRET else '<missing>'))

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


## Foursquare ID, Secret and API version details

In [66]:
import os

# Keep any credentials that an earlier cell already configured and fall back to
# the environment otherwise. Unconditionally assigning empty strings here would
# wipe the credentials on "Restart & Run All" and break the API calls below.
CLIENT_ID = globals().get('CLIENT_ID') or os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = globals().get('CLIENT_SECRET') or os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

# Report only whether each credential is present; never echo the values.
print('Your credentails:')
print('CLIENT_ID: ' + ('<set>' if CLIENT_ID else '<missing>'))
print('CLIENT_SECRET:' + ('<set>' if CLIENT_SECRET else '<missing>'))

Your credentails:
CLIENT_ID: 
CLIENT_SECRET:


## Pulling data of all pubs and bars in Brooklyn using Foursquare

In [11]:
# Query the Foursquare venue search once per Brooklyn neighbourhood and gather
# all returned venues into a single frame.

# Request settings are the same for every neighbourhood, so define them once.
radius = 500
LIMIT = 100
# NOTE(review): the '%' characters are placed into the URL without encoding;
# confirm the API interprets this query the way it is intended.
search_query = "%bar%pub%"
url_template = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'

# Collect one frame per neighbourhood and concatenate once at the end;
# DataFrame.append in a loop is quadratic and was removed in pandas 2.x.
venue_frames = []
for lat, lng in zip(brooklyn_data["Latitude"], brooklyn_data["Longitude"]):
    url = url_template.format(CLIENT_ID, CLIENT_SECRET, lat, lng, VERSION, search_query, radius, LIMIT)
    response = requests.get(url, timeout=30)
    # Surface HTTP errors (bad credentials, quota exceeded) here rather than as
    # a KeyError on the payload below.
    response.raise_for_status()
    results = response.json()
    sq = results.get('response', {}).get('venues', [])
    if sq:  # a neighbourhood with no matching venues contributes nothing
        venue_frames.append(json_normalize(sq))

# sort=True keeps the alphabetical column order the row-wise append produced;
# pd.concat raises on an empty list, so fall back to an empty frame.
brookbarpub = pd.concat(venue_frames, sort=True) if venue_frames else pd.DataFrame()

brookbarpub.count()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


categories                        1111
delivery.id                         66
delivery.provider.icon.name         66
delivery.provider.icon.prefix       66
delivery.provider.icon.sizes        66
delivery.provider.name              66
delivery.url                        66
hasPerk                           1111
id                                1111
location.address                   858
location.cc                       1111
location.city                     1065
location.country                  1111
location.crossStreet               466
location.distance                 1111
location.formattedAddress         1111
location.isFuzzed                    1
location.isServiceAreaBusiness       1
location.labeledLatLngs           1110
location.lat                      1111
location.lng                      1111
location.neighborhood               23
location.postalCode                948
location.state                    1110
name                              1111
referralId               

## Data cleansing to remove all null values in latitude, longitude and names of bars and pubs

In [12]:
# Keep only venues that have a name and both coordinates; a row missing any of
# these three fields is dropped.
brookbarpub = brookbarpub.dropna(subset = ['name','location.lat','location.lng'])

In [13]:
# Preview the first rows of the raw venue table after removing incomplete rows.
brookbarpub.head()

Unnamed: 0,categories,delivery.id,delivery.provider.icon.name,delivery.provider.icon.prefix,delivery.provider.icon.sizes,delivery.provider.name,delivery.url,hasPerk,id,location.address,location.cc,location.city,location.country,location.crossStreet,location.distance,location.formattedAddress,location.isFuzzed,location.isServiceAreaBusiness,location.labeledLatLngs,location.lat,location.lng,location.neighborhood,location.postalCode,location.state,name,referralId,venuePage.id
0,"[{'id': '4bf58dd8d48988d119941735', 'name': 'H...",,,,,,,False,4eee44112c5bafa619f9a8c2,8305 3rd Ave,US,Brooklyn,United States,83rd St,50,"[8305 3rd Ave (83rd St), Brooklyn, NY 11209, U...",,,"[{'label': 'display', 'lat': 40.62544661432081...",40.625447,-74.030246,,11209.0,NY,RED OAK Restaurant & Bar & Hookah Lounge,v-1553595590,73521084.0
1,"[{'id': '4bf58dd8d48988d157941735', 'name': 'N...",321014.0,/delivery_provider_seamless_20180129.png,https://fastly.4sqi.net/img/general/cap/,"[40, 50]",seamless,https://www.seamless.com/menu/cebu-bistro-8801...,False,4a9f1f91f964a5209b3c20e3,8801 3rd Ave,US,Brooklyn,United States,at 88th St,453,"[8801 3rd Ave (at 88th St), Brooklyn, NY 11209...",,,"[{'label': 'display', 'lat': 40.62181234249646...",40.621812,-74.031727,,11209.0,NY,Cebu' Bar & Bistro,v-1553595590,81894946.0
2,"[{'id': '4bf58dd8d48988d150941735', 'name': 'S...",,,,,,,False,4c2e94a1a0ced13a3f2d0f6e,8305 3rd Ave,US,Brooklyn,United States,83rd Street,54,"[8305 3rd Ave (83rd Street), Brooklyn, NY 1120...",,,"[{'label': 'display', 'lat': 40.62546292253087...",40.625463,-74.030161,,11209.0,NY,Vela Tapas Bar,v-1553595590,
3,"[{'id': '4bf58dd8d48988d1c4941735', 'name': 'R...",,,,,,,False,538bdb4d498ea323128e9a06,,US,Brooklyn,United States,,71,"[Brooklyn, NY, United States]",,,"[{'label': 'display', 'lat': 40.6251559, 'lng'...",40.625156,-74.030661,,,NY,Fillie's Restaurant and Bar,v-1553595590,
4,"[{'id': '4bf58dd8d48988d116941735', 'name': 'B...",,,,,,,False,4a9880f5f964a520692c20e3,8622 3rd Ave,US,Brooklyn,United States,at 87th St.,336,"[8622 3rd Ave (at 87th St.), Brooklyn, NY 1120...",,,"[{'label': 'display', 'lat': 40.62283909856897...",40.622839,-74.031411,,11209.0,NY,The Kettle Black,v-1553595590,151465375.0


In [14]:
# Reduce the venue table to name and coordinates, renumber the rows from 0 and
# give the three columns short, readable names.
columns = ['Name', 'Latitude', 'Longitude']
brookbarpubdata = brookbarpub[['name', 'location.lat', 'location.lng']].reset_index(drop=True)
brookbarpubdata.columns = columns
brookbarpubdata.head()

Unnamed: 0,Name,Latitude,Longitude
0,RED OAK Restaurant & Bar & Hookah Lounge,40.625447,-74.030246
1,Cebu' Bar & Bistro,40.621812,-74.031727
2,Vela Tapas Bar,40.625463,-74.030161
3,Fillie's Restaurant and Bar,40.625156,-74.030661
4,The Kettle Black,40.622839,-74.031411


In [50]:
# Feature matrix for clustering: the two coordinate columns of every bar/pub.
x = brookbarpubdata[['Latitude','Longitude']]

## Fitting and Transforming the latitudes and longitudes of Brooklyn bars and pubs

In [51]:
from sklearn.preprocessing import StandardScaler

# Standardise each coordinate column (zero mean, unit variance) before clustering.
scaler = StandardScaler()
cluster_dataset = scaler.fit(x).transform(x)
cluster_dataset

array([[-1.2871393 , -1.98162568],
       [-1.38806769, -2.02554602],
       [-1.2866864 , -1.97910753],
       ...,
       [-0.56990743,  0.54176737],
       [-0.79111035,  0.58353967],
       [-0.83540907,  0.44080706]])

## Creating clusters based on the number of available Mobile Police Units

In [61]:
# One cluster per available Mobile Police Unit.
available_police_units = 10
num_clusters = available_police_units

# k-means starts from random centroids; fixing the seed makes the cluster
# labels (and therefore the deployment points derived from them) reproducible
# across kernel restarts.
RANDOM_SEED = 0
k_means = KMeans(init="k-means++", n_clusters=num_clusters, n_init=15, random_state=RANDOM_SEED)
k_means.fit(cluster_dataset)
labels = k_means.labels_

print(labels)

[5 5 5 ... 1 1 1]


In [54]:
# Attach each venue's cluster label as a new column. `labels` comes from the
# model fitted on the scaled coordinates of this same frame, so it is assigned
# positionally row by row.
brookbarpubdata["Labels"] = labels

## Adding the corresponding cluster number to each bar/pub

In [55]:
# Preview the venue table with the cluster label column added.
brookbarpubdata.head()

Unnamed: 0,Name,Latitude,Longitude,Labels
0,RED OAK Restaurant & Bar & Hookah Lounge,40.625447,-74.030246,5
1,Cebu' Bar & Bistro,40.621812,-74.031727,5
2,Vela Tapas Bar,40.625463,-74.030161,5
3,Fillie's Restaurant and Bar,40.625156,-74.030661,5
4,The Kettle Black,40.622839,-74.031411,5


## Finding the centroids of the clusters - the latitude and longitude of each cluster where a Mobile Police Unit needs to be deployed

In [56]:
# Mean latitude/longitude of the venues in each cluster: the suggested
# deployment point of that cluster's police unit.
# The coordinate columns are selected explicitly because averaging the whole
# frame would also try to average the text column 'Name', which raises on
# current pandas. reset_index() turns 'Labels' back into a regular column
# without mutating in place.
brookbarpubcentroid = (
    brookbarpubdata
    .groupby('Labels')[['Latitude', 'Longitude']]
    .mean()
    .reset_index()
)
brookbarpubcentroid

Unnamed: 0,Labels,Latitude,Longitude
0,0,40.690781,-73.991637
1,1,40.664033,-73.928039
2,2,40.59416,-73.963081
3,3,40.714973,-73.955746
4,4,40.64761,-73.958024
5,5,40.626383,-74.015548
6,6,40.672689,-73.881872
7,7,40.697873,-73.93212
8,8,40.619129,-73.918273
9,9,40.682268,-73.973185


## Visualizing the points where Mobile Police Units should be deployed (black markers inside each differently coloured cluster) along with the bars and pubs in each cluster

In [64]:
# create map centred on the geocoded Brooklyn coordinates
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters: one evenly spaced rainbow colour per label.
# No scratch variable named `x` is used here, so the clustering input `x`
# defined earlier is not overwritten when this cell runs.
colors_array = cm.rainbow(np.linspace(0, 1, num_clusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add one marker per bar/pub, coloured by its cluster
for lat, lon, poi, cluster in zip(brookbarpubdata['Latitude'], brookbarpubdata['Longitude'], brookbarpubdata['Name'], brookbarpubdata['Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Labels are 0-based, so index the palette directly rather than relying on
    # cluster-1 wrapping around to the last colour for cluster 0.
    cluster_color = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=1).add_to(map_clusters)

# add one black marker per cluster centroid (suggested police unit position)
markers_colors = ['black']
# folium expects a colour string, not a list
centroid_color = markers_colors[0]
for lat, lon in zip(brookbarpubcentroid['Latitude'], brookbarpubcentroid['Longitude']):
    label = folium.Popup(str(lat) + ' , ' + str(lon), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=centroid_color,
        fill=True,
        fill_color=centroid_color,
        fill_opacity=1).add_to(map_clusters)

map_clusters