## Importing the required libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
# Lift pandas' display truncation: every column and every row of a displayed
# DataFrame is rendered, so prefer .head() over showing whole frames.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
# NOTE(review): newer pandas releases expose this as pd.json_normalize and may no
# longer provide this import path - confirm against the installed pandas version.
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

import matplotlib.pyplot as plt # plotting library
# backend for rendering plots within the browser
%matplotlib inline 

# Matplotlib and associated plotting modules
# (repeat of the cm/colors imports earlier in this cell; harmless but redundant)
import matplotlib.cm as cm
import matplotlib.colors as colors

print('Libraries imported.')

Libraries imported.


## Downloading New York Data

In [205]:
# Download the New York neighbourhood dataset to 'newyork_data.json'.
# Uses the already-imported `requests` instead of shelling out to wget, so the
# cell works without an external binary and a failed download raises instead of
# silently printing the success message.
DATASET_URL = 'https://cocl.us/new_york_dataset'

response = requests.get(DATASET_URL, timeout=60)
response.raise_for_status()  # stop here on any HTTP error status
with open('newyork_data.json', 'wb') as dataset_file:
    dataset_file.write(response.content)
print('Data downloaded!')

Data downloaded!


## Data cleansing, munging, wrangling to get the required data

In [2]:
# Parse the downloaded dataset file into a Python object.
with open('newyork_data.json') as dataset_file:
    nydata = json.load(dataset_file)

In [3]:
# Column layout of the neighbourhood table.
column_names = [
    'Borough',
    'Neighborhood',
    'Latitude',
    'Longitude',
]

# Start with an empty frame that has exactly those columns and no rows.
newyork = pd.DataFrame(data=None, columns=column_names)

In [4]:
# The 'features' entry of the loaded JSON: the collection that is iterated
# below, one item per neighbourhood (each item is read for its 'properties'
# and 'geometry' fields).
neighbourhoods_data = nydata['features']

In [5]:
# Build the neighbourhood table in one pass.
# Collecting plain records and constructing the DataFrame once avoids the
# quadratic row-by-row DataFrame.append pattern (removed in pandas 2.0) and
# makes the cell idempotent: re-running it rebuilds the table instead of
# appending a second copy of every row.
newyork = pd.DataFrame(
    [
        {
            'Borough': feature['properties']['borough'],
            'Neighborhood': feature['properties']['name'],
            # 'coordinates' is read as [longitude, latitude]
            'Latitude': feature['geometry']['coordinates'][1],
            'Longitude': feature['geometry']['coordinates'][0],
        }
        for feature in neighbourhoods_data
    ],
    columns=['Borough', 'Neighborhood', 'Latitude', 'Longitude'],
)

In [6]:
# Preview the first rows of the neighbourhood table as a sanity check.
newyork.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


## Finding the latitude and longitude values of New York

In [7]:
# Resolve the city name to coordinates; they are used as the map centre later.
address = 'New York, NY'

geolocator = Nominatim(user_agent="brook_explorer")
location = geolocator.geocode(address)
# geocode() yields None when the address cannot be resolved; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
nylatitude = location.latitude
nylongitude = location.longitude
print('The geograpical coordinate of New York are {}, {}.'.format(nylatitude, nylongitude))

The geograpical coordinate of New York are 40.7308619, -73.9871558.


## Foursquare ID, Secret and API version details

In [1]:
import os  # needed only by this cell, to read the credentials from the environment

# Take the Foursquare credentials from environment variables so they never have
# to be typed into (and saved with) the notebook. When a variable is unset the
# value falls back to the empty placeholder.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# Cell output is stored inside the notebook file, so show only a mask of the
# secret (one '*' per character) rather than the secret itself.
print('CLIENT_SECRET:' + '*' * len(CLIENT_SECRET))

Your credentails:
CLIENT_ID: 
CLIENT_SECRET:


## Pulling data of all pubs and bars in New York using Foursquare, then cleansing, munging and wrangling to get the required data

In [9]:
# Query the Foursquare venue-search endpoint once per neighbourhood and keep the
# venues whose first listed category is a bar, pub or nightclub.

# Search settings are identical for every neighbourhood, so define them once.
radius = 500
LIMIT = 100
search_query = "%bar%pub%"
SEARCH_URL = 'https://api.foursquare.com/v2/venues/search'
WANTED_CATEGORIES = ('Bar', 'Pub', 'Nightclub')

matching_venues = []
for lat, lng in zip(newyork['Latitude'], newyork['Longitude']):
    # Passing the query string via `params` lets requests URL-encode each value;
    # the literal '%' characters in search_query are not valid inside a
    # hand-built URL.
    response = requests.get(
        SEARCH_URL,
        params={
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'll': '{},{}'.format(lat, lng),
            'v': VERSION,
            'query': search_query,
            'radius': radius,
            'limit': LIMIT,
        },
        timeout=30,
    )
    # Surface HTTP failures (bad credentials, quota, ...) as an explicit error
    # rather than a KeyError on a response without 'venues'.
    response.raise_for_status()
    results = response.json()

    for venue in results.get('response', {}).get('venues', []):
        categories = venue.get('categories')
        # Venues without any category are skipped; otherwise only the first
        # category decides whether the venue is kept.
        if categories and categories[0].get('shortName') in WANTED_CATEGORIES:
            matching_venues.append(venue)

# Flatten all collected venues in one call instead of growing a DataFrame row by
# row (quadratic, and DataFrame.append no longer exists in pandas 2.0).
# Columns are sorted alphabetically, as the previous append(sort=True) did.
nybarpub = json_normalize(matching_venues).sort_index(axis=1).reset_index(drop=True)
nybarpub['name'].count()

647

In [10]:
# Preview the first rows of the flattened Foursquare venue records.
nybarpub.head()

Unnamed: 0,categories,delivery.id,delivery.provider.icon.name,delivery.provider.icon.prefix,delivery.provider.icon.sizes,delivery.provider.name,delivery.url,hasPerk,id,location.address,location.cc,location.city,location.country,location.crossStreet,location.distance,location.formattedAddress,location.labeledLatLngs,location.lat,location.lng,location.neighborhood,location.postalCode,location.state,name,referralId,venuePage.id
0,"[{'id': '4bf58dd8d48988d116941735', 'name': 'B...",,,,,,,False,55e70f81498ee49baecd289e,"900 Baychester Avenue, 2nd Floor",US,Bronx,United States,,399,"[900 Baychester Avenue, 2nd Floor, Bronx, NY 1...","[{'label': 'display', 'lat': 40.8769761, 'lng'...",40.876976,-73.833091,,10475.0,NY,The City Lounge,v-1553859756,
1,"[{'id': '4bf58dd8d48988d11b941735', 'name': 'P...",,,,,,,False,56eb2d73498e0ec37f0c72ca,,US,Bronx,United States,,222,"[Bronx, NY, United States]","[{'label': 'display', 'lat': 40.88347546287676...",40.883475,-73.901648,,,NY,madden's Pub,v-1553859756,
2,"[{'id': '4bf58dd8d48988d11b941735', 'name': 'P...",,,,,,,False,571ff63c498ef61109f28cf3,5757 Broadway,US,Bronx,United States,236th,202,"[5757 Broadway (236th), Bronx, NY 10463, Unite...","[{'label': 'display', 'lat': 40.883327, 'lng':...",40.883327,-73.901777,,10463.0,NY,Madden's Uptown Bar & Grill,v-1553859756,
3,"[{'id': '4bf58dd8d48988d116941735', 'name': 'B...",,,,,,,False,4e991dde6da1ff6172130a82,,US,,United States,,431,"[New York, United States]","[{'label': 'display', 'lat': 40.88483810424805...",40.884838,-73.899841,,,New York,Jc Mac Irish Pub,v-1553859756,
4,"[{'id': '4bf58dd8d48988d116941735', 'name': 'B...",,,,,,,False,5138d604e4b08895fe5a13df,171 W 231st St,US,Bronx,United States,Albany,357,"[171 W 231st St (Albany), Bronx, NY 10463, Uni...","[{'label': 'display', 'lat': 40.8785, 'lng': -...",40.8785,-73.903331,,10463.0,NY,Harpurs Bar,v-1553859756,


## Data cleansing to remove null values and duplicate entries in the names, latitudes and longitudes of bars and pubs

In [11]:
# A venue is identified by its name plus coordinates: drop rows missing any of
# the three, then keep only the first occurrence of each identical triple.
key_columns = ['name', 'location.lat', 'location.lng']
nybarpub = (
    nybarpub
    .dropna(subset=key_columns)
    .drop_duplicates(subset=key_columns, keep='first')
)
# Number of venues left after cleaning.
nybarpub['name'].count()

564

In [12]:
# Reduce the venue records to the three fields used from here on, under
# friendlier column names and with a fresh 0..n-1 index.
columns = ['Name', 'Latitude', 'Longitude']
source_columns = ['name', 'location.lat', 'location.lng']
nybarpubdata = (
    nybarpub[source_columns]
    .reset_index(drop=True)
    .rename(columns=dict(zip(source_columns, columns)))
)
nybarpubdata.head()

Unnamed: 0,Name,Latitude,Longitude
0,The City Lounge,40.876976,-73.833091
1,madden's Pub,40.883475,-73.901648
2,Madden's Uptown Bar & Grill,40.883327,-73.901777
3,Jc Mac Irish Pub,40.884838,-73.899841
4,Harpurs Bar,40.8785,-73.903331


In [13]:
# Number of non-null venue names in the slimmed-down table.
nybarpubdata['Name'].count()

564

In [14]:
# Feature matrix for clustering: only the latitude/longitude columns.
x = nybarpubdata[['Latitude','Longitude']]

## Fitting and Transforming the latitudes and longitudes of New York bars and pubs

In [15]:
from sklearn.preprocessing import StandardScaler

# Fit a scaler on the coordinate columns and transform them in the same step;
# the result is a NumPy array with one row per venue.
coordinate_scaler = StandardScaler()
cluster_dataset = coordinate_scaler.fit_transform(x)
cluster_dataset

array([[ 2.15587688,  1.56248322],
       [ 2.25319379,  0.63164427],
       [ 2.25097081,  0.62988942],
       ...,
       [ 0.31987863, -0.68634825],
       [ 0.41067805, -0.65915813],
       [ 0.2916845 , -0.74017397]])

## Creating clusters based on the number of Police Outposts planned

In [16]:
# One cluster per planned outpost.
number_of_police_outposts = 25
num_clusters = number_of_police_outposts

# Build and fit the model in one expression (fit() returns the estimator).
# random_state is fixed so repeated runs produce the same clustering.
k_means = KMeans(
    n_clusters=num_clusters,
    init="k-means++",
    n_init=100,
    algorithm='elkan',
    random_state=5,
).fit(cluster_dataset)

# Cluster id assigned to each row of cluster_dataset.
labels = k_means.labels_

print(labels)

[24 15 15 15 15 15 15 15 15 15 24 24 24 24 15 24 24 24 15 15 15 21 21 21
 15 21 21 21  5  5  5  5  5  5  5  5  5  5  5 15 15  5  5  5  5  5  5  5
 24  1  1  1  1  9  9  9  9  9  9  9  4  4  4  4  4  4  4 16 16 10 10 10
 10 10 10 10  9  9  9  9  9  9  9 10 10 10 10 10 10 10 10 10 10 10 10 10
 10 10 10 10 10 10 10 10 10 10 10 10 10 23 16  4  4  4  4  4  4  4  9 10
 10 10 10 10 16 16 16 16  4  9  9  9  9  9  9  9  9  9  9  9  9  9  9  9
  9  9  9  9  1  3  3  3  3  3 21 21 15 14 14 14 14 14 14 14 14 14  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 12 12  0
 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 22 12 22 12
 22 22 22 22 22 22 22 22 22 22 22 22 22 22 22 22 22  3  3 12  3 12 12 12
 12 12  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3
  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3 17 17  3  3  3  3  3 17 17
  3  3  3  3 17  3  3  3  3  3  3  3  3  3  0 14 14 14 14 14 14 22  3 22
  3  3 22  3 22  3  3 17 17 17 17 17 17 17 17 17 17

## Adding the corresponding cluster number to each bar/pub

In [17]:
# Attach each venue's cluster id. `labels` comes from fitting on the scaled
# coordinates taken from this same table, so it is matched to rows by position.
nybarpubdata["Labels"] = labels

In [18]:
# Preview the venue table with its new 'Labels' column.
nybarpubdata.head()

Unnamed: 0,Name,Latitude,Longitude,Labels
0,The City Lounge,40.876976,-73.833091,24
1,madden's Pub,40.883475,-73.901648,15
2,Madden's Uptown Bar & Grill,40.883327,-73.901777,15
3,Jc Mac Irish Pub,40.884838,-73.899841,15
4,Harpurs Bar,40.8785,-73.903331,15


## Finding the centroids of the clusters - Latitude and Longitude values of each cluster where the Mobile Police Outposts need to be deployed

In [19]:
# Centroid of each cluster = mean latitude/longitude of its venues.
# The coordinate columns are selected explicitly: averaging the whole frame
# would include the string 'Name' column, which raises in pandas >= 2.0.
# reset_index() turns the 'Labels' group key back into a regular column
# without mutating a frame in place.
nybarpubcentroid = (
    nybarpubdata
    .groupby('Labels')[['Latitude', 'Longitude']]
    .mean()
    .reset_index()
)
nybarpubcentroid.head()

Unnamed: 0,Labels,Latitude,Longitude
0,0,40.779944,-73.958253
1,1,40.623455,-74.053082
2,2,40.759836,-73.7699
3,3,40.727979,-73.992308
4,4,40.595582,-73.944107


In [20]:
# Each outpost is numbered after the cluster it serves: copy the cluster id
# into an 'OutpostNumber' column.
nybarpubcentroid = nybarpubcentroid.assign(
    OutpostNumber=nybarpubcentroid['Labels']
)
nybarpubcentroid.head()

Unnamed: 0,Labels,Latitude,Longitude,OutpostNumber
0,0,40.779944,-73.958253,0
1,1,40.623455,-74.053082,1
2,2,40.759836,-73.7699,2
3,3,40.727979,-73.992308,3
4,4,40.595582,-73.944107,4


## Visualizing the points where Mobile Police units should be deployed (info-sign balloon) along with the bars and pubs in each cluster

In [21]:
# create map centred on the geocoded New York coordinates
map_clusters = folium.Map(location=[nylatitude, nylongitude], zoom_start=10.5)

# set color scheme for the clusters: one evenly spaced rainbow colour per
# cluster. Computed directly from num_clusters so that the name `x` (the
# clustering feature matrix) is not overwritten by this cell.
colors_array = cm.rainbow(np.linspace(0, 1, num_clusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add one circle marker per bar/pub, coloured by its cluster
for lat, lon, poi, cluster in zip(nybarpubdata['Latitude'], nybarpubdata['Longitude'], nybarpubdata['Name'], nybarpubdata['Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Cluster ids index the palette directly (ids start at 0), instead of
    # `cluster-1`, which only worked for cluster 0 through negative indexing.
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=1).add_to(map_clusters)

# add one red info-sign marker per cluster centroid (the proposed outpost position)
for lat, lon, cluster in zip(nybarpubcentroid['Latitude'], nybarpubcentroid['Longitude'], nybarpubcentroid['OutpostNumber']):
    label = folium.Popup('Outpost Number ' + str(cluster) + ' : (' + str(lat) + ' , ' + str(lon) + ')', parse_html=True)
    folium.Marker(
        [lat, lon],
        popup=label,
        icon=folium.Icon(color='red',icon='info-sign')
        ).add_to(map_clusters)

map_clusters

## Viewing the pubs/bars/nightclubs under Outpost 1

In [30]:
# Pair every venue with the outpost (cluster centroid) that serves it, dropping
# the centroid coordinates and the join key from the merged result.
outpostdata = (
    pd.merge(nybarpubcentroid, nybarpubdata, on='Labels')
    .drop(['Latitude_x', 'Longitude_x', 'Labels'], axis=1)
)
# Add the centroid rows themselves underneath the venue rows.
# pd.concat replaces DataFrame.append, which no longer exists in pandas 2.0;
# like append, it keeps the original row labels and, with sort=False, the
# column order.
outpostdata = pd.concat([outpostdata, nybarpubcentroid], sort=False)
# Show everything that belongs to outpost 1.
outpostdata[outpostdata['OutpostNumber']==1]

Unnamed: 0,OutpostNumber,Name_x,Name_y,Latitude_y,Longitude_y,Labels,Latitude,Longitude,Name
26,1,1.0,The Kettle Black,40.622839,-74.031411,,,,
27,1,1.0,Harp Bar Brooklyn,40.629789,-74.028533,,,,
28,1,1.0,O'Sullivans,40.621333,-74.03195,,,,
29,1,1.0,RJ's bar bay ridge,40.62232,-74.02801,,,,
30,1,1.0,The Wicked Monk,40.617129,-74.033849,,,,
31,1,1.0,ZZ's Bar & Pizza,40.643961,-74.072661,,,,
32,1,1.0,Steiny's Pub,40.642185,-74.076599,,,,
33,1,1.0,Vinum Wine Bar & Cafe,40.624853,-74.07489,,,,
34,1,1.0,Dock Street Bar & Grill,40.625225,-74.074719,,,,
35,1,1.0,Saloons Bar/lounge,40.613902,-74.064938,,,,
