## Importing the required libraries

In [2]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

import matplotlib.pyplot as plt # plotting library
# backend for rendering plots within the browser
%matplotlib inline 

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

print('Libraries imported.')

Libraries imported.


## Downloading New York Data

In [205]:
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
print('Data downloaded!')

Data downloaded!


## Data cleansing, munging, wrangling to get the required data

In [3]:
with open ('newyork_data.json') as json_data:
    nydata = json.load(json_data)

In [4]:
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
newyork = pd.DataFrame(columns=column_names)

In [5]:
neighbourhoods_data = nydata['features']

In [6]:
for data in neighbourhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    newyork = newyork.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [7]:
newyork.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


## Finding the latitude and longitude values of New York

In [8]:
address = 'New York, NY'

geolocator = Nominatim(user_agent="brook_explorer")
location = geolocator.geocode(address)
nylatitude = location.latitude
nylongitude = location.longitude
print('The geograpical coordinate of New York are {}, {}.'.format(nylatitude, nylongitude))

The geograpical coordinate of New York are 40.7308619, -73.9871558.


## Foursquare ID, Secret and API version details

In [75]:
CLIENT_ID = '' # your Foursquare ID
CLIENT_SECRET = '' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 
CLIENT_SECRET:


## Pulling data of all pubs and bars in New York using Foursquare, then cleansing, munging and wrangling to get the required data

In [10]:
n = newyork["Neighborhood"].count()
nybarpub = pd.DataFrame()
for i in range(0,n):
    lat = newyork.loc[i, "Latitude"]
    lng = newyork.loc[i, "Longitude"]
    radius = 500 
    LIMIT = 100
    search_query = "%bar%pub%"
    url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, lat, lng, VERSION, search_query, radius, LIMIT)
    results = requests.get(url).json()
    n = (pd.DataFrame(results['response'])).count()['venues']
    jq = pd.DataFrame()
    for k in range(0,n):
        if ((results['response']['venues'][k]['categories']) != []):
            if ((results['response']['venues'][k]['categories'][0]['shortName']) in ('Bar','Pub','Nightclub')):
                nq = pd.DataFrame(json_normalize((results['response']['venues'][k])))
                jq = jq.append(nq, sort=True)
    nybarpub = nybarpub.append(jq, sort = True)
nybarpub = nybarpub.reset_index(drop=True)
nybarpub['name'].count()

613

In [18]:
nybarpub.head()

Unnamed: 0,categories,delivery.id,delivery.provider.icon.name,delivery.provider.icon.prefix,delivery.provider.icon.sizes,delivery.provider.name,delivery.url,hasPerk,id,location.address,location.cc,location.city,location.country,location.crossStreet,location.distance,location.formattedAddress,location.labeledLatLngs,location.lat,location.lng,location.neighborhood,location.postalCode,location.state,name,referralId,venuePage.id
0,"[{'id': '4bf58dd8d48988d116941735', 'name': 'B...",,,,,,,False,55e70f81498ee49baecd289e,"900 Baychester Avenue, 2nd Floor",US,Bronx,United States,,399,"[900 Baychester Avenue, 2nd Floor, Bronx, NY 1...","[{'label': 'display', 'lat': 40.8769761, 'lng'...",40.876976,-73.833091,,10475.0,NY,The City Lounge,v-1553780437,
1,"[{'id': '4bf58dd8d48988d11b941735', 'name': 'P...",,,,,,,False,56eb2d73498e0ec37f0c72ca,,US,Bronx,United States,,222,"[Bronx, NY, United States]","[{'label': 'display', 'lat': 40.88347546287676...",40.883475,-73.901648,,,NY,madden's Pub,v-1553780439,
2,"[{'id': '4bf58dd8d48988d11b941735', 'name': 'P...",,,,,,,False,571ff63c498ef61109f28cf3,5757 Broadway,US,Bronx,United States,236th,202,"[5757 Broadway (236th), Bronx, NY 10463, Unite...","[{'label': 'display', 'lat': 40.883327, 'lng':...",40.883327,-73.901777,,10463.0,NY,Madden's Uptown Bar & Grill,v-1553780439,
3,"[{'id': '4bf58dd8d48988d116941735', 'name': 'B...",,,,,,,False,4e991dde6da1ff6172130a82,,US,,United States,,431,"[New York, United States]","[{'label': 'display', 'lat': 40.88483810424805...",40.884838,-73.899841,,,New York,Jc Mac Irish Pub,v-1553780439,
4,"[{'id': '4bf58dd8d48988d116941735', 'name': 'B...",,,,,,,False,5138d604e4b08895fe5a13df,171 W 231st St,US,Bronx,United States,Albany,357,"[171 W 231st St (Albany), Bronx, NY 10463, Uni...","[{'label': 'display', 'lat': 40.8785, 'lng': -...",40.8785,-73.903331,,10463.0,NY,Harpurs Bar,v-1553780439,


## Data cleansing to remove all null values in latitude, longitude and names of bars and pubs

In [19]:
nybarpub = nybarpub.dropna(subset = ['name','location.lat','location.lng'])
nybarpub = nybarpub.drop_duplicates(subset = ['name','location.lat','location.lng'], keep='first')
nybarpub['name'].count()

540

In [20]:
columns = ['Name', 'Latitude', 'Longitude']
nybarpubdata = pd.DataFrame()
nybarpubdata[columns] = nybarpub[['name','location.lat','location.lng']].reset_index(drop = True)
nybarpubdata.head()

Unnamed: 0,Name,Latitude,Longitude
0,The City Lounge,40.876976,-73.833091
1,madden's Pub,40.883475,-73.901648
2,Madden's Uptown Bar & Grill,40.883327,-73.901777
3,Jc Mac Irish Pub,40.884838,-73.899841
4,Harpurs Bar,40.8785,-73.903331


In [21]:
nybarpubdata['Name'].count()

540

In [22]:
x = nybarpubdata[['Latitude','Longitude']]

## Fitting and Transforming the latitudes and longitudes of New York bars and pubs

In [23]:
from sklearn.preprocessing import StandardScaler

cluster_dataset = StandardScaler().fit_transform(x)
cluster_dataset

array([[ 2.11896176,  1.51722994],
       [ 2.21446789,  0.59919782],
       [ 2.21228628,  0.5974671 ],
       ...,
       [ 0.40623583, -0.67384526],
       [ 0.28945639, -0.75374645],
       [ 0.35561911, -0.63421698]])

## Creating cluster based on the number of Police Outposts planned

In [69]:
number_of_police_outposts = 25
num_clusters = number_of_police_outposts
k_means = KMeans(init="k-means++", n_clusters=num_clusters, n_init=100, algorithm='elkan',random_state=5)
k_means.fit(cluster_dataset)
labels = k_means.labels_

print(labels)

[17  4  4  4  4  4  4  4  4  4 17 17 17 17 17 17 17 17  4  4  4  4  4  4
  4 18 18 18 20 20 20 20 20 20 20 20 20 20 20  4  4 20 20 20 20 20 20 20
 17  9  9  9  9 12 12 12 12 12 12 12 19 19 19 19 19 19 19 15 15 24 24 24
 24 24 24 24 12 12 12 12 12 12 15 24 24 24 24 24 24 24 24 24 24 24 24 24
 24 24 24 24 24 24 24 24 24 24 24 24 24 21 15 19 19  3  3  3  3  3 12 24
 24 24 24 24 15 15 15 15  3 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12
 12 12 12  9 16  8  8  8 18 18  4 18 18 18 18 18 18 18 18 18 22 22 22 22
 22 22 22 22 22 22 22 22 22 22 22 22 22 22 22 22 22 22  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16
 16 16  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
  8  8 16 16 16 16 16 16 16  8 16 16 16 16 16 16 16 16 16 16 16 16 16 16
 22 22 22 22 22 22  0  8  0  8  8  8  8  8  2  2  2  2  2  2  2  2  2  2
  2  2  2  2  2  2  2  2 14 14 14 14 14 14 14  6  6

## Adding the corresponding cluster number to each bar/pub

In [70]:
nybarpubdata["Labels"] = labels

In [71]:
nybarpubdata.head()

Unnamed: 0,Name,Latitude,Longitude,Labels,ClusterLabels
0,The City Lounge,40.876976,-73.833091,17,9
1,madden's Pub,40.883475,-73.901648,4,18
2,Madden's Uptown Bar & Grill,40.883327,-73.901777,4,18
3,Jc Mac Irish Pub,40.884838,-73.899841,4,18
4,Harpurs Bar,40.8785,-73.903331,4,18


## Finding the centroids of the clusters - Latitude and Longitude values of each cluster where the Mobile Police Outposts needs to be deployed

In [72]:
nybarpubcentroid= nybarpubdata.groupby('Labels').mean()
nybarpubcentroid.reset_index(level=0, inplace=True)
nybarpubcentroid.head()

Unnamed: 0,Labels,Latitude,Longitude,ClusterLabels
0,0,40.754495,-73.982594,35.594595
1,1,40.76698,-73.832559,22.0
2,2,40.707617,-74.006486,10.1
3,3,40.597159,-73.92109,5.0
4,4,40.868771,-73.902067,21.652174


In [73]:
nybarpubcentroid['OutpostNumber'] = nybarpubcentroid['Labels']
nybarpubcentroid.head()

Unnamed: 0,Labels,Latitude,Longitude,ClusterLabels,OutpostNumber
0,0,40.754495,-73.982594,35.594595,0
1,1,40.76698,-73.832559,22.0,1
2,2,40.707617,-74.006486,10.1,2
3,3,40.597159,-73.92109,5.0,3
4,4,40.868771,-73.902067,21.652174,4


## Visualizing the points where Mobile Police units should be deployed (info-sign baloon) along with the bars and pubs in each cluster

In [74]:
# create map
map_clusters = folium.Map(location=[nylatitude, nylongitude], zoom_start=10.5)

# set color scheme for the clusters
x = np.arange(num_clusters)
ys = [i + x + (i*x)**2 for i in range(num_clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(nybarpubdata['Latitude'], nybarpubdata['Longitude'], nybarpubdata['Name'], nybarpubdata['Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=1).add_to(map_clusters)

# add markers to the map
markers_colors = []
for lat, lon, cluster in zip(nybarpubcentroid['Latitude'], nybarpubcentroid['Longitude'], nybarpubcentroid['OutpostNumber']):
    label = folium.Popup('Outpost Number ' + str(cluster) + ' : (' + str(lat) + ' , ' + str(lon) + ')', parse_html=True)
    folium.Marker(
        [lat, lon],
        popup=label,
        icon=folium.Icon(color='red',icon='info-sign')
        ).add_to(map_clusters)

map_clusters

## Viewing the pubs/bars/nightclubs under Outpost 1

In [58]:
outpostdata= pd.merge(nybarpubcentroid, nybarpubdata, left_on = 'Labels', right_on = 'Labels' ).drop(['Latitude_x','Longitude_x', 'Labels'], axis = 1)
outpostdata[outpostdata['OutpostNumber']==1]

Unnamed: 0,OutpostNumber,Name,Latitude_y,Longitude_y
13,1,Tribeca Bar,40.719916,-74.006248
14,1,Bar at The Charles,40.718711,-74.002754
15,1,O'Hara's Restaurant & Pub,40.709894,-74.012836
16,1,Le Bar,40.713022,-74.016087
17,1,Mermaid Bar,40.712264,-74.016944
18,1,Jack's restaurant & bar,40.709504,-74.01374
19,1,1834 Bar & Burger,40.707807,-74.015992
20,1,Trinity Place,40.709412,-74.011144
21,1,Cat Bar,40.708068,-74.013514
22,1,The Irish American Pub,40.709798,-74.008907
