### Import Libraries and Get Location Data

In [1]:
#Required libraries FOR:

#data processing
import numpy as np
import pandas as pd
import json
from os import path

#web scraping
import urllib.request
from bs4 import BeautifulSoup

#plotting maps
#!conda install -c conda-forge folium --yes
#I have Ubuntu on my laptop so 
#!pip3 install folium
import folium

#retrieving latitude-longitude of Neighbors
from geopy.geocoders import Nominatim

#clustering
#!pip3 install --upgrade scipy
#!pip3 install --upgrade scikit-learn
from sklearn.cluster import KMeans

#getting data and conversion from JSON
import requests
from pandas.io.json import json_normalize


In [2]:
# Fetch the New York neighborhoods dataset once; later runs reuse the local copy.
# The download is done in Python (instead of shelling out to wget) so it works on
# any platform, and the file is only written after the server answered
# successfully -- a failed download can no longer leave an empty file behind that
# would satisfy the exists-check on the next run.
if not path.exists('newyork_data.json'):
    download_response = requests.get('https://cocl.us/new_york_dataset', timeout=60)
    download_response.raise_for_status()
    with open('newyork_data.json', 'wb') as data_file:
        data_file.write(download_response.content)
else:
    print("Data is already downloaded!")

Data is already downloaded!


In [3]:
# Parse the downloaded dataset into plain Python objects.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default.
with open('newyork_data.json', encoding='utf-8') as json_data:
    newyork_data = json.load(json_data)

In [4]:
# the per-neighborhood records live under the 'features' key of the parsed file
neighborhoods_data = newyork_data['features']

# column layout of the neighborhoods table
column_names = [
    'Borough',
    'Neighborhood',
    'Latitude',
    'Longitude',
]

# start from an empty table that already carries that layout
neighborhoods = pd.DataFrame(columns=column_names)

In [5]:
# Gather one record per feature and build the table in a single step.
# Growing the frame row by row with DataFrame.append copies the whole frame on
# every iteration, is unavailable in pandas >= 2.0, and duplicated every row
# whenever this cell was run a second time. Rebuilding from a list is idempotent.
neighborhood_records = []
for data in neighborhoods_data:
    borough = data['properties']['borough']
    neighborhood_name = data['properties']['name']

    # the coordinate pair is read as [longitude, latitude]
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]

    neighborhood_records.append({'Borough': borough,
                                 'Neighborhood': neighborhood_name,
                                 'Latitude': neighborhood_lat,
                                 'Longitude': neighborhood_lon})

# passing the column list keeps the expected layout even when there are no records
neighborhoods = pd.DataFrame(neighborhood_records, columns=column_names)

In [6]:
# Slice the full table into the two boroughs under study; each slice is
# renumbered from zero so positional and label indexing agree.
is_queens = neighborhoods['Borough'] == 'Queens'
is_brooklyn = neighborhoods['Borough'] == 'Brooklyn'

queen_neigh = neighborhoods.loc[is_queens].reset_index(drop=True)
brook_neigh = neighborhoods.loc[is_brooklyn].reset_index(drop=True)

### Plot regions of Interest

In [7]:
# Look up the coordinates used to center the map.
address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)

# geocode() yields None when the service has no match for the query; fail with
# a clear message here rather than with an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


In [8]:
def add_neighborhood_markers(target_map, neighborhoods_frame, color, fill_color):
    """Draw one circle marker per row of ``neighborhoods_frame`` on ``target_map``.

    Parameters
    ----------
    target_map : folium.Map
        Map the markers are added to (modified in place).
    neighborhoods_frame : pandas.DataFrame
        Must provide 'Latitude', 'Longitude', 'Borough' and 'Neighborhood' columns.
    color : str
        Outline color of the markers.
    fill_color : str
        Fill color of the markers.
    """
    for lat, lng, borough, neighborhood in zip(neighborhoods_frame['Latitude'],
                                               neighborhoods_frame['Longitude'],
                                               neighborhoods_frame['Borough'],
                                               neighborhoods_frame['Neighborhood']):
        # popup text is "<neighborhood>, <borough>"
        label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
        # parse_html is passed to the Popup only; the marker itself does not take it
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color=color,
            fill=True,
            fill_color=fill_color,
            fill_opacity=0.7).add_to(target_map)


# create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers for queens to map (blue)
add_neighborhood_markers(map_newyork, queen_neigh, color='blue', fill_color='#3186cc')

# add markers for brooklyn to map (red)
add_neighborhood_markers(map_newyork, brook_neigh, color='red', fill_color='#cc3139')

map_newyork

### Get Venues Data from Foursquare

In [9]:
import os

# Foursquare credentials are read from the environment so that secrets never
# live in the notebook or its version history. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the key pair that used to be hardcoded in this cell should be
# treated as leaked and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

# Missing credentials are reported but not fatal: venue responses that are
# already cached on disk can still be loaded without calling the API.
if not (CLIENT_ID and CLIENT_SECRET):
    print('Foursquare credentials are not set; only cached venue data can be used.')

In [10]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Collect the venues around each location into a single DataFrame.

    For every (name, latitude, longitude) triple the venue list is loaded from
    ``Data/<name with spaces removed>.json`` when that file exists; otherwise it
    is requested from the Foursquare ``venues/explore`` endpoint and written to
    that file for later runs.

    Parameters
    ----------
    names, latitudes, longitudes : iterable
        Parallel sequences, iterated together with ``zip``.
    radius : int, default 500
        Value sent as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude', 'Venue Category' and 'prefix'
        (the icon prefix of the venue's first category). A venue without any
        category gets a missing 'Venue Category' and an empty 'prefix'.
        The frame is empty, with the same columns, when there is nothing to report.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    RuntimeError
        If the API response does not contain any venue group.
    """
    import os  # local import: only needed to create the cache directory

    LIMIT = 200
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category',
               'prefix']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # NOTE(review): the cache key is only the name with spaces removed, so
        # two locations sharing a name would share one cache file -- confirm
        # that cannot happen for the inputs used.
        cache_file = 'Data/' + ('%s' % name).replace(" ", "") + '.json'

        if path.exists(cache_file):
            with open(cache_file) as f:
                results = json.load(f)
        else:
            # create the API request URL (only needed on a cache miss)
            url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
                CLIENT_ID,
                CLIENT_SECRET,
                VERSION,
                lat,
                lng,
                radius,
                LIMIT)

            # make the GET request; a timeout keeps a stalled connection from
            # hanging the notebook, and error statuses surface immediately
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            groups = response.json().get('response', {}).get('groups')
            if not groups:
                # do not cache: an error payload must not masquerade as "no venues"
                raise RuntimeError('Foursquare returned no venue groups for {!r}'.format(name))
            results = groups[0]['items']

            os.makedirs('Data', exist_ok=True)
            with open(cache_file, 'w') as f:
                json.dump(results, f)

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            categories = venue.get('categories') or []
            if categories:
                category_name = categories[0]['name']
                icon_prefix = categories[0]['icon']['prefix']
            else:
                # uncategorized venue: keep the row instead of failing on [0]
                category_name = None
                icon_prefix = ''
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category_name,
                icon_prefix))

    # passing the columns here (rather than assigning them afterwards) also
    # works when no rows were collected
    return pd.DataFrame(rows, columns=columns)

In [11]:
# Venues around every Queens neighborhood (default search radius).
queen_venues = getNearbyVenues(
    names=queen_neigh['Neighborhood'],
    latitudes=queen_neigh['Latitude'],
    longitudes=queen_neigh['Longitude'],
)

# Same lookup for the Brooklyn neighborhoods.
brook_venues = getNearbyVenues(
    names=brook_neigh['Neighborhood'],
    latitudes=brook_neigh['Latitude'],
    longitudes=brook_neigh['Longitude'],
)

In [12]:
#Pick only venues categorized under "Food"
def _select_food_venues(venues):
    """Return the rows whose 'prefix' contains '/food/', without the 'prefix' column.

    The selection is a single vectorized string match instead of appending
    matching rows one at a time (DataFrame.append is unavailable in
    pandas >= 2.0 and copies the frame on every call). Rows with a missing
    prefix are treated as non-food. The result is re-indexed from zero.
    """
    is_food = venues['prefix'].str.contains('/food/', regex=False, na=False)
    return venues[is_food].drop(columns=['prefix']).reset_index(drop=True)


brook_food_venues = _select_food_venues(brook_venues)
queen_food_venues = _select_food_venues(queen_venues)

In [13]:
#Split "Food" venues into Restaurants and Otherfood
def _split_restaurants(food_venues):
    """Partition venues by whether 'Venue Category' contains 'Restaurant'.

    Returns a ``(restaurants, otherfood)`` pair, each re-indexed from zero.
    The match is case-sensitive; rows with a missing category land in
    ``otherfood``. One boolean mask replaces the row-by-row
    DataFrame.append loop (unavailable in pandas >= 2.0).
    """
    is_restaurant = food_venues['Venue Category'].str.contains('Restaurant', regex=False, na=False)
    restaurants = food_venues[is_restaurant].reset_index(drop=True)
    otherfood = food_venues[~is_restaurant].reset_index(drop=True)
    return restaurants, otherfood


brook_restaurants, brook_otherfood = _split_restaurants(brook_food_venues)
queen_restaurants, queen_otherfood = _split_restaurants(queen_food_venues)

In [14]:
# Distinct categories among the non-restaurant food venues in Queens
queen_otherfood.loc[:, 'Venue Category'].unique()

array(['Dessert Shop', 'Bakery', 'Ice Cream Shop', 'Salad Place',
       'Bagel Shop', 'Pizza Place', 'Bubble Tea Shop', 'Food Truck',
       'Food', 'BBQ Joint', 'Souvlaki Shop', 'Juice Bar', 'Poke Place',
       'Deli / Bodega', 'Sandwich Place', 'Coffee Shop', 'Burger Joint',
       'Café', 'Diner', 'Donut Shop', 'Tea Room', 'Steakhouse',
       'Breakfast Spot', 'Snack Place', 'Fried Chicken Joint', 'Brewery',
       'Taco Place', 'Noodle House', 'Buffet', 'Wine Bar', 'Bistro',
       'Gastropub', 'Wings Joint', 'Frozen Yogurt Shop', 'Burrito Place',
       'Hot Dog Joint', 'Fish & Chips Shop', 'Dosa Place'], dtype=object)