### Import Libraries and Get Location Data

In [12]:
#Required libraries FOR:

#data processing
import numpy as np
import pandas as pd
import json
from os import path

#web scraping
import urllib.request
from bs4 import BeautifulSoup

#plotting maps
#!conda install -c conda-forge folium --yes
#I use Ubuntu on my laptop, so I install it with pip instead:
#!pip3 install folium
import folium

#retrieving latitude-longitude of neighborhoods
from geopy.geocoders import Nominatim

#clustering
#!pip3 install --upgrade scipy
#!pip3 install --upgrade scikit-learn
from sklearn.cluster import KMeans

#getting data and conversion from JSON
import requests
from pandas.io.json import json_normalize


In [13]:
# Fetch the New York neighborhoods dataset once; later runs reuse the local copy.
import os  # local import: only `path` is imported from os at the top of the notebook

DATA_URL = 'https://cocl.us/new_york_dataset'
DATA_FILE = 'newyork_data.json'

if not path.exists(DATA_FILE):
    # Download under a temporary name and rename only after the transfer
    # succeeded. A failed or interrupted download therefore raises here and
    # never leaves behind an empty/partial 'newyork_data.json' that the
    # existence check above would mistake for a finished download.
    partial_file = DATA_FILE + '.part'
    urllib.request.urlretrieve(DATA_URL, partial_file)
    os.replace(partial_file, DATA_FILE)
else:
    print("Data is already downloaded!")

Data is already downloaded!


In [14]:
# Parse the downloaded dataset into plain Python objects.
# The encoding is named explicitly so the read does not depend on the
# platform's default text encoding (JSON interchange files are UTF-8).
with open('newyork_data.json', encoding='utf-8') as json_data:
    newyork_data = json.load(json_data)

In [15]:
# Column layout shared by the neighborhoods table.
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']

# Empty table carrying those columns; rows are filled in from the dataset.
neighborhoods = pd.DataFrame(columns=column_names)

# The per-neighborhood records live under the 'features' key of the dataset.
neighborhoods_data = newyork_data["features"]

In [16]:
# Flatten every feature of the dataset into one record, then build the table
# in a single step. Growing a DataFrame row by row with DataFrame.append is
# quadratic and the method no longer exists in pandas 2.x; rebuilding the
# frame from scratch also makes this cell safe to re-run (no duplicated rows).
neighborhood_records = []
for data in neighborhoods_data:
    borough = data['properties']['borough']
    neighborhood_name = data['properties']['name']

    # Coordinates are stored as [longitude, latitude].
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]

    neighborhood_records.append({'Borough': borough,
                                 'Neighborhood': neighborhood_name,
                                 'Latitude': neighborhood_lat,
                                 'Longitude': neighborhood_lon})

neighborhoods = pd.DataFrame(neighborhood_records, columns=column_names)

In [17]:
# Keep only the two boroughs under study, renumbering each subset's rows from 0.
queen_neigh = neighborhoods.loc[neighborhoods['Borough'] == 'Queens'].reset_index(drop=True)
brook_neigh = neighborhoods.loc[neighborhoods['Borough'] == 'Brooklyn'].reset_index(drop=True)

#bronx_neigh.head(10)

### Plot regions of Interest

In [18]:
# Look up the coordinates used to center the map.
address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# Fail with a clear message instead of an AttributeError on `None.latitude`
# when the geocoder has no match for the address.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


In [19]:
# create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# One entry per borough to draw: (neighborhood table, outline color, fill color).
# Driving a single loop from this table replaces two copy-pasted marker loops
# that differed only in their data frame and colors.
borough_layers = [
    (queen_neigh, 'blue', '#3186cc'),  # Queens (blue)
    (brook_neigh, 'red', '#cc3139'),   # Brooklyn (red)
]

for borough_frame, outline_color, marker_fill in borough_layers:
    for lat, lng, borough, neighborhood in zip(borough_frame['Latitude'],
                                               borough_frame['Longitude'],
                                               borough_frame['Borough'],
                                               borough_frame['Neighborhood']):
        # Popup text shown when a marker is clicked: "<neighborhood>, <borough>".
        label = '{}, {}'.format(neighborhood, borough)
        label = folium.Popup(label, parse_html=True)
        # NOTE(review): parse_html=False is kept from the original call; it is
        # unclear whether CircleMarker uses it -- confirm before removing.
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color=outline_color,
            fill=True,
            fill_color=marker_fill,
            fill_opacity=0.7,
            parse_html=False).add_to(map_newyork)

map_newyork

### Get Venues Data from Foursquare

In [20]:
# Foursquare API credentials are read from the environment instead of being
# written into the notebook, so they are not leaked when the notebook is shared.
# Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting Jupyter.
# They default to empty strings so that runs served entirely from the local
# venue cache still work without credentials.
# NOTE(review): the key pair that used to be hardcoded here should be treated
# as compromised and rotated.
import os  # local import: only `path` is imported from os at the top of the notebook

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

#print('Your credentails:')
#print('CLIENT_ID: ' + CLIENT_ID)
#print('CLIENT_SECRET:' + CLIENT_SECRET)

In [21]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Collect the venues Foursquare reports around each neighborhood.

    For every (name, latitude, longitude) triple the venue list is read from
    'Data/<name without spaces>.json' when that file exists; otherwise it is
    requested from the Foursquare "explore" endpoint and written to that file.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Neighborhood names and their coordinates; iterated in parallel.
    radius : int, default 500
        Value sent as the ``radius`` query parameter of the request.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame is
        empty (but has these columns) when no venues were found. A venue
        without any category gets ``None`` as its 'Venue Category'.

    Raises
    ------
    requests.HTTPError
        If the Foursquare API answers with an error status.

    Notes
    -----
    The cache file name depends only on the neighborhood name, so a cached
    result is reused even when `radius` or the coordinates differ, and two
    neighborhoods with the same name share one cache file.
    """
    import os  # local import: only `path` is imported from os at the top of the notebook

    LIMIT = 100
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    # The cache directory must exist before the first result is written.
    os.makedirs('Data', exist_ok=True)

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        cache_file = 'Data/' + '{}'.format(name).replace(" ", "") + '.json'

        if path.exists(cache_file):
            print('File {} exists!'.format(cache_file))
            with open(cache_file) as f:
                results = json.load(f)
        else:
            # Build the request only on a cache miss; a timeout keeps one
            # stalled connection from hanging the whole notebook.
            response = requests.get(
                'https://api.foursquare.com/v2/venues/explore',
                params={
                    'client_id': CLIENT_ID,
                    'client_secret': CLIENT_SECRET,
                    'v': VERSION,
                    'll': '{},{}'.format(lat, lng),
                    'radius': radius,
                    'limit': LIMIT,
                },
                timeout=30,
            )
            response.raise_for_status()
            results = response.json()["response"]['groups'][0]['items']
            with open(cache_file, 'w') as f:
                json.dump(results, f)

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            # Some venues carry an empty category list; do not crash on them.
            categories = venue.get('categories') or []
            category = categories[0]['name'] if categories else None
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category))

    # Passing the columns to the constructor also covers the "no venues" case,
    # where assigning seven column names to an empty frame would raise.
    return pd.DataFrame(venue_rows, columns=columns)

In [22]:
# Venue tables for the two boroughs, one getNearbyVenues call apiece
# (default search radius).
queen_venues = getNearbyVenues(
    names=queen_neigh['Neighborhood'],
    latitudes=queen_neigh['Latitude'],
    longitudes=queen_neigh['Longitude'],
)
brook_venues = getNearbyVenues(
    names=brook_neigh['Neighborhood'],
    latitudes=brook_neigh['Latitude'],
    longitudes=brook_neigh['Longitude'],
)

Astoria
File Data/Astoria.json exists!
Woodside
File Data/Woodside.json exists!
Jackson Heights
File Data/JacksonHeights.json exists!
Elmhurst
File Data/Elmhurst.json exists!
Howard Beach
File Data/HowardBeach.json exists!
Corona
File Data/Corona.json exists!
Forest Hills
File Data/ForestHills.json exists!
Kew Gardens
File Data/KewGardens.json exists!
Richmond Hill
File Data/RichmondHill.json exists!
Flushing
File Data/Flushing.json exists!
Long Island City
File Data/LongIslandCity.json exists!
Sunnyside
File Data/Sunnyside.json exists!
East Elmhurst
File Data/EastElmhurst.json exists!
Maspeth
File Data/Maspeth.json exists!
Ridgewood
File Data/Ridgewood.json exists!
Glendale
File Data/Glendale.json exists!
Rego Park
File Data/RegoPark.json exists!
Woodhaven
File Data/Woodhaven.json exists!
Ozone Park
File Data/OzonePark.json exists!
South Ozone Park
File Data/SouthOzonePark.json exists!
College Point
File Data/CollegePoint.json exists!
Whitestone
File Data/Whitestone.json exists!
Baysi