# NY City Data Collection

In this notebook:

1- All the steps used to collect the data from the web will be described 

2- The Neighbourhood and Hospitals distributions will be visualized on a map

3- Data will be saved in CSV files to be used by the next notebook

## The Necessary libraries are imported

In [1]:
import numpy as np

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

import matplotlib.cm as cm
import matplotlib.colors as colors

import folium # map rendering library

## The NY City Neighbourhood data is downloaded
(This part is fully adopted from the assignment)

In [2]:
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
with open('nyu_2451_34572-geojson.json') as json_data:
    newyork_data = json.load(json_data)

In [3]:
# Schema of the neighborhood table: one row per neighborhood with its borough
# and the coordinates of its centre point.
column_names = [
    'Borough',
    'Neighborhood',
    'Latitude',
    'Longitude',
]

# Empty frame that carries only the schema; the rows are filled in by a later cell.
neighborhoods = pd.DataFrame(columns=column_names)

In [4]:
# The 'features' entry of the loaded JSON is the sequence iterated below;
# each element is read for its 'properties' and 'geometry' fields.
neighborhoods_data = newyork_data['features']

In [5]:
# Build the neighborhood table in one pass.
# Records are collected in a plain list and turned into a DataFrame once:
# growing a frame row-by-row is quadratic, DataFrame.append no longer exists in
# pandas 2.x, and rebuilding from scratch keeps this cell idempotent (re-running
# it no longer duplicates every neighborhood).
neighborhood_records = []
for data in neighborhoods_data:
    properties = data['properties']

    # The coordinate pair is stored as [longitude, latitude].
    neighborhood_latlon = data['geometry']['coordinates']

    neighborhood_records.append({'Borough': properties['borough'],
                                 'Neighborhood': properties['name'],
                                 'Latitude': neighborhood_latlon[1],
                                 'Longitude': neighborhood_latlon[0]})

# Passing columns= keeps the expected schema even when there are no features.
neighborhoods = pd.DataFrame(neighborhood_records, columns=column_names)

In [6]:
# Resolve the city name to coordinates through the Nominatim geocoder;
# NYLat / NYLng are used as the centre of the maps drawn further down.
address = 'New York City, NY'
geolocator = Nominatim(user_agent="ny_explorer")

location = geolocator.geocode(address)
NYLat, NYLng = location.latitude, location.longitude

print(
    'The geograpical coordinate of New York City are {}, {}.'.format(NYLat, NYLng)
)

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


In [7]:
# Base map centred on the geocoded New York City coordinates
map_newyork = folium.Map(location=[NYLat, NYLng], zoom_start=10)

# One circle marker per neighborhood; the popup reads "<neighborhood>, <borough>"
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for lat, lng, borough, neighborhood in neighborhoods[marker_columns].itertuples(index=False, name=None):
    popup = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_newyork)

# Last expression of the cell: renders the map inline
map_newyork

### The data is saved to CSV file to have it available offline when needed

In [9]:
# Write the neighborhood table to the working directory; index=False keeps the
# row index out of the file so it does not come back as an extra column.
neighborhoods.to_csv('NYNeighborhoods.csv', index=False)

## The Foursquare API is utilized to collect hospitals' data
(User ID and Password are removed for security reasons)

In [None]:
# Foursquare credentials.
# They are read from the environment so real values never have to be typed into
# (and saved with) the notebook; the original placeholders remain as fallbacks
# when the variables are not set.
import os

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'xxxx') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'yyyy') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# Cell outputs are stored in the .ipynb file, so the secret is masked rather than echoed.
print('CLIENT_SECRET:' + '*' * len(CLIENT_SECRET))

### For each neighbourhood, Foursquare is searched for the nearby hospitals

#### To make sure the data of all available hospitals is collected, a radius of 5 km is used, which is expected to return the same hospital for several neighbourhoods (duplicates are removed later).

In [None]:
#To get the ID for different categories, visit:
# https://developer.foursquare.com/docs/build-with-foursquare/categories/
#Here we used the category for Hospitals: categoryId=4bf58dd8d48988d196941735

def get_category_type(row):
    """Return the name of the first category listed for a venue row.

    The row is expected to hold the venue's category list under either
    'categories' or 'venue.categories'. Returns None when the list is empty.
    Defined here because no earlier cell of the notebook provides it.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']


# Columns kept from the flattened Foursquare response
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng',
                    'venue.id','venue.location.city', 'venue.location.state',
                    'venue.location.formattedAddress', 'venue.location.labeledLatLngs']

# One frame per neighborhood is collected here and concatenated once at the end
# (appending to a DataFrame inside the loop is quadratic and DataFrame.append
# was removed in pandas 2.x).
hospital_frames = []
for lat,lng,name in zip(neighborhoods['Latitude'], neighborhoods['Longitude'],neighborhoods['Neighborhood']):
    print(name)

    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&categoryId=4bf58dd8d48988d196941735&radius={}&limit={}'.format(
            CLIENT_ID, CLIENT_SECRET, 20200416, lat, lng, 5000, 500)
    # Network failures, non-JSON bodies and responses without the expected
    # structure all skip this neighborhood instead of aborting the whole harvest.
    # The timeout stops a single stalled request from hanging the loop forever.
    try:
        results = requests.get(url, timeout=30).json()
        venues = results['response']['groups'][0]['items']
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as err:
        print('Error Occured', repr(err))
        continue

    TempHospitals = pd.json_normalize(venues) # flatten JSON

    # Some responses lack one or more of the expected columns (an empty venue
    # list is one such case); those neighborhoods are skipped.
    try:
        TempHospitals = TempHospitals.loc[:, filtered_columns].copy()
    except KeyError as err:
        print('Error Occured', repr(err))
        continue

    # filter the category for each row
    TempHospitals['venue.categories'] = TempHospitals.apply(get_category_type, axis=1)
    # clean columns: keep only the last dotted component, e.g. 'venue.location.lat' -> 'lat'
    TempHospitals.columns = [col.split(".")[-1] for col in TempHospitals.columns]
    hospital_frames.append(TempHospitals)

# An empty frame is kept as the result when every request failed.
NYHospitals = pd.concat(hospital_frames, ignore_index=True) if hospital_frames else pd.DataFrame()

NYHospitals.tail()

In [12]:
# Quick sanity check: (rows, columns) of the collected table, then its first rows.
print(NYHospitals.shape)
NYHospitals.head()

(17943, 9)


Unnamed: 0,name,categories,lat,lng,id,city,state,formattedAddress,labeledLatLngs
0,Montefiore Medical Center - Wakefield Campus,Hospital,40.894207,-73.86079,4addedeaf964a520646621e3,Bronx,NY,"['600 E 233rd St (Carpenter Ave.)', 'Bronx, NY...","[{'label': 'display', 'lat': 40.89420660561763..."
1,Montefiore Medical Pavillion,Medical Center,40.880135,-73.878712,4db854784b22f2ddb6399a1b,Bronx,NY,"['3400 Bainbridge Ave', 'Bronx, NY 10467', 'Un...","[{'label': 'display', 'lat': 40.88013478363959..."
2,Montefiore Medical Center Mount Vernon,Hospital,40.913123,-73.840528,4c051aa973a8c9b6a7f196e0,Mount Vernon,NY,"['12 N 7th Ave', 'Mount Vernon, NY 10550', 'Un...","[{'label': 'display', 'lat': 40.91312340197153..."
3,The Children’s Hospital at Montefiore,Hospital,40.88064,-73.879153,4c3e307edb3b1b8d56106595,Bronx,NY,"['3415 Bainbridge Ave (at E Gun Hill Rd)', 'Br...","[{'label': 'display', 'lat': 40.88063954722417..."
4,North Central Bronx Hospital,Hospital,40.880441,-73.881526,4b6de6a8f964a520439a2ce3,Bronx,NY,"['3224 Kossuth Ave (at E 208th St)', 'Bronx, N...","[{'label': 'display', 'lat': 40.88044133267162..."


### Data of the hospitals is saved in a CSV file for further offline use

In [None]:
# Persist the raw (still duplicated) hospital table so the API harvest does not
# have to be repeated; index=False leaves the row index out of the file.
NYHospitals.to_csv('NYHospitals.csv', index=False)

In [10]:
# Reload the saved table; from here on NYHospitals holds the CSV contents
# rather than the in-memory result of the API loop.
NYHospitals=pd.read_csv('NYHospitals.csv')

### The Unique hospitals are selected from the full set of data collected

In [13]:
# Keep one copy of every fully identical row (all columns are compared).
# NOTE(review): this presumably relies on NYHospitals having been reloaded from
# the CSV, where the list-valued columns appear as plain strings - confirm
# before running it directly on the in-memory API result.
UnHos=NYHospitals.drop_duplicates()
UnHos.shape

(1039, 9)

In [14]:
# Preview the first rows of the de-duplicated hospital table.
UnHos.head()

Unnamed: 0,name,categories,lat,lng,id,city,state,formattedAddress,labeledLatLngs
0,Montefiore Medical Center - Wakefield Campus,Hospital,40.894207,-73.86079,4addedeaf964a520646621e3,Bronx,NY,"['600 E 233rd St (Carpenter Ave.)', 'Bronx, NY...","[{'label': 'display', 'lat': 40.89420660561763..."
1,Montefiore Medical Pavillion,Medical Center,40.880135,-73.878712,4db854784b22f2ddb6399a1b,Bronx,NY,"['3400 Bainbridge Ave', 'Bronx, NY 10467', 'Un...","[{'label': 'display', 'lat': 40.88013478363959..."
2,Montefiore Medical Center Mount Vernon,Hospital,40.913123,-73.840528,4c051aa973a8c9b6a7f196e0,Mount Vernon,NY,"['12 N 7th Ave', 'Mount Vernon, NY 10550', 'Un...","[{'label': 'display', 'lat': 40.91312340197153..."
3,The Children’s Hospital at Montefiore,Hospital,40.88064,-73.879153,4c3e307edb3b1b8d56106595,Bronx,NY,"['3415 Bainbridge Ave (at E Gun Hill Rd)', 'Br...","[{'label': 'display', 'lat': 40.88063954722417..."
4,North Central Bronx Hospital,Hospital,40.880441,-73.881526,4b6de6a8f964a520439a2ce3,Bronx,NY,"['3224 Kossuth Ave (at E 208th St)', 'Bronx, N...","[{'label': 'display', 'lat': 40.88044133267162..."


### The hospitals' distribution is visualized on the NYC map

In [15]:
# NYLat / NYLng come from the geocoding cell. If the online geocoder is not
# available, they can be set by hand instead, e.g.:
#   NYLat = 40.7896239
#   NYLng = -73.9598939

# Base map of New York City centred on those coordinates
HospitalsMap = folium.Map(location=[NYLat, NYLng], zoom_start=10)

# One circle marker per unique hospital, with the hospital name as popup text
for lat, lng, hospital_name in UnHos[['lat', 'lng', 'name']].itertuples(index=False, name=None):
    popup = folium.Popup(hospital_name, parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(HospitalsMap)

# Last expression of the cell: renders the map inline
HospitalsMap

## The hospital distribution relative to the different neighbourhoods will be analysed in the next notebook:
## NY Hospitals to Neighborhoods Distances