# Data Preparation

### Task
Parse the json **nyc_geo.json** into the dataframe with the following columns:
- Borough
- Neighborhood
- Latitude
- Longitude

In [141]:
import json
import pandas as pd
import os
import requests
import numpy as np
import sqlite3
from sqlite3 import Error
import matplotlib.pyplot as plt
import math
import plotly.express as px
from area import area
from geopy.geocoders import Nominatim
import folium # map rendering library
import math

In [142]:
with open('data/nyc_geo.json', 'r') as f:
    nyc_geo = json.load(f)

nyc_geo

df_nyc_geo = pd.json_normalize(nyc_geo)
df_features = pd.json_normalize(nyc_geo['features'])

In [144]:
df_nyc_geo

Unnamed: 0,type,totalFeatures,features,bbox,crs.type,crs.properties.name
0,FeatureCollection,306,"[{'type': 'Feature', 'id': 'nyu_2451_34572.1',...","[-74.2492599487305, 40.5033187866211, -73.7061...",name,urn:ogc:def:crs:EPSG::4326


In [145]:
df_features

Unnamed: 0,type,id,geometry_name,geometry.type,geometry.coordinates,properties.name,properties.stacked,properties.annoline1,properties.annoline2,properties.annoline3,properties.annoangle,properties.borough,properties.bbox
0,Feature,nyu_2451_34572.1,geom,Point,"[-73.84720052054902, 40.89470517661]",Wakefield,1,Wakefield,,,0.0,Bronx,"[-73.84720052054902, 40.89470517661, -73.84720..."
1,Feature,nyu_2451_34572.2,geom,Point,"[-73.82993910812398, 40.87429419303012]",Co-op City,2,Co-op,City,,0.0,Bronx,"[-73.82993910812398, 40.87429419303012, -73.82..."
2,Feature,nyu_2451_34572.3,geom,Point,"[-73.82780644716412, 40.887555677350775]",Eastchester,1,Eastchester,,,0.0,Bronx,"[-73.82780644716412, 40.887555677350775, -73.8..."
3,Feature,nyu_2451_34572.4,geom,Point,"[-73.90564259591682, 40.89543742690383]",Fieldston,1,Fieldston,,,0.0,Bronx,"[-73.90564259591682, 40.89543742690383, -73.90..."
4,Feature,nyu_2451_34572.5,geom,Point,"[-73.9125854610857, 40.890834493891305]",Riverdale,1,Riverdale,,,0.0,Bronx,"[-73.9125854610857, 40.890834493891305, -73.91..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
301,Feature,nyu_2451_34572.302,geom,Point,"[-74.00011136202637, 40.75665808227519]",Hudson Yards,2,Hudson,Yards,,0.0,Manhattan,"[-74.00011136202637, 40.75665808227519, -74.00..."
302,Feature,nyu_2451_34572.303,geom,Point,"[-73.80553002968718, 40.58733774018741]",Hammels,1,Hammels,,,0.0,Queens,"[-73.80553002968718, 40.58733774018741, -73.80..."
303,Feature,nyu_2451_34572.304,geom,Point,"[-73.76596781445627, 40.611321691283834]",Bayswater,1,Bayswater,,,0.0,Queens,"[-73.76596781445627, 40.611321691283834, -73.7..."
304,Feature,nyu_2451_34572.305,geom,Point,"[-73.94563070334091, 40.756091297094706]",Queensbridge,1,Queensbridge,,,0.0,Queens,"[-73.94563070334091, 40.756091297094706, -73.9..."


In [146]:
df_data = df_features[ ['properties.borough', 'properties.name'] ]
df_data = df_data.rename(columns={'properties.borough': 'borough', 'properties.name': 'neighborhood'})
df_data['latitude'] = df_features['geometry.coordinates'].str[1]
df_data['longitude'] = df_features['geometry.coordinates'].str[0]

In [134]:
df_data

Unnamed: 0,borough,neighborhood,latitude,longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585
...,...,...,...,...
301,Manhattan,Hudson Yards,40.756658,-74.000111
302,Queens,Hammels,40.587338,-73.805530
303,Queens,Bayswater,40.611322,-73.765968
304,Queens,Queensbridge,40.756091,-73.945631


In [None]:
# also include the neiborhoods found on the house price data files
df_data = df_data.append(pd.DataFrame({'borough': 'Manhattan', 'neighborhood': 'Beekman/Sutton Place', 'latitude': 40.753385, 'longitude':-73.964809}, index=[0])).reset_index(drop=True)
df_data = df_data.append(pd.DataFrame({'borough': 'Manhattan', 'neighborhood': 'Broadway Corridor', 'latitude': 40.812433, 'longitude': -73.960534}, index=[0])).reset_index(drop=True)
df_data = df_data.append(pd.DataFrame({'borough': 'Brooklyn', 'neighborhood': 'Brooklyn Navy Yard', 'latitude': 40.702087, 'longitude':-73.972412}, index=[0])).reset_index(drop=True)
df_data = df_data.append(pd.DataFrame({'borough': 'Manhattan', 'neighborhood': 'Central Park', 'latitude': 40.765215, 'longitude':-73.796978}, index=[0])).reset_index(drop=True)
df_data = df_data.append(pd.DataFrame({'borough': 'Queens', 'neighborhood': 'Clearview', 'latitude': 40.779985, 'longitude':-73.964809}, index=[0])).reset_index(drop=True)
df_data = df_data.append(pd.DataFrame({'borough': 'Bronx', 'neighborhood': 'Crotona Park East', 'latitude': 40.83656, 'longitude':-73.893348}, index=[0])).reset_index(drop=True)
df_data = df_data.append(pd.DataFrame({'borough': 'Bronx', 'neighborhood': 'Foxhurst', 'latitude': 40.825558, 'longitude':-73.892691}, index=[0])).reset_index(drop=True)
df_data = df_data.append(pd.DataFrame({'borough': 'Manhattan', 'neighborhood': 'Garment District', 'latitude': 40.753735, 'longitude':-73.990759}, index=[0])).reset_index(drop=True)
df_data = df_data.append(pd.DataFrame({'borough': 'Brooklyn', 'neighborhood': 'Greenwood Heights', 'latitude': 40.657649, 'longitude':-73.997425}, index=[0])).reset_index(drop=True)
df_data = df_data.append(pd.DataFrame({'borough': 'Manhattan', 'neighborhood': 'Hell''s Kitchen', 'latitude': 40.762535, 'longitude':-73.993621}, index=[0])).reset_index(drop=True)
df_data = df_data.append(pd.DataFrame({'borough': 'Manhattan', 'neighborhood': 'Kips Bay', 'latitude': 40.741068, 'longitude':-73.977322}, index=[0])).reset_index(drop=True)
df_data = df_data.append(pd.DataFrame({'borough': 'Brooklyn', 'neighborhood': 'Mapleton', 'latitude': 40.619768, 'longitude':-73.980701}, index=[0])).reset_index(drop=True)
df_data = df_data.append(pd.DataFrame({'borough': 'Manhattan', 'neighborhood': 'NoLiTa', 'latitude': 40.722386, 'longitude':-73.995149}, index=[0])).reset_index(drop=True)
df_data = df_data.append(pd.DataFrame({'borough': 'Manhattan', 'neighborhood': 'NoMad', 'latitude': 40.744109, 'longitude':-73.986659}, index=[0])).reset_index(drop=True)
df_data = df_data.append(pd.DataFrame({'borough': 'Manhattan', 'neighborhood': 'Park/Fifth Ave. to 79th St.', 'latitude': 40.773673, 'longitude':-73.955823}, index=[0])).reset_index(drop=True)
df_data = df_data.append(pd.DataFrame({'borough': 'Manhattan', 'neighborhood': 'Riverside Dr./West End Ave.', 'latitude': 40.836572, 'longitude':-73.948488}, index=[0])).reset_index(drop=True)
df_data = df_data.append(pd.DataFrame({'borough': 'Brooklyn', 'neighborhood': 'South Slope - Greenwood Heights', 'latitude': 40.660932, 'longitude':-73.987796}, index=[0])).reset_index(drop=True)
df_data = df_data.append(pd.DataFrame({'borough': 'Manhattan', 'neighborhood': 'Theater District', 'latitude': 40.759043, 'longitude':-73.984464}, index=[0])).reset_index(drop=True)
df_data = df_data.append(pd.DataFrame({'borough': 'Manhattan', 'neighborhood': 'Two Bridges', 'latitude': 40.711727, 'longitude':-73.992555}, index=[0])).reset_index(drop=True)
df_data = df_data.append(pd.DataFrame({'borough': 'Bronx', 'neighborhood': 'Van Cortlandt Village', 'latitude': 40.883644, 'longitude':-73.894275}, index=[0])).reset_index(drop=True)
df_data = df_data.append(pd.DataFrame({'borough': 'Bronx', 'neighborhood': 'Woodstock', 'latitude': 40.817445, 'longitude':-73.903243}, index=[0])).reset_index(drop=True)

In [149]:
df_data.to_csv('data/neighborhood.csv',sep=';')

### Task
Use different data sources and APIs to collect information about the neigborhoods that can be used for segmentation.

### SQL Lite Database for storing the data captured from the APIs

In [152]:
def create_connection(path):
    connection = None
    try:
        connection = sqlite3.connect(path)
        print("Connection to SQLite DB successful")
    except Error as e:
        print(f"The error '{e}' occurred")

    return connection

def execute_query(connection, query):
    cursor = connection.cursor()
    try:
        cursor.execute(query)
        connection.commit()
        print("Query executed successfully")
    except Error as e:
        print(f"The error '{e}' occurred")
        
def execute_read_query(connection, query):
    cursor = connection.cursor()
    result = None
    try:
        cursor.execute(query)
        result = cursor.fetchall()
        return result
    except Error as e:
        print(f"The error '{e}' occurred")

In [181]:
connection = create_connection("data/midtermproject.sqlite")

Connection to SQLite DB successful


In [198]:
#connection.close()

#### Create the required tables

In [170]:
# 1. CREATE neighborhood table

#drop_neighborhood = "DROP TABLE neighborhood;"
#execute_query(connection, drop_neighborhood)  

create_neighborhood_table = """
CREATE TABLE IF NOT EXISTS neighborhood (
    borough TEXT NOT NULL,
    neighborhood TEXT NOT NULL,
    latitude TEXT NOT NULL,
    longitude TEXT NOT NULL,
    house_price_studio REAL,
    house_price_1_bed REAL,
    house_price_2_bed REAL,
    no_of_school INTEGER,
    distance_to_school REAL,
    no_of_hospital INTEGER,
    distance_to_hospital REAL,
    no_of_resturants INTEGER,
    no_of_banks INTEGER,
    criminal_rate REAL,
    density INTEGER,
    PRIMARY KEY (borough, neighborhood)
);
"""

execute_query(connection, create_neighborhood_table)  

Query executed successfully


In [171]:
# 2. CREATE poi_info table

#drop_poi_info = "DROP TABLE poi_info;"
#execute_query(connection, drop_poi_info)  

create_poi_into_table = """
CREATE TABLE IF NOT EXISTS poi_info (
    api TEXT NOT NULL,
    poi_id TEXT NOT NULL,
    name TEXT NOT NULL,
    address TEXT,
    province TEXT,
    city TEXT,
    country TEXT,
    postcode TEXT,
    timezone TEXT,
    latitude REAL,
    longitude REAL,
    phone TEXT,
    fax TEXT,
    email TEXT,
    social_media TEXT,
    is_closed TEXT,
    url TEXT,
    image_url TEXT,
    review_count INTEGER,
    rating REAL,
    price_level TEXT,
    PRIMARY KEY (api, poi_id)
);
"""

execute_query(connection, create_poi_into_table)  

Query executed successfully


In [153]:
# 3. CREATE poi_category table

#drop_poi_category = "DROP TABLE poi_category;"
#execute_query(connection, drop_poi_category)  

create_poi_category_table = """
CREATE TABLE IF NOT EXISTS poi_category (
    api TEXT NOT NULL,
    poi_id TEXT NOT NULL,
    borough TEXT NOT NULL,
    neighborhood TEXT NOT NULL,
    category TEXT NOT NULL ,
    PRIMARY KEY (api, poi_id, borough, category)
    FOREIGN KEY (api, poi_id) REFERENCES poi_info(api, poi_id)
);
"""

execute_query(connection, create_poi_category_table)  

Query executed successfully


#### Functions to insert records

In [191]:
def insert_poi_info_reccord(connection, record):
    insert_poi_info = """
    INSERT INTO
        poi_info (api, poi_id, name, address, province, city, country, postcode, timezone, latitude, longitude, phone, fax, email, social_media, is_closed, url, image_url, review_count, rating, price_level)
    VALUES
        (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?);
    """
    
    cursor = connection.cursor()
    try:
        cursor.execute(insert_poi_info, record)
        connection.commit()
        #print("Query executed successfully")
    except Error as e:
        #pass
        print(f"The error '{e}' occurred")
    
    #execute_query(connection, create_users)  

In [192]:
def insert_poi_category_record(connection, record):
    insert_poi_category = """
    INSERT INTO
        poi_category (api, poi_id, borough, neighborhood, category)
    VALUES
        (?,?,?,?,?);
    """
    
    cursor = connection.cursor()
    try:
        cursor.execute(insert_poi_category, record)
        connection.commit()
        #print("Query executed successfully")
    except Error as e:
        #pass
        print(f"The error '{e}' occurred")

#### Get the venue information from Foursquare API

In [16]:
# set the keys
foursquare_api_key = os.environ["FOURSQUARE_API_KEY"]
yelp_client_id = os.environ["YELP_CLIENT_ID"]
yelp_api_key = os.environ["YELP_API_KEY"]
google_api_key = os.environ["GOOGLE_API_KEY"]

In [17]:
# setup the parameters

# 7 POIs selected 
# Restaurant
# Bar
# Bank
# Bus Station
# Cafe
# School
# Hospital

categories = [{'foursquare':'13065', 'yelp': 'restaurants', 'google': 'restaurant', 'poi_system': 'restaurants'},
              {'foursquare':'13003', 'yelp': 'bars', 'google': 'bars', 'poi_system': 'bars'},
              {'foursquare':'11045', 'yelp': 'banks', 'google': 'banks', 'poi_system': 'banks'},
              {'foursquare':'19042', 'yelp': 'busstations', 'google': 'bus_stations', 'poi_system': 'busstations'},
              {'foursquare':'13034', 'yelp': 'cafes', 'google': 'cafes', 'poi_system': 'cafes'},
              {'foursquare':'12009', 'yelp': 'education', 'google': 'school', 'poi_system': 'schools'},
              {'foursquare':'15014', 'yelp': 'hospitalists', 'google': 'hospital', 'poi_system': 'hospitals'}
             ]

radius = '1000'

In [18]:
# send the request to foursquare to get the data
def get_foursquare_place(ll, radius, categories):
    url = 'https://api.foursquare.com/v3/places/search?'
    url = url + 'll='+ll
    url = url + '&radius='+radius
    url = url + '&categories='+categories
    url = url + '&fields=fsq_id,name,geocodes,location,categories,related_places,timezone,description,tel,fax,email,website,social_media,hours,hours_popular,rating,stats,popularity,price,menu,photos,tips,tastes'
    url = url + '&limit=50'

    headers = {
        'Accept': 'application/json',
        'Authorization': foursquare_api_key
    }

    res = requests.request("GET", url, headers=headers)

    return res.json()

In [None]:
# a function to get the data for a specified neighborhood
def collect_foursquare_place(ll, radius, borough, neighborhood, categories):
    for category in categories:
        result = get_foursquare_place(ll, radius, category['foursquare'])
        
        if 'results' in result:

            for i in result['results']:
                record = ['foursquare']
                record.append(i['fsq_id'])
                record.append(i['name'])
                record.append(i['location']['formatted_address'])
                record.append(i['location']['region'])
                record.append(i['location']['locality'])
                record.append(i['location']['country'])
                record.append(i['location']['postcode'] if 'postcode' in i['location'] else '')
                record.append(i['timezone'] if 'timezone' in i else '')
                record.append(float(i['geocodes']['main']['latitude']) if 'main' in i['geocodes'] else '')
                record.append(float(i['geocodes']['main']['longitude']) if 'main' in i['geocodes'] else '')
                record.append(i['tel'] if 'tel' in i else '')
                record.append('')
                record.append(i['email'] if 'email' in i else '')
                record.append(str(i['social_media'] if 'social_media' in i else '') )
                record.append('False' if i['hours']['open_now'] else 'True')
                record.append(i['website'] if 'website' in i else '')
                record.append('')
                if 'stats' in i:
                    if 'total_ratings' in i['stats']:
                        record.append(i['stats']['total_ratings'])
                    else:
                        record.append('')
                else:
                    record.append('')
                record.append(i['rating'] if 'rating' in i else '')
                record.append(i['price'] if 'price' in i else '')

                insert_poi_info_reccord(connection, tuple(record))

                record = ('foursquare', i['fsq_id'], borough, neighborhood, category['poi_system'])
                insert_poi_category_record(connection, record)

In [None]:
## a loop to get the data for all neighborhoods
for i in df_data.iterrows():
    ll = str(i[1][2])+','+str(i[1][3])
    print(ll, i[1][0], i[1][1])
    collect_foursquare_place(ll, radius, i[1][0], i[1][1], categories)

#### Get the criminal rate

In [21]:
df_criminal = pd.read_csv('data/NYPD_Complaint_Data_Current__Year_To_Date_.csv')

In [56]:
df_criminal

Unnamed: 0,CMPLNT_NUM,ADDR_PCT_CD,BORO_NM,CMPLNT_FR_DT,CMPLNT_FR_TM,CMPLNT_TO_DT,CMPLNT_TO_TM,CRM_ATPT_CPTD_CD,HADEVELOPT,HOUSING_PSA,...,TRANSIT_DISTRICT,VIC_AGE_GROUP,VIC_RACE,VIC_SEX,X_COORD_CD,Y_COORD_CD,Latitude,Longitude,Lat_Lon,New Georeferenced Column
0,903695881,69,,12/17/2021,22:13:00,,,COMPLETED,,,...,,25-44,BLACK,M,1011203,174515,40.645647,-73.902876,"(40.64564719600002, -73.90287588699994)",POINT (-73.90287588699994 40.64564719600002)
1,400462399,113,,12/17/2021,06:21:00,,,COMPLETED,,,...,,25-44,BLACK,F,1043252,187998,40.682499,-73.787269,"(40.682499421000045, -73.78726915499993)",POINT (-73.78726915499993 40.682499421000045)
2,587910690,113,,12/13/2021,20:05:00,,,COMPLETED,,,...,,25-44,BLACK,M,1042087,190443,40.689218,-73.791449,"(40.689218058000044, -73.79144856999993)",POINT (-73.79144856999993 40.689218058000044)
3,186105368,52,BRONX,12/07/2021,22:49:00,,,COMPLETED,,,...,,UNKNOWN,UNKNOWN,E,1017088,260895,40.882718,-73.881247,"(40.88271780700006, -73.88124724999993)",POINT (-73.88124724999993 40.88271780700006)
4,185325394,113,,12/06/2021,17:25:00,,,COMPLETED,,,...,,25-44,BLACK,M,1046176,193100,40.696483,-73.776680,"(40.69648322200004, -73.77667979199998)",POINT (-73.77667979199998 40.69648322200004)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
449501,798000815,60,BROOKLYN,01/08/2021,07:00:00,01/08/2021,07:15:00,COMPLETED,,,...,,45-64,WHITE,F,984003,149127,40.576003,-74.000891,"(40.57600335300003, -74.00089057799994)",POINT (-74.00089057799994 40.57600335300003)
449502,900510890,83,BROOKLYN,01/02/2021,04:50:00,,,COMPLETED,,,...,,UNKNOWN,UNKNOWN,E,1003229,194080,40.699369,-73.931555,"(40.69936940200005, -73.93155521999995)",POINT (-73.93155521999995 40.69936940200005)
449503,244132985,49,BRONX,01/02/2021,09:35:00,01/02/2021,09:37:00,COMPLETED,,,...,,UNKNOWN,UNKNOWN,D,1020845,250471,40.854092,-73.867718,"(40.854092353000055, -73.86771750999998)",POINT (-73.86771750999998 40.854092353000055)
449504,366756177,101,QUEENS,01/06/2021,19:40:00,01/06/2021,19:45:00,COMPLETED,,,...,,45-64,WHITE HISPANIC,F,1055868,156522,40.596012,-73.742116,"(40.596011813000075, -73.74211638099996)",POINT (-73.74211638099996 40.596011813000075)


In [83]:
# a function to calculate the distance between two points
def distance(origin, destination):
    lat1, lon1 = origin
    lat2, lon2 = destination
    radius = 6371 # km

    dlat = math.radians(lat2-lat1)
    dlon = math.radians(lon2-lon1)
    a = math.sin(dlat/2) * math.sin(dlat/2) + math.cos(math.radians(lat1)) \
        * math.cos(math.radians(lat2)) * math.sin(dlon/2) * math.sin(dlon/2)
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
    d = radius * c

    return d

In [123]:
len(df_criminal)

449506

In [171]:
criminal_count = {}
for i in df_data.iterrows():
    criminal_count[(i[1][1], i[1][0])] = 0

In [172]:
i = 0
for criminal_record in df_criminal.iterrows():
    i = i + 1
    if i % 10000 == 0:
        print(i)
    for neighborhood in df_data.iterrows():
        if distance( (criminal_record[1][32], criminal_record[1][33]), (neighborhood[1][2], neighborhood[1][3]) ) < 1:
            criminal_count[(neighborhood[1][1], neighborhood[1][0])] = criminal_count[(neighborhood[1][1], neighborhood[1][0])] + 1

10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
110000
120000
130000
140000
150000
160000
170000
180000
190000
200000
210000
220000
230000
240000
250000
260000
270000
280000
290000
300000
310000
320000
330000
340000
350000
360000
370000
380000
390000
400000
410000
420000
430000
440000


In [173]:
criminal_count

{('Wakefield', 'Bronx'): 2675,
 ('Co-op City', 'Bronx'): 1470,
 ('Eastchester', 'Bronx'): 905,
 ('Fieldston', 'Bronx'): 465,
 ('Riverdale', 'Bronx'): 574,
 ('Kingsbridge', 'Bronx'): 3370,
 ('Marble Hill', 'Manhattan'): 3070,
 ('Woodlawn', 'Bronx'): 1023,
 ('Norwood', 'Bronx'): 3401,
 ('Williamsbridge', 'Bronx'): 3706,
 ('Baychester', 'Bronx'): 1382,
 ('Pelham Parkway', 'Bronx'): 1642,
 ('City Island', 'Bronx'): 141,
 ('Bedford Park', 'Bronx'): 3912,
 ('University Heights', 'Bronx'): 6343,
 ('Morris Heights', 'Bronx'): 5332,
 ('Fordham', 'Bronx'): 8272,
 ('East Tremont', 'Bronx'): 4981,
 ('West Farms', 'Bronx'): 4474,
 ('High  Bridge', 'Bronx'): 5927,
 ('Melrose', 'Bronx'): 8129,
 ('Mott Haven', 'Bronx'): 4338,
 ('Port Morris', 'Bronx'): 1791,
 ('Longwood', 'Bronx'): 4532,
 ('Hunts Point', 'Bronx'): 744,
 ('Morrisania', 'Bronx'): 7406,
 ('Soundview', 'Bronx'): 3227,
 ('Clason Point', 'Bronx'): 294,
 ('Throgs Neck', 'Bronx'): 571,
 ('Country Club', 'Bronx'): 859,
 ('Parkchester', 'Bronx'

In [178]:
df_criminal_count = pd.DataFrame(criminal_count, index=[])

### Task

Visualize the neigborhoods in the graph.

In [86]:
df

Unnamed: 0,Borough,Year,FIPS County Code,NTA Code,NTA Name,Population,area,density
0,Bronx,2000,5,BX01,Claremont-Bathgate,28149,0.590962,47632.525880
1,Bronx,2000,5,BX03,Eastchester-Edenwald-Baychester,35422,1.446059,24495.544893
2,Bronx,2000,5,BX05,Bedford Park-Fordham North,55329,0.538774,102694.208900
3,Bronx,2000,5,BX06,Belmont,25967,0.486994,53321.001560
4,Bronx,2000,5,BX07,Bronxdale,34309,0.545483,62896.601203
...,...,...,...,...,...,...,...,...
385,Staten Island,2010,85,SI37,Stapleton-Rosebank,26453,1.667976,15859.343564
386,Staten Island,2010,85,SI45,New Dorp-Midland Beach,21896,1.991164,10996.583542
387,Staten Island,2010,85,SI48,Arden Heights,25238,1.810895,13936.755403
388,Staten Island,2010,85,SI54,Great Kills,40720,3.219239,12648.953331


In [88]:
# get NYC latitude and longitude

address = 'New York City, NY'
location = None

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
nyc_latitude = location.latitude
nyc_longitude = location.longitude

print('The geograpical coordinate of New York City are {}, {}.'.format(nyc_latitude, nyc_longitude))


# create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[nyc_latitude, nyc_longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_data['latitude'], df_data['longitude'], df_data['borough'], df_data['neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

The geograpical coordinate of New York City are 40.7127281, -74.0060152.
