In [94]:
import pandas as pd
from pandas.io.json import json_normalize
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
import requests
from bs4 import BeautifulSoup

In [3]:
from geopy.geocoders import Nominatim
import folium

In [4]:
import sys,os.path
import re

In [5]:
SFO_FILE = "/var/tmp/sfo_wiki.txt"
NYC_FILE = "/var/tmp/nyc_wiki.txt"

In [6]:
# scrape SFO wiki for neighborhoods

if (os.path.exists(SFO_FILE) and os.path.getsize(SFO_FILE)):
    #read from file instead of the net
    print("reading sfo wiki from file")
    with open(SFO_FILE) as rsf:
        sfo_text = rsf.read()
else:
    print("reading sfo wiki from net")
    sfo_wiki = requests.get("https://en.wikipedia.org/wiki/List_of_neighborhoods_in_San_Francisco")
    if sfo_wiki.ok:
        #write it to file
        sfo_text = sfo_wiki.text
        with open(SFO_FILE, "w") as wsf:
            wsf.write(sfo_wiki.text)
    else:
        sys.exit("error reading wiki!")

reading sfo wiki from file


In [7]:
# scrape NYC wiki for neighborhoods

if (os.path.exists(NYC_FILE) and os.path.getsize(NYC_FILE)):
    #read from file instead of the net
    print("reading nyc wiki from file")
    with open(NYC_FILE) as rny:
        nyc_text = rny.read()
else:
    print("reading nyc wiki from net")
    nyc_wiki = requests.get("https://en.wikipedia.org/wiki/Neighborhoods_in_New_York_City")
    if nyc_wiki.ok:
        #write it to file
        nyc_text = nyc_wiki.text
        with open(NYC_FILE, "w") as wny:
            wny.write(nyc_wiki.text)
    else:
        sys.exit("error reading wiki!")

reading nyc wiki from file


In [8]:
sfo_soup = BeautifulSoup(sfo_text, "lxml")

nyc_soup = BeautifulSoup(nyc_text, "lxml")

In [9]:
# begin - process SFO data

sfo_neigh = sfo_soup.find_all('span',class_="toctext")

In [10]:
sfo_neighborhoods = []
for n in sfo_neigh:
    sfo_neighborhoods.append(n.string)
print("count is: ",len(sfo_neighborhoods))

count is:  123


In [11]:
sfo_neighborhoods[0:5]

['Alamo Square',
 'Anza Vista',
 'Ashbury Heights',
 'Balboa Park',
 'Balboa Terrace']

In [12]:
sfo_neighborhoods[-5:]

['Yerba Buena',
 'See also',
 'References',
 'External links',
 'Specific neighborhoods']

In [13]:
sfo_neighborhoods = sfo_neighborhoods[:-4]

In [14]:
len(sfo_neighborhoods)

119

In [15]:
# end - process SFO wiki data

sfo_neighborhoods[-5:]

['West Portal',
 'Western Addition',
 'Westwood Highlands',
 'Westwood Park',
 'Yerba Buena']

In [16]:
# begin - process NYC wiki data

nyc_rows = nyc_soup.select("table.wikitable.sortable > tbody > tr > td:nth-of-type(5) > a")

In [17]:
type(nyc_rows)

list

In [18]:
nyc_rows[:5]

[<a href="/wiki/Melrose,_Bronx" title="Melrose, Bronx">Melrose</a>,
 <a href="/wiki/Mott_Haven,_Bronx" title="Mott Haven, Bronx">Mott Haven</a>,
 <a href="/wiki/Port_Morris,_Bronx" title="Port Morris, Bronx">Port Morris</a>,
 <a href="/wiki/Hunts_Point,_Bronx" title="Hunts Point, Bronx">Hunts Point</a>,
 <a href="/wiki/Longwood,_Bronx" title="Longwood, Bronx">Longwood</a>]

In [19]:
nyc_rows[-5:]

[<a href="/wiki/Prince%27s_Bay,_Staten_Island" title="Prince's Bay, Staten Island">Prince's Bay</a>,
 <a href="/wiki/Richmond_Valley,_Staten_Island" title="Richmond Valley, Staten Island">Richmond Valley</a>,
 <a href="/wiki/Rossville,_Staten_Island" title="Rossville, Staten Island">Rossville</a>,
 <a href="/wiki/Tottenville,_Staten_Island" title="Tottenville, Staten Island">Tottenville</a>,
 <a href="/wiki/Woodrow,_Staten_Island" title="Woodrow, Staten Island">Woodrow</a>]

In [20]:
nyc_neighborhoods = []
for row in nyc_rows:
    neighs = [str(x).strip() for x in str(row.string).split(',')]
    nyc_neighborhoods.extend(neighs)

In [21]:
nyc_neighborhoods[0:5]

['Melrose', 'Mott Haven', 'Port Morris', 'Hunts Point', 'Longwood']

In [22]:
nyc_neighborhoods[-5:]

["Prince's Bay", 'Richmond Valley', 'Rossville', 'Tottenville', 'Woodrow']

In [23]:
# end - process NYC wiki data

len(nyc_neighborhoods)

328

In [24]:
# get NYC lats & lngs

geolocator = Nominatim(user_agent="nyc_sfo_explorer")

In [25]:
dfny_file = "/var/tmp/nyc2_df.csv"
dfsf_file = "/var/tmp/sfo2_df.csv"

In [26]:
if not os.path.exists(dfny_file):
    
    print('getting lats & lngs from net')
    lats = []
    lngs = []
    neighs = []
    missing_ny_neighs = []
    
    for loc in nyc_neighborhoods:
            addr = loc + ", New York, NY"
            #print("addr: ", addr)
            location = geolocator.geocode(addr)
            if (location is not None):
                lat = location.latitude
                lng = location.longitude
                #print("addr {} is at lat: {}, lng: {}".format(addr, lat,lng))
                lats.append(lat)
                lngs.append(lng)
                neighs.append(loc)
            else:
                #print("could not find geocode for {}".format(loc))
                missing_ny_neighs.append(loc)
                
    dfny = pd.DataFrame(list(zip(neighs,lats,lngs)), columns=['Neighborhood', 'Latitude', 'Longitude'])
    dfny.to_csv(dfny_file, index=False)
    
else:
    print('getting lats & lngs from disk')
    dfny = pd.read_csv(dfny_file)

getting lats & lngs from disk


In [27]:
dfny.shape

(314, 3)

In [28]:
dfny.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Melrose,40.824545,-73.910414
1,Mott Haven,40.80899,-73.922915
2,Port Morris,40.801515,-73.909581
3,Hunts Point,40.812601,-73.884025
4,Longwood,42.953115,-78.369465


In [29]:
dfny.tail()

Unnamed: 0,Neighborhood,Latitude,Longitude
309,Prince's Bay,40.528994,-74.197644
310,Richmond Valley,40.520105,-74.229311
311,Rossville,40.555624,-74.212898
312,Tottenville,40.511217,-74.249312
313,Woodrow,40.543439,-74.197644


In [30]:
dfny.describe()

Unnamed: 0,Latitude,Longitude
count,314.0,314.0
mean,40.927502,-74.102143
std,0.684101,0.85324
min,40.511217,-78.799202
25%,40.637197,-74.006881
50%,40.728297,-73.941158
75%,40.824445,-73.852635
max,44.75161,-72.173984


In [31]:
# begin - NYC data clean up 
dfny.drop_duplicates(keep='first', inplace=True)

In [32]:
# clean up outliers - likely from the geocoder not getting the address right

# Lngs IQR
ny_lng_Q1 = dfny['Longitude'].quantile(.25)
ny_lng_Q3 = dfny['Longitude'].quantile(.75)
ny_lng_IQR = ny_lng_Q3 - ny_lng_Q1

# Lats IQR
ny_lat_Q1 = dfny['Latitude'].quantile(.25)
ny_lat_Q3 = dfny['Latitude'].quantile(.75)
ny_lat_IQR = ny_lat_Q3 - ny_lat_Q1

In [33]:
# drop longitude outliers

dfny.drop(dfny[(dfny['Longitude'] > (ny_lng_Q3 + 1.5 * ny_lng_IQR)) | (dfny['Longitude'] < (ny_lng_Q1 - 1.5 * ny_lng_IQR))].index, axis=0, inplace=True)

In [34]:
# drop latitide outliers

dfny.drop(dfny[(dfny['Latitude'] > (ny_lat_Q3 + 1.5 * ny_lat_IQR)) | (dfny['Latitude'] < (ny_lat_Q1 - 1.5 * ny_lat_IQR))].index, axis=0, inplace=True)

In [35]:
# remove Bronx River neighborhood from the listing. wrong data from wiki ...

dfny.drop(dfny[dfny['Neighborhood'] == 'Bronx River'].index, inplace=True)

In [36]:
dfny.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Melrose,40.824545,-73.910414
1,Mott Haven,40.80899,-73.922915
2,Port Morris,40.801515,-73.909581
3,Hunts Point,40.812601,-73.884025
5,Claremont,40.839876,-73.907328


In [37]:
dfny.tail()

Unnamed: 0,Neighborhood,Latitude,Longitude
308,Pleasant Plains,40.523994,-74.2157
309,Prince's Bay,40.528994,-74.197644
310,Richmond Valley,40.520105,-74.229311
311,Rossville,40.555624,-74.212898
313,Woodrow,40.543439,-74.197644


In [38]:
dfny.reset_index(drop=True, inplace=True)

In [39]:
# end - NYC data clean up

dfny.shape

(255, 3)

In [40]:
# begin - NYC merge neighborhoods with similar lats & lngs

dfgny = dfny.groupby(['Latitude', 'Longitude'], as_index=False).agg({'Neighborhood': lambda x: ','.join(x)})

In [41]:
dfgny.tail()

Unnamed: 0,Latitude,Longitude,Neighborhood
245,40.894444,-73.903333,Fieldston
246,40.897877,-73.852357,Wakefield
247,40.898155,-73.867357,Woodlawn
248,40.90056,-73.90639,Riverdale
249,40.909821,-73.807911,Pelham Gardens


In [42]:
dfgny.loc[dfgny['Neighborhood'].str.contains(r'(Grave|Astoria|Corona|Flushing|Jamaica)')]

  """Entry point for launching an IPython kernel.


Unnamed: 0,Latitude,Longitude,Neighborhood
36,40.596134,-73.973943,"Gravesend,East Gravesend"
108,40.691485,-73.805677,"Jamaica,South Jamaica"
114,40.702185,-73.936434,"Flushing South,East Flushing"
124,40.713322,-73.782506,Jamaica Estates
160,40.744181,-73.835854,Flushing
162,40.746959,-73.860146,"North Corona,Corona"
181,40.772015,-73.930267,"Astoria,Old Astoria"


In [43]:
type(dfgny)

pandas.core.frame.DataFrame

In [44]:
dfgny.columns

Index(['Latitude', 'Longitude', 'Neighborhood'], dtype='object')

In [45]:
# end - NYC merge

dfg_nyc = dfgny[['Neighborhood','Latitude', 'Longitude']]

In [46]:
# begin - NYC map lats & lngs

nyc_addr = "New York, NY"
nyc_loc = geolocator.geocode(nyc_addr)
nyc_lat = nyc_loc.latitude
nyc_lng = nyc_loc.longitude

In [47]:
map_nyc = folium.Map(location=[nyc_lat, nyc_lng], zoom_start=10)

In [48]:
# adding markers for the neighborhoods on the map
for lat, lng, neighborhood in zip(dfg_nyc['Latitude'], dfg_nyc['Longitude'], dfg_nyc['Neighborhood']):
    label = '{}'.format(neighborhood) 
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc', fill_opacity=0.7, parse_html=False).add_to(map_nyc)

In [49]:
# end - NYC map
map_nyc

In [50]:
# begin - SFO get lats & lngs

if not os.path.exists(dfsf_file):
    
    print('getting lats & lngs from net')
    lats = []
    lngs = []
    neighs = []
    missing_sf_neighs = []
    
    for loc in sfo_neighborhoods:
            addr = loc + ", San Francisco, CA"
            #print("addr: ", addr)
            location = geolocator.geocode(addr)
            if (location is not None):
                lat = location.latitude
                lng = location.longitude
                #print("addr {} is at lat: {}, lng: {}".format(addr, lat,lng))
                lats.append(lat)
                lngs.append(lng)
                neighs.append(loc)
            else:
                #print("could not find geocode for {}".format(loc))
                missing_sf_neighs.append(loc)
                
    dfsf = pd.DataFrame(list(zip(neighs,lats,lngs)), columns=['Neighborhood', 'Latitude', 'Longitude'])
    dfsf.to_csv(dfsf_file, index=False)
    
else:
    print('getting lats & lngs from disk')
    dfsf = pd.read_csv(dfsf_file)

getting lats & lngs from disk


In [51]:
dfsf.shape

(83, 3)

In [52]:
dfsf.iloc[0:5]

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Alamo Square,37.776357,-122.434694
1,Anza Vista,37.780836,-122.443149
2,Balboa Park,37.724949,-122.444805
3,Bayview,37.728889,-122.3925
4,Belden Place,37.791744,-122.403886


In [53]:
# end - SFO get lats & lngs
dfsf.iloc[-5:]

Unnamed: 0,Neighborhood,Latitude,Longitude
78,Union Square,37.787936,-122.407517
79,Visitacion Valley,37.712132,-122.409713
80,West Portal,37.741141,-122.465634
81,Western Addition,37.779559,-122.42981
82,Yerba Buena,-5.735634,-79.043992


In [54]:
# begin - SFO data clean up

dfsf.drop_duplicates(keep='first', inplace=True)

In [56]:
# clean up outliers - likely from the geocoder not getting the address right

# Lngs IQR
sf_lng_Q1 = dfsf['Longitude'].quantile(.25)
sf_lng_Q3 = dfsf['Longitude'].quantile(.75)
sf_lng_IQR = sf_lng_Q3 - sf_lng_Q1

# Lats IQR
sf_lat_Q1 = dfsf['Latitude'].quantile(.25)
sf_lat_Q3 = dfsf['Latitude'].quantile(.75)
sf_lat_IQR = sf_lat_Q3 - sf_lat_Q1

In [57]:
# drop longitude outliers

dfsf.drop(dfsf[(dfsf['Longitude'] > (sf_lng_Q3 + 1.5 * sf_lng_IQR)) | (dfsf['Longitude'] < (sf_lng_Q1 - 1.5 * sf_lng_IQR))].index, axis=0, inplace=True)

In [58]:
# drop latitide outliers

dfsf.drop(dfsf[(dfsf['Latitude'] > (sf_lat_Q3 + 1.5 * sf_lat_IQR)) | (dfsf['Latitude'] < (sf_lat_Q1 - 1.5 * sf_lat_IQR))].index, axis=0, inplace=True)

In [60]:
dfsf.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Alamo Square,37.776357,-122.434694
1,Anza Vista,37.780836,-122.443149
2,Balboa Park,37.724949,-122.444805
3,Bayview,37.728889,-122.3925
4,Belden Place,37.791744,-122.403886


In [61]:
dfsf.tail()

Unnamed: 0,Neighborhood,Latitude,Longitude
77,Twin Peaks,37.75464,-122.44648
78,Union Square,37.787936,-122.407517
79,Visitacion Valley,37.712132,-122.409713
80,West Portal,37.741141,-122.465634
81,Western Addition,37.779559,-122.42981


In [62]:
dfsf.reset_index(drop=True, inplace=True)

In [63]:
# merge neighborhoods with similar lats & lngs

dfgsf = dfsf.groupby(['Latitude', 'Longitude'], as_index=False).agg({'Neighborhood': lambda x: ','.join(x)})

In [64]:
dfgsf.shape

(79, 3)

In [65]:
dfgsf.tail()

Unnamed: 0,Latitude,Longitude,Neighborhood
74,37.80273,-122.405851,Telegraph Hill
75,37.802984,-122.437472,Marina District
76,37.806532,-122.420649,Buena Vista
77,37.809167,-122.416599,Fisherman's Wharf
78,37.823354,-122.370153,Treasure Island


In [66]:
dfgsf.loc[dfgsf['Neighborhood'].str.contains(r'(Portola|Financial)')]

  """Entry point for launching an IPython kernel.


Unnamed: 0,Latitude,Longitude,Neighborhood
8,37.726379,-122.408551,"Portola,Portola Place"
68,37.793647,-122.398938,"Financial District,Financial District South"


In [67]:
dfgsf.columns

Index(['Latitude', 'Longitude', 'Neighborhood'], dtype='object')

In [68]:
dfg_sfo = dfgsf[['Neighborhood','Latitude', 'Longitude']]

In [69]:
# end - SFO clean up and merge
dfg_sfo.shape

(79, 3)

In [70]:
sfo_addr = "San Francisco, CA"
sfo_loc = geolocator.geocode(sfo_addr)
sfo_lat = sfo_loc.latitude
sfo_lng = sfo_loc.longitude

In [71]:
map_sfo = folium.Map(location=[sfo_lat, sfo_lng], zoom_start=10)

In [72]:
# adding markers for the neighborhoods on the map
for lat, lng, neighborhood in zip(dfg_sfo['Latitude'], dfg_sfo['Longitude'], dfg_sfo['Neighborhood']):
    label = '{}'.format(neighborhood) 
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc', fill_opacity=0.7, parse_html=False).add_to(map_sfo)

In [73]:
map_sfo

In [75]:
# combine NYC and SFO into a dataframe ; add city column

dfg_sfo.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Crocker-Amazon,37.709378,-122.438587
1,Sunnydale,37.709519,-122.404779
2,Visitacion Valley,37.712132,-122.409713
3,Oceanview,37.713651,-122.457483
4,Parkmerced,37.717176,-122.478947


In [76]:
dfg_nyc.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Richmond Valley,40.520105,-74.229311
1,Pleasant Plains,40.523994,-74.2157
2,Prince's Bay,40.528994,-74.197644
3,Charleston,40.536772,-74.237367
4,Huguenot,40.537328,-74.194588


In [77]:
dfg_nyc['City'] = 'New York'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [78]:
dfg_nyc.head()

Unnamed: 0,Neighborhood,Latitude,Longitude,City
0,Richmond Valley,40.520105,-74.229311,New York
1,Pleasant Plains,40.523994,-74.2157,New York
2,Prince's Bay,40.528994,-74.197644,New York
3,Charleston,40.536772,-74.237367,New York
4,Huguenot,40.537328,-74.194588,New York


In [79]:
dfg_sfo['City'] = 'San Francisco'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [80]:
dfg_sfo.head()

Unnamed: 0,Neighborhood,Latitude,Longitude,City
0,Crocker-Amazon,37.709378,-122.438587,San Francisco
1,Sunnydale,37.709519,-122.404779,San Francisco
2,Visitacion Valley,37.712132,-122.409713,San Francisco
3,Oceanview,37.713651,-122.457483,San Francisco
4,Parkmerced,37.717176,-122.478947,San Francisco


In [81]:
dfg_sfo.shape

(79, 4)

In [82]:
dfg_nyc.shape

(250, 4)

In [83]:
df = pd.concat([dfg_nyc, dfg_sfo], ignore_index=True)

In [84]:
df.shape

(329, 4)

In [85]:
df.head()

Unnamed: 0,Neighborhood,Latitude,Longitude,City
0,Richmond Valley,40.520105,-74.229311,New York
1,Pleasant Plains,40.523994,-74.2157,New York
2,Prince's Bay,40.528994,-74.197644,New York
3,Charleston,40.536772,-74.237367,New York
4,Huguenot,40.537328,-74.194588,New York


In [86]:
df.tail()

Unnamed: 0,Neighborhood,Latitude,Longitude,City
324,Telegraph Hill,37.80273,-122.405851,San Francisco
325,Marina District,37.802984,-122.437472,San Francisco
326,Buena Vista,37.806532,-122.420649,San Francisco
327,Fisherman's Wharf,37.809167,-122.416599,San Francisco
328,Treasure Island,37.823354,-122.370153,San Francisco


In [127]:
# begin - get Foursquare data

# 4square creds
CLIENT_ID = 'HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1'
CLIENT_SECRET = '0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG'

In [128]:
# test data for one neighborhood

VERSION='20180604'
RADIUS=500
LIMIT=100
neighborhood_latitude = df.loc[0,'Latitude']
neighborhood_longitude = df.loc[0,'Longitude']
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, neighborhood_latitude, neighborhood_longitude, VERSION, RADIUS, LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.5201055,-74.2293113&v=20180604&radius=500&limit=100'

In [129]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d6592cf6e4650002c29bc26'},
 'response': {'groups': [{'items': [{'reasons': {'count': 0,
       'items': [{'reasonName': 'globalInteractionReason',
         'summary': 'This spot is popular',
         'type': 'general'}]},
      'referralId': 'e-0-4b967a25f964a520a6ce34e3-0',
      'venue': {'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/deli_',
          'suffix': '.png'},
         'id': '4bf58dd8d48988d146941735',
         'name': 'Deli / Bodega',
         'pluralName': 'Delis / Bodegas',
         'primary': True,
         'shortName': 'Deli / Bodega'}],
       'id': '4b967a25f964a520a6ce34e3',
       'location': {'address': 'Page Ave',
        'cc': 'US',
        'city': 'Staten Island',
        'country': 'United States',
        'distance': 498,
        'formattedAddress': ['Page Ave', 'Staten Island, NY', 'United States'],
        'labeledLatLngs': [{'label': 'display',
          'lat': 40.516582119700814,
    

In [130]:
results['response']['groups'][0]['items']

[{'reasons': {'count': 0,
   'items': [{'reasonName': 'globalInteractionReason',
     'summary': 'This spot is popular',
     'type': 'general'}]},
  'referralId': 'e-0-4b967a25f964a520a6ce34e3-0',
  'venue': {'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/deli_',
      'suffix': '.png'},
     'id': '4bf58dd8d48988d146941735',
     'name': 'Deli / Bodega',
     'pluralName': 'Delis / Bodegas',
     'primary': True,
     'shortName': 'Deli / Bodega'}],
   'id': '4b967a25f964a520a6ce34e3',
   'location': {'address': 'Page Ave',
    'cc': 'US',
    'city': 'Staten Island',
    'country': 'United States',
    'distance': 498,
    'formattedAddress': ['Page Ave', 'Staten Island, NY', 'United States'],
    'labeledLatLngs': [{'label': 'display',
      'lat': 40.516582119700814,
      'lng': -74.23294859905353}],
    'lat': 40.516582119700814,
    'lng': -74.23294859905353,
    'state': 'NY'},
   'name': 'Tottenville Hot Bagels & Deli',
   'photos': {'count': 

In [90]:
# function that extracts the category of the venue
def get_category_type(row): 
    try:
        categories_list = row['categories'] 
    except:
        categories_list = row['venue.categories']
    if len(categories_list) == 0: 
        return None
    else:
        return categories_list[0]['name']

In [91]:
venues = results['response']['groups'][0]['items']

In [95]:
nearby_venues = json_normalize(venues)

In [97]:
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]

In [98]:
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=
1)

In [99]:
# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Tottenville Hot Bagels & Deli,Deli / Bodega,40.516582,-74.232949
1,Chase Bank,Bank,40.517461,-74.233904
2,MTA SIR - Richmond Valley,Train Station,40.519677,-74.228854
3,Burger King,Fast Food Restaurant,40.518076,-74.234167
4,MTA Bus - Outerbridge & Park and Ride (X17/X22...,Bus Stop,40.522675,-74.224768


In [100]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

5 venues were returned by Foursquare.


In [108]:
# save 4square data to file to avoid multiple net trips
df_4square_venues_file = "/var/tmp/4square_venues_nyc_sfo.csv"

In [132]:
# repeat for all neighborhoods in dataframe
def getNearbyVenues(cities, names, latitudes, longitudes, radius=500):

        venues_list=[]
        for city, name, lat, lng in zip(cities, names, latitudes, longitudes):
            print(city, name)
        
            # create the API request URL
            #url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, lat, lng, VERSION, radius, LIMIT)
            url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            lat,
            lng,
            VERSION,
            radius,
            LIMIT)
            
            # make the GET request
            print(url)
            #results = requests.get(url).json()["response"]['groups'][0]['items']
            resp = requests.get(url)
            print(resp.status_code)
            results = resp.json()["response"]['groups'][0]['items']
            
            # return only relevant information for each nearby venue
            venues_list.append([(
                city,
                name,
                lat,
                lng,
                v['venue']['name'],
                v['venue']['location']['lat'],
                v['venue']['location']['lng'], 
                v['venue']['categories'][0]['name']) for v in results])
        
        nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
        nearby_venues.columns = ['City','Neighborhood',
                  'Neighborhood Latitude',
                  'Neighborhood Longitude',
                  'Venue',
                  'Venue Latitude',
                  'Venue Longitude',
                  'Venue Category']
        nearby_venues.to_csv(df_4square_venues_file, index=False)
        
        return(nearby_venues)

In [133]:
if not os.path.exists(df_4square_venues_file):
    print("getting from net")
    nyc_sfo_venues = getNearbyVenues(cities=df['City'], names=df['Neighborhood'], latitudes=df['Latitude'], longitudes=df['Longitude'])
else:
    print("getting from local disk")
    nyc_sfo_venues = pd.read_csv(df_4square_venues_file)

getting from net
New York Richmond Valley
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.5201055,-74.2293113&v=20180604&radius=500&limit=100
200
New York Pleasant Plains
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.5239944,-74.2156999&v=20180604&radius=500&limit=100
200
New York Prince's Bay
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.528994399999995,-74.197644&v=20180604&radius=500&limit=100
200
New York Charleston
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.5367719,-74.237367200000

200
New York Travis
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.5931598,-74.1879218&v=20180604&radius=500&limit=100
200
New York Arverne
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.5934173,-73.7895462&v=20180604&radius=500&limit=100
200
New York New Springville
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.5934376,-74.163199&v=20180604&radius=500&limit=100
200
New York Marine Park
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.5955682,-73.91661010799871&v=20180604&radius=500&limit=100

200
New York Bay Ridge
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.630188000000004,-74.0271902&v=20180604&radius=500&limit=100
200
New York Elm Park
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.6314925,-74.14875440000002&v=20180604&radius=500&limit=100
200
New York Arlington
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.6323257,-74.1651437&v=20180604&radius=500&limit=100
200
New York Port Richmond
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.6331592,-74.1365318&v=20180604&radius=500&

200
New York Prospect Heights
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.6778708,-73.9684725&v=20180604&radius=500&limit=100
200
New York Springfield Gardens
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.678159,-73.746521&v=20180604&radius=500&limit=100
200
New York Carroll Gardens
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.6784201,-73.9948021&v=20180604&radius=500&limit=100
200
New York Gowanus
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.6791695,-73.9885041&v=20180604&radius=500

200
New York Middle Village
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.7182153,-73.8786698186696&v=20180604&radius=500&limit=100
200
New York Little Italy
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.71927279999999,-73.9982152&v=20180604&radius=500&limit=100
200
New York SoHo
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.722879999999996,-73.9987505&v=20180604&radius=500&limit=100
200
New York Rego Park
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.72293705,-73.86220651513041&v=201806

200
New York Murray Hill
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.7481574,-73.97875&v=20180604&radius=500&limit=100
200
New York Tudor City
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.7486231,-73.97138890000001&v=20180604&radius=500&limit=100
200
New York Turtle Bay
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.7534666,-73.96886590000001&v=20180604&radius=500&limit=100
200
New York Oakland Gardens
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.753991,-73.765966&v=20180604&radius=50

200
New York Port Morris
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.801514700000006,-73.9095811&v=20180604&radius=500&limit=100
200
New York Harlem
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.8078786,-73.94541540000002&v=20180604&radius=500&limit=100
200
New York Mott Haven
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.808989700000005,-73.92291469999999&v=20180604&radius=500&limit=100
200
New York Harding Park
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.8094444,-73.85638890000001&

200
New York Pelham Parkway
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.8592334,-73.8550166&v=20180604&radius=500&limit=100
200
New York Fordham
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.8592667,-73.8984694&v=20180604&radius=500&limit=100
200
New York University Heights
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.8601,-73.90930300000001&v=20180604&radius=500&limit=100
200
New York Baychester
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=40.8612222,-73.8412865&v=20180604&radius=500&l

200
San Francisco Lakeside
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=37.730753899999996,-122.4754281&v=20180604&radius=500&limit=100
200
San Francisco Lakeshore
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=37.7330369,-122.490249388431&v=20180604&radius=500&limit=100
200
San Francisco Glen Park
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=37.7331037,-122.4338051&v=20180604&radius=500&limit=100
200
San Francisco Silver Terrace
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=37.73385379999999,-122.

200
San Francisco Hayes Valley
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=37.7766853,-122.4229361&v=20180604&radius=500&limit=100
200
San Francisco North of Panhandle
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=37.7784081,-122.4425992&v=20180604&radius=500&limit=100
200
San Francisco Lone Mountain
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=37.7790963,-122.4519159&v=20180604&radius=500&limit=100
200
San Francisco Sunnyside
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=37.779280799999995,-122.

200
San Francisco Marina District
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=37.8029843,-122.43747150000002&v=20180604&radius=500&limit=100
200
San Francisco Buena Vista
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=37.8065321,-122.42064850000001&v=20180604&radius=500&limit=100
200
San Francisco Fisherman's Wharf
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=37.8091669,-122.4165994&v=20180604&radius=500&limit=100
200
San Francisco Treasure Island
https://api.foursquare.com/v2/venues/explore?client_id=HRZNHDDCZQJ4U4NWRR1LYPJJVSYSPJCAHUW22JOZADGFOVL1&client_secret=0THVETABYGSJYWMMTFCMLWJQNKJ4GT42DVDSIUDMYQC2YMEG&ll=37.8

In [134]:
nyc_sfo_venues.head()

Unnamed: 0,City,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,New York,Richmond Valley,40.520105,-74.229311,Tottenville Hot Bagels & Deli,40.516582,-74.232949,Deli / Bodega
1,New York,Richmond Valley,40.520105,-74.229311,Chase Bank,40.517461,-74.233904,Bank
2,New York,Richmond Valley,40.520105,-74.229311,MTA SIR - Richmond Valley,40.519677,-74.228854,Train Station
3,New York,Richmond Valley,40.520105,-74.229311,Burger King,40.518076,-74.234167,Fast Food Restaurant
4,New York,Richmond Valley,40.520105,-74.229311,MTA Bus - Outerbridge & Park and Ride (X17/X22...,40.522675,-74.224768,Bus Stop


In [135]:
nyc_sfo_venues.tail()

Unnamed: 0,City,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
13107,San Francisco,Treasure Island,37.823354,-122.370153,Harvey's Gourmet Mini Doughnuts stand,37.821078,-122.374524,Donut Shop
13108,San Francisco,Treasure Island,37.823354,-122.370153,Golden Gate Rugby Club,37.820999,-122.365793,Rugby Pitch
13109,San Francisco,Treasure Island,37.823354,-122.370153,The Great Lawn at Treasure Island,37.821363,-122.374829,Park
13110,San Francisco,Treasure Island,37.823354,-122.370153,Gaelic Football Field,37.82746,-122.371548,Athletics & Sports
13111,San Francisco,Treasure Island,37.823354,-122.370153,Treasure Island YMCA,37.826408,-122.368703,Gym


In [136]:
nyc_sfo_venues.shape

(13112, 8)