In [87]:
import pandas as pd
import requests
from time import sleep

import sqlite3
import orjson


# Park type clustering based on the venues nearby

Use the Foursquare Places API to explore popular venues near each park (located by its latitude and longitude). The parks are then grouped based on the similarity of their nearby venues.

In [4]:
# Load park info (philly parks and state parks) from the local SQLite database.
conn = sqlite3.connect('philly_parks_info.db')
try:
    philly_parks = pd.read_sql_query("SELECT id, name, lat, lng FROM info", conn)
finally:
    # Close the connection even when the query raises (e.g. the `info` table
    # is missing), so a failed run of this cell does not leave it open.
    conn.close()

# philly_parks = philly_parks.set_index('id')

In [16]:
# Preview the first rows of the loaded park table.
philly_parks.head()

Unnamed: 0,id,name,lat,lng
0,ChIJAWkAqNL1t4kRlm4slspOSXo,Quiet Waters Park,38.93767,-76.500899
1,ChIJzcow6Xb1t4kRQVE7s1AWWr8,Hillsmere Shores Community Beach,38.927212,-76.49415
2,ChIJ_bk5K1z1t4kRQNverIUOVko,Quiet Waters Dog Beach,38.930202,-76.508341
3,ChIJ_-2cQEX1t4kRoiCrmNMbrQ8,Quiet Waters Dog Park,38.931865,-76.505658
4,ChIJQWcsgIn2t4kRCL0Ub363FyI,Pip Moyer Recreation Center (Annapolis Recreat...,38.963271,-76.50515


In [18]:
# Select the lat/lng values of the park at row position 1.
philly_parks.iloc[1][['lat', 'lng']]

lat    38.9272
lng   -76.4942
Name: 1, dtype: object

0       38.937670
1       38.927212
2       38.930202
3       38.931865
4       38.963271
          ...    
1651    40.071413
1652    40.163117
1653    40.154833
1654    40.330333
1655    40.368272
Length: 1656, dtype: float64

In [11]:
# Foursquare API settings. The credentials are read from a local text file
# (parsed without a header row) instead of being written into the notebook.
fsqr_config = pd.read_csv('foursquare_config.txt', header=None)
CLIENT_ID = fsqr_config.iloc[1, 0]      # row position 1, first column
CLIENT_SECRET = fsqr_config.iloc[3, 0]  # row position 3, first column
VERSION = '20200905'
LIMIT = 500

In [37]:
# explore popular venues near the park location (ll)
def fsq_explore(ll, radius=5000, limit=500, section='', timeout=30):
    """Query the Foursquare ``venues/explore`` endpoint around one location.

    Parameters
    ----------
    ll : iterable
        Latitude and longitude, in that order. The first two values are taken
        positionally (not by label), so a tuple, a list or a pandas Series
        such as ``row[['lat', 'lng']]`` all work.
    radius : int, default 5000
        Search radius in meters (per the original notes: max 100,000 m;
        25,000 m is about a half-hour drive at 30 mph).
    limit : int, default 500
        Value sent as the ``limit`` query parameter.
    section : str, default ''
        Value sent as the ``section`` query parameter.
    timeout : float, default 30
        Seconds to wait for the server before ``requests`` raises, so one
        stalled request cannot hang a long batch run forever.

    Returns
    -------
    dict
        The decoded JSON body of the response, unmodified. Venue items are
        found under ``["response"]["groups"][0]["items"]``.
    """
    # Positional unpacking avoids `ll[0]` / `ll[1]`, which on a label-indexed
    # Series depends on pandas' deprecated integer-position fallback.
    lat, lng = tuple(ll)[:2]

    # Let requests build and escape the query string rather than formatting
    # the credentials and coordinates into the URL by hand.
    params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
        'll': '{},{}'.format(lat, lng),
        'radius': radius,
        'limit': limit,
        'section': section,
        'time': 'any',
        'day': 'any',
    }

    # make the GET request
    response = requests.get('https://api.foursquare.com/v2/venues/explore',
                            params=params, timeout=timeout)
    return response.json()

In [85]:
# example: list every venue returned for the park at row position 1,
# one line per venue as "name: category, distance <value>"
results = fsq_explore(philly_parks.iloc[1][['lat', 'lng']])
example_items = results['response']['groups'][0]['items']
print('total venues: ' + str(len(example_items)))
for item in example_items:
    venue = item['venue']
    # Separate the category name from the distance; previously the two were
    # concatenated directly and printed as e.g. "Park1439".
    print('{}: {}, distance {}'.format(
        venue['name'],
        venue['categories'][0]['name'],
        venue['location']['distance']))

total venues: 35
Quiet Waters Park: Park1439
Bay Ridge Wine & Spirits: Wine Bar2450
Rocco's Pizza: Pizza Place2445
Annapolis Smokehouse and Tavern: BBQ Joint2469
Bark 'n' Bean: Pet Store2459
Caliente Grill: Mexican Restaurant2397
Main Ingredient: American Restaurant2451
Quiet Waters Dog Park: Dog Run1084
Oyster Harbor Beach: Beach2208
Old Stein Inn: German Restaurant3793
Vin909: Wine Bar4435
Bakers & co.: Bakery4687
Bruster's Real Ice Cream: Ice Cream Shop4076
Davis' Pub: Pub4818
Sammy's Italian Pizza Kitchen: Italian Restaurant4254
Sweet Hearts Patisserie: Bakery4032
Bayside Inn: Restaurant4283
Annapolis Seafood Markets: Seafood Restaurant3362
Grapes: Wine Bar4010
Historic London Town and Gardens: Garden4338
Wine Cellars of Annapolis: Wine Shop4016
Zumiez: Clothing Store3149
Triton Beach: Beach4868
"Pip" Moyer Recreation Center: Gym4094
Redbox: Video Store4262
7-Eleven: Convenience Store4267
Annapolis wine and spirits: Liquor Store3330
Sam's Supermarket: Grocery Store3457
Panda Noodle

In [56]:
# Inspect the categories list of the venue at index 1 in the example response.
results['response']['groups'][0]['items'][1]['venue']['categories']

[{'id': '4bf58dd8d48988d123941735',
  'name': 'Wine Bar',
  'pluralName': 'Wine Bars',
  'shortName': 'Wine Bar',
  'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/winery_',
   'suffix': '.png'},
  'primary': True}]

In [None]:
# NOTE(review): nearby_venues_query is defined in a cell further down this
# notebook; on a fresh kernel that defining cell has to be run before this one.
# Queries nearby venues for every row of the NYC park database and stores the
# raw responses in a separate SQLite file.
nearby_venues_query('nyc_parks_info.db', 'nyc_park_nearby_venues_raw.db')

In [111]:
# philly_parks[['lat', 'lng']].apply(fsq_explore, axis=1)
# save query results in database

def nearby_venues_query(input_db, output_db, pause=0.5):
    """Fetch nearby venues for every park in ``input_db`` and store the raw responses.

    Reads ``id, name, lat, lng`` from the ``info`` table of ``input_db``, calls
    ``fsq_explore`` once per row, and writes one row per park into a newly
    created ``nearby_venues_raw`` table in ``output_db``. The response is
    stored as an orjson-encoded blob in the ``results`` column.

    Parameters
    ----------
    input_db : str
        Path of the SQLite file holding the ``info`` table,
        e.g. 'philly_parks_info.db'.
    output_db : str
        Path of the SQLite file to write to,
        e.g. 'philly_park_nearby_venues_raw.db'.
    pause : float, default 0.5
        Seconds to sleep before each API request.

    Raises
    ------
    sqlite3.OperationalError
        If ``nearby_venues_raw`` already exists in ``output_db`` (the table is
        created unconditionally) or ``info`` is missing from ``input_db``.
    """
    in_conn = sqlite3.connect(input_db)
    try:
        out_conn = sqlite3.connect(output_db)
        try:
            out_conn.execute('''CREATE TABLE nearby_venues_raw (id text, name text, lat real, lng real, results blob)''')

            for park_id, name, lat, lng in in_conn.execute("SELECT id, name, lat, lng FROM info"):
                sleep(pause)

                results = fsq_explore((lat, lng))

                # store the park's identifying columns plus the serialized raw response
                out_conn.execute(
                    '''INSERT INTO nearby_venues_raw VALUES (?, ?, ?, ?, ?)''',
                    (park_id, name, lat, lng, orjson.dumps(results)))

                # Commit per park so rows already fetched survive a failure
                # later in the loop.
                out_conn.commit()
        finally:
            # Always release both connections, including when a request or an
            # SQL statement raises part-way through.
            out_conn.close()
    finally:
        in_conn.close()

In [109]:
# Read the stored raw responses back to check what was written.
conn = sqlite3.connect('philly_park_nearby_venues_raw.db')
try:
    test = pd.read_sql_query("SELECT * FROM nearby_venues_raw", conn)
finally:
    # Close the connection even when the query raises.
    conn.close()
test.head()

Unnamed: 0,id,name,lat,lng,results
0,ChIJAWkAqNL1t4kRlm4slspOSXo,Quiet Waters Park,38.93767,-76.500899,"b'{""meta"":{""code"":200,""requestId"":""606776a0225..."
1,ChIJzcow6Xb1t4kRQVE7s1AWWr8,Hillsmere Shores Community Beach,38.927212,-76.49415,"b'{""meta"":{""code"":200,""requestId"":""606776a1b26..."
2,ChIJ_bk5K1z1t4kRQNverIUOVko,Quiet Waters Dog Beach,38.930202,-76.508341,"b'{""meta"":{""code"":200,""requestId"":""606776a2808..."


In [108]:
# Decode the stored JSON blob of the first row and show its list of venue items.
orjson.loads(test.iloc[0]['results'])['response']['groups'][0]['items']

[{'reasons': {'count': 0,
   'items': [{'summary': 'This spot is popular',
     'type': 'general',
     'reasonName': 'globalInteractionReason'}]},
  'venue': {'id': '4b630dbef964a52002602ae3',
   'name': 'Quiet Waters Park',
   'location': {'address': '600 Quiet Waters Park Rd',
    'lat': 38.93776991815253,
    'lng': -76.50374019238062,
    'labeledLatLngs': [{'label': 'display',
      'lat': 38.93776991815253,
      'lng': -76.50374019238062}],
    'distance': 246,
    'postalCode': '21403',
    'cc': 'US',
    'city': 'Annapolis',
    'state': 'MD',
    'country': 'United States',
    'formattedAddress': ['600 Quiet Waters Park Rd',
     'Annapolis, MD 21403',
     'United States']},
   'categories': [{'id': '4bf58dd8d48988d163941735',
     'name': 'Park',
     'pluralName': 'Parks',
     'shortName': 'Park',
     'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/park_',
      'suffix': '.png'},
     'primary': True}],
   'photos': {'count': 0, 'groups': []

#### Venue categories - convert each to its top-level category (10 categories in total)

In [44]:
# Fetch the full Foursquare category tree. It is used to build a dictionary of
# all possible fsq category ids, whose values are the corresponding top category.
url = 'https://api.foursquare.com/v2/venues/categories'
all_categ = requests.get(
    url,
    # Let requests escape the credentials instead of formatting them into the URL.
    params={'client_id': CLIENT_ID, 'client_secret': CLIENT_SECRET, 'v': VERSION},
    # Fail instead of hanging indefinitely if the server does not answer.
    timeout=30,
).json()

# find the top level category for each categ id
# features are simplified by only considering the top categ (10 by default)
def create_categ_mapping(all_categ, max_top_categories=10):
    """Map every category id in the tree to its top-level category.

    Parameters
    ----------
    all_categ : dict
        Category tree with the top-level nodes under
        ``all_categ['response']['categories']``. Each node has an ``'id'`` and
        a ``'name'``, and optionally a ``'categories'`` list of child nodes.
    max_top_categories : int or None, default 10
        Only the first ``max_top_categories`` top-level nodes are walked.
        Fewer available nodes is fine (no IndexError); ``None`` walks all.

    Returns
    -------
    (dict, dict)
        ``categ_mapping``: category id -> name of its top-level category.
        ``categ_parent``: category id -> parent node dict, or ``None`` for a
        top-level category.
    """
    categ_mapping = {}
    categ_parent = {}

    def find_categ(node, label, parent):
        # map id to label
        categ_mapping[node['id']] = label
        categ_parent[node['id']] = parent

        for child in node.get('categories', []):  # if at leaf, categories is empty
            find_categ(child, label, node)

    # Slicing (rather than indexing 0..9) tolerates a tree with fewer top-level
    # categories while still capping at the first `max_top_categories`.
    for root in all_categ['response']['categories'][:max_top_categories]:
        find_categ(root, root['name'], None)

    return categ_mapping, categ_parent

# Build the category-id lookup tables from the fetched category tree.
categ_mapping, categ_parent = create_categ_mapping(all_categ)


In [57]:
# Names of the first ten top-level categories, in the order they appear in the tree
top_level_categories = all_categ['response']['categories']
categ_keys = [top_level_categories[position]['name'] for position in range(10)]
categ_keys

['Arts & Entertainment',
 'College & University',
 'Event',
 'Food',
 'Nightlife Spot',
 'Outdoors & Recreation',
 'Professional & Other Places',
 'Residence',
 'Shop & Service',
 'Travel & Transport']

#### Extract feature - number of nearby venues per top-level category

In [58]:

# Start every top-level category at zero so categories without venues still appear
venue_stat = dict.fromkeys(categ_keys, 0)

# Count each venue under the top-level category of its first listed category
for entry in results['response']['groups'][0]['items']:
    first_categ_id = entry['venue']['categories'][0]['id']
    venue_stat[categ_mapping[first_categ_id]] += 1




In [59]:
# Show the number of nearby venues counted per top-level category.
venue_stat

{'Arts & Entertainment': 0,
 'College & University': 0,
 'Event': 0,
 'Food': 14,
 'Nightlife Spot': 4,
 'Outdoors & Recreation': 8,
 'Professional & Other Places': 1,
 'Residence': 0,
 'Shop & Service': 8,
 'Travel & Transport': 0}

In [None]:
# TODO:
# - Use Foursquare sub-category ids as features (detailed type info for each venue); however, the park itself still needs to be identified among the venues.
# - Add other type features, e.g. whether the park is a state park.
