# Clustering and Scoring Job Relocation Opportunities - Playground Notebook

Austin Rainwater

---

# Initialization

In [1]:
!pip install --quiet --upgrade sqlalchemy pymysql

from urllib.parse import quote as url_encode

import pandas as pd
import numpy as np
import aiohttp
import asyncio
import requests
import xml.etree.ElementTree as xml

from concurrent.futures import ProcessPoolExecutor

from pandas import json_normalize
from itertools import product

from sqlalchemy import (
    create_engine,
    Table,
    Column,
    MetaData,
    String,
    Numeric,
    Integer
)

import yaml

# Credentials (API keys, database connection string) are read from a local YAML file.
with open('secrets.yaml', 'r') as handle:
    secrets = yaml.safe_load(handle)

# User-Agent sent with the `requests` calls that pass `headers=header`.
user_agent = (
    'datascience jupyter notebook/0.0 '
    '(https://github.com/pacorain/datascience-certification-final-project; '
    'Austin Rainwater, paco@heckin.io)'
)
header = {"User-Agent": user_agent}

# Passed as the `v` parameter on every Foursquare request in this notebook.
v = '20201108'

---

# City Definition

Obviously, a good place for me to start is with some cities. Below is the table definition for the cities I will be exploring and their specific traits.

In [2]:
# Connect with the connection string from secrets.yaml; echo=True makes SQLAlchemy log every statement it emits.
engine = create_engine(secrets['db_connection_string'], echo=True)

In [3]:
meta = MetaData()

# One column per attribute tracked for a city; everything except metro_name is required.
city_columns = [
    Column('city_name', String(50), primary_key=True, comment='Community Name'),
    Column('metro_name', String(50), comment='Metropolitan Area Name'),
    Column('state', String(2), nullable=False, comment='2-Letter abbreviation of State'),
    Column('lat', Numeric(10, 6), nullable=False, comment='Latitude of City'),
    Column('lng', Numeric(10, 6), nullable=False, comment='Longitude of City'),
    Column('area_val', Numeric(10, 4), nullable=False, comment='Area of city in square miles'),
    Column('total_pop', Integer, nullable=False, comment='Total population of city'),
]

# Register the `city` table on `meta` so create_all/drop_all can manage it.
cities = Table('city', meta, *city_columns)

In [4]:
# Rebuild the schema from scratch on every run: drop the tables registered on `meta`,
# then create them again. Any rows already in those tables are lost.
meta.drop_all(engine) # During development
meta.create_all(engine)

2020-12-06 01:17:24,564 INFO sqlalchemy.engine.base.Engine SHOW VARIABLES LIKE 'sql_mode'
2020-12-06 01:17:24,565 INFO sqlalchemy.engine.base.Engine {}
2020-12-06 01:17:24,572 INFO sqlalchemy.engine.base.Engine SHOW VARIABLES LIKE 'lower_case_table_names'
2020-12-06 01:17:24,573 INFO sqlalchemy.engine.base.Engine {}
2020-12-06 01:17:24,585 INFO sqlalchemy.engine.base.Engine SELECT DATABASE()
2020-12-06 01:17:24,586 INFO sqlalchemy.engine.base.Engine {}
2020-12-06 01:17:24,590 INFO sqlalchemy.engine.base.Engine show collation where `Charset` = 'utf8mb4' and `Collation` = 'utf8mb4_bin'
2020-12-06 01:17:24,590 INFO sqlalchemy.engine.base.Engine {}
2020-12-06 01:17:24,596 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS CHAR(60)) AS anon_1
2020-12-06 01:17:24,597 INFO sqlalchemy.engine.base.Engine {}
2020-12-06 01:17:24,607 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS CHAR(60)) AS anon_1
2020-12-06 01:17:24,608 INFO sqlalchemy.engine.base.E

Let's start with my birthplace: Fort Wayne, Indiana.

In [5]:
# Reusable INSERT statement for the `city` table.
new_city = cities.insert()

# This first attempt leaves out the NOT NULL columns (lat, lng, area_val, total_pop),
# so the database is expected to reject it.
try:
    engine.execute(new_city, [
        {'city_name': 'Fort Wayne, IN', 'metro_name': 'Fort Wayne, IN', 'state': 'IN'}
    ])
except Exception as error:
    # Catch Exception rather than using a bare `except:` so KeyboardInterrupt and
    # SystemExit still propagate, and show the reason instead of hiding it.
    print("Oops! That didn't work.")
    print(f"{type(error).__name__}: {error}")

2020-12-06 01:17:24,801 INFO sqlalchemy.engine.base.Engine INSERT INTO city (city_name, metro_name, state) VALUES (%(city_name)s, %(metro_name)s, %(state)s)
2020-12-06 01:17:24,802 INFO sqlalchemy.engine.base.Engine {'city_name': 'Fort Wayne, IN', 'metro_name': 'Fort Wayne, IN', 'state': 'IN'}
2020-12-06 01:17:24,806 INFO sqlalchemy.engine.base.Engine ROLLBACK
Oops! That didn't work.


Ah, the table requires some more data to be able to insert the record. I could use the geocoder library from before to get the latitude and longitude, but since I will be using Wikipedia anyway, let's see if I can grab it from there.

I did some experimenting with the [Wikipedia API Sandbox](https://en.wikipedia.org/wiki/Special:ApiSandbox#action=parse&format=json&page=Fort%20Wayne%2C%20Indiana&redirects=1&prop=wikitext), and oddly enough while there are multiple endpoints capable of getting the _names_ of the templates used in a page, I could not for the life of me find a way to get the _data inserted to_ the templates in an easy format such as JSON. So instead, I'm going to grab the `parsetree` and parse it with Python's XML libraries.

In [6]:
city_name = 'Fort Wayne'
state_name = 'IN'

# Ask the MediaWiki `parse` action for the page's parse tree (XML), following redirects.
wikipedia_url = 'https://en.wikipedia.org/w/api.php'
params = {
    "action": "parse",
    "format": "json",
    "redirects": "1",
    "page": f"{city_name}, {state_name}",
    "prop": "parsetree"
}

# Surface HTTP failures here rather than as a confusing KeyError on the JSON below,
# and bound the wait so a stalled connection cannot hang the notebook.
wiki_reply = requests.get(wikipedia_url, params=params, headers=header, timeout=30)
wiki_reply.raise_for_status()

# `response` ends up holding the canonicalized parse tree as an XML string.
response = wiki_reply.json()['parse']['parsetree']['*']
response = xml.canonicalize(response, strip_text=True)

# Write XML data for local exploration (explicit encoding: article text is not ASCII-only)
with open('data/fort_wayne.xml', 'w', encoding='utf-8') as xml_file:
    xml_file.write(response)

Ah, going through the XML file, the map on the Wikipedia article is an SVG (i.e. an image, not something that contains computer-readable geographic data), so I will need to use a geocoder. 

I recall from the previous lab that when you grab data from Foursquare's API, it will geocode the 'near' parameter and return the latitude and longitude used.

I also want to include the total size of the city, so in order to enter data into the table, I need to grab data from Wikipedia _and_ Foursquare. Which is fine, because I need more data to explore possible features.

In [7]:
# Parse the canonicalized parse-tree string into an ElementTree element that can be searched.
wiki_data = xml.fromstring(response)

In [8]:
foursquare_url = "https://api.foursquare.com/v2/venues/explore"

params = {
    'client_id': secrets['4SQ_CLIENT_ID'],
    'client_secret': secrets['4SQ_CLIENT_SECRET'],
    'limit': '50',
    'v': v,
    'near': 'Fort Wayne, IN',
    'radius': 1000,
    'time': 'any', 
    'day': 'any',
    'sortByPopularity': '1'
}

# Check the HTTP status before trusting the body, so a rejected request is reported
# here instead of as a missing key later; the timeout keeps a stalled call from hanging.
explore_reply = requests.get(foursquare_url, params=params, headers=header, timeout=30)
explore_reply.raise_for_status()

# Only the `response` member of the JSON payload is used from here on.
foursquare_response = explore_reply.json()['response']

In [9]:
def template_value(wiki_data, template_title, part_name):
    """
    Return the text of one named argument of a template in a Wikipedia parse tree.

    Parameters:
        wiki_data: ElementTree element to search (any descendant `template` is considered).
        template_title: Text of the template's `title` child, e.g. "Infobox settlement".
        part_name: Text of the `name` child of the wanted `part` element.

    Returns:
        The `.text` of the matching part's `value` element (None if that element is empty).

    Raises:
        LookupError: if no such template exists, or the template has no such part.
    """
    template = wiki_data.find(".//template[title='{}']".format(template_title))
    if template is None:
        raise LookupError(f"No template titled {template_title!r} in the parse tree")

    # Standard ElementPath syntax: child `part` whose `name` matches, then its `value` child.
    value = template.find("./part[name='{}']/value".format(part_name))
    if value is None:
        raise LookupError(f"Template {template_title!r} has no part named {part_name!r}")
    return value.text

# Coordinates: the centre Foursquare reports for the geocoded `near` parameter.
geocode_center = foursquare_response['geocode']['center']
lat, lng = (float(geocode_center[axis]) for axis in ('lat', 'lng'))

# Area and population estimate: arguments of the article's "Infobox settlement" template.
infobox_title = "Infobox settlement"
sq_mi = float(template_value(wiki_data, infobox_title, "area_total_sq_mi"))
total_pop = int(template_value(wiki_data, infobox_title, "population_est"))

Alright, I've gotten the values I need initially for a city; now let's try inserting it.

In [10]:
# A complete row this time: every NOT NULL column of `city` has a value.
fort_wayne_row = dict(
    city_name='Fort Wayne',
    metro_name='Fort Wayne',
    state='IN',
    lat=lat,
    lng=lng,
    area_val=sq_mi,
    total_pop=total_pop,
)
engine.execute(new_city, [fort_wayne_row])

2020-12-06 01:17:26,243 INFO sqlalchemy.engine.base.Engine INSERT INTO city (city_name, metro_name, state, lat, lng, area_val, total_pop) VALUES (%(city_name)s, %(metro_name)s, %(state)s, %(lat)s, %(lng)s, %(area_val)s, %(total_pop)s)
2020-12-06 01:17:26,244 INFO sqlalchemy.engine.base.Engine {'city_name': 'Fort Wayne', 'metro_name': 'Fort Wayne', 'state': 'IN', 'lat': 41.1306, 'lng': -85.12886, 'area_val': 110.79, 'total_pop': 270402}
2020-12-06 01:17:26,248 INFO sqlalchemy.engine.base.Engine COMMIT


<sqlalchemy.engine.result.ResultProxy at 0x7fbbe25d0cd0>

In [11]:
# Read the whole `city` table back into a DataFrame to confirm the insert.
query = cities.select()

pd.read_sql(query, engine)

2020-12-06 01:17:26,284 INFO sqlalchemy.engine.base.OptionEngine SELECT city.city_name, city.metro_name, city.state, city.lat, city.lng, city.area_val, city.total_pop 
FROM city
2020-12-06 01:17:26,285 INFO sqlalchemy.engine.base.OptionEngine {}


Unnamed: 0,city_name,metro_name,state,lat,lng,area_val,total_pop
0,Fort Wayne,Fort Wayne,IN,41.1306,-85.12886,110.79,270402


Not bad. 

Next, I want to grab some data from Foursquare to build a feature based on what's popular within 1, 5, 25, and 100 km. I'll use the category hierarchy like I did in the week 3 lab. Given that the Foursquare API allows for 99,500 of these calls a day, and up to 5,000 per hour, I can also do this comfortably with each section defined in the `venues/explore` endpoint to see how much variety is in each section in an area.

In [12]:
url = 'https://api.foursquare.com/v2/venues/categories'
params = {
    'client_id': secrets['4SQ_CLIENT_ID'],
    'client_secret': secrets['4SQ_CLIENT_SECRET'],
    'v': v
}

# Send the same User-Agent header as the other `requests` calls in this notebook,
# report HTTP failures immediately, and bound the wait with a timeout.
categories_reply = requests.get(url, params=params, headers=header, timeout=30)
categories_reply.raise_for_status()
foursquare_categories = categories_reply.json()

def category_hier(categories, prefix=None):
    """
    Flatten a nested category tree into one row per category.

    Parameters:
        categories: iterable of category dicts, each with `id` and `shortName` and
            optionally a `categories` list of child dicts in the same shape.
        prefix: short names of the ancestors of `categories`, outermost first.
            Defaults to no ancestors. The argument is never modified.

    Returns:
        A list of pandas Series, parents before their children. Each Series is named
        with the category ID (as str) and indexed `cat_level_1` .. `cat_level_5`,
        holding the path of short names padded with NaN.

    Raises:
        ValueError: if the tree is nested more than five levels deep.
    """
    level_names = [f'cat_level_{level}' for level in range(1, 6)]
    # None instead of a shared mutable default; copy so the caller's list is untouched.
    ancestors = list(prefix) if prefix else []
    result = []

    for category in categories:
        path = ancestors + [category['shortName']]
        if len(path) > len(level_names):
            raise ValueError(
                f"Category {category['id']!r} is nested {len(path)} levels deep; "
                f"only {len(level_names)} levels are supported"
            )
        padding = [np.nan] * (len(level_names) - len(path))
        result.append(pd.Series(data=path + padding, name=str(category['id']), index=level_names))

        # Leaf categories may carry an empty list or no `categories` key at all.
        subcategories = category.get('categories')
        if subcategories:
            result += category_hier(subcategories, path)

    return result

# One row per category ID, with its path of short names spread over the cat_level_* columns.
categories = foursquare_categories['response']['categories']
category_rows = category_hier(categories)
category_df = pd.DataFrame(category_rows)

In [13]:
# Search radii in meters (1, 5, 25 and 100 km), passed as the `radius` request parameter.
radii = [1000, 5000, 25000, 100000]
# Values passed as the `section` request parameter, one search per section and radius.
sections = ['food', 'drinks', 'coffee', 'shops', 'arts', 'outdoors', 'sights', 'trending', 'topPicks']

async def get_popular_spots(city):
    """
    Collect popular venues around `city` for every radius/section combination.

    One `query_places` call is issued per (radius, section) pair taken from the
    module-level `radii` and `sections` lists. All calls share a single HTTP session
    and are awaited together; their results are concatenated into one DataFrame
    with a fresh integer index.
    """
    async with aiohttp.ClientSession() as session:
        pending = [
            query_places(session, city, radius, section=section)
            for radius, section in product(radii, sections)
        ]
        frames = await asyncio.gather(*pending)
    return pd.concat(frames, ignore_index=True)
    
    
async def query_places(session, location, radius, section='', query=''):
    """
    Run one Foursquare `venues/explore` search on an existing HTTP `session`.

    Searches within `radius` meters of `location`, optionally narrowed by `section`
    and/or `query`. The JSON payload is converted to a DataFrame by
    `normalize_foursquare_response`, run on the module-level `executor` so the
    conversion happens off the event loop.

    Returns the venue DataFrame tagged with `city` and `radius` (plus `section` and
    `query` when those were given), or None when the payload contained no venues.
    """
    request_params = {
        'client_id': secrets['4SQ_CLIENT_ID'],
        'client_secret': secrets['4SQ_CLIENT_SECRET'],
        'limit': '50',
        'v': v,
        'near': location,
        'radius': radius, 
        'section': section,
        'query': query,
        'sortByPopularity': 1
    }
    endpoint = "https://api.foursquare.com/v2/venues/explore"
    async with session.get(endpoint, params=request_params) as result:
        data = await result.json()

    venues = await asyncio.get_running_loop().run_in_executor(
        executor, normalize_foursquare_response, data
    )
    if venues is None:
        return None

    # Record which search produced these rows.
    venues['city'] = location
    venues['radius'] = radius
    for label, value in (('section', section), ('query', query)):
        # Empty filters are left out, so the column only exists when it was used.
        if value:
            venues[label] = value
    return venues
    
    
def normalize_foursquare_response(data):
    """
    Turn a Foursquare explore payload into a DataFrame with one row per venue item.

    Every row also carries the geocode metadata of the search (flattened, with a
    `geo_` prefix) and a `search_popularity` column holding the row's position in
    the result list. Returns None when the payload has no `groups`.
    """
    payload = data['response']
    if 'groups' not in payload:
        return None

    venue_frame = json_normalize(data, ['response', 'groups', 'items'], sep='_')

    # json_normalize yields a one-row frame for the geocode dict; take that row as a Series.
    geocode = json_normalize(payload['geocode'], sep='_').loc[0]
    geocode.index = [f'geo_{name}' for name in geocode.index]

    # Broadcast the single geocode record across all venue rows.
    venue_frame.loc[:, geocode.index] = geocode.values
    venue_frame['search_popularity'] = venue_frame.index.values
    return venue_frame


In [32]:
executor = ProcessPoolExecutor()
places_df = await get_popular_spots('Fort Wayne, IN')
places_df

Unnamed: 0,referralId,reasons_count,reasons_items,venue_id,venue_name,venue_location_address,venue_location_lat,venue_location_lng,venue_location_labeledLatLngs,venue_location_postalCode,...,geo_geometry_bounds_sw_lng,search_popularity,city,radius,section,venue_venuePage_id,flags_outsideRadius,venue_location_neighborhood,venue_events_count,venue_events_summary
0,e-3-4b5a3e80f964a5201ab728e3-0,0,"[{'summary': 'This spot is popular', 'type': '...",4b5a3e80f964a5201ab728e3,BakerStreet,4820 N Clinton St,41.122200,-85.125421,"[{'label': 'display', 'lat': 41.12219979566053...",46825,...,-85.303308,0,"Fort Wayne, IN",1000,food,,,,,
1,e-3-4b86ebf4f964a5200ba631e3-1,0,"[{'summary': 'This spot is popular', 'type': '...",4b86ebf4f964a5200ba631e3,Papa John's Pizza,5626 Coldwater Rd,41.130839,-85.135060,"[{'label': 'display', 'lat': 41.13083878708079...",46825,...,-85.303308,1,"Fort Wayne, IN",1000,food,,,,,
2,e-3-4b26e239f964a5207c8224e3-2,0,"[{'summary': 'This spot is popular', 'type': '...",4b26e239f964a5207c8224e3,Agaves Mexican Grill,211 E Washington Center Rd,41.132293,-85.138746,"[{'label': 'display', 'lat': 41.13229322911061...",46825,...,-85.303308,2,"Fort Wayne, IN",1000,food,,,,,
3,e-3-4bfec4844e5d0f47a7207d1f-3,0,"[{'summary': 'This spot is popular', 'type': '...",4bfec4844e5d0f47a7207d1f,Wendy’s,5701 Coldwater Rd,41.130943,-85.136557,"[{'label': 'display', 'lat': 41.13094264479011...",46825,...,-85.303308,3,"Fort Wayne, IN",1000,food,,,,,
4,e-3-4b5f57bff964a5201db529e3-4,0,"[{'summary': 'This spot is popular', 'type': '...",4b5f57bff964a5201db529e3,Cork 'n Cleaver,221 Washington Ctr Rd,41.132858,-85.138249,"[{'label': 'display', 'lat': 41.132858, 'lng':...",46825,...,-85.303308,4,"Fort Wayne, IN",1000,food,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1514,e-1-4cb7291c9c7ba35db4069706-45,0,"[{'summary': 'This spot is popular', 'type': '...",4cb7291c9c7ba35db4069706,Martin's Super Market,3900 E Bristol St,41.704659,-85.915933,"[{'label': 'display', 'lat': 41.70465911291793...",46514,...,-85.303308,45,"Fort Wayne, IN",100000,topPicks,,,,,
1515,e-1-4d2dee20774f76eb80009633-46,0,"[{'summary': 'This spot is popular', 'type': '...",4d2dee20774f76eb80009633,Regal American Mall,2830 West Elm Street,40.738443,-84.156591,"[{'label': 'display', 'lat': 40.738443, 'lng':...",45805,...,-85.303308,46,"Fort Wayne, IN",100000,topPicks,,,,,
1516,e-1-4bfe7430e529c9285956bc8c-47,0,"[{'summary': 'This spot is popular', 'type': '...",4bfe7430e529c9285956bc8c,El Camino Real,290 County Road 6,41.724634,-85.974822,"[{'label': 'display', 'lat': 41.72463431636958...",46514,...,-85.303308,47,"Fort Wayne, IN",100000,topPicks,,,,,
1517,e-1-4c07c0e8271dc9b67c4a2b9a-48,0,"[{'summary': 'This spot is popular', 'type': '...",4c07c0e8271dc9b67c4a2b9a,Chalet Party Shoppe,1800 Cassopolis St,41.708729,-85.971732,"[{'label': 'display', 'lat': 41.70872910432653...",46514,...,-85.303308,48,"Fort Wayne, IN",100000,topPicks,,,,,


In [33]:
# The same venue can appear in several searches; count distinct IDs (missing IDs count as one value).
unique_venue_count = places_df['venue_id'].nunique(dropna=False)
print(f"{unique_venue_count} unique venues")

792 unique venues


Cool, that will give me the ability to get an idea of what we can do on an evening or a weekend. 

Let's add in the venue category hierarchy.

In [34]:
def get_categories(row):
    """
    Look up the category hierarchy for one venue row.

    Uses the ID of the first entry in `row.venue_categories` to select the matching
    row of the module-level `category_df`.

    Returns:
        A Series indexed by `category_df.columns`. It is all-NaN (and named after the
        row) when the venue has no categories, when `venue_categories` is missing
        (e.g. NaN), or when the category ID is not present in `category_df`.
    """
    no_category = pd.Series(
        [np.nan] * len(category_df.columns),
        category_df.columns,
        name=row.name
    )

    venue_categories = row.venue_categories
    # A missing value arrives as NaN (a float), which is truthy and not indexable,
    # so check the type as well as emptiness.
    if not isinstance(venue_categories, (list, tuple)) or not venue_categories:
        return no_category

    category_id = venue_categories[0]['id']
    # IDs absent from the category tree fall back to NaN instead of raising KeyError.
    if category_id not in category_df.index:
        return no_category
    return category_df.loc[category_id]

# Drop category columns left over from an earlier run of this cell first; otherwise
# merging the frame with its own category lookup would add suffixed duplicates
# (cat_level_1_x / cat_level_1_y) every time the cell is re-run.
places_base_df = places_df.drop(columns=list(category_df.columns), errors='ignore')

# Both sides share the same index, so the index-on-index merge simply appends the
# cat_level_* columns to each venue row.
places_df = places_base_df.merge(
    places_base_df.apply(get_categories, axis=1), 
    left_index=True,
    right_index=True
)
places_df

Unnamed: 0,referralId,reasons_count,reasons_items,venue_id,venue_name,venue_location_address,venue_location_lat,venue_location_lng,venue_location_labeledLatLngs,venue_location_postalCode,...,venue_venuePage_id,flags_outsideRadius,venue_location_neighborhood,venue_events_count,venue_events_summary,cat_level_1,cat_level_2,cat_level_3,cat_level_4,cat_level_5
0,e-3-4b5a3e80f964a5201ab728e3-0,0,"[{'summary': 'This spot is popular', 'type': '...",4b5a3e80f964a5201ab728e3,BakerStreet,4820 N Clinton St,41.122200,-85.125421,"[{'label': 'display', 'lat': 41.12219979566053...",46825,...,,,,,,Food,Steakhouse,,,
1,e-3-4b86ebf4f964a5200ba631e3-1,0,"[{'summary': 'This spot is popular', 'type': '...",4b86ebf4f964a5200ba631e3,Papa John's Pizza,5626 Coldwater Rd,41.130839,-85.135060,"[{'label': 'display', 'lat': 41.13083878708079...",46825,...,,,,,,Food,Pizza,,,
2,e-3-4b26e239f964a5207c8224e3-2,0,"[{'summary': 'This spot is popular', 'type': '...",4b26e239f964a5207c8224e3,Agaves Mexican Grill,211 E Washington Center Rd,41.132293,-85.138746,"[{'label': 'display', 'lat': 41.13229322911061...",46825,...,,,,,,Food,Mexican,,,
3,e-3-4bfec4844e5d0f47a7207d1f-3,0,"[{'summary': 'This spot is popular', 'type': '...",4bfec4844e5d0f47a7207d1f,Wendy’s,5701 Coldwater Rd,41.130943,-85.136557,"[{'label': 'display', 'lat': 41.13094264479011...",46825,...,,,,,,Food,Fast Food,,,
4,e-3-4b5f57bff964a5201db529e3-4,0,"[{'summary': 'This spot is popular', 'type': '...",4b5f57bff964a5201db529e3,Cork 'n Cleaver,221 Washington Ctr Rd,41.132858,-85.138249,"[{'label': 'display', 'lat': 41.132858, 'lng':...",46825,...,,,,,,Food,Steakhouse,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1514,e-1-4cb7291c9c7ba35db4069706-45,0,"[{'summary': 'This spot is popular', 'type': '...",4cb7291c9c7ba35db4069706,Martin's Super Market,3900 E Bristol St,41.704659,-85.915933,"[{'label': 'display', 'lat': 41.70465911291793...",46514,...,,,,,,Shops,Food & Drink,Supermarket,,
1515,e-1-4d2dee20774f76eb80009633-46,0,"[{'summary': 'This spot is popular', 'type': '...",4d2dee20774f76eb80009633,Regal American Mall,2830 West Elm Street,40.738443,-84.156591,"[{'label': 'display', 'lat': 40.738443, 'lng':...",45805,...,,,,,,Arts & Entertainment,Movie Theater,,,
1516,e-1-4bfe7430e529c9285956bc8c-47,0,"[{'summary': 'This spot is popular', 'type': '...",4bfe7430e529c9285956bc8c,El Camino Real,290 County Road 6,41.724634,-85.974822,"[{'label': 'display', 'lat': 41.72463431636958...",46514,...,,,,,,Food,Mexican,,,
1517,e-1-4c07c0e8271dc9b67c4a2b9a-48,0,"[{'summary': 'This spot is popular', 'type': '...",4c07c0e8271dc9b67c4a2b9a,Chalet Party Shoppe,1800 Cassopolis St,41.708729,-85.971732,"[{'label': 'display', 'lat': 41.70872910432653...",46514,...,,,,,,Shops,Food & Drink,Liquor Store,,


Let's pull out the columns that would be helpful in creating or visualizing features.

In [35]:
# Subset of columns kept for building and visualizing features: venue identity and
# location, search rank, geocode metadata, search parameters and category levels.
columns = [
    'venue_id', 'venue_name', 'venue_location_lat', 'venue_location_lng', 
    'venue_location_crossStreet', 'venue_delivery_id', 'search_popularity', 
    'geo_where', 'geo_slug', 'geo_longId', 'geo_center_lat', 
    'geo_center_lng', 'city', 'radius', 'section', 'cat_level_1', 
    'cat_level_2', 'cat_level_3', 'cat_level_4'
]

# Display only those columns.
places_df[columns]

Unnamed: 0,venue_id,venue_name,venue_location_lat,venue_location_lng,venue_location_crossStreet,venue_delivery_id,search_popularity,geo_where,geo_slug,geo_longId,geo_center_lat,geo_center_lng,city,radius,section,cat_level_1,cat_level_2,cat_level_3,cat_level_4
0,4b5a3e80f964a5201ab728e3,BakerStreet,41.122200,-85.125421,,1502561,0,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",1000,food,Food,Steakhouse,,
1,4b86ebf4f964a5200ba631e3,Papa John's Pizza,41.130839,-85.135060,,2413959,1,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",1000,food,Food,Pizza,,
2,4b26e239f964a5207c8224e3,Agaves Mexican Grill,41.132293,-85.138746,Coldwater Road,,2,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",1000,food,Food,Mexican,,
3,4bfec4844e5d0f47a7207d1f,Wendy’s,41.130943,-85.136557,,1682119,3,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",1000,food,Food,Fast Food,,
4,4b5f57bff964a5201db529e3,Cork 'n Cleaver,41.132858,-85.138249,Coldwater Rd,,4,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",1000,food,Food,Steakhouse,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1514,4cb7291c9c7ba35db4069706,Martin's Super Market,41.704659,-85.915933,Cobblestone Blvd,,45,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",100000,topPicks,Shops,Food & Drink,Supermarket,
1515,4d2dee20774f76eb80009633,Regal American Mall,40.738443,-84.156591,,,46,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",100000,topPicks,Arts & Entertainment,Movie Theater,,
1516,4bfe7430e529c9285956bc8c,El Camino Real,41.724634,-85.974822,,1607863,47,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",100000,topPicks,Food,Mexican,,
1517,4c07c0e8271dc9b67c4a2b9a,Chalet Party Shoppe,41.708729,-85.971732,at Woodlawn Ave,,48,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",100000,topPicks,Shops,Food & Drink,Liquor Store,


Finally, let's put these results in some tables.

In [18]:
# Fresh MetaData: drop_all/create_all below act only on the tables defined in this cell.
meta = MetaData()

# One row per search hit (a venue may appear in several searches).
search_results_df = places_df[['venue_id', 'city', 'radius', 'section', 'search_popularity']]
# One row per venue: the first occurrence of each venue_id is kept.
venue_data_df = places_df[[
    'venue_id', 'venue_name', 'venue_location_lat', 'venue_location_lng', 'venue_location_crossStreet',
    'venue_delivery_id', 'cat_level_1', 'cat_level_2', 'cat_level_3', 'cat_level_4'
]].drop_duplicates('venue_id')

# engine.begin() opens a connection and transaction for the schema rebuild and the inserts.
with engine.begin() as conn:
    # Search hits: which venue showed up in which city/radius/section search, and at what rank.
    venue_searches = Table(
        "venue_searches", meta,
        Column('id', Integer, primary_key=True, comment='Venue search ID'),
        Column('venue_id', String(24), nullable=False, comment='Foursquare Venue ID'),
        Column('city', String(128), nullable=False, comment='Search City'),
        Column('radius', Integer, nullable=False, comment='Radius in meters'),
        Column('section', String(20), nullable=False, comment='Search section'),
        Column('search_popularity', Integer, nullable=False, comment='Popularity in search results')
    )

    # Venue attributes, keyed by the Foursquare venue ID.
    venue_data = Table(
        'venue_data', meta,
        Column('venue_id', String(24), primary_key=True, comment='Foursquare Venue ID'),
        Column('venue_name', String(255), nullable=False, comment='Venue name'),
        Column('venue_location_lat', Numeric(10, 6), nullable=False, comment='Venue Location Latitude'),
        Column('venue_location_lng', Numeric(10, 6), nullable=False, comment='Venue Location Longitude'),
        Column('venue_location_crossStreet', String(255), comment='Street Intersection of Venue Location'),
        Column('venue_delivery_id', String(40), comment='Venue Delivery Identifier'),
        Column('cat_level_1', String(50), nullable=False, comment='Level 1 Category Name'),
        Column('cat_level_2', String(50), comment='Level 2 Category Name'),
        Column('cat_level_3', String(50), comment='Level 3 Category Name'),
        Column('cat_level_4', String(50), comment='Level 4 Category Name')
    )

    # Destructive: both tables are dropped and recreated before loading.
    meta.drop_all(conn) # During development
    meta.create_all(conn)

    # Append into the freshly created tables; the DataFrame index is not written.
    search_results_df.to_sql('venue_searches', conn, if_exists='append', index=False)
    venue_data_df.to_sql('venue_data', conn, if_exists='append', index=False)

2020-12-06 01:17:30,131 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2020-12-06 01:17:30,134 INFO sqlalchemy.engine.base.Engine DESCRIBE `venue_searches`
2020-12-06 01:17:30,135 INFO sqlalchemy.engine.base.Engine {}
2020-12-06 01:17:30,142 INFO sqlalchemy.engine.base.Engine DESCRIBE `venue_data`
2020-12-06 01:17:30,143 INFO sqlalchemy.engine.base.Engine {}
2020-12-06 01:17:30,152 INFO sqlalchemy.engine.base.Engine 
DROP TABLE venue_data
2020-12-06 01:17:30,153 INFO sqlalchemy.engine.base.Engine {}
2020-12-06 01:17:30,176 INFO sqlalchemy.engine.base.Engine 
DROP TABLE venue_searches
2020-12-06 01:17:30,177 INFO sqlalchemy.engine.base.Engine {}
2020-12-06 01:17:30,203 INFO sqlalchemy.engine.base.Engine DESCRIBE `venue_searches`
2020-12-06 01:17:30,204 INFO sqlalchemy.engine.base.Engine {}
2020-12-06 01:17:30,208 INFO sqlalchemy.engine.base.Engine DESCRIBE `venue_data`
2020-12-06 01:17:30,209 INFO sqlalchemy.engine.base.Engine {}
2020-12-06 01:17:30,215 INFO sqlalchemy.engine.base.

In [19]:
# Read the stored search hits back, using the `id` column as the DataFrame index.
pd.read_sql(venue_searches.select(), engine, index_col='id')

2020-12-06 01:17:30,666 INFO sqlalchemy.engine.base.OptionEngine SELECT venue_searches.id, venue_searches.venue_id, venue_searches.city, venue_searches.radius, venue_searches.section, venue_searches.search_popularity 
FROM venue_searches
2020-12-06 01:17:30,668 INFO sqlalchemy.engine.base.OptionEngine {}


Unnamed: 0_level_0,venue_id,city,radius,section,search_popularity
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,4b5a3e80f964a5201ab728e3,"Fort Wayne, IN",1000,food,0
2,4b86ebf4f964a5200ba631e3,"Fort Wayne, IN",1000,food,1
3,4b26e239f964a5207c8224e3,"Fort Wayne, IN",1000,food,2
4,4bfec4844e5d0f47a7207d1f,"Fort Wayne, IN",1000,food,3
5,4b5f57bff964a5201db529e3,"Fort Wayne, IN",1000,food,4
...,...,...,...,...,...
1515,4cb7291c9c7ba35db4069706,"Fort Wayne, IN",100000,topPicks,45
1516,4d2dee20774f76eb80009633,"Fort Wayne, IN",100000,topPicks,46
1517,4bfe7430e529c9285956bc8c,"Fort Wayne, IN",100000,topPicks,47
1518,4c07c0e8271dc9b67c4a2b9a,"Fort Wayne, IN",100000,topPicks,48


In [20]:
# Read the stored venues back, using `venue_id` as the DataFrame index.
pd.read_sql(venue_data.select(), engine, index_col='venue_id')

2020-12-06 01:17:30,745 INFO sqlalchemy.engine.base.OptionEngine SELECT venue_data.venue_id, venue_data.venue_name, venue_data.venue_location_lat, venue_data.venue_location_lng, venue_data.`venue_location_crossStreet`, venue_data.venue_delivery_id, venue_data.cat_level_1, venue_data.cat_level_2, venue_data.cat_level_3, venue_data.cat_level_4 
FROM venue_data
2020-12-06 01:17:30,746 INFO sqlalchemy.engine.base.OptionEngine {}


Unnamed: 0_level_0,venue_name,venue_location_lat,venue_location_lng,venue_location_crossStreet,venue_delivery_id,cat_level_1,cat_level_2,cat_level_3,cat_level_4
venue_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
4a8731bef964a5202d0320e3,Lake James,41.697409,-85.031737,,,Outdoors & Recreation,Lake,,
4b0618b3f964a52098e822e3,Regal Coldwater Crossing,41.131203,-85.142060,,,Arts & Entertainment,Movie Theater,,
4b12ed20f964a520ff9023e3,Starbucks,41.075410,-85.145640,,1507234,Food,Coffee Shop,,
4b130366f964a520a89223e3,Mad Anthony Brewing Company,41.067643,-85.152640,at Taylor,2274630,Nightlife,Brewery,,
4b1304aaf964a520c29223e3,JK O'Donnell's Irish Pub,41.078097,-85.140302,btw Harrison & Calhoun,,Food,Irish,,
...,...,...,...,...,...,...,...,...,...
5c9530f98c35dc002ce78390,Walmart Grocery Pickup and Delivery,41.128227,-85.137894,,,Shops,Food & Drink,Grocery Store,
5c95310401bc5a002cc1d2ba,Walmart Grocery Pickup & Delivery,41.687057,-86.058655,,,Shops,Food & Drink,Grocery Store,
5cb8ec6f9d7468002c55b86f,Walmart Grocery Pickup,41.362476,-85.080406,,,Shops,Food & Drink,Grocery Store,
5d4df4ed78bb0e0007c07d84,Promenade Park,41.084171,-85.143061,,,Outdoors & Recreation,Park,,


---

## Favorite Venue Types

The other thing I want to look for in cities is places that we know we enjoy. I will still use the `explore` endpoint, but with the `query` parameter.

In [21]:
# Free-text search terms; each one is sent as the `query` parameter of an explore search.
favorite_venue_types = [
    'hiking trail',
    'bbq',
    'historic sites',
    'park',
    'dog park',
    'british food',
    'irish food',
    'arcade',
    'pizzeria',
    'ice cream shop'
]

In [22]:
async def get_favorite_sites(city):
    """
    Collect venues of my favorite types around `city` at every search radius.

    One `query_places` call is issued per (radius, venue type) pair taken from the
    module-level `radii` and `favorite_venue_types` lists. All calls share a single
    HTTP session and are awaited together; their results are concatenated into one
    DataFrame with a fresh integer index.
    """
    async with aiohttp.ClientSession() as session:
        pending = [
            query_places(session, city, radius, query=venue_type)
            for radius, venue_type in product(radii, favorite_venue_types)
        ]
        frames = await asyncio.gather(*pending)
    return pd.concat(frames, ignore_index=True)

In [23]:
executor = ProcessPoolExecutor()
favorite_sites = await get_favorite_sites('Fort Wayne, IN')
favorite_sites

Unnamed: 0,referralId,reasons_count,reasons_items,venue_id,venue_name,venue_location_lat,venue_location_lng,venue_location_labeledLatLngs,venue_location_postalCode,venue_location_cc,...,city,radius,query,venue_venuePage_id,venue_delivery_id,venue_delivery_url,venue_delivery_provider_name,venue_delivery_provider_icon_prefix,venue_delivery_provider_icon_sizes,venue_delivery_provider_icon_name
0,e-0-4dea1f20b0fb8293f7cb31de-0,0.0,"[{'summary': 'This spot is popular', 'type': '...",4dea1f20b0fb8293f7cb31de,Pufferbelly Trail,41.180053,-85.155595,"[{'label': 'display', 'lat': 41.18005280328018...",46825,US,...,"Fort Wayne, IN",1000,hiking trail,,,,,,,
1,e-0-4c780866a868370430df0b4d-1,0.0,"[{'summary': 'This spot is popular', 'type': '...",4c780866a868370430df0b4d,Parkview Outdoor Trail Head,41.173982,-85.148131,"[{'label': 'display', 'lat': 41.17398188943014...",46825,US,...,"Fort Wayne, IN",1000,hiking trail,,,,,,,
2,e-0-53a98b9d498e0aca74c23326-2,0.0,"[{'summary': 'This spot is popular', 'type': '...",53a98b9d498e0aca74c23326,RiverGreen Way Trail Head,41.087400,-85.048495,"[{'label': 'display', 'lat': 41.0874, 'lng': -...",,US,...,"Fort Wayne, IN",1000,hiking trail,,,,,,,
3,e-0-50561db8e4b017bb26f16c5b-3,0.0,"[{'summary': 'This spot is popular', 'type': '...",50561db8e4b017bb26f16c5b,Safari Trail,41.105984,-85.151028,"[{'label': 'display', 'lat': 41.10598377025788...",46805,US,...,"Fort Wayne, IN",1000,hiking trail,,,,,,,
4,e-0-4d12510e80f6721eb7bf16eb-4,0.0,"[{'summary': 'This spot is popular', 'type': '...",4d12510e80f6721eb7bf16eb,Towpath Trail (Smith Road Trailhead),41.050813,-85.210342,"[{'label': 'display', 'lat': 41.05081337287187...",,US,...,"Fort Wayne, IN",1000,hiking trail,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1122,e-0-4ba02e2df964a520dd5f37e3-45,0.0,"[{'summary': 'This spot is popular', 'type': '...",4ba02e2df964a520dd5f37e3,Payne's Restaurant,40.481977,-85.546167,"[{'label': 'display', 'lat': 40.48197658404458...",46933,US,...,"Fort Wayne, IN",100000,ice cream shop,77441402,,,,,,
1123,e-0-4bcba575fb84c9b690561f3e-46,0.0,"[{'summary': 'This spot is popular', 'type': '...",4bcba575fb84c9b690561f3e,The Chief Ice Cream,41.586222,-85.843135,"[{'label': 'display', 'lat': 41.58622156342362...",46526,US,...,"Fort Wayne, IN",100000,ice cream shop,,1614151,https://www.grubhub.com/restaurant/the-chief-i...,grubhub,https://fastly.4sqi.net/img/general/cap/,"[40, 50]",/delivery_provider_grubhub_20180129.png
1124,e-0-4b8d74cdf964a520cefc32e3-47,0.0,"[{'summary': 'This spot is popular', 'type': '...",4b8d74cdf964a520cefc32e3,La Michoacana,41.087312,-85.145893,"[{'label': 'display', 'lat': 41.087312, 'lng':...",46808,US,...,"Fort Wayne, IN",100000,ice cream shop,,1502829,https://www.grubhub.com/restaurant/la-michoaca...,grubhub,https://fastly.4sqi.net/img/general/cap/,"[40, 50]",/delivery_provider_grubhub_20180129.png
1125,e-0-4c5f4e5990b2c9b692773a22-48,0.0,"[{'summary': 'This spot is popular', 'type': '...",4c5f4e5990b2c9b692773a22,Zesto,41.131710,-85.139029,"[{'label': 'display', 'lat': 41.13171016180453...",46825,US,...,"Fort Wayne, IN",100000,ice cream shop,,,,,,,


In [36]:
# `get_categories` is already defined in the cell that adds categories to `places_df`;
# it is reused here rather than redefined, so the two copies cannot drift apart.

# Drop category columns left over from an earlier run of this cell first; otherwise
# merging the frame with its own category lookup would add suffixed duplicates
# (cat_level_1_x / cat_level_1_y) every time the cell is re-run.
favorites_base_df = favorite_sites.drop(columns=list(category_df.columns), errors='ignore')

# Both sides share the same index, so the index-on-index merge simply appends the
# cat_level_* columns to each venue row.
favorite_sites = favorites_base_df.merge(
    favorites_base_df.apply(get_categories, axis=1), 
    left_index=True,
    right_index=True
)


KeyError: "['section', 'cat_level_3', 'cat_level_2', 'cat_level_1', 'cat_level_4'] not in index"

In [None]:
# Columns of interest for the favorite-venue searches. These searches are run with
# `query` rather than `section`, so `favorite_sites` has a `query` column and no
# `section` column; selecting 'section' raises a KeyError.
fave_columns = [
 'venue_id',
 'venue_name',
 'venue_location_lat',
 'venue_location_lng',
 'venue_location_crossStreet',
 'venue_delivery_id',
 'search_popularity',
 'geo_where',
 'geo_slug',
 'geo_longId',
 'geo_center_lat',
 'geo_center_lng',
 'city',
 'radius',
 'query',
 'cat_level_1',
 'cat_level_2',
 'cat_level_3',
 'cat_level_4']

In [None]:
# Fresh MetaData: drop_all/create_all below act only on `venue_favorites`.
meta = MetaData()

# One row per search hit (a venue may appear in several searches).
search_results_df = favorite_sites[['venue_id', 'city', 'radius', 'query', 'search_popularity']]
# One row per venue: the first occurrence of each venue_id is kept.
venue_data_df = favorite_sites[[
    'venue_id', 'venue_name', 'venue_location_lat', 'venue_location_lng', 'venue_location_crossStreet',
    'venue_delivery_id', 'cat_level_1', 'cat_level_2', 'cat_level_3', 'cat_level_4'
]].drop_duplicates('venue_id')

with engine.begin() as conn:
    # Search hits for the favorite-venue queries: venue, city, radius, query text and rank.
    venue_favorites = Table(
        "venue_favorites", meta,
        Column('id', Integer, primary_key=True, comment='Venue search ID'),
        Column('venue_id', String(24), nullable=False, comment='Foursquare Venue ID'),
        Column('city', String(128), nullable=False, comment='Search City'),
        Column('radius', Integer, nullable=False, comment='Radius in meters'),
        Column('query', String(20), nullable=False, comment='Search query'),
        Column('search_popularity', Integer, nullable=False, comment='Popularity in search results')
    )

    meta.drop_all(conn) # During development
    meta.create_all(conn)

    # These rows carry a `query` column, so they belong in the table created above,
    # not in `venue_searches` (which has a `section` column instead).
    search_results_df.to_sql('venue_favorites', conn, if_exists='append', index=False)

    # `venue_data` is keyed by venue_id and already holds the venues stored by the
    # popular-spots cell, so append only venues it does not contain yet.
    # Assumes the `venue_data` table already exists (created by that earlier cell).
    known_venue_ids = pd.read_sql('SELECT venue_id FROM venue_data', conn)['venue_id']
    new_venue_data_df = venue_data_df[~venue_data_df['venue_id'].isin(known_venue_ids)]
    new_venue_data_df.to_sql('venue_data', conn, if_exists='append', index=False)