# Clustering and Scoring Job Relocation Opportunities - Playground Notebook

Austin Rainwater

---

# Initialization

In [1]:
!pip install --quiet --upgrade sqlalchemy pymysql

from urllib.parse import quote as url_encode

import pandas as pd
import numpy as np
import aiohttp
import asyncio
import requests
import xml.etree.ElementTree as xml

from concurrent.futures import ProcessPoolExecutor

from pandas import json_normalize
from itertools import product

from sqlalchemy import (
    create_engine,
    Table,
    Column,
    MetaData,
    String,
    Numeric,
    Integer
)

import yaml

with open('secrets.yaml', 'r') as secrets_file:
    secrets = yaml.safe_load(secrets_file)
    
header = {"User-Agent": 
          'datascience jupyter notebook/0.0 '
          '(https://github.com/pacorain/datascience-certification-final-project; '
          'Austin Rainwater, paco@heckin.io)'}
v = '20201108'

---

# City Definition

Obviously, a good place for me to start is with some cities. Below is the table definition for the cities I will be exploring and their specific traits.

In [2]:
engine = create_engine(secrets['db_connection_string'], echo=True)

In [3]:
meta = MetaData()

cities = Table(
    'city', meta,
    Column('city_name', String(50), primary_key=True, comment='Community Name'),
    Column('metro_name', String(50), comment='Metropolitan Area Name'),
    Column('state', String(2), nullable=False, comment='2-Letter abbreviation of State'),
    Column('lat', Numeric(10, 6), nullable=False, comment='Latitude of City'),
    Column('lng', Numeric(10, 6), nullable=False, comment='Longitude of City'),
    Column('area_val', Numeric(10, 4), nullable=False, comment='Area of city in square miles'),
    Column('total_pop', Integer, nullable=False, comment='Total population of city')
)

In [4]:
meta.drop_all(engine) # During development
meta.create_all(engine)

2020-12-03 19:50:52,684 INFO sqlalchemy.engine.base.Engine SHOW VARIABLES LIKE 'sql_mode'
2020-12-03 19:50:52,686 INFO sqlalchemy.engine.base.Engine {}
2020-12-03 19:50:52,697 INFO sqlalchemy.engine.base.Engine SHOW VARIABLES LIKE 'lower_case_table_names'
2020-12-03 19:50:52,699 INFO sqlalchemy.engine.base.Engine {}
2020-12-03 19:50:52,717 INFO sqlalchemy.engine.base.Engine SELECT DATABASE()
2020-12-03 19:50:52,718 INFO sqlalchemy.engine.base.Engine {}
2020-12-03 19:50:52,722 INFO sqlalchemy.engine.base.Engine show collation where `Charset` = 'utf8mb4' and `Collation` = 'utf8mb4_bin'
2020-12-03 19:50:52,723 INFO sqlalchemy.engine.base.Engine {}
2020-12-03 19:50:52,735 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS CHAR(60)) AS anon_1
2020-12-03 19:50:52,736 INFO sqlalchemy.engine.base.Engine {}
2020-12-03 19:50:52,739 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS CHAR(60)) AS anon_1
2020-12-03 19:50:52,740 INFO sqlalchemy.engine.base.E

Let's start with my birthplace: Fort Wayne, Indiana.

In [5]:
new_city = cities.insert()

try:
    engine.execute(new_city, [
        {'city_name': 'Fort Wayne, IN', 'metro_name': 'Fort Wayne, IN', 'state': 'IN'}
    ])
except:
    print("Oops! That didn't work.")

2020-12-03 19:50:52,838 INFO sqlalchemy.engine.base.Engine INSERT INTO city (city_name, metro_name, state) VALUES (%(city_name)s, %(metro_name)s, %(state)s)
2020-12-03 19:50:52,840 INFO sqlalchemy.engine.base.Engine {'city_name': 'Fort Wayne, IN', 'metro_name': 'Fort Wayne, IN', 'state': 'IN'}
2020-12-03 19:50:52,847 INFO sqlalchemy.engine.base.Engine ROLLBACK
Oops! That didn't work.


Ah, the table requires some more data to be able to insert the record. I could use the geocoder library from before to get the latitude and longitude, but since I will be using Wikipedia anyway, let's see if I can grab it from there.

I did some experimenting with the [Wikipedia API Sandbox](https://en.wikipedia.org/wiki/Special:ApiSandbox#action=parse&format=json&page=Fort%20Wayne%2C%20Indiana&redirects=1&prop=wikitext), and oddly enough while there are multiple endpoints capable of getting the _names_ of the templates used in a page, I could not for the life of me find a way to get the _data inserted to_ the templates in an easy format such as JSON. So instead, I'm going to grab the `parsetree` and parse it with Python's XML libraries.

In [6]:
city_name = 'Fort Wayne'
state_name = 'IN'

wikipedia_url = 'https://en.wikipedia.org/w/api.php'
params = {
    "action": "parse",
    "format": "json",
    "redirects": "1",
    "page": f"{city_name}, {state_name}",
    "prop": "parsetree"
}

response = requests.get(wikipedia_url, params=params, headers=header).json()['parse']['parsetree']['*']
response = xml.canonicalize(response, strip_text=True)

# Write XML data for local exploration
with open('data/fort_wayne.xml', 'w') as xml_file:
    xml_file.write(response)

Ah, going through the XML file, the map on the Wikipedia article is an SVG (i.e. an image, not something that contains computer-readable geographic data), so I will need to use a geocoder. 

I recall from the previous lab that when you grab data from Foursquare's API, it will geocode the 'near' parameter and return the latitude and logitude used.

I also want to include the total size of the city, so in order to enter data into the table, I need to grab data from Wikipedia _and_ Foursquare. Which is fine, because I need more data to explore possible features

In [7]:
wiki_data = xml.fromstring(response)

In [8]:
foursquare_url = "https://api.foursquare.com/v2/venues/explore"

params = {
    'client_id': secrets['4SQ_CLIENT_ID'],
    'client_secret': secrets['4SQ_CLIENT_SECRET'],
    'limit': '50',
    'v': v,
    'near': 'Fort Wayne, IN',
    'radius': 1000,
    'time': 'any', 
    'day': 'any',
    'sortByPopularity': '1'
}

foursquare_response = requests.get(foursquare_url, params=params, headers=header).json()['response']

In [9]:
def template_value(wiki_data, template_title, part_name):
    template = wiki_data.find(".//template[title='{}']".format(template_title))
    return template.find(".part[name='{}'].value".format(part_name)).text

lat = float(foursquare_response['geocode']['center']['lat'])
lng = float(foursquare_response['geocode']['center']['lng'])
sq_mi = float(template_value(wiki_data, "Infobox settlement", "area_total_sq_mi"))
total_pop = int(template_value(wiki_data, "Infobox settlement", "population_est"))

Alright, I've gotten the values I need initially for a city; now let's try inserting it.

In [10]:
engine.execute(new_city, [{
    'city_name': 'Fort Wayne', 
    'metro_name': 'Fort Wayne', 
    'state': 'IN', 
    'lat': lat,
    'lng': lng,
    'area_val': sq_mi,
    'total_pop': total_pop
}])

2020-12-03 19:50:54,081 INFO sqlalchemy.engine.base.Engine INSERT INTO city (city_name, metro_name, state, lat, lng, area_val, total_pop) VALUES (%(city_name)s, %(metro_name)s, %(state)s, %(lat)s, %(lng)s, %(area_val)s, %(total_pop)s)
2020-12-03 19:50:54,082 INFO sqlalchemy.engine.base.Engine {'city_name': 'Fort Wayne', 'metro_name': 'Fort Wayne', 'state': 'IN', 'lat': 41.1306, 'lng': -85.12886, 'area_val': 110.79, 'total_pop': 270402}
2020-12-03 19:50:54,090 INFO sqlalchemy.engine.base.Engine COMMIT


<sqlalchemy.engine.result.ResultProxy at 0x7f40e046e1c0>

In [11]:
query = cities.select()

pd.read_sql(query, engine)

2020-12-03 19:50:54,138 INFO sqlalchemy.engine.base.OptionEngine SELECT city.city_name, city.metro_name, city.state, city.lat, city.lng, city.area_val, city.total_pop 
FROM city
2020-12-03 19:50:54,140 INFO sqlalchemy.engine.base.OptionEngine {}


Unnamed: 0,city_name,metro_name,state,lat,lng,area_val,total_pop
0,Fort Wayne,Fort Wayne,IN,41.1306,-85.12886,110.79,270402


Not bad. 

Next, I want to grab some data from Foursquare to build a feature based on what's popular within 1, 5, 25, and 100 km. I'll use the category hierarchy like I did in the week 3 lab. Given that the Foursquare API allows for 99,500 of these calls a day, and up to 5,000 per hour, I can also do this comfortably with each section defined in the `venues/explore` enpoint to see how much variety is in each section in an area.

In [12]:
url = 'https://api.foursquare.com/v2/venues/categories'
params = {
    'client_id': secrets['4SQ_CLIENT_ID'],
    'client_secret': secrets['4SQ_CLIENT_SECRET'],
    'v': v
}
foursquare_categories = requests.get(url, params=params).json()

def category_hier(categories, prefix=[]):
    result = []
    
    for category in categories:
        category = json_normalize(category).iloc[0]
        current_category = pd.Series(
            data=prefix + [category.shortName] + [np.nan] * (4 - len(prefix)),
            name=str(category.id),
            index=[
                'cat_level_1',
                'cat_level_2',
                'cat_level_3',
                'cat_level_4',
                'cat_level_5'
            ]
        )
        result.append(current_category)
        if subcategories := category.categories:
            result += category_hier(subcategories, prefix + [category.shortName])
            
    return result

categories = foursquare_categories['response']['categories']
category_df = pd.DataFrame(category_hier(categories))

In [25]:
radii = [1000, 5000, 25000, 100000]
sections = ['food', 'drinks', 'coffee', 'shops', 'arts', 'outdoors', 'sights', 'trending', 'topPicks']

async def get_popular_spots(city):
    """
    Get popular spots in various "sections" within various distances of `city`
    """
    async with aiohttp.ClientSession() as session:
        tasks = []
        for r, s in product(radii, sections):
            task = query_places(session, city, r, section=s)
            tasks.append(task)
        results = await asyncio.gather(*tasks)
    return pd.concat(results, ignore_index=True)
    
    
async def query_places(session, location, radius, section='', query=''):
    """
    With an existing HTTP `session`, get popular spots of the type `section` within `radius` meters of `location`
    
    Uses multiprocessing for quicker processing of the 36 times this function is called
    """
    async with session.get("https://api.foursquare.com/v2/venues/explore", params={
        'client_id': secrets['4SQ_CLIENT_ID'],
        'client_secret': secrets['4SQ_CLIENT_SECRET'],
        'limit': '50',
        'v': v,
        'near': location,
        'radius': radius, 
        'section': section,
        'query': query,
        'sortByPopularity': 1
    }) as result:
        data = await result.json()
    loop = asyncio.get_running_loop()
    venues = await loop.run_in_executor(executor, normalize_foursquare_response, data)
    if venues is not None:
        venues['city'] = location
        venues['radius'] = radius
        if section:
            venues['section'] = section
        if query:
            venues['query'] = query
    return venues
    
    
def normalize_foursquare_response(data):
    """
    Converts the Foursquare response into a dataframe with all of the venues, as well geolocation metadata.
    """
    if 'groups' not in data['response']:
        return None
    venues = json_normalize(data, ['response', 'groups', 'items'], sep='_')
    geo = json_normalize(data['response']['geocode'], sep='_').loc[0] # json_normalize returns single-index df
    geo.index = pd.Index(f'geo_{name}' for name in geo.index)
    venues.loc[:, geo.index] = geo.values
    venues['search_popularity'] = venues.index.values
    return venues


In [26]:
executor = ProcessPoolExecutor()
places_df = await get_popular_spots('Fort Wayne, IN')
places_df

Unnamed: 0,referralId,reasons_count,reasons_items,venue_id,venue_name,venue_location_address,venue_location_crossStreet,venue_location_lat,venue_location_lng,venue_location_labeledLatLngs,...,search_popularity,city,radius,section,venue_venuePage_id,flags_outsideRadius,venue_location_neighborhood,venue_events_count,venue_events_summary,venue_events_items
0,e-3-4b26e239f964a5207c8224e3-0,0,"[{'summary': 'This spot is popular', 'type': '...",4b26e239f964a5207c8224e3,Agaves Mexican Grill,211 E Washington Center Rd,Coldwater Road,41.132293,-85.138746,"[{'label': 'display', 'lat': 41.13229322911061...",...,0,"Fort Wayne, IN",1000,food,,,,,,
1,e-3-4b5a3e80f964a5201ab728e3-1,0,"[{'summary': 'This spot is popular', 'type': '...",4b5a3e80f964a5201ab728e3,BakerStreet,4820 N Clinton St,,41.122200,-85.125421,"[{'label': 'display', 'lat': 41.12219979566053...",...,1,"Fort Wayne, IN",1000,food,,,,,,
2,e-3-4b86ebf4f964a5200ba631e3-2,0,"[{'summary': 'This spot is popular', 'type': '...",4b86ebf4f964a5200ba631e3,Papa John's Pizza,5626 Coldwater Rd,,41.130839,-85.135060,"[{'label': 'display', 'lat': 41.13083878708079...",...,2,"Fort Wayne, IN",1000,food,,,,,,
3,e-3-4b5f57bff964a5201db529e3-3,0,"[{'summary': 'This spot is popular', 'type': '...",4b5f57bff964a5201db529e3,Cork 'n Cleaver,221 Washington Ctr Rd,Coldwater Rd,41.132858,-85.138249,"[{'label': 'display', 'lat': 41.132858, 'lng':...",...,3,"Fort Wayne, IN",1000,food,,,,,,
4,e-3-4bfec4844e5d0f47a7207d1f-4,0,"[{'summary': 'This spot is popular', 'type': '...",4bfec4844e5d0f47a7207d1f,Wendy’s,5701 Coldwater Rd,,41.130943,-85.136557,"[{'label': 'display', 'lat': 41.13094264479011...",...,4,"Fort Wayne, IN",1000,food,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1505,e-1-4cb7291c9c7ba35db4069706-45,0,"[{'summary': 'This spot is popular', 'type': '...",4cb7291c9c7ba35db4069706,Martin's Super Market,3900 E Bristol St,Cobblestone Blvd,41.704659,-85.915933,"[{'label': 'display', 'lat': 41.70465911291793...",...,45,"Fort Wayne, IN",100000,topPicks,,,,,,
1506,e-1-4c07c0e8271dc9b67c4a2b9a-46,0,"[{'summary': 'This spot is popular', 'type': '...",4c07c0e8271dc9b67c4a2b9a,Chalet Party Shoppe,1800 Cassopolis St,at Woodlawn Ave,41.708729,-85.971732,"[{'label': 'display', 'lat': 41.70872910432653...",...,46,"Fort Wayne, IN",100000,topPicks,,,,,,
1507,e-1-528768b811d2a84dc4388b20-47,0,"[{'summary': 'This spot is popular', 'type': '...",528768b811d2a84dc4388b20,Joanna's Family Restaurant,10368 Leo Rd,,41.179797,-85.073909,"[{'label': 'display', 'lat': 41.17979651983688...",...,47,"Fort Wayne, IN",100000,topPicks,,,,,,
1508,e-1-4c955265f244b1f70cd72a1d-48,0,"[{'summary': 'This spot is popular', 'type': '...",4c955265f244b1f70cd72a1d,Lucy's vedi twist,,,41.627403,-85.424383,"[{'label': 'display', 'lat': 41.62740269, 'lng...",...,48,"Fort Wayne, IN",100000,topPicks,,,,,,


In [15]:
print(f"{len(places_df.venue_id.unique())} unique venues")

730 unique venues


Cool, that will give me the ability to get an idea of what we can do on an evening or a weekend. 

Let's add in the venue category hierarchy.

In [16]:
places_df = places_df.merge(
    places_df.apply(lambda row: category_df.loc[row.venue_categories[0]['id']], axis=1), 
    left_index=True,
    right_index=True
)
places_df

Unnamed: 0,referralId,reasons_count,reasons_items,venue_id,venue_name,venue_location_address,venue_location_lat,venue_location_lng,venue_location_labeledLatLngs,venue_location_postalCode,...,flags_outsideRadius,venue_location_neighborhood,venue_events_count,venue_events_summary,venue_events_items,cat_level_1,cat_level_2,cat_level_3,cat_level_4,cat_level_5
0,e-3-5081efe6e4b0d5064a98d8b8-0,0,"[{'summary': 'This spot is popular', 'type': '...",5081efe6e4b0d5064a98d8b8,Banh Mi Barista,5320 Coldwater Rd,41.127890,-85.135835,"[{'label': 'display', 'lat': 41.12789011966495...",46825,...,,,,,,Food,Asian,Vietnamese,,
1,e-3-4b2eb7bff964a52093e524e3-1,0,"[{'summary': 'This spot is popular', 'type': '...",4b2eb7bff964a52093e524e3,Jimmy John's,5412 Coldwater Rd,41.129067,-85.135295,"[{'label': 'display', 'lat': 41.12906702616283...",46825,...,,,,,,Food,Sandwiches,,,
2,e-3-4b5f57bff964a5201db529e3-2,0,"[{'summary': 'This spot is popular', 'type': '...",4b5f57bff964a5201db529e3,Cork 'n Cleaver,221 Washington Ctr Rd,41.132858,-85.138249,"[{'label': 'display', 'lat': 41.132858, 'lng':...",46825,...,,,,,,Food,Steakhouse,,,
3,e-3-4b5a3e80f964a5201ab728e3-3,0,"[{'summary': 'This spot is popular', 'type': '...",4b5a3e80f964a5201ab728e3,BakerStreet,4820 N Clinton St,41.122200,-85.125421,"[{'label': 'display', 'lat': 41.12219979566053...",46825,...,,,,,,Food,Steakhouse,,,
4,e-3-4e3dd20bd22d102e8547c605-4,0,"[{'summary': 'This spot is popular', 'type': '...",4e3dd20bd22d102e8547c605,Koto Japanese Steakhouse & Sushi,301 E Washington Center Rd,41.133189,-85.137754,"[{'label': 'display', 'lat': 41.13318911813626...",46825,...,,,,,,Food,Asian,Japanese,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1510,e-1-55a15ad1498eb8d1d6fe79c0-45,0,"[{'summary': 'This spot is popular', 'type': '...",55a15ad1498eb8d1d6fe79c0,The Hoppy Gnome,203 E Berry St,41.079705,-85.137927,"[{'label': 'display', 'lat': 41.07970509668428...",46802,...,,,,,,Food,Gastropub,,,
1511,e-1-4c01289bde3376b00157f9a4-46,0,"[{'summary': 'This spot is popular', 'type': '...",4c01289bde3376b00157f9a4,Gays Hops-N-Schnapps,111 W Toledo St,41.730473,-84.933738,"[{'label': 'display', 'lat': 41.73047316936916...",46737,...,,,,,,Shops,Food & Drink,Liquor Store,,
1512,e-1-4bb0e871f964a52002693ce3-47,0,"[{'summary': 'This spot is popular', 'type': '...",4bb0e871f964a52002693ce3,Simonton Lake Drive-In,51602 State Road 19,41.743735,-85.974051,"[{'label': 'display', 'lat': 41.74373502343176...",46514,...,,,,,,Food,American,,,
1513,e-1-4bedd6fee24d20a139167214-48,0,"[{'summary': 'This spot is popular', 'type': '...",4bedd6fee24d20a139167214,Lake Wawasee,,41.405746,-85.711193,"[{'label': 'display', 'lat': 41.40574614490232...",46567,...,,,,,,Outdoors & Recreation,Lake,,,


Let's pull out the columns that would be helpful in creating or visualizing features.

In [17]:
columns = [
    'venue_id', 'venue_name', 'venue_location_lat', 'venue_location_lng', 
    'venue_location_crossStreet', 'venue_delivery_id', 'search_popularity', 
    'geo_where', 'geo_slug', 'geo_longId', 'geo_center_lat', 
    'geo_center_lng', 'city', 'radius', 'section', 'cat_level_1', 
    'cat_level_2', 'cat_level_3', 'cat_level_4'
]

places_df[columns]

Unnamed: 0,venue_id,venue_name,venue_location_lat,venue_location_lng,venue_location_crossStreet,venue_delivery_id,search_popularity,geo_where,geo_slug,geo_longId,geo_center_lat,geo_center_lng,city,radius,section,cat_level_1,cat_level_2,cat_level_3,cat_level_4
0,5081efe6e4b0d5064a98d8b8,Banh Mi Barista,41.127890,-85.135835,,,0,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",1000,food,Food,Asian,Vietnamese,
1,4b2eb7bff964a52093e524e3,Jimmy John's,41.129067,-85.135295,,,1,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",1000,food,Food,Sandwiches,,
2,4b5f57bff964a5201db529e3,Cork 'n Cleaver,41.132858,-85.138249,Coldwater Rd,,2,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",1000,food,Food,Steakhouse,,
3,4b5a3e80f964a5201ab728e3,BakerStreet,41.122200,-85.125421,,1502561,3,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",1000,food,Food,Steakhouse,,
4,4e3dd20bd22d102e8547c605,Koto Japanese Steakhouse & Sushi,41.133189,-85.137754,,1332856,4,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",1000,food,Food,Asian,Japanese,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1510,55a15ad1498eb8d1d6fe79c0,The Hoppy Gnome,41.079705,-85.137927,Clinton St.,1507553,45,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",100000,topPicks,Food,Gastropub,,
1511,4c01289bde3376b00157f9a4,Gays Hops-N-Schnapps,41.730473,-84.933738,at Broad St,,46,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",100000,topPicks,Shops,Food & Drink,Liquor Store,
1512,4bb0e871f964a52002693ce3,Simonton Lake Drive-In,41.743735,-85.974051,,,47,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",100000,topPicks,Food,American,,
1513,4bedd6fee24d20a139167214,Lake Wawasee,41.405746,-85.711193,,,48,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",100000,topPicks,Outdoors & Recreation,Lake,,


Finally, let's put these results in some tables.

In [18]:
meta = MetaData()

search_results_df = places_df[['venue_id', 'city', 'radius', 'section', 'search_popularity']]
venue_data_df = places_df[[
    'venue_id', 'venue_name', 'venue_location_lat', 'venue_location_lng', 'venue_location_crossStreet',
    'venue_delivery_id', 'cat_level_1', 'cat_level_2', 'cat_level_3', 'cat_level_4'
]].drop_duplicates('venue_id')

with engine.begin() as conn:
    venue_searches = Table(
        "venue_searches", meta,
        Column('id', Integer, primary_key=True, comment='Venue search ID'),
        Column('venue_id', String(24), nullable=False, comment='Foursquare Venue ID'),
        Column('city', String(128), nullable=False, comment='Search City'),
        Column('radius', Integer, nullable=False, comment='Radius in meters'),
        Column('section', String(20), nullable=False, comment='Search section'),
        Column('search_popularity', Integer, nullable=False, comment='Popularity in search results')
    )

    venue_data = Table(
        'venue_data', meta,
        Column('venue_id', String(24), primary_key=True, comment='Foursquare Venue ID'),
        Column('venue_name', String(255), nullable=False, comment='Venue name'),
        Column('venue_location_lat', Numeric(10, 6), nullable=False, comment='Venue Location Latitude'),
        Column('venue_location_lng', Numeric(10, 6), nullable=False, comment='Venue Location Longitude'),
        Column('venue_location_crossStreet', String(255), comment='Street Intersection of Venue Location'),
        Column('venue_delivery_id', String(40), comment='Venue Delivery Identifier'),
        Column('cat_level_1', String(50), nullable=False, comment='Level 1 Category Name'),
        Column('cat_level_2', String(50), comment='Level 2 Category Name'),
        Column('cat_level_3', String(50), comment='Level 3 Category Name'),
        Column('cat_level_4', String(50), comment='Level 4 Category Name')
    )

    meta.drop_all(conn) # During development
    meta.create_all(conn)

    search_results_df.to_sql('venue_searches', conn, if_exists='append', index=False)
    venue_data_df.to_sql('venue_data', conn, if_exists='append', index=False)

2020-12-03 19:50:59,883 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2020-12-03 19:50:59,886 INFO sqlalchemy.engine.base.Engine DESCRIBE `venue_searches`
2020-12-03 19:50:59,888 INFO sqlalchemy.engine.base.Engine {}
2020-12-03 19:50:59,895 INFO sqlalchemy.engine.base.Engine DESCRIBE `venue_data`
2020-12-03 19:50:59,896 INFO sqlalchemy.engine.base.Engine {}
2020-12-03 19:50:59,908 INFO sqlalchemy.engine.base.Engine 
DROP TABLE venue_data
2020-12-03 19:50:59,910 INFO sqlalchemy.engine.base.Engine {}
2020-12-03 19:50:59,957 INFO sqlalchemy.engine.base.Engine 
DROP TABLE venue_searches
2020-12-03 19:50:59,958 INFO sqlalchemy.engine.base.Engine {}
2020-12-03 19:50:59,988 INFO sqlalchemy.engine.base.Engine DESCRIBE `venue_searches`
2020-12-03 19:50:59,989 INFO sqlalchemy.engine.base.Engine {}
2020-12-03 19:50:59,995 INFO sqlalchemy.engine.base.Engine DESCRIBE `venue_data`
2020-12-03 19:50:59,995 INFO sqlalchemy.engine.base.Engine {}
2020-12-03 19:51:00,002 INFO sqlalchemy.engine.base.

In [19]:
pd.read_sql(venue_searches.select(), engine, index_col='id')

2020-12-03 19:51:00,399 INFO sqlalchemy.engine.base.OptionEngine SELECT venue_searches.id, venue_searches.venue_id, venue_searches.city, venue_searches.radius, venue_searches.section, venue_searches.search_popularity 
FROM venue_searches
2020-12-03 19:51:00,400 INFO sqlalchemy.engine.base.OptionEngine {}


Unnamed: 0_level_0,venue_id,city,radius,section,search_popularity
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,5081efe6e4b0d5064a98d8b8,"Fort Wayne, IN",1000,food,0
2,4b2eb7bff964a52093e524e3,"Fort Wayne, IN",1000,food,1
3,4b5f57bff964a5201db529e3,"Fort Wayne, IN",1000,food,2
4,4b5a3e80f964a5201ab728e3,"Fort Wayne, IN",1000,food,3
5,4e3dd20bd22d102e8547c605,"Fort Wayne, IN",1000,food,4
...,...,...,...,...,...
1511,55a15ad1498eb8d1d6fe79c0,"Fort Wayne, IN",100000,topPicks,45
1512,4c01289bde3376b00157f9a4,"Fort Wayne, IN",100000,topPicks,46
1513,4bb0e871f964a52002693ce3,"Fort Wayne, IN",100000,topPicks,47
1514,4bedd6fee24d20a139167214,"Fort Wayne, IN",100000,topPicks,48


In [20]:
pd.read_sql(venue_data.select(), engine, index_col='venue_id')

2020-12-03 19:51:00,474 INFO sqlalchemy.engine.base.OptionEngine SELECT venue_data.venue_id, venue_data.venue_name, venue_data.venue_location_lat, venue_data.venue_location_lng, venue_data.`venue_location_crossStreet`, venue_data.venue_delivery_id, venue_data.cat_level_1, venue_data.cat_level_2, venue_data.cat_level_3, venue_data.cat_level_4 
FROM venue_data
2020-12-03 19:51:00,475 INFO sqlalchemy.engine.base.OptionEngine {}


Unnamed: 0_level_0,venue_name,venue_location_lat,venue_location_lng,venue_location_crossStreet,venue_delivery_id,cat_level_1,cat_level_2,cat_level_3,cat_level_4
venue_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
4b0618b3f964a52098e822e3,Regal Coldwater Crossing,41.131203,-85.142060,,,Arts & Entertainment,Movie Theater,,
4b12ed20f964a520ff9023e3,Starbucks,41.075410,-85.145640,,1507234,Food,Coffee Shop,,
4b130366f964a520a89223e3,Mad Anthony Brewing Company,41.067643,-85.152640,at Taylor,2274630,Nightlife,Brewery,,
4b1304aaf964a520c29223e3,JK O'Donnell's Irish Pub,41.078097,-85.140302,btw Harrison & Calhoun,,Food,Irish,,
4b159b5ff964a5200ab123e3,Henry's,41.079294,-85.147520,Fulton St,,Nightlife,Bar,Pub,
...,...,...,...,...,...,...,...,...,...
5d7ceaa42b61ba0007a9fe61,Starbucks Inside Kroger,41.076227,-85.275840,Scott Rd,,Food,Coffee Shop,,
5eec460b9087d80007befcfc,Spectrum Fort Wayne,41.130961,-85.129055,,,Shops,Business Services,,
5f16c1ef292d9e79ca7a23cf,Remote Flights,41.134063,-85.131263,,,Shops,Business Services,,
5f760018262e962a2c1de6fd,Bitcoin Depot ATM,41.132315,-85.135144,,,Shops,ATM,,


---

## Favorite Venue Types

The other thing I want to look for in cities is places that we know we enjoy. I will still use the `explore` enpoint, but with the `query` parameter.

In [28]:
favorite_venue_types = [
    'hiking trail',
    'bbq',
    'historic sites',
    'park',
    'dog park',
    'british food',
    'irish food',
    'arcade',
    'pizzeria',
    'ice cream shop'
]

In [29]:
async def get_favorite_sites(city):
    """
    Get popular spots of my favorite types in the city `city`
    """
    async with aiohttp.ClientSession() as session:
        tasks = []
        for r, q in product(radii, favorite_venue_types):
            task = query_places(session, city, r, query=q)
            tasks.append(task)
        results = await asyncio.gather(*tasks)
    return pd.concat(results, ignore_index=True)

In [31]:
executor = ProcessPoolExecutor()
favorite_sites = await get_favorite_sites('Fort Wayne, IN')
favorite_sites

KeyError: "['cat_level_2', 'cat_level_1', 'cat_level_4', 'cat_level_3'] not in index"

In [33]:
favorite_sites = favorite_sites.merge(
    favorite_sites.apply(lambda row: category_df.loc[row.venue_categories[0]['id']], axis=1), 
    left_index=True,
    right_index=True
)
favorite_sites[columns]

IndexError: list index out of range

In [None]:
meta = MetaData()

search_results_df = favorite_sites[['venue_id', 'city', 'radius', 'query', 'search_popularity']]
venue_data_df = favorite_sites[[
    'venue_id', 'venue_name', 'venue_location_lat', 'venue_location_lng', 'venue_location_crossStreet',
    'venue_delivery_id', 'cat_level_1', 'cat_level_2', 'cat_level_3', 'cat_level_4'
]].drop_duplicates('venue_id')

with engine.begin() as conn:
    venue_favorites = Table(
        "venue_favorites", meta,
        Column('id', Integer, primary_key=True, comment='Venue search ID'),
        Column('venue_id', String(24), nullable=False, comment='Foursquare Venue ID'),
        Column('city', String(128), nullable=False, comment='Search City'),
        Column('radius', Integer, nullable=False, comment='Radius in meters'),
        Column('query', String(20), nullable=False, comment='Search section'),
        Column('search_popularity', Integer, nullable=False, comment='Popularity in search results')
    )

    meta.drop_all(conn) # During development
    meta.create_all(conn)

    search_results_df.to_sql('venue_searches', conn, if_exists='append', index=False)
    venue_data_df.to_sql('venue_data', conn, if_exists='append', index=False)