# Clustering and Scoring Job Relocation Opportunities - Playground Notebook

Austin Rainwater

---

# Initialization

In [2]:
!pip install --quiet --upgrade sqlalchemy pymysql

from urllib.parse import quote as url_encode

import pandas as pd
import numpy as np
import aiohttp
import asyncio
import requests
import xml.etree.ElementTree as xml

from concurrent.futures import ProcessPoolExecutor

from pandas import json_normalize
from itertools import product

from sqlalchemy import (
    create_engine,
    Table,
    Column,
    MetaData,
    String,
    Numeric,
    Integer
)

import yaml

with open('secrets.yaml', 'r') as secrets_file:
    secrets = yaml.safe_load(secrets_file)
    
header = {"User-Agent": 
          'datascience jupyter notebook/0.0 '
          '(https://github.com/pacorain/datascience-certification-final-project; '
          'Austin Rainwater, paco@heckin.io)'}
v = '20201108'

---

# Postal Code Definition

Obviously, a good place for me to start is with some areas. Below is the table definition for the postal codes I will be exploring and their specific traits.

I will start with a city and go from there.

In [221]:
engine = create_engine(secrets['db_connection_string'], echo=True)

In [222]:
meta = MetaData()

postal_codes = Table(
    'postal_code', meta,
    Column('postal_code', String(10), primary_key=True, comment='Postal Code Identifier')
    Column('city_name', String(50), primary_key=True, comment='City Name'),
    Column('metro_name', String(50), comment='Metropolitan Area Name'),
    Column('state', String(2), nullable=False, comment='2-Letter abbreviation of State'),
    Column('lat', Numeric(10, 6), nullable=False, comment='Latitude of Center of Postal Code'),
    Column('lng', Numeric(10, 6), nullable=False, comment='Longitude of Center of Postal Code'),
    Column('area_val', Numeric(10, 4), nullable=False, comment='Area of city in square miles'),
    Column('total_pop', Integer, nullable=False, comment='Total population of city')
)

In [82]:
meta.drop_all(engine)
meta.create_all(engine)

2020-11-23 23:25:19,348 INFO sqlalchemy.engine.base.Engine SHOW VARIABLES LIKE 'sql_mode'
2020-11-23 23:25:19,350 INFO sqlalchemy.engine.base.Engine {}
2020-11-23 23:25:19,359 INFO sqlalchemy.engine.base.Engine SHOW VARIABLES LIKE 'lower_case_table_names'
2020-11-23 23:25:19,360 INFO sqlalchemy.engine.base.Engine {}
2020-11-23 23:25:19,368 INFO sqlalchemy.engine.base.Engine SELECT DATABASE()
2020-11-23 23:25:19,369 INFO sqlalchemy.engine.base.Engine {}
2020-11-23 23:25:19,381 INFO sqlalchemy.engine.base.Engine show collation where `Charset` = 'utf8mb4' and `Collation` = 'utf8mb4_bin'
2020-11-23 23:25:19,382 INFO sqlalchemy.engine.base.Engine {}
2020-11-23 23:25:19,388 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS CHAR(60)) AS anon_1
2020-11-23 23:25:19,389 INFO sqlalchemy.engine.base.Engine {}
2020-11-23 23:25:19,392 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS CHAR(60)) AS anon_1
2020-11-23 23:25:19,393 INFO sqlalchemy.engine.base.E

Let's start with my birthplace: Fort Wayne, Indiana.

In [83]:
new_city = cities.insert()

try:
    engine.execute(new_city, [
        {'postal_code': '46845', 'city_name': 'Fort Wayne, IN', 'metro_name': 'Fort Wayne, IN', 'state': 'IN'}
    ])
except:
    print("Oops! That didn't work.")

2020-11-23 23:25:26,045 INFO sqlalchemy.engine.base.Engine INSERT INTO city (city_name, metro_name, state) VALUES (%(city_name)s, %(metro_name)s, %(state)s)
2020-11-23 23:25:26,046 INFO sqlalchemy.engine.base.Engine {'city_name': 'Fort Wayne', 'metro_name': 'Fort Wayne', 'state': 'IN'}
2020-11-23 23:25:26,050 INFO sqlalchemy.engine.base.Engine ROLLBACK
Oops! That didn't work.


Ah, the table requires some more data to be able to insert the record. I could use the geocoder library from before to get the latitude and longitude, but since I will be using Wikipedia anyway, let's see if I can grab it from there.

I did some experimenting with the [Wikipedia API Sandbox](https://en.wikipedia.org/wiki/Special:ApiSandbox#action=parse&format=json&page=Fort%20Wayne%2C%20Indiana&redirects=1&prop=wikitext), and oddly enough while there are multiple endpoints capable of getting the _names_ of the templates used in a page, I could not for the life of me find a way to get the _data inserted to_ the templates in an easy format such as JSON. So instead, I'm going to grab the [`parsetree`](https://en.wikipedia.org/wiki/Parse_tree) from the API and parse it with Python's [XML libraries](https://docs.python.org/3/library/xml.html).

I'm hoping that because Wikipedia uses an `Infobox settlement` template, that it will be the same for each city (with some minor differences).

In [3]:
city_name = 'Fort Wayne'
state_name = 'IN'

wikipedia_url = 'https://en.wikipedia.org/w/api.php'
params = {
    "action": "parse",
    "format": "json",
    "redirects": "1",
    "page": f"{city_name}, {state_name}",
    "prop": "parsetree"
}

response = requests.get(wikipedia_url, params=params, headers=header).json()['parse']['parsetree']['*']
response = xml.canonicalize(response, strip_text=True) # Get rid of ambiguous whitespace, etc.

# Write XML data for local exploration
with open('data/fort_wayne.xml', 'w') as xml_file:
    xml_file.write(response)
    
wiki_data = xml.fromstring(response)

This will give me the zip codes from Fort Wayne, but not the latitude/longitude of them. I could use a geocoder library, but I will be parsing data from Foursquare, and I recall from the previous lab that Foursquare returns a `near` property that contains the geocoded latitude and longitude of the zip code.

As a result, by the time I have the data I need, I will also have data I need from Foursquare.

Let's start by getting the zip codes out of the Wikipedia document.

This is the XML from Fort Wayne that I am trying to parse:

```xml
<part>
    <name>postal_code </name>
    <equals>=</equals>
    <value>
        <template>
            <title>Collapsible list </title>
<part>
<name>title</name>
<equals>=</equals>
<value>ZIP codes</value>
</part>
<part>
<name index="1"/>
<value>46774, 46802–46809, 46814–46816, 46818, 46819, 46825, 46835, 46845</value>
</part>
</template>
</value>
</part>
```

In [4]:
def template_value(wiki_data, template_title, part_name):
    """
    Given wiki_data as an XML element tree, find the value of a specific part `part_name` inside a specific template `template_title`
    """
    template = wiki_data.find(".//template[title='{}']".format(template_title))
    return template.find(".part[name='{}'].value".format(part_name)).text

In [10]:
zip_codes_part = wiki_data.find(".//template[title='Infobox settlement'].part[name='postal_code'].value")

In [34]:
foursquare_url = "https://api.foursquare.com/v2/venues/explore"

params = {
    'client_id': secrets['4SQ_CLIENT_ID'],
    'client_secret': secrets['4SQ_CLIENT_SECRET'],
    'limit': '50',
    'v': v,
    'near': 'Fort Wayne, IN',
    'radius': 1000,
    'time': 'any', 
    'day': 'any',
    'sortByPopularity': '1'
}

foursquare_response = requests.get(foursquare_url, params=params, headers=header).json()['response']

In [80]:


lat = float(foursquare_response['geocode']['center']['lat'])
lng = float(foursquare_response['geocode']['center']['lng'])
zip_codes = 
sq_mi = float(template_value(wiki_data, "Infobox settlement", "area_total_sq_mi"))
total_pop = int(template_value(wiki_data, "Infobox settlement", "population_est"))

41.1306

Alright, I've gotten the values I need initially for a city; now let's try inserting it.

In [84]:
engine.execute(new_city, [{
    'city_name': 'Fort Wayne', 
    'metro_name': 'Fort Wayne', 
    'state': 'IN', 
    'lat': lat,
    'lng': lng,
    'area_val': sq_mi,
    'total_pop': total_pop
}])

2020-11-23 23:25:29,737 INFO sqlalchemy.engine.base.Engine INSERT INTO city (city_name, metro_name, state, lat, lng, area_val, total_pop) VALUES (%(city_name)s, %(metro_name)s, %(state)s, %(lat)s, %(lng)s, %(area_val)s, %(total_pop)s)
2020-11-23 23:25:29,738 INFO sqlalchemy.engine.base.Engine {'city_name': 'Fort Wayne', 'metro_name': 'Fort Wayne', 'state': 'IN', 'lat': 41.1306, 'lng': -85.12886, 'area_val': 110.79, 'total_pop': 270402}
2020-11-23 23:25:29,742 INFO sqlalchemy.engine.base.Engine COMMIT


<sqlalchemy.engine.result.ResultProxy at 0x7f69cc6e6f70>

In [89]:
query = cities.select()

pd.read_sql(query, engine)

2020-11-23 23:39:42,785 INFO sqlalchemy.engine.base.OptionEngine SELECT city.city_name, city.metro_name, city.state, city.lat, city.lng, city.area_val, city.total_pop 
FROM city
2020-11-23 23:39:42,786 INFO sqlalchemy.engine.base.OptionEngine {}


Unnamed: 0,city_name,metro_name,state,lat,lng,area_val,total_pop
0,Fort Wayne,Fort Wayne,IN,41.1306,-85.12886,110.79,270402


Not bad. 

Next, I want to grab some data from Foursquare to build a feature based on what's popular within 1, 5, 25, and 100 km. I'll use the category hierarchy like I did in the week 3 lab. Given that the Foursquare API allows for 99,500 of these calls a day, and up to 5,000 per hour, I can also do this comfortably with each section defined in the `venues/explore` enpoint to see how much variety is in each section in an area.

In [37]:
url = 'https://api.foursquare.com/v2/venues/categories'
params = {
    'client_id': secrets['4SQ_CLIENT_ID'],
    'client_secret': secrets['4SQ_CLIENT_SECRET'],
    'v': v
}
foursquare_categories = requests.get(url, params=params).json()

def category_hier(categories, prefix=[]):
    result = []
    
    for category in categories:
        category = json_normalize(category).iloc[0]
        current_category = pd.Series(
            data=prefix + [category.shortName] + [np.nan] * (4 - len(prefix)),
            name=str(category.id),
            index=[
                'cat_level_1',
                'cat_level_2',
                'cat_level_3',
                'cat_level_4',
                'cat_level_5'
            ]
        )
        result.append(current_category)
        if subcategories := category.categories:
            result += category_hier(subcategories, prefix + [category.shortName])
            
    return result

categories = foursquare_categories['response']['categories']
category_df = pd.DataFrame(category_hier(categories))

In [212]:
radii = [1000, 5000, 25000, 100000]
sections = ['food', 'drinks', 'coffee', 'shops', 'arts', 'outdoors', 'sights', 'trending', 'topPicks']

async def get_popular_spots(city):
    """
    Get popular spots in various "sections" within various distances of `city`
    """
    async with aiohttp.ClientSession() as session:
        tasks = []
        for r, s in product(radii, sections):
            task = query_places(session, city, r, s)
            tasks.append(task)
        results = await asyncio.gather(*tasks)
    return pd.concat(results, ignore_index=True)
    
    
async def query_places(session, location, radius, section):
    """
    With an existing HTTP `session`, get popular spots of the type `section` within `radius` meters of `location`
    
    Uses multiprocessing for quicker processing of the 36 times this function is called
    """
    async with session.get("https://api.foursquare.com/v2/venues/explore", params={
        'client_id': secrets['4SQ_CLIENT_ID'],
        'client_secret': secrets['4SQ_CLIENT_SECRET'],
        'limit': '50',
        'v': v,
        'near': location,
        'radius': radius, 
        'section': section
    }) as result:
        data = await result.json()
    loop = asyncio.get_running_loop()
    venues = await loop.run_in_executor(executor, normalize_foursquare_response, data)
    if venues is not None:
        venues['city'] = location
        venues['radius'] = radius
        venues['section'] = section
    return venues
    
    
def normalize_foursquare_response(data):
    """
    Converts the Foursquare response into a dataframe with all of the venues, as well geolocation metadata.
    """
    if 'groups' not in data['response']:
        return None
    venues = json_normalize(data, ['response', 'groups', 'items'], sep='_')
    geo = json_normalize(data['response']['geocode'], sep='_').loc[0] # json_normalize returns single-index df
    geo.index = pd.Index(f'geo_{name}' for name in geo.index)
    venues.loc[:, geo.index] = geo.values
    venues['search_popularity'] = venues.index.values
    return venues


In [213]:
executor = ProcessPoolExecutor()
places_df = await get_popular_spots('Fort Wayne, IN')
places_df

Unnamed: 0,referralId,reasons_count,reasons_items,venue_id,venue_name,venue_location_address,venue_location_lat,venue_location_lng,venue_location_labeledLatLngs,venue_location_postalCode,...,search_popularity,city,radius,section,venue_venuePage_id,flags_outsideRadius,venue_location_neighborhood,venue_events_count,venue_events_summary,venue_events_items
0,e-3-5081efe6e4b0d5064a98d8b8-0,0,"[{'summary': 'This spot is popular', 'type': '...",5081efe6e4b0d5064a98d8b8,Banh Mi Barista,5320 Coldwater Rd,41.127890,-85.135835,"[{'label': 'display', 'lat': 41.12789011966495...",46825,...,0,"Fort Wayne, IN",1000,food,,,,,,
1,e-3-4b2eb7bff964a52093e524e3-1,0,"[{'summary': 'This spot is popular', 'type': '...",4b2eb7bff964a52093e524e3,Jimmy John's,5412 Coldwater Rd,41.129067,-85.135295,"[{'label': 'display', 'lat': 41.12906702616283...",46825,...,1,"Fort Wayne, IN",1000,food,,,,,,
2,e-3-4b5f57bff964a5201db529e3-2,0,"[{'summary': 'This spot is popular', 'type': '...",4b5f57bff964a5201db529e3,Cork 'n Cleaver,221 Washington Ctr Rd,41.132858,-85.138249,"[{'label': 'display', 'lat': 41.132858, 'lng':...",46825,...,2,"Fort Wayne, IN",1000,food,,,,,,
3,e-3-4b5a3e80f964a5201ab728e3-3,0,"[{'summary': 'This spot is popular', 'type': '...",4b5a3e80f964a5201ab728e3,BakerStreet,4820 N Clinton St,41.122200,-85.125421,"[{'label': 'display', 'lat': 41.12219979566053...",46825,...,3,"Fort Wayne, IN",1000,food,,,,,,
4,e-3-4e3dd20bd22d102e8547c605-4,0,"[{'summary': 'This spot is popular', 'type': '...",4e3dd20bd22d102e8547c605,Koto Japanese Steakhouse & Sushi,301 E Washington Center Rd,41.133189,-85.137754,"[{'label': 'display', 'lat': 41.13318911813626...",46825,...,4,"Fort Wayne, IN",1000,food,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1507,e-1-4b630d46f964a520ec5f2ae3-45,0,"[{'summary': 'This spot is popular', 'type': '...",4b630d46f964a520ec5f2ae3,Caruso's,2435 N 200 W,41.677603,-85.029349,"[{'label': 'display', 'lat': 41.67760342312571...",46703,...,45,"Fort Wayne, IN",100000,topPicks,,,,,,
1508,e-1-4f0b1e6ee4b0671f2a514121-46,0,"[{'summary': 'This spot is popular', 'type': '...",4f0b1e6ee4b0671f2a514121,Five Lakes Coffee,N Wayne Street,41.651249,-85.000142,"[{'label': 'display', 'lat': 41.65124934126112...",46703,...,46,"Fort Wayne, IN",100000,topPicks,,,,,,
1509,e-1-4cb7291c9c7ba35db4069706-47,0,"[{'summary': 'This spot is popular', 'type': '...",4cb7291c9c7ba35db4069706,Martin's Super Market,3900 E Bristol St,41.704659,-85.915933,"[{'label': 'display', 'lat': 41.70465911291793...",46514,...,47,"Fort Wayne, IN",100000,topPicks,,,,,,
1510,e-1-5434259c498ecc238ff8fd51-48,0,"[{'summary': 'This spot is popular', 'type': '...",5434259c498ecc238ff8fd51,Salsa Grille at Village of Coventry,5735 Falls Dr,41.031674,-85.258676,"[{'label': 'display', 'lat': 41.03167414988026...",46804,...,48,"Fort Wayne, IN",100000,topPicks,,,,,,


In [215]:
print(f"{len(places_df.venue_id.unique())} unique venues")

731 unique venues


Cool, that will give me the ability to get an idea of what we can do on an evening or a weekend. 

Let's add in the venue category hierarchy.

In [216]:
places_df = places_df.merge(
    places_df.apply(lambda row: category_df.loc[row.venue_categories[0]['id']], axis=1), 
    left_index=True,
    right_index=True
)
places_df

Unnamed: 0,referralId,reasons_count,reasons_items,venue_id,venue_name,venue_location_address,venue_location_lat,venue_location_lng,venue_location_labeledLatLngs,venue_location_postalCode,...,flags_outsideRadius,venue_location_neighborhood,venue_events_count,venue_events_summary,venue_events_items,cat_level_1,cat_level_2,cat_level_3,cat_level_4,cat_level_5
0,e-3-5081efe6e4b0d5064a98d8b8-0,0,"[{'summary': 'This spot is popular', 'type': '...",5081efe6e4b0d5064a98d8b8,Banh Mi Barista,5320 Coldwater Rd,41.127890,-85.135835,"[{'label': 'display', 'lat': 41.12789011966495...",46825,...,,,,,,Food,Asian,Vietnamese,,
1,e-3-4b2eb7bff964a52093e524e3-1,0,"[{'summary': 'This spot is popular', 'type': '...",4b2eb7bff964a52093e524e3,Jimmy John's,5412 Coldwater Rd,41.129067,-85.135295,"[{'label': 'display', 'lat': 41.12906702616283...",46825,...,,,,,,Food,Sandwiches,,,
2,e-3-4b5f57bff964a5201db529e3-2,0,"[{'summary': 'This spot is popular', 'type': '...",4b5f57bff964a5201db529e3,Cork 'n Cleaver,221 Washington Ctr Rd,41.132858,-85.138249,"[{'label': 'display', 'lat': 41.132858, 'lng':...",46825,...,,,,,,Food,Steakhouse,,,
3,e-3-4b5a3e80f964a5201ab728e3-3,0,"[{'summary': 'This spot is popular', 'type': '...",4b5a3e80f964a5201ab728e3,BakerStreet,4820 N Clinton St,41.122200,-85.125421,"[{'label': 'display', 'lat': 41.12219979566053...",46825,...,,,,,,Food,Steakhouse,,,
4,e-3-4e3dd20bd22d102e8547c605-4,0,"[{'summary': 'This spot is popular', 'type': '...",4e3dd20bd22d102e8547c605,Koto Japanese Steakhouse & Sushi,301 E Washington Center Rd,41.133189,-85.137754,"[{'label': 'display', 'lat': 41.13318911813626...",46825,...,,,,,,Food,Asian,Japanese,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1507,e-1-4b630d46f964a520ec5f2ae3-45,0,"[{'summary': 'This spot is popular', 'type': '...",4b630d46f964a520ec5f2ae3,Caruso's,2435 N 200 W,41.677603,-85.029349,"[{'label': 'display', 'lat': 41.67760342312571...",46703,...,,,,,,Food,Italian,,,
1508,e-1-4f0b1e6ee4b0671f2a514121-46,0,"[{'summary': 'This spot is popular', 'type': '...",4f0b1e6ee4b0671f2a514121,Five Lakes Coffee,N Wayne Street,41.651249,-85.000142,"[{'label': 'display', 'lat': 41.65124934126112...",46703,...,,,,,,Food,Coffee Shop,,,
1509,e-1-4cb7291c9c7ba35db4069706-47,0,"[{'summary': 'This spot is popular', 'type': '...",4cb7291c9c7ba35db4069706,Martin's Super Market,3900 E Bristol St,41.704659,-85.915933,"[{'label': 'display', 'lat': 41.70465911291793...",46514,...,,,,,,Shops,Food & Drink,Supermarket,,
1510,e-1-5434259c498ecc238ff8fd51-48,0,"[{'summary': 'This spot is popular', 'type': '...",5434259c498ecc238ff8fd51,Salsa Grille at Village of Coventry,5735 Falls Dr,41.031674,-85.258676,"[{'label': 'display', 'lat': 41.03167414988026...",46804,...,,,,,,Food,Mexican,,,


Let's pull out the columns that would be helpful in creating or visualizing features.

In [217]:
columns = [
    'venue_id', 'venue_name', 'venue_location_lat', 'venue_location_lng', 
    'venue_location_crossStreet', 'venue_delivery_id', 'search_popularity', 
    'geo_where', 'geo_slug', 'geo_longId', 'geo_center_lat', 
    'geo_center_lng', 'city', 'radius', 'section', 'cat_level_1', 
    'cat_level_2', 'cat_level_3', 'cat_level_4'
]

places_df[columns]

Unnamed: 0,venue_id,venue_name,venue_location_lat,venue_location_lng,venue_location_crossStreet,venue_delivery_id,search_popularity,geo_where,geo_slug,geo_longId,geo_center_lat,geo_center_lng,city,radius,section,cat_level_1,cat_level_2,cat_level_3,cat_level_4
0,5081efe6e4b0d5064a98d8b8,Banh Mi Barista,41.127890,-85.135835,,,0,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",1000,food,Food,Asian,Vietnamese,
1,4b2eb7bff964a52093e524e3,Jimmy John's,41.129067,-85.135295,,,1,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",1000,food,Food,Sandwiches,,
2,4b5f57bff964a5201db529e3,Cork 'n Cleaver,41.132858,-85.138249,Coldwater Rd,,2,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",1000,food,Food,Steakhouse,,
3,4b5a3e80f964a5201ab728e3,BakerStreet,41.122200,-85.125421,,1502561,3,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",1000,food,Food,Steakhouse,,
4,4e3dd20bd22d102e8547c605,Koto Japanese Steakhouse & Sushi,41.133189,-85.137754,,1332856,4,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",1000,food,Food,Asian,Japanese,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1507,4b630d46f964a520ec5f2ae3,Caruso's,41.677603,-85.029349,,,45,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",100000,topPicks,Food,Italian,,
1508,4f0b1e6ee4b0671f2a514121,Five Lakes Coffee,41.651249,-85.000142,Calvary Lane,,46,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",100000,topPicks,Food,Coffee Shop,,
1509,4cb7291c9c7ba35db4069706,Martin's Super Market,41.704659,-85.915933,Cobblestone Blvd,,47,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",100000,topPicks,Shops,Food & Drink,Supermarket,
1510,5434259c498ecc238ff8fd51,Salsa Grille at Village of Coventry,41.031674,-85.258676,Liberty Mills Rd,,48,fort wayne in,fort-wayne-indiana,72057594042848359,41.1306,-85.12886,"Fort Wayne, IN",100000,topPicks,Food,Mexican,,


Finally, let's put these results in some tables.

In [236]:
meta = MetaData()

search_results_df = places_df[['venue_id', 'city', 'radius', 'section', 'search_popularity']]
venue_data_df = places_df[[
    'venue_id', 'venue_name', 'venue_location_lat', 'venue_location_lng', 'venue_location_crossStreet',
    'venue_delivery_id', 'cat_level_1', 'cat_level_2', 'cat_level_3', 'cat_level_4'
]].drop_duplicates('venue_id')

with engine.begin() as conn:
    venue_searches = Table(
        "venue_searches", meta,
        Column('id', Integer, primary_key=True, comment='Venue search ID'),
        Column('venue_id', String(24), nullable=False, comment='Foursquare Venue ID'),
        Column('city', String(128), nullable=False, comment='Search City'),
        Column('radius', Integer, nullable=False, comment='Radius in meters'),
        Column('section', String(20), nullable=False, comment='Search section'),
        Column('search_popularity', Integer, nullable=False, comment='Popularity in search results')
    )

    venue_data = Table(
        'venue_data', meta,
        Column('venue_id', String(24), primary_key=True, comment='Foursquare Venue ID'),
        Column('venue_name', String(255), nullable=False, comment='Venue name'),
        Column('venue_location_lat', Numeric(10, 6), nullable=False, comment='Venue Location Latitude'),
        Column('venue_location_lng', Numeric(10, 6), nullable=False, comment='Venue Location Longitude'),
        Column('venue_location_crossStreet', String(255), comment='Street Intersection of Venue Location'),
        Column('venue_delivery_id', String(40), comment='Venue Delivery Identifier'),
        Column('cat_level_1', String(50), nullable=False, comment='Level 1 Category Name'),
        Column('cat_level_2', String(50), comment='Level 2 Category Name'),
        Column('cat_level_3', String(50), comment='Level 3 Category Name'),
        Column('cat_level_4', String(50), comment='Level 4 Category Name')
    )

    meta.create_all(conn)

    search_results_df.to_sql('venue_searches', conn, if_exists='append', index=False)
    venue_data_df.to_sql('venue_data', conn, if_exists='append', index=False)

2020-12-01 03:07:06,561 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2020-12-01 03:07:06,564 INFO sqlalchemy.engine.base.Engine DESCRIBE `venue_searches`
2020-12-01 03:07:06,565 INFO sqlalchemy.engine.base.Engine {}
2020-12-01 03:07:06,573 INFO sqlalchemy.engine.base.Engine DESCRIBE `venue_data`
2020-12-01 03:07:06,574 INFO sqlalchemy.engine.base.Engine {}
2020-12-01 03:07:06,583 INFO sqlalchemy.engine.base.Engine 
CREATE TABLE venue_searches (
	id INTEGER NOT NULL COMMENT 'Venue search ID' AUTO_INCREMENT, 
	venue_id VARCHAR(24) NOT NULL COMMENT 'Foursquare Venue ID', 
	city VARCHAR(128) NOT NULL COMMENT 'Search City', 
	radius INTEGER NOT NULL COMMENT 'Radius in meters', 
	section VARCHAR(20) NOT NULL COMMENT 'Search section', 
	search_popularity INTEGER NOT NULL COMMENT 'Popularity in search results', 
	PRIMARY KEY (id)
)


2020-12-01 03:07:06,585 INFO sqlalchemy.engine.base.Engine {}
2020-12-01 03:07:06,704 INFO sqlalchemy.engine.base.Engine 
CREATE TABLE venue_data (
	venue_

In [237]:
pd.read_sql(venue_searches.select(), engine)

2020-12-01 03:08:25,385 INFO sqlalchemy.engine.base.OptionEngine SELECT venue_searches.id, venue_searches.venue_id, venue_searches.city, venue_searches.radius, venue_searches.section, venue_searches.search_popularity 
FROM venue_searches
2020-12-01 03:08:25,386 INFO sqlalchemy.engine.base.OptionEngine {}


Unnamed: 0,id,venue_id,city,radius,section,search_popularity
0,1,5081efe6e4b0d5064a98d8b8,"Fort Wayne, IN",1000,food,0
1,2,4b2eb7bff964a52093e524e3,"Fort Wayne, IN",1000,food,1
2,3,4b5f57bff964a5201db529e3,"Fort Wayne, IN",1000,food,2
3,4,4b5a3e80f964a5201ab728e3,"Fort Wayne, IN",1000,food,3
4,5,4e3dd20bd22d102e8547c605,"Fort Wayne, IN",1000,food,4
...,...,...,...,...,...,...
1507,1508,4b630d46f964a520ec5f2ae3,"Fort Wayne, IN",100000,topPicks,45
1508,1509,4f0b1e6ee4b0671f2a514121,"Fort Wayne, IN",100000,topPicks,46
1509,1510,4cb7291c9c7ba35db4069706,"Fort Wayne, IN",100000,topPicks,47
1510,1511,5434259c498ecc238ff8fd51,"Fort Wayne, IN",100000,topPicks,48


In [238]:
pd.read_sql(venue_data.select(), engine)

2020-12-01 03:08:51,605 INFO sqlalchemy.engine.base.OptionEngine SELECT venue_data.venue_id, venue_data.venue_name, venue_data.venue_location_lat, venue_data.venue_location_lng, venue_data.`venue_location_crossStreet`, venue_data.venue_delivery_id, venue_data.cat_level_1, venue_data.cat_level_2, venue_data.cat_level_3, venue_data.cat_level_4 
FROM venue_data
2020-12-01 03:08:51,606 INFO sqlalchemy.engine.base.OptionEngine {}


Unnamed: 0,venue_id,venue_name,venue_location_lat,venue_location_lng,venue_location_crossStreet,venue_delivery_id,cat_level_1,cat_level_2,cat_level_3,cat_level_4
0,4b0618b3f964a52098e822e3,Regal Coldwater Crossing,41.131203,-85.142060,,,Arts & Entertainment,Movie Theater,,
1,4b12ed20f964a520ff9023e3,Starbucks,41.075410,-85.145640,,1507234,Food,Coffee Shop,,
2,4b130366f964a520a89223e3,Mad Anthony Brewing Company,41.067643,-85.152640,at Taylor,2274630,Nightlife,Brewery,,
3,4b1304aaf964a520c29223e3,JK O'Donnell's Irish Pub,41.078097,-85.140302,btw Harrison & Calhoun,,Food,Irish,,
4,4b159b5ff964a5200ab123e3,Henry's,41.079294,-85.147520,Fulton St,,Nightlife,Bar,Pub,
...,...,...,...,...,...,...,...,...,...,...
726,5d7ceaa42b61ba0007a9fe61,Starbucks Inside Kroger,41.076227,-85.275840,Scott Rd,,Food,Coffee Shop,,
727,5eec460b9087d80007befcfc,Spectrum Fort Wayne,41.130961,-85.129055,,,Shops,Business Services,,
728,5f16c1ef292d9e79ca7a23cf,Remote Flights,41.134063,-85.131263,,,Shops,Business Services,,
729,5f760018262e962a2c1de6fd,Bitcoin Depot ATM,41.132315,-85.135144,,,Shops,ATM,,
