# Event-Venue Bridge table pipeline

Steps:
1. Load data from the data folder
2. Extract table-specific information into a pandas DataFrame
3. Generate insert statements
4. Load the data into the SQL table

In [1]:
import os
import warnings

# Work from the parent directory -- presumably so the project-local imports
# (`api`, `helpers`) and the relative paths used later resolve (TODO confirm).
#
# The target is resolved only once per kernel session: a bare os.chdir("../")
# climbs one more level every time this cell is re-run, which silently breaks
# everything below it.
if "PROJECT_ROOT" not in globals():
    PROJECT_ROOT = os.path.abspath("../")
os.chdir(PROJECT_ROOT)

# Silences every warning category for the rest of the session.
warnings.filterwarnings("ignore")

In [2]:
import pandas as pd
import numpy as np
from api.api_connect import fetch_api_data
import psycopg2 
from helpers import load_config, generate_insert_statements, write_insert_statements_to_file, run_script_file, load_data_file

# Call the project helper load_config() and keep its result in `config`.
# NOTE(review): `config` is not referenced by any later cell shown here --
# confirm it is still needed (or that the helpers read it indirectly).
config = load_config()

In [3]:
# Name of the data file handed to the load_data_file helper. The result is
# iterated by the extraction cell as a sequence of event records, each read
# through its 'id' and '_embedded' keys.
EVENTS_DATA_FILE = "event_large"
events = load_data_file(EVENTS_DATA_FILE)

In [4]:
# Extracting event-venue bridge table data
#
# Builds one (event_id, [venue_id, ...]) entry per event. Venue ids are
# accumulated in a dict keyed by event_id, so membership checks are hash
# lookups instead of list scans, and an event that shows up in more than one
# record contributes each (event, venue) pair only once.

venues_by_event = {}

for e in events:
    event_id = e['id']

    # If a record carries no "_embedded" / "venues" entry (or an empty one),
    # treat it as "no bridge rows for this event" rather than raising KeyError.
    venues = (e.get("_embedded") or {}).get("venues") or []

    # The inner dict serves as an insertion-ordered set of venue ids.
    v_ids = venues_by_event.setdefault(event_id, {})
    for v in venues:
        v_ids[v['id']] = None

# Events without any venue are left out: an empty list would otherwise become
# a NaN venue_id row once the frame is exploded in the next cell.
event_venue_list = [
    (event_id, list(v_ids))
    for event_id, v_ids in venues_by_event.items()
    if v_ids
]


In [5]:
# One row per (event_id, venue_id) pair: explode() turns each venue list into
# separate rows that share the same event_id.
event_venue_df = (
    pd.DataFrame(data = event_venue_list, columns = ['event_id', 'venue_id'])
    .explode('venue_id')
    # explode() yields NaN for an empty list; such a row has no venue to link.
    .dropna(subset = ['venue_id'])
    # Keep each (event_id, venue_id) pair only once.
    .drop_duplicates()
    # explode() repeats the parent row label; give every row a unique one.
    .reset_index(drop = True)
)
event_venue_df.head()

Unnamed: 0,event_id,venue_id
0,G5diZ9V9IAsIW,KovZpZA7AAEA
1,G5vfZ90VIskvW,KovZpZA1J67A
2,G5vYZ9YxtXNfW,KovZpZAJaFkA
3,G5vzZ9wQCOodN,KovZpZAdEt6A
4,vvG1IZ9MdEjK84,KovZ917ACh0


In [6]:
# Turn the bridge frame into insert statements, write them out under the
# given script name, then run that script -- all through the project helpers.
# NOTE(review): re-running this cell presumably issues the same inserts
# again -- confirm how the helpers / table handle repeated loads.
BRIDGE_TABLE = "event_venue_bridge"
INSERT_SCRIPT = "insert_event_venue_bridge"

insert_statements = generate_insert_statements(
    table_name = BRIDGE_TABLE,
    dataframe = event_venue_df,
)
write_insert_statements_to_file(insert_statements, script_name = INSERT_SCRIPT)
run_script_file(INSERT_SCRIPT)

Insert statements written to 'sql_scripts/insert_event_venue_bridge.sql' successfully!
SQL script executed successfully!
