# Events table pipeline

Steps:
1. Load the raw event data from the data folder
2. Extract the table-specific fields into a pandas DataFrame
3. Generate SQL insert statements from the DataFrame
4. Load the data into the SQL table by running the generated script

In [1]:
import os
import warnings

# Move the working directory one level up so that project-relative imports and
# paths used by the later cells resolve.
# The starting directory is recorded only on the first execution: a bare
# os.chdir("../") is relative to the *current* directory, so re-running this
# cell would otherwise climb one more level every time.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_START_DIR, os.pardir))

# Silence all warnings for the rest of the notebook.
# NOTE(review): this blanket filter also hides deprecation and dtype warnings.
warnings.filterwarnings("ignore")

In [2]:
import pandas as pd
import numpy as np
from api.api_connect import fetch_api_data
import psycopg2 
from helpers import load_config, generate_insert_statements, write_insert_statements_to_file, run_script_file, load_data_file

# Load the project configuration through the helpers module.
# NOTE(review): `config` is not referenced again in the cells visible here;
# presumably the helpers read the configuration themselves - confirm before removing.
config = load_config()

In [3]:
# Name of the raw dataset handed to the project's load_data_file helper.
EVENTS_DATASET = "event_large"

# `events` is iterated below as a collection of per-event dicts.
events = load_data_file(EVENTS_DATASET)

In [4]:
# Extracting event data

def _nested_get(mapping, *keys):
    """Follow `keys` through nested dicts.

    Returns the value found at the end of the path, or None as soon as any
    level is missing or is not a dict, so optional sections of an event never
    raise.
    """
    current = mapping
    for key in keys:
        if not isinstance(current, dict) or key not in current:
            return None
        current = current[key]
    return current


def _price_bound(event, key, pick):
    """Aggregate one bound (`key` is "min" or "max") over all price ranges.

    `pick` is the aggregation to apply (min or max). Ranges that lack the
    bound are skipped. Returns None when the event has no price ranges or none
    of them carries the requested bound.
    """
    ranges = event.get("priceRanges") or []
    values = [
        r[key]
        for r in ranges
        if isinstance(r, dict) and r.get(key) is not None
    ]
    return pick(values) if values else None


def _extract_event_record(e):
    """Flatten one raw event dict into the column layout of the event table.

    `id`, `name` and `url` are required and raise KeyError when absent, so a
    malformed record fails loudly. Every other field is optional and becomes
    None when missing.
    """
    return {
        'event_id' : e["id"],
        # Single quotes are doubled here because the name is later embedded
        # in a generated SQL insert statement.
        'event_name' : e["name"].replace("'", "''"),
        'event_url' : e["url"],
        'event_start_date' : _nested_get(e, "dates", "start", "localDate"),
        'event_start_time' : _nested_get(e, "dates", "start", "localTime"),
        # Lowest minimum / highest maximum across all ticketing categories.
        'price_min' : _price_bound(e, "min", min),
        'price_max' : _price_bound(e, "max", max),
        'seatmap_url' : _nested_get(e, "seatmap", "staticUrl"),
        'age_restrictions' : _nested_get(e, "ageRestrictions", "legalAgeEnforced"),
    }


events_list = []

for e in events:
    events_list.append(_extract_event_record(e))

In [5]:
# One row per extracted event; columns follow the keys of the record dicts.
events_df = pd.DataFrame(events_list)
# NOTE(review): an explicit NaN -> None (SQL NULL) replacement used to sit here
# but was disabled; presumably the insert-statement helper deals with missing
# values itself - confirm.
events_df.head()

Unnamed: 0,event_id,event_name,event_url,event_start_date,event_start_time,price_min,price_max,seatmap_url,age_restrictions
0,G5diZ9V9IAsIW,Billy Joel - In Concert,https://www.ticketmaster.com/billy-joel-in-con...,2024-07-25,20:00:00,89.5,495.5,https://maps.ticketmaster.com/maps/geometry/3/...,False
1,G5vfZ90VIskvW,Bruce Springsteen and The E Street Band 2024 Tour,https://www.ticketmaster.com/bruce-springsteen...,2024-09-07,19:30:00,49.5,299.5,https://maps.ticketmaster.com/maps/geometry/3/...,False
2,G5vYZ9YxtXNfW,Luke Combs - Growin'' Up And Gettin'' Old Tour,https://www.ticketmaster.com/luke-combs-growin...,2024-05-18,17:45:00,17.0,225.5,https://maps.ticketmaster.com/maps/geometry/3/...,False
3,G5vzZ9wQCOodN,George Strait,https://www.ticketmaster.com/george-strait-sal...,2024-06-29,17:45:00,72.0,3025.0,https://maps.ticketmaster.com/maps/geometry/3/...,False
4,vvG1IZ9MdEjK84,Rolling Stones: Hackney Diamonds ''24,https://www.ticketmaster.com/rolling-stones-ha...,2024-07-10,19:30:00,69.5,600.0,https://maps.ticketmaster.com/maps/geometry/3/...,False


In [6]:
# Sanity check: every row must carry a distinct event_id.
# A failed check stops the notebook here instead of passing silently, so
# duplicated ids never reach the insert step below.
row_count = events_df.shape[0]
unique_id_count = events_df['event_id'].nunique()
if row_count != unique_id_count:
    raise ValueError(
        f"Sanity check for events table failed: {row_count} rows but "
        f"{unique_id_count} unique event_id values"
    )
print("Sanity check for events table successful!")

Sanity check for events table successful!


In [7]:
# Turn the DataFrame into insert statements for the "event" table, write them
# to a script file and run that script. The SQL text is produced entirely by
# the project helpers imported at the top of the notebook.
insert_script_name = "insert_event"
insert_statements = generate_insert_statements(
    table_name="event",
    dataframe=events_df,
)
write_insert_statements_to_file(insert_statements, script_name=insert_script_name)
run_script_file(insert_script_name)

Insert statements written to 'sql_scripts/insert_event.sql' successfully!
SQL script executed successfully!
