# Artist and Event–Artist Bridge table pipeline

Steps:
1. Load the event data from the data folder
2. Extract the table-specific information into pandas DataFrames
3. Create the tables in SQL
4. Generate SQL `INSERT` scripts and load the data into the SQL tables

In [1]:
# Move the working directory one level up before any other cell runs.
# NOTE(review): presumably this is so the project-local imports (`api`, `helpers`)
# and relative paths such as 'sql_scripts/' resolve from the project root -- confirm.
# NOTE: the path is relative, so every re-run of this cell moves up one MORE
# directory; restart the kernel before running this cell again.
import os
os.chdir("../")

# Suppress every warning for the rest of the session (no category or module filter).
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Third-party and project-local dependencies used by the cells below.
# NOTE(review): `fetch_api_data` and `psycopg2` are not referenced by any cell
# shown in this notebook -- confirm whether they are still needed.
import pandas as pd
from api.api_connect import fetch_api_data
import psycopg2 
from helpers import load_config, generate_insert_statements, write_insert_statements_to_file, run_script_file, load_data_file

# Load the project configuration via the `helpers` module.
# NOTE(review): `config` is not read by any cell shown here; presumably the
# helpers load their own settings or this is kept for interactive use -- confirm.
config = load_config()

In [3]:
# Name of the stored dataset to read. The cells below iterate over `events`
# and index each item by "id" and "_embedded".
events_dataset_name = "event_small"
events = load_data_file(events_dataset_name)

In [5]:
# extract artists data
# Builds one row per distinct artist id, in first-seen order; when an id shows
# up again (in the same or another event) the name from its first occurrence
# is kept.
artist_dict = {
    "artist_id": [],
    "artist_name": []
    }
# Set of ids already recorded: constant-time membership test instead of
# rescanning the growing id list for every artist.
seen_artist_ids = set()
for event in events:
    # An event without an "_embedded" -> "attractions" entry (missing or empty)
    # contributes no artists instead of aborting the extraction with a KeyError.
    # NOTE(review): assumes such events are legitimate and can simply be skipped
    # for this table -- confirm against the data source.
    artists = (event.get("_embedded") or {}).get("attractions") or []
    for artist in artists:
        artist_id = artist["id"]
        artist_name = artist["name"]
        if artist_id in seen_artist_ids:
            continue
        seen_artist_ids.add(artist_id)
        artist_dict["artist_id"].append(artist_id)
        artist_dict["artist_name"].append(artist_name)

artist_df = pd.DataFrame(artist_dict)
artist_df.head()

Unnamed: 0,artist_id,artist_name
0,K8vZ9171o-f,Green Day
1,K8vZ9171FDV,The Smashing Pumpkins
2,K8vZ9171fW0,Rancid
3,K8vZ917beWV,The Linda Lindas
4,K8vZ9171G9V,Metallica


In [6]:
# extract artist-event bridge table data
# Builds one (event_id, artist_id) row per distinct pair, in first-seen order.
event_artist_bridge_columns = ['event_id', 'artist_id']
event_artist_bridge_list = []
# Set of pairs already recorded: constant-time membership test instead of
# rescanning the growing list for every pair.
seen_event_artist_pairs = set()
for event in events:
    event_id = event["id"]
    # An event without an "_embedded" -> "attractions" entry (missing or empty)
    # contributes no bridge rows instead of aborting with a KeyError.
    # NOTE(review): assumes such events are legitimate and can simply be skipped
    # for this table -- confirm against the data source.
    artists = (event.get("_embedded") or {}).get("attractions") or []
    for artist in artists:
        artist_id = artist["id"]
        event_artist_pair = (event_id, artist_id)
        if event_artist_pair not in seen_event_artist_pairs:
            seen_event_artist_pairs.add(event_artist_pair)
            event_artist_bridge_list.append(event_artist_pair)

# sanity check
# Every (event_id, artist_id) pair must appear exactly once. Fail loudly rather
# than staying silent if that ever stops being true.
if len(set(event_artist_bridge_list)) == len(event_artist_bridge_list):
    print("Sanity check for Artist-Event bridge table successful")
else:
    raise ValueError("Sanity check for Artist-Event bridge table failed: duplicate (event_id, artist_id) pairs")

artist_event_bridge_df = pd.DataFrame(event_artist_bridge_list, columns=event_artist_bridge_columns)
artist_event_bridge_df.head()

Sanity check for Artist-Event bridge table successful


Unnamed: 0,event_id,artist_id
0,G5vVZ9U6NySUy,K8vZ9171o-f
1,G5vVZ9U6NySUy,K8vZ9171FDV
2,G5vVZ9U6NySUy,K8vZ9171fW0
3,G5vVZ9U6NySUy,K8vZ917beWV
4,vvG1bZ99JSxSRh,K8vZ9171G9V


In [7]:
# Generate the INSERT statements for each DataFrame and write them to a script
# file. Each entry is (dataframe, target table name, script name); the order is
# artist first, then the event-artist bridge.
insert_jobs = [
    (artist_df, "artist", "insert_artist"),
    (artist_event_bridge_df, "event_artist_bridge", "insert_event_artist_bridge"),
]
for source_df, target_table, target_script in insert_jobs:
    insert_statements = generate_insert_statements(dataframe=source_df, table_name=target_table)
    write_insert_statements_to_file(insert_statements, script_name=target_script)

Insert statements written to 'sql_scripts/insert_artist.sql' successfully!
Insert statements written to 'sql_scripts/insert_event_artist_bridge.sql' successfully!


In [8]:
# load data to tables
# Execute the generated scripts in a fixed order: artist rows first, then the
# event-artist bridge rows.
# NOTE(review): presumably the bridge rows reference the artist rows, which
# would make this order significant -- confirm against the table definitions.
for script_to_run in ("insert_artist", "insert_event_artist_bridge"):
    run_script_file(script_to_run)

SQL script executed successfully!
SQL script executed successfully!
