# Genre table pipeline

Steps:
1. Load the event data from the data folder
2. Extract the table-specific information into a pandas DataFrame
3. Generate the insert statements
4. Load the data into the SQL table

In [1]:
import os
# Move the working directory up one level. Presumably this makes the
# project-local packages imported in the next cell (`api`, `helpers`) resolvable
# from the repository root -- TODO confirm the notebook lives one level below it.
# NOTE(review): the path is relative to the *current* directory, so re-running
# this cell without restarting the kernel moves up one more level each time.
os.chdir("../")

import warnings
# Install a blanket "ignore" filter: every warning category is suppressed for
# the rest of the session, including deprecation warnings from pandas.
warnings.filterwarnings("ignore")

In [2]:
import pandas as pd
from api.api_connect import fetch_api_data
import psycopg2 
from helpers import load_config, generate_insert_statements, write_insert_statements_to_file, run_script_file, load_data_file

# Load the project configuration through the `helpers` module.
# NOTE(review): `config` is not referenced by any cell visible in this notebook;
# presumably the helpers read their own settings -- confirm before removing.
config = load_config()

In [3]:
# Load the raw event data set named "event_large" via the project helper.
# The extraction cell below iterates over `events` and reads each item's 'id'
# and 'classifications' keys, so it is treated as a sequence of event dicts.
events = load_data_file("event_large")

In [4]:
# Walk every classification of every event and build two collections:
#   * genre_list       -- one tuple per distinct genre id
#                         (genre_id, genre_name, sub_genre_id, sub_genre_name, segment);
#                         the first classification seen for a genre id wins, so the
#                         sub-genre fields describe only that first occurrence.
#   * event_genre_list -- distinct (event_id, genre_id) pairs for the
#                         Event_Genre bridge table, in first-seen order.
genre_list = []
event_genre_list = []
existing_genre_ids = set()
# Set mirror of event_genre_list so the duplicate check is O(1) instead of a
# linear scan of the list for every classification.
seen_event_genre_pairs = set()

for e in events:
    event_id = e['id']
    for c in e['classifications']:
        genre_id = c['genre']['id']
        genre_name = c['genre']['name']
        segment = c['segment']['name']

        # 'subGenre' is optional on a classification; fall back to None.
        if 'subGenre' in c:
            sub_genre_id = c['subGenre']['id']
            sub_genre_name = c['subGenre']['name']
        else:
            sub_genre_id = None
            sub_genre_name = None

        if genre_id not in existing_genre_ids:
            genre_list.append((genre_id, genre_name, sub_genre_id, sub_genre_name, segment))
            existing_genre_ids.add(genre_id)

        # Record the bridge row for *this* classification. Doing this inside the
        # inner loop links every genre of an event (not just the last one) and
        # never reuses a genre_id left over from a previous event when an event
        # has no classifications.
        event_genre_pair = (event_id, genre_id)
        if event_genre_pair not in seen_event_genre_pairs:
            seen_event_genre_pairs.add(event_genre_pair)
            event_genre_list.append(event_genre_pair)

# The bridge table must not contain duplicate (event_id, genre_id) pairs.
if len(set(event_genre_list)) == len(event_genre_list):
    print("Sanity check for Event_Genre bridge table successful")


Sanity check for Event_Genre bridge table successful


In [5]:
# Turn the collected genre tuples into a DataFrame. The column names follow the
# order in which the tuple fields were appended to genre_list. The last line
# previews the first rows.
genre_df = pd.DataFrame(
    genre_list,
    columns=['genre_id', 'genre_name', 'sub_genre_id', 'sub_genre_name', 'segment'],
)
genre_df.head()

Unnamed: 0,genre_id,genre_name,sub_genre_id,sub_genre_name,segment
0,KnvZfZ7vAeA,Rock,KZazBEonSMnZfZ7v6F1,Pop,Music
1,KnvZfZ7vAv6,Country,KZazBEonSMnZfZ7vAFa,Country,Music
2,KnvZfZ7vAeF,World,,,Music
3,KnvZfZ7vAvl,Other,KZazBEonSMnZfZ7vk1I,Other,Music
4,KnvZfZ7vAvt,Metal,KZazBEonSMnZfZ7vaJ6,Nu-Metal,Music


In [6]:
# Narrow the frame to the three columns handed to the insert-statement
# generator in the next cell (sub-genre columns are dropped), then remove any
# rows that became identical after the projection.
genre_df = (
    genre_df
    .loc[:, ['genre_id', 'genre_name', 'segment']]
    .drop_duplicates()
)
genre_df.head()

Unnamed: 0,genre_id,genre_name,segment
0,KnvZfZ7vAeA,Rock,Music
1,KnvZfZ7vAv6,Country,Music
2,KnvZfZ7vAeF,World,Music
3,KnvZfZ7vAvl,Other,Music
4,KnvZfZ7vAvt,Metal,Music


In [7]:
# Build the insert statements for the "genre" table from genre_df, then hand
# them to the helper that writes the script named "insert_genre" (the captured
# output below reports the file as 'sql_scripts/insert_genre.sql').
# NOTE(review): how values are quoted/escaped is decided inside
# generate_insert_statements -- confirm it handles apostrophes in genre names.
insert_statements = generate_insert_statements(dataframe = genre_df, table_name = "genre")
write_insert_statements_to_file(insert_statements, script_name = "insert_genre")

Insert statements written to 'sql_scripts/insert_genre.sql' successfully!


In [8]:
# Execute the "insert_genre" script written by the previous cell.
# NOTE(review): presumably this runs plain INSERTs against the database, in
# which case re-running the cell could fail or duplicate rows -- confirm how
# run_script_file handles rows that already exist.
run_script_file("insert_genre")

SQL script executed successfully!
