In [1]:
import configparser
import psycopg2
import time
from sql_queries import copy_table_queries, insert_table_queries
from create_tables import create_tables, drop_tables

In [2]:
def load_staging_tables(cur, conn):
    """
    Description: Fill the Redshift staging tables from the json files kept in a
    S3 bucket by running every statement listed in copy_table_queries.

    Arguments:
        cur: the cursor object used to execute each statement
        conn: connection to the database, committed after each statement

    Returns:
        None
    """
    for copy_statement in copy_table_queries:
        # Echo the statement first so the running step is visible in the output.
        print(copy_statement)
        cur.execute(copy_statement)
        # Commit per statement: each completed load is persisted on its own.
        conn.commit()

def insert_tables(cur, conn):
    """
    Description: Populate the 5 final tables from the staging tables by running
    every statement listed in insert_table_queries.

    Arguments:
        cur: the cursor object used to execute each statement
        conn: connection to the database, committed after each statement

    Returns:
        None
    """
    for insert_statement in insert_table_queries:
        # Echo the statement first so the running step is visible in the output.
        print(insert_statement)
        cur.execute(insert_statement)
        # Commit per statement: each completed insert is persisted on its own.
        conn.commit()

In [3]:
# Run the whole ETL pipeline: drop and recreate the tables, load the staging
# tables, then fill the final tables. Each step is timed and reported.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so fail here with a clear message instead of a
# KeyError on 'CLUSTER' further down.
if not config.read('dwh.cfg'):
    raise FileNotFoundError("Could not read configuration file 'dwh.cfg'")

# NOTE(review): the values are passed positionally, so this relies on the
# [CLUSTER] section listing host, dbname, user, password and port in exactly
# that order -- confirm against dwh.cfg.
conn = psycopg2.connect("host={} dbname={} user={} password={} port={}".format(*config['CLUSTER'].values()))

# Steps run in this order; each callable takes (cur, conn).
pipeline_steps = [
    ("1. Dropping tables...", drop_tables),
    ("2. Creating tables...", create_tables),
    ("3. Loading staging tables, it may take time...", load_staging_tables),
    ("4. Loading final tables...", insert_tables),
]

try:
    cur = conn.cursor()
    try:
        for step_label, run_step in pipeline_steps:
            print(step_label)
            # perf_counter() is monotonic, so the measured duration is not
            # affected by system clock adjustments during a long load.
            start_time = time.perf_counter()
            run_step(cur, conn)
            print("--- It took %s seconds ---" % (time.perf_counter() - start_time))
    finally:
        cur.close()
finally:
    # Always release the connection, even when one of the steps raises.
    conn.close()

4. Loading final tables...

        INSERT INTO songplays (start_time, user_id, level, song_id, artist_id, session_id, location, user_agent)
        SELECT 
            TIMESTAMP 'epoch' + ts/1000 *INTERVAL '1 second' AS start_time,
            userid AS user_id,
            level,
            sts.song_id AS song_id,
            sts.artist_id AS artist_id,
            sessionid AS sessions_id,
            ste.location,
            useragent AS user_agent
            FROM staging_events AS ste
                JOIN staging_songs AS sts ON ste.song = sts.title
 
--- It took 15.741201400756836 seconds ---
