In [15]:
from time import time
import configparser
import matplotlib.pyplot as plt
import pandas as pd
import configparser
import psycopg2
import boto3
import sql_queries

import importlib

In [16]:
config = configparser.ConfigParser()
config.read_file(open('aws.credentials'))

KEY                    = config.get('AWS','KEY')
SECRET                 = config.get('AWS','SECRET')

In [17]:
config = configparser.ConfigParser()
config.read_file(open('aws.cfg'))

DB_CLUSTER_TYPE        = config.get("CLUSTER","DB_CLUSTER_TYPE")
DB_NUM_NODES           = config.get("CLUSTER","DB_NUM_NODES")
DB_NODE_TYPE           = config.get("CLUSTER","DB_NODE_TYPE")

DB_CLUSTER_IDENTIFIER  = config.get("CLUSTER","DB_CLUSTER_IDENTIFIER")
DB_NAME                = config.get("CLUSTER","DB_NAME")
DB_USER                = config.get("CLUSTER","DB_USER")
DB_PASSWORD         = config.get("CLUSTER","DB_PASSWORD")
DB_PORT                = config.get("CLUSTER","DB_PORT")

IAM_ROLE_NAME          = config.get("IAM_ROLE", "IAM_ROLE_NAME")
ARN                    = config.get("IAM_ROLE", "ARN")

(DB_USER, DB_PASSWORD, DB_NAME)

pd.DataFrame({"Param":
                  ["DB_CLUSTER_TYPE", "DB_NUM_NODES", "DB_NODE_TYPE", "DB_CLUSTER_IDENTIFIER", 
                   "DB_NAME", "DB_USER", "DB_PASSWORD", "DB_PORT", "IAM_ROLE_NAME","ARN"],
              "Value":
                  [DB_CLUSTER_TYPE, DB_NUM_NODES, DB_NODE_TYPE, DB_CLUSTER_IDENTIFIER, 
                   DB_NAME, DB_USER, DB_PASSWORD, DB_PORT, IAM_ROLE_NAME, ARN],
             })

Unnamed: 0,Param,Value
0,DB_CLUSTER_TYPE,multi-node
1,DB_NUM_NODES,4
2,DB_NODE_TYPE,dc2.large
3,DB_CLUSTER_IDENTIFIER,dwhCluster
4,DB_NAME,dwh
5,DB_USER,dwhuser
6,DB_PASSWORD,Passw0rd
7,DB_PORT,5439
8,IAM_ROLE_NAME,dwhRole
9,ARN,arn:aws:iam::109203719027:role/dwhRole


In [18]:
redshift = boto3.client('redshift',
                       region_name="us-west-2",
                       aws_access_key_id=KEY,
                       aws_secret_access_key=SECRET
                       )

In [19]:
myClusterProps = redshift.describe_clusters(ClusterIdentifier=DB_CLUSTER_IDENTIFIER)['Clusters'][0]
DB_ENDPOINT = myClusterProps['Endpoint']['Address']
DB_ROLE_ARN = myClusterProps['IamRoles'][0]['IamRoleArn']

print("DB_ENDPOINT :: ", DB_ENDPOINT)
print("DB_ROLE_ARN :: ", DB_ROLE_ARN)
# DB_ENDPOINT ::  dwhcluster.csasogv133my.us-west-2.redshift.amazonaws.com
# DB_ROLE_ARN ::  arn:aws:iam::109203719027:role/dwhRole

DB_ENDPOINT ::  dwhcluster.csasogv133my.us-west-2.redshift.amazonaws.com
DB_ROLE_ARN ::  arn:aws:iam::109203719027:role/dwhRole


In [20]:
def create_tables(cur, conn):
    """
    - Creates the staging tables
    """
    for query in sql_queries.create_table_queries:
        cur.execute(query.format(DB_ROLE_ARN))
        conn.commit()

   

In [21]:
def copy_to_dim(cur, conn):
    """
    - populates the diension tables
    """
    for query in sql_queries.copy_to_dim_queries:
        print(query.format(DB_ROLE_ARN))
        cur.execute(query.format(DB_ROLE_ARN))
        conn.commit()     

In [22]:
def drop_tables(cur, conn):
    """
    - drops the dimension tables
    """
    for query in sql_queries.drop_dim_table_queries:
        print(query.format(DB_ROLE_ARN))
        cur.execute(query.format(DB_ROLE_ARN))
        conn.commit()  

In [23]:
def copy_to_fact(cur, conn):
    """
    - populates the diension tables
    """
    for query in sql_queries.copy_to_fact_queries:
        cur.execute(query.format(DB_ROLE_ARN))
        conn.commit()    

In [24]:
conn = psycopg2.connect("host={} dbname={} user={} password={} port={}"
                        .format(DB_ENDPOINT, DB_NAME, DB_USER, DB_PASSWORD, DB_PORT))

cur = conn.cursor()

In [29]:
print('Creating staging tables')
importlib.reload(sql_queries)
create_tables(cur, conn)

Creating staging tables


In [30]:
print('Populating dimension tables')
importlib.reload(sql_queries)
copy_to_dim(cur, conn)

Populating dimension tables

    copy modes from 's3://tom-baird-capstone-project-2/raw/dim/I94MODE.csv'
        credentials 'aws_iam_role=arn:aws:iam::109203719027:role/dwhRole'
        region 'us-west-2'
        format csv 
        IGNOREHEADER 1;


    copy ports from 's3://tom-baird-capstone-project-2/raw/dim/I94PORT.csv'
        credentials 'aws_iam_role=arn:aws:iam::109203719027:role/dwhRole'
        region 'us-west-2'
        format csv 
        IGNOREHEADER 1;


    copy visas from 's3://tom-baird-capstone-project-2/raw/dim/I94VISA.csv'
        credentials 'aws_iam_role=arn:aws:iam::109203719027:role/dwhRole'
        region 'us-west-2'
        format csv 
        IGNOREHEADER 1;


    copy states from 's3://tom-baird-capstone-project-2/raw/dim/I94ADDR.csv'
        credentials 'aws_iam_role=arn:aws:iam::109203719027:role/dwhRole'
        region 'us-west-2'
        format csv 
        IGNOREHEADER 1;


    copy countries from 's3://tom-baird-capstone-project-2/raw/dim/I94RES.csv'

In [31]:
print('Populating fact tables')
importlib.reload(sql_queries)
copy_to_fact(cur, conn)

Populating fact tables


In [28]:
print('Dropping tables')
importlib.reload(sql_queries)
drop_tables(cur, conn)

Dropping tables
DROP TABLE IF EXISTS i94_data;
DROP TABLE IF EXISTS modes;
DROP TABLE IF EXISTS ports;
DROP TABLE IF EXISTS visas;
DROP TABLE IF EXISTS states;
DROP TABLE IF EXISTS countires;


In [31]:
cur.execute('ROLLBACK;')
conn.commit()

cur.execute('SELECT * FROM stl_load_errors;')
conn.commit()
print(list(cur))

[]


In [19]:
cur.execute('SELECT * FROM i94_test;')
conn.commit()
print(list(cur))

[]


In [83]:
importlib.reload(sql_queries)
drop_dim_tables(cur, conn)