In [54]:
# Shell escape: list the parent directory of this notebook's working directory.
!ls ..

README.md                     call_put_consolidate_5.csv.gz
call_put_consolidate_1.csv.gz config.cfg
call_put_consolidate_2.csv.gz [34mjupyter_notebook_test[m[m
call_put_consolidate_3.csv.gz [34mlogging[m[m
call_put_consolidate_4.csv.gz [34msrc[m[m


In [55]:
import configparser
import boto3

In [56]:
# Parse the settings file that lives one directory above this notebook.
# read() returns the list of files it managed to load, and the notebook
# echoes that list as this cell's output.
config = configparser.ConfigParser()
config.read(['../config.cfg'])

['../config.cfg']

In [57]:
# Look up the KEY and SECRET options stored under the [AWS] section
KEY = config.get(section='AWS', option='KEY')
SECRET = config.get(section='AWS', option='SECRET')

In [58]:
# Build an S3 service resource (boto3's object-oriented interface) for the
# us-east-2 region, authenticating with the KEY / SECRET pair read from config.
s3 = boto3.resource(
    's3',
    region_name='us-east-2',
    aws_access_key_id=KEY,
    aws_secret_access_key=SECRET,
)

# Local handle for the 'option-price-collection37' bucket
sampleDbBucket = s3.Bucket("option-price-collection37")

# Show every object in the bucket whose key starts with "call_put"
for obj in sampleDbBucket.objects.filter(Prefix="call_put"):
    print(obj)

s3.ObjectSummary(bucket_name='option-price-collection37', key='call_put_consolidate_1.csv.gz')
s3.ObjectSummary(bucket_name='option-price-collection37', key='call_put_consolidate_2.csv.gz')
s3.ObjectSummary(bucket_name='option-price-collection37', key='call_put_consolidate_3.csv.gz')
s3.ObjectSummary(bucket_name='option-price-collection37', key='call_put_consolidate_4.csv.gz')
s3.ObjectSummary(bucket_name='option-price-collection37', key='call_put_consolidate_5.csv.gz')


In [65]:
# DROP statements, one per table.
staging_options_table_drop = "DROP TABLE IF EXISTS staging_option;"
options_transaction_table_drop = "DROP TABLE IF EXISTS fact_options_transation;"
options_table_drop = "DROP TABLE IF EXISTS dim_contracts;"
tickers_table_drop = "DROP TABLE IF EXISTS dim_tickers;"

# Execution order: staging table, then the fact table, then the two
# dimension tables (contracts before tickers).
drop_table_queries = [
    staging_options_table_drop,
    options_transaction_table_drop,
    options_table_drop,
    tickers_table_drop,
]

In [84]:
# CREATE TABLES
#
# NOTE: on Redshift a bare NUMERIC means NUMERIC(18,0), i.e. zero decimal
# places, so fractional values would be stored as whole numbers. Columns that
# hold prices or ratios therefore declare an explicit scale:
#   - NUMERIC(18,4) for strike / price / bid / ask / change
#   - NUMERIC(18,6) for percent change and implied volatility
# Count-like staging columns (volume, openInterest) keep scale 0, now spelled
# out explicitly.
# NOTE(review): the chosen scales assume the source CSVs carry at most that
# many meaningful decimals -- adjust if the data needs more.

# Staging table: the columns kept as loaded from the source data (one row per quote).
staging_options__table_create= ("""
CREATE TABLE IF NOT EXISTS staging_option (
          contractSymbol VARCHAR NOT NULL,
          lastTradeDate TIMESTAMP NOT NULL sortkey,
          strike NUMERIC(18,4),
          lastPrice NUMERIC(18,4),
          bid NUMERIC(18,4),
          ask NUMERIC(18,4),
          change NUMERIC(18,4),
          percentChange NUMERIC(18,6),
          volume NUMERIC(18,0),
          openInterest NUMERIC(18,0),
          impliedVolatility NUMERIC(18,6),
          inTheMoney BOOLEAN,
          contractSize VARCHAR,
          currency VARCHAR,
          Ticker VARCHAR NOT NULL distkey,
          OptionType VARCHAR(1) NOT NULL,
          contractExpiryDate VARCHAR(8) NOT NULL,
          Company VARCHAR,
          Exchange VARCHAR,
          TypeDisp VARCHAR);
""")

# Fact table: one row per option transaction, keyed to contract and ticker.
# (The table name's spelling matches the corresponding DROP statement.)
options_transation_table_create = ("""
CREATE TABLE IF NOT EXISTS fact_options_transation(
          transaction_key INTEGER IDENTITY (0,1) PRIMARY KEY,
          contract_key INTEGER,
          ticker_key INTEGER distkey,
          time TIMESTAMP NOT NULL sortkey,
          price NUMERIC(18,4),
          bid NUMERIC(18,4),
          ask NUMERIC(18,4),
          change NUMERIC(18,4),
          percent_change NUMERIC(18,6),
          volume INTEGER,
          open_interest INTEGER,
          implied_vol NUMERIC(18,6),
          in_the_money BOOLEAN,
          FOREIGN KEY(contract_key) REFERENCES dim_contracts(contract_key),
          FOREIGN KEY(ticker_key) REFERENCES dim_tickers(ticker_key));
""")

# Dimension table: one row per option contract.
contracts_table_create = ("""
CREATE TABLE IF NOT EXISTS dim_contracts (
          contract_key INTEGER IDENTITY (0,1) PRIMARY KEY,
          contract_symbol VARCHAR NOT NULL,
          maturity_date DATE NOT NULL sortkey,
          strike NUMERIC(18,4) NOT NULL,
          option_type VARCHAR(1) NOT NULL,
          ticker_key INTEGER NOT NULL distkey,
          currency VARCHAR,
          contract_size VARCHAR,
          FOREIGN KEY(ticker_key) REFERENCES dim_tickers(ticker_key));
""")

# Dimension table: one row per underlying ticker, declared with DISTSTYLE ALL.
tickers_table_create = ("""
CREATE TABLE IF NOT EXISTS dim_tickers (
        ticker_key INTEGER IDENTITY (0,1) PRIMARY KEY,
        ticker VARCHAR NOT NULL sortkey,
        company VARCHAR,
        exchange_nm VARCHAR)
diststyle all;
""")

# Creation order matters: referenced tables (tickers, then contracts) are
# created before the tables whose FOREIGN KEY clauses point at them.
create_table_queries = [staging_options__table_create, tickers_table_create,
                        contracts_table_create, options_transation_table_create]

In [85]:
def drop_tables(cur, conn, queries=None):
    """
    Drops each table by executing a list of DROP statements, committing
    after every statement.

    input:
    cur: cursor variable used to execute each statement
    conn: database connection object, committed after each statement
    queries: optional iterable of SQL strings to execute; when omitted the
             notebook-level `drop_table_queries` list is used

    return: None
    """
    # Resolve the default at call time so the list defined in another cell
    # only has to exist when the function is actually invoked without `queries`.
    if queries is None:
        queries = drop_table_queries
    for query in queries:
        cur.execute(query)
        conn.commit()

In [86]:
def create_tables(cur, conn, queries=None):
    """
    Creates each table by executing a list of CREATE statements, committing
    after every statement.

    input:
    cur: cursor variable used to execute each statement
    conn: database connection object, committed after each statement
    queries: optional iterable of SQL strings to execute; when omitted the
             notebook-level `create_table_queries` list is used

    return: None
    """
    # Resolve the default at call time so the list defined in another cell
    # only has to exist when the function is actually invoked without `queries`.
    if queries is None:
        queries = create_table_queries
    for query in queries:
        cur.execute(query)
        conn.commit()

In [87]:
# connect to the AWS Redshift cluster
import psycopg2

# The DSN placeholders are filled positionally from the [CLUSTER] section, so
# that section must list its options in exactly this order:
# host, dbname, user, password, port.
conn = psycopg2.connect("host={} dbname={} user={} password={} port={}".format(*config['CLUSTER'].values()))
try:
    cur = conn.cursor()

    # drop fact, dimension, staging table if they exist
    drop_tables(cur, conn)
    # create fact, dimension, staging table
    create_tables(cur, conn)
finally:
    # Always release the cluster connection, even when a DROP/CREATE fails;
    # the original exception (if any) still propagates to the notebook.
    conn.close()