In [55]:
import pandas as pd
import boto3
import json
import configparser
import psycopg2
import time
from botocore.exceptions import ClientError

In [56]:
def prettyRedshiftProps(props):
    """Summarise a Redshift cluster description as a two-column DataFrame.

    Parameters
    ----------
    props : dict
        Mapping of cluster property names to values, e.g. one element of
        the ``Clusters`` list returned by ``describe_clusters``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Key`` and ``Value``, one row per entry of ``props`` whose
        key is in the shortlist below, in the order ``props`` yields them.

    Notes
    -----
    As a side effect this sets the global pandas display option
    ``display.max_colwidth`` to unlimited so long values are not truncated
    when the frame is rendered.
    """
    # None means "no limit". The former sentinel -1 was deprecated in
    # pandas 1.0 and is rejected by pandas 2.x.
    pd.set_option('display.max_colwidth', None)
    keysToShow = {"ClusterIdentifier", "NodeType", "ClusterStatus", "MasterUsername",
                  "DBName", "Endpoint", "NumberOfNodes", "VpcId"}
    rows = [(key, value) for key, value in props.items() if key in keysToShow]
    return pd.DataFrame(data=rows, columns=["Key", "Value"])

In [3]:
# LOAD CONFIG

In [67]:
config = configparser.ConfigParser()

# Infrastructure settings. open() + read_file is used (rather than
# config.read) so that a missing file raises instead of being skipped;
# the `with` block makes sure the file handle is closed afterwards.
with open('iac.cfg') as iac_file:
    config.read_file(iac_file)

# Cluster hardware
DWH_CLUSTER_TYPE       = config.get("DWH","DWH_CLUSTER_TYPE")
DWH_NUM_NODES          = config.get("DWH","DWH_NUM_NODES")
DWH_NODE_TYPE          = config.get("DWH","DWH_NODE_TYPE")

# Cluster identity and database credentials
DWH_CLUSTER_IDENTIFIER = config.get("DWH","DWH_CLUSTER_IDENTIFIER")
DWH_DB                 = config.get("DWH","DWH_DB")
DWH_DB_USER            = config.get("DWH","DWH_DB_USER")
DWH_DB_PASSWORD        = config.get("DWH","DWH_DB_PASSWORD")
DWH_PORT               = config.get("DWH","DWH_PORT")

DWH_IAM_ROLE_NAME      = config.get("DWH", "DWH_IAM_ROLE_NAME")

# AWS access keys live in a separate file; its sections are merged into the
# same parser object.
with open('key.cfg') as key_file:
    config.read_file(key_file)

KEY                    = config.get('AWS','KEY')
SECRET                 = config.get('AWS','SECRET')

In [58]:
# INITIALIZE CLIENTS

In [59]:
# Both clients target the us-west-2 region and authenticate with the
# KEY / SECRET pair.
iam = boto3.client(
    'iam',
    region_name='us-west-2',
    aws_access_key_id=KEY,
    aws_secret_access_key=SECRET,
)

redshift = boto3.client(
    'redshift',
    region_name='us-west-2',
    aws_access_key_id=KEY,
    aws_secret_access_key=SECRET,
)

In [7]:
# CREATE IAM AND REDSHIFT

In [8]:
print("1.1 Creating a new IAM Role")
try:
    dwhRole = iam.create_role(
        Path='/',
        RoleName=DWH_IAM_ROLE_NAME,
        Description="Allows Redshift clusters to call AWS services on your behalf.",
        # Trust policy: lets the Redshift service assume this role.
        AssumeRolePolicyDocument=json.dumps(
            {'Statement': [{'Action': 'sts:AssumeRole',
                            'Effect': 'Allow',
                            'Principal': {'Service': 'redshift.amazonaws.com'}}],
             'Version': '2012-10-17'}),
    )
except ClientError as e:
    # Re-running the cell against an already-existing role is fine: report
    # it and carry on. Any other failure (bad credentials, missing
    # permissions, ...) is re-raised so the following steps do not run
    # against a role that was never created.
    if e.response['Error']['Code'] != 'EntityAlreadyExists':
        raise
    print(e)


print("1.2 Attaching Policy")

iam.attach_role_policy(RoleName=DWH_IAM_ROLE_NAME,
                       PolicyArn="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess")

print("1.3 Get the IAM role ARN")
DWH_ROLE_ARN = iam.get_role(RoleName=DWH_IAM_ROLE_NAME)['Role']['Arn']

print(DWH_ROLE_ARN)

1.1 Creating a new IAM Role
1.2 Attaching Policy
1.3 Get the IAM role ARN
arn:aws:iam::986480943738:role/dwhRole


In [9]:
# NumberOfNodes is only sent for multi-node clusters; a single-node cluster
# is created when it is omitted.
# NOTE(review): the Redshift API is expected to reject NumberOfNodes combined
# with ClusterType='single-node' - confirm against the API reference.
cluster_size_args = {}
if DWH_CLUSTER_TYPE == 'multi-node':
    cluster_size_args['NumberOfNodes'] = int(DWH_NUM_NODES)

try:
    response = redshift.create_cluster(
        # Hardware
        ClusterType=DWH_CLUSTER_TYPE,
        NodeType=DWH_NODE_TYPE,

        # Identifiers & credentials
        DBName=DWH_DB,
        ClusterIdentifier=DWH_CLUSTER_IDENTIFIER,
        MasterUsername=DWH_DB_USER,
        MasterUserPassword=DWH_DB_PASSWORD,
        # Create the cluster on the same port that is later written to
        # dwh.cfg, so the saved connection settings actually match it.
        Port=int(DWH_PORT),

        # Role (for S3 access)
        IamRoles=[DWH_ROLE_ARN],

        **cluster_size_args
    )
except ClientError as e:
    # An already-existing cluster is tolerated so the cell can be re-run;
    # every other error is re-raised instead of being printed and ignored.
    if e.response['Error']['Code'] != 'ClusterAlreadyExists':
        raise
    print(e)

In [20]:
# Block until the cluster reports itself available. The boto3 waiter polls
# describe_clusters on our behalf and, unlike an unbounded `while True`
# loop, raises a WaiterError once MaxAttempts polls have been used up.
redshift.get_waiter('cluster_available').wait(
    ClusterIdentifier=DWH_CLUSTER_IDENTIFIER,
    WaiterConfig={'Delay': 15, 'MaxAttempts': 120},  # 15 s x 120 = up to ~30 minutes
)

cluster_props = redshift.describe_clusters(ClusterIdentifier=DWH_CLUSTER_IDENTIFIER)['Clusters'][0]

# Host name of the cluster endpoint; written to dwh.cfg further down.
DWH_ENDPOINT = cluster_props['Endpoint']['Address']
prettyRedshiftProps(cluster_props)

NameError: name 'redshift' is not defined

In [13]:
# Assemble the settings that get written to dwh.cfg.
config_dwh = configparser.ConfigParser()

# Key order matters here: the connection cell unpacks this section's values
# positionally as host, dbname, user, password, port.
config_dwh['CLUSTER'] = dict(
    HOST=DWH_ENDPOINT,
    DB_NAME=config['DWH']['DWH_DB'],
    DB_USER=config['DWH']['DWH_DB_USER'],
    DB_PASSWORD=config['DWH']['DWH_DB_PASSWORD'],
    DB_PORT=config['DWH']['DWH_PORT'],
)

config_dwh['IAM_ROLE'] = dict(ARN=DWH_ROLE_ARN)

# The S3 section is copied over unchanged from the loaded configuration.
config_dwh['S3'] = config['S3']

In [14]:
# Persist the assembled settings to dwh.cfg (any existing file is overwritten).
with open('dwh.cfg', mode='w') as config_dwh_file:
    config_dwh.write(fp=config_dwh_file)

In [19]:
# DELETE IAM AND REDSHIFT

In [68]:
# Look the cluster up by identifier and display a short summary of it.
cluster_props = redshift.describe_clusters(
    ClusterIdentifier=DWH_CLUSTER_IDENTIFIER,
)['Clusters'][0]
prettyRedshiftProps(props=cluster_props)

Unnamed: 0,Key,Value
0,ClusterIdentifier,redshift-cluster
1,NodeType,dc2.large
2,ClusterStatus,available
3,MasterUsername,awsuser
4,DBName,dwh
5,Endpoint,"{'Address': 'redshift-cluster.ck9s1la24v5d.us-west-2.redshift.amazonaws.com', 'Port': 5439}"
6,VpcId,vpc-5aadc322
7,NumberOfNodes,1


In [69]:
# Request deletion of the cluster without taking a final snapshot.
redshift.delete_cluster(
    SkipFinalClusterSnapshot=True,
    ClusterIdentifier=DWH_CLUSTER_IDENTIFIER,
)

{'Cluster': {'ClusterIdentifier': 'redshift-cluster',
  'NodeType': 'dc2.large',
  'ClusterStatus': 'deleting',
  'MasterUsername': 'awsuser',
  'DBName': 'dwh',
  'Endpoint': {'Address': 'redshift-cluster.ck9s1la24v5d.us-west-2.redshift.amazonaws.com',
   'Port': 5439},
  'ClusterCreateTime': datetime.datetime(2020, 5, 11, 18, 30, 21, 276000, tzinfo=tzlocal()),
  'AutomatedSnapshotRetentionPeriod': 1,
  'ClusterSecurityGroups': [],
  'VpcSecurityGroups': [{'VpcSecurityGroupId': 'sg-b558d9e4',
    'Status': 'active'}],
  'ClusterParameterGroups': [{'ParameterGroupName': 'default.redshift-1.0',
    'ParameterApplyStatus': 'in-sync'}],
  'ClusterSubnetGroupName': 'default',
  'VpcId': 'vpc-5aadc322',
  'AvailabilityZone': 'us-west-2b',
  'PreferredMaintenanceWindow': 'tue:10:30-tue:11:00',
  'PendingModifiedValues': {},
  'ClusterVersion': '1.0',
  'AllowVersionUpgrade': True,
  'NumberOfNodes': 1,
  'PubliclyAccessible': True,
  'Encrypted': False,
  'Tags': [],
  'EnhancedVpcRouting': 

In [70]:
# Query the cluster again and display its current summary (including ClusterStatus).
cluster_props = redshift.describe_clusters(
    ClusterIdentifier=DWH_CLUSTER_IDENTIFIER,
)['Clusters'][0]
prettyRedshiftProps(props=cluster_props)

Unnamed: 0,Key,Value
0,ClusterIdentifier,redshift-cluster
1,NodeType,dc2.large
2,ClusterStatus,deleting
3,MasterUsername,awsuser
4,DBName,dwh
5,Endpoint,"{'Address': 'redshift-cluster.ck9s1la24v5d.us-west-2.redshift.amazonaws.com', 'Port': 5439}"
6,VpcId,vpc-5aadc322
7,NumberOfNodes,1


In [71]:
# Detach the S3 read-only managed policy first, then delete the role itself.
iam.detach_role_policy(
    PolicyArn="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess",
    RoleName=DWH_IAM_ROLE_NAME,
)
iam.delete_role(RoleName=DWH_IAM_ROLE_NAME)

{'ResponseMetadata': {'RequestId': 'cb4dd87b-9019-4249-8fad-c767963c29bb',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'cb4dd87b-9019-4249-8fad-c767963c29bb',
   'content-type': 'text/xml',
   'content-length': '200',
   'date': 'Mon, 11 May 2020 21:12:47 GMT'},
  'RetryAttempts': 0}}

In [38]:
# ERROR CHECKING

In [44]:
# Sample query: first ten rows of the users table.
# Written line by line so the exact whitespace of the SQL text is explicit.
query = (
    "\n"
    "SELECT * FROM users LIMIT 10; \n"
)

In [15]:
# Diagnostic query: every row of stl_load_errors.
# Written line by line so the exact whitespace of the SQL text is explicit.
query = (
    "\n"
    "SELECT * \n"
    "FROM stl_load_errors \n"
)

In [45]:
config_conn = configparser.ConfigParser()
config_conn.read('dwh.cfg')
cluster_cfg = config_conn['CLUSTER']

# Pass every setting by name rather than formatting the section's values
# positionally into a DSN string: this no longer depends on the order of
# the keys in dwh.cfg, and values containing spaces cannot break the DSN.
conn = psycopg2.connect(
    host=cluster_cfg['HOST'],
    dbname=cluster_cfg['DB_NAME'],
    user=cluster_cfg['DB_USER'],
    password=cluster_cfg['DB_PASSWORD'],
    port=cluster_cfg['DB_PORT'],
)
try:
    df = pd.read_sql_query(query, conn)
finally:
    # Release the connection even when the query fails.
    conn.close()

df


Unnamed: 0,user_id,first_name,last_name,gender,level
0,2,Jizelle,Benjamin,F,free
1,3,Isaac,Valdez,M,free
2,4,Alivia,Terrell,F,free
3,5,Elijah,Davis,M,free
4,6,Cecilia,Owens,F,free
5,7,Adelyn,Jordan,F,free
6,8,Kaylee,Summers,F,free
7,9,Wyatt,Scott,M,free
8,10,Sylvie,Cruz,F,free
9,11,Christian,Porter,F,free
