# Set up AWS IAM role, Redshift Cluster, TCP port

In [None]:

import pandas as pd
import boto3
import json

### Data warehouse config loading

In [None]:
# Load the data-warehouse settings (AWS, IAM and Redshift cluster credentials)
# from the local 'dwh.cfg' file. The assignments that copy individual values
# into variables are intentionally not shared for security reasons.
# NOTE(review): later cells reference KEY, SECRET and several DWH_* names that
# are not assigned anywhere in the shared notebook -- they have to be defined
# here from `config` before the remaining cells are run.

import configparser

config = configparser.ConfigParser()
# Use a context manager so the file handle is closed as soon as parsing ends.
with open('dwh.cfg') as config_file:
    config.read_file(config_file)



### Clients for EC2, S3, IAM, Redshift

In [None]:
# Every handle below targets the us-west-2 region and authenticates with the
# same KEY / SECRET pair.

# Resource-style (object oriented) handles.
ec2 = boto3.resource(
    'ec2',
    region_name='us-west-2',
    aws_access_key_id=KEY,
    aws_secret_access_key=SECRET,
)

s3 = boto3.resource(
    's3',
    region_name="us-west-2",
    aws_access_key_id=KEY,
    aws_secret_access_key=SECRET,
)

# Low-level clients.
iam = boto3.client(
    'iam',
    region_name='us-west-2',
    aws_access_key_id=KEY,
    aws_secret_access_key=SECRET,
)

redshift = boto3.client(
    'redshift',
    region_name="us-west-2",
    aws_access_key_id=KEY,
    aws_secret_access_key=SECRET,
)

### View data in S3 buckets

In [None]:
# function to iterate over S3 bucket content

def bucket_content(bucket, prefix: str) -> int:
    """Count the objects in *bucket* whose key starts with *prefix*.

    Args:
        bucket: Object exposing ``objects.filter(Prefix=...)`` and ``name``
            (the notebook passes the result of ``s3.Bucket(...)``).
        prefix: Key prefix used to filter the bucket's objects.

    Returns:
        The number of matching objects. The same figure is also printed, so
        callers that store the result get a value instead of ``None``.
    """
    # Consume the (possibly lazy) listing without keeping the objects around.
    records = sum(1 for _ in bucket.objects.filter(Prefix=prefix))
    print(f'Total records in bucket {bucket.name} with prefix {prefix}: {records}')
    return records


In [None]:
# Log Bucket
# Handle to the 'udacity-dend' bucket; the song and JSON cells reuse it.
# Despite its name, `bucket_name` holds the object returned by s3.Bucket, not a string.
bucket_name = s3.Bucket('udacity-dend')
# Count the objects stored under the 'log_data' prefix.
log_bucket = bucket_content(bucket_name, 'log_data')

log_bucket

In [None]:
# Song bucket
# Count the objects stored under the 'song_data' prefix of the same bucket.
song_bucket = bucket_content(bucket_name, 'song_data')

song_bucket

In [None]:
# Count the objects whose key starts with 'log_json' in the same bucket.
json_bucket = bucket_content(bucket_name, 'log_json')

json_bucket

### Create IAM Role

In [None]:
# Step 1.1: create the IAM role that the Redshift service is allowed to assume.
try:
    print('1.1 Creating a new IAM Role')
    dwhRole = iam.create_role(
        Path='/',
        RoleName=DWH_IAM_ROLE_NAME,
        Description='Allows Redshift clusters to call AWS services on your behalf.',
        # Trust policy: grants sts:AssumeRole to redshift.amazonaws.com.
        AssumeRolePolicyDocument=json.dumps({
            'Statement': [
                {
                    'Action': 'sts:AssumeRole',
                    'Effect': 'Allow',
                    'Principal': {'Service': 'redshift.amazonaws.com'},
                },
            ],
            'Version': '2012-10-17',
        }),
    )
except Exception as e:
    # Best effort: any failure is only reported so the notebook keeps running.
    print(e)

In [None]:
# Step 1.2: attach the AmazonS3ReadOnlyAccess policy to the role.
# The cell evaluates to the HTTP status code of the response.
print('1.2 Attaching Policy')
iam.attach_role_policy(
    RoleName=DWH_IAM_ROLE_NAME,
    PolicyArn="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess",
)['ResponseMetadata']['HTTPStatusCode']


In [None]:
# Print role ARN
# Step 1.3: look the role up by name and keep its ARN; the cluster-creation
# cell passes it as the cluster's IAM role.
print('1.3 Get the IAM role ARN')
roleArn = iam.get_role(RoleName = DWH_IAM_ROLE_NAME)['Role']['Arn']

print(roleArn)

### Create Redshift Cluster

In [None]:
# Request a new Redshift cluster built from the DWH_* settings.
try:
    response = redshift.create_cluster(
        # Hardware
        ClusterType=DWH_CLUSTER_TYPE,
        NodeType=DWH_NODE_TYPE,
        NumberOfNodes=int(DWH_NUM_NODES),
        # Identifiers and credentials
        DBName=DWH_DB,
        ClusterIdentifier=DWH_CLUSTER_IDENTIFIER,
        MasterUsername=DWH_DB_USER,
        MasterUserPassword=DWH_DB_PASSWORD,
        # Role the cluster may use (the one with S3 read-only access attached)
        IamRoles=[roleArn],
    )
except Exception as e:
    # Best effort: any failure is only reported so the notebook keeps running.
    print(e)

In [None]:
def prettyRedshiftProps(props):
    """Return the key Redshift cluster properties as a two-column DataFrame.

    Args:
        props: Mapping of cluster properties (the notebook passes one entry
            of ``describe_clusters(...)['Clusters']``).

    Returns:
        pandas.DataFrame with the columns ``Key`` and ``Value`` containing
        only the properties listed in ``keysToShow``, in the order in which
        they appear in *props*.

    Side effects:
        Sets the global pandas option ``display.max_colwidth`` so long values
        are shown in full.
    """
    # None means "never truncate". The former -1 sentinel has been deprecated
    # since pandas 1.0 and is rejected by newer releases.
    pd.set_option('display.max_colwidth', None)
    keysToShow = {
        "ClusterIdentifier", "NodeType", "ClusterStatus", "MasterUsername",
        "DBName", "Endpoint", "NumberOfNodes", 'VpcId',
    }
    rows = [(key, value) for key, value in props.items() if key in keysToShow]
    return pd.DataFrame(data=rows, columns=["Key", "Value"])

# Describe the cluster identified by DWH_CLUSTER_IDENTIFIER, keep the first
# entry of the returned 'Clusters' list and display its key properties.
myClusterProps = redshift.describe_clusters(ClusterIdentifier=DWH_CLUSTER_IDENTIFIER)['Clusters'][0]
prettyRedshiftProps(myClusterProps)

In [None]:
# Cluster info: endpoint address and IAM role ARN, read from the cluster description.
# NOTE(review): 'Endpoint' is presumably only populated once the cluster is
# available -- confirm the cluster status before running this cell.
endpoint = myClusterProps['Endpoint']['Address']
# Overwrites the earlier roleArn with the ARN of the first role attached to the cluster.
roleArn = myClusterProps['IamRoles'][0]['IamRoleArn']
print("DWH_ENDPOINT :: ", endpoint)
print("DWH_ROLE_ARN :: ", roleArn)

### TCP port access for the cluster endpoint

In [None]:
# Open the warehouse port for inbound TCP traffic on the 'default' security
# group of the cluster's VPC.
# NOTE: CidrIp '0.0.0.0/0' admits any IPv4 address; narrow it outside a sandbox.
try:
    vpc = ec2.Vpc(id=myClusterProps['VpcId'])
    # Select the group by name instead of by position: the first group in the
    # listing is not necessarily the one called 'default'.
    defaultSg = next(
        (sg for sg in vpc.security_groups.all() if sg.group_name == 'default'),
        None,
    )
    if defaultSg is None:
        raise LookupError(
            "No security group named 'default' in VPC {}".format(myClusterProps['VpcId'])
        )
    print(defaultSg)

    defaultSg.authorize_ingress(
        # Use the selected group's own name so the name and the group always agree.
        GroupName=defaultSg.group_name,
        CidrIp='0.0.0.0/0',
        IpProtocol='TCP',
        FromPort=int(DWH_PORT),
        ToPort=int(DWH_PORT)
    )
except Exception as e:
    # Best effort, as before: report the failure and let the notebook continue.
    print(e)

### Connect to cluster

In [None]:
# Load the `sql` IPython extension, which provides the %sql / %%sql magics used in the cells below.
%load_ext sql

In [None]:
conn_string="postgresql://{}:{}@{}:{}/{}".format(DWH_DB_USER, DWH_DB_PASSWORD, DWH_ENDPOINT, DWH_PORT,DWH_DB)
print(conn_string)
%sql $conn_string

### Test a few queries

First run `create_tables.py` to create the tables.

In [None]:
%%time
%%sql

SELECT * FROM staging_events
LIMIT 5;

In [None]:
%%time
%%sql
SELECT * FROM staging_songs
LIMIT 5;

In [None]:
%%time
%%sql
SELECT COUNT(*) FROM songplays;

In [None]:
%%time
%%sql
SELECT COUNT(*) FROM users;

### Delete Redshift Cluster, detach role policy, delete role

In [None]:
# Delete the cluster; no final snapshot is taken.
redshift.delete_cluster(
    ClusterIdentifier=DWH_CLUSTER_IDENTIFIER,
    SkipFinalClusterSnapshot=True,
)

In [None]:
# Describe the cluster again and display its key properties (presumably to
# follow the status after the delete request).
# NOTE(review): this call is expected to fail once the cluster no longer exists -- confirm.
myClusterProps = redshift.describe_clusters(ClusterIdentifier=DWH_CLUSTER_IDENTIFIER)['Clusters'][0]
prettyRedshiftProps(myClusterProps)

In [None]:
# Detach role policy
# Removes the AmazonS3ReadOnlyAccess policy that was attached in step 1.2.
iam.detach_role_policy(RoleName=DWH_IAM_ROLE_NAME, PolicyArn="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess")

In [None]:
# Delete role
# Removes the IAM role named DWH_IAM_ROLE_NAME; run after the policy has been detached.
iam.delete_role(RoleName=DWH_IAM_ROLE_NAME)