# AWS Setup using IaC 
- STEP 0: Save AWS `SECRET` and access `KEY` from AWS Console
- STEP 1: Create IAM ROLE / Save the `IAM_ROLE_ARN`
- STEP 2: Create Redshift Cluster / Save `CLUSTER_ENDPOINT` 
- STEP 3: Open an incoming  TCP port to access the `CLUSTER_ENDPOINT`
- STEP 4: Test Cluster Connection
- <font color='red'>STEP 5: Clean up the resources</font> 

<img src="images/aws_setup_step.png" width="85%"/>

In [178]:
import pandas as pd
import boto3
import json
import configparser
# Shared parser instance; later cells load `dwh.cfg` into it before reading settings.
config = configparser.ConfigParser()

# STEP 0: Get AWS `SECRET` and access `KEY` from AWS Console

- Create a new [IAM User](https://console.aws.amazon.com/iam/home#/users) in your AWS account
- Grant it `AdministratorAccess` from the `Attach existing policies directly` tab
- Add `KEY` and `SECRET`  to `dwh.cfg` 


# STEP 1: Create IAM ROLE / Get the IAM role ARN
- Create an IAM role that gives Redshift read-only access to S3 buckets

In [179]:
# Load the AWS credentials and the IAM role name from dwh.cfg.
# The file is opened in a `with` block so the handle is closed as soon as
# parsing finishes instead of being left open for the life of the kernel.
with open('dwh.cfg') as cfg_file:
    config.read_file(cfg_file)

KEY                = config.get('AWS','KEY')
SECRET             = config.get('AWS','SECRET')
IAM_ROLE_NAME      = config.get("IAM_ROLE", "IAM_ROLE_NAME")

## 1.0 Create Client for IAM

In [180]:
# IAM client, authenticated with the key pair read from dwh.cfg.
iam = boto3.client(
    'iam',
    region_name='us-west-2',
    aws_access_key_id=KEY,
    aws_secret_access_key=SECRET,
)

## 1.1 Create new IAM Role

In [181]:
from botocore.exceptions import ClientError

try:
    print("Creating a new IAM Role...")
    dwhRole = iam.create_role(
        Path='/',
        RoleName=IAM_ROLE_NAME,
        Description="Allows Redshift clusters to call AWS services on your behalf.",
        # Trust policy: the Redshift service is the principal allowed to assume this role.
        AssumeRolePolicyDocument=json.dumps(
            {'Statement': [{'Action': 'sts:AssumeRole',
                            'Effect': 'Allow',
                            'Principal': {'Service': 'redshift.amazonaws.com'}}],
             'Version': '2012-10-17'})
    )
except ClientError as e:
    # Re-running this cell when the role already exists is expected and harmless.
    # Any other API failure (bad credentials, missing permissions, ...) must stop
    # the notebook here rather than resurface later as a confusing follow-up error.
    if e.response['Error']['Code'] != 'EntityAlreadyExists':
        raise
    print(e)


print("Attaching Policy...")

# Give the role read-only access to S3 through the AWS-managed policy.
iam.attach_role_policy(RoleName=IAM_ROLE_NAME,
                       PolicyArn="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess"
                      )

# Read the role back to confirm it exists and report its creation timestamp.
roleCreateDate = iam.get_role(RoleName=IAM_ROLE_NAME)['Role']['CreateDate']
print("Successfully created at",roleCreateDate)

Creating a new IAM Role...
Attaching Policy...
Successfully created at 2020-05-31 07:50:11+00:00


## 1.2 Get the IAM role ARN and save in `dwh.cfg`

In [182]:
# Look up the ARN of the IAM role named IAM_ROLE_NAME.
roleArn = iam.get_role(RoleName=IAM_ROLE_NAME)['Role']['Arn']
# Uncomment to display the ARN, then copy it into dwh.cfg as IAM_ROLE_ARN
# (the Step 2 config cell reads it from the [IAM_ROLE] section).
#print("IAM_ROLE_ARN ::",roleArn) # add to config

# STEP 2:  Create Redshift Cluster / Get `Cluster Endpoint` 

- Create a RedShift Cluster. For arguments to `create_cluster`, see [docs](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/redshift.html#Redshift.Client.create_cluster)

In [183]:
# Reload dwh.cfg so values added since Step 1 (e.g. IAM_ROLE_ARN) are picked up.
# The `with` block closes the file handle as soon as parsing finishes.
with open('dwh.cfg') as cfg_file:
    config.read_file(cfg_file)
#from step 0
KEY                = config.get('AWS','KEY')
SECRET             = config.get('AWS','SECRET')

# Cluster hardware / identity
CLUSTER_TYPE       = config.get("CLUSTER_SETUP","CLUSTER_TYPE")
NUM_NODES          = config.get("CLUSTER_SETUP","NUM_NODES")
NODE_TYPE          = config.get("CLUSTER_SETUP","NODE_TYPE")
CLUSTER_IDENTIFIER = config.get("CLUSTER_SETUP","CLUSTER_IDENTIFIER")

# Database name, credentials and port
DB_NAME            = config.get("CLUSTER","DB_NAME")
DB_USER            = config.get("CLUSTER","DB_USER")
DB_PASSWORD        = config.get("CLUSTER","DB_PASSWORD")
DB_PORT            = config.get("CLUSTER","DB_PORT")

#from step 1 
IAM_ROLE_ARN      = config.get("IAM_ROLE","IAM_ROLE_ARN") 


## 2.0 Create Client for Redshift

In [184]:
# Redshift client, authenticated with the key pair read from dwh.cfg.
redshift = boto3.client(
    'redshift',
    aws_access_key_id=KEY,
    aws_secret_access_key=SECRET,
    region_name="us-west-2",
)

## 2.1 Create Redshift Cluster

In [185]:
try:
    response = redshift.create_cluster(
        #HW
        ClusterType=CLUSTER_TYPE,
        NodeType=NODE_TYPE,
        NumberOfNodes=int(NUM_NODES),

        #Identifiers & Credentials
        DBName=DB_NAME,
        ClusterIdentifier=CLUSTER_IDENTIFIER,
        MasterUsername=DB_USER,
        MasterUserPassword=DB_PASSWORD,
        # Listen on the configured port so the cluster agrees with the ingress
        # rule (Step 3) and the connection string (Step 4), which both use DB_PORT.
        Port=int(DB_PORT),

        #Roles (for s3 access)
        IamRoles=[IAM_ROLE_ARN]#from step 1
    )
except ClientError as e:
    # Re-running the cell while the cluster already exists is tolerated;
    # every other API error is re-raised so the failure is visible here.
    if e.response['Error']['Code'] != 'ClusterAlreadyExists':
        raise
    print(e)

## 2.2 *Describe* the cluster to see its status
- Run this block repeatedly until the cluster status becomes <font color='green'> "Available" </font>

In [186]:
def prettyRedshiftProps(props):
    """Render a subset of cluster properties as a two-column (Key, Value) table.

    Parameters
    ----------
    props : mapping
        Property names mapped to their values. Presumably one entry of the
        ``Clusters`` list returned by ``describe_clusters`` (confirm with caller).

    Returns
    -------
    None
        The table is shown with ``display``; nothing is returned.

    Notes
    -----
    Sets the pandas option ``display.max_colwidth`` to 0 as a side effect.
    Rows appear in the iteration order of ``props``.
    """
    wanted_keys = [
        "ClusterIdentifier",
        "NodeType",
        "ClusterStatus",
        "MasterUsername",
        "DBName",
        "Endpoint",
        "NumberOfNodes",
        "VpcId",
        "IamRoles",
    ]
    pd.set_option('display.max_colwidth', 0)

    # Keep only the properties of interest, preserving the order of `props`.
    rows = []
    for name, value in props.items():
        if name in wanted_keys:
            rows.append((name, value))

    table = pd.DataFrame(data=rows, columns=["Key", "Value"])
    display(table)

In [187]:
# One-shot status check: each run issues a fresh describe_clusters call and
# prints the current ClusterStatus. Re-run the cell to poll.
# For a table of more cluster properties, uncomment the next line instead.
#prettyRedshiftProps(redshift.describe_clusters(ClusterIdentifier=CLUSTER_IDENTIFIER)['Clusters'][0])
print("Status:",redshift.describe_clusters(ClusterIdentifier=CLUSTER_IDENTIFIER)['Clusters'][0]['ClusterStatus'])

Status: creating


#### <font color='red'>Run this only after the cluster status becomes</font> <font color='green'>"Available"</font> 
## 2.3 Save the cluster endpoint in `dwh.cfg`
- Add CLUSTER_ENDPOINT as HOST to `dwh.cfg`

In [166]:
## Add HOST to the config file: uncomment the lines below to print the endpoint.
## NOTE(review): no earlier cell assigns `myClusterProps`, so the snippet fetches it first.
#myClusterProps = redshift.describe_clusters(ClusterIdentifier=CLUSTER_IDENTIFIER)['Clusters'][0]
#print("CLUSTER_ENDPOINT :: ", myClusterProps['Endpoint']['Address'])

## Uncomment below to print the ARN of the role attached to the cluster.
#print("IAM_ROLE_ARN :: ", myClusterProps['IamRoles'][0]['IamRoleArn'])

# STEP 3: Open an incoming  TCP port to access the `CLUSTER_ENDPOINT`

In [167]:
# Reload dwh.cfg; the `with` block closes the file handle once parsing finishes.
with open('dwh.cfg') as cfg_file:
    config.read_file(cfg_file)

KEY                = config.get('AWS','KEY')
SECRET             = config.get('AWS','SECRET')
DB_PORT            = config.get("CLUSTER","DB_PORT")

# Fetch the cluster description here: no earlier cell assigns `myClusterProps`,
# so relying on a leftover kernel variable raises NameError on a fresh run.
myClusterProps     = redshift.describe_clusters(ClusterIdentifier=CLUSTER_IDENTIFIER)['Clusters'][0]
VPC_ID             = myClusterProps['VpcId']#from step 2

## 3.0 Create Client for EC2

In [168]:
# EC2 *resource* (object-oriented interface, not a low-level client),
# authenticated with the key pair read from dwh.cfg.
ec2 = boto3.resource(
    'ec2',
    aws_access_key_id=KEY,
    aws_secret_access_key=SECRET,
    region_name="us-west-2",
)

## 3.1 Open TCP port

In [169]:
try:
    vpc = ec2.Vpc(id=VPC_ID)
    # Select the VPC's `default` security group by name. Taking element [0] of
    # the unfiltered listing picks an arbitrary group when the VPC has several.
    defaultSg = list(
        vpc.security_groups.filter(
            Filters=[{'Name': 'group-name', 'Values': ['default']}]
        )
    )[0]
    print(defaultSg)
    # SECURITY NOTE: 0.0.0.0/0 opens the database port to the whole internet.
    # Narrow the CIDR to your own address range for anything beyond a demo.
    # The group is identified by the resource itself, so GroupName is not passed
    # (name-based lookup is only supported for default VPCs).
    defaultSg.authorize_ingress(
        CidrIp='0.0.0.0/0',
        IpProtocol='TCP',
        FromPort=int(DB_PORT),
        ToPort=int(DB_PORT)
    )
except ClientError as e:
    # The rule already existing (cell re-run) is fine; anything else is re-raised.
    if e.response['Error']['Code'] != 'InvalidPermission.Duplicate':
        raise
    print(e)

ec2.SecurityGroup(id='sg-70b88221')
An error occurred (InvalidPermission.Duplicate) when calling the AuthorizeSecurityGroupIngress operation: the specified rule "peer: 0.0.0.0/0, TCP, from port: 5439, to port: 5439, ALLOW" already exists


# STEP 4: Test Cluster Connection

In [170]:
# Reload dwh.cfg so the HOST value added after Step 2 is picked up.
# The `with` block closes the file handle once parsing finishes.
with open('dwh.cfg') as cfg_file:
    config.read_file(cfg_file)

DB_NAME            = config.get("CLUSTER","DB_NAME")
DB_USER            = config.get("CLUSTER","DB_USER")
DB_PASSWORD        = config.get("CLUSTER","DB_PASSWORD")
DB_PORT            = config.get("CLUSTER","DB_PORT")

HOST               = config.get("CLUSTER","HOST") #from step 2

## 4.1 Test connection to PostgreSQL database

In [173]:
conn_string="postgresql://{}:{}@{}:{}/{}".format(DB_USER, DB_PASSWORD,HOST,DB_PORT,DB_NAME)
#print(conn_string)
%sql $conn_string

(psycopg2.OperationalError) could not translate host name "dwhcluster.cgnkpe4iotji.us-west-2.redshift.amazonaws.com" to address: nodename nor servname provided, or not known

(Background on this error at: http://sqlalche.me/e/e3q8)
Connection info needed in SQLAlchemy format, example:
               postgresql://username:password@hostname/dbname
               or an existing connection: dict_keys(['postgresql://dwhuser:***@dwhcluster.cgnkpe4iotji.us-west-2.redshift.amazonaws.com:5439/dwh'])


### <font color='red'>When finished, run this step to remove the resources and avoid unnecessary AWS service charges.</font>
# <font color='red'>STEP 5: Clean up the resources</font> 


In [174]:
# Reload dwh.cfg to get the identifiers of the resources to delete.
# The `with` block closes the file handle once parsing finishes.
with open('dwh.cfg') as cfg_file:
    config.read_file(cfg_file)

CLUSTER_IDENTIFIER = config.get("CLUSTER_SETUP","CLUSTER_IDENTIFIER")
IAM_ROLE_NAME = config.get("IAM_ROLE","IAM_ROLE_NAME")

## 5.1 Delete Redshift Cluster

In [175]:
# Request deletion of the cluster. SkipFinalClusterSnapshot=True asks AWS to
# skip the final snapshot, so the data is not kept. The response (shown as the
# cell output) reports the cluster in its new status.
redshift.delete_cluster( ClusterIdentifier=CLUSTER_IDENTIFIER,  SkipFinalClusterSnapshot=True)

{'Cluster': {'ClusterIdentifier': 'dwhcluster',
  'NodeType': 'dc2.large',
  'ClusterStatus': 'deleting',
  'ClusterAvailabilityStatus': 'Modifying',
  'MasterUsername': 'dwhuser',
  'DBName': 'dwh',
  'Endpoint': {'Address': 'dwhcluster.cgnkpe4iotji.us-west-2.redshift.amazonaws.com',
   'Port': 5439},
  'ClusterCreateTime': datetime.datetime(2020, 5, 30, 8, 23, 15, 234000, tzinfo=tzutc()),
  'AutomatedSnapshotRetentionPeriod': 1,
  'ManualSnapshotRetentionPeriod': -1,
  'ClusterSecurityGroups': [],
  'VpcSecurityGroups': [{'VpcSecurityGroupId': 'sg-70b88221',
    'Status': 'active'}],
  'ClusterParameterGroups': [{'ParameterGroupName': 'default.redshift-1.0',
    'ParameterApplyStatus': 'in-sync'}],
  'ClusterSubnetGroupName': 'default',
  'VpcId': 'vpc-2180e959',
  'AvailabilityZone': 'us-west-2b',
  'PreferredMaintenanceWindow': 'wed:08:00-wed:08:30',
  'PendingModifiedValues': {},
  'ClusterVersion': '1.0',
  'AllowVersionUpgrade': True,
  'NumberOfNodes': 4,
  'PubliclyAccessible'

## 5.2 Check cluster status

In [176]:
try:
    myClusterProps = redshift.describe_clusters(ClusterIdentifier=CLUSTER_IDENTIFIER)['Clusters'][0]
except ClientError as e:
    # Once deletion has finished the cluster no longer exists and the API
    # answers ClusterNotFound. That is the expected end state of the clean-up,
    # so report it instead of leaving a traceback. Other errors are re-raised.
    if e.response['Error']['Code'] != 'ClusterNotFound':
        raise
    print("Cluster Identifier:",CLUSTER_IDENTIFIER)
    print("Cluster Status: deleted")
else:
    print("Cluster Identifier:",myClusterProps['ClusterIdentifier'])
    print("DBName:",myClusterProps['DBName'])
    print("Cluster Status:",myClusterProps['ClusterStatus'])
    #prettyRedshiftProps(myClusterProps)


Cluster Identifier: dwhcluster
DBName: dwh
Cluster Status: deleting


## 5.3 Delete IAM Role

In [177]:
# Detach the S3 read-only policy first, then delete the role itself.
iam.detach_role_policy(
    PolicyArn="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess",
    RoleName=IAM_ROLE_NAME,
)
# Last expression of the cell: the delete_role response is shown as output.
iam.delete_role(
    RoleName=IAM_ROLE_NAME,
)

{'ResponseMetadata': {'RequestId': '2b3ab30a-e5a5-4b42-bb46-a4b90bed5a0c',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '2b3ab30a-e5a5-4b42-bb46-a4b90bed5a0c',
   'content-type': 'text/xml',
   'content-length': '200',
   'date': 'Sat, 30 May 2020 08:49:53 GMT'},
  'RetryAttempts': 0}}