# Jupyter Notebook to create and delete AWS Redshift Cluster

Simple steps to start an AWS Redshift Cluster for a Data Warehouse, implementing Infrastructure as Code.

### Index:
1. [Install](#1.-Install)
2. [Parameters](#2.-Parameters)
3. [Resources and Clients](#3.-Resources-and-Clients)
4. [Create IAM Role](#4.-Create-IAM-Role)
5. [Create Cluster](#5.-Create-Cluster)
6. [Delete Cluster and IAM Role](#6.-Delete-Cluster-and-IAM-Role)

## 1. Install

In [1]:
# Import all libraries required
import boto3
import json
import pandas as pd
from configparser import ConfigParser

## 2. Parameters 

In [2]:
# Load the AWS access key pair from the [USER_DETAILS] section of awsUser.cfg.
# NEVER SHARE YOUR SECRET KEY.
config = ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Fail here with a clear message instead of a
# confusing NoSectionError on the first config.get() below.
if not config.read('awsUser.cfg'):
    raise FileNotFoundError("Could not read 'awsUser.cfg' from the current working directory")

KEY = config.get('USER_DETAILS', 'KEY')
SECRET = config.get('USER_DETAILS', 'SECRET')

In [3]:
# Load the Redshift settings from the [CLUSTER] section of DW.cfg.
# A fresh parser is used so that `config` holds only the DW.cfg contents.

config = ConfigParser()

# ConfigParser.read() returns the list of files it managed to parse and stays
# silent about missing ones; stop early with a clear message in that case.
if not config.read('DW.cfg'):
    raise FileNotFoundError("Could not read 'DW.cfg' from the current working directory")

# DB Parameters
DB_NAME = config.get('CLUSTER', 'DB_NAME')
DB_USER = config.get('CLUSTER', 'DB_USER')
DB_PASSWORD = config.get('CLUSTER', 'DB_PASSWORD')

# DWH Parameters (Query AWS for additional options)
# Values are kept as strings here; the cells that need numbers convert them
# with int() at the point of use.
DWH_CLUSTER_TYPE = config.get('CLUSTER', 'DWH_CLUSTER_TYPE')
DWH_NUM_NODES = config.get('CLUSTER', 'DWH_NUM_NODES')
DWH_NODE_TYPE = config.get('CLUSTER', 'DWH_NODE_TYPE')
DWH_PORT = config.get('CLUSTER', 'DWH_PORT')
DWH_REGION = config.get('CLUSTER', 'DWH_REGION')

DWH_IAM_ROLE_NAME = config.get('CLUSTER', 'DWH_IAM_ROLE_NAME')
DWH_CLUSTER_IDENTIFIER = config.get('CLUSTER', 'DWH_CLUSTER_IDENTIFIER')
DWH_DB = config.get('CLUSTER', 'DWH_DB')

## 3. Resources and Clients

In [4]:
# Build one boto3 handle per AWS service used in this notebook.
# All four are created with the same region and access key pair.
aws_connection_args = {
    'region_name': DWH_REGION,
    'aws_access_key_id': KEY,
    'aws_secret_access_key': SECRET,
}

# Resources
ec2 = boto3.resource('ec2', **aws_connection_args)
s3 = boto3.resource('s3', **aws_connection_args)

# Clients
iam = boto3.client('iam', **aws_connection_args)
redshift = boto3.client('redshift', **aws_connection_args)

## 4. Create IAM Role

In [5]:
# Create the IAM role used by the Redshift cluster.

# Trust policy: allows the redshift.amazonaws.com service principal to
# perform sts:AssumeRole on this role.
assume_role_policy = {
    'Statement': [
        {
            'Action': 'sts:AssumeRole',
            'Effect': 'Allow',
            'Principal': {'Service': 'redshift.amazonaws.com'},
        }
    ],
    'Version': '2012-10-17',
}

try:
    print('Creating IAM Role:')
    dwhRole = iam.create_role(
        Path='/',
        RoleName=DWH_IAM_ROLE_NAME,
        Description='Allows redshift cluster to call AWS services on your behalf- Udacity project',
        AssumeRolePolicyDocument=json.dumps(assume_role_policy),
    )
except Exception as err:
    # Best effort: any failure is printed rather than raised, so the
    # following cells can still run.
    print(err)

Creating IAM Role:


In [6]:
# Attach the AWS-managed AmazonS3ReadOnlyAccess policy to the role and
# show the HTTP status code of the response as the cell output.

attach_response = iam.attach_role_policy(
    RoleName=DWH_IAM_ROLE_NAME,
    PolicyArn='arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess',
)
attach_response['ResponseMetadata']['HTTPStatusCode']

200

In [7]:
# Look the role up by name and keep its ARN; the cluster creation cell
# passes it in IamRoles.

role_description = iam.get_role(RoleName=DWH_IAM_ROLE_NAME)
roleArn = role_description['Role']['Arn']
print(roleArn)


arn:aws:iam::879294216748:role/dwhRole


## 5. Create Cluster

In [8]:
# Create Redshift Cluster
# Any error (including a non-numeric DWH_NUM_NODES / DWH_PORT) is printed
# rather than raised, so the notebook keeps running.

try:
    response = redshift.create_cluster(
        # PARAMETERS FOR HARDWARE
        ClusterType=DWH_CLUSTER_TYPE,
        NodeType=DWH_NODE_TYPE,
        NumberOfNodes=int(DWH_NUM_NODES),
        # PARAMETERS FOR IDENTIFIERS & CREDENTIALS
        DBName=DB_NAME,
        ClusterIdentifier=DWH_CLUSTER_IDENTIFIER,
        MasterUsername=DB_USER,
        MasterUserPassword=DB_PASSWORD,
        # Listen on the configured port. The security-group cell opens
        # DWH_PORT, so the cluster must use the same port instead of
        # silently falling back to the service default.
        Port=int(DWH_PORT),
        # Parameter for role
        IamRoles=[roleArn]
    )
except Exception as e:
    print(e)

In [9]:
# Redshift Cluster Details

# Function to Get Redshift Cluster Details
def prettyRedshiftProps(props):
    """Summarise a cluster description as a two-column DataFrame.

    Args:
        props: mapping of cluster property names to values (the notebook
            passes one entry of describe_clusters()['Clusters']).

    Returns:
        pandas.DataFrame with columns "Key" and "Value", holding only the
        properties listed in ``keysToShow``, in the order they appear in
        ``props``.

    Side effect: sets the pandas option 'display.max_colwidth' to None.
    """
    pd.set_option('display.max_colwidth', None)
    keysToShow = ["ClusterIdentifier", "NodeType", "ClusterStatus", "MasterUsername", "DBName", "Endpoint", "NumberOfNodes", 'VpcId']

    selected_rows = []
    for prop_name, prop_value in props.items():
        if prop_name in keysToShow:
            selected_rows.append((prop_name, prop_value))

    return pd.DataFrame(data=selected_rows, columns=["Key", "Value"])
    

In [13]:
# Fetch the cluster description and show its summary.
# Re-run this cell until ClusterStatus becomes available.
cluster_listing = redshift.describe_clusters(ClusterIdentifier=DWH_CLUSTER_IDENTIFIER)
myClusterProps = cluster_listing['Clusters'][0]
prettyRedshiftProps(myClusterProps)

Unnamed: 0,Key,Value
0,ClusterIdentifier,dwhcluster
1,NodeType,dc2.large
2,ClusterStatus,available
3,MasterUsername,dwhuser
4,DBName,landtempdb
5,Endpoint,"{'Address': 'dwhcluster.cg959wyk8kyf.us-west-2.redshift.amazonaws.com', 'Port': 5439}"
6,VpcId,vpc-81bbf0f9
7,NumberOfNodes,4


In [14]:
# Get Cluster Endpoint and Role ARN
# Both values are read from the describe_clusters() result stored in
# myClusterProps. NOTE(review): 'Endpoint' is presumably only present once the
# cluster is available - re-run the status cell first if this raises KeyError.

DWH_ENDPOINT = myClusterProps['Endpoint']['Address']
DWH_ROLE_ARN = myClusterProps['IamRoles'][0]['IamRoleArn']

# Update Config file
# config.set() raises NoSectionError when the target section does not exist,
# so create [IAM_ROLE] if DW.cfg does not have it yet.
if not config.has_section('IAM_ROLE'):
    config.add_section('IAM_ROLE')

config.set('CLUSTER', 'HOST', DWH_ENDPOINT)
config.set('IAM_ROLE', 'ARN', DWH_ROLE_ARN)

# Persist the parser contents back to DW.cfg
with open('DW.cfg', 'w') as configfile:
    config.write(configfile)

print("DWH_ENDPOINT :: ", DWH_ENDPOINT)
print("DWH_ROLE_ARN :: ", DWH_ROLE_ARN)

DWH_ENDPOINT ::  dwhcluster.cg959wyk8kyf.us-west-2.redshift.amazonaws.com
DWH_ROLE_ARN ::  arn:aws:iam::879294216748:role/dwhRole


In [15]:
# Open Connection To the Cluster
# Adds an inbound TCP rule for DWH_PORT to the default security group of the
# cluster's VPC. Errors (e.g. the rule already existing) are printed, not raised.
#
# NOTE(review): CidrIp='0.0.0.0/0' allows connections from any IP address.
# Restrict it to a known address range for anything beyond a short-lived demo.

try:
    vpc = ec2.Vpc(id=myClusterProps['VpcId'])
    # Select the group named 'default' explicitly. The order in which a VPC's
    # security groups are listed is not guaranteed, so taking the first one
    # could pick a different group than the GroupName passed below.
    defaultGroups = vpc.security_groups.filter(
        Filters=[{'Name': 'group-name', 'Values': ['default']}]
    )
    defaultSg = list(defaultGroups)[0]
    print(defaultSg)
    defaultSg.authorize_ingress(
        GroupName='default',
        CidrIp='0.0.0.0/0',
        IpProtocol='TCP',
        FromPort=int(DWH_PORT),
        ToPort=int(DWH_PORT)
    )
except Exception as e:
    print(e)

ec2.SecurityGroup(id='sg-add3f083')
An error occurred (InvalidPermission.Duplicate) when calling the AuthorizeSecurityGroupIngress operation: the specified rule "peer: 0.0.0.0/0, TCP, from port: 5439, to port: 5439, ALLOW" already exists


## 6. Delete Cluster and IAM Role

In [None]:
# Show the current cluster summary before tearing it down.

described = redshift.describe_clusters(ClusterIdentifier=DWH_CLUSTER_IDENTIFIER)
myClusterProps = described['Clusters'][0]
prettyRedshiftProps(myClusterProps)

In [None]:
# Request deletion of the cluster. SkipFinalClusterSnapshot=True is passed,
# i.e. no final snapshot is requested. The response is shown as the cell output.

delete_response = redshift.delete_cluster(
    ClusterIdentifier=DWH_CLUSTER_IDENTIFIER,
    SkipFinalClusterSnapshot=True,
)
delete_response

In [None]:
# Refresh the cluster summary after the delete request.

refreshed = redshift.describe_clusters(ClusterIdentifier=DWH_CLUSTER_IDENTIFIER)
myClusterProps = refreshed['Clusters'][0]
prettyRedshiftProps(myClusterProps)

In [None]:
# Remove the IAM role: first detach the S3 read-only policy that was attached
# earlier in this notebook, then delete the role itself.

s3ReadOnlyPolicyArn = "arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess"
iam.detach_role_policy(
    RoleName=DWH_IAM_ROLE_NAME,
    PolicyArn=s3ReadOnlyPolicyArn,
)
iam.delete_role(RoleName=DWH_IAM_ROLE_NAME)

In [None]:
# Get Redshift Cluster Details and make sure the cluster is deleting (or gone).
# Once deletion has finished, describe_clusters() raises ClusterNotFoundFault.
# That is the expected end state, so it is reported as a status row instead of
# leaving the cell with a traceback.

try:
    myClusterProps = redshift.describe_clusters(ClusterIdentifier=DWH_CLUSTER_IDENTIFIER)['Clusters'][0]
    clusterView = prettyRedshiftProps(myClusterProps)
except redshift.exceptions.ClusterNotFoundFault:
    clusterView = pd.DataFrame(
        [("ClusterIdentifier", DWH_CLUSTER_IDENTIFIER),
         ("ClusterStatus", "deleted (cluster not found)")],
        columns=["Key", "Value"],
    )

# Last expression of the cell: rendered as the cell output
clusterView

## **DISCLAIMER: MAKE SURE THAT THE CLUSTER IS DELETED TO AVOID GETTING CHARGED**