In [40]:
import configparser
import boto3
import pandas as pd
import time
from botocore.exceptions import ClientError
from shutil import copyfile
import os
import json

In [41]:
# Create Environment
config = configparser.ConfigParser()
config.read_file(open('../credentials.cfg'))

KEY                = config.get('CREDENTIALS','KEY')
SECRET             = config.get('CREDENTIALS','SECRET')


In [42]:
try:
    os.remove("../dwh.cfg")
except:
    pass
copyfile("../dwh_original.cfg", "../dwh.cfg")

'../dwh.cfg'

In [43]:
# Create Environment
config = configparser.ConfigParser()
config.read_file(open('../dwh.cfg'))

IAM_ROLE_NAME      = config.get("IAM_ROLE", "IAM_ROLE_NAME")

HOST               = config.get("CLUSTER","HOST")
DB                 = config.get("CLUSTER","DB_NAME")
DB_USER            = config.get("CLUSTER","DB_USER")
DB_PASSWORD        = config.get("CLUSTER","DB_PASSWORD")
PORT               = config.get("CLUSTER","DB_PORT")

LOG_DATA           = config.get("S3","LOG_DATA")
LOG_JSONPATH       = config.get("S3","LOG_JSONPATH")
SONG_DATA          = config.get("S3","SONG_DATA")

CLUSTER_TYPE       = config.get("PARAMS","CLUSTER_TYPE")
NUM_NODES          = config.get("PARAMS","NUM_NODES")
NODE_TYPE          = config.get("PARAMS","NODE_TYPE")
ZONE               = config.get("PARAMS","ZONE")

## STEP 1: Create Environment
### 1.1 Create clients: IAM, S3, REDSHIFT

In [44]:
print("Creating EC2, IAM, S3, REDSHIFT clientes")

ec2 = boto3.resource('ec2',
                       region_name=ZONE,
                       aws_access_key_id=KEY,
                       aws_secret_access_key=SECRET
                    )
                     
s3 = boto3.resource('s3',
                       region_name=ZONE,
                       aws_access_key_id=KEY,
                       aws_secret_access_key=SECRET
                   )
iam = boto3.client('iam',
                     region_name=ZONE,
                     aws_access_key_id=KEY,
                     aws_secret_access_key=SECRET
                  )

redshift = boto3.client('redshift',
                       region_name=ZONE,
                       aws_access_key_id=KEY,
                       aws_secret_access_key=SECRET
                       )

Creating EC2, IAM, S3, REDSHIFT clientes


### 1.2 Create empty IAM Role

In [28]:
print("Creating an empty IAM Role")

try:
    empty_Role = iam.create_role(
                        Path='/',
                        RoleName=IAM_ROLE_NAME,
                        Description = "Allows Redshift clusters to call AWS services on your behalf.",
                        AssumeRolePolicyDocument=json.dumps(
                            {'Statement': [{'Action': 'sts:AssumeRole',
                               'Effect': 'Allow',
                               'Principal': {'Service': 'redshift.amazonaws.com'}}],
                             'Version': '2012-10-17'})
    )    
    print(empty_Role)
except Exception as e:
    print(e)


Creating an empty IAM Role
{'Role': {'Path': '/', 'RoleName': 'udacityUser', 'RoleId': 'AROA55MZKLWDUM4KXX7C3', 'Arn': 'arn:aws:iam::956488637831:role/udacityUser', 'CreateDate': datetime.datetime(2020, 12, 2, 21, 37, 9, tzinfo=tzutc()), 'AssumeRolePolicyDocument': {'Statement': [{'Action': 'sts:AssumeRole', 'Effect': 'Allow', 'Principal': {'Service': 'redshift.amazonaws.com'}}], 'Version': '2012-10-17'}}, 'ResponseMetadata': {'RequestId': '226b01d5-608c-4956-8ddc-2cdacedc5aa8', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '226b01d5-608c-4956-8ddc-2cdacedc5aa8', 'content-type': 'text/xml', 'content-length': '778', 'date': 'Wed, 02 Dec 2020 21:37:09 GMT'}, 'RetryAttempts': 0}}


### 1.3 Attach policy to IAM role & Save to dwh.cfg

In [7]:
print("Attaching 'S3 Read Only' policy to IAM Role")

iam.attach_role_policy(RoleName=IAM_ROLE_NAME,
                       PolicyArn="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess"
                      )['ResponseMetadata']['HTTPStatusCode']

roleArn = iam.get_role(RoleName=IAM_ROLE_NAME)['Role']['Arn']

print(roleArn)


Attaching 'S3 Read Only' policy to IAM Role
arn:aws:iam::956488637831:role/udacityUser


### 1.4 Create REDSHIFT Clusterb

In [8]:
print(f"""Creating RESHIFT cluster with: {CLUSTER_TYPE} - {NODE_TYPE} - {NUM_NODES} nodes""")

try:
    response = redshift.create_cluster(        
        #HW
        ClusterType=CLUSTER_TYPE,
        NodeType=NODE_TYPE,
        NumberOfNodes=int(NUM_NODES),

        #Identifiers & Credentials
        DBName=DB,
        ClusterIdentifier=HOST,
        MasterUsername=DB_USER,
        MasterUserPassword=DB_PASSWORD,
        
        #Roles (for s3 access)
        IamRoles=[roleArn]  
    )
except Exception as e:
    print(e)
    

Creating RESHIFT cluster with: multi-node - dc2.large - 4 nodes
An error occurred (ClusterAlreadyExists) when calling the CreateCluster operation: Cluster already exists


In [9]:
print("Time to grab a cup of coffee, this may take a up to five minutes...")

myClusterProps = redshift.describe_clusters(ClusterIdentifier=HOST)['Clusters'][0]

while myClusterProps['ClusterStatus'] != 'available':
    myClusterProps = redshift.describe_clusters(ClusterIdentifier=HOST)['Clusters'][0]
    print(".", end = "")
    time.sleep(3)


print("Cluster is now available")


Time to grab a cup of coffee, this may take a up to five minutes...
Cluster is now available


In [10]:
def prettyRedshiftProps(props):
    pd.set_option('display.max_colwidth', -1)
    keysToShow = ["ClusterIdentifier", "NodeType", "ClusterStatus", "MasterUsername", "DBName", "Endpoint", "NumberOfNodes", 'VpcId']
    x = [(k, v) for k,v in props.items() if k in keysToShow]
    return pd.DataFrame(data=x, columns=["Key", "Value"])
prettyRedshiftProps(myClusterProps)

  


Unnamed: 0,Key,Value
0,ClusterIdentifier,udacitycluster
1,NodeType,dc2.large
2,ClusterStatus,available
3,MasterUsername,ucaityuser
4,DBName,musicserver
5,Endpoint,"{'Address': 'udacitycluster.crqztkqnczh2.us-west-2.redshift.amazonaws.com', 'Port': 5439}"
6,VpcId,vpc-589ecb20
7,NumberOfNodes,4


### 1.5 OPEN TCP PORT

In [11]:
print("Opening TCP port to enable jupyter connection...")

try:
    vpc = ec2.Vpc(id=myClusterProps['VpcId'])
    defaultSg = list(vpc.security_groups.all())[0]
    print(defaultSg)
    defaultSg.authorize_ingress(
        GroupName=defaultSg.group_name,
        CidrIp='0.0.0.0/0',
        IpProtocol='TCP',
        FromPort=int(PORT),
        ToPort=int(PORT)
    )
except Exception as e:
    print(e)

Opening TCP port to enable jupyter connection...
ec2.SecurityGroup(id='sg-74600855')
An error occurred (InvalidPermission.Duplicate) when calling the AuthorizeSecurityGroupIngress operation: the specified rule "peer: 0.0.0.0/0, TCP, from port: 5439, to port: 5439, ALLOW" already exists


### 1.6 Save to file ENDPOINT and ANR

In [12]:
with open('../dwh.cfg', 'a') as f:
    f.write("\nARN="+myClusterProps['IamRoles'][0]['IamRoleArn'])
    f.write("\nENDPOINT="+myClusterProps['Endpoint']['Address'])
    
    

### 1.7 Caution msg

In [13]:
print("CAUTION! - Dont forget to manually delete created resources, or execute 'delete_resources.py")

CAUTION! - Dont forget to manually delete created resources, or execute 'delete_resources.py


In [30]:
redshift.delete_cluster( ClusterIdentifier=HOST,  SkipFinalClusterSnapshot=True)
iam.detach_role_policy(RoleName=IAM_ROLE_NAME, PolicyArn="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess")
iam.delete_role(RoleName=IAM_ROLE_NAME)

InvalidClusterStateFault: An error occurred (InvalidClusterState) when calling the DeleteCluster operation: There is an operation running on the Cluster. Please try to delete it at a later time.

In [16]:
!which python3

/Users/miruiz/anaconda3/bin/python3
