### Importing the libraries

In [1]:
import boto3
import configparser


### Reading credentials from 'dwh.cfg' file

In [2]:
config = configparser.ConfigParser()
config.read_file(open('dwh.cfg'))

HOST = config.get('CLUSTER', 'HOST')
CLUSTER_TYPE = config.get('CLUSTER', 'CLUSTER_TYPE')
NODE_TYPE = config.get('CLUSTER', 'NODE_TYPE')
NUMBER_OF_NODES = config.get('CLUSTER', 'NUMBER_OF_NODES')
CLUSTER_IDENTIFIER = config.get('CLUSTER', 'CLUSTER_IDENTIFIER')
DB_NAME = config.get('CLUSTER', 'DB_NAME')
DB_USER = config.get('CLUSTER', 'DB_USER')
DB_PASSWORD = config.get('CLUSTER', 'DB_PASSWORD')
DB_PORT = config.get('CLUSTER', 'DB_PORT')
ARN = config.get('IAM_ROLE', 'ARN')
LOG_DATA = config.get('S3', 'LOG_DATA')
LOG_JSONPATH = config.get('S3', 'LOG_JSONPATH')
SONG_DATA = config.get('S3', 'SONG_DATA')
KEY = config.get('USER', 'KEY')
SECRET = config.get('USER', 'SECRET')



### Creating clients for EC2, S3 and Redshift

In [3]:
ec2 = boto3.resource('ec2', region_name = 'us-west-2', aws_access_key_id = KEY, aws_secret_access_key = SECRET)
s3 = boto3.resource('s3', region_name = 'us-west-2', aws_access_key_id = KEY, aws_secret_access_key = SECRET)
redshift = boto3.client('redshift', region_name = 'us-west-2', aws_access_key_id = KEY, aws_secret_access_key = SECRET)

### Creating the Redshift cluster

In [5]:
from botocore.exceptions import ClientError

try:
    response = redshift.create_cluster(
    ClusterType = CLUSTER_TYPE,
    NodeType = NODE_TYPE,
    NumberOfNodes = int(NUMBER_OF_NODES),
    DBName = DB_NAME,
    ClusterIdentifier = CLUSTER_IDENTIFIER,
    MasterUsername = DB_USER,
    MasterUserPassword = DB_PASSWORD, 
    IamRoles = [ARN]
    )
    
except Exception as e:
    print(e)

An error occurred (InvalidClientTokenId) when calling the CreateCluster operation: The security token included in the request is invalid.


### Describing the cluster

In [5]:
import pandas as pd

def prettyRedshiftProps(props):
    pd.set_option('display.max_colwidth', -1)
    keyToShow = ['ClusterIdentifier', 'NodeType', 'ClusterStatus', 'MasterUsername', 'DBName', 'EndPoint', 
                'NumberOfNodes', 'VPCID', 'Endpoint']
    x = [(k,v) for k,v in props.items() if k in keyToShow]
    return pd.DataFrame(data = x, columns = ['key', 'value'])

myClusterProps = redshift.describe_clusters(ClusterIdentifier = 'dwhcluster')['Clusters'][0]

prettyRedshiftProps(myClusterProps)
    

Unnamed: 0,key,value
0,ClusterIdentifier,dwhcluster
1,NodeType,dc2.large
2,ClusterStatus,available
3,MasterUsername,dwh_user
4,DBName,dev
5,Endpoint,"{'Address': 'dwhcluster.cik8ixm9rejp.us-west-2.redshift.amazonaws.com', 'Port': 5439}"
6,NumberOfNodes,4


In [6]:
DWH_ENDPOINT = myClusterProps['Endpoint']['Address']

print("DWH_ENDPOINT :: ", DWH_ENDPOINT)


DWH_ENDPOINT ::  dwhcluster.cik8ixm9rejp.us-west-2.redshift.amazonaws.com


### Opening an incoming TCP port to access the cluster endpoint

In [7]:
try:
    vpc = ec2.Vpc(id = myClusterProps['VpcId'])
    defaultSg = list(vpc.security_groups.all())[0]
    print(defaultSg)
    defaultSg.authorize_ingress(
        GroupName = defaultSg.group_name,
        CidrIp='0.0.0.0/0',
        IpProtocol='TCP',
        FromPort=int(DB_PORT),
        ToPort=int(DB_PORT)
    )
    
except Exception as e:
    print(e)
        


ec2.SecurityGroup(id='sg-0bd22b3186135653e')
An error occurred (InvalidPermission.Duplicate) when calling the AuthorizeSecurityGroupIngress operation: the specified rule "peer: 0.0.0.0/0, TCP, from port: 5439, to port: 5439, ALLOW" already exists


### Attempting to connect to the cluster

In [8]:
%load_ext sql

In [9]:
conn_string = "postgresql://{}:{}@{}:{}/{}".format(DB_USER, DB_PASSWORD, HOST, DB_PORT, DB_NAME)

In [10]:
print(conn_string)
%sql $conn_string

postgresql://dwh_user:Pemk2017@dwhcluster.cik8ixm9rejp.us-west-2.redshift.amazonaws.com:5439/dev


'Connected: dwh_user@dev'