# Manage Redshift Cluster

This notebook contains code snippets for creating and tearing down a Redshift cluster for the project.

In [1]:
import json
import os
import time
from configparser import ConfigParser
from urllib.parse import quote

import boto3
import pandas as pd
from botocore.exceptions import ClientError

In [2]:
# Load the AWS access key pair from the [AWS_USER] section of user.cfg.
user = ConfigParser()
user.read('user.cfg')
ACCESS_KEY, SECRET = (
    user.get('AWS_USER', option)
    for option in ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY')
)

In [7]:
# Parse dwh.cfg. Option names are upper-cased on read and on lookup
# (ConfigParser lower-cases them by default).
config = ConfigParser()
config.optionxform = str.upper
config.read('dwh.cfg')

['dwh.cfg']

## Create Cluster

In [8]:
# Redshift API client for us-west-2, authenticated with ACCESS_KEY / SECRET.
client_options = {
    'region_name': 'us-west-2',
    'aws_access_key_id': ACCESS_KEY,
    'aws_secret_access_key': SECRET,
}
redshift = boto3.client('redshift', **client_options)

In [9]:
# Request a new cluster using the settings in the [CLUSTER] section of
# dwh.cfg, attaching the IAM role ARN stored under [IAM_ROLE].
cluster_spec = {
    'ClusterIdentifier': config.get('CLUSTER', 'CLUSTER_ID'),
    'ClusterType': config.get('CLUSTER', 'CLUSTER_TYPE'),
    'NodeType': config.get('CLUSTER', 'NODE_TYPE'),
    'NumberOfNodes': config.getint('CLUSTER', 'NUMBER_OF_NODES'),
    'DBName': config.get('CLUSTER', 'DB_NAME'),
    'Port': config.getint('CLUSTER', 'DB_PORT'),
    'MasterUsername': config.get('CLUSTER', 'DB_USER'),
    'MasterUserPassword': config.get('CLUSTER', 'DB_PASSWORD'),
    'IamRoles': [config.get('IAM_ROLE', 'ARN')],
}
response = redshift.create_cluster(**cluster_spec)

In [10]:
# Keep the cluster description returned by create_cluster; the following
# cells read its 'ClusterStatus' and, once available, its 'Endpoint'.
cluster = response['Cluster']

In [11]:
# Poll for cluster status every ~30 sec until it reports 'available'.
# The number of polls is capped (~30 min in total) so that a cluster which
# never becomes available raises TimeoutError instead of hanging the kernel.
POLL_INTERVAL_SECONDS = 30
MAX_POLLS = 60
cluster_id = config.get('CLUSTER', 'CLUSTER_ID')  # loop-invariant lookup

counter = 0
while cluster['ClusterStatus'] != 'available':
    if counter >= MAX_POLLS:
        raise TimeoutError(
            f'Cluster {cluster_id} not available after {counter} tries '
            f'(last status: {cluster["ClusterStatus"]})'
        )
    time.sleep(POLL_INTERVAL_SECONDS)
    counter += 1
    # Refresh the description; the identifier filter yields a single entry.
    cluster = redshift.describe_clusters(ClusterIdentifier=cluster_id)['Clusters'][0]
    # '\r' keeps the progress report on one line that is overwritten each poll.
    print(f'ClusterStatus after {counter} tries: {cluster["ClusterStatus"]}', end='\r')

ClusterStatus after 5 tries: available

## Save Connection Details

In [12]:
# Connection details: the endpoint address comes from the cluster
# description, everything else from the [CLUSTER] section of dwh.cfg.
DWH_ENDPOINT = cluster['Endpoint']['Address']
DWH_PORT = config.get('CLUSTER', 'DB_PORT')
DB_NAME = config.get('CLUSTER', 'DB_NAME')
DB_USER = config.get('CLUSTER', 'DB_USER')
DB_PASSWORD = config.get('CLUSTER', 'DB_PASSWORD')

# Percent-encode the credentials so reserved URL characters in them
# (e.g. '#', '?', '%', ':') cannot corrupt the connection URI.
encoded_user = quote(DB_USER, safe='')
encoded_password = quote(DB_PASSWORD, safe='')
# 'postgresql://' is accepted by libpq and by SQLAlchemy; the 'postgres://'
# alias was removed in SQLAlchemy 1.4.
CONNECTION = (
    f'postgresql://{encoded_user}:{encoded_password}'
    f'@{DWH_ENDPOINT}:{DWH_PORT}/{DB_NAME}'
)

In [13]:
# Record the cluster endpoint as HOST under [CLUSTER] and rewrite dwh.cfg
# so that it is available for later use.
config.set('CLUSTER', 'HOST', DWH_ENDPOINT)
with open('dwh.cfg', mode='w') as cfg_file:
    config.write(cfg_file, space_around_delimiters=False)

## Delete Cluster

In [22]:
# Ask Redshift to delete the cluster without taking a final snapshot,
# then display the status reported in the response.
cluster_id = config.get('CLUSTER', 'CLUSTER_ID')
r = redshift.delete_cluster(ClusterIdentifier=cluster_id, SkipFinalClusterSnapshot=True)
r['Cluster']['ClusterStatus']

{'Cluster': {'ClusterIdentifier': 'dend-1',
  'NodeType': 'dc2.large',
  'ClusterStatus': 'deleting',
  'ClusterAvailabilityStatus': 'Modifying',
  'MasterUsername': 'awsuser',
  'DBName': 'dend-dwh-project',
  'Endpoint': {'Address': 'dend-1.cgepvlalvgqs.us-west-2.redshift.amazonaws.com',
   'Port': 5439},
  'ClusterCreateTime': datetime.datetime(2020, 10, 11, 18, 43, 58, 716000, tzinfo=tzutc()),
  'AutomatedSnapshotRetentionPeriod': 1,
  'ManualSnapshotRetentionPeriod': -1,
  'ClusterSecurityGroups': [],
  'VpcSecurityGroups': [{'VpcSecurityGroupId': 'sg-513e617c',
    'Status': 'active'}],
  'ClusterParameterGroups': [{'ParameterGroupName': 'default.redshift-1.0',
    'ParameterApplyStatus': 'in-sync'}],
  'ClusterSubnetGroupName': 'default',
  'VpcId': 'vpc-bd0449c5',
  'AvailabilityZone': 'us-west-2c',
  'PreferredMaintenanceWindow': 'wed:11:30-wed:12:00',
  'PendingModifiedValues': {},
  'ClusterVersion': '1.0',
  'AllowVersionUpgrade': True,
  'NumberOfNodes': 4,
  'PubliclyAcce

In [23]:
# Blank out HOST under [CLUSTER] and rewrite dwh.cfg, so the file no longer
# points at the endpoint of the cluster being deleted.
config.set('CLUSTER', 'HOST', '')
with open('dwh.cfg', mode='w') as cfg_file:
    config.write(cfg_file, space_around_delimiters=False)