# This notebook creates the relevant IAM role and policies, the Redshift cluster, and the secret in AWS Secrets Manager.

## Variables

In [2]:
# Name of the Secrets Manager secret that stores the Redshift login details
secret_name = 'bankdemo_redshift_login'

## The variables below are only required for notebook 01
database_name_redshift = 'bankdemo'
database_name_athena = 'bankdemo'

schema_redshift = 'dm'
schema_athena = 'athena' # have to be called athena

table_name_glue = 'bankdemo_glue'
table_name_redshift = 'data'


# Redshift configuration parameters
redshift_cluster_identifier = 'bankdemo'
database_name = 'bankdemo'
cluster_type = 'single-node' # or multi-node

master_user_name = 'bankdemo'


def _generate_master_password(length=17):
    """Return a random alphanumeric password for the Redshift master user.

    The result always contains at least one upper-case letter, one lower-case
    letter and one digit. It is generated with the ``secrets`` module so it is
    suitable for use as a credential.

    The imports are local so that this cell runs on a fresh kernel without
    depending on any other cell having been executed first.

    Args:
        length: total number of characters; must be at least 3.

    Returns:
        A string of ``length`` ASCII letters and digits.

    Raises:
        ValueError: if ``length`` is smaller than 3.
    """
    import secrets
    import string

    if length < 3:
        raise ValueError("length must be at least 3")

    # One character from each required class, then fill up from the full alphabet
    chars = [
        secrets.choice(string.ascii_uppercase),
        secrets.choice(string.ascii_lowercase),
        secrets.choice(string.digits),
    ]
    alphabet = string.ascii_letters + string.digits
    chars.extend(secrets.choice(alphabet) for _ in range(length - 3))

    # Shuffle so the required characters are not always in the same positions
    secrets.SystemRandom().shuffle(chars)
    return ''.join(chars)


# 17 characters, same length as the previous "16 letters + '1'" password
master_user_pw = _generate_master_password()

# Note that only some Instance Types support Redshift Query Editor 
# (https://docs.aws.amazon.com/redshift/latest/mgmt/query-editor.html)
node_type = 'dc2.large'
# number_nodes = '1' 

# Security group passed to the cluster; set this ID explicitly if there is more than one
security_group_id = 'sg-044c6f07030b3be91'


## Setup IAM Access To Read From S3 and Athena



In [1]:
import json
import boto3
from botocore.exceptions import ClientError
from botocore.config import Config
import time
import random
import string

# Retry behaviour (up to 10 attempts, adaptive mode); only the IAM client below is given this config
config = Config(retries=dict(max_attempts=10, mode='adaptive'))

# IAM and STS clients, then the account ID of the credentials in use
iam = boto3.client('iam', config=config)
sts = boto3.client('sts')
accountID = sts.get_caller_identity()["Account"]

# Remaining service clients, all created with default settings
redshift, sm, ec2 = (
    boto3.client(service_name)
    for service_name in ('redshift', 'sagemaker', 'ec2')
)

def random_char(y):
    """Return a random string of ``y`` ASCII letters (upper and lower case).

    The result is used to build the Redshift master password, so the
    characters are drawn with ``secrets.choice`` (a cryptographically secure
    source) rather than the ``random`` module.

    Args:
        y: number of characters to generate; 0 yields an empty string.

    Returns:
        A string of length ``y`` containing only ASCII letters.
    """
    import secrets  # local import: keeps this change independent of the notebook's import cell

    return ''.join(secrets.choice(string.ascii_letters) for _ in range(y))

### Create AssumeRolePolicyDocument

In [3]:
# ARN of the role in this account that is listed as an AWS principal in the trust policy
role = f"arn:aws:iam::{accountID}:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole"

# Trust policy: lets the role above and the SageMaker / Redshift services call sts:AssumeRole
assume_role_policy_doc = dict(
    Version="2012-10-17",
    Statement=[
        dict(
            Effect="Allow",
            Principal=dict(
                AWS=role,
                Service=["sagemaker.amazonaws.com", "redshift.amazonaws.com"],
            ),
            Action="sts:AssumeRole",
        ),
    ],
)

# Display the document as the cell output
assume_role_policy_doc

{'Version': '2012-10-17',
 'Statement': [{'Effect': 'Allow',
   'Principal': {'AWS': 'arn:aws:iam::138604873012:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole',
    'Service': ['sagemaker.amazonaws.com', 'redshift.amazonaws.com']},
   'Action': 'sts:AssumeRole'}]}

### Create Role

In [4]:
# Name of the IAM role; used as RoleName when creating the role, looking it up and attaching policies
iam_redshift_role_name = 'BankDemo'

In [5]:
# Create the role with the trust policy; an existing role is reported and left as it is
trust_policy_json = json.dumps(assume_role_policy_doc)
try:
    iam_role_redshift = iam.create_role(
        RoleName=iam_redshift_role_name,
        AssumeRolePolicyDocument=trust_policy_json,
        Description='Bank Redshift Role',
    )
except ClientError as err:
    role_exists = err.response['Error']['Code'] == 'EntityAlreadyExists'
    print("Role already exists" if role_exists else "Unexpected error: %s" % err)

Role already exists


#### Get the Role ARN

In [6]:
# Look the role up by name and pull its ARN out of the response
role = iam.get_role(RoleName=iam_redshift_role_name)
role_details = role['Role']
iam_role_redshift_arn = role_details['Arn']
print(iam_role_redshift_arn)

arn:aws:iam::138604873012:role/BankDemo


## Create Self-Managed Policies

### Define Policies

#### arn:aws:iam::aws:policy/AmazonS3FullAccess

In [7]:
# Policy document with a single statement: allow every S3 action on every resource
my_redshift_to_s3 = dict(
    Version="2012-10-17",
    Statement=[
        dict(Effect="Allow", Action="s3:*", Resource="*"),
    ],
)

#### arn:aws:iam::aws:policy/AmazonAthenaFullAccess

In [8]:
def _allow(actions, resources):
    """Build one IAM statement that allows ``actions`` on ``resources`` (both lists)."""
    return {"Effect": "Allow", "Action": actions, "Resource": resources}


# Policy document made of eight "Allow" statements, in this order:
# Athena, Glue catalog, Athena query-result buckets, Athena example buckets,
# bucket listing, SNS, CloudWatch alarms, Lake Formation.
my_redshift_to_athena = {
    "Version": "2012-10-17",
    "Statement": [
        # All Athena actions
        _allow(["athena:*"], ["*"]),
        # Glue database / table / partition operations
        _allow(
            [
                "glue:CreateDatabase",
                "glue:DeleteDatabase",
                "glue:GetDatabase",
                "glue:GetDatabases",
                "glue:UpdateDatabase",
                "glue:CreateTable",
                "glue:DeleteTable",
                "glue:BatchDeleteTable",
                "glue:UpdateTable",
                "glue:GetTable",
                "glue:GetTables",
                "glue:BatchCreatePartition",
                "glue:CreatePartition",
                "glue:DeletePartition",
                "glue:BatchDeletePartition",
                "glue:UpdatePartition",
                "glue:GetPartition",
                "glue:GetPartitions",
                "glue:BatchGetPartition",
            ],
            ["*"],
        ),
        # Read/write on buckets whose name starts with aws-athena-query-results-
        _allow(
            [
                "s3:GetBucketLocation",
                "s3:GetObject",
                "s3:ListBucket",
                "s3:ListBucketMultipartUploads",
                "s3:ListMultipartUploadParts",
                "s3:AbortMultipartUpload",
                "s3:CreateBucket",
                "s3:PutObject",
            ],
            ["arn:aws:s3:::aws-athena-query-results-*"],
        ),
        # Read-only on buckets whose name starts with athena-examples
        _allow(["s3:GetObject", "s3:ListBucket"], ["arn:aws:s3:::athena-examples*"]),
        # Bucket listing / location on any bucket
        _allow(["s3:ListBucket", "s3:GetBucketLocation", "s3:ListAllMyBuckets"], ["*"]),
        # SNS topic lookup
        _allow(["sns:ListTopics", "sns:GetTopicAttributes"], ["*"]),
        # CloudWatch alarm management
        _allow(
            ["cloudwatch:PutMetricAlarm", "cloudwatch:DescribeAlarms", "cloudwatch:DeleteAlarms"],
            ["*"],
        ),
        # Lake Formation data access
        _allow(["lakeformation:GetDataAccess"], ["*"]),
    ],
}

### Create Policy Objects

In [9]:
# Create the customer-managed S3 policy; an existing policy of that name is reported and kept
policyName = 'BankDemo_RedshiftPolicyToS3'
s3_policy_json = json.dumps(my_redshift_to_s3)
try:
    policy_redshift_s3 = iam.create_policy(PolicyName=policyName, PolicyDocument=s3_policy_json)
except ClientError as err:
    policy_exists = err.response['Error']['Code'] == 'EntityAlreadyExists'
    print("Policy already exists" if policy_exists else "Unexpected error: %s" % err)

Policy already exists


In [10]:
# Build the policy ARN from the caller's account ID and the current value of policyName
caller_identity = sts.get_caller_identity()
account_id = caller_identity['Account']
policy_redshift_s3_arn = f'arn:aws:iam::{account_id}:policy/{policyName}'
print(policy_redshift_s3_arn)


arn:aws:iam::138604873012:policy/BankDemo_RedshiftPolicyToS3


In [11]:
# Create the customer-managed Athena policy; an existing policy of that name is reported and kept
policyName = 'BankDemo_RedshiftPolicyToAthena'
athena_policy_json = json.dumps(my_redshift_to_athena)
try:
    policy_redshift_athena = iam.create_policy(PolicyName=policyName, PolicyDocument=athena_policy_json)
except ClientError as err:
    policy_exists = err.response['Error']['Code'] == 'EntityAlreadyExists'
    print("Policy already exists" if policy_exists else "Unexpected error: %s" % err)

Policy already exists


In [12]:
# Build the policy ARN from the caller's account ID and the current value of policyName
caller_identity = sts.get_caller_identity()
account_id = caller_identity['Account']
policy_redshift_athena_arn = f'arn:aws:iam::{account_id}:policy/{policyName}'
print(policy_redshift_athena_arn)


arn:aws:iam::138604873012:policy/BankDemo_RedshiftPolicyToAthena


### Attach Policies To Role

In [13]:
# Attach the two customer-managed policies to the role:
# first RedshiftPolicyToAthena, then RedshiftPolicyToS3
for policy_arn in (policy_redshift_athena_arn, policy_redshift_s3_arn):
    try:
        response = iam.attach_role_policy(
            PolicyArn=policy_arn,
            RoleName=iam_redshift_role_name,
        )
    except ClientError as err:
        if err.response['Error']['Code'] != 'EntityAlreadyExists':
            print("Unexpected error: %s" % err)
        else:
            print("Policy is already attached. This is ok.")
        

In [14]:
# Attach the AWS-managed policies to the role, in this order:
# Secrets Manager, Redshift, SageMaker
for policy_arn in (
    'arn:aws:iam::aws:policy/SecretsManagerReadWrite',
    'arn:aws:iam::aws:policy/AmazonRedshiftFullAccess',
    'arn:aws:iam::aws:policy/AmazonSageMakerFullAccess',
):
    try:
        response = iam.attach_role_policy(
            PolicyArn=policy_arn,
            RoleName=iam_redshift_role_name,
        )
    except ClientError as err:
        if err.response['Error']['Code'] != 'EntityAlreadyExists':
            print("Unexpected error: %s" % err)
        else:
            print("Policy is already attached. This is ok.")

### Get Security Group ID 

* Make sure the Redshift cluster is in the same VPC that this notebook is running in
* Make sure the VPC has the following 2 properties enabled
 *     DNS resolution = Enabled
 *     DNS hostnames = Enabled
* This allows private, internal access to Redshift from this SageMaker notebook using the fully qualified endpoint name.

In [15]:
# for sagemaker studio
# try:
#     domain_id = sm.list_domains()['Domains'][0]['DomainId'] #['NotebookInstances'][0]['NotebookInstanceName']
#     describe_domain_response = sm.describe_domain(DomainId=domain_id)
#     vpc_id = describe_domain_response['VpcId']
#     security_groups = ec2.describe_security_groups()['SecurityGroups']
#     for security_group in security_groups:
#         if vpc_id == security_group['VpcId']:
#             security_group_id = security_group['GroupId']
#     print(security_group_id)    
# except:
#     pass



In [16]:
# for sagemaker notebook
# try:
#     notebook_instance_name = sm.list_notebook_instances()['NotebookInstances'][0]['NotebookInstanceName']
#     notebook_instance = sm.describe_notebook_instance(NotebookInstanceName=notebook_instance_name)
#     security_group_id = notebook_instance['SecurityGroups'][0]
#     print(security_group_id)    
# except:
#     pass

## Create Redshift Cluster

In [17]:
# Cluster settings gathered in one place.
# NumberOfNodes=int(number_nodes) must be added here if multi-node is specified.
cluster_settings = dict(
    DBName=database_name,
    ClusterIdentifier=redshift_cluster_identifier,
    ClusterType=cluster_type,
    NodeType=node_type,
    MasterUsername=master_user_name,
    MasterUserPassword=master_user_pw,
    IamRoles=[iam_role_redshift_arn],
    VpcSecurityGroupIds=[security_group_id],
    Port=5439,
    PubliclyAccessible=False,
)
response = redshift.create_cluster(**cluster_settings)

print(response)

{'Cluster': {'ClusterIdentifier': 'bankdemo', 'NodeType': 'dc2.large', 'ClusterStatus': 'creating', 'ClusterAvailabilityStatus': 'Modifying', 'MasterUsername': 'bankdemo', 'DBName': 'bankdemo', 'AutomatedSnapshotRetentionPeriod': 1, 'ManualSnapshotRetentionPeriod': -1, 'ClusterSecurityGroups': [], 'VpcSecurityGroups': [{'VpcSecurityGroupId': 'sg-044c6f07030b3be91', 'Status': 'active'}], 'ClusterParameterGroups': [{'ParameterGroupName': 'default.redshift-1.0', 'ParameterApplyStatus': 'in-sync'}], 'ClusterSubnetGroupName': 'default', 'VpcId': 'vpc-0251027422c93d63d', 'PreferredMaintenanceWindow': 'sun:19:00-sun:19:30', 'PendingModifiedValues': {'MasterUserPassword': '****'}, 'ClusterVersion': '1.0', 'AllowVersionUpgrade': True, 'NumberOfNodes': 1, 'PubliclyAccessible': False, 'Encrypted': False, 'Tags': [], 'EnhancedVpcRouting': False, 'IamRoles': [{'IamRoleArn': 'arn:aws:iam::138604873012:role/BankDemo', 'ApplyStatus': 'adding'}], 'MaintenanceTrackName': 'current', 'DeferredMaintenanceW

### Please wait for the cluster status to change to `available`

In [18]:
# Poll the cluster until it reports 'available', printing each status seen.
# Unlike an unbounded loop, this stops with an error when the cluster reaches a
# status it will not recover from on its own, or when the deadline passes.
poll_interval_seconds = 10
max_wait_seconds = 30 * 60

# Statuses that indicate the cluster is broken or going away
failed_statuses = {
    'deleting',
    'final-snapshot',
    'hardware-failure',
    'incompatible-hsm',
    'incompatible-network',
    'incompatible-parameters',
    'incompatible-restore',
    'storage-full',
}

deadline = time.monotonic() + max_wait_seconds
while True:
    response = redshift.describe_clusters(ClusterIdentifier=redshift_cluster_identifier)
    cluster_status = response['Clusters'][0]['ClusterStatus']
    print(cluster_status)

    if cluster_status == 'available':
        break
    if cluster_status in failed_statuses:
        raise RuntimeError(
            "Cluster '%s' entered status '%s' and will not become available"
            % (redshift_cluster_identifier, cluster_status)
        )
    if time.monotonic() >= deadline:
        raise TimeoutError(
            "Cluster '%s' was still '%s' after %d seconds"
            % (redshift_cluster_identifier, cluster_status, max_wait_seconds)
        )

    time.sleep(poll_interval_seconds)

creating
creating
creating
creating
creating
creating
creating
creating
creating
creating
creating
creating
creating
creating
creating
creating
available


In [19]:
# Fetch the cluster description again; later cells read the endpoint from `response`
describe_args = {'ClusterIdentifier': redshift_cluster_identifier}
response = redshift.describe_clusters(**describe_args)
print(response)

{'Clusters': [{'ClusterIdentifier': 'bankdemo', 'NodeType': 'dc2.large', 'ClusterStatus': 'available', 'ClusterAvailabilityStatus': 'Unavailable', 'MasterUsername': 'bankdemo', 'DBName': 'bankdemo', 'Endpoint': {'Address': 'bankdemo.cszyoc0ofzdt.ap-southeast-1.redshift.amazonaws.com', 'Port': 5439}, 'ClusterCreateTime': datetime.datetime(2021, 9, 13, 12, 55, 1, 781000, tzinfo=tzlocal()), 'AutomatedSnapshotRetentionPeriod': 1, 'ManualSnapshotRetentionPeriod': -1, 'ClusterSecurityGroups': [], 'VpcSecurityGroups': [{'VpcSecurityGroupId': 'sg-044c6f07030b3be91', 'Status': 'active'}], 'ClusterParameterGroups': [{'ParameterGroupName': 'default.redshift-1.0', 'ParameterApplyStatus': 'in-sync'}], 'ClusterSubnetGroupName': 'default', 'VpcId': 'vpc-0251027422c93d63d', 'AvailabilityZone': 'ap-southeast-1c', 'PreferredMaintenanceWindow': 'sun:19:00-sun:19:30', 'PendingModifiedValues': {}, 'ClusterVersion': '1.0', 'AllowVersionUpgrade': True, 'NumberOfNodes': 1, 'PubliclyAccessible': False, 'Encryp

In [20]:
# Endpoint of the first (only) cluster in the describe_clusters response
endpoint = response['Clusters'][0]['Endpoint']
host = endpoint['Address']
port = endpoint['Port']

## Create Secret in Secrets Manager

Add the Redshift, Athena, and Glue information to the secret.

In [29]:
secretsmanager = boto3.client('secretsmanager')

# Key/value pairs stored in the secret: Redshift login and endpoint, plus the
# database, schema and table names for Redshift, Athena and Glue.
secret_values = {
    "username": master_user_name,
    "password": master_user_pw,
    "engine": "redshift",
    "host": host,
    "port": str(port),  # kept as a string, as in the previous hand-written JSON
    "dbClusterIdentifier": redshift_cluster_identifier,
    "db": database_name,
    "database_name_redshift": database_name_redshift,
    "database_name_athena": database_name_athena,
    "schema_redshift": schema_redshift,
    "schema_athena": schema_athena,
    "table_name_glue": table_name_glue,
    "table_name_redshift": table_name_redshift,
}

# Serialise with json.dumps so quotes and backslashes in any value are escaped
# correctly. The cell that creates the secret wraps this string in '{' ... '}',
# so the outer braces are stripped here to keep that contract.
secretstring = json.dumps(secret_values)[1:-1]

# secretstring 

In [25]:
# Store the login details in Secrets Manager.
# `secretstring` holds the JSON members without the surrounding braces.
secret_json = '{' + secretstring + '}'
try:
    response = secretsmanager.create_secret(
        Name=secret_name,
        Description='BankDemo Redshift Login',
        SecretString=secret_json,
    )
except ClientError as e:
    if e.response['Error']['Code'] == 'ResourceExistsException':
        # The master password is generated randomly on each run, so a secret left
        # over from an earlier run would hold stale credentials. Write the current
        # values into the existing secret instead of leaving it untouched.
        response = secretsmanager.put_secret_value(
            SecretId=secret_name,
            SecretString=secret_json,
        )
        print("Secret already exists. Updated it with the current values.")
    else:
        print("Unexpected error: %s" % e)