# This notebook creates the relevant IAM roles and policies, the Redshift cluster, and the secret in Secrets Manager.

**Note:** Please set the kernel to `Python 3 (Data Science)`.

### Variables
Variable names for secret, RedShift, Athena and Glue.

In [None]:
# Name under which the connection details are stored in Secrets Manager.
secret_name = "bankdemo_redshift_login"

# Random function to generate password.
import random
import string
def random_char(y):
    """Return a random string of ``y`` ASCII letters (upper and lower case).

    The result is used as a database password, so characters are drawn with
    the ``secrets`` module (a cryptographically secure source) instead of the
    predictable ``random`` generator.

    Args:
        y: Number of characters to generate; ``0`` yields an empty string.

    Returns:
        A ``str`` of length ``y`` containing only ASCII letters.
    """
    # Local import keeps this cell self-contained without touching the
    # notebook's existing import lines.
    import secrets

    return ''.join(secrets.choice(string.ascii_letters) for _ in range(y))
    
# The variables below are only required for notebook 01.
# The Redshift, Athena and Glue information is stored in Secrets Manager.
subnet_name = 'Private subnet'  # Change this if the private subnet name is different

database_name_redshift = 'bankdemo'
database_name_glue = 'bankdemo'

schema_redshift = 'dm'
schema_athena = 'athena'  # has to be athena

table_name_glue = 'bankdemo_glue'
table_name_redshift = 'data'


# Redshift configuration parameters
redshift_cluster_identifier = 'bankdemo'
database_name = 'bankdemo'
cluster_type = 'single-node'  # or multi-node

master_user_name = 'bankdemo'
# The password requires a number (the trailing '1') as well as at least one
# uppercase and one lowercase letter. Purely random letters can, rarely, come
# out all in one case, so draw again until both cases are present.
master_user_pw = random_char(16) + '1'
while master_user_pw.islower() or master_user_pw.isupper():
    master_user_pw = random_char(16) + '1'

# Note that only some Instance Types support Redshift Query Editor
# (https://docs.aws.amazon.com/redshift/latest/mgmt/query-editor.html)
node_type = 'dc2.large'
# number_nodes = '1' # for multi-node. Also uncomment this line below: NumberOfNodes=int(number_nodes),

# Set the security group ID if not using the default one
# Also comment the code below to get the security group ID
# security_group_id = ''


### Import the necessary libraries and create client session


In [None]:
import json
import boto3
from botocore.exceptions import ClientError
from botocore.config import Config
import time
import sagemaker

# One boto3 client per AWS service used in this notebook.
iam, sts, redshift, sm, ec2, secretsmanager = (
    boto3.client(service_name)
    for service_name in ('iam', 'sts', 'redshift', 'sagemaker', 'ec2', 'secretsmanager')
)

# Account ID of the caller; used later when building ARNs.
accountID = sts.get_caller_identity()["Account"]

# SageMaker session and the execution role this notebook runs under.
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()


### Adding permissions to SageMaker Execution role

In [None]:
# Keep only the text after the last "/" of the execution role string.
role_name = role.rsplit("/", 1)[-1]

print(f"Role name: {role_name}")

In [None]:
# Defaults, in case the IAM lookup below fails.
setup_iam_roles_passed = False
admin = False

# The role counts as admin when the AdministratorAccess policy is attached.
post_policies = iam.list_attached_role_policies(RoleName=role_name)["AttachedPolicies"]
admin = any(attached["PolicyName"] == "AdministratorAccess" for attached in post_policies)

setup_iam_roles_passed = True
print("[OK] You are all set up to continue with this workshop!")

In [None]:
# Verify that a non-admin role has every policy this notebook needs.
if not admin:
    pre_policies = iam.list_attached_role_policies(RoleName=role_name)["AttachedPolicies"]

    required_policies = ["IAMFullAccess"]

    # Compare by name against a set instead of removing items from
    # `required_policies` while looping over it.
    attached_names = {pre_policy["PolicyName"] for pre_policy in pre_policies}
    for role_req in required_policies:
        if role_req in attached_names:
            print("Attached: {}".format(role_req))

    # As before, `required_policies` ends up holding only the missing policies.
    required_policies = [name for name in required_policies if name not in attached_names]

    if required_policies:
        print(
            "*************** [ERROR] You need to attach the following policies in order to continue with this workshop *****************\n"
        )
        for required_policy in required_policies:
            print("Not Attached: {}".format(required_policy))
    else:
        print("[OK] You are all set to continue with this notebook!")
else:
    print("[OK] You are all set to continue with this notebook!")

#### Create a function to add policy to the role

In [None]:
def addPolicy(policy, role_name):
    """Attach an AWS managed policy to an IAM role and print the outcome.

    Errors are printed rather than raised so that the notebook keeps running.

    Args:
        policy: Name of the AWS managed policy, i.e. the part that follows
            ``arn:aws:iam::aws:policy/``.
        role_name: Name of the IAM role the policy is attached to.
    """
    policy_arn = "arn:aws:iam::aws:policy/{}".format(policy)
    try:
        iam.attach_role_policy(PolicyArn=policy_arn, RoleName=role_name)
    except ClientError as e:
        error_code = e.response["Error"]["Code"]
        if error_code == "EntityAlreadyExists":
            print("[OK] Policy is already attached.")
        elif error_code == "LimitExceeded":
            # The request was rejected, so the policy is NOT attached.
            # Reporting "[OK]" here would hide the missing permission.
            print(
                "*************** [ERROR] Policy {} was not attached to role {}: "
                "policy limit exceeded ({}) *****************".format(policy, role_name, e)
            )
        else:
            print("*************** [ERROR] {} *****************".format(e))
    else:
        print("Policy {} has been succesfully attached to role: {}".format(policy, role_name))


#### Add the following policies to the role.

In [None]:
# Managed policies to attach to the SageMaker execution role, in order.
for managed_policy in (
    "AmazonRedshiftFullAccess",
    "SecretsManagerReadWrite",
    "AmazonAthenaFullAccess",
):
    addPolicy(managed_policy, role_name)


### Add permissions to BankDemo role
#### Create AssumeRolePolicyDocument

In [None]:
# NOTE: this rebinds `role` to the ARN of the Service Catalog products use role.
role = "arn:aws:iam::{}:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole".format(accountID)

# Trust policy: lets the role above, plus the SageMaker and Redshift services,
# assume the role that uses this document.
assume_role_policy_doc = {
    "Version": "2012-10-17",
    "Statement": [
        dict(
            Effect="Allow",
            Principal={
                "AWS": role,
                "Service": ["sagemaker.amazonaws.com", "redshift.amazonaws.com"],
            },
            Action="sts:AssumeRole",
        )
    ],
}

assume_role_policy_doc

#### Create Role

In [None]:
# Name of the IAM role created for the Redshift cluster.
iam_redshift_role_name = "BankDemo"

In [None]:
# Create the role; an already existing role is reported and otherwise ignored.
try:
    iam_role_redshift = iam.create_role(
        Description='Bank Redshift Role',
        AssumeRolePolicyDocument=json.dumps(assume_role_policy_doc),
        RoleName=iam_redshift_role_name,
    )
except ClientError as e:
    if e.response['Error']['Code'] != 'EntityAlreadyExists':
        print("Unexpected error: %s" % e)
    else:
        print("Role already exists")

#### Get the Role ARN

In [None]:
# Fetch the role by name so the ARN is available whether or not the role was
# created by this run. NOTE: `role` is rebound to the get_role response here.
role = iam.get_role(
    RoleName=iam_redshift_role_name,
)
iam_role_redshift_arn = role["Role"]["Arn"]
print(iam_role_redshift_arn)

### Create Self-Managed Policies

#### Define Policies
#### arn:aws:iam::aws:policy/AmazonS3FullAccess

In [None]:
# Policy document granting every S3 action on every resource.
my_redshift_to_s3 = dict(
    Version="2012-10-17",
    Statement=[
        dict(Effect="Allow", Action="s3:*", Resource="*"),
    ],
)

#### arn:aws:iam::aws:policy/AmazonAthenaFullAccess

In [None]:
# (actions, resources) pairs; each pair becomes one "Allow" statement below.
_athena_grants = [
    # Athena itself
    (["athena:*"], ["*"]),
    # Glue Data Catalog: databases, tables and partitions
    (
        [
            "glue:CreateDatabase",
            "glue:DeleteDatabase",
            "glue:GetDatabase",
            "glue:GetDatabases",
            "glue:UpdateDatabase",
            "glue:CreateTable",
            "glue:DeleteTable",
            "glue:BatchDeleteTable",
            "glue:UpdateTable",
            "glue:GetTable",
            "glue:GetTables",
            "glue:BatchCreatePartition",
            "glue:CreatePartition",
            "glue:DeletePartition",
            "glue:BatchDeletePartition",
            "glue:UpdatePartition",
            "glue:GetPartition",
            "glue:GetPartitions",
            "glue:BatchGetPartition",
        ],
        ["*"],
    ),
    # Buckets named aws-athena-query-results-*
    (
        [
            "s3:GetBucketLocation",
            "s3:GetObject",
            "s3:ListBucket",
            "s3:ListBucketMultipartUploads",
            "s3:ListMultipartUploadParts",
            "s3:AbortMultipartUpload",
            "s3:CreateBucket",
            "s3:PutObject",
        ],
        ["arn:aws:s3:::aws-athena-query-results-*"],
    ),
    # Read-only access to buckets named athena-examples*
    (["s3:GetObject", "s3:ListBucket"], ["arn:aws:s3:::athena-examples*"]),
    # Bucket listing and location on all buckets
    (["s3:ListBucket", "s3:GetBucketLocation", "s3:ListAllMyBuckets"], ["*"]),
    # SNS topics (read only)
    (["sns:ListTopics", "sns:GetTopicAttributes"], ["*"]),
    # CloudWatch alarms
    (
        [
            "cloudwatch:PutMetricAlarm",
            "cloudwatch:DescribeAlarms",
            "cloudwatch:DeleteAlarms",
        ],
        ["*"],
    ),
    # Lake Formation data access
    (["lakeformation:GetDataAccess"], ["*"]),
]

my_redshift_to_athena = {
    "Version": "2012-10-17",
    "Statement": [
        {"Effect": "Allow", "Action": granted_actions, "Resource": granted_resources}
        for granted_actions, granted_resources in _athena_grants
    ],
}

#### Create Policy Objects

In [None]:
# Create the customer-managed S3 policy; an existing policy is reported and reused.
policyName = 'BankDemo_RedshiftPolicyToS3'
try:
    policy_redshift_s3 = iam.create_policy(
        PolicyDocument=json.dumps(my_redshift_to_s3),
        PolicyName=policyName,
    )
except ClientError as e:
    if e.response['Error']['Code'] != 'EntityAlreadyExists':
        print("Unexpected error: %s" % e)
    else:
        print("Policy already exists")

# Build the policy ARN from the caller's account ID and the policy name.
account_id = sts.get_caller_identity()['Account']
policy_redshift_s3_arn = 'arn:aws:iam::{}:policy/{}'.format(account_id, policyName)
print(policy_redshift_s3_arn)

In [None]:
# Create the customer-managed Athena policy; an existing policy is reported and reused.
policyName = 'BankDemo_RedshiftPolicyToAthena'
try:
    policy_redshift_athena = iam.create_policy(
        PolicyDocument=json.dumps(my_redshift_to_athena),
        PolicyName=policyName,
    )
except ClientError as e:
    if e.response['Error']['Code'] != 'EntityAlreadyExists':
        print("Unexpected error: %s" % e)
    else:
        print("Policy already exists")

# Build the policy ARN from the caller's account ID and the policy name.
account_id = sts.get_caller_identity()['Account']
policy_redshift_athena_arn = 'arn:aws:iam::{}:policy/{}'.format(account_id, policyName)
print(policy_redshift_athena_arn)


#### Create function to add policy to RedShift role

In [None]:
def addCustomPolicy(policyArn, role_name):
    """Attach the policy identified by its full ARN to an IAM role.

    Nothing is printed on success. A ``ClientError`` is reported on stdout
    instead of being raised.

    Args:
        policyArn: Full ARN of the policy to attach.
        role_name: Name of the IAM role that receives the policy.
    """
    try:
        iam.attach_role_policy(RoleName=role_name, PolicyArn=policyArn)
    except ClientError as e:
        if e.response['Error']['Code'] != 'EntityAlreadyExists':
            print("Unexpected error: %s" % e)
            return
        print("Policy is already attached. This is ok.")

#### Attach custom policy to role

In [None]:
# Attach both customer-managed policies to the Redshift role (Athena first, then S3).
for custom_policy_arn in (policy_redshift_athena_arn, policy_redshift_s3_arn):
    addCustomPolicy(custom_policy_arn, iam_redshift_role_name)

        

#### Attach AWS built-in policy to role


In [None]:
# AWS managed policies to attach to the Redshift role, in order.
for builtin_policy in (
    "SecretsManagerReadWrite",
    "AmazonRedshiftFullAccess",
    "AmazonSageMakerFullAccess",
):
    addPolicy(builtin_policy, iam_redshift_role_name)


### Get Security Group ID 

* Make sure the Redshift VPC is the same VPC this notebook is running in
* Make sure the VPC has the following 2 properties enabled
 *     DNS resolution = Enabled
 *     DNS hostnames = Enabled
* This allows private, internal access to Redshift from this SageMaker notebook using the fully qualified endpoint name.

In [None]:
# Best-effort lookup of the security group named 'default' in the VPC of the
# first SageMaker domain. A failure is reported but not raised, because
# `security_group_id` may also be set by hand in the variables cell.
try:
    domain_id = sm.list_domains()['Domains'][0]['DomainId']
    describe_domain_response = sm.describe_domain(DomainId=domain_id)
    vpc_id = describe_domain_response['VpcId']
    security_groups = ec2.describe_security_groups(Filters=[{"Name": "vpc-id", "Values": [vpc_id]}])['SecurityGroups']
    security_group_id = ''

    for sg in security_groups:
        if sg['GroupName'] == 'default':
            security_group_id = sg['GroupId']

    print(security_group_id)
except Exception as e:
    # Report the failure instead of hiding it: later cells need `vpc_id` and
    # `security_group_id` and would otherwise fail with an unexplained NameError.
    print("[WARNING] Could not determine the VPC / default security group: {}".format(e))


### Create Redshift Cluster

Get the subnet ID for the private subnet. 

In [None]:
# Find the subnet in the VPC whose `Name` tag equals `subnet_name`.
sn_all = ec2.describe_subnets(Filters=[{"Name": "vpc-id", "Values": [vpc_id]}])
subnetId = ''
for sn in sn_all['Subnets']:
    # A subnet may have no 'Tags' entry at all, and the Name tag is not
    # guaranteed to be the first tag, so look it up by key.
    subnet_tags = {tag['Key']: tag['Value'] for tag in sn.get('Tags', [])}
    if subnet_tags.get('Name') == subnet_name:
        subnetId = sn['SubnetId']

if not subnetId:
    print("[WARNING] No subnet named '{}' was found in VPC {}".format(subnet_name, vpc_id))
subnetId

Create the RedShift subnet group and create the RedShift cluster.

In [None]:
# Create the subnet group the cluster is placed in; an existing group is fine.
try:
    response = redshift.create_cluster_subnet_group(
        SubnetIds=[subnetId],
        Description='string',
        ClusterSubnetGroupName='bankdemo-subnet',
    )
except ClientError as e:
    if e.response['Error']['Code'] != 'ClusterSubnetGroupAlreadyExists':
        print("Unexpected error: %s" % e)
    else:
        print("Cluster subnet group already exists. This is ok.")

In [None]:
# Settings for the cluster: not publicly accessible, listening on port 5439.
cluster_settings = dict(
    DBName=database_name,
    ClusterIdentifier=redshift_cluster_identifier,
    ClusterType=cluster_type,
    NodeType=node_type,
    # NumberOfNodes=int(number_nodes),       # This is required if multi-node is specified
    ClusterSubnetGroupName='bankdemo-subnet',
    MasterUsername=master_user_name,
    MasterUserPassword=master_user_pw,
    IamRoles=[iam_role_redshift_arn],
    VpcSecurityGroupIds=[security_group_id],
    Port=5439,
    PubliclyAccessible=False,
)

# An already existing cluster is reported and otherwise ignored.
try:
    response = redshift.create_cluster(**cluster_settings)
except ClientError as e:
    if e.response['Error']['Code'] != 'ClusterAlreadyExists':
        print("Unexpected error: %s" % e)
    else:
        print("Cluster already exists. This is ok.")

#### Please Wait for Cluster Status to change to `Available`

In [None]:
# Poll the cluster status every 10 seconds until it is 'available'.
# The wait is bounded so the cell cannot spin forever if the cluster never
# reaches that state (for example, if creation failed).
max_wait_seconds = 30 * 60
poll_interval_seconds = 10
wait_deadline = time.monotonic() + max_wait_seconds

while True:
    response = redshift.describe_clusters(ClusterIdentifier=redshift_cluster_identifier)
    cluster_status = response['Clusters'][0]['ClusterStatus']
    print(cluster_status)

    if cluster_status == 'available':
        break
    if time.monotonic() >= wait_deadline:
        raise TimeoutError(
            "Cluster '{}' did not become available within {} seconds (last status: {})".format(
                redshift_cluster_identifier, max_wait_seconds, cluster_status
            )
        )
    time.sleep(poll_interval_seconds)

In [None]:
# Read the endpoint address and port of the cluster.
response = redshift.describe_clusters(ClusterIdentifier=redshift_cluster_identifier)
cluster_endpoint = response['Clusters'][0]['Endpoint']
host, port = cluster_endpoint['Address'], cluster_endpoint['Port']
print(host)

### Create Secret in Secrets Manager

Add RedShift, Athena and Glue information to the secret. 

Note: If the secret already exists and you are creating the Redshift cluster again, the secret will not be updated with the new password. Please update the password manually in Secrets Manager.
This is to prevent accidental updates to the secret.

In [None]:
# Connection details and the Redshift/Athena/Glue names kept in the secret.
# Every value is stored as a string, including the port.
secret_values = {
    "username": str(master_user_name),
    "password": str(master_user_pw),
    "engine": "redshift",
    "host": str(host),
    "port": str(port),
    "dbClusterIdentifier": str(redshift_cluster_identifier),
    "db": str(database_name),
    "database_name_redshift": str(database_name_redshift),
    "database_name_glue": str(database_name_glue),
    "schema_redshift": str(schema_redshift),
    "schema_athena": str(schema_athena),
    "table_name_glue": str(table_name_glue),
    "table_name_redshift": str(table_name_redshift),
}

# Serialise with json.dumps so that quotes or backslashes in a value are
# escaped correctly. The secret-creation cell wraps this string in '{' and '}',
# so the outer braces are stripped here.
secretstring = json.dumps(secret_values)[1:-1]

# NOTE(review): displaying this value puts the password into the notebook output.
secretstring

In [None]:
# Store the connection details as a new secret; an existing secret is left untouched.
try:
    response = secretsmanager.create_secret(
        SecretString='{' + secretstring + '}',
        Description='BankDemo Redshift Login',
        Name=secret_name,
    )
except ClientError as e:
    if e.response['Error']['Code'] != 'ResourceExistsException':
        print("Unexpected error: %s" % e)
    else:
        print("Secret already exists. If you are recreating the RedShift cluster, please update the password manually ")