In [1]:
import boto3
import mysql.connector

## Initialize MySQL database 

In [2]:
# Create RDS instance
rds = boto3.client('rds')

try:
    response = rds.create_db_instance(
        DBInstanceIdentifier='autobiography-db',
        DBName='autobiography',
        MasterUsername='username',
        MasterUserPassword='password',
        DBInstanceClass='db.t3.micro',
        Engine='MySQL',
        AllocatedStorage=5
    )

    # Wait until DB is available to continue
    rds.get_waiter('db_instance_available').wait(DBInstanceIdentifier='autobiography-db')

except rds.exceptions.DBInstanceAlreadyExistsFault:
    print("Database instance already exists.")

In [3]:
# Describe where DB is available and on what port
try:
    db = rds.describe_db_instances(DBInstanceIdentifier='autobiography-db')['DBInstances'][0]
    ENDPOINT = db['Endpoint']['Address']
    PORT = db['Endpoint']['Port']
    DBID = db['DBInstanceIdentifier']

    print(DBID, "is available at", ENDPOINT, "on Port", PORT)
except Exception as e:
    print("Error retrieving database information:", e)

autobiography-db is available at autobiography-db.cvb2z3vt9fut.us-east-1.rds.amazonaws.com on Port 3306


In [4]:
# Get Name of Security Group
SGNAME = db['VpcSecurityGroups'][0]['VpcSecurityGroupId']

# Adjust Permissions for that security group so that we can access it on Port 3306
# If already SG is already adjusted, print this out
try:
    ec2 = boto3.client('ec2')
    data = ec2.authorize_security_group_ingress(
            GroupId=SGNAME,
            IpPermissions=[
                {'IpProtocol': 'tcp',
                 'FromPort': PORT,
                 'ToPort': PORT,
                 'IpRanges': [{'CidrIp': '0.0.0.0/0'}]}
            ]
    )
except ec2.exceptions.ClientError as e:
    if e.response["Error"]["Code"] == 'InvalidPermission.Duplicate':
        print("Permissions already adjusted.")
    else:
        print(e)

Permissions already adjusted.


In [5]:
import mysql.connector

# Database connection configuration
config = {
    'user': 'username',
    'password': 'password',
    'host': 'autobiography-db.cvb2z3vt9fut.us-east-1.rds.amazonaws.com',
    'database': 'autobiography',
    'raise_on_warnings': True
}

try:
    # Connect to the database
    conn = mysql.connector.connect(**config)
    cursor = conn.cursor()

    # SQL to create the 'books' and 'persons' tables without foreign keys
    create_books_table = """
    CREATE TABLE books (
        book_title VARCHAR(255) PRIMARY KEY,
        Extroversion FLOAT,
        Neuroticism FLOAT,
        Agreeableness FLOAT,
        Conscientiousness FLOAT,
        Openness FLOAT,
        person_name VARCHAR(255) NULL  
    );
    """
    create_persons_table = """
    CREATE TABLE persons (
        person_name VARCHAR(255) PRIMARY KEY,
        Extroversion FLOAT,
        Neuroticism FLOAT,
        Agreeableness FLOAT,
        Conscientiousness FLOAT,
        Openness FLOAT,
        book_title VARCHAR(255) NULL 
    );
    """

    # Execute SQL commands to create tables
    cursor.execute(create_books_table)
    cursor.execute(create_persons_table)

    # Commit changes
    conn.commit()
    print("Tables created successfully.")

except mysql.connector.Error as err:
    print("Error occurred:", err)

finally:
    if conn.is_connected():
        cursor.close()
        conn.close()
        print("Connection closed.")

Tables created successfully.
Connection closed.


# Initialize S3 to store the raw data

In [6]:
# create s3 bucket
bucket_name = 'autobiography-raw-data'
region = 'us-east-1'
s3 = boto3.client('s3', region_name=region if region else 'us-east-1')
try:
    if region is None or region == 'us-east-1':
        s3.create_bucket(Bucket=bucket_name)
    else:
        s3.create_bucket(
            Bucket=bucket_name,
            CreateBucketConfiguration={'LocationConstraint': region}
        )
    print(f"Bucket {bucket_name} created successfully.")
except s3.exceptions.BucketAlreadyExists as e:
    print(f"Bucket {bucket_name} already exists.")
except s3.exceptions.BucketAlreadyOwnedByYou as e:
    print(f"Bucket {bucket_name} already owned by you.")
except Exception as e:
    print(f"An error occurred: {e}")

Bucket autobiography-raw-data created successfully.


# Create a step function 

In [7]:
import boto3
import json

def make_def(lambda_arn):
    definition = {
      "Comment": "Q2 State Machine",
      "StartAt": "Map",
      "States": {
        "Map": {
          "Type": "Map",
          "End": True,
          "MaxConcurrency": 10,
          "Iterator": {
            "StartAt": "Lambda Invoke",
            "States": {
              "Lambda Invoke": {
                "Type": "Task",
                "Resource": "arn:aws:states:::lambda:invoke",
                "OutputPath": "$.Payload",
                "Parameters": {
                  "Payload.$": "$",
                  "FunctionName": lambda_arn
                },
                "Retry": [
                  {
                    "ErrorEquals": [
                      "Lambda.ServiceException",
                      "Lambda.AWSLambdaException",
                      "Lambda.SdkClientException",
                      "Lambda.TooManyRequestsException",
                      "States.TaskFailed",
                      "Lambda.Unknown"                      
                    ],
                    "IntervalSeconds": 2,
                    "MaxAttempts": 6,
                    "BackoffRate": 2
                  }
                ],
                "End": True
              }
            }
          }
        }
      }
    }
    return definition

if __name__ == '__main__':
    iam = boto3.client('iam')
    sfn = boto3.client('stepfunctions')
    aws_lambda = boto3.client('lambda')
    role = iam.get_role(RoleName='LabRole')

    lambda_function_name = 'backend_personality'

    # Get Lambda Function ARN and Role ARN
    # Assumes Lambda function already exists
    lambda_arn = [f['FunctionArn']
                  for f in aws_lambda.list_functions()['Functions']
                  if f['FunctionName'] == lambda_function_name][0]
    
    # Throttle concurrent executions to 10
    response = aws_lambda.put_function_concurrency(
            FunctionName=lambda_function_name,
            ReservedConcurrentExecutions=10
        )

    sfn_function_name = "personalities-state-machine"

    # Use Lambda ARN to create State Machine Definition
    sf_def = make_def(lambda_arn)

    # Create Step Function State Machine if doesn't already exist
    try:
        response = sfn.create_state_machine(
            name=sfn_function_name,
            definition=json.dumps(sf_def),
            roleArn=role['Role']['Arn'],
            type='EXPRESS'
        )
    except sfn.exceptions.StateMachineAlreadyExists:
        response = sfn.list_state_machines()
        state_machine_arn = [sm['stateMachineArn'] 
                            for sm in response['stateMachines'] 
                            if sm['name'] == sfn_function_name][0]
        response = sfn.update_state_machine(
            stateMachineArn=state_machine_arn,
            definition=json.dumps(sf_def),
            roleArn=role['Role']['Arn']
        )

# Delete the bucket and RDS instance

In [5]:
# close the connection
conn.close()

NameError: name 'conn' is not defined

In [7]:
# close instance
rds.delete_db_instance(DBInstanceIdentifier='autobiography-db', SkipFinalSnapshot=True)

{'DBInstance': {'DBInstanceIdentifier': 'autobiography-db',
  'DBInstanceClass': 'db.t3.micro',
  'Engine': 'mysql',
  'DBInstanceStatus': 'deleting',
  'MasterUsername': 'username',
  'DBName': 'autobiography',
  'Endpoint': {'Address': 'autobiography-db.cvb2z3vt9fut.us-east-1.rds.amazonaws.com',
   'Port': 3306,
   'HostedZoneId': 'Z2R2ITUGPM61AM'},
  'AllocatedStorage': 5,
  'InstanceCreateTime': datetime.datetime(2024, 5, 21, 19, 41, 34, 728000, tzinfo=tzutc()),
  'PreferredBackupWindow': '08:31-09:01',
  'BackupRetentionPeriod': 1,
  'DBSecurityGroups': [],
  'VpcSecurityGroups': [{'VpcSecurityGroupId': 'sg-00b93c009a63e6ff0',
    'Status': 'active'}],
  'DBParameterGroups': [{'DBParameterGroupName': 'default.mysql8.0',
    'ParameterApplyStatus': 'in-sync'}],
  'AvailabilityZone': 'us-east-1c',
  'DBSubnetGroup': {'DBSubnetGroupName': 'default',
   'DBSubnetGroupDescription': 'default',
   'VpcId': 'vpc-068ca4cb197f54d96',
   'SubnetGroupStatus': 'Complete',
   'Subnets': [{'Subn

In [8]:
import boto3
from botocore.exceptions import ClientError

bucket_name = 'autobiography-raw-data'
region = 'us-east-1'
s3 = boto3.client('s3', region_name=region if region else 'us-east-1')

def empty_bucket(bucket_name):
    try:
        # List all objects in the bucket
        response = s3.list_objects_v2(Bucket=bucket_name)
        while 'Contents' in response:
            for obj in response['Contents']:
                s3.delete_object(Bucket=bucket_name, Key=obj['Key'])
            response = s3.list_objects_v2(Bucket=bucket_name)

        # If the bucket has versioning enabled, delete all versions
        response = s3.list_object_versions(Bucket=bucket_name)
        if 'Versions' in response:
            for version in response['Versions']:
                s3.delete_object(Bucket=bucket_name, Key=version['Key'], VersionId=version['VersionId'])
        if 'DeleteMarkers' in response:
            for marker in response['DeleteMarkers']:
                s3.delete_object(Bucket=bucket_name, Key=marker['Key'], VersionId=marker['VersionId'])
        
        print(f"All objects deleted from bucket {bucket_name}.")
    except ClientError as e:
        print(f"Error emptying bucket {bucket_name}: {e}")

def delete_bucket(bucket_name):
    try:
        s3.delete_bucket(Bucket=bucket_name)
        print(f"Bucket {bucket_name} deleted successfully.")
    except ClientError as e:
        print(f"Error deleting bucket {bucket_name}: {e}")

try:
    # Empty the bucket
    empty_bucket(bucket_name)
    # Delete the bucket
    delete_bucket(bucket_name)
except Exception as e:
    print(f"An error occurred: {e}")


All objects deleted from bucket autobiography-raw-data.
Bucket autobiography-raw-data deleted successfully.
