Create S3 Bucket

In [3]:
import boto3

# S3 client pinned to us-east-1; later cells reuse it for uploads and for the
# bucket notification configuration.
s3_client = boto3.client('s3', region_name='us-east-1')

# Create the bucket that the rest of the notebook works with.
s3_client.create_bucket(
    Bucket='bucket-capstone-project',
)

{'ResponseMetadata': {'RequestId': 'WXMVNNPYJ3J13T19',
  'HostId': 'a1FmliAvblDWOE7DQQjwiT1APaqpBWL14N7XdwjNFDyvb1fIcU/zmwbQAPom8HIARR8Y0U5b6GQ=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'a1FmliAvblDWOE7DQQjwiT1APaqpBWL14N7XdwjNFDyvb1fIcU/zmwbQAPom8HIARR8Y0U5b6GQ=',
   'x-amz-request-id': 'WXMVNNPYJ3J13T19',
   'date': 'Thu, 11 Apr 2024 18:06:11 GMT',
   'location': '/bucket-capstone-project',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'Location': '/bucket-capstone-project'}

Create Table Definition

In [8]:
# Name of the DynamoDB table that stores the per-file metadata.
table_name = 'capstone-metadata'

# Attribute definitions as (name, DynamoDB type) pairs: 'S' = string, 'N' = number.
attributes = [
    {'AttributeName': attr_name, 'AttributeType': attr_type}
    for attr_name, attr_type in (('id', 'S'), ('filetype', 'S'), ('size', 'N'))
]

# Primary key: 'id' is the hash key and 'size' is the range key.
key_schema = [
    {'AttributeName': key_name, 'KeyType': key_type}
    for key_name, key_type in (('id', 'HASH'), ('size', 'RANGE'))
]

# Provisioned read/write capacity for the table.
provisioned_throughput = dict(ReadCapacityUnits=5, WriteCapacityUnits=5)



Create DynamoDB Table

In [15]:
dynamo_client = boto3.client('dynamodb', region_name='us-east-1')

# Global secondary index keyed on 'filetype'; it projects every attribute and
# carries its own provisioned throughput.
filetype_index = {
    'IndexName': 'idx1',
    'KeySchema': [
        {'AttributeName': 'filetype', 'KeyType': 'HASH'},
    ],
    'Projection': {'ProjectionType': 'ALL'},
    'ProvisionedThroughput': {
        'ReadCapacityUnits': 5,
        'WriteCapacityUnits': 5,
    },
}

# Create the table from the definition built in the previous cell.
response = dynamo_client.create_table(
    TableName=table_name,
    AttributeDefinitions=attributes,
    KeySchema=key_schema,
    ProvisionedThroughput=provisioned_throughput,
    GlobalSecondaryIndexes=[filetype_index],
)

Upload Media function

In [16]:
from pathlib import Path

def upload_to_s3(local_path, bucket_name):
    """Upload a local file to S3, using its base filename as the object key.

    Args:
        local_path: Path of the file on disk to upload.
        bucket_name: Name of the destination bucket.

    Returns:
        The string "<bucket_name>/<key>", where key is the final path
        component of ``local_path``.
    """
    object_key = Path(local_path).name
    # Uses the module-level s3_client; arguments are Filename, Bucket, Key.
    s3_client.upload_file(local_path, bucket_name, object_key)
    return f"{bucket_name}/{object_key}"

In [19]:
# DynamoDB client used by add_to_database() below.
dynamodb = boto3.client('dynamodb', region_name='us-east-1')

### Extract metadata from the file ###
def extract_metadata(event):
    """Pull bucket, object key, file type and size out of an S3 event.

    Only the first entry of ``event['Records']`` is read.

    S3 delivers object keys URL-encoded in event notifications (a space
    arrives as '+', '(' as '%28', ...), so the key is decoded before use.
    Without this, the id stored for a file whose name contains such
    characters would not match the real object key.

    Args:
        event: S3 notification event (dict) as passed to the Lambda handler.

    Returns:
        Tuple ``(bucket, key, file_type, size)``. ``file_type`` is the key's
        extension without the leading dot, or the string "None" when the key
        has no extension. ``size`` is returned exactly as found in the event.

    Raises:
        KeyError / IndexError: if the event lacks the expected structure.
    """
    # Local import so the function does not rely on another cell's imports.
    from urllib.parse import unquote_plus

    s3_section = event['Records'][0]['s3']
    bucket = s3_section['bucket']['name']
    key = unquote_plus(s3_section['object']['key'])
    size = s3_section['object']['size']
    # suffix includes the leading dot; strip it and fall back to "None".
    file_type = Path(key).suffix[1:] or "None"
    return bucket, key, file_type, size

### Add metadata to database. Use file identifier as id ###
def add_to_database(bucket, key, file_type, size, table_name='capstone-metadata'):
    """Store one file's metadata as an item in DynamoDB.

    The item id is "<bucket>/<key>", the same form upload_to_s3() returns.

    Args:
        bucket: Name of the bucket holding the object.
        key: Object key within the bucket.
        file_type: File extension (or "None"); written as a string attribute.
        size: Object size; written as a DynamoDB number via ``str(size)``.
        table_name: Target table. Defaults to 'capstone-metadata', the table
            this notebook creates and the only table the Lambda role's policy
            allows ``dynamodb:PutItem`` on. The previous hard-coded
            'MediaMetadata' was a leftover placeholder that matches neither.
    """
    # Avoid shadowing the builtin id().
    item_id = f"{bucket}/{key}"
    response = dynamodb.put_item(
        TableName=table_name,
        Item={
            'id': {'S': item_id},
            'filetype': {'S': file_type},
            'size': {'N': str(size)}
        }
    )
    print(f"Data added to DynamoDB: {response}")

### Lambda handler routine ###
def lambda_handler(event, context):
    """Lambda entry point: record metadata for the object described by ``event``.

    The size taken from the event is divided by 1024 before it is printed and
    stored (presumably bytes -> KiB; confirm against the event format).
    ``context`` is accepted but not used.
    """
    bucket, key, file_type, size = extract_metadata(event)
    scaled_size = size / 1024
    print(file_type, scaled_size)
    add_to_database(bucket, key, file_type, scaled_size)

In [22]:
import json 

iam_client = boto3.client('iam', region_name="us-east-1")

# Trust policy: allows the Lambda service to assume the role.
assume_role_policy = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {
                "Service": "lambda.amazonaws.com"
            },
            "Action": "sts:AssumeRole"
        }
    ]
}

# Inline permissions for the role: write logs, read S3 objects, and put items
# into the capstone-metadata DynamoDB table.
log_statement = {
    "Effect": "Allow",
    "Action": [
        "logs:CreateLogGroup",
        "logs:CreateLogStream",
        "logs:PutLogEvents"
    ],
    "Resource": "arn:aws:logs:*:*:*"
}
s3_read_statement = {
    "Effect": "Allow",
    "Action": [
        "s3:GetObject"
    ],
    "Resource": "arn:aws:s3:::*/*"
}
dynamo_write_statement = {
    "Effect": "Allow",
    "Action": [
        "dynamodb:PutItem"
    ],
    "Resource": "arn:aws:dynamodb:*:*:table/capstone-metadata"
}
lambda_execution_policy = {
    "Version": "2012-10-17",
    "Statement": [log_statement, s3_read_statement, dynamo_write_statement]
}

role_name = 'LambdaMetaDataTrigger'
role_description = 'Role for Trigger Lambda'
policy_name = 'LambdaTriggerPolicy'

# Create the role, then attach the permissions to it as an inline policy.
role_response = iam_client.create_role(
    RoleName=role_name,
    Description=role_description,
    AssumeRolePolicyDocument=json.dumps(assume_role_policy),
)

iam_client.put_role_policy(
    RoleName=role_name,
    PolicyName=policy_name,
    PolicyDocument=json.dumps(lambda_execution_policy),
)

# Kept for the create_function call in a later cell.
role_arn = role_response['Role']['Arn']

In [23]:
# Display the ARN of the role created above; it is passed as Role when the
# Lambda function is created.
role_arn

'arn:aws:iam::767398018810:role/LambdaMetaDataTrigger'

In [24]:
# Load the handler source from disk; a later cell zips this text into the
# Lambda deployment package.
with open("lambda.py", "r") as source_file:
    function_code = source_file.read()

In [26]:
# Print the loaded source to check what will be packaged into the Lambda
# deployment zip.
print(function_code)

from pathlib import Path
import boto3

dynamodb = boto3.client('dynamodb')

### Extract metadata from the file ###
def extract_metadata(event):
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    size = event['Records'][0]['s3']['object']['size']
    file_type = Path(key).suffix[1:]
    if not file_type:
        file_type = "None"
    return bucket, key, file_type, size

### Add metadata to database. Use file identifier as id ###
def add_to_database(bucket, key, file_type, size):
    id = f"{bucket}/{key}"
    response = dynamodb.put_item(
        TableName='MediaMetadata',  # Change to your table
        Item={
            'id': {'S': id},
            'filetype': {'S': file_type},
            'size': {'N': str(size)}
        }
    )
    print(f"Data added to DynamoDB: {response}")

### Lambda handler routine ###
def lambda_handler(event, context):
    # Extract bucket and file key from S3 Event
    bucket, key, file_type, s

In [27]:
function_name = "metadata"

import io
import zipfile

lambda_client = boto3.client('lambda', region_name='us-east-1')

# Build the deployment package in memory: a zip holding the handler source as
# lambda_function.py, matching the Handler setting below.
with io.BytesIO() as deployment_package:
    with zipfile.ZipFile(deployment_package, 'w') as zipf:
        # writestr() given only a file name stores the entry with mode 0600,
        # which the Lambda runtime cannot read (import fails with
        # "permission denied"). Use an explicit ZipInfo so the file is
        # world-readable (0644).
        handler_entry = zipfile.ZipInfo('lambda_function.py')
        handler_entry.external_attr = 0o644 << 16
        zipf.writestr(handler_entry, function_code)

    # The zip must be closed (inner with-block exited) before its bytes are
    # read, and read before the BytesIO buffer itself is closed.
    create_function_response = lambda_client.create_function(
       FunctionName=function_name,
       # python3.8 is deprecated in Lambda; use a currently supported runtime.
       Runtime="python3.12",
       Role=role_arn,
       Handler="lambda_function.lambda_handler",
       Code={
           'ZipFile': deployment_package.getvalue()
       }
    )


In [28]:
# ARN of the bucket whose events are allowed to invoke the function.
bucket_arn = "arn:aws:s3:::bucket-capstone-project"

# Resource-policy statement: the S3 service may invoke the function, limited
# to requests whose source is the bucket above.
invoke_permission = dict(
    FunctionName=function_name,
    StatementId='metadata_trigger',
    Action='lambda:InvokeFunction',
    Principal='s3.amazonaws.com',
    SourceArn=bucket_arn,
)
lambda_client.add_permission(**invoke_permission)


{'ResponseMetadata': {'RequestId': 'ce14c1b8-1304-421c-8a36-b6aa12e0dd2a',
  'HTTPStatusCode': 201,
  'HTTPHeaders': {'date': 'Fri, 12 Apr 2024 18:50:21 GMT',
   'content-type': 'application/json',
   'content-length': '315',
   'connection': 'keep-alive',
   'x-amzn-requestid': 'ce14c1b8-1304-421c-8a36-b6aa12e0dd2a'},
  'RetryAttempts': 0},
 'Statement': '{"Sid":"metadata_trigger","Effect":"Allow","Principal":{"Service":"s3.amazonaws.com"},"Action":"lambda:InvokeFunction","Resource":"arn:aws:lambda:us-east-1:767398018810:function:metadata","Condition":{"ArnLike":{"AWS:SourceArn":"arn:aws:s3:::bucket-capstone-project"}}}'}

In [29]:
# Route every object-created event to the Lambda function created earlier.
lambda_target = {
    'LambdaFunctionArn': create_function_response["FunctionArn"],
    'Events': ['s3:ObjectCreated:*'],
}
event_configuration = {'LambdaFunctionConfigurations': [lambda_target]}

# Apply the notification configuration to the bucket.
s3_client.put_bucket_notification_configuration(
    NotificationConfiguration=event_configuration,
    Bucket="bucket-capstone-project",
)

{'ResponseMetadata': {'RequestId': 'E0F3WYKWZ5CS2SQS',
  'HostId': 'ew/PH/pwfrnmvS1ckmJtZ++ilPvtqw1qz9LhIX71boT+6/EvMtuRVhOviqJ9O8L3ewcgEti3i1Y=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'ew/PH/pwfrnmvS1ckmJtZ++ilPvtqw1qz9LhIX71boT+6/EvMtuRVhOviqJ9O8L3ewcgEti3i1Y=',
   'x-amz-request-id': 'E0F3WYKWZ5CS2SQS',
   'date': 'Fri, 12 Apr 2024 18:52:49 GMT',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0}}