In [1]:
import boto3
from botocore.exceptions import ClientError
import json
# Low-level DynamoDB client created at import time.
# NOTE(review): the helper functions visible in this notebook each build their
# own client/resource, so this handle appears to be unused here — confirm.
dynamo = boto3.client("dynamodb")

In [2]:
import json
# Load the sample S3 "Object Created" event that the following cells parse.
# The encoding is named explicitly so the read does not depend on the
# platform's locale default (JSON interchange text is UTF-8).
with open("body.json", "r", encoding="utf-8") as f:
    event_body = json.load(f)
    print(event_body)

{'version': '0', 'id': '48b21f9e-27fc-f7e4-f24b-20f4bd8ef0ac', 'detail-type': 'Object Created', 'source': 'aws.s3', 'account': '010928219150', 'time': '2024-09-05T20:46:10Z', 'region': 'us-east-1', 'resources': ['arn:aws:s3:::raw-datalake-iceberg-2f88fdbce7e3'], 'detail': {'version': '0', 'bucket': {'name': 'raw-datalake-iceberg-2f88fdbce7e3'}, 'object': {'key': 'icebergdatalake/sales/shop/files/House price.parquet', 'size': 9251, 'etag': '1bbebf3d8830aebb7e23df098cdfaa30', 'sequencer': '0066DA1892240E109C'}, 'request-id': 'NB2SZ8NA4AMQPP46', 'requester': '010928219150', 'source-ip-address': '167.60.154.252', 'reason': 'PutObject'}}


In [3]:
# Create a DynamoDB table
def create_dynamo_table(table_name, key_schema, attribute_definitions, provisioned_throughput, wait=True):
    """Create a DynamoDB table with provisioned throughput.

    Args:
        table_name: Name of the table to create.
        key_schema: Passed through as ``KeySchema`` to ``create_table``.
        attribute_definitions: Passed through as ``AttributeDefinitions``.
        provisioned_throughput: Passed through as ``ProvisionedThroughput``.
        wait: When true (the default), block on the table resource's
            ``wait_until_exists()`` waiter before returning, so that code
            running right after this call does not hit a table that is still
            being created. Pass ``False`` to return immediately.

    Returns:
        The created ``Table`` resource, or ``None`` when the service rejects
        the request (for example because the table already exists); in that
        case the service's error message is printed.

    Raises:
        Whatever ``wait_until_exists()`` raises if the waiter gives up; only
        ``ClientError`` from ``create_table`` itself is handled here.
    """
    dynamodb = boto3.resource('dynamodb')
    try:
        table = dynamodb.create_table(
            TableName=table_name,
            KeySchema=key_schema,
            AttributeDefinitions=attribute_definitions,
            ProvisionedThroughput=provisioned_throughput
        )
    except ClientError as e:
        # Best-effort: report the service message and signal failure with None.
        print(e.response['Error']['Message'])
        return None

    if wait:
        # Table creation is asynchronous; wait outside the try block so a
        # waiter failure is not mistaken for a create_table rejection.
        table.wait_until_exists()
    return table

In [4]:
# Name of the table every later cell in this notebook reads from and writes to.
dynamo_table_name = "test_ice_new"

# A single string attribute "id" acts as the partition (HASH) key.
key_schema = [{'AttributeName': 'id', 'KeyType': 'HASH'}]
attribute_definitions = [{'AttributeName': 'id', 'AttributeType': 'S'}]
provisioned_throughput = {'ReadCapacityUnits': 5, 'WriteCapacityUnits': 5}

create_dynamo_table(
    table_name=dynamo_table_name,
    key_schema=key_schema,
    attribute_definitions=attribute_definitions,
    provisioned_throughput=provisioned_throughput,
)

Table already exists: test_ice_new


In [9]:
def put_item(table_name, item):
    """Write ``item`` into the DynamoDB table ``table_name``.

    Args:
        table_name: Target table name.
        item: Item passed through as ``Item`` to the client's ``put_item``.

    Returns:
        The raw ``put_item`` response, or ``None`` if the call raised; the
        exception is printed rather than propagated.
    """
    client = boto3.client('dynamodb')
    try:
        return client.put_item(TableName=table_name, Item=item)
    except Exception as error:
        # Deliberately broad: any failure of the call is reported and swallowed.
        print(error)
    return None

In [10]:
# Initial status for a file that has just been seen.
status = "PENDING"

# Pull the fields of interest out of the event loaded earlier.
s3_object = event_body['detail']['object']
file_key = s3_object['key']

# Split the key once and pick segments counted from the end.
# NOTE(review): assumes a layout of .../<database>/<table>/<folder>/<file> — confirm.
key_parts = file_key.split("/")
database = key_parts[-4]
table_name = key_parts[-3]
file_name = key_parts[-1]

etag = s3_object['etag']
file_size = s3_object['size']
f_time = event_body['time']
event_id = event_body['id']

In [15]:
# Sort-key value for the "GSI-SK" attribute.
# NOTE(review): this value ends with the etag, while the later update cell
# writes one ending with file_name — confirm which suffix is intended.
gsi_sort_key = f"{database}#{table_name}#{etag}"

# (attribute name, DynamoDB type code, value); numbers are sent as strings.
attributes = [
    ('id', "S", etag),
    ('status', "S", status),
    ('file_key', "S", file_key),
    ('file_size', "N", str(file_size)),
    ('event_time', "S", f_time),
    ('event_id', "S", event_id),
    ('database', "S", database),
    ('table', "S", table_name),
    ('file_name', "S", file_name),
    ('GSI-PK', "S", status),
    ('GSI-SK', "S", gsi_sort_key),
]
item = {name: {type_code: value} for name, type_code, value in attributes}

put_item(dynamo_table_name, item)

{'ResponseMetadata': {'RequestId': '2O8OU2FP89A2O38P20PU66PDHBVV4KQNSO5AEMVJF66Q9ASUAAJG',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'server': 'Server',
   'date': 'Fri, 20 Sep 2024 17:38:55 GMT',
   'content-type': 'application/x-amz-json-1.0',
   'content-length': '2',
   'connection': 'keep-alive',
   'x-amzn-requestid': '2O8OU2FP89A2O38P20PU66PDHBVV4KQNSO5AEMVJF66Q9ASUAAJG',
   'x-amz-crc32': '2745614147'},
  'RetryAttempts': 0}}

In [16]:
# Update an item in a DynamoDB table
def update_item(table_name, key, update_expression, expression_attribute_names, expression_attribute_values, return_values="NONE"):
    """Apply an update expression to one item of a DynamoDB table.

    Args:
        table_name: Target table name.
        key: Primary key of the item, passed through as ``Key``.
        update_expression: Passed through as ``UpdateExpression``.
        expression_attribute_names: Passed through as ``ExpressionAttributeNames``.
        expression_attribute_values: Passed through as ``ExpressionAttributeValues``.
        return_values: Passed through as ``ReturnValues``. The default
            ``"NONE"`` matches the service default, so existing callers see
            the same response as before.

    Returns:
        The full ``update_item`` response dict, or ``None`` when the service
        rejects the request; in that case the service's error message is
        printed.
    """
    dynamodb = boto3.client('dynamodb')
    try:
        # The whole response is returned, as callers already receive today.
        # The former ``else`` branch returning response['Attributes'] could
        # never run, and would have raised KeyError without ReturnValues.
        return dynamodb.update_item(
            TableName=table_name,
            ExpressionAttributeNames=expression_attribute_names,
            Key=key,
            UpdateExpression=update_expression,
            ExpressionAttributeValues=expression_attribute_values,
            ReturnValues=return_values,
        )
    except ClientError as e:
        print(e.response['Error']['Message'])
        return None

In [17]:
# Status to record for the file. Alternative value left by the author: "FAIL".
status = "COMPLETED"

# Primary key of the record to update.
key = {"id": {"S": etag}}

# Attribute names are referenced through #-placeholders in the expression.
expression_attribute_names = {"#STATUS": "status", "#GPK": "GSI-PK", "#GSK": "GSI-SK"}

# Values bound to the :-placeholders; the status is written to both "status" and "GSI-PK".
expression_attribute_values = {
    ":s": {"S": status},
    ":gpk": {"S": status},
    ":gsk": {"S": f"{database}#{table_name}#{file_name}"},
}

update_expression = "SET #STATUS = :s, #GPK = :gpk, #GSK = :gsk"

update_item(
    dynamo_table_name,
    key,
    update_expression,
    expression_attribute_names,
    expression_attribute_values,
)


{'ResponseMetadata': {'RequestId': 'J0COPI7D0R6FBG1N0ISV9UOGFJVV4KQNSO5AEMVJF66Q9ASUAAJG',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'server': 'Server',
   'date': 'Fri, 20 Sep 2024 17:39:13 GMT',
   'content-type': 'application/x-amz-json-1.0',
   'content-length': '2',
   'connection': 'keep-alive',
   'x-amzn-requestid': 'J0COPI7D0R6FBG1N0ISV9UOGFJVV4KQNSO5AEMVJF66Q9ASUAAJG',
   'x-amz-crc32': '2745614147'},
  'RetryAttempts': 0}}

In [47]:
# Primary-key value ("id" = the object's etag) used for the lookup below.
key = {"id": {"S": etag}}

In [48]:
# Check whether an element exists in DynamoDB
def check_element_in_dynamo(dynamo_table, element):
    """Fetch an item by key and report whether it exists.

    Args:
        dynamo_table: Name of the table to query.
        element: Primary key passed through as ``Key`` to ``get_item``.

    Returns:
        The stored item when one is found; ``False`` when the response has no
        (or an empty) ``Item`` or when ``get_item`` raises ``ClientError``
        (the error is printed).
    """
    client = boto3.client('dynamodb')
    try:
        result = client.get_item(TableName=dynamo_table, Key=element)
    except ClientError as err:
        print(f"[ERROR] Error checking element in DynamoDB: {err}")
        return False

    # A missing or empty "Item" both count as "not found".
    return result.get('Item') or False

In [49]:
# Look the record up by primary key: the stored item if found, otherwise False
# (also False when the lookup itself fails with a ClientError).
status_file = check_element_in_dynamo(dynamo_table_name, key)

In [None]:
# Show the lookup result as this cell's output.
status_file