# S3 Object Steps

## Install and Import required libraries

In [None]:
!pip install boto3==1.35.0

In [None]:

import boto3
from botocore.config import Config
import time
import json
import time
import uuid
import logging
import random
import string
import sys
from datetime import datetime

## Load Demo State and define variables

In [None]:
# Frames of the single-character progress indicator drawn by the upload loop.
spinner = ['/', '-', '\\', '|']

# Read in the Demo Variables and values.
# The encoding is given explicitly so decoding the JSON file does not depend
# on the platform's default locale encoding.
with open("demo_state.json", "r", encoding="utf-8") as f:
    data = json.load(f)
# Dynamically create python variables needed for the Demo: every top-level key
# of the JSON document becomes a global variable holding that key's value.
# NOTE: an existing global with the same name is silently overwritten, so
# demo_state.json must come from a trusted source.
for key, value in data.items():
    globals()[key] = value

## Define Functions

In [None]:
def generate_random_tags():
    """
      Build one random set of tag values used as demo data.

      Returns a 6-tuple of strings ordered as
      (BPD, PSI, Temperature, Project, Asset_type, HSP).
    """
    project_codes = (
        "PB",       # Permian Basin (Texas and New Mexico)
        "EFS",      # Eagle Ford Shale (Texas)
        "BF",       # Bakken Formation (North Dakota)
        "Prudhoe",  # Prudhoe Bay (Alaska)
        "GoM",      # Gulf of Mexico (offshore)
    )
    hydrostatic_pressure_values = ("2340", "5200", "9360", "14560")
    asset_type_codes = (
        "EXP",   # Exploration Wells: Drilled to discover new resources
        "DEV",   # Development Wells: Drilled to extract known reserves
        "PROD",  # Production Wells: Actively producing oil or gas
        "INJ",   # Injection Wells: Used for enhanced recovery (e.g., water or CO₂ injection)
    )

    # The draws are made in this fixed order so that a seeded random generator
    # yields the same sequence of values on every run.
    project = random.choice(project_codes)
    bpd = str(random.randint(1000, 3000))
    psi = str(random.randint(2000, 5000))
    temperature = str(random.randint(140, 248))
    hsp = random.choice(hydrostatic_pressure_values)
    asset_type = random.choice(asset_type_codes)

    return bpd, psi, temperature, project, asset_type, hsp

def generate_random_objectname(length=8, prefix=None):
    """
        Used to generate an Object name for the demo.

        :param length: Number of random characters (lowercase letters and
                       digits) in the base name.
        :param prefix: Optional key prefix. When given and non-empty it is
                       joined to the name with a '/'; when None or empty the
                       name has no prefix.
        :return: '<prefix>/<basename><extension>', or '<basename><extension>'
                 when no prefix is supplied. The extension is picked at random
                 from a fixed list.
    """
    file_extensions = [
        ".txt",    # Plain text file
        ".csv",    # Comma-separated values
        ".json",   # JSON data file
        ".xml",    # XML file
        ".pdf",    # PDF document
        ".docx",   # Microsoft Word document
        ".xlsx",   # Microsoft Excel spreadsheet
        ".jpg",    # JPEG image
        ".png",    # PNG image
        ".mp4",    # MPEG-4 video
        ".zip",    # ZIP archive
    ]
    # A missing prefix must contribute an empty string; concatenating None
    # with the base name would raise a TypeError.
    key_prefix = f"{prefix}/" if prefix else ""
    basename = ''.join(random.choices(string.ascii_lowercase + string.digits, k=length))
    extension = random.choice(file_extensions)
    return key_prefix + basename + extension

def upload_file_to_s3(file_name, bucket_name, object_name, tags, endpoint_url):
    """
       Upload a local file as an S3 object, then attach user tags to it.

       A new boto3 S3 client is created on every call. It targets
       `endpoint_url` and authenticates with the notebook-level
       S3_ACCESS_KEY / S3_SECRET_KEY values.

       :param file_name: Path of the local file to upload.
       :param bucket_name: Destination bucket.
       :param object_name: Key to store the object under.
       :param tags: Tag set as a list of {'Key': ..., 'Value': ...} dicts.
       :param endpoint_url: URL of the S3 endpoint to connect to.
       :return: True when both the upload and the tagging call complete;
                False (after printing the error) when either one raises.
    """
    # SigV4 signing with path-style addressing for the custom endpoint.
    client_config = Config(
        signature_version='s3v4',
        parameter_validation=False,
        s3={
            'payload_signing_enabled': False,
            'addressing_style': 'path',
            'checksum_algorithm': None,
        },
    )
    s3_client = boto3.client(
        's3',
        endpoint_url=endpoint_url,
        aws_access_key_id=S3_ACCESS_KEY,
        aws_secret_access_key=S3_SECRET_KEY,
        config=client_config,
        verify=False,  # Do not verify TLS certificates for this endpoint
    )

    try:
        # The object is written first; tagging is a separate request.
        s3_client.upload_file(file_name, bucket_name, object_name)
        s3_client.put_object_tagging(
            Bucket=bucket_name,
            Key=object_name,
            Tagging={'TagSet': tags},
        )
    except Exception as e:
        print(f'Error uploading {file_name} to {bucket_name}: {e}')
        return False
    return True

## Simulate Object Events

### Put Objects and Tags

In [None]:
# Number of objects to create; every object reuses the same local file as its body.
create_number = 500
file_name = 'demo_state.json'
spin = 0
last_percent = -1
start_time = time.time()
s3_endpoint_url = f"http://{vip_pool_ip}"
# Keys of the objects that were uploaded successfully; later cells sample from it.
s3_objects = []
for i in range(create_number):

    gen_tags = generate_random_tags()
    tags = [
        {'Key': 'BPD', 'Value': gen_tags[0]},
        {'Key': 'PSI', 'Value': gen_tags[1]},
        {'Key': 'Temperature', 'Value': gen_tags[2]},
        {'Key': 'Project', 'Value': gen_tags[3]},
        {'Key': 'Asset_type', 'Value': gen_tags[4]},
        {'Key': 'HSP', 'Value': gen_tags[5]}
    ]
    s3_object_key = generate_random_objectname(length=8, prefix='site1')
    # upload_file_to_s3 returns False when the upload or tagging failed. Only
    # successful keys are recorded so later cells never act on objects that
    # were not created.
    if upload_file_to_s3(file_name, S3_bucket_name, s3_object_key, tags, s3_endpoint_url):
        s3_objects.append(s3_object_key)

    # Provide feedback to user on progress for every 10% completed:
    # the spinner advances one frame each time a new multiple of 10% is reached.
    percent = (i * 100) // create_number
    if percent % 10 == 0 and percent != last_percent:
        spin = (spin + 1) % len(spinner)
        last_percent = percent
    sys.stdout.write(f'\r{spinner[spin]}')
    sys.stdout.flush()
print(f"Elapsed time: {time.time() - start_time:.2f}")

In [None]:
# Evaluate the first key recorded in s3_objects (shown as the cell's result
# when run in a notebook). Raises IndexError if the list is empty.
s3_objects[0]

#### Query Kafka now

* Switch to the S3 Events - Kafka Notebook and process the events that were generated.
* Using the VAST DB Notebook, run SQL queries against the events.

### Delete 10% of the Objects from S3 bucket

In [None]:
#
# Randomly select 10% of the Objects that were created and delete them.
#
# At least 1 object is selected when any exist, and never more than are
# available: random.sample raises ValueError if asked for more items than the
# list holds.
num_to_delete = min(len(s3_objects), max(1, len(s3_objects) // 10))
objects_to_delete = random.sample(s3_objects, num_to_delete)
objects_deleted = 0
s3_client = boto3.client(
    's3',
    endpoint_url=s3_endpoint_url,
    aws_access_key_id=S3_ACCESS_KEY,
    aws_secret_access_key=S3_SECRET_KEY,
    config=Config(
        signature_version='s3v4',
        parameter_validation=False,
        s3={
            'payload_signing_enabled': False, 'addressing_style': 'path', 'checksum_algorithm': None
        }
    ),
    verify=False
)

# A failed delete is reported and skipped so the remaining keys are still processed.
for key in objects_to_delete:
    try:
        s3_client.delete_object(Bucket=S3_bucket_name, Key=key)
        objects_deleted += 1
    except Exception as e:
        print(f"Error deleting {key}: {e}")
print(f"{objects_deleted} objects were deleted from the {S3_bucket_name} bucket.")
# Show up to 10 of the selected keys; fewer when fewer than 10 were selected.
num_to_show = min(10, len(objects_to_delete))
print(f"Here are {num_to_show} Objects that were marked for deletion:")
random.sample(objects_to_delete, num_to_show)

#### Query Kafka now

* Switch to the Kafka Notebook and process the events that were generated.
* Using the VAST DB Notebook, run SQL queries against the events.

### Add User Tags

In [None]:
def tag_objects_with_extension(bucket_name, s3_client, extension=".txt", new_tags=None):
    """
       Adds new tags to list of Objects that have a given extension.

       Every object in the bucket is listed; for each key ending with
       `extension`, the existing tag set is read, merged with `new_tags`
       (new values win on a key collision) and written back.

       :param bucket_name: Name of the bucket to scan.
       :param s3_client: An initialized boto3 S3 client.
       :param extension: Key suffix an object must have to be tagged.
       :param new_tags: Dict of tag key -> tag value to add; None means no new tags.
       :return: Number of objects whose tag set was written.
       :raises: Any client error from the tagging calls other than a
                'NoSuchTagSet' response when reading the existing tags.
    """
    count = 0
    if new_tags is None:
        new_tags = {}

    paginator = s3_client.get_paginator('list_objects_v2')
    page_iterator = paginator.paginate(Bucket=bucket_name)

    for page in page_iterator:
        for obj in page.get('Contents', []):
            key = obj['Key']
            if not key.endswith(extension):
                continue

            # Get existing tags. 'NoSuchTagSet' is not a modeled exception
            # class on the S3 client, so the generic ClientError is caught and
            # its error code inspected; any other error is re-raised.
            try:
                response = s3_client.get_object_tagging(Bucket=bucket_name, Key=key)
            except s3_client.exceptions.ClientError as err:
                if err.response.get('Error', {}).get('Code') != 'NoSuchTagSet':
                    raise
                tag_set = []
            else:
                tag_set = response.get('TagSet', [])

            # Merge existing tags with new tags
            tag_dict = {tag['Key']: tag['Value'] for tag in tag_set}
            tag_dict.update(new_tags)

            # Format tag set
            updated_tag_set = [{'Key': k, 'Value': v} for k, v in tag_dict.items()]

            # Apply updated tags
            s3_client.put_object_tagging(
                Bucket=bucket_name,
                Key=key,
                Tagging={'TagSet': updated_tag_set}
            )
            count += 1
    return count


# Client used for the tagging pass: SigV4 signing, path-style addressing,
# TLS certificate verification turned off.
s3_client = boto3.client(
    's3',
    verify=False,
    config=Config(
        s3={'payload_signing_enabled': False, 'addressing_style': 'path', 'checksum_algorithm': None},
        parameter_validation=False,
        signature_version='s3v4',
    ),
    aws_secret_access_key=S3_SECRET_KEY,
    aws_access_key_id=S3_ACCESS_KEY,
    endpoint_url=s3_endpoint_url,
)

# Add the two tags below to every object whose key ends with this extension.
update_extension = ".docx"
proc_count = tag_objects_with_extension(
    S3_bucket_name,
    s3_client,
    extension=update_extension,
    new_tags={"processed": "False", "ASCII": "False"},
)
print(f"{proc_count} `{update_extension}` Objects were updated.")

#### Query Kafka now

* Switch to the Kafka Notebook and process the events that were generated.
* Using the VAST DB Notebook, run SQL queries against the events.

### Update User Tags

In [None]:

def update_s3_object_tag_value(s3_client, bucket_name, tag_key, old_value, new_value):
    """
    Walk every object in a bucket and rewrite one tag's value where it matches.

    For each listed object the tag set is fetched. The first tag whose key is
    `tag_key` and whose value is `old_value` gets `new_value`, and the whole
    tag set is written back. Objects without such a tag are left untouched.
    Per-object failures are printed and do not stop the scan.

    :param s3_client: An initialized boto3 S3 client
    :param bucket_name: Name of the S3 bucket to process
    :param tag_key: The key of the tag to look for (e.g., 'Project')
    :param old_value: The current value of the tag that should be replaced
    :param new_value: The new value to assign to the tag if matched
    """
    pages = s3_client.get_paginator('list_objects_v2').paginate(Bucket=bucket_name)

    for page in pages:
        for entry in page.get('Contents', []):
            key = entry['Key']
            try:
                tags = s3_client.get_object_tagging(Bucket=bucket_name, Key=key)['TagSet']

                # Only the first matching tag is considered.
                matching_tag = next(
                    (t for t in tags if t['Key'] == tag_key and t['Value'] == old_value),
                    None,
                )
                if matching_tag is None:
                    continue

                # The matched dict is part of `tags`, so the edit is included
                # in the tag set that is written back.
                matching_tag['Value'] = new_value
                s3_client.put_object_tagging(
                    Bucket=bucket_name,
                    Key=key,
                    Tagging={'TagSet': tags},
                )
                print(f"Updated tag '{tag_key}' from '{old_value}' to '{new_value}' for object: {key}")
            except s3_client.exceptions.NoSuchKey:
                print(f"Object not found: {key}")
            except Exception as e:
                print(f"Error processing object {key}: {e}")


# Client used for the tag-update pass: SigV4 signing, path-style addressing,
# TLS certificate verification turned off.
s3_client = boto3.client(
    's3',
    verify=False,
    config=Config(
        s3={'payload_signing_enabled': False, 'addressing_style': 'path', 'checksum_algorithm': None},
        parameter_validation=False,
        signature_version='s3v4',
    ),
    aws_secret_access_key=S3_SECRET_KEY,
    aws_access_key_id=S3_ACCESS_KEY,
    endpoint_url=s3_endpoint_url,
)

#
# Find all of the S3 Objects in the bucket that have a Project of "GoM" and update it to "GoA"
#
update_s3_object_tag_value(s3_client, S3_bucket_name, 'Project', 'GoM', 'GoA')