# Test Localstack + boto3

In [None]:
# !pip install boto3 awscli


Collecting boto3
  Downloading boto3-1.38.24-py3-none-any.whl (139 kB)
[K     |████████████████████████████████| 139 kB 820 kB/s eta 0:00:01
[?25hCollecting awscli
  Downloading awscli-1.40.23-py3-none-any.whl (4.7 MB)
[K     |████████████████████████████████| 4.7 MB 12.7 MB/s eta 0:00:01
[?25hCollecting jmespath<2.0.0,>=0.7.1
  Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)
Collecting botocore<1.39.0,>=1.38.24
  Downloading botocore-1.38.24-py3-none-any.whl (13.6 MB)
[K     |████████████████████████████████| 13.6 MB 1.8 MB/s eta 0:00:01
[?25hCollecting s3transfer<0.14.0,>=0.13.0
  Downloading s3transfer-0.13.0-py3-none-any.whl (85 kB)
[K     |████████████████████████████████| 85 kB 1.8 MB/s eta 0:00:01
[?25hCollecting rsa<4.8,>=3.1.2
  Downloading rsa-4.7.2-py3-none-any.whl (34 kB)
Collecting colorama<0.4.7,>=0.2.5
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Collecting docutils<=0.19,>=0.18.1
  Downloading docutils-0.19-py3-none-any.whl (570 kB)
[K     |███

In [5]:
import boto3

s3 = boto3.client(
    's3',
    aws_access_key_id='test',
    aws_secret_access_key='test',
    region_name='us-east-1',
    endpoint_url='http://localhost:4566',  # Localstack S3 endpoint
)

s3.create_bucket(Bucket='my-bucket')
s3.create_bucket(Bucket='my-bucket2')
s3.create_bucket(Bucket='my-bucket3')


{'ResponseMetadata': {'RequestId': '19397434-0a1d-40cd-adad-f8fca4d4c797',
  'HostId': 's9lzHYrFp76ZVxRcpX9+5cjAnEH2ROuNkd2BHfIa6UkFVdtjf5mKR3/eTPFvsiP/XV/VLi31234=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'server': 'TwistedWeb/24.3.0',
   'date': 'Wed, 28 May 2025 02:28:38 GMT',
   'access-control-allow-origin': '*',
   'access-control-allow-methods': 'HEAD,GET,PUT,POST,DELETE,OPTIONS,PATCH',
   'access-control-allow-headers': 'authorization,cache-control,content-length,content-md5,content-type,etag,location,x-amz-acl,x-amz-content-sha256,x-amz-date,x-amz-request-id,x-amz-security-token,x-amz-tagging,x-amz-target,x-amz-user-agent,x-amz-version-id,x-amzn-requestid,x-localstack-target,amz-sdk-invocation-id,amz-sdk-request,x-amz-log-type',
   'access-control-expose-headers': 'etag,x-amz-version-id,x-amz-log-result,x-amz-executed-version,x-amz-function-error',
   'vary': 'Origin',
   'location': '/my-bucket3',
   'x-amz-request-id': '19397434-0a1d-40cd-adad-f8fca4d4c797',
   'x-amz-id

In [6]:
import boto3

s3 = boto3.client(
    's3',
    aws_access_key_id='test',
    aws_secret_access_key='test',
    region_name='us-east-1',
    endpoint_url='http://localhost:4566'
)

response = s3.list_buckets()
for bucket in response['Buckets']:
    print(bucket['Name'])


my-bucket
my-bucket2
my-bucket3


# Functionality #1 & #2: Find buckets/objects that don't follow the naming convention
it requires returning the list of bucket and object names, and then the LLM (mcp client) will decide which ones don't follow the naming convention. Tools provided:

* list_buckets()
* list_objects_v2(Bucket=bucket_name)


In [None]:
buckets = s3.list_buckets()['Buckets']
print([b['Name'] for b in buckets])

['my-bucket', 'my-bucket2', 'my-bucket3']


In [22]:
buckets = s3.list_buckets()['Buckets']
for b in buckets:
    objects = s3.list_objects_v2(Bucket=b['Name']).get('Contents', [])
    print(b['Name'], objects)

my-bucket [{'Key': 'test.txt', 'LastModified': datetime.datetime(2025, 5, 28, 2, 31, tzinfo=tzutc()), 'ETag': '"5eb63bbbe01eeed093cb22bb8f5acdc3"', 'ChecksumAlgorithm': ['CRC32'], 'ChecksumType': 'FULL_OBJECT', 'Size': 11, 'StorageClass': 'STANDARD'}]
my-bucket2 [{'Key': 'test2.txt', 'LastModified': datetime.datetime(2025, 5, 28, 2, 31, tzinfo=tzutc()), 'ETag': '"5eb63bbbe01eeed093cb22bb8f5acdc3"', 'ChecksumAlgorithm': ['CRC32'], 'ChecksumType': 'FULL_OBJECT', 'Size': 11, 'StorageClass': 'STANDARD'}]
my-bucket3 [{'Key': 'test3.txt', 'LastModified': datetime.datetime(2025, 5, 28, 2, 31, tzinfo=tzutc()), 'ETag': '"5eb63bbbe01eeed093cb22bb8f5acdc3"', 'ChecksumAlgorithm': ['CRC32'], 'ChecksumType': 'FULL_OBJECT', 'Size': 11, 'StorageClass': 'STANDARD'}]


In [30]:
# TODO: there should be a tool to modify the names, it receives the new names from the client

# Functionality #3: Lifecycle policies
Return the lifecycle policies of the S3 buckets, the LLM will analyze if they meet the guidelines. Tools provided:

* get_bucket_lifecycle_configuration()
* set_bucket_lifecycle_configuration(configuration)


In [24]:
import boto3

# Use LocalStack endpoint and dummy credentials
s3 = boto3.client(
    's3',
    endpoint_url='http://localhost:4566',
    aws_access_key_id='test',
    aws_secret_access_key='test',
    region_name='us-east-1'
)

bucket_name = 'my-bucket'

In [None]:
# modify configuration. TODO: this is also a tool
lifecycle_configuration = {
    'Rules': [
        {
            'ID': 'TransitionToStandardIA',
            'Filter': {'Prefix': ''},
            'Status': 'Enabled',
            'Transitions': [
                {
                    'Days': 30,
                    'StorageClass': 'STANDARD_IA'
                }
            ]
        },
        {
            'ID': 'TransitionToGlacier',
            'Filter': {'Prefix': ''},
            'Status': 'Enabled',
            'Transitions': [
                {
                    'Days': 90,
                    'StorageClass': 'GLACIER'
                }
            ]
        },
        {
            'ID': 'ExpireAfter365Days',
            'Filter': {'Prefix': ''},
            'Status': 'Enabled',
            'Expiration': {
                'Days': 365
            }
        }
    ]
}

response = s3.put_bucket_lifecycle_configuration(
    Bucket=bucket_name,
    LifecycleConfiguration=lifecycle_configuration
)

In [28]:
try:
    response = s3.get_bucket_lifecycle_configuration(Bucket=bucket_name)
    rules = response.get('Rules', [])
    print("Lifecycle Rules:")
    for rule in rules:
        print(rule)
except s3.exceptions.ClientError as e:
    if e.response['Error']['Code'] == 'NoSuchLifecycleConfiguration':
        print("No lifecycle configuration found for this bucket.")
    else:
        raise

Lifecycle Rules:
{'ID': 'TransitionToStandardIA', 'Filter': {'Prefix': ''}, 'Status': 'Enabled', 'Transitions': [{'Days': 30, 'StorageClass': 'STANDARD_IA'}]}
{'ID': 'TransitionToGlacier', 'Filter': {'Prefix': ''}, 'Status': 'Enabled', 'Transitions': [{'Days': 90, 'StorageClass': 'GLACIER'}]}
{'Expiration': {'Days': 365}, 'ID': 'ExpireAfter365Days', 'Filter': {'Prefix': ''}, 'Status': 'Enabled'}


# Functionality #4: return versioning status of the S3 buckets
Return the versioning status of the S3 buckets, the LLM will analyze if they meet the guidelines. tools provided:

* get_bucket_versioning(Bucket=bucket_name)
* set_bucket_versioning(Bucket=bucket_name, VersioningConfiguration={'Status': 'Enabled'})


In [29]:
import boto3

s3 = boto3.client(
    's3',
    endpoint_url='http://localhost:4566',  # Use if working with LocalStack; omit for real AWS
    aws_access_key_id='test',               # Use your credentials for real AWS
    aws_secret_access_key='test',
    region_name='us-east-1'
)

bucket_name = 'my-bucket'

response = s3.get_bucket_versioning(Bucket=bucket_name)
status = response.get('Status', 'Not enabled')

print(f"Versioning status for bucket '{bucket_name}': {status}")


Versioning status for bucket 'my-bucket': Not enabled


In [None]:
# Enable versioning
s3.put_bucket_versioning(
    Bucket=bucket,
    VersioningConfiguration={'Status': 'Enabled'}
)

# Functionality #5: ACCESS CONTROL & PERMISSIONS
Return the list of buckets and objects that have public access enabled, the LLM will analyze if they meet the guidelines. tools provided:

* list_buckets()
* list_objects_v2(Bucket=bucket_name)
* get_bucket_acl(Bucket=bucket_name)
* get_object_acl(Bucket=bucket_name, Key=key_name)

This functionality could also include tools to analyze IAM service, instead of the ACLs

In [34]:
import boto3

s3 = boto3.client(
    's3',
    endpoint_url='http://localhost:4566',  # Use if working with LocalStack; omit for real AWS
    aws_access_key_id='test',               # Use your credentials for real AWS
    aws_secret_access_key='test',
    region_name='us-east-1'
)

bucket_name = 'my-bucket'

try:
    response = s3.get_public_access_block(Bucket=bucket_name)
    pab = response['PublicAccessBlockConfiguration']
    print(f"Public Access Block settings for {bucket_name}:")
    print(pab)
except Exception as e:
    print(f"No Public Access Block configuration set for {bucket_name}: {e}")


Public Access Block settings for my-bucket:
{'BlockPublicAcls': True, 'IgnorePublicAcls': True, 'BlockPublicPolicy': True, 'RestrictPublicBuckets': True}


# Functionality #6: Encryption
For each bucket, check if it has encryption enabled and if it is using the correct encryption method. If not, it can configured. Tools provided:

* get_bucket_encryption(Bucket=bucket_name)
* put_bucket_encryption(Bucket=bucket_name, ServerSideEncryptionConfiguration=encryption_config)



In [37]:
import boto3
from botocore.exceptions import ClientError

s3 = boto3.client(
    's3',
    endpoint_url='http://localhost:4566',  # Remove for real AWS
    aws_access_key_id='test',
    aws_secret_access_key='test',
    region_name='us-east-1'
)

bucket_name = 'my-bucket'

try:
    response = s3.get_bucket_encryption(Bucket=bucket_name)
    rules = response['ServerSideEncryptionConfiguration']['Rules']
    
    for rule in rules:
        algo = rule['ApplyServerSideEncryptionByDefault']['SSEAlgorithm']
        kms_key = rule['ApplyServerSideEncryptionByDefault'].get('KMSMasterKeyID')
        print(f"Encryption algorithm: {algo}")
        if algo == 'aws:kms':
            print(f"KMS Key ID: {kms_key}")
except ClientError as e:
    if e.response['Error']['Code'] == 'ServerSideEncryptionConfigurationNotFoundError':
        print(f"No server-side encryption configured for bucket: {bucket_name}")
    else:
        raise


Encryption algorithm: aws:kms
KMS Key ID: None


In [None]:
s3.put_bucket_encryption(
    Bucket=bucket_name,
    ServerSideEncryptionConfiguration={
        'Rules': [
            {
                'ApplyServerSideEncryptionByDefault': {
                    'SSEAlgorithm': 'aws:kms'  # or 'aws:kms'
                }
            }
        ]
    }
)

{'ResponseMetadata': {'RequestId': '5cf01051-c58f-4a95-885f-2cc7ba4c41af',
  'HostId': 's9lzHYrFp76ZVxRcpX9+5cjAnEH2ROuNkd2BHfIa6UkFVdtjf5mKR3/eTPFvsiP/XV/VLi31234=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'server': 'TwistedWeb/24.3.0',
   'date': 'Wed, 28 May 2025 03:29:37 GMT',
   'x-amz-request-id': '5cf01051-c58f-4a95-885f-2cc7ba4c41af',
   'x-amz-id-2': 's9lzHYrFp76ZVxRcpX9+5cjAnEH2ROuNkd2BHfIa6UkFVdtjf5mKR3/eTPFvsiP/XV/VLi31234=',
   'content-length': '0'},
  'RetryAttempts': 0}}

# Functionality #7: Find duplicates 

You typically consider files "duplicates" if they match:

✅ Content hash (body) (e.g., MD5 or SHA256)

🟡 Optional: File name or metadata (for more context)

Tools provided:

* get_duplicates()

In [None]:
import boto3

s3 = boto3.client(
    's3',
    aws_access_key_id='test',
    aws_secret_access_key='test',
    region_name='us-east-1',
    endpoint_url='http://localhost:4566',  # Localstack S3 endpoint
)

s3.create_bucket(Bucket='my-bucket')
s3.create_bucket(Bucket='my-bucket2')
s3.create_bucket(Bucket='my-bucket3')

s3.put_object(Bucket='my-bucket', Key='test.txt', Body='hello world')
response = s3.get_object(Bucket='my-bucket', Key='test.txt')
print(response['Body'].read().decode())

s3.put_object(Bucket='my-bucket2', Key='test2.txt', Body='hello world')
response = s3.get_object(Bucket='my-bucket2', Key='test2.txt')
print(response['Body'].read().decode())


s3.put_object(Bucket='my-bucket3', Key='test3.txt', Body='hello world')
response = s3.get_object(Bucket='my-bucket3', Key='test3.txt')
print(response['Body'].read().decode())

In [None]:
# Use list_objects_v2 to retrieve object keys and metadata from each bucket.

buckets = s3.list_buckets()['Buckets']
for b in buckets:
    objects = s3.list_objects_v2(Bucket=b['Name']).get('Contents', [])


In [None]:
# Use get_object() and compute the hash manually (since the ETag is only a reliable MD5 for non-multipart, unencrypted files).
import hashlib

def hash_object(bucket, key):
    obj = s3.get_object(Bucket=bucket, Key=key)
    body = obj['Body'].read()
    return hashlib.md5(body).hexdigest()  # or use sha256


In [None]:
# Create a mapping of hashes to (bucket, key) pairs.

duplicates = {}
hash_map = {}
bucket_names = [bucket['Name'] for bucket in s3.list_buckets()['Buckets']]
for bucket in bucket_names:
    objects = s3.list_objects_v2(Bucket=bucket).get('Contents', [])
    for obj in objects:
        key = obj['Key']
        file_hash = hash_object(bucket, key)

        if file_hash in hash_map:
            # Duplicate found
            duplicates.setdefault(file_hash, []).append((bucket, key))
        else:
            hash_map[file_hash] = (bucket, key)


In [12]:
for file_hash, locations in duplicates.items():
    print(f"\nDuplicate hash: {file_hash}")
    for bucket, key in locations:
        print(f" - s3://{bucket}/{key}")



Duplicate hash: 5eb63bbbe01eeed093cb22bb8f5acdc3
 - s3://my-bucket2/test2.txt
 - s3://my-bucket3/test3.txt


In [2]:
!pip install mcp

[31mERROR: Could not find a version that satisfies the requirement mcp (from versions: none)[0m
[31mERROR: No matching distribution found for mcp[0m
You should consider upgrading via the '/Users/juanmari/Documents/mcp-cloud-analyzer/venv/bin/python3 -m pip install --upgrade pip' command.[0m
