In [19]:
!pip install boto3
!pip install botocore



In [37]:
#Creating a new Bucket on AWS

import boto3                                                                    # Import the AWS SDK for Python (boto3)
import logging                                                                  # Import logging for error handling
from botocore.exceptions import ClientError                                     # Import ClientError for handling exceptions

# Module-level S3 client; the bucket/object listing cells below call it as `s3`.
s3 = boto3.client('s3')

def create_bucket(bucket_name, region=None):
    """
    Creates an S3 bucket, optionally in a specific region.

    Args:
        bucket_name (str): The name of the bucket to create.
        region (str, optional): The AWS region where the bucket should be created.
                                If None, a client with the default configuration is
                                used and no location constraint is sent.

    Returns:
        bool: True if the bucket was created successfully, False if the request
              raised a ClientError (the error is logged).
    """

    # S3 does not accept 'us-east-1' as a LocationConstraint: buckets in that
    # region must be created WITHOUT a CreateBucketConfiguration. Every other
    # explicitly requested region needs the constraint.
    request_kwargs = {'Bucket': bucket_name}
    if region is not None and region != 'us-east-1':
        request_kwargs['CreateBucketConfiguration'] = {'LocationConstraint': region}

    try:
        if region is None:                                                      # No region given: rely on the default client configuration
            s3_client = boto3.client('s3')
        else:                                                                   # Region given: bind the client to that region
            s3_client = boto3.client('s3', region_name=region)

        s3_client.create_bucket(**request_kwargs)

    except ClientError as e:                                                    # Log the error and report failure to the caller
        logging.error(e)
        return False

    return True                                                                 # The bucket was created successfully

# Create the demo bucket. No region is passed, so the function's
# default-region branch is taken; the returned bool is shown as the cell output.
create_bucket(bucket_name="bucket-testing-dnc-tigureis")

True

In [None]:
#List AWS Buckets on the AWS account

response = s3.list_buckets()                                                    # Get a list of all S3 buckets

for bucket in response["Buckets"]:                                              # Iterate through the list of buckets and print information about each one
    print(bucket)                                                               # Each entry is a dict (the recorded output shows 'Name' and 'CreationDate')

{'Name': 'bucket-testing-dnc-tigureis', 'CreationDate': datetime.datetime(2025, 1, 14, 2, 12, 8, tzinfo=tzutc())}
{'Name': 'dnc-thiago-test', 'CreationDate': datetime.datetime(2025, 1, 13, 18, 14, 17, tzinfo=tzutc())}


In [27]:
# listing directories within an AWS bucket

BUCKET_NAME = 'bucket-testing-dnc-tigureis'
PREFIX = ''                                                                     # Empty prefix: list from the root of the bucket
DELIMITER = '/'                                                                 # Keys sharing a leading "folder" are rolled up under 'CommonPrefixes'

# Ask S3 for the top-level listing of the bucket. With a delimiter set, the
# response reports "directories" in 'CommonPrefixes' instead of listing every key.
response = s3.list_objects_v2(Bucket=BUCKET_NAME, Prefix=PREFIX, Delimiter=DELIMITER)

response

{'ResponseMetadata': {'RequestId': 'PYTDPPKX2AYQDHGW',
  'HostId': 'eUOQlngmwG8VQ5fIKKbPlCB8LlmEQqrN8ZMwmFDPzW+WFqjs0AcsOqyxBTnwTC0v1RPvotekFLY=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'eUOQlngmwG8VQ5fIKKbPlCB8LlmEQqrN8ZMwmFDPzW+WFqjs0AcsOqyxBTnwTC0v1RPvotekFLY=',
   'x-amz-request-id': 'PYTDPPKX2AYQDHGW',
   'date': 'Thu, 16 Jan 2025 17:15:07 GMT',
   'x-amz-bucket-region': 'us-east-1',
   'content-type': 'application/xml',
   'transfer-encoding': 'chunked',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'IsTruncated': False,
 'Name': 'bucket-testing-dnc-tigureis',
 'Prefix': '',
 'Delimiter': '/',
 'MaxKeys': 1000,
 'CommonPrefixes': [{'Prefix': 'uploading/'}],
 'EncodingType': 'url',
 'KeyCount': 1}

In [39]:
# Extract subdirectory names from the 'CommonPrefixes' list in the response.

# Collect the 'Prefix' value of every entry under 'CommonPrefixes'.
# NOTE: `response` must be the delimiter-based listing; a response without a
# 'CommonPrefixes' key yields an empty list here.
directories = [
    entry['Prefix']
    for entry in response.get('CommonPrefixes', [])
]
directories

[]

In [40]:
#List objects inside a Bucket

BUCKET_NAME='bucket-testing-dnc-tigureis'

# NOTE: this rebinds `response` to a listing made WITHOUT a delimiter, so it has
# no 'CommonPrefixes'. Run the directory-extraction cell before this one.
response = s3.list_objects_v2(
    Bucket=BUCKET_NAME                                                          # List objects in the specified bucket
)

objects = response.get('Contents', [])                                          # Get the list of objects from the response ([] when the bucket is empty)

# The loop variable is named `obj`, not `object`: a top-level loop variable
# stays bound after the loop and would shadow the `object` builtin for the
# rest of the kernel session.
for obj in objects:                                                             # Iterate through each object in the list and print the object key
    print(obj['Key'])

uploading/new_world_cup.csv


In [30]:
# Upload files on an AWS Bucket

import boto3
from botocore.exceptions import ClientError
import os

def upload_file(file_name, bucket, object_name=None):
    """
    Uploads a file to an S3 bucket.

    Args:
        file_name (str): The path to the local file to upload.
        bucket (str): The name of the S3 bucket to upload the file to.
        object_name (str, optional): The desired name (key) of the object in the S3 bucket.
                                    If not provided, the base name of file_name is used.

    Returns:
        bool: True if the file was uploaded successfully, False if the upload
              raised a ClientError or S3UploadFailedError (the error is logged).
    """

    s3_client = boto3.client('s3')                                              # Create an S3 client

    if object_name is None:                                                     # If no object name is provided, use the file's base name
        object_name = os.path.basename(file_name)

    try:
        s3_client.upload_file(                                                  # Upload the file to the S3 bucket
            file_name, bucket, object_name)
    except (ClientError, boto3.exceptions.S3UploadFailedError) as e:            # The managed transfer re-raises service errors as S3UploadFailedError
        logging.error(e)                                                        # Log the error message
        return False

    return True                                                                 # Return True if the upload was successful

# Upload the local World Cup CSV to the demo bucket under the 'uploading/' prefix.
upload_file(
    "/home/tigureis/DNC_engenharia_de_dados/Input_data/WorldCups.csv",
    'bucket-testing-dnc-tigureis',
    object_name='uploading/new_world_cup.csv',
)

In [33]:
# download files from s3
import boto3

s3_client = boto3.client('s3')                                                  # Create an S3 client for this cell

# Download through the client created above, so this cell does not depend on
# the global `s3` defined in another cell.
s3_client.download_file(
    Bucket='bucket-testing-dnc-tigureis',
    Key='uploading/new_world_cup.csv',
    Filename='/home/tigureis/DNC_engenharia_de_dados/sink/world_cup_from_s3.csv'
)

In [35]:
# Create a presigned URL for downloading the file

import logging
import boto3
from botocore.exceptions import ClientError

def create_presigned_url(bucket_name, object_name, expiration=3600):
    """
    Builds a pre-signed 'get_object' URL for a single S3 object.

    Args:
        bucket_name (str): Bucket that holds the object.
        object_name (str): Key of the object inside the bucket.
        expiration (int, optional): Lifetime of the URL in seconds. Defaults to 3600 (1 hour).

    Returns:
        str: The pre-signed URL on success; False if a ClientError was raised
             (the error is logged).
    """

    client = boto3.client('s3')                                                 # Fresh S3 client for this request
    target = {'Bucket': bucket_name, 'Key': object_name}                        # Object the URL grants access to

    try:
        return client.generate_presigned_url(                                   # Sign a GET request for the target object
            'get_object',
            Params=target,
            ExpiresIn=expiration,
        )
    except ClientError as err:                                                  # Signing failed: log it and signal failure
        logging.error(err)
        return False


bucket_name = 'bucket-testing-dnc-tigureis'
object_key = 'uploading/new_world_cup.csv'

# Generate the URL with the default expiration and show it as the cell output.
url = create_presigned_url(bucket_name, object_key)
url