In [1]:
#pip install --upgrade boto3

In [2]:
import io
import json
import os
import time
import zipfile

import boto3
from   botocore.exceptions import NoCredentialsError, ClientError

In [3]:
import numpy  as np
import pandas as pd

In [4]:
#############################################################################
#############################################################################

In [5]:
# AWS settings for this notebook.
# The access key pair is read from the environment so that no secret is stored
# in the notebook itself. Export AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY
# before starting the kernel. When a variable is unset the value is None, and
# a boto3 client given None for these arguments resolves credentials through
# its default chain (shared config files, IAM role, ...).
aws_access_key_id     = os.environ.get('AWS_ACCESS_KEY_ID')
aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY')
aws_region            = 'us-east-1'
# NOTE(review): aws_role is not referenced by any cell visible in this notebook.
aws_role  =   'arn:aws:iam::1111111111:role/misc-admin-role'

In [6]:
#############################################################################
#############################################################################

In [7]:
# Module-level S3 client shared by the bucket helpers defined in this cell.
s3_client = boto3.client(
    service_name='s3',
    # The endpoint for us-east-1 is given explicitly.
    endpoint_url='https://s3.amazonaws.com',
    region_name=aws_region,
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

def list_all_buckets():
    """Return the names of the buckets reported by ``s3_client.list_buckets``.

    Returns:
        list: The ``Name`` of every entry under ``Buckets`` in the response,
        or an empty list when the call raises ``ClientError`` (the error is
        printed rather than re-raised).
    """
    try:
        listing = s3_client.list_buckets()
        names = []
        for entry in listing['Buckets']:
            names.append(entry['Name'])
        return names
    except ClientError as err:
        print(f"Error listing buckets: {err}")
        return []

def create_bucket(bucket_name, region='us-east-1'):
    """Create ``bucket_name`` through ``s3_client`` and report the outcome.

    Args:
        bucket_name: Name of the bucket to create.
        region: Target region. For ``'us-east-1'`` the request carries only
            the bucket name; for any other value a ``LocationConstraint``
            with that region is sent as well.

    A ``ClientError`` is caught and printed; nothing is returned.
    """
    request = {'Bucket': bucket_name}
    if region != 'us-east-1':
        request['CreateBucketConfiguration'] = {'LocationConstraint': region}
    try:
        s3_client.create_bucket(**request)
        print(f"Bucket {bucket_name} created successfully in region {region}.")
    except ClientError as err:
        print(f"Error creating bucket: {err}")
        
#delete_bucket('james-bucket-72') 
def delete_bucket(bucket_name):
    """Delete every object in ``bucket_name`` and then the bucket itself.

    Args:
        bucket_name: Name of the bucket to empty and delete.

    Objects are listed page by page and removed with one batched
    ``delete_objects`` request per page. If S3 reports that some keys could
    not be deleted, those failures are printed and the bucket is left in
    place. A ``ClientError`` raised by any call is caught and printed.
    Nothing is returned.

    Only the objects returned by ``list_objects_v2`` are removed; this
    function does not list or delete object versions.
    """
    try:
        # A single list_objects_v2 call returns at most 1000 keys, so walk all
        # pages; otherwise a larger bucket stays non-empty and the final
        # delete_bucket call is rejected.
        paginator = s3_client.get_paginator('list_objects_v2')
        for page in paginator.paginate(Bucket=bucket_name):
            keys = [{'Key': obj['Key']} for obj in page.get('Contents', [])]
            if not keys:
                continue
            # One page never exceeds the 1000-key limit of delete_objects.
            result = s3_client.delete_objects(
                Bucket=bucket_name,
                Delete={'Objects': keys, 'Quiet': True},
            )
            # delete_objects reports per-key failures in the response instead
            # of raising, so they have to be checked explicitly.
            failures = (result or {}).get('Errors') or []
            if failures:
                for failure in failures:
                    print(
                        f"Error deleting object {failure.get('Key')}: "
                        f"{failure.get('Message')}"
                    )
                return

        # Delete the (now empty) bucket
        s3_client.delete_bucket(Bucket=bucket_name)
        print(f"Bucket {bucket_name} deleted successfully.")
    except ClientError as e:
        print(f"Error deleting bucket: {e}")
        


In [8]:
#############################################################################
#############################################################################

In [9]:
### CREATE A NEW BUCKET:
# bucket_name is reused by the upload/download example cells further down.
bucket_name = "misc-bucket-17"
create_bucket(bucket_name=bucket_name, region=aws_region)

Bucket misc-bucket-17 created successfully in region us-east-1.


In [10]:
### LIST ALL EXISTING BUCKETS:
# The call is the cell's last expression, so the returned list of bucket names
# is displayed as the cell output.
list_all_buckets() 

['james-bucket-17', 'misc-bucket-17']

In [11]:
#############################################################################
#############################################################################

In [12]:
# Initialize the S3 client used by write_s3_file / read_s3_file.
# This rebinds s3_client with the same arguments as the client created for
# the bucket helpers earlier in the notebook.
s3_client = boto3.client(
    service_name='s3',
    # The endpoint for us-east-1 is given explicitly.
    endpoint_url='https://s3.amazonaws.com',
    region_name=aws_region,
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

def write_s3_file(bucket_name, object_name, data, source_format=''):
    """Serialize ``data`` and upload it to ``bucket_name/object_name``.

    Args:
        bucket_name: Target bucket.
        object_name: Object key. When ``source_format`` is empty, the text
            after the last ``.`` in the key selects the format.
        data: Payload to upload. Sent unchanged for ``txt``; passed through
            ``json.dumps`` for ``json``; for ``csv``, ``xlsx`` and
            ``parquet`` it must provide ``to_csv`` / ``to_excel`` /
            ``to_parquet`` (the example cells pass pandas DataFrames).
        source_format: One of ``txt``, ``csv``, ``xlsx``, ``json``,
            ``parquet``. Matched case-insensitively; surrounding whitespace
            and a leading dot are ignored.

    Raises:
        ValueError: If the format is not supported. Raised before anything
            is uploaded.

    A ``ClientError`` raised by the upload is caught and printed. Nothing is
    returned.
    """
    if not source_format:
        source_format = object_name.split('.')[-1]
    requested_format = source_format
    # Normalise so that 'CSV', '.csv' and 'csv' all select the same serializer.
    source_format = source_format.strip().lstrip('.').lower()

    # Step 1: turn `data` into the request body; `label` feeds the success message.
    if source_format == 'txt':
        body, label = data, 'Text'

    elif source_format == 'csv':
        csv_buffer = io.StringIO()
        data.to_csv(csv_buffer, index=False)
        body, label = csv_buffer.getvalue(), 'CSV'

    elif source_format == 'xlsx':
        excel_buffer = io.BytesIO()
        # The workbook is only complete once the writer context has exited.
        with pd.ExcelWriter(excel_buffer, engine='xlsxwriter') as writer:
            data.to_excel(writer, index=False)
        body, label = excel_buffer.getvalue(), 'Excel'

    elif source_format == 'json':
        body, label = json.dumps(data), 'JSON'

    elif source_format == 'parquet':
        parquet_buffer = io.BytesIO()
        data.to_parquet(parquet_buffer, index=False)
        body, label = parquet_buffer.getvalue(), 'Parquet'

    else:
        raise ValueError(f"Unsupported source format: {requested_format}")

    # Step 2: a single upload call shared by every format.
    try:
        s3_client.put_object(Bucket=bucket_name, Key=object_name, Body=body)
        print(f"{label} file uploaded to {bucket_name}/{object_name}.")
    except ClientError as e:
        print(f"Error uploading file to S3: {e}")
        

def read_s3_file(bucket_name, object_name):
    """Download ``bucket_name/object_name`` and parse it by file extension.

    Args:
        bucket_name: Bucket holding the object.
        object_name: Object key. The text after its last ``.`` selects the
            parser and is matched case-insensitively.

    Returns:
        ``str`` for ``txt``; the ``json.loads`` result for ``json``; the
        result of ``pd.read_csv`` / ``pd.read_excel`` / ``pd.read_parquet``
        for ``csv`` / ``xlsx`` / ``parquet``; ``None`` when the download
        raises ``ClientError`` (the error is printed).

    Raises:
        ValueError: If the extension is not supported. Raised before the
            object is requested.
    """
    file_extension = object_name.split('.')[-1]
    # Case-insensitive match, so 'DATA.CSV' is handled like 'data.csv'.
    kind = file_extension.lower()
    if kind not in ('txt', 'csv', 'xlsx', 'json', 'parquet'):
        # Reject unsupported extensions up front rather than downloading an
        # object whose content would be discarded.
        raise ValueError(f"Unsupported file extension: {file_extension}")

    try:
        response = s3_client.get_object(Bucket=bucket_name, Key=object_name)
        body = response['Body'].read()
    except ClientError as e:
        print(f"Error reading file from S3: {e}")
        return None

    if kind == 'txt':
        return body.decode('utf-8')
    if kind == 'csv':
        return pd.read_csv(io.StringIO(body.decode('utf-8')))
    if kind == 'xlsx':
        return pd.read_excel(io.BytesIO(body))
    if kind == 'json':
        return json.loads(body.decode('utf-8'))
    # Only 'parquet' is left after the validation above.
    return pd.read_parquet(io.BytesIO(body))


In [13]:
#############################################################################
#############################################################################

In [14]:
### EXAMPLES OF IMPORTING AND EXPORTING OBJECTS TO/FROM S3

In [15]:
# Plain text (TXT): upload a short multi-line string
text_data = (
    "\n"
    "Hello, this is a sample text!\n"
    "With multiple lines!\n"
)
write_s3_file(bucket_name=bucket_name, object_name='sample.txt', data=text_data)

Text file uploaded to misc-bucket-17/sample.txt.


In [16]:
# Read the TXT object back and print its content
obj = read_s3_file(bucket_name=bucket_name, object_name='sample.txt')
print(obj)


Hello, this is a sample text!
With multiple lines!



In [17]:
# JSON: upload a plain Python dictionary
json_data = {
    "Name": ["John", "Anna"],
    "Age": [28, 24],
}
write_s3_file(bucket_name=bucket_name, object_name='sample.json', data=json_data)

JSON file uploaded to misc-bucket-17/sample.json.


In [18]:
# Read the JSON object back and print the parsed dictionary
obj = read_s3_file(bucket_name=bucket_name, object_name='sample.json')
print(obj)

{'Name': ['John', 'Anna'], 'Age': [28, 24]}


In [19]:
# CSV: upload a DataFrame as a CSV object
csv_data = pd.DataFrame(
    {
        'Name': ['John', 'Anna'],
        'Age': [29, 24],
    }
)
write_s3_file(bucket_name=bucket_name, object_name='sample.csv', data=csv_data)

CSV file uploaded to misc-bucket-17/sample.csv.


In [20]:
# Read the CSV object back into a DataFrame and print it
obj = read_s3_file(bucket_name=bucket_name, object_name='sample.csv')
print(obj)

   Name  Age
0  John   29
1  Anna   24


In [21]:
# Excel: upload a DataFrame as an XLSX workbook
excel_data = pd.DataFrame(
    {
        'Name': ['John', 'Anna'],
        'Age': [28, 24],
    }
)
write_s3_file(bucket_name=bucket_name, object_name='sample.xlsx', data=excel_data)

Excel file uploaded to misc-bucket-17/sample.xlsx.


In [22]:
# Read the XLSX object back into a DataFrame and print it
obj = read_s3_file(bucket_name=bucket_name, object_name='sample.xlsx')
print(obj)

   Name  Age
0  John   28
1  Anna   24


In [23]:
# Parquet: upload a DataFrame as a Parquet object
parquet_data = pd.DataFrame(
    {
        'Name': ['John', 'Anna'],
        'Age': [25, 24],
    }
)
write_s3_file(bucket_name=bucket_name, object_name='sample.parquet', data=parquet_data)

Parquet file uploaded to misc-bucket-17/sample.parquet.


In [24]:
# Read the Parquet object back into a DataFrame and print it
obj = read_s3_file(bucket_name=bucket_name, object_name='sample.parquet')
print(obj)

   Name  Age
0  John   25
1  Anna   24


In [25]:
#############################################################################
#############################################################################

In [26]:
#.