In [None]:
import boto3
import configparser
from botocore.exceptions import ClientError

In [None]:
# Load the pipeline settings from the INI-style config file that sits next to
# the notebook. The file is opened in a `with` block so the handle is closed
# as soon as parsing finishes instead of being left to the garbage collector.
config = configparser.ConfigParser()
with open('covid19-analytics.config') as config_file:
    config.read_file(config_file)

In [None]:
# Access key pair from the [AWS] section; these are handed to the target S3
# client as its explicit credentials.
KEY = config.get(section='AWS', option='KEY')
SECRET = config.get(section='AWS', option='SECRET')

In [None]:
# Source side: the bucket to read from, a comma-separated list of key prefixes
# inside that bucket, and the region used for the source client.
SRC_S3 = config.get(section='S3', option='SRC_S3')
SRC_BUCKETS = config.get(section='S3', option='SRC_BUCKETS')
SRC_REGION = config.get(section='S3', option='SRC_REGION')

# Target side: the bucket that receives the copied objects, a second bucket
# that is only created by this notebook, and the region both are created in.
TARGET_S3 = config.get(section='S3', option='TARGET_S3')
TARGET_OUTPUT_S3 = config.get(section='S3', option='TARGET_OUTPUT_S3')
TARGET_REGION = config.get(section='S3', option='TARGET_REGION')

In [None]:
# Client used to list the source bucket. No credentials are passed here, so
# boto3 falls back to its own default credential resolution.
src_s3_client = boto3.client(service_name='s3', region_name=SRC_REGION)

# Client used for every operation on the target side (head/create bucket,
# head/copy object); it authenticates with the key pair from the config file.
target_s3_client = boto3.client(
    service_name='s3',
    aws_access_key_id=KEY,
    aws_secret_access_key=SECRET,
    region_name=TARGET_REGION,
)

In [None]:
def create_s3_if_not_exists(bucket_name, bucket_region):
    """Create ``bucket_name`` through ``target_s3_client`` unless it already exists.

    Existence is probed with ``head_bucket``. Only a "not found" answer leads
    to a ``create_bucket`` call; any other failure of the probe (for example a
    403 for a bucket owned by someone else) is printed and the function
    returns without creating anything.

    Args:
        bucket_name: Name of the bucket to look up or create.
        bucket_region: Region to create the bucket in. For ``us-east-1`` (or
            an empty value) the bucket is created without a
            ``LocationConstraint``, because S3 rejects that constraint value
            for its default region.

    Returns:
        None. Progress and probe errors are reported with ``print``.

    Raises:
        Whatever ``create_bucket`` raises; creation errors are not caught.
    """
    try:
        # Check if bucket exists
        target_s3_client.head_bucket(Bucket=bucket_name)
    except ClientError as e:
        error_code = e.response['Error']['Code']
        # head_bucket reports a missing bucket as '404'; the symbolic codes
        # are accepted as well so the check does not hinge on one spelling.
        if error_code not in ('404', 'NoSuchBucket', 'NotFound'):
            print(e)
            return
    except Exception as e:
        print(e)
        return
    else:
        print(f"Bucket {bucket_name} already exists.")
        return

    # Bucket is missing: build the request, adding the location constraint
    # only for regions where S3 accepts it.
    create_kwargs = {'Bucket': bucket_name}
    if bucket_region and bucket_region != 'us-east-1':
        create_kwargs['CreateBucketConfiguration'] = {
            'LocationConstraint': bucket_region
        }
    target_s3_client.create_bucket(**create_kwargs)
    print(f"Bucket '{bucket_name}' created.")

In [None]:
def _copy_object_if_missing(src_s3, target_s3, key):
    """Copy one object from ``src_s3`` to ``target_s3`` under the same key if absent."""
    try:
        target_s3_client.head_object(Bucket=target_s3, Key=key)
    except ClientError as e:
        if e.response['Error']['Code'] != '404':
            # Anything other than "not found" (e.g. 403) means we cannot tell
            # whether the object exists; report it instead of dropping the
            # object without a trace.
            print(f"ClientError checking {target_s3}/{key}: {e}")
            return
    else:
        print(f"Skipping {key}, already exists")
        return

    print(f"Copying {key} to {target_s3}/{key}")
    try:
        target_s3_client.copy_object(
            CopySource={'Bucket': src_s3, 'Key': key},
            Bucket=target_s3,
            Key=key
        )
    except ClientError as e:
        print(f"ClientError: {e}")
    except Exception as e:
        print(f"Exception: {e}")


def copy_objects_from_s3_to_s3(src_s3, src_buckets, target_s3):
    """Copy every object under the given prefixes from one bucket to another.

    The source bucket is listed with ``src_s3_client``; existence checks and
    the copies themselves go through ``target_s3_client``. Objects keep their
    key, and keys already present in the target are skipped. Copy failures
    are printed and the run continues with the next object.

    Args:
        src_s3: Name of the bucket to read from.
        src_buckets: Comma-separated key prefixes inside ``src_s3``;
            whitespace around each entry is ignored.
            NOTE(review): an empty entry (e.g. from a trailing comma) is
            passed on as ``Prefix=''`` and therefore lists the whole bucket -
            confirm whether that is intended.
        target_s3: Name of the bucket to copy into.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    # s3 uses prefixes not folder structure so the paginator iterates over
    # everything whose key starts with the prefix
    paginator = src_s3_client.get_paginator('list_objects_v2')

    for src_bucket in src_buckets.split(','):
        src_bucket = src_bucket.strip()  # Remove any white spaces
        print(f"\nAccessing src_bucket: {src_bucket}>>>")

        for page in paginator.paginate(Bucket=src_s3, Prefix=src_bucket):
            if 'Contents' not in page:
                print(f"No content in {src_bucket}\n")
                continue
            for obj in page['Contents']:
                _copy_object_if_missing(src_s3, target_s3, obj['Key'])

In [None]:
# Make sure the bucket that receives the copied objects exists.
create_s3_if_not_exists(bucket_name=TARGET_S3, bucket_region=TARGET_REGION)

In [None]:
# Make sure the second (output) bucket exists as well; nothing is copied into it here.
create_s3_if_not_exists(bucket_name=TARGET_OUTPUT_S3, bucket_region=TARGET_REGION)

In [None]:
# Copy every object under the configured prefixes from the source bucket into the target bucket.
copy_objects_from_s3_to_s3(src_s3=SRC_S3, src_buckets=SRC_BUCKETS, target_s3=TARGET_S3)