# Get objects from an S3 storage

This notebook collects datasets from an S3 location, stores them in a local folder, and then posts the results back to a different S3 prefix.

## Install the requirements

Install the python requirements for the script.

In [None]:
# install the required packages
# !pip install -r requirements.txt

## Import python libraries

Import the python libraries required for the script.

In [2]:
# imports
import os
import json
import boto3
from botocore.exceptions import ClientError
import ipywidgets as widgets

## Specify the S3 details in the environment

1. Create a file called ***.env*** in the top level directory.
1. Use the ***example.env*** file as a reference and fill out the required variables

In [8]:
# button that offers the example.env template file
download_button = widgets.Button(description='Example file', button_style='primary', icon='download')

# define the function to download the file
def download_file(download_button):
    """Read ``example.env`` from the working directory and write it back.

    :param download_button: the widget that triggered the call; not used.
    :return: None

    NOTE(review): the file is written back to the same path it was read
    from, so nothing is transferred to the browser. If a real download is
    intended, this probably needs to render a link instead - confirm.
    """
    file_name = 'example.env'
    # read through a context manager so the handle is closed before the
    # same path is reopened for writing
    with open(file_name, 'rb') as source:
        file_content = source.read()
    with open(file_name, 'wb') as target:
        target.write(file_content)

    
# define the function to handle the file download
def handle_file_download(change):
    """Click callback for the example-file button.

    :param change: the argument supplied by ``Button.on_click``, i.e. the
        button instance that was clicked.
    :return: None
    """
    # A Button has no ``value`` trait, so there is nothing to test or reset:
    # the callback only runs when a click happened. Using the callback
    # argument also avoids relying on the global ``download_button`` name,
    # which a later cell rebinds to a different button.
    download_file(change)
        
# hook the download handler up to the button's click event
download_button.on_click(handle_file_download)

# single-file upload widget restricted to .env files
file_upload = widgets.FileUpload(
    accept='.env', multiple=False,
    description='Upload .env', button_style='primary', icon='upload',
)

# define the function to upload the file
def upload_file(file_upload):
    """Save the first uploaded file into the current working directory.

    Both shapes of ``FileUpload.value`` are accepted:

    * a dict ``{file_name: {'content': bytes, ...}}`` (ipywidgets 7)
    * a tuple of dicts ``({'name': ..., 'content': memoryview, ...},)``
      (ipywidgets 8)

    :param file_upload: object whose ``value`` attribute holds the upload
    :return: None; nothing is written when ``value`` is empty
    """
    uploaded = file_upload.value
    if not uploaded:
        return

    if isinstance(uploaded, dict):
        file_name, entry = next(iter(uploaded.items()))
    else:
        entry = uploaded[0]
        file_name = entry['name']

    # keep only the final path component so the browser-supplied name
    # cannot point outside the working directory
    file_name = os.path.basename(file_name)

    # ``content`` may be bytes or a memoryview; bytes() handles both
    file_content = bytes(entry['content']).decode('utf-8')

    # write with the same encoding the content was decoded with
    with open(file_name, 'w', encoding='utf-8') as f:
        f.write(file_content)
    
# define the function to handle the file upload
def handle_file_upload(change):
    """Observer for the upload widget's ``value`` trait.

    :param change: the traitlets change notification; ``change['owner']``
        is the widget whose value changed.
    :return: None
    """
    uploader = change['owner']

    # the reset below triggers this observer again with an empty value,
    # so bail out early when there is nothing to save
    if not uploader.value:
        return

    upload_file(uploader)

    # reset the widget: the value is a dict in ipywidgets 7 and an
    # immutable tuple in ipywidgets 8, which has no ``clear`` method
    if isinstance(uploader.value, dict):
        uploader.value.clear()
    else:
        uploader.value = ()
        
# run handle_file_upload whenever the widget's value trait changes
file_upload.observe(handle_file_upload, names='value')

# show both widgets stacked vertically inside a padded box
display(
    widgets.VBox(
        [download_button, file_upload],
        layout=widgets.Layout(padding='10px'),
    )
)


VBox(children=(Button(button_style='primary', description='Example file', icon='download', style=ButtonStyle()…

In [5]:
# button that triggers loading of the .env file
load_env_button = widgets.Button(description='Load .env', button_style='primary', icon='check')

# define the function to load the environment variables
def load_env(button):
    """Load the variables defined in the local ``.env`` file via python-dotenv.

    :param button: the clicked widget supplied by ``on_click``; not used.
    :return: None
    """
    # imported on click rather than in the notebook's import cell
    import dotenv
    dotenv.load_dotenv(".env")
    
# call load_env each time the button is clicked
load_env_button.on_click(load_env)

# render the button inside a padded vertical box
display(
    widgets.VBox(
        [load_env_button],
        layout=widgets.Layout(padding='10px'),
    )
)


VBox(children=(Button(button_style='primary', description='Load .env', icon='check', style=ButtonStyle()),), l…

## Create an S3 client

Provide secret credentials for the S3 location, and create an S3 client.

In [13]:
# placeholders for the S3 client, the bucket name and the object prefix
s3_client, bucket_name, object_prefix = None, "", "dataset"

# button that triggers creation of the S3 client
create_s3_client_button = widgets.Button(description='Create S3 client', button_style='primary', icon='check')

# define the function to create an S3 client
def create_s3_client(button, s3_client=None, bucket_name=""):
    """Create the S3 client from environment variables and publish it.

    The client, bucket name and object prefix are stored in the
    module-level names ``s3_client``, ``bucket_name`` and ``object_prefix``.

    Environment variables read:

    * ``S3_REGION`` and ``S3_DOMAIN`` (required) - joined into the endpoint
      URL ``https://<region>.<domain>``
    * ``S3_BUCKET_KEY`` / ``S3_BUCKET_SECRET`` - passed to the client as
      access key id and secret access key
    * ``S3_BUCKET`` - bucket name, defaults to ``""``
    * ``S3_DATASET_PREFIX`` - object prefix, defaults to ``"dataset"``

    :param button: the clicked widget supplied by ``on_click``; not used.
    :param s3_client: ignored; kept so existing calls keep working.
    :param bucket_name: ignored; kept so existing calls keep working.
    :raises ValueError: if ``S3_REGION`` or ``S3_DOMAIN`` is unset or empty.
    :return: None
    """
    endpoint_parts = {name: os.getenv(name) for name in ('S3_REGION', 'S3_DOMAIN')}
    missing = [name for name, value in endpoint_parts.items() if not value]
    if missing:
        raise ValueError(
            "missing environment variable(s): " + ", ".join(missing)
            + " - upload and load the .env file first"
        )

    region = endpoint_parts['S3_REGION']
    endpoint = 'https://' + '.'.join([region, endpoint_parts['S3_DOMAIN']])

    session = boto3.session.Session()
    client = session.client('s3',
                            region_name=region,
                            endpoint_url=endpoint,
                            aws_access_key_id=os.getenv('S3_BUCKET_KEY'),
                            aws_secret_access_key=os.getenv('S3_BUCKET_SECRET'))

    # Plain assignment would only rebind this function's locals/parameters,
    # and ``global`` cannot be used for names that are also parameters, so
    # the module namespace is updated explicitly.
    globals().update(
        s3_client=client,
        bucket_name=os.getenv("S3_BUCKET", ""),
        object_prefix=os.getenv("S3_DATASET_PREFIX", "dataset"),
    )

# call create_s3_client each time the button is clicked
create_s3_client_button.on_click(create_s3_client)

# render the button inside a padded vertical box
display(
    widgets.VBox(
        [create_s3_client_button],
        layout=widgets.Layout(padding='10px'),
    )
)


VBox(children=(Button(button_style='primary', description='Create S3 client', icon='check', style=ButtonStyle(…

## List the objects in the bucket

In [14]:
# button that triggers the object listing
list_objects_button = widgets.Button(description='List objects', button_style='primary', icon='check')

# output area that receives whatever the listing prints
list_objects_output = widgets.Output()

# define the function to list objects
@list_objects_output.capture(clear_output=True)
def list_objects(button, s3_client=None, bucket_name=None, object_prefix=None):
    """Print the ``list_objects`` response for the configured bucket/prefix.

    Arguments left as ``None`` are looked up in the module namespace when
    the button is clicked, not when this cell is run, so values set after
    the cell was executed are picked up.

    :param button: the clicked widget supplied by ``on_click``; not used.
    :param s3_client: S3 client; defaults to the module-level ``s3_client``.
    :param bucket_name: bucket; defaults to the module-level ``bucket_name``.
    :param object_prefix: key prefix; defaults to the module-level
        ``object_prefix``.
    :return: None
    """
    # the parameters shadow the module-level names, hence the explicit lookup
    notebook_state = globals()
    if s3_client is None:
        s3_client = notebook_state.get('s3_client')
    if bucket_name is None:
        bucket_name = notebook_state.get('bucket_name', "")
    if object_prefix is None:
        object_prefix = notebook_state.get('object_prefix', "dataset")

    if s3_client is None:
        print("No S3 client available - click 'Create S3 client' first.")
        return

    # list bucket objects
    response = s3_client.list_objects(Bucket=bucket_name, Prefix=object_prefix)

    print(response)

# call list_objects each time the button is clicked
list_objects_button.on_click(list_objects)

# render the button above its output area inside a padded box
display(
    widgets.VBox(
        [list_objects_button, list_objects_output],
        layout=widgets.Layout(padding='10px'),
    )
)


VBox(children=(Button(button_style='primary', description='List objects', icon='check', style=ButtonStyle()), …

## Download the objects from the bucket

Get the files in a specific prefix and pull all the files from it.

In [15]:
# button that triggers the object download
# (note: this rebinds the name download_button, which an earlier cell used
# for the example-file button)
download_button = widgets.Button(description='Download objects', button_style='primary', icon='check')

# output area that receives the progress messages of the download
download_output = widgets.Output()

# define the function to download objects
@download_output.capture(clear_output=True)
def download_objects(button, s3_client=None, bucket_name=None, object_prefix=None):
    """Download every object under the prefix into the ``downloads`` folder.

    Each object is stored under its base name, so the key hierarchy is
    flattened. Arguments left as ``None`` are looked up in the module
    namespace when the button is clicked, not when this cell is run.

    :param button: the clicked widget supplied by ``on_click``; not used.
    :param s3_client: S3 client; defaults to the module-level ``s3_client``.
    :param bucket_name: bucket; defaults to the module-level ``bucket_name``.
    :param object_prefix: key prefix; defaults to the module-level
        ``object_prefix``.
    :return: None
    """
    download_path = "downloads"

    # the parameters shadow the module-level names, hence the explicit lookup
    notebook_state = globals()
    if s3_client is None:
        s3_client = notebook_state.get('s3_client')
    if bucket_name is None:
        bucket_name = notebook_state.get('bucket_name', "")
    if object_prefix is None:
        object_prefix = notebook_state.get('object_prefix', "dataset")

    if s3_client is None:
        print("No S3 client available - click 'Create S3 client' first.")
        return

    # download_file does not create the target directory itself
    os.makedirs(download_path, exist_ok=True)

    print("downloading files")

    # a single list_objects call returns at most one page of keys;
    # the paginator walks all pages
    paginator = s3_client.get_paginator('list_objects')
    for page in paginator.paginate(Bucket=bucket_name, Prefix=object_prefix):
        # 'Contents' is absent when nothing matches the prefix
        for obj in page.get('Contents', []):
            key = obj['Key']
            print(key)

            fname = os.path.basename(key)
            if not fname:
                # keys ending in "/" have no file name to write to
                continue
            print(fname)

            # a failure on one object is reported and does not stop the rest
            try:
                s3_client.download_file(bucket_name, key, os.path.join(
                    download_path, fname))
            except (PermissionError, ClientError) as e:
                print(e)

    print("done")
    
# call download_objects each time the button is clicked
download_button.on_click(download_objects)

# render the button above its output area inside a padded box
display(
    widgets.VBox(
        [download_button, download_output],
        layout=widgets.Layout(padding='10px'),
    )
)


VBox(children=(Button(button_style='primary', description='Download objects', icon='check', style=ButtonStyle(…

## Put files into S3 storage

Put datasets back into an S3 location once analysed. Provide some credentials for the S3 location and put everything in.

In [None]:
def client_upload_file(client, file_name, bucket, object_name=None):
    """Upload a file to an S3 bucket

    :param client: S3 client object providing ``upload_file``
    :param file_name: File to upload
    :param bucket: Bucket to upload to
    :param object_name: S3 object name. If not specified then the base name
        of file_name is used
    :return: True if file was uploaded, else False
    """

    # If S3 object_name was not specified, use the base name of file_name
    if object_name is None:
        object_name = os.path.basename(file_name)

    # Upload the file; upload_file has no useful return value
    try:
        client.upload_file(file_name, bucket, object_name)
    except (ClientError, boto3.exceptions.S3UploadFailedError) as e:
        # boto3's managed transfer reports failed uploads as
        # S3UploadFailedError, so catch it alongside ClientError
        print(e)
        return False
    return True

In [None]:
# button that triggers the upload of the results
upload_results_button = widgets.Button(description='Upload results', button_style='primary', icon='check')

# output area that receives the progress messages of the upload
upload_results_output = widgets.Output()

# define the function to upload the results
@upload_results_output.capture(clear_output=True)
def upload_results(button, s3_client=None, bucket_name=None):
    """Upload the files found in the sub-folders of ``uploads``.

    A file ``uploads/<folder>/<file>`` is stored under the object name
    ``uploads/<folder>/<file>``. Only files directly inside a first-level
    sub-folder are uploaded. Arguments left as ``None`` are looked up in the
    module namespace when the button is clicked, not when this cell is run.

    :param button: the clicked widget supplied by ``on_click``; not used.
    :param s3_client: S3 client; defaults to the module-level ``s3_client``.
    :param bucket_name: bucket; defaults to the module-level ``bucket_name``.
    :return: None
    """
    results_dir = "uploads"

    # the parameters shadow the module-level names, hence the explicit lookup
    notebook_state = globals()
    if s3_client is None:
        s3_client = notebook_state.get('s3_client')
    if bucket_name is None:
        bucket_name = notebook_state.get('bucket_name', "")

    if s3_client is None:
        print("No S3 client available - click 'Create S3 client' first.")
        return

    if not os.path.isdir(results_dir):
        print(f"Nothing to upload - directory '{results_dir}' does not exist.")
        return

    # do the uploads for all the result files
    # by iterating through sub-folders
    for folder in sorted(os.listdir(results_dir)):
        path = os.path.join(results_dir, folder)
        if not os.path.isdir(path):
            # a loose file next to the sub-folders cannot be listed
            continue

        for f in sorted(os.listdir(path)):
            filename = os.path.join(path, f)
            if not os.path.isfile(filename):
                continue

            object_name = '/'.join([os.path.basename(results_dir), folder, f])
            print(object_name)

            client_upload_file(s3_client, filename, bucket_name, object_name)
            

# call upload_results each time the button is clicked
upload_results_button.on_click(upload_results)

# render the button above its output area inside a padded box
display(
    widgets.VBox(
        [upload_results_button, upload_results_output],
        layout=widgets.Layout(padding='10px'),
    )
)
