# Purpose

# Discussion

This notebook assumes that:
* AWS credentials have been previously set up
* Boto3 has been installed:  `pip3 install boto3`

# Functions

## create_bucket(bucket_name, s3_resource)

`bucket_create_response = create_bucket(bucket_name, s3_resource)`

In [1]:
def create_bucket(bucket_name, s3_resource):
    """
    Create a bucket through the given S3 resource.

    :param bucket_name: Name to give the new bucket.
    :param s3_resource: Object exposing ``create_bucket(Bucket=...)``
        (the notebook passes its ``boto3.resource('s3')`` handle).
    :return: Whatever ``s3_resource.create_bucket`` returns.
    """
    request = {"Bucket": bucket_name}
    return s3_resource.create_bucket(**request)

## searching for files

`get_matching_s3_objects(bucket, prefix="", suffix="")`

`key = get_matching_s3_keys(bucket, prefix="", suffix="")`

In [2]:
#https://alexwlchan.net/2017/07/listing-s3-keys/
#https://alexwlchan.net/2019/07/listing-s3-keys/

import boto3

def get_matching_s3_objects(bucket, prefix="", suffix=""):
    """
    Generate objects in an S3 bucket.

    Each yielded item is one entry of the ``"Contents"`` list of a
    ``list_objects_v2`` result page.

    :param bucket: Name of the S3 bucket.
    :param prefix: Only fetch objects whose key starts with
        this prefix (optional).  May also be a tuple or list of
        prefixes, which are listed one after another.
    :param suffix: Only fetch objects whose keys end with
        this suffix (optional).  May also be a tuple or list of
        suffixes; a key matches if it ends with any of them.
    """
    s3 = boto3.client("s3")
    paginator = s3.get_paginator("list_objects_v2")

    kwargs = {'Bucket': bucket}

    # We can pass the prefix directly to the S3 API.  If the user has passed
    # a tuple or list of prefixes, we go through them one by one.
    if isinstance(prefix, str):
        prefixes = (prefix, )
    else:
        prefixes = prefix

    # str.endswith() accepts a str or a tuple of str, but not a list.
    if not isinstance(suffix, str):
        suffix = tuple(suffix)

    for key_prefix in prefixes:
        kwargs["Prefix"] = key_prefix

        for page in paginator.paginate(**kwargs):
            # A page without "Contents" just means nothing matched this
            # prefix.  Treat it as empty instead of returning, so that the
            # remaining prefixes are still listed.
            for obj in page.get("Contents", ()):
                if obj["Key"].endswith(suffix):
                    yield obj


def get_matching_s3_keys(bucket, prefix="", suffix=""):
    """
    Generate the keys in an S3 bucket.

    Thin wrapper around ``get_matching_s3_objects`` that yields only the
    ``"Key"`` field of every matching object.

    :param bucket: Name of the S3 bucket.
    :param prefix: Only fetch keys that start with this prefix (optional).
    :param suffix: Only fetch keys that end with this suffix (optional).
    """
    matching_objects = get_matching_s3_objects(bucket, prefix, suffix)
    yield from (entry["Key"] for entry in matching_objects)

#for key in get_matching_s3_keys(bucket='testname.asyla.org', prefix='BlueMarble/', suffix='.jpg'):
#    print(key)
#print('\n\n')
#for key in get_matching_s3_keys(bucket='testname.asyla.org', suffix=('.jpg', '.JPG')):
#    print(key)

## does_key_exist(bucket_name, file_name)

`boolean = does_key_exist(bucket_name, file_name)`

In [54]:
#https://stackoverflow.com/questions/33842944/check-if-a-key-exists-in-a-bucket-in-s3-using-boto3

def does_key_exist(bucket_name, file_name):
    """
    Report whether an object exists in an S3 bucket.

    Uses the module-level ``s3`` resource, so that must be created before
    this function is called.

    :param bucket_name: Name of the S3 bucket.
    :param file_name: Key of the object to look for.
    :return: True if the object's metadata could be loaded, False if S3
        answered that the object was not found.
    :raises: Any S3 ``ClientError`` other than "not found" (for example
        access denied) is re-raised, so that a permission or
        configuration problem is not mistaken for a missing key.
    """
    try:
        s3.Object(bucket_name, file_name).load()
    except s3.meta.client.exceptions.ClientError as err:
        # Only a "not found" answer means the key is absent.
        if err.response["Error"]["Code"] in ("404", "NoSuchKey"):
            return False
        raise
    return True

# main program

## Connect to S3

In [3]:
import boto3
# Resource-style S3 handle used by does_key_exist() and the cells below.
s3 = boto3.resource(service_name="s3")

In [4]:
# A bucket name must be unique across the whole S3 DNS namespace.
# Names can only start with [a-z0-9].
# NOTE(review): the original note listed [a-z0-9-_./] as allowed characters;
# the current AWS naming rules appear to allow only lowercase letters,
# digits, dots and hyphens -- confirm before relying on '_' or '/'.
#
# Alternatives that were considered:
#   bucket_name = 'Documents'                  (this will fail)
#   bucket_name = create_unique_name('')       (this is safer)
#   bucket_name = create_unique_name('documents'+'--'+str(uuid.uuid4()))
#                                              (this is easier to directly work with)
#   bucket_name = 'testname.asyla.org'

# Hard-coded so that the name does not change between runs.
bucket_name = "documents--88767106-9edc-4028-a451-0da43b669d7f"

print(bucket_name)

documents--88767106-9edc-4028-a451-0da43b669d7f


## Setup bucket

In [5]:
# A bucket that does not exist has no creation date.
bucket_is_missing = s3.Bucket(bucket_name).creation_date is None

if not bucket_is_missing:
    print (bucket_name + ' exists')
else:
    # Create the bucket and show what the service answered.
    response = create_bucket(bucket_name, s3)
    print(response)

documents--88767106-9edc-4028-a451-0da43b669d7f exists


## create/write to a file

In [50]:
import uuid
# Random (version 4) UUID, kept as text, that identifies the new document.
documentID = f"{uuid.uuid4()}"
print(documentID)

01908f89-1998-4099-9117-f90eab55be20


In [58]:
#generate the path

#each dash-separated part of the document ID contributes its first 2 characters
#as one directory level; plain slicing does this directly, so no regular
#expression is needed (and a part shorter than 2 characters cannot crash)

import re #no longer used by this cell; kept because other cells may rely on it

path = ''.join(part[:2] + '/' for part in documentID.split('-'))

print (path)

01/19/40/91/f9/


In [57]:
# The object key is the directory-style path followed by the file name.
file_name = f"{documentID}.yaml"
key_name = f"{path}{file_name}"
print(key_name)

01/19/40/91/f9/01908f89-1998-4099-9117-f90eab55be20.yaml


In [55]:
#check whether the key is already in the bucket; the notebook displays the returned boolean
does_key_exist(bucket_name, key_name)

False

## Update the file

### check for file size

In [44]:
#use one Object instance for all three attributes, so its metadata only has to
#be loaded once instead of once per freshly created Object
s3_object = s3.Object(bucket_name, key_name)

#find the size of the file
#https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.content_length
print (s3_object.content_length)

#last modified
#https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.last_modified
print (s3_object.last_modified)

#associated metadata
#https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.metadata
#NOTE: The actual `"key": "value"` pair would be `"x-amz-meta-test": "this is a test"` while here it would show as `"test": "this is a test"`
print (s3_object.metadata)

26
2019-11-15 19:21:00+00:00
{'test': 'this is a test'}


In [20]:
#download a file, append data to it and upload it again

import tempfile #https://docs.python.org/3/library/tempfile.html

maxSize=1024*1024*10 #10MB

#NOTE(review): this cell addresses the object by `file_name`, while the
#existence and metadata cells use `key_name` (path + file_name) -- confirm
#which key is intended
s3_object = s3.Object(bucket_name, file_name)

#create a temp file stored in memory or disk depending on size; the `with`
#block closes/deletes it even if the download or upload raises
with tempfile.SpooledTemporaryFile(max_size=maxSize) as tf:
    s3_object.download_fileobj(tf) #download the file

    tf.seek(0) #goto the beginning of the file
    t=tf.read()
    print (t)

    tf.seek(0,2) #whence=2: goto the end of the file:  https://docs.python.org/3/tutorial/inputoutput.html#methods-of-file-objects
    tf.write(b'here is some text\n')

    tf.seek(0) #goto the beginning of the file
    t=tf.read()
    print (t)

    tf.seek(0) #goto the beginning of the file so the whole content is uploaded
    s3_object.upload_fileobj(tf) #upload the file

b'\n\nmore text for the file\n\nhere is some text\nhere is some text\nhere is some text\nhere is some text\n'
b'\n\nmore text for the file\n\nhere is some text\nhere is some text\nhere is some text\nhere is some text\nhere is some text\n'
