# Purpose

# Discussion

this assumes that
* aws credentials have been previously setup
* Boto3 has been installed:  `pip3 install boto3`

# Functions

## create_bucket(bucket_name, s3_resource)

`bucket_create_response = create_bucket(bucket_name, s3_resource)`

In [1]:
def create_bucket(bucket_name, s3_resource):
    return s3_resource.create_bucket(Bucket=bucket_name)

## searching for files

`get_matching_s3_objects(bucket, prefix="", suffix="")`

`key = get_matching_s3_keys(bucket, prefix="", suffix="")`

In [2]:
#https://alexwlchan.net/2017/07/listing-s3-keys/
#https://alexwlchan.net/2019/07/listing-s3-keys/

import boto3

def get_matching_s3_objects(bucket, prefix="", suffix=""):
    """
    Generate objects in an S3 bucket.

    :param bucket: Name of the S3 bucket.
    :param prefix: Only fetch objects whose key starts with
        this prefix (optional).
    :param suffix: Only fetch objects whose keys end with
        this suffix (optional).
    """
    s3 = boto3.client("s3")
    paginator = s3.get_paginator("list_objects_v2")

    kwargs = {'Bucket': bucket}

    # We can pass the prefix directly to the S3 API.  If the user has passed
    # a tuple or list of prefixes, we go through them one by one.
    if isinstance(prefix, str):
        prefixes = (prefix, )
    else:
        prefixes = prefix

    for key_prefix in prefixes:
        kwargs["Prefix"] = key_prefix

        for page in paginator.paginate(**kwargs):
            try:
                contents = page["Contents"]
            except KeyError:
                return

            for obj in contents:
                key = obj["Key"]
                if key.endswith(suffix):
                    yield obj


def get_matching_s3_keys(bucket, prefix="", suffix=""):
    """
    Generate the keys in an S3 bucket.

    :param bucket: Name of the S3 bucket.
    :param prefix: Only fetch keys that start with this prefix (optional).
    :param suffix: Only fetch keys that end with this suffix (optional).
    """
    for obj in get_matching_s3_objects(bucket, prefix, suffix):
        yield obj["Key"]

#for key in get_matching_s3_keys(bucket='testname.asyla.org', prefix='BlueMarble/', suffix='.jpg'):
#    print(key)
#print('\n\n')
#for key in get_matching_s3_keys(bucket='testname.asyla.org', suffix=('.jpg', '.JPG')):
#    print(key)

## does_key_exist(bucket_name, file_name)

`boolean = does_key_exist(bucket_name, file_name)`

In [3]:
#https://stackoverflow.com/questions/33842944/check-if-a-key-exists-in-a-bucket-in-s3-using-boto3

def does_key_exist(bucket_name, file_name):
    try:
        s3.Object(bucket_name, file_name).load()
    except:
        #print ('error')
        return (False)
    else:
        #print ('worked')
        return (True)

# Examples

In [14]:
#.casefold() is a Python3 unicode friendly way to do a case insensitive match


t = 'TEST'
if t.casefold() == 'tEsT'.casefold():
    print (True)
else:
    print (False)

True


In [30]:
#initial setup stuff
import boto3
s3 = boto3.resource('s3')

import uuid
bucket_name = str(uuid.uuid4())

document['key_name'] = str(uuid.uuid4())

## Create a bucket

In [31]:
create_bucket(bucket_name, s3)

s3.Bucket(name='eda4d36f-9812-410a-ad14-cfa2b149c7d9')

## Create an object

In [32]:
#https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.put
s3.Object(bucket_name, document['key_name']).put(Body=b'the quick brown fox jumped over the lazy dog\n')

{'ResponseMetadata': {'RequestId': 'E727EE630C47C4B5',
  'HostId': 'RyH8y+cMXqF6qdwYQtJ+qDqKd2tGSa1LAar1ORX4HeYVgultO8H+sl3OHCc4LvIQSBTkjMM8dB8=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'RyH8y+cMXqF6qdwYQtJ+qDqKd2tGSa1LAar1ORX4HeYVgultO8H+sl3OHCc4LvIQSBTkjMM8dB8=',
   'x-amz-request-id': 'E727EE630C47C4B5',
   'date': 'Mon, 18 Nov 2019 19:15:17 GMT',
   'etag': '"1689cdbdec02851b893aa62c6e3cc2f7"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"1689cdbdec02851b893aa62c6e3cc2f7"'}

## Find the size of the file

In [33]:
#find the size of the file
#https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.content_length
print (s3.Object(bucket_name, document['key_name']).content_length)

45


## Last modified

In [34]:
#last modified
#https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.last_modified
print (s3.Object(bucket_name, document['key_name']).last_modified)

2019-11-18 19:15:17+00:00


## Associated metadata

In [35]:
#associated metadata
#https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.metadata
#NOTE: The actual `"key": "value"` pair would be `"x-amz-meta-test": "this is a test"` while here it would show as `"test": "this is a test"`
print (s3.Object(bucket_name, document['key_name']).metadata)

{}


## Download a file and append data to it

In [37]:
#download a file and append data to it

import tempfile #https://docs.python.org/3/library/tempfile.html

maxSize=1024*1024*10 #10MB

tf = tempfile.SpooledTemporaryFile(max_size=maxSize) #create a temp file stored in memory or disk depending on size

s3.Object(bucket_name, document['key_name']).download_fileobj(tf) #download the file

tf.seek(0) #goto the beginning of the file
t=tf.read()
print (t)

tf.seek(0,2) #goto 0th byte before EOF:  https://docs.python.org/3/tutorial/inputoutput.html#methods-of-file-objects
tf.write(b'here is some text\n')

tf.seek(0) #goto the beginning of the file
t=tf.read()
print (t)

tf.seek(0) #goto the beginning of the file
s3.Object(bucket_name, document['key_name']).upload_fileobj(tf) #upload the file

tf.close() #close/delete the file

b'the quick brown fox jumped over the lazy dog\n'
b'the quick brown fox jumped over the lazy dog\nhere is some text\n'


## return the file as a string

In [15]:
response = s3.Object(bucket_name, document['key_name']).get()['Body'].read().decode('utf-8') 
print (response)

NameError: name 'document' is not defined

## Delete a file

In [38]:
#https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.delete
s3.Object(bucket_name, document['key_name']).delete

<bound method ResourceFactory._create_action.<locals>.do_action of s3.Object(bucket_name='eda4d36f-9812-410a-ad14-cfa2b149c7d9', key='695e20b6-cf0e-4daf-8cc3-557230bcb6c8')>

## Delete all objects

In [39]:
def delete_all_objects(bucket_name, s3_resource):
    res = []
    bucket=s3_resource.Bucket(bucket_name)
    for obj_version in bucket.object_versions.all():
        res.append({'Key': obj_version.object_key,
                    'VersionId': obj_version.id})
    #print(res)
    if res!=[]:
        bucket.delete_objects(Delete={'Objects': res})
        
delete_all_objects(bucket_name, s3)

## Delete the bucket

In [40]:
#delete the (empty) bucket
s3.Bucket(bucket_name).delete()

{'ResponseMetadata': {'RequestId': '96E15738DBF7C0DF',
  'HostId': 'iFGkgK62eVOUvez4mt7EeAbofO3a+7z/bExWacaZODSDAibL704Hsjm0MZc2k8WbVRkoUwIQNHc=',
  'HTTPStatusCode': 204,
  'HTTPHeaders': {'x-amz-id-2': 'iFGkgK62eVOUvez4mt7EeAbofO3a+7z/bExWacaZODSDAibL704Hsjm0MZc2k8WbVRkoUwIQNHc=',
   'x-amz-request-id': '96E15738DBF7C0DF',
   'date': 'Mon, 18 Nov 2019 19:15:21 GMT',
   'server': 'AmazonS3'},
  'RetryAttempts': 0}}