# Purpose

This notebook will create a template document containing some basic info within S3

# Discussion

this assumes that
* aws credentials have been previously setup
* Boto3 has been installed:  `pip3 install boto3`

# Functions

## create_bucket(bucket_name, s3_resource)

`bucket_create_response = create_bucket(bucket_name, s3_resource)`

In [1]:
def create_bucket(bucket_name, s3_resource):
    return s3_resource.create_bucket(Bucket=bucket_name)

## searching for files

`get_matching_s3_objects(bucket, prefix="", suffix="")`

`key = get_matching_s3_keys(bucket, prefix="", suffix="")`

In [2]:
#https://alexwlchan.net/2017/07/listing-s3-keys/
#https://alexwlchan.net/2019/07/listing-s3-keys/

import boto3

def get_matching_s3_objects(bucket, prefix="", suffix=""):
    """
    Generate objects in an S3 bucket.

    :param bucket: Name of the S3 bucket.
    :param prefix: Only fetch objects whose key starts with
        this prefix (optional).
    :param suffix: Only fetch objects whose keys end with
        this suffix (optional).
    """
    s3 = boto3.client("s3")
    paginator = s3.get_paginator("list_objects_v2")

    kwargs = {'Bucket': bucket}

    # We can pass the prefix directly to the S3 API.  If the user has passed
    # a tuple or list of prefixes, we go through them one by one.
    if isinstance(prefix, str):
        prefixes = (prefix, )
    else:
        prefixes = prefix

    for key_prefix in prefixes:
        kwargs["Prefix"] = key_prefix

        for page in paginator.paginate(**kwargs):
            try:
                contents = page["Contents"]
            except KeyError:
                return

            for obj in contents:
                key = obj["Key"]
                if key.endswith(suffix):
                    yield obj


def get_matching_s3_keys(bucket, prefix="", suffix=""):
    """
    Generate the keys in an S3 bucket.

    :param bucket: Name of the S3 bucket.
    :param prefix: Only fetch keys that start with this prefix (optional).
    :param suffix: Only fetch keys that end with this suffix (optional).
    """
    for obj in get_matching_s3_objects(bucket, prefix, suffix):
        yield obj["Key"]

#for key in get_matching_s3_keys(bucket='testname.asyla.org', prefix='BlueMarble/', suffix='.jpg'):
#    print(key)
#print('\n\n')
#for key in get_matching_s3_keys(bucket='testname.asyla.org', suffix=('.jpg', '.JPG')):
#    print(key)

## does_key_exist(bucket_name, file_name)

`boolean = does_key_exist(bucket_name, file_name)`

In [3]:
#https://stackoverflow.com/questions/33842944/check-if-a-key-exists-in-a-bucket-in-s3-using-boto3

def does_key_exist(bucket_name, file_name):
    try:
        s3.Object(bucket_name, file_name).load()
    except:
        #print ('error')
        return (False)
    else:
        #print ('worked')
        return (True)

# main program

## Connect to S3

In [4]:
import boto3
s3 = boto3.resource('s3')

In [5]:
#Note that the name of a bucket must be unique to all of S3 DNS namespace
#Names can only start with [a-z0-9] but may include [a-z0-9-_./]

#bucket_name = 'Documents' #this will fail
#bucket_name = create_unique_name('') #this is safer
#bucket_name = create_unique_name('documents'+'--'+str(uuid.uuid4())) #this is easier to directly work with

bucket_name = 'documents--88767106-9edc-4028-a451-0da43b669d7f' #hardcode this so it doesnt change
#bucket_name = 'testname.asyla.org'

print('bucket_name='+bucket_name)

bucket_name=documents--88767106-9edc-4028-a451-0da43b669d7f


## Create bucket if needed

In [6]:
if s3.Bucket(bucket_name).creation_date is None: #there is no date if it doesnt exist
    response = create_bucket(bucket_name, s3)
    #print(response)
    print ('bucket created')

## Create a new yaml file

### Create the name

In [7]:
#generate all the file name permutations we may need

document={}

#generate the base name (documentID)
import uuid
document['base_name']=str(uuid.uuid4())
print("document['base_name']="+document['base_name'])

#generate the prefix (the path based upon the documentID)
import re
#couldnt get `re.sub()` to cooperate so did this instead
document['file_prefix'] = ''
for part in document['base_name'].split('-'):
    document['file_prefix'] += re.search('^.{2}', part).group(0) + '/' #match the first 2 chars and output the first match
print("document['file_prefix']="+document['file_prefix'])

#set the file suffix (extension)
document['file_suffix']='.ymal'
print("document['file_suffix']="+document['file_suffix'])

#generate the full filename (documentID + extension)
document['file_name']=document['base_name']+document['file_suffix']
print("document['file_name']="+document['file_name'])

#generate the full key (path + full filename)
document['key_name']=document['file_prefix']+document['file_name']
print("document['key_name']="+document['key_name'])

document

document['base_name']=6f80a72f-77aa-4969-a505-9348d62f2a5e
document['file_prefix']=6f/77/49/a5/93/
document['file_suffix']=.ymal
document['file_name']=6f80a72f-77aa-4969-a505-9348d62f2a5e.ymal
document['key_name']=6f/77/49/a5/93/6f80a72f-77aa-4969-a505-9348d62f2a5e.ymal


{'base_name': '6f80a72f-77aa-4969-a505-9348d62f2a5e',
 'file_prefix': '6f/77/49/a5/93/',
 'file_suffix': '.ymal',
 'file_name': '6f80a72f-77aa-4969-a505-9348d62f2a5e.ymal',
 'key_name': '6f/77/49/a5/93/6f80a72f-77aa-4969-a505-9348d62f2a5e.ymal'}

### Ensure the file doesnt already exist

In [8]:
#make sure the file does not exist
#UUIDv4 generates random strings so there is a (small) chance of a duplicate
import sys

if does_key_exist(bucket_name, document['key_name']):
    print ('ERROR: File exists: '+document['key_name'])
    sys.exit('ERROR: File exists: '+document['key_name'])

### Collect nessessary info

In [9]:
#insert data into `content` in the order it will be written to the file
#[Python 3.7+, dicts preserve insertion order](https://stackoverflow.com/a/52268128/12400492)

from datetime import date

content={}

fields=[
    {'id': document['base_name']},
    {'title': '!'},
    {'subtitle': '!'},
    {'author': '!'},
    {'abstract': '!'},
    {'lang': 'en'},
    {'type': '! # Options: [regulation|policy|standard|guidance|requirement|control|procedure]'},
    {'source': '!'},
    {'classification': 'public # Options: [public|private|confidential]'},
    {'referenceTag': {
            'keywords': ['! # This is a list (use `-`)'],
            'relationship': '! # `ID : [parent|peer|child]`'
        }
    },
    {'status': {
            'status': 'draft # Options: [draft|review|complete|expire]',
            'effective': '!',
            'expire': '!'
        }
    },
    {'revision': [{
            'date': date.today().strftime("%Y%m%d"),
            'name': 'N/A',
            'reason': 'Initial creation'
        }]
    },
    {'body': '!'}
]

for element in fields:
    #print (element)
    for key, value in element.items():
        #print (key)
        #print (value)
        if (key == 'id') or (key == 'lang') or (key == 'referenceTag') or (key == 'status') or (key == 'revision') or (key == 'body'):
            content[key] = value
        else:
            data = input(key+': '+value)
            if data == '': content[key] = value
            else: content[key] = data


#print (content)

title: ! 
subtitle: ! 
author: ! 
abstract: ! 
type: ! # Options: [regulation|policy|standard|guidance|requirement|control|procedure] 
source: ! 
classification: public # Options: [public|private|confidential] 


### Set the S3 object params

In [10]:
#https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.put

###########################################################
#set the file permissions:
#ACL='private'|'public-read'|'public-read-write'|'authenticated-read'|'aws-exec-read'|'bucket-owner-read'|'bucket-owner-full-control',

#print (content['classification'])

string = re.sub('#.*','',content['classification']) #remove the comment block
if re.search('public', string, flags=re.IGNORECASE):
    put_ACL='public-read'
elif re.search('private', string, flags=re.IGNORECASE):
    put_ACL='authenticated-read'
else:
    put_ACL='private'

In [11]:
###########################################################
#set the file's metadata
put_Metadata = { 'classification': string }

In [12]:
###########################################################
#set the file's language

#print (content['lang'])

string = re.sub('#.*','',content['lang']) #remove the comment block
string = re.sub('\W','',string) #remove non-word chars
if string != '':
    put_ContentLanguage=string

In [17]:
###########################################################
#format the file body
#Note that S3 expects the body to be in binary format

from ruamel.yaml import YAML #[ruamel.yaml documentation](https://yaml.readthedocs.io/en/latest/index.html)
yaml = YAML()

yaml.version = (1, 2) #https://yaml.readthedocs.io/en/latest/detail.html#document-version-support
yaml.default_flow_style = False #https://yaml.readthedocs.io/en/latest/basicuse.html#basic-usage
yaml.indent(mapping=2, sequence=4, offset=2) #https://yaml.readthedocs.io/en/latest/detail.html#indentation-of-block-sequences
#yaml.top_level_colon_align = True #https://yaml.readthedocs.io/en/latest/detail.html#positioning-in-top-level-mappings-prefixing
yaml.explicit_start=True #guessed from: https://pyyaml.org/wiki/PyYAMLDocumentation
yaml.explicit_end=True #guessed from: https://pyyaml.org/wiki/PyYAMLDocumentation
yaml.sort_keys=False #guessed from: [sort_keys=False](https://stackoverflow.com/a/55171433/12400492)

######################
#ruamel.yaml doesnt output to a variable so we have to jump through some hoops here....
#https://stackoverflow.com/questions/1218933/can-i-redirect-the-stdout-in-python-into-some-sort-of-string-buffer

from io import StringIO

old_stdout = sys.stdout
sys.stdout = mystdout = StringIO()

yaml.dump(content, sys.stdout)

sys.stdout = old_stdout

put_Body = mystdout.getvalue()

#print (put_Body)

######################
#insert the comments as actual comments

#t = ruamel.yaml.comments.CommentedMap()
t = yaml.load(mystdout.getvalue())


t['test'] = 'asdf'
t.yaml_add_eol_comment('Test Comment!', 'test', column=0)

#https://yaml.readthedocs.io/en/latest/detail.html#adding-replacing-comments
#data['xyz'].yaml_add_eol_comment('comment 6', 'e')
#t.yaml_add_eol_comment('Options: [regulation|policy|standard|guidance|requirement|control|procedure]','type')
t.yaml_add_eol_comment('QQQQQQ','id')

yaml.dump(t, sys.stdout)

%YAML 1.2
---
id: 6f80a72f-77aa-4969-a505-9348d62f2a5e  # QQQQQQ
title: '!'
subtitle: '!'
author: '!'
abstract: '!'
lang: en
type: '! # Options: [regulation|policy|standard|guidance|requirement|control|procedure]'
source: '!'
classification: 'public # Options: [public|private|confidential]'
referenceTag:
  keywords:
    - '! # This is a list (use `-`)'
  relationship: '! # `ID : [parent|peer|child]`'
status:
  status: 'draft # Options: [draft|review|complete|expire]'
  effective: '!'
  expire: '!'
revision:
  - date: '20191119'
    name: N/A
    reason: Initial creation
body: '!'
test: asdf # Test Comment!
...


### Write the file to S3

In [14]:
#https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.put
s3.Object(bucket_name, document['key_name']).put(
    ACL=put_ACL,
    ContentLanguage=put_ContentLanguage,
    Metadata=put_Metadata,
    Body=put_Body
)

{'ResponseMetadata': {'RequestId': 'F342F4C0DE2765B3',
  'HostId': 'gyPZwOYrxR+nzp9LYgwmGS/cZeNzUOBn+wx9wgmWFTBZALa97TO0fagOhSeTjwrSzBIeKDGtAKI=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'gyPZwOYrxR+nzp9LYgwmGS/cZeNzUOBn+wx9wgmWFTBZALa97TO0fagOhSeTjwrSzBIeKDGtAKI=',
   'x-amz-request-id': 'F342F4C0DE2765B3',
   'date': 'Wed, 20 Nov 2019 03:33:25 GMT',
   'etag': '"f3dff3d211ee28f05f090bc16676020f"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"f3dff3d211ee28f05f090bc16676020f"'}

## Update the database

The contents of `contents.yaml` as a python datastructure:

``` python
db=[
    {'id': 'string', 'asdf': 'qwerty', etc, etc},
    {'id': 'string', 'asdf': 'qwerty', etc, etc},
    {'id': 'string', 'asdf': 'qwerty', etc, etc}
]
```

### Format new record

In [None]:
#need to fetch the file's URL too

### Check for DB and create if needed

### Update DB with latest file info