# Purpose

This notebook creates a template document containing some basic information and stores it in S3.

# Discussion

This notebook assumes that:
* AWS credentials have already been set up
* Boto3 has been installed:  `pip3 install boto3`
* ruamel.yaml has been installed:  `pip3 install ruamel.yaml`

# Functions

In [22]:
%run "Functions.ipynb"

In [20]:
%run "Functions_S3.ipynb"

# main program

## Connect to S3

In [3]:
import boto3

# High-level (resource) handle used for all S3 access below.
s3 = boto3.resource(service_name='s3')

In [4]:
# Bucket names share one namespace across all of S3, so the name must be globally unique.
# Per the AWS naming rules a name may contain only lowercase letters, digits,
# hyphens and dots, and must begin and end with a letter or digit.
#
# Alternatives, kept for reference:
#   bucket_name = 'Documents'  # this will fail
#   bucket_name = create_unique_name('')  # this is safer
#   bucket_name = create_unique_name('documents'+'--'+str(uuid.uuid4()))  # this is easier to directly work with
#   bucket_name = 'testname.asyla.org'

# Hardcoded so that the name does not change between runs.
bucket_name = 'documents--88767106-9edc-4028-a451-0da43b669d7f'

print('bucket_name={}'.format(bucket_name))

bucket_name=documents--88767106-9edc-4028-a451-0da43b669d7f


## Create bucket if needed

In [5]:
# A bucket that does not exist has no creation date, so that is used as the existence test.
bucket_exists = s3.Bucket(bucket_name).creation_date is not None
if not bucket_exists:
    response = create_bucket(bucket_name, s3)
    print ('bucket created')

## Create a new (yaml) file

### Create the name

In [6]:
import uuid

# Use a random (version 4) UUID as the document's base name and let
# fileNamePart() return all of the permutations of the name.
new_document_id = str(uuid.uuid4())
document = fileNamePart(new_document_id)
#document

### Ensure the file doesn't already exist

In [7]:
# Make sure the key is not already in use.
# UUIDv4 values are random, so there is a (small) chance of a duplicate.
import sys

key_name = document['key_name']
if does_key_exist(bucket_name, key_name):
    # Stop here rather than overwrite an existing object.
    sys.exit('ERROR: File exists: ' + key_name)

### Collect necessary info

In [8]:
# The template is kept as YAML text because that is the format the document ends
# up in, and base Python structures do not play well with comments.

from datetime import date

# Placeholders: {doc_id} is the document's base name, {created} is today's date (YYYYMMDD).
YAML_TEMPLATE = """
'id': {doc_id} # UUIDv4
'title': '!'
'subtitle': '!'
'author': '!' # who wrote the document
'abstract': '!'
'lang': 'en'
'type': '!' # Options: [regulation|policy|standard|guidance|requirement|control|procedure]
'source': '!'
'classification': 'public' # Options: [public|private|confidential]
referenceTag:
  keywords:
    - '!'
  relationship: '!' # Format: `ID : [parent|peer|child]`
status: # this section is to provide some automatic documentation management
  status: draft # Options: [draft|review|complete|expire]
  effective: '!'
  expire: '!'
revision:
  - date: '{created}'
    name: N/A
    reason: Initial template created
'body':  |
  # Blank template

  Put your text here.
"""

YAMLcontent = YAML_TEMPLATE.format(
    doc_id=document['base_name'],
    created=date.today().strftime('%Y%m%d'),
)

In [9]:
# Prompt the user for input on certain fields of the template.

from ruamel.yaml import YAML #[ruamel.yaml documentation](https://yaml.readthedocs.io/en/latest/index.html)
yaml = YAML()

# Convert the YAML text into a dictionary-like structure.
YAMLdata = yaml.load(YAMLcontent)

# Top-level fields the user is asked to fill in.
PROMPT_FIELDS = ('title', 'subtitle', 'author', 'abstract', 'type', 'source')

# Walk the top-level entries; the current value is shown as part of the prompt
# and an empty answer leaves that value unchanged.
for key, value in YAMLdata.items():
    if key not in PROMPT_FIELDS:
        continue
    data = input(key+': '+value)
    if data:
        YAMLdata[key] = data

title: ! 
subtitle: ! 
author: ! 
abstract: ! 
type: ! 
source: ! 


### Set the S3 object params

In [11]:
# Choose the S3 canned ACL for the object from the document's classification.
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.put
# ACL='private'|'public-read'|'public-read-write'|'authenticated-read'|'aws-exec-read'|'bucket-owner-read'|'bucket-owner-full-control',

import re

# Match the whole value (ignoring case and surrounding whitespace) instead of
# searching for a substring: with a substring search a classification such as
# 'non-public' or 'not public' would be published as 'public-read'.
# Anything unrecognised falls through to the most restrictive ACL.
doc_classification = str(YAMLdata['classification'])

if re.fullmatch(r'\s*public\s*', doc_classification, flags=re.IGNORECASE):
    put_ACL='public-read'
elif re.fullmatch(r'\s*private\s*', doc_classification, flags=re.IGNORECASE):
    # NOTE(review): the 'authenticated-read' canned ACL grants READ to the
    # AuthenticatedUsers group, i.e. any authenticated AWS user, not only this
    # account. Confirm that this is really what 'private' documents should get.
    put_ACL='authenticated-read'
else:
    put_ACL='private'

In [12]:
# Metadata to attach to the S3 object: the document's classification.
put_Metadata = dict(classification=YAMLdata['classification'])

### Write the file to S3

In [13]:
# Write the document to S3 under its key name.
# put() parameters: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.put
target_object = s3.Object(bucket_name, document['key_name'])
target_object.put(
    ACL=put_ACL,
    ContentLanguage=YAMLdata['lang'],
    Metadata=put_Metadata,
    Body=yamlDump(YAMLdata),
)

{'ResponseMetadata': {'RequestId': '72CD2D496211AAAE',
  'HostId': '1g32Uv2VgmZb0e/pjZ4jIAJGP78QmLoo/kqaubY/fcelMCNLOCjDrcaH0yQPlAG3Vti/e5e091Y=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': '1g32Uv2VgmZb0e/pjZ4jIAJGP78QmLoo/kqaubY/fcelMCNLOCjDrcaH0yQPlAG3Vti/e5e091Y=',
   'x-amz-request-id': '72CD2D496211AAAE',
   'date': 'Sat, 23 Nov 2019 02:54:42 GMT',
   'etag': '"64d140d0a85a25198f7fff1c06d1310b"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"64d140d0a85a25198f7fff1c06d1310b"'}

## Compile document

In [14]:
# Build the Pandoc header from the document data and show it.
pandoc_header = createPandocHeader(YAMLdata)
print (pandoc_header)

%YAML 1.2
---
title: '!'
author: '!'
abstract: '!'
lang: en
...



In [21]:
# Display every name/path variant that fileNamePart() derives from the document's ID.
current_id = YAMLdata['id']
fileNamePart(current_id)

document['dir']=87/0e/4f/84/48/8765b6d5-0e32-4f73-844a-4884afc9aebc


{'base_name': '8765b6d5-0e32-4f73-844a-4884afc9aebc',
 'file_prefix': '87/0e/4f/84/48/',
 'dir': '87/0e/4f/84/48/8765b6d5-0e32-4f73-844a-4884afc9aebc',
 'file_suffix': '.ymal',
 'file_name': '8765b6d5-0e32-4f73-844a-4884afc9aebc.ymal',
 'key_name': '87/0e/4f/84/48/8765b6d5-0e32-4f73-844a-4884afc9aebc.ymal'}

In [28]:

def createMarkDownDocument (srcData, depth):
    """Assemble the Markdown text of a document from its 'body' entry.

    Parameters:
        srcData: mapping describing the document. 'body' is always read; 'id'
            is read only when a `.md` file has to be fetched from S3.
        depth: nesting level of this call. It is passed on (plus one) when
            recursing into a nested structure and is otherwise unused so far.

    Returns:
        The Markdown text as a string, depending on the type of 'body':
        * list  - each element is handled by kind (see below) and followed by
                  a blank line ('\\n\\n').
        * dict  - not supported yet; contributes nothing.
        * None  - contributes nothing.
        * other - treated as Markdown text and returned as-is.

        List elements are handled as follows:
        * nested list/dict    - processed recursively as the body of the same document.
        * name ending `.md`   - downloaded from the document's directory in the
                                bucket and appended.
        * name ending `.yaml` - recognised but not processed yet.
        * valid UUID          - recognised but not processed yet.
        * anything else       - assumed to be literal Markdown and appended.

    Relies on names defined outside this function: `re`, `bucket_name`,
    `downloadTextFile`, `fileNamePart` and `is_valid_uuid`.
    """
    body = srcData['body']
    document=''

    if isinstance(body, list):
        print ('List detected')

        for element in body:

            if isinstance(element, (list, dict)):
                print ('Structure detected - recursing')
                # Re-wrap the nested structure as the body of the same document so
                # the recursive call still has access to the document's 'id'.
                nested = dict(srcData, body=element)
                document += createMarkDownDocument (nested, depth+1) #walk the next level down and increase the header depth
                document += '\n\n'
                continue

            # Scalars other than strings (numbers, None, ...) are handled as text.
            text = element if isinstance(element, str) else str(element)

            if re.search(r'\.md$', text, flags=re.IGNORECASE):
                print ('MarkDown: ' + text)

                #read in the content
                # `downloadTextFile()` is located in "Functions_S3.ipynb"
                # assumes downloadTextFile() returns the file's text - TODO confirm
                document += downloadTextFile (bucket_name, fileNamePart(srcData['id'])['dir']+'/'+text)

            elif re.search(r'\.yaml$', text, flags=re.IGNORECASE):
                print ('YAML: ' + text)
                #TODO: process the file

            elif is_valid_uuid(text):
                print ('UUIDv4: ' + text)
                #TODO: go find the file and then process the file

            else: #No idea what this is. Assuming its MD text
                print ('unknown: ' + text)
                document += text #assuming that if you are manually entering MD then you can control the formatting too

            document += '\n\n'

    elif isinstance(body, dict):
        print ('Dict detected')

    elif body is not None:
        #print ('markdown:  no structure')
        document += body if isinstance(body, str) else str(body)

    return (document)

In [29]:
# Compile the document body, starting at the top nesting level.
createMarkDownDocument(srcData=YAMLdata, depth=0)

'# Blank template\n\nPut your text here.\n'

## Update the database

The `contents.yaml` will be structured as follows:

``` yaml
id1:
  record1
id2:
  record2
idN:
  recordN
```

Where each `id` is the ID of a file and each `record` contains all of that file's fields except `body`.

In [None]:
# Name of the file that is passed to updateDatabase() as the tracking DB.
file_name = "contents.yaml"

### Format new record

In [None]:
# 'id' and 'body' are not saved in the DB file, so drop them if present.
# NOTE: this changes YAMLdata in place; cells that read YAMLdata['id'] or
# YAMLdata['body'] will fail if they are re-run after this one.
for unwanted_key in ('id', 'body'):
    YAMLdata.pop(unwanted_key, None)
YAMLdata

#### Fetch the object's URL

In [None]:
# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/s3-presigned-urls.html
# Note that generating a presigned URL does more than just fetch the object's URL.
# This alternative is promising but incomplete:  https://stackoverflow.com/a/48197877/12400492

s3_client = boto3.client('s3')
url = s3_client.generate_presigned_url(
    'get_object',
    Params={'Bucket': bucket_name, 'Key': document['key_name']},
    ExpiresIn=60*60*24,
)

# For now, keep only the part in front of the query string.
YAMLdata['object_url'] = url.partition('?')[0]

print (YAMLdata['object_url'])

### Update the tracking DB with latest info

In [None]:
# Hand the bucket, the DB file name, the document's base name and the record to updateDatabase().
# NOTE(review): presumably the record is stored under the document's ID, following the
# contents.yaml layout described in this section - confirm in updateDatabase().
updateDatabase(bucket_name, file_name, document['base_name'], YAMLdata)