# Purpose

This notebook creates a template document containing some basic information and stores it in S3.

# Discussion

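This assumes that:
* AWS credentials have been previously set up
* Boto3 has been installed:  `pip3 install boto3`
* `ruamel.yaml` has been installed and the `pandoc` executable is on the PATH (both are used further down)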

# Functions

In [71]:
%run "Functions.ipynb"

In [70]:
%run "Functions_S3.ipynb"

# main program

## Connect to S3

In [32]:
import boto3
#resource-level S3 handle used by the rest of the notebook (bucket lookup, object upload)
s3 = boto3.resource('s3')

In [33]:
#Bucket names are shared by every S3 user, so the name has to be globally unique.
#Allowed characters are lowercase letters, digits, '.' and '-'; the name must begin and end with a letter or digit.

#Alternatives that were tried:
#bucket_name = 'Documents' #this will fail
#bucket_name = create_unique_name('') #this is safer
#bucket_name = create_unique_name('documents'+'--'+str(uuid.uuid4())) #this is easier to directly work with
#bucket_name = 'testname.asyla.org'

#The name is pinned here so that it stays the same between runs
bucket_name = 'documents--88767106-9edc-4028-a451-0da43b669d7f'

print(f'bucket_name={bucket_name}')

bucket_name=documents--88767106-9edc-4028-a451-0da43b669d7f


## Create bucket if needed

In [34]:
#a bucket that does not exist reports no creation date
bucket_is_missing = s3.Bucket(bucket_name).creation_date is None
if bucket_is_missing:
    response = create_bucket(bucket_name, s3)
    #print(response)
    print ('bucket created')

## Create a new (yaml) file

### Create the name

In [35]:
import uuid
new_id = str(uuid.uuid4()) #random (version 4) UUID used to name the new document
document = fileNamePart (new_id) #save all of the permutations of the name
#document

document['dir']=37/2a/46/a8/25/37519f77-2ab3-4633-a8b1-2548c442ee73


### Ensure the file doesn't already exist

In [36]:
#make sure the file does not exist
#UUIDv4 values are random, so a duplicate is unlikely but still possible
import sys

key_already_used = does_key_exist(bucket_name, document['key_name'])
if key_already_used:
    #stop here rather than overwrite an existing object
    sys.exit('ERROR: File exists: '+document['key_name'])

### Collect necessary info

In [37]:
#The template is written as YAML text because that is the format it ends up in,
#and base Python structures don't play well with comments

from datetime import date

document_id = document['base_name'] #value written into the template's 'id' field
created_on = date.today().strftime('%Y%m%d') #today's date as YYYYMMDD for the first revision entry

#fields holding '!' are placeholders that still need a real value
YAMLcontent = f"""
'id': {document_id} # UUIDv4
'title': '!'
'subtitle': '!'
'author': '!' # who wrote the document
'abstract': '!'
'lang': 'en'
'type': '!' # Options: [regulation|policy|standard|guidance|requirement|control|procedure]
'source': '!'
'classification': 'public' # Options: [public|private|confidential]
referenceTag:
  keywords:
    - '!'
  relationship: '!' # Format: `ID : [parent|peer|child]`
status: # this section is to provide some automatic documentation management
  status: draft # Options: [draft|review|complete|expire]
  effective: '!'
  expire: '!'
revision:
  - date: '{created_on}'
    name: N/A
    reason: Initial template created
'body':  |
  # Blank template

  Put your text here.
"""

#print (YAMLcontent)

In [38]:
#prompt the user for input on certain fields

from ruamel.yaml import YAML #[ruamel.yaml documentation](https://yaml.readthedocs.io/en/latest/index.html)
yaml = YAML()

YAMLdata = yaml.load(YAMLcontent) #parse the YAML text into an ordered, dict-like structure

#top-level fields the user is asked about; every other field keeps its template value
PROMPTED_FIELDS = ('title', 'subtitle', 'author', 'abstract', 'type', 'source')

#walk the data structure
for field, current in YAMLdata.items():
    if field not in PROMPTED_FIELDS:
        continue

    answer = input(field+': '+current) #the current value is shown as part of the prompt
    if answer: #an empty answer keeps the existing value
        YAMLdata[field] = answer

#print (YAMLdata)

title: ! 
subtitle: ! 
author: ! 
abstract: ! 
type: ! 
source: ! 


### Set the S3 object params

In [39]:
#set the file permissions:
#https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.put
#ACL='private'|'public-read'|'public-read-write'|'authenticated-read'|'aws-exec-read'|'bucket-owner-read'|'bucket-owner-full-control',

#print (content['classification'])

import re

#Match the WHOLE classification value (case-insensitive, surrounding whitespace ignored).
#A substring search would also accept values such as 'non-public' or 'not private'
#and hand them a wider ACL than intended.
classification = str(YAMLdata['classification']).strip()

if re.fullmatch('public', classification, flags=re.IGNORECASE):
    put_ACL='public-read'
elif re.fullmatch('private', classification, flags=re.IGNORECASE):
    #NOTE(review): 'authenticated-read' grants READ to every authenticated AWS user,
    #not only to this account -- confirm that this is what 'private' should mean
    put_ACL='authenticated-read'
else:
    #anything else (e.g. 'confidential' or an unrecognised value) gets the most restrictive ACL
    put_ACL='private'

In [40]:
#metadata stored with the S3 object: only the document's classification
put_Metadata = dict(classification=YAMLdata['classification'])

### Write the file to S3

In [41]:
#https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.put
put_arguments = dict(
    ACL=put_ACL,
    ContentLanguage=YAMLdata['lang'],
    Metadata=put_Metadata,
    Body=yamlDump(YAMLdata), #output of yamlDump for the whole document
)

#the call is left as the last expression so that the S3 response is displayed
s3.Object(bucket_name, document['key_name']).put(**put_arguments)

{'ResponseMetadata': {'RequestId': 'F0F746F54CB7108D',
  'HostId': 'R9Ayq3gL+5sllZoKMRbELFysdJG3shwwBVB4dShV+raw+l5UkgvULUqD566slXdzj1ZNk+Az4gg=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'R9Ayq3gL+5sllZoKMRbELFysdJG3shwwBVB4dShV+raw+l5UkgvULUqD566slXdzj1ZNk+Az4gg=',
   'x-amz-request-id': 'F0F746F54CB7108D',
   'date': 'Mon, 25 Nov 2019 02:57:29 GMT',
   'etag': '"541ea8c2d02e7741c9d03b388a7ed554"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"541ea8c2d02e7741c9d03b388a7ed554"'}

## Compile document

In [74]:
#generate the html version of the document

import subprocess
import tempfile #https://docs.python.org/3/library/tempfile.html

results = compilePandocDocument (bucket_name, document['dir'], YAMLdata) #generate the Pandoc formatted document
#print (results)

#the context manager closes (and thereby deletes) the temp file even when Pandoc fails
with tempfile.NamedTemporaryFile() as tf:
    tf.write(results.encode('utf-8')) #the temp file is opened in binary mode, so encode the text first
    tf.flush() #push the buffered bytes to disk so that the Pandoc process can read them

    srcFile = tf.name #This is the name of the file
    dstFile = srcFile+".html" #read back by the next cell; it is NOT deleted together with the temp file

    #run Pandoc
    #check=True raises CalledProcessError on a non-zero exit status instead of silently carrying on
    subprocess.run([
        'pandoc',
            srcFile,
            '-s',
            '--html-q-tags',
            '-f', 'markdown+yaml_metadata_block+pandoc_title_block',
            '-t', 'html5',
            '-o', dstFile
    ], check=True)

In [75]:
#show the generated HTML
#'with' closes the file even if reading fails; the generated page declares charset utf-8,
#so read it as UTF-8 instead of relying on the platform default encoding
with open(dstFile, 'r', encoding='utf-8') as f:
    print(f.read())

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="generator" content="pandoc">
  <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
  <meta name="author" content="!">
  <title>!</title>
  <style type="text/css">code{white-space: pre;}</style>
  <!--[if lt IE 9]>
    <script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script>
  <![endif]-->
</head>
<body>
<header>
<h1 class="title">!</h1>
<p class="author">!</p>
</header>
<h1 id="blank-template">Blank template</h1>
<p>Put your text here.</p>
</body>
</html>



In [None]:
#TODO: save the file to S3 (not implemented yet)



## Update the database

The `contents.yaml` will be structured as follows:

``` yaml
id1:
  record1
id2:
  record2
idN:
  recordN
```

Where `id` is the ID of the file and `record` contains all of the document's fields except `body`.

In [None]:
#name of the YAML file that holds the tracking database (passed to updateDatabase)
file_name='contents.yaml'

### Format new record

In [None]:
#these fields are not stored in the DB file
for unwanted_field in ('id', 'body'):
    YAMLdata.pop(unwanted_field, None) #the None default makes this safe to re-run
YAMLdata

#### Fetch the object's URL

In [None]:
#https://boto3.amazonaws.com/v1/documentation/api/latest/guide/s3-presigned-urls.html
#note that this is doing more than just fetching the URL
#this is promising but is incomplete:  https://stackoverflow.com/a/48197877/12400492

s3_client = boto3.client('s3')
url = s3_client.generate_presigned_url(
    'get_object',
    Params={'Bucket': bucket_name,'Key': document['key_name']},
    ExpiresIn=60*60*24, #one day, in seconds
)
#print (url)

#for now, keep only the part in front of the query string
YAMLdata['object_url'] = url.partition('?')[0]

print (YAMLdata['object_url'])

### Update the tracking DB with latest info

In [None]:
#hand the record (YAMLdata without 'id' and 'body', plus 'object_url') to the tracking DB under the document's ID
#NOTE(review): presumably this rewrites file_name inside the bucket -- confirm against updateDatabase in the Functions notebooks
updateDatabase(bucket_name, file_name, document['base_name'], YAMLdata)