# Purpose

This notebook will create a template document containing some basic info within S3

# Discussion

this assumes that
* aws credentials have been previously setup
* Boto3 has been installed:  `pip3 install boto3`

# Functions

In [1]:
%run "Functions.ipynb"

In [2]:
%run "Functions_S3.ipynb"

# main program

In [3]:
#filename constants
document={}
document['bucket']='documents.asyla.org'
document['contentFile'] = 'content.yaml'
document['homepage'] = 'index.html'

## Connect to S3

In [4]:
import boto3
s3 = boto3.resource('s3')

## Create bucket if needed

In [5]:
if s3.Bucket(document['bucket']).creation_date is None: #there is no date if it doesnt exist
    response = create_bucket(document['bucket'], s3)
    #print(response)
    print ('bucket created')

## Create template doc if needed

In [6]:
YAMLtemplateName='template.yaml'

put_Body = """
'id': '!' # UUIDv4
'title': '!'
'subtitle': '!'
'author': '!' # who wrote the document
'abstract': '!'
'lang': '!'
'type': '!' # Options: [regulation|policy|standard|guidance|requirement|control|procedure|other]
'source': '!'
'classification': '!' # Options: [public|private|confidential]
referenceTag:
  keywords:
    - '!'
  relationship: '!' # Format: `ID : [parent|peer|child]`
status: # this section is to provide some automatic documentation management
  status: '!' # Options: [draft|review|complete|expire]
  effective: '!'
  expire: '!'
revision:
  - date: '!'
    name: '!'
    reason: '!'
'body':  '!'
"""

if not does_key_exist(document['bucket'], YAMLtemplateName):
    results = s3.Object(document['bucket'], YAMLtemplateName).put(
        ACL='bucket-owner-full-control', #the world doesnt really need to look at this
        Body=put_Body,
        ContentEncoding='utf-8', #its a normal ascii file
        ContentType='text/html' #declare its HTML so a browser will open it
    )

    #print (results)

## Create a new (yaml) file

### Create the key name

In [7]:
#UUIDv4 generates random strings so there is a (small) chance of a duplicate

while True: #keep trying new names until we get a unique one
    import uuid
    names = fileNamePart (str(uuid.uuid4())) #save all of the purmutations of the name

    if not does_key_exist(document['bucket'], names['key_name']): #make sure it doesnt exist
        document.update(names)
        break #doesnt exist: quit looping
        
#generate the homepage full key (dir + homepage filename)
document['documentHomepage']=document['dir']+document['homepage'] #this is the keyname the document will be compiled into

#document

### Collect nessessary info

In [8]:
#fetch the current template

YAMLcontent = s3.Object(document['bucket'], YAMLtemplateName).get()['Body'].read().decode('utf-8') 

from ruamel.yaml import YAML #[ruamel.yaml documentation](https://yaml.readthedocs.io/en/latest/index.html)
yaml = YAML()
YAMLdata = yaml.load(YAMLcontent) #read the YAML file content into an OrderdDictionary

YAMLdata['id'] = document['base_name']
YAMLdata['lang'] = 'en'
YAMLdata['type'] = 'other'
YAMLdata['classification'] = 'public'
YAMLdata['status']['status'] = 'draft'

from datetime import date
YAMLdata['revision'][0]['date'] = date.today().strftime('%Y%m%d')
YAMLdata['revision'][0]['name'] = 'system'
YAMLdata['revision'][0]['reason'] = 'generated from template'

#print (yamlDump(YAMLdata))

In [9]:
#prompt the user for input on certain fields

#walk the data structure
for key, value in YAMLdata.items():
    #print (key, value)

    if (key == 'title') or (key == 'subtitle') or (key == 'author') or (key == 'abstract') or (key == 'type') or (key == 'source'):
        data = input(key+': '+value)
        if data != '': YAMLdata[key] = data

#print(yamlDump(YAMLdata))

title: ! wqeav
subtitle: ! 
author: ! 
abstract: ! 
type: other 
source: ! 


### Auto-set the S3 object params

In [10]:
#set the file permissions:
#https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.put
#ACL='private'|'public-read'|'public-read-write'|'authenticated-read'|'aws-exec-read'|'bucket-owner-read'|'bucket-owner-full-control',

#print (content['classification'])

import re
if re.search('public', YAMLdata['classification'], flags=re.IGNORECASE):
    put_ACL='public-read'
elif re.search('private', YAMLdata['classification'], flags=re.IGNORECASE):
    put_ACL='authenticated-read'
else:
    put_ACL='private'

In [11]:
#set the file's metadata
put_Metadata = { 'classification': YAMLdata['classification'] }

### Write the source file to S3

In [12]:
#https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.put
results = s3.Object(document['bucket'], document['key_name']).put(
    ACL=put_ACL,
    Body=yamlDump(YAMLdata),
    ContentEncoding='utf-8',
    ContentLanguage=YAMLdata['lang'],
    ContentType='text/plain',
    Metadata=put_Metadata
)

#print (results)

## Compile document

In [13]:
#generate the html version of the document
results = compilePandocDocument (document['bucket'], document['dir'], YAMLdata) #generate the Pandoc formatted document
put_Body = bytearray(convertPandoc2Html (results), encoding='utf-8') #convert from Pandoc to HTML and save the results in bin format

#print (convertPandoc2Html (results))

#save the file to S3
results = s3.Object(document['bucket'], document['documentHomepage']).put(
    ACL='public-read', #this needs fixed to match the source doc's permissions
    Body=put_Body,
    ContentEncoding='utf-8', #its a normal ascii file
    ContentType='text/html' #declare its HTML so the browser will open it
)

#print (results)

Null body


## Update the database

The `contents.yaml` will be structured as follows:

``` yaml
id1:
  record1
id2:
  record2
idN:
  recordN
```

Where 'id' is the ID of the file and 'record' contains all but 'body'

### Format new record

In [14]:
#we dont want to save these in the DB file
#Note: .pop() also returns the value that (was) in there
YAMLdata.pop('id',None)
YAMLdata.pop('body',None)

#add in the additional info that has been generated
YAMLdata['url']={}
YAMLdata['url']['sourceYaml'] = getKeyURL(document['bucket'], document['key_name']) #fetch the URL of the source YAML file
YAMLdata['url']['documentHomepage'] = getKeyURL(document['bucket'], document['documentHomepage']) #fetch the URL of the dir's main page

print (YAMLdata['url']['sourceYaml'])
print (YAMLdata['url']['documentHomepage'])
#print(yamlDump(YAMLdata))

https://s3.amazonaws.com/documents.asyla.org/ac/fb/4b/bc/db/acd938f8-fbe3-4bda-bc6e-dbef0b590f17.ymal
https://s3.amazonaws.com/documents.asyla.org/ac/fb/4b/bc/db/acd938f8-fbe3-4bda-bc6e-dbef0b590f17index.html


### Update the tracking DB with latest info

In [15]:
results = updateDatabase(document['bucket'], document['contentFile'], document['base_name'], YAMLdata)
#print (results)

## Update the Table of Contents

In [16]:
from ruamel.yaml import YAML #[ruamel.yaml documentation](https://yaml.readthedocs.io/en/latest/index.html)
yaml = YAML()

dbContent = {} #create an empty dictionary

if does_key_exist(document['bucket'], document['contentFile']):
    #fetch the DB and return it as a string
    #https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Object.get
    #https://stackoverflow.com/questions/31976273/open-s3-object-as-a-string-with-boto3#35376156
    fileBody = s3.Object(document['bucket'], document['contentFile']).get()['Body'].read().decode('utf-8') 
    #print (fileBody)

    dbContent = yaml.load(fileBody) #read the file into an OrderdDictionary

In [17]:
output="""
---
'title': Table of Contents
'lang': en
...

"""

for key in dbContent:
    output += '* ['+dbContent[key]['title']+']('+dbContent[key]['url']['documentHomepage']+') '
    output += '[[yaml]('+dbContent[key]['url']['sourceYaml']+')]\n'

print (output)


---
'title': Table of Contents
'lang': en
...

* [dsfgadfsg](https://s3.amazonaws.com/documents.asyla.org/18/48/4a/8c/c6/18276360-48e8-4a4e-8c46-c6aa15f68a67index.html) [[yaml](https://s3.amazonaws.com/documents.asyla.org/18/48/4a/8c/c6/18276360-48e8-4a4e-8c46-c6aa15f68a67.ymal)]
* [lkansdxc](https://s3.amazonaws.com/documents.asyla.org/0e/3e/48/bc/fc/0ece5805-3e7a-4829-bc74-fcb9b3a9e9a0index.html) [[yaml](https://s3.amazonaws.com/documents.asyla.org/0e/3e/48/bc/fc/0ece5805-3e7a-4829-bc74-fcb9b3a9e9a0.ymal)]
* [wqeav](https://s3.amazonaws.com/documents.asyla.org/ac/fb/4b/bc/db/acd938f8-fbe3-4bda-bc6e-dbef0b590f17index.html) [[yaml](https://s3.amazonaws.com/documents.asyla.org/ac/fb/4b/bc/db/acd938f8-fbe3-4bda-bc6e-dbef0b590f17.ymal)]



In [18]:
#generate the html version of the document
put_Body = bytearray(convertPandoc2Html (output), encoding='utf-8') #convert from Pandoc to HTML and save the results in bin format

#print (convertPandoc2Html (output))

#save the file to S3
results = s3.Object(document['bucket'], document['homepage']).put(
    ACL='public-read', #make this viewable by everyone
    Body=put_Body,
    ContentEncoding='utf-8', #its a normal ascii file
    ContentType='text/html' #declare its HTML so the browser will open it
)

#print (results)

In [19]:
print (getKeyURL(document['bucket'], document['homepage']))

https://s3.amazonaws.com/documents.asyla.org/index.html
