In [None]:
import os
import pickle
from collections import OrderedDict

from PyPDF2 import PdfFileWriter, PdfFileReader
from PyPDF2.generic import BooleanObject, NameObject, IndirectObject
from reportlab.lib.enums import TA_JUSTIFY, TA_CENTER
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer


In [2]:
def _getFields(obj, tree=None, retval=None, fileobj=None):
    """
    Extracts field data if this PDF contains interactive form fields.
    The *tree* and *retval* parameters are for recursive use.

    :param fileobj: A file object (usually a text file) to write
        a report to on all interactive form fields found.
    :return: A dictionary where each key is a field name, and each
        value is a :class:`Field<PyPDF2.generic.Field>` object. By
        default, the mapping name is used for keys.
    :rtype: dict, or ``None`` if form data could not be located.
    """
    fieldAttributes = {'/FT': 'Field Type', '/Parent': 'Parent', '/T': 'Field Name', '/TU': 'Alternate Field Name',
                       '/TM': 'Mapping Name', '/Ff': 'Field Flags', '/V': 'Value', '/DV': 'Default Value'}
    if retval is None:
        retval = OrderedDict()
        catalog = obj.trailer["/Root"]
        # get the AcroForm tree
        if "/AcroForm" in catalog:
            tree = catalog["/AcroForm"]
        else:
            return None
    if tree is None:
        return retval

    obj._checkKids(tree, retval, fileobj)
    for attr in fieldAttributes:
        if attr in tree:
            # Tree is a field
            obj._buildField(tree, retval, fileobj, fieldAttributes)
            break

    if "/Fields" in tree:
        fields = tree["/Fields"]
        for f in fields:
            field = f.getObject()
            obj._buildField(field, retval, fileobj, fieldAttributes)

    return retval

In [3]:
def get_form_fields(infile):
    """
    Return the current value of every form field in a PDF file.

    :param infile: path of the PDF file to inspect.
    :return: ``OrderedDict`` mapping each field name to its ``/V`` entry
        (``''`` when the field has no ``/V``). The mapping is empty when
        ``_getFields`` reports no form data (returns ``None``).
    """
    # Everything is computed while the file is open, and the handle is closed
    # on exit instead of being left to the garbage collector.
    with open(infile, 'rb') as src:
        fields = _getFields(PdfFileReader(src))
        if fields is None:
            # No /AcroForm in the document: nothing to report.
            return OrderedDict()
        return OrderedDict((k, v.get('/V', '')) for k, v in fields.items())

In [4]:
def set_need_appearances_writer(writer: PdfFileWriter):
    """
    Set ``/NeedAppearances`` to true on the writer's ``/AcroForm`` entry.

    If the writer's root object has no ``/AcroForm`` entry yet, one is added
    as an indirect reference first. Any exception raised along the way is
    printed and swallowed (best effort).

    See 12.7.2 and 7.7.2 for more information:
    http://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf

    :param writer: the ``PdfFileWriter`` to modify in place.
    :return: the same *writer*, whether or not the flag could be set.
    """
    try:
        root = writer._root_object
        if "/AcroForm" not in root:
            # NOTE(review): the reference is numbered len(writer._objects) and
            # no new object is registered here - confirm which object it
            # resolves to before relying on it beyond /NeedAppearances.
            acro_form_ref = IndirectObject(len(writer._objects), 0, writer)
            writer._root_object.update({NameObject("/AcroForm"): acro_form_ref})

        acro_form = writer._root_object["/AcroForm"]
        acro_form[NameObject("/NeedAppearances")] = BooleanObject(True)
    except Exception as e:
        print('set_need_appearances_writer() catch : ', repr(e))

    return writer

In [5]:
def update_form_values(infile, outfile, newvals=None):
    """
    Copy *infile* to *outfile*, filling in its form fields on the way.

    :param infile: path of the PDF form used as the template.
    :param outfile: path of the PDF file to write.
    :param newvals: mapping of field name to new value. When empty or
        ``None``, every field is instead filled with a self-describing
        ``'#<index> <name>=<current value>'`` string, which helps to discover
        the field names of a template.

    Failures while filling in a page are printed and the page is still copied
    unchanged (best effort); failures while opening or writing files propagate.
    """
    # The input handle stays open until the output has been written, because
    # the pages handed to the writer still belong to the reader's stream.
    with open(infile, 'rb') as src:
        pdf = PdfFileReader(src)
        writer = set_need_appearances_writer(PdfFileWriter())

        if newvals:
            values = newvals
        else:
            # Does not depend on the page, so it is computed once, not per page.
            try:
                values = {k: f'#{i} {k}={v}'
                          for i, (k, v) in enumerate(get_form_fields(infile).items())}
            except Exception as e:
                print(repr(e))
                values = None  # nothing to fill in; pages are copied as they are

        for page_no in range(pdf.getNumPages()):
            page = pdf.getPage(page_no)
            if values is not None:
                try:
                    writer.updatePageFormFieldValues(page, values)
                except Exception as e:
                    print(repr(e))
            # Added exactly once, whether or not the update succeeded.
            writer.addPage(page)

        with open(outfile, 'wb') as out:
            writer.write(out)

In [6]:
# set_need_appearances_writer() is defined once, in the earlier cell that sits
# just before update_form_values(). This cell used to repeat that definition
# verbatim, silently shadowing it; the duplicate was removed so that only one
# copy has to be maintained.

In [7]:
# pkl files and data structures
# os.path.join(..., '') keeps the trailing separator, so `pkl_path + name`
# still works, and it now works on any OS. With the former literal 'pkl\\'
# every load below would silently fall back to [] outside Windows.
pkl_path = os.path.join('pkl', '')


def load_pickle_or_empty(path):
    """
    Load the object pickled at *path*.

    :param path: path of the pickle file.
    :return: the unpickled object, or an empty list if the file does not exist.
    """
    try:
        with open(path, "rb") as pkl_in:
            # NOTE(security): pickle.load can execute arbitrary code; only
            # load files produced by this project.
            return pickle.load(pkl_in)
    except FileNotFoundError:
        return []


# load the events data structure (if it exists)
events_fn = 'events.pkl'
events = load_pickle_or_empty(pkl_path + events_fn)
print ('There are {0} events!'.format(len(events)))

# load the articles data structure (if it exists)
articles_fn = 'articles.pkl'
articles = load_pickle_or_empty(pkl_path + articles_fn)
print ('There are {0} articles!'.format(len(articles)))

# load the authors data structure (if it exists)
authors_fn = 'authors.pkl'
authors = load_pickle_or_empty(pkl_path + authors_fn)
print ('There are {0} authors!'.format(len(authors)))

# load the articles/authors data structure (if it exists)
art_aut_fn = 'art_aut.pkl'
art_aut = load_pickle_or_empty(pkl_path + art_aut_fn)
print ('There are {0} articles-authors relationships!'.format(len(art_aut)))

# load the forms data structure (if it exists)
forms_fn = 'forms.pkl'
forms = load_pickle_or_empty(pkl_path + forms_fn)
print ('There are {0} forms!'.format(len(forms)))


There are 5 events!
There are 162 articles!
There are 341 authors!
There are 506 articles-authors relationships!
There are 321 forms!


In [None]:

# Generate one licence form per entry in `forms`; entries that do not cover
# exactly one article also get an appendix PDF listing their articles.
template_fileName = 'EG-CC-BY.pdf'

# Output directory for the generated PDFs. os.path.join(..., '') keeps the
# trailing separator so `forms_path + name` works on any OS (the former
# literal 'forms\\' was only a directory prefix on Windows).
forms_path = os.path.join('forms', '')
os.makedirs(forms_path, exist_ok=True)

styles = getSampleStyleSheet()
styles.add(ParagraphStyle(name='Justify', alignment=TA_JUSTIFY))
styles.add(ParagraphStyle(name='Center', alignment=TA_CENTER))


def _find_first(records, key, value):
    """Return the first record with ``record[key] == value``, or None if there is none."""
    return next((record for record in records if record[key] == value), None)


def _author_names(art_id):
    """Return the names of all authors of *art_id*, each followed by '; ', in `art_aut` order."""
    names = []
    for bridge in art_aut:
        if bridge[0] == art_id:
            author = _find_first(authors, 'ID', bridge[1])
            if author is not None:
                names.append(author['name'] + '; ')
    return ''.join(names)


def _event_label(article):
    """Return '<event name> (<year>)' for the article's event, or None if it cannot be resolved."""
    if article is None:
        return None
    event = _find_first(events, 'acr', article['event'])
    if event is None:
        return None
    # format() instead of '+' so a non-string year does not raise TypeError.
    return '{0} ({1})'.format(event['name'], event['year'])


def _sized(text, size=12):
    """Wrap *text* in a ReportLab ``<font size=...>`` tag."""
    # NOTE(review): text is inserted into paragraph markup unescaped, as
    # before - confirm titles/names never contain '<' or '&'.
    return '<font size="{0}">{1}</font>'.format(size, text)


def _write_appendix(appendix_path, lic_author, article_ids):
    """Build the appendix PDF listing title, authors and event of every article in *article_ids*."""
    doc = SimpleDocTemplate(appendix_path, pagesize=A4,
                            rightMargin=72, leftMargin=72,
                            topMargin=72, bottomMargin=18)
    justify = styles["Justify"]
    center = styles["Center"]
    story = []

    # Header
    story.append(Paragraph(_sized('Appendix to EUROGRAPHICS: License for Publishing CC-BY', 14), center))
    story.append(Spacer(1, 24))
    story.append(Paragraph(_sized("Authors' name: {0}".format(lic_author['name'])), justify))
    story.append(Spacer(1, 12))
    story.append(Paragraph(_sized("Contact: ") + _sized(lic_author['email'], 10), justify))
    story.append(Spacer(1, 12))
    story.append(Paragraph(_sized("Signature:"), justify))
    story.append(Spacer(1, 48))
    story.append(Paragraph(_sized('List of licensed articles', 14), center))
    story.append(Spacer(1, 24))

    # One entry per article in this form
    for art_id in article_ids:
        # Looked up per article, so an unknown ID can no longer pick up the
        # title/event of whichever article happened to be inspected last.
        article = _find_first(articles, 'ID', art_id)
        if article is not None:
            story.append(Paragraph(_sized("Title: " + article['title']), justify))
            story.append(Spacer(1, 6))
        # authors: there can be several
        story.append(Paragraph(_sized("Authors: " + _author_names(art_id)), justify))
        story.append(Spacer(1, 6))
        # event
        label = _event_label(article)
        if label is not None:
            story.append(Paragraph(_sized("EG Publication (event): " + label), justify))
        # separate articles
        story.append(Spacer(1, 18))

    story.append(Spacer(1, 12))
    doc.build(story)


# enter the main loop
for form in forms:

    # get the corresponding author
    lic_author = _find_first(authors, 'ID', form['ID'])
    if lic_author is None:
        # Previously this aborted the whole run with KeyError: 'name'.
        print('No author with ID {0!r}; form skipped'.format(form['ID']))
        continue

    # fill in the main form the corresponding author data
    FieldValuesDict = {
        'Authors name': lic_author['name'],
        'Print name': lic_author['name'],
        'Authors address 1': lic_author['email'],
    }

    article_ids = form['articles']
    form_FN = forms_path + form['file']

    # A single article only needs EG-CC-BY; anything else needs an appendix.
    if len(article_ids) == 1:  # single article

        # get article data
        artID = article_ids[0]
        article = _find_first(articles, 'ID', artID)
        if article is not None:
            FieldValuesDict['Title of article Article'] = article['title']
            FieldValuesDict['Manuscript no if known'] = artID

        # get authors data
        FieldValuesDict['Names of all authors in the order in which they appear in the Article 2'] = _author_names(artID)

        # get event data (only when the article itself was found)
        label = _event_label(article)
        if label is not None:
            FieldValuesDict['Name of the EG Publication event name'] = label

        # create the form
        update_form_values(template_fileName, form_FN, FieldValuesDict)

    else:  # multiple articles

        num_articles = len(article_ids)
        FieldValuesDict['Title of article Article'] = 'see {0} articles in appendix'.format(num_articles)
        FieldValuesDict['Name of the EG Publication event name'] = 'see appendix'
        FieldValuesDict['Manuscript no if known'] = form['ID']

        # create the form
        update_form_values(template_fileName, form_FN, FieldValuesDict)

        # create the appendix next to it
        appendix_FN = forms_path + form['file'].replace("-form.", "-form-app.")
        _write_appendix(appendix_FN, lic_author, article_ids)
    


In [None]:
# Signal that the whole notebook ran to completion.
print("That's all, folks!")