In [1]:
import csv
from collections import OrderedDict

from PyPDF2 import PdfFileReader, PdfFileWriter
from PyPDF2.generic import BooleanObject, DictionaryObject, IndirectObject, NameObject


In [2]:
def _getFields(obj, tree=None, retval=None, fileobj=None):
    """
    Extracts field data if this PDF contains interactive form fields.
    The *tree* and *retval* parameters are for recursive use.

    :param fileobj: A file object (usually a text file) to write
        a report to on all interactive form fields found.
    :return: A dictionary where each key is a field name, and each
        value is a :class:`Field<PyPDF2.generic.Field>` object. By
        default, the mapping name is used for keys.
    :rtype: dict, or ``None`` if form data could not be located.
    """
    fieldAttributes = {'/FT': 'Field Type', '/Parent': 'Parent', '/T': 'Field Name', '/TU': 'Alternate Field Name',
                       '/TM': 'Mapping Name', '/Ff': 'Field Flags', '/V': 'Value', '/DV': 'Default Value'}
    if retval is None:
        retval = OrderedDict()
        catalog = obj.trailer["/Root"]
        # get the AcroForm tree
        if "/AcroForm" in catalog:
            tree = catalog["/AcroForm"]
        else:
            return None
    if tree is None:
        return retval

    obj._checkKids(tree, retval, fileobj)
    for attr in fieldAttributes:
        if attr in tree:
            # Tree is a field
            obj._buildField(tree, retval, fileobj, fieldAttributes)
            break

    if "/Fields" in tree:
        fields = tree["/Fields"]
        for f in fields:
            field = f.getObject()
            obj._buildField(field, retval, fileobj, fieldAttributes)

    return retval

In [3]:
def get_form_fields(infile):
    """
    Return the current value of every form field found in a PDF file.

    :param infile: path of the PDF file to inspect.
    :return: ``OrderedDict`` mapping each key produced by ``_getFields`` to
        that field's ``'/V'`` entry (``''`` when the field has none). The
        mapping is empty when the document has no ``/AcroForm``.
    """
    # Open with a context manager so the handle is released even on error;
    # everything that touches the reader happens while the file is open.
    with open(infile, 'rb') as pdf_stream:
        fields = _getFields(PdfFileReader(pdf_stream))
        if fields is None:
            # _getFields signals "no interactive form" with None.
            return OrderedDict()
        return OrderedDict((k, v.get('/V', '')) for k, v in fields.items())

In [4]:
def set_need_appearances_writer(writer: PdfFileWriter):
    """
    Set ``/NeedAppearances`` to true in the writer's AcroForm dictionary.

    If the writer's catalog has no ``/AcroForm`` entry yet, a new empty
    dictionary is registered with the writer and referenced from the
    catalog first.

    This is best-effort: any exception is reported on stdout and the
    writer is returned regardless.

    :param writer: the ``PdfFileWriter`` to modify in place.
    :return: the same ``writer`` object.
    """
    # See 12.7.2 and 7.7.2 for more information: http://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf
    try:
        catalog = writer._root_object
        if "/AcroForm" not in catalog:
            # Register a real dictionary object; building an IndirectObject
            # from len(writer._objects) would alias whatever object happened
            # to be added last instead of a dedicated AcroForm dictionary.
            catalog[NameObject("/AcroForm")] = writer._addObject(DictionaryObject())

        catalog["/AcroForm"][NameObject("/NeedAppearances")] = BooleanObject(True)
    except Exception as e:
        print('set_need_appearances_writer() catch : ', repr(e))
    return writer

In [5]:
def update_form_values(infile, outfile, newvals=None):
    """
    Copy a PDF form to a new file, filling in its form fields.

    :param infile: path of the source PDF.
    :param outfile: path of the PDF to write.
    :param newvals: mapping of field name -> new value. When empty or
        ``None``, every field is filled with ``'#<index> <name>=<value>'``
        built from ``get_form_fields(infile)``, which makes it easy to see
        which field is which.

    Failures while computing the default values or while updating a page
    are printed and do not abort the run; the page is still copied.
    """
    # NOTE: the input stays open until the output has been written, because
    # the writer still refers to page objects owned by the reader.
    with open(infile, 'rb') as pdf_stream:
        pdf = PdfFileReader(pdf_stream)
        writer = set_need_appearances_writer(PdfFileWriter())

        values = newvals
        if not values:
            # Computed once instead of re-parsing the PDF for every page.
            try:
                values = {name: f'#{index} {name}={value}'
                          for index, (name, value) in enumerate(get_form_fields(infile).items())}
            except Exception as e:
                print(repr(e))
                values = None

        for page_number in range(pdf.getNumPages()):
            page = pdf.getPage(page_number)
            if values is not None:
                try:
                    writer.updatePageFormFieldValues(page, values)
                except Exception as e:
                    # Best-effort: report the problem and keep the page as is.
                    print(repr(e))
            writer.addPage(page)

        with open(outfile, 'wb') as out:
            writer.write(out)

In [6]:
# NOTE(review): this cell defines set_need_appearances_writer a second time;
# an earlier cell already defines it, and this later definition is the one in
# effect when the cells run top to bottom. Consider keeping a single copy.
def set_need_appearances_writer(writer: PdfFileWriter):
    """
    Set ``/NeedAppearances`` to true in the writer's AcroForm dictionary.

    If the writer's catalog has no ``/AcroForm`` entry yet, a new empty
    dictionary is registered with the writer and referenced from the
    catalog first.

    This is best-effort: any exception is reported on stdout and the
    writer is returned regardless.

    :param writer: the ``PdfFileWriter`` to modify in place.
    :return: the same ``writer`` object.
    """
    # See 12.7.2 and 7.7.2 for more information: http://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf
    try:
        catalog = writer._root_object
        if "/AcroForm" not in catalog:
            # Register a real dictionary object; building an IndirectObject
            # from len(writer._objects) would alias whatever object happened
            # to be added last instead of a dedicated AcroForm dictionary.
            catalog[NameObject("/AcroForm")] = writer._addObject(DictionaryObject())

        catalog["/AcroForm"][NameObject("/NeedAppearances")] = BooleanObject(True)
    except Exception as e:
        print('set_need_appearances_writer() catch : ', repr(e))
    return writer

In [7]:
def _find_author_columns(header):
    """
    Locate the author name columns in the CSV header row.

    Looks for 'Author1LastName'/'Author1FirstName', 'Author2...', and so on,
    stopping at the first author number for which either title is missing.

    :param header: list of column titles.
    :return: list of ``(last_name_index, first_name_index)`` tuples.
    """
    columns = []
    au_n = 1
    while True:
        try:
            columns.append((header.index(f'Author{au_n}LastName'),
                            header.index(f'Author{au_n}FirstName')))
        except ValueError:
            return columns
        au_n += 1


def _read_authors(row, authors_col):
    """Return the ``(last name, first name)`` pairs of a row, stopping at the first empty last name."""
    authors = []
    for ln_col, fn_col in authors_col:
        if row[ln_col] == "":
            break
        authors.append((row[ln_col], row[fn_col]))
    return authors


def _format_author_fields(authors):
    """
    Build the texts for the two author form fields.

    :param authors: list of ``(last name, first name)`` pairs.
    :return: ``(first_author, other_authors)``; both are ``''`` when the
        list is empty.
    """
    if not authors:
        return '', ''
    first_author = authors[0][0] + ', ' + authors[0][1]
    # Each remaining author is followed by '; ' (the last one included).
    other_authors = ''.join(ln + ', ' + fn + '; ' for ln, fn in authors[1:])
    return first_author, other_authors


if __name__ == '__main__':
    import os
    from pprint import pprint  # only used by the optional debugging call at the end

    path = "2015"

    csv_fileName = '2015EPCGI'

    pdf_fileName = 'EG-CC-BY'

    # os.path.join keeps the script usable outside Windows.
    pdf_path = os.path.join(path, pdf_fileName + '.pdf')
    csv_path = os.path.join(path, csv_fileName + '.csv')

    # newline="" is what the csv module expects for files handed to csv.reader
    with open(csv_path, "rt", newline="") as csv_in:
        rows = csv.reader(csv_in, delimiter=";")

        # the first row holds the column titles; use it to
        # identify the columns containing the relevant fields
        header = next(rows, None)
        if header is not None:
            articleID_col = header.index('ArticleID')
            type_col = header.index('Type')
            title_col = header.index('Title')
            authors_col = _find_author_columns(header)
            event_col = header.index('Event')
            output_fileName_col = header.index('LicenseFile')

            # one form is generated per row whose type is 'Article'
            for row in rows:
                if not row:
                    # csv.reader yields [] for blank lines
                    continue
                if row[type_col] != 'Article':
                    continue

                articleID = row[articleID_col]
                title = row[title_col]
                event = row[event_col]
                authors = _read_authors(row, authors_col)
                output_fileName = os.path.join(path, row[output_fileName_col])
                print ('{0} - {1} : {2}'.format(articleID, row[type_col], title))
                if not authors:
                    print ('  warning: no authors found for article {0}'.format(articleID))

                first_author, other_authors = _format_author_fields(authors)

                FieldValuesDict = {}
                FieldValuesDict['Title of article Article'] = title
                FieldValuesDict['Manuscript no if known'] = articleID
                FieldValuesDict['Names of all authors in the order in which they appear in the Article 1'] = first_author
                FieldValuesDict['Names of all authors in the order in which they appear in the Article 2'] = other_authors
                FieldValuesDict['Name of the EG Publication event name'] = event
                update_form_values(pdf_path, output_fileName, FieldValuesDict)  # update the form fields

    # Debugging aids:
    #pprint(get_form_fields(pdf_path))

    #update_form_values(pdf_path, os.path.join(path, 'out-' + pdf_fileName + '.pdf'))  # enumerate & fill the fields with their own names


7 - Article : Escultura Présence: um sistema tridimensional de interação sonoro-corporal
8 - Article : Design de Funções Transferência para Imagens Médicas 3D recorrendo a uma Interface baseada em Esboços
9 - Article : Enabling low-complexity devices for interaction with 3D media content via Android API
10 - Article : Acesso Multimodal em Dispositivos Móveis a Vídeo Georeferenciado através da Forma, Velocidade e Tempo 
11 - Article : Visualização tri-dimensional interativa de informação demográ?ca mundial
13 - Article : How to Deal with Motion Sickness in Virtual Reality
14 - Article : Incremental Reconstruction Approach for Telepresence or AR Applications
15 - Article : Modelo de descrição de experiências  multissensoriais multiutilizador
16 - Article : Interactive con?gurable virtual environment with Kinect navigation and interaction
17 - Article : Visualização de Objetos Off-Screen em Realidade Aumentada Móvel 
19 - Article : Visualização de Fluxos Migratórios: Os Resultados das Can

In [8]:
# Signal that the notebook ran to completion.
print("That's all, folks!")

That's all, folks!
