In [1]:
from collections import OrderedDict
from PyPDF2 import PdfFileWriter, PdfFileReader


def _getFields(obj, tree=None, retval=None, fileobj=None):
    """
    Extracts field data if this PDF contains interactive form fields.
    The *tree* and *retval* parameters are for recursive use.

    :param fileobj: A file object (usually a text file) to write
        a report to on all interactive form fields found.
    :return: A dictionary where each key is a field name, and each
        value is a :class:`Field<PyPDF2.generic.Field>` object. By
        default, the mapping name is used for keys.
    :rtype: dict, or ``None`` if form data could not be located.
    """
    fieldAttributes = {'/FT': 'Field Type', '/Parent': 'Parent', '/T': 'Field Name', '/TU': 'Alternate Field Name',
                       '/TM': 'Mapping Name', '/Ff': 'Field Flags', '/V': 'Value', '/DV': 'Default Value'}
    if retval is None:
        retval = OrderedDict()
        catalog = obj.trailer["/Root"]
        # get the AcroForm tree
        if "/AcroForm" in catalog:
            tree = catalog["/AcroForm"]
        else:
            return None
    if tree is None:
        return retval

    obj._checkKids(tree, retval, fileobj)
    for attr in fieldAttributes:
        if attr in tree:
            # Tree is a field
            obj._buildField(tree, retval, fileobj, fieldAttributes)
            break

    if "/Fields" in tree:
        fields = tree["/Fields"]
        for f in fields:
            field = f.getObject()
            obj._buildField(field, retval, fileobj, fieldAttributes)

    return retval


def get_form_fields(infile):
    """
    Read the current values of all interactive form fields in a PDF.

    :param infile: path of the PDF file to open.
    :return: field names mapped to their ``/V`` entry, with ``''`` for
        fields that have no ``/V``. Empty when ``_getFields`` finds no
        ``/AcroForm`` in the document.
    :rtype: OrderedDict
    """
    # Build the whole result inside the ``with`` block: the reader may
    # still pull objects from the stream while fields are resolved, and
    # the handle must be closed afterwards instead of being leaked.
    with open(infile, 'rb') as stream:
        fields = _getFields(PdfFileReader(stream))
        if fields is None:
            # No /AcroForm: report "no fields" rather than failing on
            # ``None.items()``.
            return OrderedDict()
        return OrderedDict((name, field.get('/V', '')) for name, field in fields.items())

In [3]:
from pprint import pprint

# Extract the field values of the `labor_2hprotokoll` test form and
# pretty-print them for inspection.
pdf_file_name = '../test-forms/labor_2hprotokoll_ncov2019_d_draft.pdf'
labDict = get_form_fields(pdf_file_name)
pprint(labDict)

OrderedDict([('Nasenrachenabstrich', '/True'),
             ('Mundrachenabstrich', ''),
             ('Sputum', ''),
             ('andere', '/True'),
             ('PLZ', '3033'),
             ('Arzt Auftraggeber',
              b'Arzt\rHinterkappellen Ring 111\r3032 Hinterkappellen\rTel 030 3'
              b'33 33 33\rFax 030 444 44 44'),
             ('Meldendes Labor',
              b'Labor Dieci\rSchwarztorstrasse 123\r3001 Bern\rTel 030 555 55 5'
              b'5\rFax 030 666 66 66'),
             ('Bemerkungen', 'keine Bemerkungen'),
             ('Patient-in_Familenname', 'Cow'),
             ('Patient-in_Vorname', 'Holy'),
             ('Nachweis_Tag', '21'),
             ('Nachweis_Monat', '03'),
             ('Nachweis_Jahr', '2020'),
             ('Andere_Entnahme', 'Augensekret'),
             ('Endotracheale_Absaugung', '/True'),
             ('Broncho-alveolaere_Lavage', ''),
             ('Blut_Serologie', ''),
             ('Patient-in_Strasse_Hausnummer', 'Weidestra

In [4]:
# Extract the field values of the `ncov2019_d_optimized` test form and
# pretty-print them for inspection.
pdf_file_name = '../test-forms/ncov2019_d_optimized_draft.pdf'
meldDict = get_form_fields(pdf_file_name)
pprint(meldDict)

OrderedDict([('Kanton', 'BE'),
             ('Telefonnummer', '030 865 41 07'),
             ('Fieber', '/True'),
             ('Husten', '/True'),
             ('Atembeschwerden', '/True'),
             ('Durchfall', '/True'),
             ('ARDS', '/True'),
             ('Pneumonie', '/True'),
             ('Geburtstag_Tag', '12'),
             ('Geburtstag_Monat', '03'),
             ('Geburtstag_Jahr', '1989'),
             ('Andere Symptome', '/True'),
             ('Keine_Symptome', ''),
             ('Diabetes', '/Yes'),
             ('Herz-Kreislauf', ''),
             ('Immunsuppression', '/Yes'),
             ('Hypertonie', ''),
             ('Atemwegserkrankung', ''),
             ('Keine', ''),
             ('Krebs', ''),
             ('Andere_Symptome_Text', 'Kopfschmerz'),
             ('Andere', '/Yes'),
             ('Referenzlabor', '/Yes'),
             ('Andere_Grunderkrankungen', 'HIV'),
             ('Manifestation_Tag', '18'),
             ('Manifestation_Monat', 