In [544]:
import pdfrw


def _text_form(annotation, value):
    pdfstr = pdfrw.objects.pdfstring.PdfString.encode(value)
    annotation.update(pdfrw.PdfDict(V=pdfstr, AS=pdfstr))


def _checkbox(annotation, value, export=None):
    if export:
        export = '/' + export
    else:
        keys = annotation['/AP']['/N'].keys()
        if ['/Off'] in keys:
            keys.remove('/Off')
        export = keys[0]
    if value:
        annotation.update(pdfrw.PdfDict(V=export, AS=export))
    else:
        if '/V' in annotation:
            del annotation['/V']
        if '/AS' in annotation:
            del annotation['/AS']


def _radio_button(annotation, value):
    for each in annotation['/Kids']:
        # determine the export value of each kid
        keys = each['/AP']['/N'].keys()
        if ['/Off'] in keys:
            keys.remove('/Off')
        export = keys[0]

        if f'/{value}' == export:
            val_str = pdfrw.objects.pdfname.BasePdfName(f'/{value}')
        else:
            val_str = pdfrw.objects.pdfname.BasePdfName(f'/Off')
        each.update(pdfrw.PdfDict(AS=val_str))

    annotation.update(pdfrw.PdfDict(V=pdfrw.objects.pdfname.BasePdfName(f'/{value}')))


def _combobox(annotation, value):
    export = None
    for each in annotation['/Opt']:
        try:
            if each[1].to_unicode() == value:
                export = each[0].to_unicode()
        except:
            if each[1].encode('utf-8') == value:
                try:
                    export = each[0].to_unicode()
                except:
                    export = each[0].encode('utf-8')
    if export is None:
        raise KeyError(f"Export Value: {value} Not Found")
    pdfstr = pdfrw.objects.pdfstring.PdfString.encode(export)
    annotation.update(pdfrw.PdfDict(V=pdfstr, AS=pdfstr))


def _listbox(annotation, values):
    pdfstrs = []
    for value in values:
        export = None
        for each in annotation['/Opt']:
            if each[1].to_unicode() == value:
                export = each[0].to_unicode()
        if export is None:
            raise KeyError(f"Export Value: {value} Not Found")
        pdfstrs.append(pdfrw.objects.pdfstring.PdfString.encode(export))
    annotation.update(pdfrw.PdfDict(V=pdfstrs, AS=pdfstrs))


def _field_type(annotation):
    ft = annotation['/FT']
    ff = annotation['/Ff']

    if ft == '/Tx':
        return 'text'
    if ft == '/Ch':
        if ff and int(ff) & 1 << 17:  # test 18th bit
            return 'combo'
        else:
            return 'list'
    if ft == '/Btn':
        if ff and int(ff) & 1 << 15:  # test 16th bit
            return 'radio'
        else:
            return 'checkbox'


def _blank_page(w, h):
    blank = pdfrw.PageMerge()
    blank.mbox = [0, 0, w * 72, h * 72]
    blank = blank.render()
    return blank

def pdf_form_info(in_pdf):
    info = []
    for page in in_pdf.pages:
        annotations = page['/Annots']
        if annotations is None:
            continue
        for annotation in annotations:
            choices=None
            if annotation['/Subtype'] == '/Widget':
                if not annotation['/T']:
                    annotation = annotation['/Parent']
                key = annotation['/T'].to_unicode()
                ft = _field_type(annotation)
                value = annotation['/V']
                if ft =='radio':
                    try:
                        value = value[1:]
                        choices =[]
                        for each in annotation['/Kids']:
                            keys = each['/AP']['/N'].keys()
                            if not keys[0][1:] in choices:
                                choices.append(keys[0][1:])
                    except:
                        pass
                elif ft == 'list' or ft=='combo':
                    try:
                        choices = [each[1].to_unicode() for each in annotation['/Opt']]
                    except:
                        choices = [each[1].encode('utf-8') for each in annotation['/Opt']]
                    values=[]
                    for each in annotation['/Opt']:
                        if each[0] in value:
                            try:
                                values.append(each[1].to_unicode())
                            except:
                                values.append(each[1])
                    value=values
                else:
                    if value:
                        try:
                            value=value.to_unicode()
                        except:
                            value=value
                out = dict(name=key, type=ft)
                if value:
                    out['value']=value
                if choices:
                    out['choices']=choices
                info.append(out)
    return info


def fill_form(in_pdf, data, suffix=None):
    fillers = {'checkbox': _checkbox,
               'list': _listbox,
               'text': _text_form,
               'combo': _combobox,
               'radio': _radio_button}
    for page in in_pdf.pages:
        annotations = page['/Annots']
        if annotations is None:
            continue
        for annotation in annotations:

            if annotation['/Subtype'] == '/Widget':
                if not annotation['/T']:
                    annotation=annotation['/Parent']
                key = annotation['/T'].to_unicode()
                if key in data:
                    ft = _field_type(annotation)
                    fillers[ft](annotation, data[key])
                    if suffix:
                        new_T=pdfrw.objects.pdfstring.PdfString.encode(key+suffix)
                        annotation.update(pdfrw.PdfDict(T=new_T))
        in_pdf.Root.AcroForm.update(
            pdfrw.PdfDict(NeedAppearances=pdfrw.PdfObject('true')))
    return in_pdf


def single_form_fill(in_file, data, out_file):
    pdf = pdfrw.PdfReader(in_file)
    out_pdf = fill_form(pdf, data)
    pdfrw.PdfWriter().write(out_file, out_pdf)


def batch_form_fill(template, data, out_file, double_sided=False, splice=-1):
    # deepcopy does not work on PdfDict so we keep the raw inputs
    # so we can may copies of the Pdf structures by reparsing the
    # raw data

    with open(template, 'rb') as f:
        pdf_data = f.read()
    writer = pdfrw.PdfWriter()
    for idx, record in enumerate(data):
        pdf = pdfrw.PdfReader(fdata=pdf_data)  # Create a new instance each time
        out_pdf = fill_form(pdf, record, f'{-idx:04d}')
        if len(out_pdf.pages) % 2 == 1 and double_sided:
            if splice != 0:
                writer.addpages(out_pdf.pages[0:splice])
            writer.addpage(_blank_page(8.5, 11))
            if splice != -1:
                writer.addpages(out_pdf.pages[splice:])
        else:
            writer.addpages(out_pdf.pages)
    writer.trailer.Root.AcroForm = pdfrw.PdfReader(fdata=pdf_data).Root.AcroForm
    writer.trailer.Root.AcroForm.update(pdfrw.PdfDict(NeedAppearances=pdfrw.PdfObject('true')))
    writer.write(out_file)

In [884]:
pdf_form_info(pdfrw.PdfReader('sample_pdf.pdf'))

[{'name': 'Name', 'type': 'text'},
 {'name': 'Dropdown2',
  'type': 'combo',
  'value': ['J', 'F', 'M', 'A', 'M', 'J', 'J', 'A', 'S', 'O', 'N', 'D'],
  'choices': [b'J',
   b'F',
   b'M',
   b'A',
   b'M',
   b'J',
   b'J',
   b'A',
   b'S',
   b'O',
   b'N',
   b'D']},
 {'name': 'Dropdown1',
  'type': 'combo',
  'value': ['1',
   '2',
   '3',
   '4',
   '5',
   '6',
   '7',
   '8',
   '9',
   '1',
   '1',
   '1',
   '1',
   '1',
   '1',
   '1',
   '1',
   '1',
   '1',
   '2',
   '2',
   '2',
   '2',
   '2',
   '2',
   '2',
   '2',
   '2',
   '2',
   '3',
   '3'],
  'choices': [b'1',
   b'2',
   b'3',
   b'4',
   b'5',
   b'6',
   b'7',
   b'8',
   b'9',
   b'1',
   b'1',
   b'1',
   b'1',
   b'1',
   b'1',
   b'1',
   b'1',
   b'1',
   b'1',
   b'2',
   b'2',
   b'2',
   b'2',
   b'2',
   b'2',
   b'2',
   b'2',
   b'2',
   b'2',
   b'3',
   b'3']},
 {'name': 'Dropdown3',
  'type': 'combo',
  'value': ['2',
   '2',
   '2',
   '2',
   '2',
   '2',
   '2',
   '2',
   '2',
   '2',
   '2'

In [62]:
## The Colors List Box should change from Yellow/Green to Red/Blue
import os
import json
with open('basic-data.json') as f:
    data = json.load(f)
    data['Dropdown2']=b'A'
single_form_fill('sample_pdf.pdf', data,
                 'newSamp.pdf')

In [64]:
s = "p"

In [66]:
s.decode('utf-8')

AttributeError: 'str' object has no attribute 'decode'

In [1]:
import pdfrw


def _text_form(annotation, value):
    pdfstr = pdfrw.objects.pdfstring.PdfString.encode(value)
    annotation.update(pdfrw.PdfDict(V=pdfstr, AS=pdfstr))


def _checkbox(annotation, value, export=None):
    if export:
        export = '/' + export
    else:
        keys = annotation['/AP']['/N'].keys()
        if ['/Off'] in keys:
            keys.remove('/Off')
        export = keys[0]
    if value:
        annotation.update(pdfrw.PdfDict(V=export, AS=export))
    else:
        if '/V' in annotation:
            del annotation['/V']
        if '/AS' in annotation:
            del annotation['/AS']


def _radio_button(annotation, value):
    for each in annotation['/Kids']:
        # determine the export value of each kid
        keys = each['/AP']['/N'].keys()
        if ['/Off'] in keys:
            keys.remove('/Off')
        export = keys[0]

        if f'/{value}' == export:
            val_str = pdfrw.objects.pdfname.BasePdfName(f'/{value}')
        else:
            val_str = pdfrw.objects.pdfname.BasePdfName(f'/Off')
        each.update(pdfrw.PdfDict(AS=val_str))

    annotation.update(pdfrw.PdfDict(V=pdfrw.objects.pdfname.BasePdfName(f'/{value}')))


def _combobox(annotation, value):
    export = None
    for each in annotation['/Opt']:
        if each[1].to_unicode() == value:
            export = each[0].to_unicode()
    if export is None:
        raise KeyError(f"Export Value: {value} Not Found")
    pdfstr = pdfrw.objects.pdfstring.PdfString.encode(export)
    annotation.update(pdfrw.PdfDict(V=pdfstr, AS=pdfstr))


def _listbox(annotation, values):
    pdfstrs = []
    for value in values:
        export = None
        for each in annotation['/Opt']:
            if each[1].to_unicode() == value:
                export = each[0].to_unicode()
        if export is None:
            raise KeyError(f"Export Value: {value} Not Found")
        pdfstrs.append(pdfrw.objects.pdfstring.PdfString.encode(export))
    annotation.update(pdfrw.PdfDict(V=pdfstrs, AS=pdfstrs))


def _field_type(annotation):
    ft = annotation['/FT']
    ff = annotation['/Ff']

    if ft == '/Tx':
        return 'text'
    if ft == '/Ch':
        if ff and int(ff) & 1 << 17:  # test 18th bit
            return 'combo'
        else:
            return 'list'
    if ft == '/Btn':
        if ff and int(ff) & 1 << 15:  # test 16th bit
            return 'radio'
        else:
            return 'checkbox'


def _blank_page(w, h):
    blank = pdfrw.PageMerge()
    blank.mbox = [0, 0, w * 72, h * 72]
    blank = blank.render()
    return blank

def pdf_form_info(in_pdf):
    info = []
    for page in in_pdf.pages:
        annotations = page['/Annots']
        if annotations is None:
            continue
        for annotation in annotations:
            choices=None
            if annotation['/Subtype'] == '/Widget':
                if not annotation['/T']:
                    annotation = annotation['/Parent']
                key = annotation['/T'].to_unicode()
                ft = _field_type(annotation)
                value = annotation['/V']
                if ft =='radio':
                    value = value[1:]
                    choices =[]
                    for each in annotation['/Kids']:
                        keys = each['/AP']['/N'].keys()
                        if not keys[0][1:] in choices:
                            choices.append(keys[0][1:])
                elif ft == 'list' or ft=='combo':
                    choices = [each[1].to_unicode() for each in annotation['/Opt']]
                    values=[]
                    for each in annotation['/Opt']:
                        if each[0] in value:
                            values.append(each[1].to_unicode())
                    value=values
                else:
                    if value:
                        value=value.to_unicode()
                out = dict(name=key, type=ft)
                if value:
                    out['value']=value
                if choices:
                    out['choices']=choices
                info.append(out)
    return info


def fill_form(in_pdf, data, suffix=None):
    fillers = {'checkbox': _checkbox,
               'list': _listbox,
               'text': _text_form,
               'combo': _combobox,
               'radio': _radio_button}
    for page in in_pdf.pages:
        annotations = page['/Annots']
        if annotations is None:
            continue
        for annotation in annotations:

            if annotation['/Subtype'] == '/Widget':
                if not annotation['/T']:
                    annotation=annotation['/Parent']
                key = annotation['/T'].to_unicode()
                if key in data:
                    ft = _field_type(annotation)
                    fillers[ft](annotation, data[key])
                    if suffix:
                        new_T=pdfrw.objects.pdfstring.PdfString.encode(key+suffix)
                        annotation.update(pdfrw.PdfDict(T=new_T))
        in_pdf.Root.AcroForm.update(
            pdfrw.PdfDict(NeedAppearances=pdfrw.PdfObject('true')))
    return in_pdf


def single_form_fill(in_file, data, out_file):
    pdf = pdfrw.PdfReader(in_file)
    out_pdf = fill_form(pdf, data)
    pdfrw.PdfWriter().write(out_file, out_pdf)


def batch_form_fill(template, data, out_file, double_sided=False, splice=-1):
    # deepcopy does not work on PdfDict so we keep the raw inputs
    # so we can may copies of the Pdf structures by reparsing the
    # raw data

    with open(template, 'rb') as f:
        pdf_data = f.read()
    writer = pdfrw.PdfWriter()
    for idx, record in enumerate(data):
        pdf = pdfrw.PdfReader(fdata=pdf_data)  # Create a new instance each time
        out_pdf = fill_form(pdf, record, f'{-idx:04d}')
        if len(out_pdf.pages) % 2 == 1 and double_sided:
            if splice != 0:
                writer.addpages(out_pdf.pages[0:splice])
            writer.addpage(_blank_page(8.5, 11))
            if splice != -1:
                writer.addpages(out_pdf.pages[splice:])
        else:
            writer.addpages(out_pdf.pages)
    writer.trailer.Root.AcroForm = pdfrw.PdfReader(fdata=pdf_data).Root.AcroForm
    writer.trailer.Root.AcroForm.update(pdfrw.PdfDict(NeedAppearances=pdfrw.PdfObject('true')))
    writer.write(out_file)

In [68]:
pdf_form_info(pdfrw.PdfReader('sample_pdf.pdf'))

AttributeError: 'str' object has no attribute 'to_unicode'

In [None]:
pdf = pdfrw.PdfReader('ex2.pdf')
for page in pdf.pages:
    annotations = page['/Annots']
    if annotations is None:
        continue

    for annotation in annotations:
        if annotation['/Subtype']=='/Widget':
            if not annotation['/T']:
                annotation=annotation['/Parent']
            if annotation['/T']:
                print(type(annotation['/T']))
                key = annotation['/T'].to_unicode()
                print(key)
                <class 'pdfrw.objects.pdfstring.PdfString'>

In [64]:
import pdfrw
from pdf2image import convert_from_path # Needs conda install -c conda-forge poppler
from PIL import Image
from collections import OrderedDict

ANNOT_KEY = '/Annots'               # key for all annotations within a page
ANNOT_FIELD_KEY = '/T'              # Name of field. i.e. given ID of field
ANNOT_FORM_type = '/FT'             # Form type (e.g. text/button)
ANNOT_FORM_button = '/Btn'          # ID for buttons, i.e. a checkbox
ANNOT_FORM_text = '/Tx'             # ID for textbox
ANNOT_FORM_options = '/Opt'
ANNOT_FORM_combo = '/Ch'
SUBTYPE_KEY = '/Subtype'
WIDGET_SUBTYPE_KEY = '/Widget'
ANNOT_FIELD_PARENT_KEY = '/Parent'  # Parent key for older pdf versions
ANNOT_FIELD_KIDS_KEY = '/Kids'      # Kids key for older pdf versions
ANNOT_VAL_KEY = '/V'
ANNOT_RECT_KEY = '/Rect'

def get_form_fields(input_pdf_path):
    """
    Retrieves the form fields from a pdf to then be stored as a dictionary and
    passed to the write_fillable_pdf() function. Uses pdfrw.
    Parameters
    ---------
    input_pdf_path: str
        Path to the pdf you want the fields from.
    Returns
    ---------
    A dictionary of form fields and their filled values.
    """
    data_dict = {}

    pdf = pdfrw.PdfReader(input_pdf_path)
    for page in pdf.pages:
        annotations = page[ANNOT_KEY]
        if annotations:
            for annotation in annotations:
                if annotation[SUBTYPE_KEY] == WIDGET_SUBTYPE_KEY:
                    if annotation[ANNOT_FIELD_KEY]:
                        key = annotation[ANNOT_FIELD_KEY][1:-1]
                        data_dict[key] = ''
                        if annotation[ANNOT_VAL_KEY]:
                            value = annotation[ANNOT_VAL_KEY]
                            data_dict[key] = annotation[ANNOT_VAL_KEY]
                            try:
                                if type(annotation[ANNOT_VAL_KEY]) == pdfrw.objects.pdfstring.PdfString:
                                    data_dict[key] = pdfrw.objects.PdfString.decode(annotation[ANNOT_VAL_KEY])
                                elif type(annotation[ANNOT_VAL_KEY]) == pdfrw.objects.pdfname.BasePdfName:
                                    if '/' in annotation[ANNOT_VAL_KEY]:
                                        data_dict[key] = annotation[ANNOT_VAL_KEY][1:]
                            except:
                                pass
                    elif annotation['/AP']:
                        if not annotation['/T']:
                            annotation = annotation['/Parent']
                        key = annotation['/T'].to_unicode()
                        data_dict[key] = annotation[ANNOT_VAL_KEY]
                        try:
                            if type(annotation[ANNOT_VAL_KEY]) == pdfrw.objects.pdfstring.PdfString:
                                data_dict[key] = pdfrw.objects.PdfString.decode(annotation[ANNOT_VAL_KEY])
                            elif type(annotation[ANNOT_VAL_KEY]) == pdfrw.objects.pdfname.BasePdfName:
                                if '/' in annotation[ANNOT_VAL_KEY]:
                                    data_dict[key] = annotation[ANNOT_VAL_KEY][1:]
                        except:
                            pass
    return data_dict


def print_form_fields(input_pdf_path):
    """
    Retrieves the form fields from get_form_fields(), then pretty prints
    the data_dict. Uses pdfrw.
    Parameters
    ---------
    input_pdf_path: str
        Path to the pdf you want the fields from.
    Returns
    ---------
    """
    data_dict = get_form_fields(input_pdf_path)
    print("{" + ",\n".join("{!r}: {!r}".format(k, v) for k, v in data_dict.items()) + "}")


def flatten_pdf(input_pdf_path, output_pdf_path, as_images=False):
    """
    Flattens the pdf so each annotation becomes uneditable. This function provides
    two ways to do so, either with the pdfrw function annotation.update(pdfrw.PdfDict(Ff=1))
    or converting the pages to images then reinserting.
    Parameters
    ---------
    input_pdf_path: str
        Path to the pdf you want to flatten.
    output_pdf_path: str
        Path of the new pdf that is generated.
    as_images: bool
        Default is False meaning it will update each individual annotation and set
        it to False. True means it will convert to images and then reinsert into the
        pdf
    Returns
    ---------
    """
    if as_images == True:
        images = convert_from_path(input_pdf_path) 
        im1 = images[0]
        images.pop(0)

        pdf1_filename = output_pdf_path

        im1.save(pdf1_filename, "PDF" ,resolution=100.0, save_all=True, append_images=images)
    else:
        ANNOT_KEY = '/Annots'               # key for all annotations within a page

        template_pdf = pdfrw.PdfReader(input_pdf_path)
        for Page in template_pdf.pages:
            if Page[ANNOT_KEY]:
                for annotation in Page[ANNOT_KEY]:
                    annotation.update(pdfrw.PdfDict(Ff=1))
        template_pdf.Root.AcroForm.update(pdfrw.PdfDict(NeedAppearances=pdfrw.PdfObject('true')))
        pdfrw.PdfWriter().write(output_pdf_path, template_pdf)
        

def convert_dict_values_to_string(dictionary):
    """
    Converts dictionary values to string including arrays and tuples.
    Parameters
    ---------
    dictionary: dict
        Any single level dictionary. Specifically made for the data_dict returned from
        the function get_form_fields() from the fillpdf library
    Returns
    ---------
    res: dict
        The resulting dictionary with only string values.
    """
    list_delim, tuple_delim = '-', '^'
  
    res = dict()
    for sub in dictionary:

        # checking data types
        if isinstance(dictionary[sub], list):
            res[sub] = dictionary[sub]
        elif isinstance(dictionary[sub], tuple):
            res[sub] = tuple_delim.join(list([str(ele) for ele in dictionary[sub]]))
        else:
            res[sub] = str(dictionary[sub])
            
    return res    
    
    
def write_fillable_pdf(input_pdf_path, output_pdf_path, data_dict, flatten=False):
    """
    Writes the dictionary values to the pdf. Currently supports text and buttons.
    Does so by updating each individual annotation with the contents of the dat_dict.
    Parameters
    ---------
    input_pdf_path: str
        Path to the pdf you want to flatten.
    output_pdf_path: str
        Path of the new pdf that is generated.
    data_dict: dict
        The data_dict returned from the function get_form_fields()
    flatten: bool
        Default is False meaning it will stay editable. True means the annotations
        will be uneditable.
    Returns
    ---------
    """
    data_dict = convert_dict_values_to_string(data_dict)

    template_pdf = pdfrw.PdfReader(input_pdf_path)
    for Page in template_pdf.pages:
        if Page[ANNOT_KEY]:
            for annotation in Page[ANNOT_KEY]:
                target = annotation if annotation[ANNOT_FIELD_KEY] else annotation[ANNOT_FIELD_PARENT_KEY]
                if annotation[ANNOT_FORM_type] == None:
                    pass
                if target and annotation[SUBTYPE_KEY] == WIDGET_SUBTYPE_KEY:
                    key = target[ANNOT_FIELD_KEY][1:-1] # Remove parentheses
                    if key in data_dict.keys():
                        if target[ANNOT_FORM_type] == ANNOT_FORM_button:
                            # button field i.e. a radiobuttons
                            if not annotation['/T']:
                                if annotation['/AP']:
                                    keys = annotation['/AP']['/N'].keys()
                                    if keys[0]:
                                        if keys[0][0] == '/':
                                            keys[0] = str(keys[0][1:])
                                    list_delim, tuple_delim = '-', '^'
                                    res = dict()
                                    for sub in data_dict:
                                        if isinstance(data_dict[sub], list):
                                            res[sub] = list_delim.join([str(ele) for ele in data_dict[sub]]) 
                                        else:
                                            res[sub] = str(data_dict[sub])
                                    temp_dict = res
                                    annotation = annotation['/Parent']
                                    options = []
                                    for each in annotation['/Kids']:
                                        keys2 = each['/AP']['/N'].keys()
                                        if ['/Off'] in keys:
                                            keys2.remove('/Off')
                                        export = keys2[0]
                                        if '/' in export:
                                            options.append(export[1:])
                                        else:
                                            options.append(export)
                                        if f'/{data_dict[key]}' == export:
                                            val_str = pdfrw.objects.pdfname.BasePdfName(f'/{data_dict[key]}')
                                        else:
                                            val_str = pdfrw.objects.pdfname.BasePdfName(f'/Off')
                                        if set(keys).intersection(set(temp_dict.values())):
                                            each.update(pdfrw.PdfDict(AS=val_str))
                                    if data_dict[key] not in options:
                                        if data_dict[key] != "None":
                                            raise KeyError(f"{data_dict[key]} Not An Option, Options are {options}")
                                    else:
                                        if set(keys).intersection(set(temp_dict.values())):
                                            annotation.update(pdfrw.PdfDict(V=pdfrw.objects.pdfname.BasePdfName(f'/{data_dict[key]}')))
                            else:
                                # button field i.e. a checkbox
                                target.update( pdfrw.PdfDict( V=pdfrw.PdfName(data_dict[key]) , AS=pdfrw.PdfName(data_dict[key]) ))
                                if target[ANNOT_FIELD_KIDS_KEY]:
                                    target[ANNOT_FIELD_KIDS_KEY][0].update( pdfrw.PdfDict( V=pdfrw.PdfName(data_dict[key]) , AS=pdfrw.PdfName(data_dict[key]) ))
                        elif target[ANNOT_FORM_type] == ANNOT_FORM_combo:
                            # Drop Down Combo Box
                            export = None
                            options = annotation[ANNOT_FORM_options]
                            if len(options) > 0:
                                if type(options[0]) == pdfrw.objects.pdfarray.PdfArray:
                                    options = list(options)
                                    options = [pdfrw.objects.pdfstring.PdfString.decode(x[0]) for x in options]
                                if type(options[0]) == pdfrw.objects.pdfstring.PdfString:
                                    options = [pdfrw.objects.pdfstring.PdfString.decode(x) for x in options]
                            if type(data_dict[key]) == list:
                                export = []
                                for each in options:
                                    if each in data_dict[key]:
                                        export.append(pdfrw.objects.pdfstring.PdfString.encode(each))
                                if export is None:
                                    if data_dict[key] != "None":
                                        raise KeyError(f"{data_dict[key]} Not An Option For {annotation[ANNOT_FIELD_KEY]}, Options are {options}")
                                pdfstr = pdfrw.objects.pdfarray.PdfArray(export)
                            else:
                                for each in options:
                                    if each == data_dict[key]:
                                        export = each
                                if export is None:
                                    if data_dict[key] != "None":
                                        raise KeyError(f"{data_dict[key]} Not An Option For {annotation[ANNOT_FIELD_KEY]}, Options are {options}")
                                pdfstr = pdfrw.objects.pdfstring.PdfString.encode(data_dict[key])
                            annotation.update(pdfrw.PdfDict(V=pdfstr, AS=pdfstr))
                        elif target[ANNOT_FORM_type] == ANNOT_FORM_text:
                            # regular text field
                            target.update( pdfrw.PdfDict( V=data_dict[key], AP=data_dict[key]) )
                            if target[ANNOT_FIELD_KIDS_KEY]:
                                target[ANNOT_FIELD_KIDS_KEY][0].update( pdfrw.PdfDict( V=data_dict[key], AP=data_dict[key]) )
                if flatten == True:
                    annotation.update(pdfrw.PdfDict(Ff=1))
    template_pdf.Root.AcroForm.update(pdfrw.PdfDict(NeedAppearances=pdfrw.PdfObject('true')))
    pdfrw.PdfWriter().write(output_pdf_path, template_pdf)

In [24]:
from fillpdf import fillpdfs

In [25]:
fillpdfs.get_form_fields("ex2.pdf") 
# get_form_fields("new_samp.pdf")
# get_form_fields("full-sample.pdf")

{'Given Name Text Box': '',
 'Family Name Text Box': '',
 'Address 1 Text Box': '',
 'House nr Text Box': '',
 'Address 2 Text Box': '',
 'Postcode Text Box': '',
 'City Text Box': '',
 'Country Combo Box': '',
 'Gender List Box': 'Man',
 'Height Formatted Field': '150',
 'Driving License Check Box': 'Off',
 'Language 1 Check Box': 'Off',
 'Language 2 Check Box': 'Yes',
 'Language 3 Check Box': 'Off',
 'Language 4 Check Box': 'Off',
 'Language 5 Check Box': 'Off',
 'Favourite Colour List Box': 'Red'}

In [26]:
data_dict = {'Given Name Text Box': '',
 'Family Name Text Box': '',
 'Address 1 Text Box': '',
 'House nr Text Box': '',
 'Address 2 Text Box': '',
 'Postcode Text Box': '',
 'City Text Box': '',
 'Country Combo Box': 'Finland',
 'Gender List Box': 'Woman',
 'Height Formatted Field': '150',
 'Driving License Check Box': 'Yes',
 'Language 1 Check Box': 'Off',
 'Language 2 Check Box': 'Yes',
 'Language 3 Check Box': 'Yes',
 'Language 4 Check Box': 'Off',
 'Language 5 Check Box': 'Off',
 'Favourite Colour List Box': 'Blue'}

# data_dict = {'Name': '',
#  'Dropdown2': 'Feb',
#  'Dropdown1': '6',
#  'Dropdown3': '2025',
#  'Address': 'iejijiejfi',
#  'Check Box1': '',
#  'Check Box2': 'Yes',
#  'Check Box3': '',
#  'Check Box4': '',
#  'Text5': 'Yo',
#  'Group6': '3',
#  'Text6': 'Man',
#  'Button7': None}

# data_dict = {'Address': '',
#  'City': '',
#  'State': '',
#  'Zip': '',
#  'Opt in': 'Yes',
#  'Gender': 'Female-Value',
#  'Radio': 'Radio-2',
#  'Colors': ['Green-Value', 'Blue Value']}

In [27]:
fillpdfs.write_fillable_pdf('ex2.pdf', 'new.pdf', data_dict)

In [79]:
# write_fillable_pdf('sample_pdf.pdf', 'new_samp.pdf', data_dict)

In [80]:
# write_fillable_pdf('full-sample.pdf', 'new-samp.pdf', data_dict)

In [81]:
fillpdfs.write_fillable_pdf('new_samp.pdf', 'samp2.pdf', data_dict)

In [32]:
from PyPDF2.pdf import PdfFileReader
doc = PdfFileReader('new-samp.pdf')
doc.stream.seek(0) # Necessary since the comment is ignored for the PDF analysis
print(doc.stream.readline().decode())

%PDF-1.3



In [4]:
import pdfrw

In [18]:
pdf = pdfrw.PdfReader('samp2.pdf')

In [14]:
print(pdf)

{'/Size': '63', '/Root': {'/Type': '/Catalog', '/Pages': {'/Type': '/Pages', '/Resources': (37, 0), '/MediaBox': ['0', '0', '595', '842'], '/Kids': [{'/Type': '/Page', '/Parent': {...}, '/Resources': (37, 0), '/MediaBox': ['0', '0', '595', '842'], '/Annots': [(5, 0), (7, 0), (23, 0), (8, 0), (9, 0), (10, 0), (13, 0), (11, 0), (22, 0), (12, 0), (14, 0), (17, 0), (18, 0), (19, 0), (20, 0), (21, 0), (16, 0)], '/Group': {'/S': '/Transparency', '/CS': '/DeviceRGB', '/I': 'true'}, '/Contents': (2, 0)}], '/Count': '1'}, '/OpenAction': [(1, 0), '/XYZ', 'null', 'null', '0'], '/ViewerPreferences': {'/DisplayDocTitle': 'true'}, '/Lang': '(en-GB)', '/AcroForm': {'/Fields': [(5, 0), (7, 0), (8, 0), (9, 0), (10, 0), (11, 0), (12, 0), (13, 0), (14, 0), (16, 0), (17, 0), (18, 0), (19, 0), (20, 0), (21, 0), (22, 0), (23, 0)], '/DR': (37, 0), '/NeedAppearances': 'true'}}, '/Info': {'/Title': '<FEFF00500044004600200046006F0072006D0020004500780061006D0070006C0065>', '/Keywords': '<FEFF00500044004600200046