### Define helper functions

In [3]:
import urllib.request
import json
import numpy as np
import pandas as pd
import altair
import pprint
import os, subprocess, sys,codecs, locale
import re
import traceback
from collections import namedtuple
import datetime
import pdfplumber
%matplotlib inline

In [4]:
pd.options.display.width = 250
pd.options.display.max_colwidth = 250
pd.options.display.max_rows = 250


In [5]:
# some helper functions to obtain the right output from API
def remove_spaces(input):
    return input.replace(' ', '%20')

def pdf_to_text(pdf_path):
    #sys.setdefaultencoding("utf-8")
    os_encoding = locale.getpreferredencoding()
    args = ["pdftotext.exe",pdf_path, "-"]
    res = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output = (res.stdout).decode(os_encoding,'ignore')
    return(output)


# this is the preferred method, pdfplumber works better then pdftotext
def pdf_to_text2(pdf_path):
    with pdfplumber.open(pdf_path) as pdf:
        first_page = pdf.pages[0]
        return first_page.extract_text(x_tolerance=2, y_tolerance=3)


def retrieve_and_save_pdf(id, overwrite=False):
    if not os.path.exists(path + id + '.pdf') or overwrite:
        base = 'https://gegevensmagazijn.tweedekamer.nl/OData/v4/2.0/'
        url = 'Document('+id+')/Resource'
        end = ''
        response = urllib.request.urlopen(remove_spaces(base+url))
        data = response.read()      # a `bytes` object
        f = open(path+id+'.pdf', 'w+b')
        f.write(data)
        f.close()
    else:
        pass

def query_API(skip):
    base = 'https://gegevensmagazijn.tweedekamer.nl/OData/v4/2.0/'
    url = "/Zaak?$filter= Soort eq 'Motie'&$expand=Besluit($expand=Stemming),Document,ZaakActor,Agendapunt&$count=true&$skip="+skip
    end = '&$format=application/json;odata.metadata=full'
    response = urllib.request.urlopen(remove_spaces(base+url))
    data = response.read()      # a `bytes` object
    return(json.loads(data))
#retrieve_and_save_pdf('1b7f8f8c-4579-4103-9f51-9280358f7b8a')
#a = pdf_to_text(path+'3c2dbc59-beb1-42d8-9be4-e09238f20a97.pdf')
#print(a)

### Process API response to dict

In [6]:
def add_API_to_dict(data):
    try:
        for z in data['value']:
            #print('\n\n','nummer',z['Nummer'])
            info[z['Nummer']]={'Titel':z['Titel'],
                              #'Status':z['Status'],
                              'Onderwerp':z['Onderwerp'],
                              'Vergaderjaar':z['Vergaderjaar'],
                              'GestartOp':z['GestartOp']}

            for b in z['Besluit']:
                info[z['Nummer']]['BesluitTekst']=b['BesluitTekst']
                info[z['Nummer']]['StemmingsSoort']=b['StemmingsSoort']
                info[z['Nummer']]['BesluitSoort']=b['BesluitSoort']

                #print('\n','besluit',b['BesluitTekst'])
                if b['BesluitSoort'] and b['BesluitSoort'] in ['Stemmen - aangenomen','Stemmen - verworpen', 'Stemmen - niet aangenomen']:        
                    for s in b['Stemming']:
                        #print(s)
                        if s['Soort'] in ['Voor', 'Tegen']:
                            info[z['Nummer']]['Stem_'+s['ActorFractie']]=(s['Soort'])
                            info[z['Nummer']]['Aantal_stemmen_'+str(s['ActorFractie'])]=(s['FractieGrootte'])
                        #info[z['Nummer']]['Vergissing_'+str(s['ActorFractie'])]=(s['Vergissing']) niet nodig, want Stem is altijd de gecorrigeerde stem (dus niet de vergissing)
                    #print('stemmingenverwerkt')
                    break
            for a in z['Agendapunt']:
                info[z['Nummer']]['AgendapuntOnderwerp']=a['Onderwerp']

            for d in z['Document']:
                if d['Soort'][:5]=='Motie':
                    #print('\n','doc',d)
                    info[z['Nummer']]['doc_Id']=d['Id']
                    info[z['Nummer']]['Volgnummer']=d['Volgnummer']
                    info[z['Nummer']]['Datum']=d['Datum']
                    if INCLUDE_PDF:
                        #print('doing pdf work')
                        retrieve_and_save_pdf(d['Id'])
                        info[z['Nummer']]['Text']=pdf_to_text(path+d['Id']+'.pdf')
            
            info[z['Nummer']]['Indiener_persoon'] = []
            info[z['Nummer']]['Medeindiener_persoon'] = []
            for za in z['ZaakActor']:
                #print('\n','za',za)
                if za['Relatie'] == 'Indiener' and za['ActorFractie']:
                    info[z['Nummer']]['Indiener_persoon'].append(za['ActorNaam'])
                    info[z['Nummer']]['Indiener_'+za['ActorFractie']]=1
                
                if za['Relatie'] == 'Medeindiener' and za['ActorFractie']:
                    info[z['Nummer']]['Medeindiener_persoon'].append(za['ActorNaam'])
                    info[z['Nummer']]['Medeindiener_'+za['ActorFractie']]=1
    except Exception as e: 
        print(e)
        pprint.pprint(z) 
        traceback.print_exc() 
        print(s)
        sys.exit()


### Get all moties

In [7]:
# main routine to query api
FIRST_ONLY = False
INCLUDE_PDF = False
START = 0
MAXIMUM = 100000
path = 'pdf/' # where to store pdfs of motions
skip = 0
info = {}
print ('started')
data = query_API(str(START))
count = data['@odata.count']
# add_API_to_dict(data)
skip = START +  250

started


In [8]:
if not FIRST_ONLY:
    while skip < count + 250 and skip < MAXIMUM:
        print('query for ',skip)
        data = query_API(str(skip))
        add_API_to_dict(data)
        skip +=250
print('finished')

query for  250
query for  500
query for  750
query for  1000
query for  1250
query for  1500
query for  1750
query for  2000
query for  2250
query for  2500
query for  2750
query for  3000
query for  3250
query for  3500
query for  3750
query for  4000
query for  4250
query for  4500
query for  4750
query for  5000
query for  5250
query for  5500
query for  5750
query for  6000
query for  6250
query for  6500
query for  6750
query for  7000
query for  7250
query for  7500
query for  7750
query for  8000
query for  8250
query for  8500
query for  8750
query for  9000
query for  9250
query for  9500
query for  9750
query for  10000
query for  10250
query for  10500
query for  10750
query for  11000
query for  11250
query for  11500
query for  11750
query for  12000
query for  12250
query for  12500
query for  12750
query for  13000
query for  13250
query for  13500
query for  13750
query for  14000
query for  14250
query for  14500
query for  14750
query for  15000
query for  15250
query

In [9]:
import pickle
with open('moties_unprocessed.pickle', 'wb') as handle:
    pickle.dump(info, handle, protocol=pickle.HIGHEST_PROTOCOL)

## Download PDFs

In [10]:
import pickle
file = open("moties_unprocessed.pickle","rb")
info = pickle.load(file)

download_errors = set()
no_doc_id = set()

# takes a long time
for i, (k,v) in enumerate(info.items()):
    if i % 100 == 0: print(i)
    doc_id = v.get('doc_Id', None)
    if doc_id:
        try:
            retrieve_and_save_pdf(doc_id)
            info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
        except Exception as e:
            print('download or pdf_to_text error')
            traceback.print_exc() 
            download_errors.add((k,doc_id))
    else:
        no_doc_id.add(k)
df_text = pd.DataFrame(info).T

0
100
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

200
300
400
500
600
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

700
800
900
1000
1100
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

1200
1300
1400
1500
1600
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

1700
1800
1900
2000
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

3100
3200
3300
3400
3500
3600
3700
3800
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

3900
4000
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

4100
4200
4300
4400
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

4500
4600
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

4700
4800
4900
5000
5100
5200
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

5300
5400
5500
5600
5700
5800
5900
6000
6100
6200
6300
6400
6500
6600
6700
6800
6900
7000
7100
7200
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

7300
7400
7500
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

7600
7700
7800
7900
8000
8100
8200
8300
8400
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

8500
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

8600
8700
8800
8900
9000
9100
9200
9300
9400
9500
9600
9700
9800
9900
10000
10100
10200
10300
10400
10500
10600
10700
10800
10900
11000
11100
11200
11300
11400
11500
11600
11700
download or pdf_to_text error
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

11800
11900
12000
12100
12200
12300
12400
12500
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

12600
12700
12800
12900
download or pdf_to_text error
13000


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

13100
13200
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

13300
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

13400
13500
13600
13700
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

13800
13900
14000
14100
14200
14300
14400
14500
14600
14700
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

14800
14900
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

15000
15100
15200
15300
15400
15500
15600
15700
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

15800
15900
16000
16100
16200
16300
16400
16500
16600
16700
16800
16900
17000
17100
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

17200
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

17300
17400
17500
17600
17700
17800
17900
18000
18100
18200
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

18300
18400
18500
18600
18700
18800
18900
19000
19100
19200
19300
19400
19500
19600
19700
19800
19900
20000
20100
20200
20300
20400
20500
20600
20700
20800
20900
21000
21100
21200
21300
21400
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

21500
21600
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

21700
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

21800
21900
22000
22100
22200
22300
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

22400
22500
22600
22700
download or pdf_to_text error
22800


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

22900
23000
23100
23200
23300
23400
23500
23600
23700
23800
23900
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

24000
24100
24200
24300
24400
24500
24600
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

24700
24800
24900
25000
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

25100
25200
25300
25400
25500
25600
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

25700
25800
25900
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

26000
26100
26200
26300
26400
26500
26600
26700
26800
26900
27000
27100
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

27200
27300
27400
27500
27600
27700
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

27800
27900
28000
28100
28200
28300
28400
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

28500
28600
28700
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

28800
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

28900
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

29000
29100
29200
29300
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

29400
29500
29600
29700
29800
29900
30000
30100
30200
30300
30400
30500
30600
30700
30800
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

30900
31000
31100
31200
31300
31400
31500
31600
31700
31800
31900
32000
32100
32200
32300
32400
32500
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

32600
32700
32800
32900
33000
33100
33200
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

33300
33400
33500
33600
33700
33800
33900
34000
34100
34200
34300
34400
34500
34600
34700
34800
34900
35000
35100
35200
35300
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

35400
35500
35600
35700
35800
35900
36000
36100
36200
36300
36400
36500
36600
36700
36800
36900
37000
37100
37200
37300
37400
37500
37600
37700
37800
37900
38000
38100
38200
38300
38400
38500
38600
38700
38800
38900
39000
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

39100
39200
39300
39400
39500
39600
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

39700
39800
39900
40000
40100
40200
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

40300
40400
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

40500
40600
40700
40800
40900
41000
41100
41200
41300
41400
41500
41600
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

41700
41800
41900
42000
42100
42200
42300
42400
42500
42600
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

42700
42800
42900
43000
43100
43200
43300
download or pdf_to_text error
43400


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

43500
43600
43700
43800
43900
44000
44100
44200
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

44300
44400
44500
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

44600
44700
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

44800
44900
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

45000
45100
45200
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

45300
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

45400
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

45500
45600
45700
45800
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

45900
46000
46100
46200
46300
46400
46500
46600
46700
46800
46900
47000
47100
47200
47300
47400
47500
47600
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

47700
47800
47900
48000
48100
48200
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

48300
48400
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

48500
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

48600
48700
48800
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

48900
49000
49100
49200
49300
49400
download or pdf_to_text error


Traceback (most recent call last):
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\1447062882.py", line 15, in <module>
    info[k]['Text'] = pdf_to_text2(path + doc_id +'.pdf')
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Gebruiker\AppData\Local\Temp\ipykernel_15816\3182530635.py", line 16, in pdf_to_text2
    with pdfplumber.open(pdf_path) as pdf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 78, in open
    return cls(
           ^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfplumber\pdf.py", line 40, in __init__
    self.doc = PDFDocument(PDFParser(stream), password=password)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Programmeren\adventofcode\.aocenvpy311\Lib\site-packages\pdfminer\pdfdocument.py", line 752, in __init__
    raise PDFSyntaxError("No /Root object! - Is this really a PDF?")
pdfminer

49500
49600
49700
49800
49900
50000
50100


In [15]:
# Some pdf download may have been corrupted. By running this function, we try to redownload the pdf and update the text. Unfortunatly it seems the pdfs are simply corrupted
no_text_idx = df_text[(df_text['Text']=='') | (df_text['Text'].isna())].index
print(len(no_text_idx), 'no texts found')
texts = []
for index in no_text_idx:
    doc_Id = df_text.loc[index,'doc_Id']
    print(doc_Id, type(doc_Id))
    if not pd.isna(doc_Id):
        retrieve_and_save_pdf(doc_Id, overwrite=True)
        t = pdf_to_text(path + doc_Id + '.pdf')
        if t != '':
            df_text.loc[index,'Text'] = t
            texts.append(t)
print(len(texts), 'new pdfs found')

32 no texts found
9a16cbeb-8235-4ca3-9892-2f1383e8fa3b <class 'str'>
6caec6b4-3912-44a3-89c4-32633d52bf1f <class 'str'>
7b4fe8f5-d6e8-4edc-b0f3-0694cb548fc0 <class 'str'>
nan <class 'float'>
ce9ea371-87fa-425c-98e0-c875a44d7012 <class 'str'>
dd67a524-369b-4236-892a-d208459e1c7c <class 'str'>
76707131-c532-45e4-81d0-14f9494100bb <class 'str'>
2ad9fadd-ec8e-48fa-bf98-c6232f1aa68f <class 'str'>
63cb7f5f-7977-4953-a3eb-f7157237f189 <class 'str'>
ffd75a7f-add8-4e20-9cbb-add273d53035 <class 'str'>
494e26c5-5fa1-45ad-b626-4e8129dc2aef <class 'str'>
94f3842c-469c-4bd3-8945-3b19b49618f6 <class 'str'>
d1be751a-e59a-40c6-9ad0-63bfd8857059 <class 'str'>
f9f22364-c3dd-4803-b73e-35badb096e88 <class 'str'>
74a78127-8c7f-4b7b-bd0a-f2b4ebe7e169 <class 'str'>
222145de-9008-496a-a850-ec051d4feb1f <class 'str'>
383c8560-6a79-45f3-8354-54418baf9bc7 <class 'str'>
b0b66fd8-87eb-4d23-a413-5968502315f7 <class 'str'>
46753ded-bda3-4545-a9e3-fa5f087f7fcc <class 'str'>
55bf504a-6b5c-4f41-9e7b-dc0d19027bb4 <class 

In [16]:
import pickle
with open('df_text_before_preprocessing', 'wb') as handle:
    pickle.dump(df_text, handle, protocol=pickle.HIGHEST_PROTOCOL)

## Make dataframe

In [17]:
import pickle
file = open("df_text_before_preprocessing","rb")
df = pickle.load(file)
print(len(df))
column_list = df.columns.values # current columns

50199


In [19]:
df.iloc[0]['Text']

'2\nTweede Kamer der Staten-Generaal\nVergaderjaar 2014–2015\n31 765 Kwaliteit van zorg\nNr. 98 MOTIE VAN HET LID AGEMA\nVoorgesteld 18 november 2014\nDe Kamer,\ngehoord de beraadslaging,\nconstaterende dat het Experiment regelarme instellingen de kwaliteit van\nzorg aanzienlijk verbetert;\noverwegende dat regelarm werken ook nog een forse besparing oplevert;\nverzoekt de regering, zo spoedig mogelijk de landelijke uitrol van het\nexperiment te bewerkstelligen,\nen gaat over tot de orde van de dag.\nAgema\nkst-31765-98ISSN\n0921 - 7371\n’s-Gravenhage 2014 Tweede Kamer, vergaderjaar 2014–2015, 31 765, nr. 98'

### Preprocessing

In [20]:
sum(df['doc_Id'].isnull())

1

In [21]:
def preprocessing_motie(text,doc_id,i): # remove start and end of document to only include relevant text
    try:
        if text and doc_id and text != 'nan':
            try:
                regex = re.findall(r"\d{4}(.*?)Nr.*gehoord de beraadslaging(.*)",text,re.DOTALL)
                regex = (' '.join(regex[0]))
            except Exception:
                regex = re.findall(r"\d{4}(.*?)",text,re.DOTALL)
                regex = (' '.join(regex[0]))
            return regex
        else:
            if not doc_id:
                doc_id.append(i)
            elif not text:
                no_text.append(i)
            return ""
    except Exception:
        print(traceback.print_exc())
        error_processing.append(i)
        return ""


In [22]:
# df['Text'] = df['Text'].astype(str)
# df['Text'] = df['Text'].str.replace('\xad', '')
# df['Text'] = df['Text'].str.replace('\n', '')
# df['Text'] = df['Text'].str.replace('\r', '')
# df['Text'] = df['Text'].str.replace('-', '')

In [23]:
# forget about text preprocessing, do that in later notebook with LDA
import numpy as np
no_text = []
error_processing = []
doc_id = []
# df['Text_processed'] = np.vectorize(preprocessing_motie)(df['Text'],df['doc_Id'],df.index)
len(no_text),len(error_processing),len(doc_id)

(0, 0, 0)

In [24]:
#verwijder hoofdelijke stemmingen
print (len(df), 'removing hoofdelijke stemmingen')
df.drop(df[df['StemmingsSoort'] == 'Hoofdelijk'].index, inplace=True)
print (len(df), '\n')

#verwijder moties zonder stemming
print (len(df), 'removing moties without vote')
print(df['BesluitSoort'].value_counts())
df = df[((df['BesluitSoort'] == 'Stemmen - aangenomen') | (df['BesluitSoort'] == 'Stemmen - verworpen'))]
print (len(df), '\n')

#recode besluitsoort naar -1 en 1
print(df['BesluitSoort'].value_counts())
df['BesluitSoort']=df['BesluitSoort'].replace({'Stemmen - verworpen':'0','Stemmen - aangenomen':'1'})
df['BesluitSoort']=pd.to_numeric(df['BesluitSoort'])
print(df['BesluitSoort'].value_counts(), '\n')

#recode voor en tegen naar 1 en -1
stem_column = [c for c in column_list if 'Stem_' in c]
print(df['Stem_50PLUS'].value_counts())
df[stem_column]=df[stem_column].replace({'Tegen':'-1','Voor':'1','Niet deelgenomen':np.nan})
print(df['Stem_50PLUS'].value_counts(), '\n')

#cast to datetime and sort old to new
df['Datum'] = pd.to_datetime(df['Datum'].str[:10]) # cut off the time & timezone info, we only need date
# df['Datum'] = pd.to_datetime(df['Datum'], utc=True)
df['GestartOp'] = pd.to_datetime(df['GestartOp'].str[:10])
# df['GestartOp'] = pd.to_datetime(df['GestartOp'], utc=True)
# df['Datum'] = df['Datum'].dt.tz_convert(None)
# df['GestartOp'] = df['GestartOp'].dt.tz_convert(None)
df.sort_values('Datum',inplace=True)

50199 removing hoofdelijke stemmingen
49906 

49906 removing moties without vote
Stemmen - verworpen                                       20552
Stemmen - aangenomen                                      18323
Ingediend                                                  4887
Stemmen - aangehouden                                      2467
Termijn - vervallen in verband met verstrijken termijn     1339
Stemmen - aangehouden (al tijdens debat)                    714
Stemmen - ingetrokken                                       552
Overgenomen                                                 359
Stemmen - ingetrokken (al tijdens debat)                    230
Stemmen - uitstellen                                        129
Agenderen - stemmingen                                        9
[Vrij tekstveld / geen Parlisproces]                          4
Ter informatie                                                4
Behandeling wordt voortgezet                                  2
Stemmen                

In [25]:
column_list = df.columns.values
indiener_column = [c for c in column_list if ('Indiener') in c and c != 'Indiener_persoon']
# make a column with the indienende partij
ind = pd.melt(df, value_vars=indiener_column, ignore_index=False)
ind = ind[ind['value'] == 1]
ind = ind.rename(columns={'variable':'Indienende_partij'})
ind['Indienende_partij'] = ind['Indienende_partij'].str[9:]
df['Indiener_persoon'] = df['Indiener_persoon'].map(lambda x: x[0] if x else '')
df = df.join(ind['Indienende_partij'], how='left')
df['Indienende_persoon_partij'] = df['Indiener_persoon'] + ', ' + df['Indienende_partij']

In [26]:
#bereken voor en tegenstemmen
aantal_stemmen_column = [c for c in column_list if 'Aantal' in c]

for i in range(len(stem_column)):
    df[stem_column[i]]=pd.to_numeric(df[stem_column[i]])
    df[aantal_stemmen_column[i]]=pd.to_numeric(df[aantal_stemmen_column[i]])

res = np.multiply(df[stem_column],df[aantal_stemmen_column])
voor = res[res > 0].sum(axis=1)
tegen = abs(res[res < 0].sum(axis=1))
df['Voor'], df['Tegen'] = voor,tegen
df['Delta'] = abs(df['Voor']- df['Tegen'])
df['Sum'] = (df['Voor']+ df['Tegen'])


  res = np.multiply(df[stem_column],df[aantal_stemmen_column])


In [27]:
df = df.replace(-1,0)

In [28]:
df['Jaar'] = df['Datum'].dt.year

In [29]:
# only 26 moties for first year, so let's chop
df = df[(df['Jaar'] > 2008) & (df['Jaar'] < 2025)]

In [30]:
from collections import namedtuple
import datetime
Cabinet = namedtuple('Cabinet', ['name','start', 'end', 'demissionair','coalition'])
Chamber = namedtuple('Chamber', ['name','start', 'end', 'numyears'])

chamber_input = [
    "Balkenende IV,01 01 2009,16 06 2010",# mark begin 2008 is not correct (should be 30 11 2006) but for dataset purpose it it
    "Rutte I,17 06 2010,19 09 2012",
    "Rutte II,20 09 2012,22 03 2017",
    "Rutte III,23 03 2017,30 03 2021", # mark end 2020 is not correct but for dataset purpose it it
    "Rutte IV,31 03 2021,31 12 2024"] 

chambers = {}
for c in chamber_input:
    c = c.split(',')
    name = c[0]
    start = list(map(int, c[1].split()))
    startdate = datetime.datetime(start[2], start[1], start[0])
    end = list(map(int, c[2].split()))
    enddate = datetime.datetime(end[2], end[1], end[0])
    numyears = (enddate - startdate).days/365
    chambers[name] = Chamber(name, startdate, enddate, numyears)


cabinets_input = [
    # "Balkenende III,07 07 2006,22 02 2007,CDA VVD",
    # "Balkenende IV,22 02 2007,14 10 2010,CDA PvdA ChristenUnie",
    "Balkenende IV,18 09 2008,13 10 2010,20 02 2010,CDA PvdA ChristenUnie",
    "Rutte I,14 10 2010,4 11 2012,23 04 2012,VVD CDA PVV",
    "Rutte II,5 11 2012,25 10 2017,14 03 2017,VVD PvdA",
    "Rutte III,26 10 2017,09 01 2022,15 01 2022,CDA VVD D66 ChristenUnie",
    "Rutte IV,10 01 2022, 01 01 2024,01 01 2024,VVD D66 CDA ChristenUnie"]

cabinets = {}
for c in cabinets_input:
    c = c.split(',')
    name = c[0]
    start = list(map(int, c[1].split()))
    startdate = datetime.datetime(start[2], start[1], start[0])
    end = list(map(int, c[2].split()))
    enddate = datetime.datetime(end[2], end[1], end[0])
    demissionair = list(map(int, c[3].split()))
    demissionair_date = datetime.datetime(demissionair[2], demissionair[1], demissionair[0])
    coalition = tuple(c[4].split())
    cabinets[name] = Cabinet(name, startdate, enddate, demissionair_date, coalition)
cabinets, chambers

({'Balkenende IV': Cabinet(name='Balkenende IV', start=datetime.datetime(2008, 9, 18, 0, 0), end=datetime.datetime(2010, 10, 13, 0, 0), demissionair=datetime.datetime(2010, 2, 20, 0, 0), coalition=('CDA', 'PvdA', 'ChristenUnie')),
  'Rutte I': Cabinet(name='Rutte I', start=datetime.datetime(2010, 10, 14, 0, 0), end=datetime.datetime(2012, 11, 4, 0, 0), demissionair=datetime.datetime(2012, 4, 23, 0, 0), coalition=('VVD', 'CDA', 'PVV')),
  'Rutte II': Cabinet(name='Rutte II', start=datetime.datetime(2012, 11, 5, 0, 0), end=datetime.datetime(2017, 10, 25, 0, 0), demissionair=datetime.datetime(2017, 3, 14, 0, 0), coalition=('VVD', 'PvdA')),
  'Rutte III': Cabinet(name='Rutte III', start=datetime.datetime(2017, 10, 26, 0, 0), end=datetime.datetime(2022, 1, 9, 0, 0), demissionair=datetime.datetime(2022, 1, 15, 0, 0), coalition=('CDA', 'VVD', 'D66', 'ChristenUnie')),
  'Rutte IV': Cabinet(name='Rutte IV', start=datetime.datetime(2022, 1, 10, 0, 0), end=datetime.datetime(2024, 1, 1, 0, 0), dem

In [31]:
import datetime
def get_period(entities, date):
    # date = datetime.datetime.strptime(date_string[:10], '%Y-%m-%d')
    # date = date.replace(tzinfo=None)
    for c in entities.values():
        if ((c.start <= date) or (date < datetime.datetime(2009,1,1))) and c.end >= date:
            return c.name
    else:
        return 'Error'


def get_demissionair(cabinets, date):
    # date = datetime.datetime.strptime(date_string[:10], '%Y-%m-%d')
    c = get_period(cabinets, date)
    # date = date.replace(tzinfo=None)

    return False if date < cabinets[c].demissionair else True
    
df['Kabinet'] = [get_period(cabinets, d) for d in df['Datum']]
df['Demissionair'] = [get_demissionair(cabinets, d) for d in df['Datum']]
df['Kamer'] = [get_period(chambers, d) for d in df['Datum']]
df['Kabinet'].value_counts()

Rutte III        14361
Rutte II         11469
Rutte IV          4744
Rutte I           4691
Balkenende IV     2701
Name: Kabinet, dtype: int64

In [32]:
INCLUDE_PDF = True

# sort columns
column_list = df.columns.values # current columns
column_order = ['Titel','AgendapuntOnderwerp','Onderwerp','Datum','GestartOp','Vergaderjaar','Jaar','Indiener_persoon', 'Indienende_partij','Indienende_persoon_partij','Kamer', 'Kabinet','Demissionair','StemmingsSoort','BesluitSoort','BesluitTekst','doc_Id','Volgnummer', 'Voor', 'Tegen', 'Delta', 'Sum', 'Text'] # desired columns

not_in_column_order = set(column_list) - set(column_order)
 # don't throw away columns
column_order += sorted(not_in_column_order)
df = df[column_order]

In [33]:
#df = df[df['Onderwerp'].str.contains('klimaat')]
import pickle
with open('moties_processed_df.pickle', 'wb') as handle:
    pickle.dump(df, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [34]:
df.head()

Unnamed: 0,Titel,AgendapuntOnderwerp,Onderwerp,Datum,GestartOp,Vergaderjaar,Jaar,Indiener_persoon,Indienende_partij,Indienende_persoon_partij,...,Stem_PvdD,Stem_SGP,Stem_SP,Stem_VVD,Stem_Van Haga,Stem_Van Klaveren,Stem_Van Kooten-Arissen,Stem_Van Vliet,Stem_Verdonk,Stem_Volt
2009Z00493,De situatie in het Midden-Oosten,Stemmingen over: moties ingediend bij het VAO Gaza,Motie-Haverkamp c.s. over de noodzakelijke inspanningen die de mate van humanitaire hulp voor de Gazastreek vergroot,2009-01-14,2009-01-14,2008-2009,2009,M.C. Haverkamp,CDA,"M.C. Haverkamp, CDA",...,1.0,0.0,1.0,0.0,,,,,0.0,
2009Z00492,De situatie in het Midden-Oosten,Stemmingen over: moties ingediend bij het VAO Gaza,Motie-Peters over twee doelen voor een EU-grensbewakingsmissie,2009-01-14,2009-01-14,2008-2009,2009,M. Peters,GroenLinks,"M. Peters, GroenLinks",...,1.0,0.0,1.0,0.0,,,,,0.0,
2009Z00496,De situatie in het Midden-Oosten,Stemmingen over: moties ingediend bij het VAO Gaza,Motie-Verdonk over geen Nederlandse Defensie-inzet in of rond Gaza,2009-01-14,2009-01-14,2008-2009,2009,M.C.F. Verdonk,Verdonk,"M.C.F. Verdonk, Verdonk",...,0.0,0.0,0.0,0.0,,,,,1.0,
2009Z00491,De situatie in het Midden-Oosten,Stemmingen over: moties ingediend bij het VAO Gaza,Motie-Peters/Van Bommel over de EU-Israël-gesprekken die alsnog zijn bevroren vanwege de oorlog in Gaza,2009-01-14,2009-01-14,2008-2009,2009,M. Peters,GroenLinks,"M. Peters, GroenLinks",...,1.0,0.0,1.0,0.0,,,,,0.0,
2009Z00457,Positionering algemene ziekenhuizen,Stemmingen over: moties ingediend bij het debat over de verloskunde in Zeeuwse ziekenhuizen,Motie Agema over garanderen van de keuzevrijheid voor thuis bevallen,2009-01-14,2009-01-14,2008-2009,2009,M. Agema,PVV,"M. Agema, PVV",...,1.0,1.0,1.0,0.0,,,,,1.0,
