In [1]:
import sys
sys.path.append('../../packages')

from textservice import segmentedtext
from annotation import asearch

datadir = '../../data/1728/9dec21/'
sessions_folder = 'CAF-sessions-1728-031221/'
resolutions_folder = 'CAF-resolutions-1728-031221/'
text_store = '1728-textstore-211209.json'
annotation_store = '1728-annotationstore-211209.json'
resource_id = 'volume-1728'

In [2]:
import json
import glob
import re

# read files

all_textlines=segmentedtext.IndexedSegmentedText(resource_id)
all_annotations=[]

def text_region_handler(node, begin_index, end_index, annotations):
    # text_region['metadata'] contains enough info to construct annotations for page and scan.
    # this will result in duplicates, so deduplication at a later stage is necessary.

    if 'iiif_url' in node['metadata']:
        scan_annot_info = {'resource_id': resource_id, 'label':'scan','iiif_url': node['metadata']['iiif_url'],\
                               'begin_anchor': begin_index,'end_anchor': end_index}
        scan_annot_info['id'] = node['metadata']['scan_id']
        annotations.append(scan_annot_info)
        
        page_annot_info = {'resource_id': resource_id, 'label':'page','iiif_url': node['metadata']['iiif_url'],\
                               'begin_anchor': begin_index,'end_anchor': end_index}
        page_annot_info['id'] = node['metadata']['page_id']
        page_annot_info['metadata'] = {'page_id': node['metadata']['page_id'], 'scan_id': node['metadata']['scan_id'],\
                                    'iiif_url': node['metadata']['iiif_url'],}
        page_annot_info['coords'] = node['coords']
        annotations.append(page_annot_info)
        
    return

def paragraph_handler(node):
    print(f"paragraph_handler called for {node['id']}")
    return

untanngle_config = {
    "session": {
        "child_key": "text_regions",
        "child_type": "text_region",
        "extra_fields": ["evidence"]
    },
    "text_region": {
        "child_key": "lines",
        "child_type": "line",
        "extra_fields": [],
        "additional_processing": text_region_handler
    },
    "line": {
        "child_key": None,
        "child_type": None,
        "extra_fields": ["baseline"]
    },
    "resolution": {
        "child_key": "paragraphs",
        "child_type": "republic_paragraph",
        "extra_fields": ["evidence"],
    },
    "republic_paragraph": {
        "child_key": None,
        "child_type":None,
        "extra_fields": ["line_ranges"],
#        "additional_processing": paragraph_handler
    }
}

# We want to load 'text containers' that contain more or less contiguous text and are as long as practically
# possible. Container size is determined by pragmatic reasons, e.g. technical (performance) or user driven
# (corresponding with all scans in a book or volume). This functions returns all component files IN TEXT ORDER.
# Examples: sorted list of files, part of IIIF manifest.

def get_file_sequence_for_container(text_container):
#    path = datadir + sessions_folder + "session-1728-01-15-num*.json"
    path = datadir + sessions_folder + "session-*-num*.json"
    session_file_names = (f for f in glob.glob(path))
    return sorted(session_file_names)

# Many file types contain a hierarchy of ordered text and/or annotation elements of different types. Some form of
# depth-first, post order traversal is necessary. Examples: processing a json hierarchy with dictionaries
# and lists (republic) or parsing TEI XML (DBNL document).

def traverse(node,node_label,text,annotations):
    # find the list that represents the children, each child is a dict
    config = untanngle_config[node_label]
    key_of_children = config['child_key']
    type_of_children = config['child_type']    
    
    coords = None if not 'coords' in node else node['coords']
    metadata = None if not 'metadata' in node else node['metadata']
        
    begin_index = text.len()
    annotation_info = {'resource_id': resource_id, 'label': node_label, 'coords': coords,\
                        'metadata': metadata, 'id':node['id'], 'begin_anchor': begin_index}
    
    # add selected extra_fields to annotation_info
    extra_fields = config['extra_fields']
    for f in extra_fields:
        annotation_info[f] = node[f]
    
    children = [] if key_of_children == None else node[key_of_children]    
    if len(children) == 0:        # if no children, do your 'leaf node thing'
        
        annotation_info['end_anchor'] = text.len()
        node_text = node['text']
        
        if node_text is None:
            node_text = '\n'

        text.append(node_text)
    else:                         # if non-leaf node, first visit children     
        for child in children:
            traverse(child,type_of_children,text,annotations)
        
        end_index = text.len()-1
        annotation_info['end_anchor'] = end_index    # after child text segments are added     
        
    annotations.append(annotation_info)
    
    if 'additional_processing' in config:
        config['additional_processing'](node, begin_index, end_index, annotations)
    
    return

# In case of presence of a hierarchical structure, processing/traversal typically starts from a root element.

def get_root_tree_element(file):
    with open(file, 'r') as myfile:
        session_file=myfile.read() 
        
    session_data = json.loads(session_file)      
    return session_data['_source']

# Rudimentary version of a scanpage_handler

def deduplicate_annotations(a_array, type):
    # filter scanpage annotation_info dicts
    scan_annots = [ann_info for ann_info in a_array if ann_info['label'] == type]
        
    # use groupBy on a list of dicts (zie Python cookbook 1.15)
    from operator import itemgetter
    from itertools import groupby

    # first sort on scans' id
    scan_annots.sort(key=itemgetter('id'))

    # iterate in groups
    aggregated_scan_annots = []

    for id, items in groupby(scan_annots, key=itemgetter('id')):
        # first, convert the 'items' iterator to a list, to able to use it twice (iterators can be used once)
        itemlist = list(items)
    
        # copy the item with the lowest begin_index
        aggr_scan_annot = min(itemlist, key=itemgetter('begin_anchor')).copy()
    
        # replace 'end_anchor' with the highest end_index in the group
        max_end_index = max(itemlist, key=itemgetter('end_anchor'))['end_anchor']
        aggr_scan_annot['end_anchor'] = max_end_index
        
        # add to result
        aggregated_scan_annots.append(aggr_scan_annot)
        
    # replace old scan annotations with correct aggregated ones
    for old_scan in scan_annots:
        a_array.remove(old_scan)
        
    a_array.extend(aggregated_scan_annots)
           
    return

def correct_scan_imageurls(a_array):
    scan_annots = [ann_info for ann_info in a_array if ann_info['label'] == 'scan']
    
    for scan_ann in scan_annots:
        scan_ann['iiif_url'] = re.sub(r'(\d+),(\d+),(\d+),(\d+)/(full)', r'\5/,\4', scan_ann['iiif_url'])
        
    return

# Rudimentary version of a page_handler

#def add_page_annotations(source_data, ann_array):
#    page_data = source_data['page_versions']
        
    # generator
#    page_identifiers = (pg['page_id'] for pg in page_data)
#    page_annots = [{'resource_id': resource_id, 'label' : 'pages','id' : page_id} for page_id in page_identifiers]
        
#    for pa in page_annots:
#        scan_num = int(re.search(r'(\d+)-page-', pa['id']).group(1))
#        scanpage_for_scan_num = [ai for ai in annotation_array if 'scan_num' in ai.keys() and ai['scan_num'] == \
#                                 scan_num]
#        pa['begin_anchor'] = scanpage_for_scan_num[0]['begin_anchor']
#        pa['end_anchor'] = scanpage_for_scan_num[0]['end_anchor']
#        pa['indexesByContainment'] = True
     
#    ann_array.extend(page_annots)
#    return

# Process per file, properly concatenate results, maintaining proper referencing the baseline text elements
for f_name in get_file_sequence_for_container(resource_id):
    text_array = segmentedtext.IndexedSegmentedText()
    annotation_array = []
            
    source_data = get_root_tree_element(f_name)

    traverse(source_data,'session',text_array,annotation_array)
           
    # properly concatenate annotation info taking ongoing line indexes into account
    for ai in annotation_array:
        ai['begin_anchor'] += all_textlines.len()
        ai['end_anchor'] += all_textlines.len()
    
    all_textlines.extend(text_array)
    all_annotations.extend(annotation_array)

deduplicate_annotations(all_annotations, 'scan') 
correct_scan_imageurls(all_annotations)
    
deduplicate_annotations(all_annotations, 'page')
    
for a in asearch.get_annotations_of_type('session', all_annotations):
    print(a)

{'resource_id': 'volume-1728', 'label': 'session', 'coords': None, 'metadata': {'id': 'session-1728-01-02-num-1', 'type': 'session', 'inventory_num': 3783, 'session_date': '1728-01-02', 'session_year': 1728, 'session_month': 1, 'session_day': 2, 'session_weekday': 'Veneris', 'date_shift_status': 'normal', 'session_num': 1, 'president': None, 'attendants_list_id': None, 'resolution_ids': [], 'is_workday': True, 'has_session_date_element': True, 'lines_include_rest_day': False}, 'id': 'session-1728-01-02-num-1', 'begin_anchor': 0, 'evidence': [{'type': 'PhraseMatch', 'phrase': 'Jovis den 1 Januarii', 'variant': 'Jovis den 1 Januarii', 'string': 'Jovis den 1. Januarii', 'offset': 0, 'label': 'session_date', 'text_id': 'NL-HaNA_1.01.02_3783_0051-line-2938-1582-522-64', 'match_scores': {'char_match': 1.0, 'ngram_match': 0.9, 'levenshtein_similarity': 0.9523809523809523}}, {'type': 'PhraseMatch', 'phrase': 'Veneris den 2 Januarii', 'variant': 'Veneris den 2 Januarii', 'string': 'Veneris den 

In [3]:
all_textlines.len()

111124

In [4]:
len(all_annotations)

114940

In [5]:
resolution_annotations=[]

def get_resolution_files_for_container(text_container):
    path = datadir + resolutions_folder + 'session-*-resolutions.json'
#    path = datadir + resolutions_folder + 'session-1728-01-15-num*-resolutions.json'

    resolution_file_names = (f for f in glob.glob(path))
    return sorted(resolution_file_names)

def res_traverse(node, node_label):
    # find the list that represents the children, each child is a dict, assume first list is the correct one
    config = untanngle_config[node_label]
    key_of_children = config['child_key']
    type_of_children = config['child_type']  
    
    children = [] if key_of_children == None else node[key_of_children]
    
    if len(children) == 0:        # if no children, do your 'leaf node thing'
        if len(node['line_ranges']) == 0:  # no associated lines, skip this node
            return
        else:
            begin_line_id = node['line_ranges'][0]['line_id']
            end_line_id = node['line_ranges'][-1]['line_id']
        
    else:  # if non-leaf node, first visit children     
        begin_line_id = children[0]['line_ranges'][0]['line_id']
        end_line_id = children[-1]['line_ranges'][-1]['line_id']
        for child in children:
            res_traverse(child, type_of_children)
    
    if 'additional_processing' in config:
        config['additional_processing'](node)
        
    annotation_info = {'resource_id': resource_id, 'label' : node['type'][-1],\
                        'begin_anchor' : begin_line_id, \
                        'end_anchor': end_line_id, \
                        'metadata': node['metadata'], \
                        'id': node['id']}
    
    # add selected extra_fields to annotation_info
    extra_fields = config['extra_fields']
    for f in extra_fields:
        annotation_info[f] = node[f]
        
    resolution_annotations.append(annotation_info)

    return

# In case of presence of a hierarchical structure, processing/traversal typically starts from a root element.

def get_res_root_element(file):
    with open(file, 'r') as myfile:
        resolution_file=myfile.read() 
        
    resolution_data = json.loads(resolution_file)      
    return resolution_data['hits']['hits']

for f_name in get_resolution_files_for_container(resource_id):    
    # get list of resolution 'hits'
    hits = get_res_root_element(f_name)
    for hit in hits:
        # each hit corresponds with a resolution
        resolution_line_ids = []
        res_traverse(hit['_source'],'resolution')

In [6]:
len(resolution_annotations)

8080

In [7]:
resolution_annotations[20:30]

[{'resource_id': 'volume-1728',
  'label': 'republic_paragraph',
  'begin_anchor': 'NL-HaNA_1.01.02_3783_0051-line-2852-2418-534-59',
  'end_anchor': 'NL-HaNA_1.01.02_3783_0051-line-3671-2550-893-58',
  'metadata': {'inventory_num': 3783,
   'source_id': 'session-1728-01-02-num-1',
   'type': 'resolution_paragraph',
   'id': 'session-1728-01-02-num-1-para-19',
   'text_page_num': [1],
   'page_num': [101],
   'start_offset': 941,
   'doc_id': 'session-1728-01-02-num-1',
   'paragraph_index': 0},
  'id': 'session-1728-01-02-num-1-para-19',
  'line_ranges': [{'start': 0,
    'end': 26,
    'line_id': 'NL-HaNA_1.01.02_3783_0051-line-2852-2418-534-59',
    'text_page_num': 1,
    'page_num': 101},
   {'start': 26,
    'end': 60,
    'line_id': 'NL-HaNA_1.01.02_3783_0051-line-3683-2403-883-95',
    'text_page_num': 1,
    'page_num': 101},
   {'start': 60,
    'end': 95,
    'line_id': 'NL-HaNA_1.01.02_3783_0051-line-3786-2449-778-61',
    'text_page_num': 1,
    'page_num': 101},
   {'star

In [8]:
line_ids_vs_indexes = {}
for line in all_annotations:
    if line['label'] == 'line':
        line_ids_vs_indexes.update({line['id'] : line['begin_anchor']})

In [9]:
len(line_ids_vs_indexes)

111124

In [10]:
num_errors = 0
for res in resolution_annotations:    
    try:
        # res['begin_anchor'] = re.sub(r'-column-', r'-col-', res['begin_anchor'])
        # res['end_anchor'] = re.sub(r'-column-', r'-col-', res['end_anchor'])
        res['begin_anchor'] = line_ids_vs_indexes[res['begin_anchor']]
        res['end_anchor'] = line_ids_vs_indexes[res['end_anchor']]
    except:
        res['begin_anchor'] = 0
        res['end_anchor'] = 0
        num_errors += 1
        
if num_errors > 0:
    print(f"number of lookup errors: {num_errors}")
else:
    print("no lookup errors")

no lookup errors


In [11]:
resolution_annotations[20:30]

[{'resource_id': 'volume-1728',
  'label': 'republic_paragraph',
  'begin_anchor': 44,
  'end_anchor': 50,
  'metadata': {'inventory_num': 3783,
   'source_id': 'session-1728-01-02-num-1',
   'type': 'resolution_paragraph',
   'id': 'session-1728-01-02-num-1-para-19',
   'text_page_num': [1],
   'page_num': [101],
   'start_offset': 941,
   'doc_id': 'session-1728-01-02-num-1',
   'paragraph_index': 0},
  'id': 'session-1728-01-02-num-1-para-19',
  'line_ranges': [{'start': 0,
    'end': 26,
    'line_id': 'NL-HaNA_1.01.02_3783_0051-line-2852-2418-534-59',
    'text_page_num': 1,
    'page_num': 101},
   {'start': 26,
    'end': 60,
    'line_id': 'NL-HaNA_1.01.02_3783_0051-line-3683-2403-883-95',
    'text_page_num': 1,
    'page_num': 101},
   {'start': 60,
    'end': 95,
    'line_id': 'NL-HaNA_1.01.02_3783_0051-line-3786-2449-778-61',
    'text_page_num': 1,
    'page_num': 101},
   {'start': 95,
    'end': 134,
    'line_id': 'NL-HaNA_1.01.02_3783_0051-line-2795-2470-844-56',
    

In [12]:
all_annotations.extend(resolution_annotations)

In [13]:
len(all_annotations)

123020

In [14]:
sessions = asearch.get_annotations_of_type('republic_paragraph', all_annotations, resource_id) 
for s in sessions:
    print(s)

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




{'resource_id': 'volume-1728', 'label': 'republic_paragraph', 'begin_anchor': 101700, 'end_anchor': 101712, 'metadata': {'inventory_num': 3783, 'source_id': 'session-1728-11-23-num-1', 'type': 'resolution_paragraph', 'id': 'session-1728-11-23-num-1-para-9', 'text_page_num': [881], 'page_num': [981], 'start_offset': 2739, 'doc_id': 'session-1728-11-23-num-1', 'paragraph_index': 0}, 'id': 'session-1728-11-23-num-1-para-9', 'line_ranges': [{'start': 0, 'end': 34, 'line_id': 'NL-HaNA_1.01.02_3783_0491-line-2476-1893-885-141', 'text_page_num': 881, 'page_num': 981}, {'start': 34, 'end': 75, 'line_id': 'NL-HaNA_1.01.02_3783_0491-line-3375-1911-897-60', 'text_page_num': 881, 'page_num': 981}, {'start': 75, 'end': 108, 'line_id': 'NL-HaNA_1.01.02_3783_0491-line-2580-1965-781-65', 'text_page_num': 881, 'page_num': 981}, {'start': 108, 'end': 151, 'line_id': 'NL-HaNA_1.01.02_3783_0491-line-3373-1961-899-57', 'text_page_num': 881, 'page_num': 981}, {'start': 151, 'end': 189, 'line_id': 'NL-HaNA_

In [15]:
all_textlines.slice(25218, 25856)

['Deaartis den 30, Maart',
 'dens dertien stuyvers, sonder meer, ingaande',
 '1728.',
 'met den eersten Januarii deses jaars; met last,',
 'PRAESIDE,',
 'om het geen sy verder souden imogen noo-',
 'Den Heere Van Lynden.',
 'digh hebben, te vinden by een personeelen',
 'PRAESENTIBUS,',
 'omslagh over haare Ingezetenen , ende om',
 'De Heeren Van Singendonck, van Heucke-',
 'dese haar Hoogh Mogende Concessie te doen',
 'som, van Heeckeren tot den Brandtzna-',
 'registreeren ten Comptoire van de Domey-',
 'borgh, van Woiynbergen , met twee extra-',
 'nen van Brabandt, om aan den Raadt ende',
 'ordinaris Gedeputeerden uyt de Provincie',
 'Rentmeester generaal der voorschrove Do-',
 'van Gelderlandt.',
 'meynen te dienen tot narichtinge.',
 'Eelbo, Six, Bors van Waveren, van Cat-',
 'ONtfangen een Missive van den Raudt',
 'tenborgb, de Raadt.',
 'Bout, Noey, van Hoorn.',
 'van Staate, geschreven alhier in den',
 'Taats van Amerongen.',
 'Hage den vyf en twintighsten deser',
 'Van Schwartze

Reconstrueer attendant_lists uit sessions en resolutions

Lees 1728_pres.json in uit Rik's bestand

In [16]:
with open(datadir + '1728_pres_211209.json', 'r') as filehandle:
    pres_data = json.loads(filehandle.read())
    
len(pres_data)

298

In [17]:
attendance_lists = asearch.get_annotations_of_type('attendance_list', all_annotations, resource_id)
alists = list(attendance_lists)
alists

[{'resource_id': 'volume-1728',
  'label': 'attendance_list',
  'begin_anchor': 0,
  'end_anchor': 33,
  'metadata': {'inventory_num': 3783,
   'source_id': 'session-1728-01-02-num-1',
   'type': 'attendance_list',
   'id': 'session-1728-01-02-num-1-attendance_list',
   'session_date': '1728-01-02',
   'session_id': 'session-1728-01-02-num-1',
   'session_num': 1,
   'president': None,
   'session_year': 1728,
   'session_month': 1,
   'session_day': 2,
   'session_weekday': 'Veneris',
   'text_page_num': [1],
   'index_timestamp': '2021-12-09T16:02:59.956499'},
  'id': 'session-1728-01-02-num-1-attendance_list',
  'evidence': []},
 {'resource_id': 'volume-1728',
  'label': 'attendance_list',
  'begin_anchor': 573,
  'end_anchor': 593,
  'metadata': {'inventory_num': 3783,
   'source_id': 'session-1728-01-03-num-1',
   'type': 'attendance_list',
   'id': 'session-1728-01-03-num-1-attendance_list',
   'session_date': '1728-01-03',
   'session_id': 'session-1728-01-03-num-1',
   'session

Doorloop  pres_data per zittingsdag. 1) vul attendents_list voor die zittingsdag aan met metadata 2) maak attendants voor iedere aangetroffen span. Reken offset + end om naar line_id(s)

In [18]:
def get_lines_for_offset(offset, end, meeting_lines, offset_in_resource):
    result = []
    line_start = 0
    for line_index, line_text in enumerate(meeting_lines):
        line_end = line_start + len(line_text)+1
        if (offset >= line_start and offset < line_end) or\
            (end >= line_start and end < line_end) or\
            (offset <= line_start and end >= line_end):
            
            result.append(line_index)
        line_start = line_end
    result = [line_index + offset_in_resource for line_index in result]

    return result

In [19]:
attendant_classes = ('president', 'delegate', 'raadpensionaris')

def create_attendants_for_attlist(attlist, att_list_annot, session_id, meeting_lines):
    attendant_annots = []
    spans = attlist['spans']
    for index, s in enumerate(spans):
        if s['class'] in attendant_classes:
            attendant = {'resource_id': resource_id, 'label': 'attendant'}
            attendant['id'] = session_id + '-attendant-' + str(index)
            attendant['metadata'] = s

            overlapping_lines = get_lines_for_offset(s['offset'], s['end'], meeting_lines, \
                            att_list_annot['begin_anchor'])

            if len(overlapping_lines) == 0:
                print(f"Attendant not overlapping with attendance_list for: {session_id}")
                print(f"   non-overlapping attendant: {s}")
            else:
                attendant['begin_anchor'] = min(overlapping_lines)
                attendant['end_anchor'] = max(overlapping_lines)
        
                attendant_annots.append(attendant)

    return attendant_annots

In [20]:
pres_data[2]

{'metadata': {'inventory_num': 3783,
  'coords': [],
  'text': '1728.PRAESIDE, Den Heere Van Wassenaer. PRAESENTIBUS, De Heeren Van Singendonck , van Heeckeren tot Barlham, van Heeckeren tot den Brandizenborgh, van Wynbergen, Torck, Umbgroeven, met ceu extraordinaris Gedeputeerden yt de Provincie van Gelderlandt. Six, Bors van Waveren, de Raadt. Velters, Noey, van Hoorn. Van Renswoude. Van Schwartzenberg, Vegilin. Vaan Haarsolte. Van Tamminga.DE Nesostutien, gisteren genomen , ',
  'zittingsdag_id': 'session-1728-01-03-num-1',
  'url': None},
 'spans': [{'offset': 0,
   'end': 4,
   'class': 'preamble',
   'pattern': '1728',
   'delegate_id': 0,
   'delegate_name': '',
   'delegate_score': 0},
  {'offset': 15,
   'end': 24,
   'class': 'heere',
   'pattern': 'Den Heere',
   'delegate_id': 0,
   'delegate_name': '',
   'delegate_score': 0},
  {'offset': 25,
   'end': 38,
   'class': 'president',
   'pattern': 'Van Wassenaer',
   'delegate_id': 0,
   'delegate_name': '',
   'delegate_sco

Verken even welke 'class' waarden over attendants gaan, filter die dan.

In [21]:
classes = {}
for p in pres_data:
    for span in p['spans']:
        c = span['class']
        if c in classes.keys():
            classes[c] += 1
        else:
            classes[c] = 1

print(classes)

{'nihil': 48, 'heere': 581, 'president': 256, 'presentibus': 286, 'province': 315, 'resumption': 84, 'delegate': 3287, 'preamble': 213, 'resolution_summarized': 185, 'pre': 26, 'raadpensionaris': 111}


Rik's meeting_lines zijn verdwenen, en moet ik nu ergens anders vandaan zien te halen.

Rik gebruikt offset en end tov een concatenatie van tekstregels. Check wat hij precies doet door 'meeting_lines' exact te vergelijken met 'text' uit 'pres_data'. Doe dat voor een random sessie: session-1728-02-20-num-1

In [22]:
sess_02_20 = asearch.get_annotation_by_id('session-1728-02-25-num-1', all_annotations)
att_list_02_20 = asearch.get_annotations_of_type_overlapping('attendance_list', \
                            sess_02_20['begin_anchor'], sess_02_20['end_anchor'], all_annotations, resource_id)

In [23]:
alist = list(att_list_02_20)
print(alist)
lines = all_textlines.slice(alist[0]['begin_anchor'], alist[0]['end_anchor'])
lines

[{'resource_id': 'volume-1728', 'label': 'attendance_list', 'begin_anchor': 14747, 'end_anchor': 14772, 'metadata': {'inventory_num': 3783, 'source_id': 'session-1728-02-25-num-1', 'type': 'attendance_list', 'id': 'session-1728-02-25-num-1-attendance_list', 'session_date': '1728-02-25', 'session_id': 'session-1728-02-25-num-1', 'session_num': 1, 'president': None, 'session_year': 1728, 'session_month': 2, 'session_day': 25, 'session_weekday': 'Mercurii', 'text_page_num': [128], 'index_timestamp': '2021-12-09T16:00:12.481552'}, 'id': 'session-1728-02-25-num-1-attendance_list', 'evidence': []}]


['Mercurii den 25. Februarii',
 '1728.',
 'PRAESIDE,',
 'Den Heere Bout.',
 'PRAESENTIBUS,',
 'De Heeren Van Lynden, van Singendonck,',
 'yvan Heuckelom, van Heeckeren tot den',
 "Brandtzenborgh, van W'ynbergen , met",
 'een extraordinaris Gedeputeerde uyt de Pro-',
 'vincie van Gelderlandt.',
 'Van Wassenaer, vanden Boetzelaar, van-',
 'der Dussen, Bors van Waveren, van',
 'Cattenborgh, Veckhoven, Raadtpensiona-',
 'ris van Slingelandt.',
 'Noey, van Hoorn , met tqwee extraordina-',
 'ris Gedeputeerden uyt de Provincie van',
 'Zeelandt.',
 'Taats van Amerongen, van Rensqwoude.',
 'Van Sehwartzenbergh, Vegilin.',
 'Van Haarsolte, van Isselmuden.',
 'Van Tamminga.',
 'DE Resrolutien , gueeren zeno-',
 'men, zyn gelesen en geresumeert,',
 'gelijck oock geresumeert ende ge-',
 'arresteert zyn de Depesches daar uyt resul-',
 'teerende.']

In [24]:
pres_02_20 = [a for a in pres_data[1:] if a['metadata']['zittingsdag_id'] == 'session-1728-02-25-num-1']
riktext_02_20 = pres_02_20[0]['metadata']['text']
riktext_02_20

"Mercurii den 25. Februarii1728.PRAESIDE, Den Heere Bout. PRAESENTIBUS, De Heeren Van Lynden, van Singendonck, yvan Heuckelom, van Heeckeren tot den Brandtzenborgh, van W'ynbergen , met een extraordinaris Gedeputeerde uyt de Provincie van Gelderlandt. Van Wassenaer, vanden Boetzelaar, vander Dussen, Bors van Waveren, van Cattenborgh, Veckhoven, Raadtpensionaris van Slingelandt. Noey, van Hoorn , met tqwee extraordinaris Gedeputeerden uyt de Provincie van Zeelandt. Taats van Amerongen, van Rensqwoude. Van Sehwartzenbergh, Vegilin. Van Haarsolte, van Isselmuden. Van Tamminga. DE Resrolutien , gueeren zenomen, "

In [25]:
text_02_20 = "Mercurii den 25. Februarii 1728. PRAESIDE, Den Heere Bout. PRAESENTIBUS, De Heeren Van Lynden, van Singendonck, yvan Heuckelom, van Heeckeren tot den Brandtzenborgh, van W'ynbergen , met een extraordinaris Gedeputeerde uyt de Provincie van Gelderlandt. Van Wassenaer, vanden Boetzelaar, vander Dussen, Bors van Waveren, van Cattenborgh, Veckhoven, Raadtpensionaris van Slingelandt. Noey, van Hoorn , met tqwee extraordinaris Gedeputeerden uyt de Provincie van Zeelandt. Taats van Amerongen, van Rensqwoude. Van Sehwartzenbergh, Vegilin. Van Haarsolte, van Isselmuden. Van Tamminga."

Retrieve line_ranges van met attendance_list overlappende paragraph(s)

In [43]:
paragraphs_02_20 = asearch.get_annotations_of_type_overlapping('republic_paragraph', \
                            alist[0]['begin_anchor'], alist[0]['end_anchor'], all_annotations, resource_id)
paras = list(paragraphs_02_20)

offset_table = {}


print(paras[0]['id'])
for range in paras[0]['line_ranges']:
    offset_table[range['line_id']] = (range['start'], range['end'])
       
offset_table

session-1728-02-25-num-1-para-1


{'NL-HaNA_1.01.02_3783_0115-line-495-2211-634-78': (0, 27),
 'NL-HaNA_1.01.02_3783_0115-line-735-2286-143-57': (27, 33),
 'NL-HaNA_1.01.02_3783_0115-line-541-2358-521-61': (33, 43),
 'NL-HaNA_1.01.02_3783_0115-line-412-2406-357-51': (43, 59),
 'NL-HaNA_1.01.02_3783_0115-line-472-2471-657-54': (59, 73),
 'NL-HaNA_1.01.02_3783_0115-line-410-2520-841-59': (73, 112),
 'NL-HaNA_1.01.02_3783_0115-line-458-2574-793-50': (112, 150),
 'NL-HaNA_1.01.02_3783_0115-line-458-2616-791-59': (150, 187),
 'NL-HaNA_1.01.02_3783_0115-line-456-2665-797-55': (187, 229),
 'NL-HaNA_1.01.02_3783_0115-line-456-2716-435-46': (229, 253),
 'NL-HaNA_1.01.02_3783_0115-line-412-2760-841-56': (253, 290),
 'NL-HaNA_1.01.02_3783_0115-line-458-2812-795-54': (290, 324),
 'NL-HaNA_1.01.02_3783_0115-line-461-2858-790-63': (324, 361),
 'NL-HaNA_1.01.02_3783_0115-line-463-2907-380-55': (361, 382),
 'NL-HaNA_1.01.02_3783_0115-line-415-2954-838-54': (382, 421),
 'NL-HaNA_1.01.02_3783_0115-line-458-3001-793-56': (421, 460),
 'NL

 iterate over lines. For each line.id, find offset and end in text.
 Then, for each attendants' offset and end, find matching line(s) plus local char offsets

In [45]:
line_annots = asearch.get_annotations_of_type_overlapping('line', \
                            paras[0]['begin_anchor'], paras[0]['end_anchor'], all_annotations, resource_id)
line_annots_02_20 = list(line_annots)
for l in line_annots_02_20:
    line_text = all_textlines.slice(l['begin_anchor'], l['end_anchor'])
    span = offset_table[l['id']]
    print(f"{span}\t{text_02_20[span[0]:span[1]]}\t{line_text}")

    

(0, 27)	Mercurii den 25. Februarii 	['Mercurii den 25. Februarii']
(27, 33)	1728. 	['1728.']
(33, 43)	PRAESIDE, 	['PRAESIDE,']
(43, 59)	Den Heere Bout. 	['Den Heere Bout.']
(59, 73)	PRAESENTIBUS, 	['PRAESENTIBUS,']
(73, 112)	De Heeren Van Lynden, van Singendonck, 	['De Heeren Van Lynden, van Singendonck,']
(112, 150)	yvan Heuckelom, van Heeckeren tot den 	['yvan Heuckelom, van Heeckeren tot den']
(150, 187)	Brandtzenborgh, van W'ynbergen , met 	["Brandtzenborgh, van W'ynbergen , met"]
(187, 229)	een extraordinaris Gedeputeerde uyt de Pro	['een extraordinaris Gedeputeerde uyt de Pro-']
(229, 253)	vincie van Gelderlandt. 	['vincie van Gelderlandt.']
(253, 290)	Van Wassenaer, vanden Boetzelaar, van	['Van Wassenaer, vanden Boetzelaar, van-']
(290, 324)	der Dussen, Bors van Waveren, van 	['der Dussen, Bors van Waveren, van']
(324, 361)	Cattenborgh, Veckhoven, Raadtpensiona	['Cattenborgh, Veckhoven, Raadtpensiona-']
(361, 382)	ris van Slingelandt. 	['ris van Slingelandt.']
(382, 421)	Noey, v

haal pres_data attendants voor 25 februari op

In [46]:
attendants_02_20 = pres_02_20[0]['spans']
attendants_02_20 = [att for att in attendants_02_20 if att['class'] in attendant_classes]
print(len(attendants_02_20))
attendants_02_20

15


[{'offset': 51,
  'end': 55,
  'class': 'president',
  'pattern': 'Bout',
  'delegate_id': 0,
  'delegate_name': '',
  'delegate_score': 0},
 {'offset': 346,
  'end': 362,
  'class': 'raadpensionaris',
  'pattern': 'Raadtpensionaris',
  'delegate_id': 0,
  'delegate_name': '',
  'delegate_score': 0},
 {'offset': 468,
  'end': 487,
  'class': 'delegate',
  'pattern': 'Taats van Amerongen',
  'delegate_id': 17821,
  'delegate_name': 'Borre van Amerongen',
  'delegate_score': 0},
 {'offset': 300,
  'end': 316,
  'class': 'delegate',
  'pattern': 'Bors van Waveren',
  'delegate_id': 14821,
  'delegate_name': 'Bors van Waveren',
  'delegate_score': 0},
 {'offset': 526,
  'end': 533,
  'class': 'delegate',
  'pattern': 'Vegilin',
  'delegate_id': 12332,
  'delegate_name': 'Vegelin van Claerbergen',
  'delegate_score': 0},
 {'offset': 363,
  'end': 378,
  'class': 'delegate',
  'pattern': 'van Slingelandt',
  'delegate_id': 14079,
  'delegate_name': 'van Slingelandt',
  'delegate_score': 0},


In [47]:
for a in attendants_02_20:
    print(f"{a['pattern']}")
    print(f"\tmatching riktext-segment:    {riktext_02_20[a['offset']:a['end']]}")
    print(f"\tmatching marijntext-segment: {text_02_20[a['offset']+2:a['end']+2]}")

Bout
	matching riktext-segment:    Bout
	matching marijntext-segment: Bout
Raadtpensionaris
	matching riktext-segment:    Raadtpensionaris
	matching marijntext-segment: Raadtpensionaris
Taats van Amerongen
	matching riktext-segment:    Taats van Amerongen
	matching marijntext-segment: Taats van Amerongen
Bors van Waveren
	matching riktext-segment:    Bors van Waveren
	matching marijntext-segment: Bors van Waveren
Vegilin
	matching riktext-segment:    Vegilin
	matching marijntext-segment: Vegilin
van Slingelandt
	matching riktext-segment:    van Slingelandt
	matching marijntext-segment: van Slingelandt
Noey
	matching riktext-segment:    Noey
	matching marijntext-segment: Noey
Van Sehwartzenbergh
	matching riktext-segment:    Van Sehwartzenbergh
	matching marijntext-segment: Van Sehwartzenbergh
vander Dussen
	matching riktext-segment:    vander Dussen
	matching marijntext-segment: vander Dussen
Veckhoven
	matching riktext-segment:    Veckhoven
	matching marijntext-segment: Veckhoven
van 

In [48]:
marijn_offsets = [(a['offset']+2, a['end']+2) for a in attendants_02_20]
marijn_offsets

[(53, 57),
 (348, 364),
 (470, 489),
 (302, 318),
 (528, 535),
 (365, 380),
 (382, 386),
 (507, 526),
 (287, 300),
 (337, 346),
 (128, 164),
 (112, 126),
 (388, 397),
 (268, 285),
 (320, 335)]

In [49]:
for os in marijn_offsets:
    print(text_02_20[os[0]:os[1]])

Bout
Raadtpensionaris
Taats van Amerongen
Bors van Waveren
Vegilin
van Slingelandt
Noey
Van Sehwartzenbergh
vander Dussen
Veckhoven
van Heeckeren tot den Brandtzenborgh
yvan Heuckelom
van Hoorn
vanden Boetzelaar
van Cattenborgh


Find line_ids from offset_table

In [66]:
def is_overlapping(interval1, interval2):
    if (interval1[0] >= interval2[0] and interval1[0] < interval2[1]) or\
        (interval1[1] >= interval2[0] and interval1[1] < interval2[1]) or\
        (interval1[0] <= interval2[0] and interval1[1] >= interval2[1]):
        
        return True
    else:
        return False

for os in marijn_offsets:
    a_lines = [l for l in offset_table.keys() if is_overlapping(offset_table[l], os)]
    first_index = a_lines[0]
    last_index = a_lines[0] if len(a_lines) == 1 else a_lines[-1]
    
    for al in a_lines:
        l_offset = offset_table[al]
        print(all_textlines.slice(line_ids_vs_indexes[first_index], line_ids_vs_indexes[last_index])[0][l_offset[1]-os[0]:])
    print(all_textlines.slice(line_ids_vs_indexes[first_index], line_ids_vs_indexes[last_index]))
    print('\n')

ere Bout.
['Den Heere Bout.']


Veckhoven, Raadtpensiona-
ona-
['Cattenborgh, Veckhoven, Raadtpensiona-', 'ris van Slingelandt.']


Zeelandt.

['Zeelandt.', 'Taats van Amerongen, van Rensqwoude.']


averen, van
['der Dussen, Bors van Waveren, van']


rtzenbergh, Vegilin.
['Van Sehwartzenbergh, Vegilin.']


dt.
['ris van Slingelandt.']


ris van Slingelandt.

['ris van Slingelandt.', 'Noey, van Hoorn , met tqwee extraordina-']


Taats van Amerongen, van Rensqwoude.
woude.
['Taats van Amerongen, van Rensqwoude.', 'Van Sehwartzenbergh, Vegilin.']


 Wassenaer, vanden Boetzelaar, van-
-
['Van Wassenaer, vanden Boetzelaar, van-', 'der Dussen, Bors van Waveren, van']


Raadtpensiona-
['Cattenborgh, Veckhoven, Raadtpensiona-']


eckeren tot den

['yvan Heuckelom, van Heeckeren tot den', "Brandtzenborgh, van W'ynbergen , met"]


De Heeren Van Lynden, van Singendonck,

['De Heeren Van Lynden, van Singendonck,', 'yvan Heuckelom, van Heeckeren tot den']


ordina-
['Noey, van Hoorn , met tqwee ext

Nu nog de local offsets bepalen: begin_char_offset, end_char_offset

In [53]:
for l in vddussen_lines:
    print(offset_table[l])

NameError: name 'vddussen_lines' is not defined

In [55]:
vddussen_marijn_offsets

NameError: name 'vddussen_marijn_offsets' is not defined

In [99]:
all_textlines.slice(14757, 14757)[0][287-253:]

'van-'

In [100]:
all_textlines.slice(14758, 14758)[0][:300-290]

'der Dussen'

In [120]:
attendant_annots = []

for attlist in pres_data[1:]: # skip the first, seems to be non-existing
# for attlist in pres_data:
    session_id = attlist['metadata']['zittingsdag_id']
    sess_annot = asearch.get_annotation_by_id(session_id, all_annotations)
    if sess_annot == None:
        print(f"no session found for: {session_id}")
        continue
    
    overlapping_att_lists = asearch.get_annotations_of_type_overlapping('attendance_list', \
                            sess_annot['begin_anchor'], sess_annot['end_anchor'], all_annotations, resource_id)
    
    overlapping_alists = list(overlapping_att_lists) # generator to list
    if len(overlapping_alists) == 0:
        print(f"no overlapping att_lists for session: {session_id}")
        continue
    else:
        att_list_annot = overlapping_alists[0]
        meeting_lines = all_textlines.slice(att_list_annot['begin_anchor'], att_list_annot['end_anchor'])
        results = create_attendants_for_attlist(attlist, att_list_annot, session_id, meeting_lines)
        attendant_annots.extend(results)
    
attendant_annots[:10]

no session found for: session-1728-01-23-num-1
no session found for: session-1728-02-17-num-1
Attendant not overlapping with attendance_list for: session-1728-03-30-num-1
   non-overlapping attendant: {'offset': 746, 'end': 750, 'class': 'delegate', 'pattern': 'Bout', 'delegate_id': 16901, 'delegate_name': 'Bout', 'delegate_score': 0}
Attendant not overlapping with attendance_list for: session-1728-03-30-num-1
   non-overlapping attendant: {'offset': 752, 'end': 756, 'class': 'delegate', 'pattern': 'Noey', 'delegate_id': 13278, 'delegate_name': 'Boner', 'delegate_score': 0}
Attendant not overlapping with attendance_list for: session-1728-03-30-num-1
   non-overlapping attendant: {'offset': 758, 'end': 767, 'class': 'delegate', 'pattern': 'van Hoorn', 'delegate_id': 14078, 'delegate_name': 'van Hoornbeek', 'delegate_score': 0}
Attendant not overlapping with attendance_list for: session-1728-03-30-num-1
   non-overlapping attendant: {'offset': 737, 'end': 745, 'class': 'delegate', 'patte

[{'resource_id': 'volume-1728',
  'label': 'attendant',
  'id': 'session-1728-01-02-num-1-attendant-2',
  'metadata': {'offset': 117,
   'end': 130,
   'class': 'president',
   'pattern': 'Van Wassenaer',
   'delegate_id': 0,
   'delegate_name': '',
   'delegate_score': 0},
  'begin_anchor': 7,
  'end_anchor': 7},
 {'resource_id': 'volume-1728',
  'label': 'attendant',
  'id': 'session-1728-01-02-num-1-attendant-5',
  'metadata': {'offset': 214,
   'end': 219,
   'class': 'delegate',
   'pattern': 'Torck',
   'delegate_id': 17866,
   'delegate_name': 'Torck',
   'delegate_score': 0},
  'begin_anchor': 10,
  'end_anchor': 10},
 {'resource_id': 'volume-1728',
  'label': 'attendant',
  'id': 'session-1728-01-02-num-1-attendant-6',
  'metadata': {'offset': 221,
   'end': 231,
   'class': 'delegate',
   'pattern': 'Umigroeven',
   'delegate_id': 17268,
   'delegate_name': 'Umbgrove',
   'delegate_score': 0},
  'begin_anchor': 10,
  'end_anchor': 11},
 {'resource_id': 'volume-1728',
  'label

In [121]:
len(attendant_annots)

3607

In [122]:
all_textlines.slice(579, 580)

['Brandizenborgh, van Wynbergen, Torck,',
 'Umbgroeven, met ceu extraordinaris Ge-']

In [124]:
all_annotations.extend(attendant_annots)

In [125]:
len(all_annotations)

126627

In [24]:
# voor iedere annotatie, bepaal image_range en voeg deze toe

def get_bounding_box_for(annotations): 
    ann_list = list(annotations) # because a generator can only be used once
    
    min_left = min([ann['image_coords']['left'] for ann in ann_list if 'image_coords' in ann])
    max_right = max([ann['image_coords']['right'] for ann in ann_list if 'image_coords' in ann])
    min_top = min([ann['image_coords']['top'] for ann in ann_list if 'image_coords' in ann])
    max_bottom = max([ann['image_coords']['bottom'] for ann in ann_list if 'image_coords' in ann])
    height = max_bottom - min_top
    width = max_right - min_left

    return {'left': min_left, 'right': max_right, 'top': min_top, 'bottom': max_bottom, 'height': height, 'width': width}

def add_image_range(ann):
    ann['image_range'] = []
    
    ann_begin=ann['begin_anchor']
    ann_end=ann['end_anchor']
        
    # loop over scans die overlappen met de annotatie
    for a in asearch.get_annotations_of_type_overlapping('scanpage',ann_begin,ann_end,all_annotations, resource_id):
        bounding_boxes = []
        image_url = a['iiif_url']
        
        scan_begin=a['begin_anchor']
        scan_end=a['end_anchor']
        
        # loop over alle kolommen op de betreffende scan. Per kolom, bereken bounding box voor 
        # overlappende resolutieregels
        for clm in asearch.get_annotations_of_type_overlapping('columns',scan_begin,scan_end,all_annotations, resource_id):
            clm_begin=clm['begin_anchor']
            clm_end=clm['end_anchor']
            
            # bepaal overlap_begin en overlap_end indexes voor kolom
            overlap_begin=max(ann_begin, clm_begin)
            overlap_end=min(ann_end, clm_end)
                        
            # bepaal hieruit de bounding box coords voor deze kolom
            if overlap_end-overlap_begin >= 0: # resolution and column are overlapping
                bounding_box=get_bounding_box_for(asearch.get_annotations_of_type_overlapping('lines',\
                                                        overlap_begin,overlap_end,all_annotations, resource_id))
                bounding_boxes.append(bounding_box)
        
        ann['image_range'].append((image_url, bounding_boxes))
    return       

In [25]:
def add_region_links(ann):
    region_links = []
    try:
        for image_url, regions in ann['image_range']:
            for coords in regions:
                # construct iiif_url from image_url and coords
                coord_str = f"{coords['left']},{coords['top']},{coords['width']},{coords['height']}"
                region_url = re.sub(r'(full)/(,\d+)', rf'{coord_str}/\1', image_url)
                region_links.append(region_url)
    except:
        print('error: annotation without image range')
        
    ann['region_links'] = region_links
    return

In [26]:
# for a in asearch.get_annotations_of_type('attendantslists', all_annotations):
#    add_image_range(a)  
#    add_region_links(a)
    
# for a in asearch.get_annotations_of_type('resolutions', all_annotations):
#    add_image_range(a)  
#    add_region_links(a)

for i, a in enumerate(all_annotations):
    add_image_range(a)
    add_region_links(a)
    if i % 100 == 0 and 'id' in a:
        print(f"{i}: {a['id']}")

0: NL-HaNA_1.01.02_3783_0089-page-176-column-0-tr-1-line-0
100: NL-HaNA_1.01.02_3783_0089-page-176-column-1-tr-2-line-22
200: NL-HaNA_1.01.02_3783_0089-page-177-column-1-tr-0-line-4
300: NL-HaNA_1.01.02_3783_0090-page-178-column-0-tr-0-line-20
400: NL-HaNA_1.01.02_3783_0090-page-178-column-1-tr-0-line-56
500: NL-HaNA_1.01.02_3783_0051-page-101-column-0-tr-1-line-3
600: NL-HaNA_1.01.02_3783_0052-page-102-column-0-tr-0-line-5
700: NL-HaNA_1.01.02_3783_0052-page-102-column-0-tr-2-line-4
800: NL-HaNA_1.01.02_3783_0052-page-102-column-1-tr-4-line-0
900: NL-HaNA_1.01.02_3783_0052-page-103-column-0-tr-0-line-35
1000: NL-HaNA_1.01.02_3783_0052-page-103-column-1-tr-1-line-10
1100: NL-HaNA_1.01.02_3783_0053-page-104-column-0-tr-0-line-14
1200: NL-HaNA_1.01.02_3783_0053-page-104-column-1-tr-0-line-3
1300: NL-HaNA_1.01.02_3783_0053-page-104-column-1-tr-1-line-48
1400: NL-HaNA_1.01.02_3783_0053-page-105-column-0-tr-2-line-22
1500: NL-HaNA_1.01.02_3783_0053-page-105-column-1-tr-1-line-23
1600: NL-Ha

13300: NL-HaNA_1.01.02_3783_0078-page-154-column-0-tr-1-line-21
13400: NL-HaNA_1.01.02_3783_0078-page-154-column-1-tr-0-line-23
13500: NL-HaNA_1.01.02_3783_0078-page-155-column-0-tr-1-line-4
13600: NL-HaNA_1.01.02_3783_0078-page-155-column-1-tr-0-line-7
13700: NL-HaNA_1.01.02_3783_0079-page-156-column-0-tr-0-line-4
13800: NL-HaNA_1.01.02_3783_0079-page-156-column-0-tr-2-line-16
13900: NL-HaNA_1.01.02_3783_0079-page-156-column-1-tr-3-line-4
14000: NL-HaNA_1.01.02_3783_0079-page-157-column-0-tr-0-line-36
14100: NL-HaNA_1.01.02_3783_0079-page-157-column-1-tr-0-line-26
14200: NL-HaNA_1.01.02_3783_0080-page-158-column-0-tr-0-line-15
14300: NL-HaNA_1.01.02_3783_0080-page-158-column-1-tr-0-line-5
14400: NL-HaNA_1.01.02_3783_0080-page-158-column-1-tr-0-line-55
14500: NL-HaNA_1.01.02_3783_0080-page-159-column-0-tr-0-line-44
14600: NL-HaNA_1.01.02_3783_0080-page-159-column-1-tr-1-line-17
14700: NL-HaNA_1.01.02_3783_0081-page-160-column-0-tr-2-line-2
14800: NL-HaNA_1.01.02_3783_0081-page-160-colu

26400: NL-HaNA_1.01.02_3783_0104-page-207-column-1-tr-7-line-5
26500: NL-HaNA_1.01.02_3783_0105-page-208-column-0-tr-2-line-0
26600: NL-HaNA_1.01.02_3783_0105-page-208-column-1-tr-0-line-28
26700: NL-HaNA_1.01.02_3783_0105-page-209-column-0-tr-1-line-4
26800: NL-HaNA_1.01.02_3783_0105-page-209-column-1-tr-0-line-13
26900: NL-HaNA_1.01.02_3783_0106-page-210-column-0-tr-0-line-6
27000: NL-HaNA_1.01.02_3783_0106-page-210-column-0-tr-0-line-56
27100: NL-HaNA_1.01.02_3783_0106-page-210-column-1-tr-1-line-41
27200: NL-HaNA_1.01.02_3783_0106-page-211-column-0-tr-1-line-8
27300: NL-HaNA_1.01.02_3783_0106-page-211-column-1-tr-0-line-21
27400: NL-HaNA_1.01.02_3783_0107-page-212-column-0-tr-0-line-10
27500: meeting-1728-02-02-session-1-column-2
27600: NL-HaNA_1.01.02_3783_0107-page-212-column-1-tr-0-line-49
27700: NL-HaNA_1.01.02_3783_0107-page-213-column-0-tr-1-line-2
27800: NL-HaNA_1.01.02_3783_0107-page-213-column-1-tr-2-line-0
27900: NL-HaNA_1.01.02_3783_0108-page-214-column-0-tr-0-line-18
28

39600: NL-HaNA_1.01.02_3783_0132-page-262-column-0-tr-0-line-4
39700: NL-HaNA_1.01.02_3783_0132-page-262-column-0-tr-2-line-27
39800: NL-HaNA_1.01.02_3783_0132-page-262-column-1-tr-1-line-33
39900: NL-HaNA_1.01.02_3783_0132-page-263-column-0-tr-0-line-33
40000: NL-HaNA_1.01.02_3783_0132-page-263-column-1-tr-1-line-10
40100: NL-HaNA_1.01.02_3783_0133-page-264-column-0-tr-0-line-13
40200: NL-HaNA_1.01.02_3783_0133-page-264-column-2-tr-0-line-1
40300: NL-HaNA_1.01.02_3783_0133-page-264-column-2-tr-3-line-24
40400: NL-HaNA_1.01.02_3783_0133-page-265-column-0-tr-1-line-24
40500: NL-HaNA_1.01.02_3783_0133-page-265-column-1-tr-2-line-5
40600: NL-HaNA_1.01.02_3783_0134-page-266-column-0-tr-0-line-14
40700: NL-HaNA_1.01.02_3783_0134-page-266-column-1-tr-0-line-3
40800: NL-HaNA_1.01.02_3783_0134-page-266-column-1-tr-0-line-53
40900: NL-HaNA_1.01.02_3783_0134-page-267-column-0-tr-2-line-9
41000: NL-HaNA_1.01.02_3783_0134-page-267-column-1-tr-2-line-24
41100: NL-HaNA_1.01.02_3783_0135-page-268-col

53000: NL-HaNA_1.01.02_3783_0159-page-317-column-0-tr-0-line-14
53100: NL-HaNA_1.01.02_3783_0159-page-317-column-1-tr-0-line-2
53200: NL-HaNA_1.01.02_3783_0159-page-317-column-1-tr-2-line-12
53300: NL-HaNA_1.01.02_3783_0160-page-318-column-0-tr-2-line-1
53400: NL-HaNA_1.01.02_3783_0160-page-318-column-1-tr-0-line-7
53500: NL-HaNA_1.01.02_3783_0160-page-318-column-1-tr-3-line-3
53600: NL-HaNA_1.01.02_3783_0160-page-319-column-0-tr-3-line-3
53700: NL-HaNA_1.01.02_3783_0160-page-319-column-1-tr-0-line-13
53800: NL-HaNA_1.01.02_3783_0161-page-320-column-0-tr-0-line-2
53900: NL-HaNA_1.01.02_3783_0161-page-320-column-1-tr-0-line-0
54000: NL-HaNA_1.01.02_3783_0161-page-320-column-1-tr-1-line-16
54100: NL-HaNA_1.01.02_3783_0161-page-321-column-0-tr-1-line-23
54200: NL-HaNA_1.01.02_3783_0161-page-321-column-1-tr-0-line-32
54300: NL-HaNA_1.01.02_3783_0162-page-322-column-0-tr-1-line-12
54400: NL-HaNA_1.01.02_3783_0162-page-322-column-1-tr-0-line-12
54500: NL-HaNA_1.01.02_3783_0162-page-323-colum

66000: NL-HaNA_1.01.02_3783_0186-page-370-column-0-tr-2-line-11
66100: NL-HaNA_1.01.02_3783_0186-page-370-column-1-tr-4-line-9
66200: NL-HaNA_1.01.02_3783_0186-page-371-column-0-tr-2-line-11
66300: NL-HaNA_1.01.02_3783_0186-page-371-column-1-tr-1-line-16
66400: NL-HaNA_1.01.02_3783_0187-page-372-column-0-tr-0-line-10
66500: NL-HaNA_1.01.02_3783_0187-page-372-column-1-tr-0-line-0
66600: NL-HaNA_1.01.02_3783_0187-page-372-column-1-tr-2-line-35
66700: NL-HaNA_1.01.02_3783_0187-page-373-column-0-tr-4-line-1
66800: NL-HaNA_1.01.02_3783_0187-page-373-column-1-tr-0-line-31
66900: NL-HaNA_1.01.02_3783_0188-page-374-column-0-tr-0-line-21
67000: NL-HaNA_1.01.02_3783_0188-page-374-column-1-tr-0-line-10
67200: NL-HaNA_1.01.02_3783_0188-page-375-column-0-tr-3-line-9
67300: NL-HaNA_1.01.02_3783_0188-page-375-column-1-tr-2-line-3
67400: NL-HaNA_1.01.02_3783_0189-page-376-column-0-tr-1-line-23
67500: NL-HaNA_1.01.02_3783_0189-page-376-column-1-tr-1-line-3
67600: NL-HaNA_1.01.02_3783_0189-page-377-colu

79100: NL-HaNA_1.01.02_3783_0213-page-424-column-1-tr-0-line-28
79200: NL-HaNA_1.01.02_3783_0213-page-425-column-0-tr-1-line-6
79300: NL-HaNA_1.01.02_3783_0213-page-425-column-1-tr-0-line-5
79400: NL-HaNA_1.01.02_3783_0213-page-425-column-1-tr-0-line-55
79500: NL-HaNA_1.01.02_3783_0214-page-426-column-0-tr-1-line-4
79600: NL-HaNA_1.01.02_3783_0214-page-426-column-1-tr-1-line-0
79700: NL-HaNA_1.01.02_3783_0214-page-427-column-0-tr-0-line-0
79800: NL-HaNA_1.01.02_3783_0214-page-427-column-0-tr-4-line-16
79900: NL-HaNA_1.01.02_3783_0214-page-427-column-1-tr-2-line-0
80000: NL-HaNA_1.01.02_3783_0215-page-428-column-0-tr-1-line-14
80100: NL-HaNA_1.01.02_3783_0215-page-428-column-1-tr-1-line-8
80200: NL-HaNA_1.01.02_3783_0215-page-429-column-0-tr-1-line-6
80300: NL-HaNA_1.01.02_3783_0215-page-429-column-1-tr-1-line-2
80400: NL-HaNA_1.01.02_3783_0216-page-430-column-0-tr-0-line-3
80500: NL-HaNA_1.01.02_3783_0216-page-430-column-0-tr-0-line-53
80600: NL-HaNA_1.01.02_3783_0216-page-430-column-1

92200: NL-HaNA_1.01.02_3783_0240-page-478-column-0-tr-1-line-25
92300: NL-HaNA_1.01.02_3783_0240-page-478-column-2-tr-1-line-5
92400: NL-HaNA_1.01.02_3783_0240-page-479-column-0-tr-0-line-20
92500: NL-HaNA_1.01.02_3783_0240-page-479-column-1-tr-0-line-9
92600: meeting-1728-05-05-session-1-column-13
92700: NL-HaNA_1.01.02_3783_0241-page-480-column-0-tr-0-line-49
92800: NL-HaNA_1.01.02_3783_0241-page-480-column-1-tr-0-line-38
92900: NL-HaNA_1.01.02_3783_0241-page-481-column-0-tr-0-line-27
93000: NL-HaNA_1.01.02_3783_0241-page-481-column-1-tr-0-line-16
93100: NL-HaNA_1.01.02_3783_0242-page-482-column-0-tr-0-line-5
93200: NL-HaNA_1.01.02_3783_0242-page-482-column-0-tr-1-line-49
93300: NL-HaNA_1.01.02_3783_0242-page-482-column-1-tr-0-line-44
93400: NL-HaNA_1.01.02_3783_0242-page-483-column-0-tr-0-line-33
93500: NL-HaNA_1.01.02_3783_0242-page-483-column-1-tr-0-line-22
93600: NL-HaNA_1.01.02_3783_0243-page-484-column-0-tr-0-line-11
93700: NL-HaNA_1.01.02_3783_0243-page-484-column-2-tr-0-line-

105200: NL-HaNA_1.01.02_3783_0266-page-531-column-1-tr-0-line-0
105300: NL-HaNA_1.01.02_3783_0266-page-531-column-1-tr-3-line-3
105400: NL-HaNA_1.01.02_3783_0267-page-532-column-0-tr-2-line-14
105500: NL-HaNA_1.01.02_3783_0267-page-532-column-1-tr-0-line-36
105600: NL-HaNA_1.01.02_3783_0267-page-533-column-0-tr-1-line-6
105700: NL-HaNA_1.01.02_3783_0267-page-533-column-1-tr-0-line-14
105800: NL-HaNA_1.01.02_3783_0268-page-534-column-0-tr-0-line-8
105900: NL-HaNA_1.01.02_3783_0268-page-534-column-1-tr-0-line-1
106000: NL-HaNA_1.01.02_3783_0268-page-534-column-1-tr-1-line-22
106100: NL-HaNA_1.01.02_3783_0268-page-535-column-0-tr-1-line-22
106200: NL-HaNA_1.01.02_3783_0268-page-535-column-1-tr-3-line-3
106300: NL-HaNA_1.01.02_3783_0269-page-536-column-0-tr-1-line-14
106400: NL-HaNA_1.01.02_3783_0269-page-536-column-1-tr-0-line-6
106500: NL-HaNA_1.01.02_3783_0269-page-536-column-1-tr-3-line-13
106600: NL-HaNA_1.01.02_3783_0269-page-537-column-0-tr-2-line-14
106700: NL-HaNA_1.01.02_3783_026

118100: NL-HaNA_1.01.02_3783_0293-page-585-column-0-tr-2-line-6
118200: NL-HaNA_1.01.02_3783_0293-page-585-column-1-tr-0-line-32
118300: NL-HaNA_1.01.02_3783_0294-page-586-column-0-tr-0-line-22
118400: NL-HaNA_1.01.02_3783_0294-page-586-column-1-tr-2-line-2
118500: NL-HaNA_1.01.02_3783_0294-page-587-column-0-tr-0-line-1
118600: NL-HaNA_1.01.02_3783_0294-page-587-column-0-tr-4-line-26
118700: NL-HaNA_1.01.02_3783_0294-page-587-column-1-tr-1-line-2
118800: NL-HaNA_1.01.02_3783_0295-page-588-column-0-tr-3-line-5
118900: NL-HaNA_1.01.02_3783_0295-page-588-column-1-tr-0-line-22
119000: NL-HaNA_1.01.02_3783_0295-page-589-column-0-tr-0-line-11
119100: NL-HaNA_1.01.02_3783_0295-page-589-column-1-tr-0-line-2
119200: NL-HaNA_1.01.02_3783_0295-page-589-column-1-tr-2-line-27
119300: NL-HaNA_1.01.02_3783_0296-page-590-column-0-tr-1-line-20
119400: NL-HaNA_1.01.02_3783_0296-page-590-column-1-tr-0-line-31
119500: NL-HaNA_1.01.02_3783_0296-page-591-column-0-tr-0-line-19
119600: NL-HaNA_1.01.02_3783_02

131100: NL-HaNA_1.01.02_3783_0323-page-644-column-0-tr-0-line-14
131200: NL-HaNA_1.01.02_3783_0323-page-644-column-1-tr-0-line-3
131300: NL-HaNA_1.01.02_3783_0323-page-644-column-1-tr-1-line-0
131400: NL-HaNA_1.01.02_3783_0323-page-645-column-0-tr-1-line-22
131500: NL-HaNA_1.01.02_3783_0323-page-645-column-1-tr-1-line-14
131600: NL-HaNA_1.01.02_3783_0324-page-646-column-0-tr-0-line-20
131700: NL-HaNA_1.01.02_3783_0324-page-646-column-1-tr-0-line-8
131800: NL-HaNA_1.01.02_3783_0324-page-646-column-1-tr-2-line-25
131900: NL-HaNA_1.01.02_3783_0324-page-647-column-0-tr-1-line-14
132000: NL-HaNA_1.01.02_3783_0324-page-647-column-1-tr-2-line-1
132100: NL-HaNA_1.01.02_3783_0325-page-648-column-0-tr-3-line-4
132200: NL-HaNA_1.01.02_3783_0325-page-648-column-2-tr-0-line-15
132300: NL-HaNA_1.01.02_3783_0325-page-649-column-0-tr-0-line-5
132400: NL-HaNA_1.01.02_3783_0325-page-649-column-0-tr-4-line-8
132500: NL-HaNA_1.01.02_3783_0325-page-649-column-1-tr-2-line-5
132600: NL-HaNA_1.01.02_3783_0326

144100: NL-HaNA_1.01.02_3783_0349-page-697-column-1-tr-0-line-7
144200: NL-HaNA_1.01.02_3783_0349-page-697-column-1-tr-2-line-23
144300: NL-HaNA_1.01.02_3783_0350-page-698-column-0-tr-1-line-20
144400: NL-HaNA_1.01.02_3783_0350-page-698-column-1-tr-2-line-10
144500: NL-HaNA_1.01.02_3783_0350-page-699-column-0-tr-0-line-21
144600: NL-HaNA_1.01.02_3783_0350-page-699-column-0-tr-1-line-11
144700: NL-HaNA_1.01.02_3783_0350-page-699-column-1-tr-5-line-2
144800: NL-HaNA_1.01.02_3783_0351-page-700-column-0-tr-1-line-6
144900: NL-HaNA_1.01.02_3783_0351-page-700-column-1-tr-1-line-5
145000: NL-HaNA_1.01.02_3783_0351-page-701-column-0-tr-1-line-0
145100: NL-HaNA_1.01.02_3783_0351-page-701-column-0-tr-1-line-50
145200: NL-HaNA_1.01.02_3783_0351-page-701-column-1-tr-5-line-5
145300: NL-HaNA_1.01.02_3783_0352-page-702-column-0-tr-4-line-8
145400: NL-HaNA_1.01.02_3783_0352-page-702-column-1-tr-2-line-1
145500: NL-HaNA_1.01.02_3783_0352-page-703-column-0-tr-1-line-3
145600: NL-HaNA_1.01.02_3783_0352-

156900: NL-HaNA_1.01.02_3783_0376-page-751-column-0-tr-0-line-14
157000: NL-HaNA_1.01.02_3783_0376-page-751-column-1-tr-0-line-4
157100: NL-HaNA_1.01.02_3783_0376-page-751-column-1-tr-3-line-13
157200: NL-HaNA_1.01.02_3783_0377-page-752-column-0-tr-0-line-45
157300: NL-HaNA_1.01.02_3783_0377-page-752-column-1-tr-0-line-35
157400: NL-HaNA_1.01.02_3783_0377-page-753-column-0-tr-1-line-0
157500: NL-HaNA_1.01.02_3783_0377-page-753-column-1-tr-1-line-0
157600: NL-HaNA_1.01.02_3783_0378-page-754-column-0-tr-0-line-4
157700: NL-HaNA_1.01.02_3783_0378-page-754-column-0-tr-2-line-22
157800: NL-HaNA_1.01.02_3783_0378-page-754-column-1-tr-6-line-2
157900: NL-HaNA_1.01.02_3783_0378-page-755-column-0-tr-1-line-2
158000: NL-HaNA_1.01.02_3783_0378-page-755-column-1-tr-1-line-9
158100: NL-HaNA_1.01.02_3783_0379-page-756-column-0-tr-1-line-0
158200: NL-HaNA_1.01.02_3783_0379-page-756-column-1-tr-0-line-6
158300: NL-HaNA_1.01.02_3783_0379-page-756-column-1-tr-1-line-39
158400: NL-HaNA_1.01.02_3783_0379-

169600: NL-HaNA_1.01.02_3783_0402-page-803-column-0-tr-2-line-5
169700: NL-HaNA_1.01.02_3783_0402-page-803-column-1-tr-0-line-24
169800: NL-HaNA_1.01.02_3783_0403-page-804-column-0-tr-2-line-6
169900: NL-HaNA_1.01.02_3783_0403-page-804-column-1-tr-1-line-2
170000: NL-HaNA_1.01.02_3783_0403-page-804-column-1-tr-2-line-47
170100: NL-HaNA_1.01.02_3783_0403-page-805-column-0-tr-2-line-11
170200: NL-HaNA_1.01.02_3783_0403-page-805-column-1-tr-2-line-0
170300: NL-HaNA_1.01.02_3783_0404-page-806-column-0-tr-3-line-3
170400: NL-HaNA_1.01.02_3783_0404-page-806-column-1-tr-2-line-6
170500: NL-HaNA_1.01.02_3783_0404-page-807-column-0-tr-0-line-8
170600: NL-HaNA_1.01.02_3783_0404-page-807-column-0-tr-0-line-58
170700: NL-HaNA_1.01.02_3783_0404-page-807-column-1-tr-0-line-46
170800: NL-HaNA_1.01.02_3783_0405-page-808-column-0-tr-0-line-35
170900: NL-HaNA_1.01.02_3783_0405-page-808-column-1-tr-1-line-14
171000: NL-HaNA_1.01.02_3783_0405-page-809-column-0-tr-0-line-17
171100: NL-HaNA_1.01.02_3783_040

182500: NL-HaNA_1.01.02_3783_0429-page-856-column-0-tr-1-line-23
182600: NL-HaNA_1.01.02_3783_0429-page-856-column-1-tr-0-line-32
182700: NL-HaNA_1.01.02_3783_0429-page-857-column-0-tr-0-line-21
182800: NL-HaNA_1.01.02_3783_0429-page-857-column-1-tr-0-line-9
182900: NL-HaNA_1.01.02_3783_0429-page-857-column-1-tr-0-line-59
183000: NL-HaNA_1.01.02_3783_0430-page-858-column-0-tr-0-line-48
183100: NL-HaNA_1.01.02_3783_0430-page-858-column-1-tr-0-line-37
183200: NL-HaNA_1.01.02_3783_0430-page-859-column-0-tr-1-line-3
183300: NL-HaNA_1.01.02_3783_0430-page-859-column-1-tr-1-line-10
183400: NL-HaNA_1.01.02_3783_0431-page-860-column-0-tr-0-line-5
183500: NL-HaNA_1.01.02_3783_0431-page-860-column-0-tr-1-line-29
183600: NL-HaNA_1.01.02_3783_0431-page-860-column-1-tr-0-line-45
183700: NL-HaNA_1.01.02_3783_0431-page-861-column-0-tr-1-line-15
183800: NL-HaNA_1.01.02_3783_0431-page-861-column-1-tr-0-line-25
183900: NL-HaNA_1.01.02_3783_0432-page-862-column-0-tr-2-line-2
184000: NL-HaNA_1.01.02_3783_

195300: NL-HaNA_1.01.02_3783_0455-page-908-column-0-tr-1-line-28
195400: NL-HaNA_1.01.02_3783_0455-page-908-column-1-tr-0-line-38
195500: NL-HaNA_1.01.02_3783_0455-page-909-column-0-tr-1-line-0
195600: NL-HaNA_1.01.02_3783_0455-page-909-column-1-tr-0-line-16
195700: NL-HaNA_1.01.02_3783_0456-page-910-column-0-tr-0-line-6
195800: NL-HaNA_1.01.02_3783_0456-page-910-column-0-tr-2-line-5
195900: NL-HaNA_1.01.02_3783_0456-page-910-column-1-tr-4-line-4
196000: NL-HaNA_1.01.02_3783_0456-page-911-column-0-tr-1-line-5
196100: NL-HaNA_1.01.02_3783_0456-page-911-column-2-tr-0-line-17
196200: NL-HaNA_1.01.02_3783_0457-page-912-column-0-tr-0-line-4
196300: NL-HaNA_1.01.02_3783_0457-page-912-column-0-tr-5-line-1
196400: NL-HaNA_1.01.02_3783_0457-page-912-column-1-tr-2-line-19
196500: NL-HaNA_1.01.02_3783_0457-page-913-column-0-tr-0-line-37
196600: NL-HaNA_1.01.02_3783_0457-page-913-column-1-tr-0-line-27
196700: NL-HaNA_1.01.02_3783_0458-page-914-column-0-tr-0-line-18
196800: NL-HaNA_1.01.02_3783_045

208500: NL-HaNA_1.01.02_3783_0482-page-962-column-0-tr-1-line-0
208600: NL-HaNA_1.01.02_3783_0482-page-962-column-0-tr-2-line-22
208700: NL-HaNA_1.01.02_3783_0482-page-962-column-1-tr-1-line-23
208800: NL-HaNA_1.01.02_3783_0482-page-963-column-0-tr-1-line-15
208900: NL-HaNA_1.01.02_3783_0482-page-963-column-1-tr-2-line-7
209000: NL-HaNA_1.01.02_3783_0483-page-964-column-0-tr-0-line-6
209100: NL-HaNA_1.01.02_3783_0483-page-964-column-0-tr-0-line-56
209200: NL-HaNA_1.01.02_3783_0483-page-964-column-1-tr-2-line-16
209300: NL-HaNA_1.01.02_3783_0483-page-965-column-1-tr-1-line-1
209400: NL-HaNA_1.01.02_3783_0483-page-965-column-1-tr-1-line-28
209500: NL-HaNA_1.01.02_3783_0483-page-965-column-3-tr-0-line-32
209600: NL-HaNA_1.01.02_3783_0484-page-966-column-0-tr-0-line-12
209700: NL-HaNA_1.01.02_3783_0484-page-966-column-1-tr-0-line-1
209800: NL-HaNA_1.01.02_3783_0484-page-966-column-1-tr-0-line-51
209900: NL-HaNA_1.01.02_3783_0484-page-967-column-0-tr-3-line-5
210000: NL-HaNA_1.01.02_3783_04

221200: NL-HaNA_1.01.02_3783_0508-page-1014-column-0-tr-0-line-42
221300: NL-HaNA_1.01.02_3783_0508-page-1014-column-1-tr-3-line-4
221400: NL-HaNA_1.01.02_3783_0508-page-1015-column-0-tr-0-line-18
221500: NL-HaNA_1.01.02_3783_0508-page-1015-column-1-tr-1-line-3
221600: meeting-1728-12-12-session-1-column-3
221700: NL-HaNA_1.01.02_3783_0509-page-1016-column-0-tr-0-line-49
221800: NL-HaNA_1.01.02_3783_0509-page-1016-column-1-tr-0-line-41
221900: NL-HaNA_1.01.02_3783_0509-page-1017-column-0-tr-1-line-2
222000: NL-HaNA_1.01.02_3783_0509-page-1017-column-1-tr-1-line-9
222100: NL-HaNA_1.01.02_3783_0510-page-1018-column-0-tr-2-line-0
222200: NL-HaNA_1.01.02_3783_0510-page-1018-column-1-tr-1-line-8
222300: meeting-1728-12-12-session-1-column-0
222400: NL-HaNA_1.01.02_3783_0510-page-1019-column-0-tr-6-line-7
222500: NL-HaNA_1.01.02_3783_0510-page-1019-column-1-tr-1-line-18
222600: NL-HaNA_1.01.02_3783_0511-page-1020-column-0-tr-1-line-18
222700: NL-HaNA_1.01.02_3783_0511-page-1020-column-1-tr-1

234200: meeting-1728-06-21-session-1-resolution-11
234300: meeting-1728-07-05-session-1-resolution-4
234400: meeting-1728-07-19-session-1-resolution-1
234500: meeting-1728-07-31-session-1-resolution-10
234600: meeting-1728-08-14-session-1-resolution-3
234700: meeting-1728-08-27-session-1-resolution-10
234800: meeting-1728-09-11-session-1-resolution-4
234900: meeting-1728-09-25-session-1-resolution-4
235000: meeting-1728-10-11-session-1-resolution-3
235100: meeting-1728-10-26-session-1-resolution-4
235200: meeting-1728-11-11-session-1-resolution-3
235300: meeting-1728-11-26-session-1-resolution-9
235400: meeting-1728-12-13-session-1-resolution-18
235500: meeting-1728-12-30-session-1-resolution-3
235600: meeting-1728-03-27-session-1-attendantslist-1
235700: meeting-1728-07-05-session-1-attendantslist-1
235800: meeting-1728-10-13-session-1-attendantslist-1
235900: meeting-1728-01-02-session-1-attendant-19
236000: meeting-1728-01-07-session-1-attendant-12
236100: meeting-1728-01-12-session

In [41]:
for a in asearch.get_annotations_of_type('lines', all_annotations):
    print(a)

{'resource_id': 'volume-1728', 'label': 'lines', 'image_coords': {'left': 1451, 'right': 1962, 'top': 2124, 'bottom': 2184, 'height': 60, 'width': 511}, 'begin_anchor': 55952, 'id': 'NL-HaNA_1.01.02_3783_0285-page-568-column-1-tr-2-line-0', 'end_anchor': 55952, 'image_range': [('https://images.diginfra.net/iiif/NL-HaNA_1.01.02/3783/NL-HaNA_1.01.02_3783_0285.jpg/full/,1316/0/default.jpg', [{'left': 1451, 'right': 1962, 'top': 2124, 'bottom': 2184, 'height': 60, 'width': 511}])], 'region_links': ['https://images.diginfra.net/iiif/NL-HaNA_1.01.02/3783/NL-HaNA_1.01.02_3783_0285.jpg/1451,2124,511,60/full/0/default.jpg']}
{'resource_id': 'volume-1728', 'label': 'lines', 'image_coords': {'left': 1645, 'right': 1787, 'top': 2183, 'bottom': 2244, 'height': 61, 'width': 142}, 'begin_anchor': 55953, 'id': 'NL-HaNA_1.01.02_3783_0285-page-568-column-1-tr-2-line-1', 'end_anchor': 55953, 'image_range': [('https://images.diginfra.net/iiif/NL-HaNA_1.01.02/3783/NL-HaNA_1.01.02_3783_0285.jpg/full/,1316/0

In [31]:
for a in asearch.get_annotations_of_type('resolutions', all_annotations):
    print(a)

{'resource_id': 'volume-1728', 'label': 'resolutions', 'begin_anchor': 434, 'end_anchor': 441, 'id': 'meeting-1728-01-02-session-1-resolution-1', 'proposition_type': 'missive', 'image_range': [('https://images.diginfra.net/iiif/NL-HaNA_1.01.02/3783/NL-HaNA_1.01.02_3783_0051.jpg/full/,2000/0/default.jpg', [{'left': 2814, 'right': 3628, 'top': 3223, 'bottom': 3385, 'height': 162, 'width': 814}]), ('https://images.diginfra.net/iiif/NL-HaNA_1.01.02/3783/NL-HaNA_1.01.02_3783_0051.jpg/full/,1949/0/default.jpg', [{'left': 3685, 'right': 4568, 'top': 1582, 'bottom': 1789, 'height': 207, 'width': 883}])], 'region_links': ['https://images.diginfra.net/iiif/NL-HaNA_1.01.02/3783/NL-HaNA_1.01.02_3783_0051.jpg/2814,3223,814,162/full/0/default.jpg', 'https://images.diginfra.net/iiif/NL-HaNA_1.01.02/3783/NL-HaNA_1.01.02_3783_0051.jpg/3685,1582,883,207/full/0/default.jpg']}
{'resource_id': 'volume-1728', 'label': 'resolutions', 'begin_anchor': 442, 'end_anchor': 446, 'id': 'meeting-1728-01-02-session-1

{'resource_id': 'volume-1728', 'label': 'resolutions', 'begin_anchor': 86484, 'end_anchor': 86521, 'id': 'meeting-1728-09-18-session-1-resolution-5', 'proposition_type': None, 'image_range': [('https://images.diginfra.net/iiif/NL-HaNA_1.01.02/3783/NL-HaNA_1.01.02_3783_0416.jpg/full/,3107/0/default.jpg', [{'left': 2455, 'right': 3328, 'top': 3086, 'bottom': 3348, 'height': 262, 'width': 873}]), ('https://images.diginfra.net/iiif/NL-HaNA_1.01.02/3783/NL-HaNA_1.01.02_3783_0416.jpg/full/,3098/0/default.jpg', [{'left': 3324, 'right': 4224, 'top': 437, 'bottom': 1991, 'height': 1554, 'width': 900}])], 'region_links': ['https://images.diginfra.net/iiif/NL-HaNA_1.01.02/3783/NL-HaNA_1.01.02_3783_0416.jpg/2455,3086,873,262/full/0/default.jpg', 'https://images.diginfra.net/iiif/NL-HaNA_1.01.02/3783/NL-HaNA_1.01.02_3783_0416.jpg/3324,437,900,1554/full/0/default.jpg']}
{'resource_id': 'volume-1728', 'label': 'resolutions', 'begin_anchor': 86523, 'end_anchor': 86537, 'id': 'meeting-1728-09-18-sessio

In [27]:
def add_segmented_text_to_store(segmented_text, store_name):
    try:
        with open(datadir+store_name, 'r') as filehandle:
            data = json.loads(filehandle.read())
    except FileNotFoundError:
        data = {'_resources' : []}
        
    data['_resources'].append(segmented_text)
    
    with open(datadir+store_name, 'w') as filehandle:
        json.dump(data, filehandle, indent=4, cls=segmentedtext.SegmentEncoder)     

In [30]:
add_segmented_text_to_store(all_textlines, text_store)

In [31]:
def add_annotations_to_store(annotations, store_name):
    try:
        with open('../../data/'+store_name, 'r') as filehandle:
            data = json.loads(filehandle.read())
    except FileNotFoundError:
        data = []
        
    data.extend(annotations)
    
    with open('../../data/'+store_name, 'w') as filehandle:
        json.dump(data, filehandle, indent=4, cls=segmentedtext.AnchorEncoder)   

In [32]:
add_annotations_to_store(all_annotations, annotation_store)

In [50]:
len(all_annotations)

1472

In [32]:
all_textlines.slice(1005, 1012)

['\\Ntfangen een Miflive van den Refi-',
 'J dent van Affendelft , gefchreven te Kop-',
 'penhage den {even en twintighften',
 'der voorlede maandt , geaddrefTeert aan',
 'den Griffier Fagel, houdende adverten-',
 'tie.',
 'WAAR op geen refolutie is',
 'gevallen.']

In [None]:
for a in asearch.get_annotations_of_type('columns',all_annotations, resource_id):
    print(a)