This notebook creates a JSON file which describes all the books. The output file lists the Heurist IDs of these books and of the texts contained in each book. Texts in manuscript books and in printed books can be exported separately by changing the value of `book_type` below.

In [1]:
# Type of book to export. The value is compared case-insensitively with the
# `type` of each book's first production layer, and it is also used as the
# prefix of the output file name (`<book_type>_books.json`).
book_type = 'manuscript'

In [2]:
import pandas as pd
from collections import Counter
import itertools
from os.path import join
from itertools import combinations
from tqdm import tqdm
import re
import json
import prayer_leiden

In [3]:
# Load the book records from disk. The context manager closes the file handle
# as soon as parsing is done, and the explicit encoding keeps the result
# independent of the platform's default locale encoding.
path = 'prayer_leiden.json'
with open(path, encoding='utf-8') as json_file:
    json_data = json.load(json_file)

print(f'Total number of books: {len(json_data)}')

Total number of books: 680


In [4]:
# Prayer IDs that are left out of the export: texts without identification,
# such as calendars.
texts_to_exclude = [
    'G151', 'G152', 'G153', 'G153a', 'G154',
    'G155a', 'G155b', 'G155c', 'G155d', 'G155e',
    'G156', 'G156b', 'G157', 'G158', 'G158b',
]

# Heurist IDs that have to be merged: every ID listed in a group is replaced
# by the canonical ID that precedes the group.
merge_hids = {
    variant_id: canonical_id
    for canonical_id, variant_ids in (
        (540, (544, 547, 549, 550, 135517)),
        (132477, (132476, 135524, 137964, 135511)),
    )
    for variant_id in variant_ids
}

In [5]:
# Collect, for every book of the selected `book_type`, the Heurist IDs of the
# texts it contains (`book_texts`) together with a display title for every
# book and every text (`titles`).
titles = dict()
book_texts = dict()

total_nr_texts = 0      # texts encountered in books of the selected type
nr_texts_analysed = 0   # texts that passed the filters below
nr_books = 0            # books that contributed at least one text

for book in json_data:

    book_id = int(book['id'])

    # The book type is taken from the first production layer, when there is one.
    found_book_type = 'Unknown'
    production_layers = book.get('production_layers') or []
    if production_layers and 'type' in production_layers[0]:
        found_book_type = production_layers[0]['type']

    if book_type.lower() != found_book_type.lower():
        continue

    # Prefer the title of the book and fall back to its shelfmark.
    if 'title' in book:
        titles[book_id] = book['title']
    elif 'shelfmark' in book:
        titles[book_id] = book['shelfmark']
    else:
        titles[book_id] = '[Untitled]'

    all_texts = []
    # A book without a 'texts' entry contributes no texts instead of raising
    # a KeyError.
    for text in book.get('texts') or []:
        total_nr_texts += 1

        if 'text_id' not in text or 'prayer_id' not in text:
            continue

        # Skip excluded prayer IDs and texts whose prayer ID is blank.
        prayer_id = text['prayer_id']
        if prayer_id in texts_to_exclude or len(prayer_id.strip()) == 0:
            continue

        # Replace IDs listed in `merge_hids` by their canonical Heurist ID.
        text_id = int(text['text_id'])
        text_id = merge_hids.get(text_id, text_id)

        all_texts.append(text_id)
        if 'title' in text:
            titles[text_id] = f"{prayer_id}: {text['title']}"

    # Books in which no text survived the filters are left out of the export.
    if len(all_texts) > 0:
        book_texts[book_id] = all_texts
        nr_texts_analysed += len(all_texts)
        nr_books += 1


# Report the counts for the book type that was actually selected.
print(f'Nr {book_type} books: {nr_books}')
print(f'Total number of texts: {total_nr_texts}')
print(f'Nr texts analysed {nr_texts_analysed}')


## Manual assignment of merged titles
# Inside the loop a merged ID receives the title of whichever of its texts was
# processed last, so the titles of the two merge targets are fixed by hand.
titles[540] = "G004: Prayer of St. Gregory to the Arma Christi"
titles[132477] = "G155_G189c: Long Hours of the Holy Cross with prologue B interwoven with Prayer on Mary's Compassion "

Nr priunted books: 272
Total number of texts: 7963
Nr texts analysed 4085


In [6]:
# Bundle both lookup tables into one structure and serialise it.
# Note: JSON object keys are always strings, so the integer Heurist IDs used
# as dictionary keys are written out as strings.
data = {
    'titles': titles,
    'book_texts': book_texts,
}
json_str = json.dumps(data, indent=4)

# The output file is named after the selected book type.
output_path = f'{book_type}_books.json'
with open(output_path, 'w', encoding='utf-8') as out:
    out.write(json_str)