In [1]:
import glob
import os
from collections import Counter
from itertools import combinations

import numpy as np
np.random.seed(18012023)

import pandas as pd
pd.set_option('display.max_colwidth', 0)

import seaborn as sb

from sklearn.metrics import pairwise_distances
import lxml.etree
from scipy.spatial.distance import pdist, squareform
from tqdm import tqdm
from sklearn.feature_extraction.text import TfidfVectorizer

import matplotlib.pyplot as plt
import matplotlib
plt.rcParams['figure.dpi'] = 300
plt.rcParams['font.family'] = 'Arial'

In this notebook, we run the intertext retrieval (with the calibrated parameters) on all of the narrative (epic) texts available on the *Cd-rom Middelnederlands*. We dump the results to a spreadsheet that we will statistically analyze in the next notebook.

First, we make sure that the output directory exists:

In [2]:
# Output directory for the figures and spreadsheets produced by this notebook.
fig_dir = '../figures'
# `makedirs` also creates missing parent directories, and `exist_ok=True`
# makes the cell safe to re-run (no separate existence check needed).
os.makedirs(fig_dir, exist_ok=True)

We load the metadata (with the subgenre information etc.):

In [3]:
# Read the corrected metadata sheet and keep only the rows that are not
# flagged with an 'x' in the `exclude` column.
meta_df = (
    pd.read_excel('../data/metadata_corrected.xlsx')
      .loc[lambda frame: frame['exclude'] != 'x']
)
# Show ten random rows as a sanity check.
meta_df.sample(10)

Unnamed: 0,id,title,author,provenance,date_range,genre,subgenre,exclude
267,van_smeinscen_lede,Van smeinscen lede,,cdrom-mnl,1300-1400,Epiek,Didactiek,
63,florigout_fragm_l,Florigout,,cdrom-mnl,1375-1400,Epiek,Ridder,
111,loyhier_en_malaert_fragm_a,Loyhier en Malaert,,cdrom-mnl,1350-1400,Epiek,Karel,
86,historie_van_gaver_capeel,Historie van Gaver Capeel,,cdrom-mnl,1500-1520,Epiek,Ridder,
168,renout_van_montalbaen_fragm_l,Renout van Montalbaen,,cdrom-mnl,1300-1400,Epiek,Karel,
215,spiegel_historiael__1_3_4_maerlant,Spiegel historiael (P1-P4),Jacob van Maerlant,cdrom-mnl,1300-1325,Epiek,Historiografie,
211,spiegel_der_sonden,Spiegel der sonden,,cdrom-mnl,1440-1460,Epiek,Didactiek,
123,madelgijs_fragm_d,Madelgijs,,cdrom-mnl,1340-1360,Epiek,Karel,
166,Reinout van Montalbaen,Reinout van Montalbaen,,CG1,1300-1276,Epiek,Karel,
240,theophilus,Theophilus,,cdrom-mnl,1400-1420,Epiek,Heiligenleven,


Next, we redefine the extraction functions from the previous notebooks that we'll need again:

In [4]:
def get_verse_groups(verses, size=2, intertexts=False):
    """Slide a window of `size` consecutive items over `verses`.

    Parameters
    ----------
    verses : sequence
        Verse strings (when `intertexts` is False) or per-verse intertext
        labels, where ``None`` marks a verse without a label.
    size : int
        Number of consecutive verses per group.
    intertexts : bool
        Selects what is yielded for each window (see below).

    Yields
    ------
    When `intertexts` is False: the window's verses joined with ``' / '``.
    When `intertexts` is True: ``None`` if any label in the window is
    ``None``; ``'overlap'`` if the window holds more than one distinct label;
    otherwise the single label shared by the whole window.
    """
    n_windows = len(verses) - (size - 1)
    for start in range(n_windows):
        window = verses[start : start + size]

        if not intertexts:
            yield ' / '.join(window)
            continue

        labels = set(window)
        if None in labels:
            yield None
        elif len(labels) > 1:
            yield 'overlap'
        else:
            yield list(labels)[0]

In [5]:
def parse_xml(fn, rm_interpol=False):
    """Yield ``(tokens, lemmas, intertext_id)`` for every ``<l>`` element in `fn`.

    Parameters
    ----------
    fn : str
        Path of the XML file to parse.
    rm_interpol : bool
        If True, every ``<interpolation>`` element is removed from the tree
        before the lines are read.

    Yields
    ------
    tokens : list of str
        The whitespace-split value of the line's ``tokens`` attribute.
    lemmas : list of str
        One entry per ``+``-separated part of each ``<lemma>`` element under
        the line; a part whose matching part-of-speech is ``n(prop)`` is
        replaced by the placeholder ``'n(prop)'``.
    intertext_id : str or None
        The line's ``intertext`` attribute, or ``None`` when it is absent.

    Notes
    -----
    If the file cannot be read (``OSError``), a message is printed and the
    generator finishes without yielding anything.
    """
    try:
        tree = lxml.etree.parse(fn)
    except OSError:
        print(f'- Could not load {fn}')
        return

    if rm_interpol:
        for interpolation in tree.xpath("//interpolation"):
            interpolation.getparent().remove(interpolation)

    for line_node in tree.iterfind('.//l'):
        intertext_id = line_node.attrib.get('intertext')
        tokens_ = line_node.attrib['tokens'].split()
        lemmas_ = []

        # An element without text content has `.text is None`; fall back to an
        # empty string so the `.split('+')` below cannot raise AttributeError.
        lemma_tags = [(l.text or '') for l in line_node.iterfind('.//lemma')]
        pos_tags = [(p.text or '') for p in line_node.iterfind('.//pos')]

        for lemma, pos in zip(lemma_tags, pos_tags):
            # Compound annotations are joined with '+'; handle each part separately.
            for l, p in zip(lemma.split('+'), pos.split('+')):
                lemmas_.append('n(prop)' if p == 'n(prop)' else l)

        yield tokens_, lemmas_, intertext_id

In [6]:
# Number of consecutive verses that make up one verse group.
GROUP_SIZE = 2

# Parallel, corpus-wide lists: entry i of each list describes the same verse group.
titles, tokens, lemmas, intertexts = [], [], [], []

# Process the corpus one title at a time; a title can map to several XML files.
for title, group in tqdm(meta_df.groupby('title')):
    work_tokens, work_lemmas, work_intertexts = [], [], []
    
    # The files of a title are read in sorted id order and their verses are
    # concatenated, so a verse group can span the boundary between two files.
    for id_ in sorted(group['id']):
        for tok, lem, intertext_id in parse_xml(f'../data/xml/{id_}.xml', rm_interpol=True):
            work_tokens.append(tok)
            work_lemmas.append(lem)
            work_intertexts.append(intertext_id)
    
    # One space-joined string per verse.
    verse_tokens = [' '.join(v) for v in work_tokens]
    verse_lemmas = [' '.join(v) for v in work_lemmas]

    # Sliding windows of GROUP_SIZE verses, built identically for all three views.
    verse_group_tokens = list(get_verse_groups(verse_tokens, size=GROUP_SIZE))
    verse_group_lemmas = list(get_verse_groups(verse_lemmas, size=GROUP_SIZE))
    verse_group_intertexts = list(get_verse_groups(work_intertexts, size=GROUP_SIZE, intertexts=True))

    tokens.extend(verse_group_tokens)
    lemmas.extend(verse_group_lemmas)
    intertexts.extend(verse_group_intertexts)
    # Repeat the title once per verse group to keep the lists aligned.
    titles.extend([title] * len(verse_group_lemmas))

  for line_node in tree.iterfind('//l'):
100%|██████████| 205/205 [00:15<00:00, 13.35it/s]


Now, we can load all the verse groups from all the texts, as well as the associated metadata, into a single dataframe:

In [7]:
# One row per verse group; the four parallel lists become the four columns.
df = pd.DataFrame(
    list(zip(titles, tokens, lemmas, intertexts)),
    columns=['title', 'tokens', 'lemmas', 'intertext'],
)

Finally, we can add the separate column for the lemmas in rhyming position:

In [8]:
def tokenizer(text):
    """Split a verse-group string into lowercase, whitespace-delimited tokens.

    The ``' / '`` verse separator is replaced by a single space first, so it
    never shows up as a token.
    """
    without_separators = ' '.join(text.split(' / '))
    return without_separators.lower().split()

def add_rhyme_column(df):
    """Add a ``'rhyme'`` column holding the last lemma of every verse in a group.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a ``'lemmas'`` column of verse-group strings in which the
        verses are separated by ``' / '``.

    Returns
    -------
    pandas.DataFrame
        The same `df` object (modified in place) with the new ``'rhyme'``
        column: the final lemma of each verse, joined with single spaces.
        A verse without any lemmas contributes nothing to the rhyme string
        (previously this raised ``IndexError``).
    """
    def final_lemmas(verse_group):
        finals = []
        for verse in verse_group.split(' / '):
            words = verse.split()
            if words:  # skip verses for which no lemma is available
                finals.append(words[-1])
        return ' '.join(finals)

    df['rhyme'] = [final_lemmas(verse_group) for verse_group in df['lemmas']]
    return df

In [9]:
# Adds the 'rhyme' column to `df` (the helper modifies the frame in place and returns it).
df = add_rhyme_column(df)
# Preview the first rows, including the new column.
df.head()

Unnamed: 0,title,tokens,lemmas,intertext,rhyme
0,AB recht ende averecht,Aensiet dese vrouwen hoe si gaen / Besiet hoe haer tuten staen,aanzien deze vrouw hoe zij gaan / bezien hoe zij de staan,,gaan staan
1,AB recht ende averecht,Besiet hoe haer tuten staen / Claer dat si hen blanketten,bezien hoe zij de staan / klaar dat zij zij n(prop),,staan n(prop)
2,AB recht ende averecht,Claer dat si hen blanketten / Die cleeder soe lanc dat si hen letten,klaar dat zij zij n(prop) / de kleed zo lang dat zij zij letten,,n(prop) letten
3,AB recht ende averecht,Die cleeder soe lanc dat si hen letten / Ende sleypen hen nae al op die eerde,de kleed zo lang dat zij zij letten / en slapen zij na al op de aarde,,letten aarde
4,AB recht ende averecht,Ende sleypen hen nae al op die eerde / Fi diere vuylder hoverde,en slapen zij na al op de aarde / fi duur vouwer hovaardij,,aarde hovaardij


In the previous notebook, these turned out to be the optimal hyperparameters for the retrieval system:

In [10]:
# Vocabulary size: passed as `max_features` to the TF-IDF vectorizer.
optim_vs = 9000
# Rhyme weight: factor applied to the rhyme-word vectors before they are
# added to the full-verse vectors.
optim_rw = 0.1693877551020408
# Distance threshold: a pair of verse groups counts as a hit when its
# cosine distance is at or below this value.
optim_th = 0.4217159759743446

In [11]:
def batch(iterable, n=1):
    """Yield consecutive row slices of `iterable`, each holding at most `n` rows.

    `iterable` must expose ``.shape[0]`` and support slicing along its first
    axis. The final slice is shorter when the row count is not a multiple of `n`.
    """
    n_rows = iterable.shape[0]
    yield from (
        iterable[start:min(start + n, n_rows)]
        for start in range(0, n_rows, n)
    )

def pairwise(AX, BX, batch_size=5000):
    """Cosine distances between every row of `AX` and every row of `BX`.

    `AX` is processed in chunks of at most `batch_size` rows, so each single
    call to `pairwise_distances` only sees part of `AX`.

    Parameters
    ----------
    AX, BX : matrices with one row per item and the same number of columns
        Both must expose ``.shape[0]``; `AX` must support row slicing.
    batch_size : int
        Maximum number of `AX` rows per `pairwise_distances` call.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(AX.shape[0], BX.shape[0])``. When either input has
        no rows, an empty array of that shape is returned, so that callers can
        still threshold and count the result (previously an empty `AX`
        produced ``None``).
    """
    n_a, n_b = AX.shape[0], BX.shape[0]
    if n_a == 0 or n_b == 0:
        return np.empty((n_a, n_b))

    # Collect the per-batch blocks and stack them once at the end; appending
    # inside the loop would re-copy the growing result for every batch.
    blocks = [pairwise_distances(ax, BX, metric='cosine')
              for ax in batch(AX, batch_size)]
    if len(blocks) == 1:
        return blocks[0]
    return np.concatenate(blocks, axis=0)

In [12]:
def retrieve_intertexts(title1, title2, distances, threshold):
    """List the verse-group pairs of two texts whose distance is below `threshold`.

    Parameters
    ----------
    title1, title2 : str
        Titles used to select the rows of the global `df` for each text.
    distances : numpy.ndarray
        2-D distance matrix; row i / column j are taken to correspond to the
        i-th verse group of `title1` and the j-th of `title2`, in `df` order.
    threshold : float
        Only pairs with ``distance < threshold`` (strictly) are returned.

    Returns
    -------
    pandas.DataFrame
        Columns ``title1, title2, tokens1, tokens2, distance``, one row per
        retained pair, ordered by row index and then column index.
    """
    tokens_a = df.loc[df['title'] == title1, 'tokens'].to_numpy()
    tokens_b = df.loc[df['title'] == title2, 'tokens'].to_numpy()

    # Row/column positions of all hits, in row-major order.
    rows, cols = np.nonzero(distances < threshold)

    # Index the token arrays in bulk rather than building a row Series per hit.
    return pd.DataFrame(
        {
            'title1': [title1] * len(rows),
            'title2': [title2] * len(rows),
            'tokens1': tokens_a[rows],
            'tokens2': tokens_b[cols],
            'distance': distances[rows, cols],
        },
        columns=['title1', 'title2', 'tokens1', 'tokens2', 'distance'],
    )

def plot_min_distance_nn(t1, t2, min_dists, threshold, rolling_size=1000):
    """Plot the rolling mean of `min_dists` with the threshold as a dashed line.

    Parameters
    ----------
    t1 : str
        Used as the x-axis label.
    t2 : str
        Appended to the y-axis label ``'Distance to NN in '``.
    min_dists : object with a pandas-style ``.rolling`` method
        Presumably a Series of nearest-neighbour distances — verify against the caller.
    threshold : float
        Height of the dashed grey reference line.
    rolling_size : int
        Window length of the rolling mean.

    Returns
    -------
    matplotlib.axes.Axes
        The axes that were drawn on (y-axis fixed to the range 0–1).
    """
    smoothed = min_dists.rolling(rolling_size).mean()
    _, ax = plt.subplots()
    smoothed.plot(ax=ax)
    ax.axhline(y=threshold, linestyle='--', color='grey')
    ax.set(ylim=(0, 1), xlabel=t1, ylabel='Distance to NN in ' + t2)
    return ax

We will restrict the analysis to all narrative texts (in rhyming couplets), or the so-called "Epiek":

In [13]:
# Distinct titles of all works whose genre is 'Epiek', in order of first appearance.
chiv_titles = meta_df.loc[meta_df['genre'] == 'Epiek', 'title'].unique()

In [14]:
# TF-IDF vectorizer fitted on the lemma strings of every verse group in `df`.
# The vocabulary is capped at `optim_vs` features and a lemma must occur in at
# least two verse groups (`min_df=2`). Tokenization is delegated to the custom
# `tokenizer`, so `token_pattern` is explicitly set to None.
vec = TfidfVectorizer(max_features=optim_vs, min_df=2,
                      tokenizer=tokenizer, token_pattern=None).fit(df['lemmas'])

We will exhaustively combine all texts, but we make sure to only combine two texts once (because our retrieval is non-directional):

In [15]:
# Every unordered pair of titles exactly once: each pair is sorted internally,
# and the list of pairs is then sorted as a whole.
combs = [sorted(pair) for pair in combinations(chiv_titles, 2)]
combs.sort()
combs

[['Alexanders geesten', 'Alexiuslegende'],
 ['Alexanders geesten', 'Anatomie van de mens'],
 ['Alexanders geesten', 'Arturs doet'],
 ['Alexanders geesten', 'Aubri de Borgengoen'],
 ['Alexanders geesten', 'Barlaam en Josaphat'],
 ['Alexanders geesten', 'Beatrijs'],
 ['Alexanders geesten', 'Bediedenisse van der missen'],
 ['Alexanders geesten', 'Beerte metten breden voeten'],
 ['Alexanders geesten', 'Beginsel der oorlogen van Luyck'],
 ['Alexanders geesten', 'Boec Exemplaer'],
 ['Alexanders geesten', 'Boec van den houte'],
 ['Alexanders geesten', 'Boec vander wraken'],
 ['Alexanders geesten', 'Boeve van Hamtone'],
 ['Alexanders geesten', 'Borchgrave van Couchi'],
 ['Alexanders geesten', 'Borchgravinne van Vergi (1)'],
 ['Alexanders geesten', 'Borchgravinne van Vergi (2)'],
 ['Alexanders geesten', 'Boudewijn van Seborch'],
 ['Alexanders geesten', 'Brabantsche yeesten (B1-5)'],
 ['Alexanders geesten', 'Brabantsche yeesten (B6)'],
 ['Alexanders geesten', 'Brabantsche yeesten (B7)'],
 ['Alex

In [16]:
# Map each first title to the list of second titles it is paired with,
# preserving the order of `combs`.
lookup = {}
for t1, t2 in combs:
    lookup.setdefault(t1, []).append(t2)
lookup

{'Alexanders geesten': ['Alexiuslegende',
  'Anatomie van de mens',
  'Arturs doet',
  'Aubri de Borgengoen',
  'Barlaam en Josaphat',
  'Beatrijs',
  'Bediedenisse van der missen',
  'Beerte metten breden voeten',
  'Beginsel der oorlogen van Luyck',
  'Boec Exemplaer',
  'Boec van den houte',
  'Boec vander wraken',
  'Boeve van Hamtone',
  'Borchgrave van Couchi',
  'Borchgravinne van Vergi (1)',
  'Borchgravinne van Vergi (2)',
  'Boudewijn van Seborch',
  'Brabantsche yeesten (B1-5)',
  'Brabantsche yeesten (B6)',
  'Brabantsche yeesten (B7)',
  "Chanson d'Aspremont",
  'Collectief lunarium I',
  'Collectief lunarium II',
  'Collectief lunarium III',
  'Commentaar op het Hooglied',
  'Cyromanchie van den pape van den Hamme',
  'Der leken spieghel',
  'Der mannen ende vrouwen heimelijcheit',
  'Der minnen loep',
  'Der naturen bloeme',
  'Der vrouwen heimelijcheit',
  'Der ystorien bloeme',
  'Dietsche Lucidarius',
  'Dietsche doctrinale',
  'Dystorie van Saladine',
  'Enaamse Code

The following block (which takes a while to run) iterates over all text combinations and calculates the distance between all of their respective verse groups. For each pair, we keep track of the length of both texts and count the number of "hits": combinations of verse groups whose distance does not exceed the threshold value that we found to be optimal on the manually annotated data:

In [17]:
# `results` collects one statistics tuple per text pair. `intertexts` is only
# filled when the optional export below is enabled (note that it rebinds the
# name used earlier for the per-verse-group labels).
results, intertexts = [], []

# One metadata row per title (the first occurrence), indexed by title, so the
# author/genre/subgenre lookups below do not re-filter `meta_df` for every pair.
meta_by_title = meta_df.drop_duplicates(subset='title').set_index('title')

for cnt, t1 in enumerate(lookup):
    print(t1, f'{cnt+1}/{len(lookup)}')
    a1 = meta_by_title.at[t1, 'author']
    g1 = meta_by_title.at[t1, 'genre']
    sg1 = meta_by_title.at[t1, 'subgenre']

    # Vectorize the first text once per outer iteration: full-verse TF-IDF
    # plus the rhyme words, weighted by `optim_rw`.
    A = df[df['title'] == t1]
    AX = vec.transform(A['lemmas']) + optim_rw * vec.transform(A['rhyme'])
    l1 = len(A)

    for t2 in tqdm(lookup[t1]):
        a2 = meta_by_title.at[t2, 'author']
        g2 = meta_by_title.at[t2, 'genre']
        sg2 = meta_by_title.at[t2, 'subgenre']

        B = df[df['title'] == t2]
        BX = vec.transform(B['lemmas']) + optim_rw * vec.transform(B['rhyme'])
        l2 = len(B)

        # get NNs for statistics: a hit is a verse-group pair whose cosine
        # distance is at or below the threshold
        distances = pairwise(AX, BX, batch_size=10000)
        hits = np.sum(distances <= optim_th)
        results.append((t1, t2, l1, l2, hits, g1, g2, sg1, sg2, a1, a2))

        # collect actual intertexts (optional, disabled by default)
        #intertexts.append(retrieve_intertexts(t1, t2, distances=distances, threshold=optim_th))

#intertexts = pd.concat(intertexts)
#intertexts.to_excel('../figures/intertexts.xlsx', index=False)

results = pd.DataFrame(results, columns=['t1', 't2',
                                         'l1', 'l2', 'hits',
                                         'genre1', 'genre2',
                                         'subgenre1', 'subgenre2',
                                         'author1', 'author2'])
# Normalize the hit count by the number of verse-group pairs that were compared.
results['hit_ratio'] = results['hits'] / (results['l1'] * results['l2'])
results = results.sort_values(by='hit_ratio', ascending=False)
results.to_excel('../figures/statistics.xlsx', index=False)
results

Alexanders geesten 1/160


100%|██████████| 160/160 [02:37<00:00,  1.02it/s]


Alexiuslegende 2/160


100%|██████████| 159/159 [00:16<00:00,  9.43it/s]


Anatomie van de mens 3/160


100%|██████████| 158/158 [00:17<00:00,  8.94it/s]


Arturs doet 4/160


100%|██████████| 157/157 [02:20<00:00,  1.12it/s]


Aubri de Borgengoen 5/160


100%|██████████| 156/156 [00:18<00:00,  8.25it/s]


Barlaam en Josaphat 6/160


100%|██████████| 155/155 [00:17<00:00,  8.84it/s]


Beatrijs 7/160


100%|██████████| 154/154 [00:24<00:00,  6.38it/s]


Bediedenisse van der missen 8/160


100%|██████████| 153/153 [00:26<00:00,  5.84it/s]


Beerte metten breden voeten 9/160


100%|██████████| 152/152 [00:17<00:00,  8.88it/s]


Beginsel der oorlogen van Luyck 10/160


100%|██████████| 151/151 [00:20<00:00,  7.49it/s]


Boec Exemplaer 11/160


100%|██████████| 150/150 [00:17<00:00,  8.33it/s]


Boec van den houte 12/160


100%|██████████| 149/149 [00:22<00:00,  6.58it/s]


Boec vander wraken 13/160


100%|██████████| 148/148 [01:01<00:00,  2.41it/s]


Boeve van Hamtone 14/160


100%|██████████| 147/147 [00:16<00:00,  8.70it/s]


Borchgrave van Couchi 15/160


100%|██████████| 146/146 [00:47<00:00,  3.07it/s]


Borchgravinne van Vergi (1) 16/160


100%|██████████| 145/145 [00:24<00:00,  5.98it/s]


Borchgravinne van Vergi (2) 17/160


100%|██████████| 144/144 [00:18<00:00,  7.61it/s]


Boudewijn van Seborch 18/160


100%|██████████| 143/143 [00:18<00:00,  7.89it/s]


Brabantsche yeesten (B1-5) 19/160


100%|██████████| 142/142 [01:10<00:00,  2.03it/s]


Brabantsche yeesten (B6) 20/160


100%|██████████| 141/141 [02:10<00:00,  1.08it/s]


Brabantsche yeesten (B7) 21/160


100%|██████████| 140/140 [03:11<00:00,  1.36s/it]


Chanson d'Aspremont 22/160


100%|██████████| 139/139 [00:15<00:00,  9.17it/s]


Collectief lunarium I 23/160


100%|██████████| 138/138 [00:15<00:00,  8.90it/s]


Collectief lunarium II 24/160


100%|██████████| 137/137 [00:17<00:00,  7.74it/s]


Collectief lunarium III 25/160


100%|██████████| 136/136 [00:17<00:00,  7.72it/s]


Commentaar op het Hooglied 26/160


100%|██████████| 135/135 [01:45<00:00,  1.28it/s]


Cyromanchie van den pape van den Hamme 27/160


100%|██████████| 134/134 [00:18<00:00,  7.27it/s]


Der leken spieghel 28/160


100%|██████████| 133/133 [04:40<00:00,  2.11s/it]


Der mannen ende vrouwen heimelijcheit 29/160


100%|██████████| 132/132 [00:31<00:00,  4.21it/s]


Der minnen loep 30/160


100%|██████████| 131/131 [01:46<00:00,  1.23it/s]


Der naturen bloeme 31/160


100%|██████████| 130/130 [02:31<00:00,  1.17s/it]


Der vrouwen heimelijcheit 32/160


100%|██████████| 129/129 [00:25<00:00,  5.15it/s]


Der ystorien bloeme 33/160


100%|██████████| 128/128 [00:44<00:00,  2.88it/s]


Dietsche Lucidarius 34/160


100%|██████████| 127/127 [00:57<00:00,  2.20it/s]


Dietsche doctrinale 35/160


100%|██████████| 126/126 [00:59<00:00,  2.12it/s]


Dystorie van Saladine 36/160


100%|██████████| 125/125 [00:24<00:00,  5.01it/s]


Enaamse Codex 37/160


100%|██████████| 124/124 [00:37<00:00,  3.28it/s]


Esopet 38/160


100%|██████████| 123/123 [00:23<00:00,  5.22it/s]


Expositie vanden viere vingheren ende vanden dume 39/160


100%|██████████| 122/122 [00:14<00:00,  8.55it/s]


Ferguut 40/160


100%|██████████| 121/121 [00:50<00:00,  2.40it/s]


Fierabras 41/160


100%|██████████| 120/120 [00:14<00:00,  8.43it/s]


Flandrijs 42/160


100%|██████████| 119/119 [00:25<00:00,  4.76it/s]


Florent ende Durant 43/160


100%|██████████| 118/118 [00:13<00:00,  8.94it/s]


Florigout 44/160


100%|██████████| 117/117 [00:19<00:00,  6.05it/s]


Florimont 45/160


100%|██████████| 116/116 [00:14<00:00,  8.26it/s]


Floris ende Blancefloer 46/160


100%|██████████| 115/115 [00:40<00:00,  2.87it/s]


Flovent 47/160


100%|██████████| 114/114 [00:15<00:00,  7.18it/s]


Floyris ende Blantseflur 48/160


100%|██████████| 113/113 [00:13<00:00,  8.37it/s]


Fysionomie 49/160


100%|██████████| 112/112 [00:13<00:00,  8.30it/s]


Gedicht over de hemeltekenen 50/160


100%|██████████| 111/111 [00:12<00:00,  8.89it/s]


Geraert van Viane 51/160


100%|██████████| 110/110 [00:12<00:00,  8.75it/s]


Godevaert metten baerde 52/160


100%|██████████| 109/109 [00:11<00:00,  9.24it/s]


Godevaerts kintshede 53/160


100%|██████████| 108/108 [00:11<00:00,  9.28it/s]


Grimbergse oorlog 54/160


100%|██████████| 107/107 [01:42<00:00,  1.04it/s]


Gwidekijn van Sassen 55/160


100%|██████████| 106/106 [01:48<00:00,  1.02s/it]


Heymelijchede der heymelijcheit 56/160


100%|██████████| 105/105 [00:36<00:00,  2.90it/s]


Historie van Gaver Capeel 57/160


100%|██████████| 104/104 [00:11<00:00,  8.73it/s]


Historie van Troyen 58/160


100%|██████████| 103/103 [15:38<00:00,  9.11s/it] 


Huge van Bordeeus 59/160


100%|██████████| 102/102 [00:16<00:00,  6.26it/s]


Iechemas 60/160


100%|██████████| 101/101 [00:13<00:00,  7.60it/s]


Jan Splinters testament 61/160


100%|██████████| 100/100 [00:11<00:00,  8.49it/s]


Jans teesteye 62/160


100%|██████████| 99/99 [00:34<00:00,  2.86it/s]


Jonathas ende Rosafiere 63/160


100%|██████████| 98/98 [00:18<00:00,  5.25it/s]


Karel ende Elegast 64/160


100%|██████████| 97/97 [00:18<00:00,  5.31it/s]


Korte kroniek van Brabant (korte versie 1) 65/160


100%|██████████| 96/96 [00:12<00:00,  7.68it/s]


Korte kroniek van Brabant (lange versie) 66/160


100%|██████████| 95/95 [00:22<00:00,  4.25it/s]


Lanceloet en het hert met de witte voet 67/160


100%|██████████| 94/94 [00:15<00:00,  6.25it/s]


Lancelot 68/160


100%|██████████| 93/93 [09:27<00:00,  6.11s/it] 


Lantsloot van der Haghedochte 69/160


100%|██████████| 92/92 [00:42<00:00,  2.14it/s]


Lapidarijs 70/160


100%|██████████| 91/91 [00:11<00:00,  8.06it/s]


Leven van Sint Trudo (berijming) 71/160


100%|██████████| 90/90 [00:11<00:00,  8.09it/s]


Limborch 72/160


100%|██████████| 89/89 [03:35<00:00,  2.42s/it]


Limburgse Aiol 73/160


100%|██████████| 88/88 [00:11<00:00,  7.34it/s]


Lion van Bourges 74/160


100%|██████████| 87/87 [00:09<00:00,  8.79it/s]


Loyhier en Malaert 75/160


100%|██████████| 86/86 [00:11<00:00,  7.71it/s]


Madelgijs 76/160


100%|██████████| 85/85 [00:20<00:00,  4.20it/s]


Melibeus 77/160


100%|██████████| 84/84 [01:02<00:00,  1.34it/s]


Merlijn 78/160


100%|██████████| 83/83 [50:50<00:00, 36.75s/it]   


Merlijn-continuatie 79/160


100%|██████████| 82/82 [1:10:45<00:00, 51.78s/it]   


Moriaen 80/160


100%|██████████| 81/81 [00:28<00:00,  2.80it/s]


Natuurkunde van het geheelal 81/160


100%|██████████| 80/80 [00:15<00:00,  5.00it/s]


Nevelingenlied 82/160


100%|██████████| 79/79 [00:08<00:00,  8.87it/s]


Nieuwe doctrinael 83/160


100%|██████████| 78/78 [00:21<00:00,  3.68it/s]


Noch die tien ghebode 84/160


100%|██████████| 77/77 [00:08<00:00,  8.92it/s]


Noch meer van wiven 85/160


100%|██████████| 76/76 [00:08<00:00,  8.80it/s]


Noch van Salladine 86/160


100%|██████████| 75/75 [00:08<00:00,  9.19it/s]


O crux lignum triumphale 87/160


100%|██████████| 74/74 [00:11<00:00,  6.45it/s]


Ogier van Denemarken 88/160


100%|██████████| 73/73 [00:10<00:00,  7.03it/s]


Ongeïdentificeerd (1) 89/160


100%|██████████| 72/72 [00:08<00:00,  8.66it/s]


Ongeïdentificeerd (2) 90/160


100%|██████████| 71/71 [00:07<00:00,  9.12it/s]


Ongeïdentificeerd (3) 91/160


100%|██████████| 70/70 [02:08<00:00,  1.83s/it]


Ongeïdentificeerd (4) 92/160


100%|██████████| 69/69 [00:07<00:00,  9.02it/s]


Ongeïdentificeerd (6) 93/160


100%|██████████| 68/68 [01:39<00:00,  1.47s/it]


Ongeïdentificeerd fragment 94/160


100%|██████████| 67/67 [00:07<00:00,  8.92it/s]


Ons heren passie 95/160


100%|██████████| 66/66 [01:27<00:00,  1.33s/it]


Parthonopeus van Bloys 96/160


100%|██████████| 65/65 [00:48<00:00,  1.35it/s]


Perchevael 97/160


100%|██████████| 64/64 [00:24<00:00,  2.66it/s]


Queeste van den Grale 98/160


100%|██████████| 63/63 [01:02<00:00,  1.01it/s]


Reinaerts historie 99/160


100%|██████████| 62/62 [00:25<00:00,  2.45it/s]


Reinout van Montalbaen 100/160


100%|██████████| 61/61 [00:06<00:00,  8.80it/s]


Renout van Montalbaen 101/160


100%|██████████| 60/60 [00:16<00:00,  3.59it/s]


Riddere metter mouwen 102/160


100%|██████████| 59/59 [00:21<00:00,  2.78it/s]


Rijmbijbel 103/160


100%|██████████| 58/58 [07:41<00:00,  7.96s/it] 


Rijmkroniek van Holland 104/160


100%|██████████| 57/57 [01:05<00:00,  1.15s/it]


Rijmkroniek van Woeringen 105/160


100%|██████████| 56/56 [00:39<00:00,  1.42it/s]


Roelantslied 106/160


100%|██████████| 55/55 [00:10<00:00,  5.47it/s]


Roman der Lorreinen I 107/160


100%|██████████| 54/54 [00:08<00:00,  6.16it/s]


Roman der Lorreinen II 108/160


100%|██████████| 53/53 [00:39<00:00,  1.34it/s]


Roman van Antiochië 109/160


100%|██████████| 52/52 [00:06<00:00,  8.47it/s]


Roman van Caesar 110/160


100%|██████████| 51/51 [00:08<00:00,  5.68it/s]


Roman van Cassamus (kort) 111/160


100%|██████████| 50/50 [00:11<00:00,  4.28it/s]


Roman van Cassant 112/160


100%|██████████| 49/49 [00:05<00:00,  9.03it/s]


Rose 113/160


100%|██████████| 48/48 [00:58<00:00,  1.22s/it]


Rubben 114/160


100%|██████████| 47/47 [00:05<00:00,  7.97it/s]


Saladin 115/160


100%|██████████| 46/46 [00:05<00:00,  8.39it/s]


Schepping 116/160


100%|██████████| 45/45 [00:05<00:00,  7.69it/s]


Segheliin van Jerusalem 117/160


100%|██████████| 44/44 [00:41<00:00,  1.06it/s]


Sint Patricius 118/160


100%|██████████| 43/43 [00:06<00:00,  7.10it/s]


Sint Servaes legende 119/160


100%|██████████| 42/42 [00:22<00:00,  1.84it/s]


Sinte Franciscus leven 120/160


100%|██████████| 41/41 [00:37<00:00,  1.09it/s]


Sinte Jans ewangelium alsoe 't Augustijnken gheexponeert heeft 121/160


100%|██████████| 40/40 [00:07<00:00,  5.48it/s]


Sinte Kerstine 122/160


100%|██████████| 39/39 [00:10<00:00,  3.86it/s]


Sinte Lutgard 123/160


100%|██████████| 38/38 [01:47<00:00,  2.84s/it]


Slag van Crecy 124/160


100%|██████████| 37/37 [00:04<00:00,  8.26it/s]


Speghel der wijsheit 125/160


100%|██████████| 36/36 [00:15<00:00,  2.35it/s]


Spiegel der jongers 126/160


100%|██████████| 35/35 [00:05<00:00,  6.81it/s]


Spiegel der sonden 127/160


100%|██████████| 34/34 [00:53<00:00,  1.56s/it]


Spiegel historiael (P1-P4) 128/160


100%|██████████| 33/33 [09:11<00:00, 16.71s/it]  


Spiegel historiael (P2) 129/160


100%|██████████| 32/32 [00:44<00:00,  1.38s/it]


Spiegel historiael (P4) 130/160


100%|██████████| 31/31 [00:08<00:00,  3.48it/s]


Spiegel historiael (P5) 131/160


100%|██████████| 30/30 [00:16<00:00,  1.78it/s]


Spieghel der menscheliker behoudenesse 132/160


100%|██████████| 29/29 [00:10<00:00,  2.80it/s]


Theophilus 133/160


100%|██████████| 28/28 [00:03<00:00,  8.47it/s]


Tien plaghen ende die tien ghebode 134/160


100%|██████████| 27/27 [00:03<00:00,  7.89it/s]


Torec 135/160


100%|██████████| 26/26 [00:03<00:00,  7.45it/s]


Tristant 136/160


100%|██████████| 25/25 [00:02<00:00, 11.49it/s]


Valentijn en Nameloos 137/160


100%|██████████| 24/24 [00:02<00:00, 10.31it/s]


Van Saladijn 138/160


100%|██████████| 23/23 [00:02<00:00, 10.90it/s]


Van den VII Vroeden van binnen Rome 139/160


100%|██████████| 22/22 [00:03<00:00,  6.00it/s]


Van den derden Eduwaert 140/160


100%|██████████| 21/21 [00:02<00:00,  8.26it/s]


Van den neghen besten (kort) 141/160


100%|██████████| 20/20 [00:01<00:00, 11.53it/s]


Van den neghen besten (lang) 142/160


100%|██████████| 19/19 [00:01<00:00, 10.08it/s]


Van den vijf vrouden 143/160


100%|██████████| 18/18 [00:01<00:00, 11.48it/s]


Van den vos Reynaerde 144/160


100%|██████████| 17/17 [00:02<00:00,  6.54it/s]


Van der manen zeden 145/160


100%|██████████| 16/16 [00:01<00:00, 10.83it/s]


Van der wive wonderlijcheit (lange versie) 146/160


100%|██████████| 15/15 [00:01<00:00, 11.06it/s]


Van ghevene 147/160


100%|██████████| 14/14 [00:01<00:00, 11.18it/s]


Van sente Brandane 148/160


100%|██████████| 13/13 [00:01<00:00,  7.36it/s]


Van ses vaerwen ende twaelf outheyden 149/160


100%|██████████| 12/12 [00:01<00:00, 10.63it/s]


Van smeinscen lede 150/160


100%|██████████| 11/11 [00:01<00:00,  7.26it/s]


Vande Hertogen ende Heeren van Brabant 151/160


100%|██████████| 10/10 [00:00<00:00, 10.18it/s]


Vanden levene ons heren 152/160


100%|██████████| 9/9 [00:01<00:00,  4.73it/s]


Vander rijcheit ende vander doot 153/160


100%|██████████| 8/8 [00:00<00:00, 10.87it/s]


Vlaamse Aiol 154/160


100%|██████████| 7/7 [00:00<00:00,  7.88it/s]


Vlaamse Rose 155/160


100%|██████████| 6/6 [00:01<00:00,  5.79it/s]


Walewein (Penninc) 156/160


100%|██████████| 5/5 [00:01<00:00,  4.15it/s]


Walewein (Vostaert) 157/160


100%|██████████| 4/4 [00:00<00:00,  7.20it/s]


Walewein ende Keye 158/160


100%|██████████| 3/3 [00:00<00:00,  8.95it/s]


Willem van Oringen 159/160


100%|██████████| 2/2 [00:00<00:00, 11.47it/s]


Wisselau 160/160


100%|██████████| 1/1 [00:00<00:00, 10.16it/s]


Unnamed: 0,t1,t2,l1,l2,hits,genre1,genre2,subgenre1,subgenre2,author1,author2,hit_ratio
10014,Noch meer van wiven,Van der wive wonderlijcheit (lange versie),111,225,149,Epiek,Epiek,Didactiek,Didactiek,,,0.005966
7245,Gwidekijn van Sassen,Ongeïdentificeerd (4),199,139,35,Epiek,Epiek,Karel,Karel,,,0.001265
10484,Ongeïdentificeerd (4),Roman van Cassant,139,29,5,Epiek,Epiek,Karel,Kruisvaart,,,0.001240
7265,Gwidekijn van Sassen,Roman van Cassant,199,29,5,Epiek,Epiek,Karel,Kruisvaart,,,0.000866
10481,Ongeïdentificeerd (4),Roman van Antiochië,139,168,16,Epiek,Epiek,Karel,Kruisvaart,,,0.000685
...,...,...,...,...,...,...,...,...,...,...,...,...
7086,Godevaerts kintshede,Van der wive wonderlijcheit (lange versie),55,225,0,Epiek,Epiek,Kruisvaart,Didactiek,,,0.000000
7085,Godevaerts kintshede,Van der manen zeden,55,301,0,Epiek,Epiek,Kruisvaart,Didactiek,,,0.000000
7083,Godevaerts kintshede,Van den vijf vrouden,55,72,0,Epiek,Epiek,Kruisvaart,Didactiek,,,0.000000
7078,Godevaerts kintshede,Van Saladijn,55,289,0,Epiek,Epiek,Kruisvaart,Kruisvaart,,Hein van Aken,0.000000
