In [1]:
import glob
import os
from collections import Counter
from itertools import combinations

import numpy as np
np.random.seed(18012023)

import pandas as pd
pd.set_option('display.max_colwidth', 0)

import seaborn as sb

from sklearn.metrics import pairwise_distances
import lxml.etree
from scipy.spatial.distance import pdist, squareform
from tqdm import tqdm
from sklearn.feature_extraction.text import TfidfVectorizer

import matplotlib.pyplot as plt
import matplotlib
plt.rcParams['figure.dpi'] = 300
plt.rcParams['font.family'] = 'Arial'

In this notebook, we run the intertext retrieval (with the calibrated parameters) on all of the narrative (epic) texts available on the *Cd-rom Middelnederlands*. We dump the results to a spreadsheet that we will statistically analyze in the next notebook.

First we make sure that the output direcotry exists:

In [2]:
fig_dir = '../figures'
if not os.path.isdir(fig_dir):
    os.mkdir(fig_dir)

We load the metadata (with the subgenre information etc.):

In [3]:
meta_df = pd.read_excel('../data/metadata_corrected.xlsx')
meta_df = meta_df[meta_df['exclude'] != 'x']
meta_df.sample(10)

Unnamed: 0,id,title,author,provenance,date_range,genre,subgenre,exclude
201,glose_op_het_sint-jansevangelie,Sinte Jans ewangelium alsoe 't Augustijnken gheexponeert heeft,Augustijnken,cdrom-mnl,1400-1420,Epiek,Didactiek,
64,florigout_fragm_db,Florigout,,cdrom-mnl,1375-1400,Epiek,Ridder,
112,loyhier_en_malaert_fragm_g,Loyhier en Malaert,,cdrom-mnl,1350-1400,Epiek,Karel,
87,historie_van_troyen,Historie van Troyen,Jacob van Maerlant,cdrom-mnl,1465-1485,Epiek,Ridder,
169,renout_van_montalbaen_fragm_s,Renout van Montalbaen,,cdrom-mnl,1490-1510,Epiek,Karel,
215,spiegel_historiael__1_3_4_maerlant,Spiegel historiael (P1-P4),Jacob van Maerlant,cdrom-mnl,1300-1325,Epiek,Historiografie,
211,spiegel_der_sonden,Spiegel der sonden,,cdrom-mnl,1440-1460,Epiek,Didactiek,
124,madelgijs_fragm_r,Madelgijs,,cdrom-mnl,1340-1360,Epiek,Karel,
167,renout_van_montalbaen_fragm_be,Renout van Montalbaen,,cdrom-mnl,1340-1360,Epiek,Karel,
240,theophilus,Theophilus,,cdrom-mnl,1400-1420,Epiek,Heiligenleven,


Next, we redefine the extraction functions from the previous notebooks that we'll need again:

In [4]:
def get_verse_groups(verses, size=2, intertexts=False):
    for i in range(len(verses) - (size - 1)):
        if not intertexts:
            yield ' / '.join(verses[i : i + size])
        else:
            its = Counter(verses[i : i + size])
            if None in its:
                yield None
            elif len(its) > 1:
                yield 'overlap'
            else:
                yield list(its.keys())[0]

In [5]:
def parse_xml(fn, rm_interpol=False):
    try:
        tree = lxml.etree.parse(fn)
    except OSError:
        print(f'- Could not load {fn}')
        return None
        
    if rm_interpol:
        for interpolation in tree.xpath("//interpolation"):
            interpolation.getparent().remove(interpolation)
        
    for line_node in tree.iterfind('.//l'):
        try:
            intertext_id = line_node.attrib['intertext']
        except KeyError:
            intertext_id = None
        
        tokens_ = line_node.attrib['tokens'].split()
        lemmas_ = []
        
        lemma_tags = [l.text for l in line_node.iterfind('.//lemma')]
        pos_tags = [p.text for p in line_node.iterfind('.//pos')]
        
        for lemma, pos in zip(lemma_tags, pos_tags):
            for l, p in zip(lemma.split('+'), pos.split('+')):
                    if p == 'n(prop)':
                        lemmas_.append('n(prop)')
                    else:
                        lemmas_.append(l)
    
        yield tokens_, lemmas_, intertext_id

In [6]:
GROUP_SIZE = 2

titles, tokens, lemmas, intertexts = [], [], [], []

for title, group in tqdm(meta_df.groupby('title')):
    work_tokens, work_lemmas, work_intertexts = [], [], []
    
    for id_ in sorted(group['id']):
        for tok, lem, intertext_id in parse_xml(f'../data/xml/{id_}.xml', rm_interpol=True):
            work_tokens.append(tok)
            work_lemmas.append(lem)
            work_intertexts.append(intertext_id)
    
    verse_tokens = [' '.join(v) for v in work_tokens]
    verse_lemmas = [' '.join(v) for v in work_lemmas]

    verse_group_tokens = list(get_verse_groups(verse_tokens, size=GROUP_SIZE))
    verse_group_lemmas = list(get_verse_groups(verse_lemmas, size=GROUP_SIZE))
    verse_group_intertexts = list(get_verse_groups(work_intertexts, size=GROUP_SIZE, intertexts=True))

    tokens.extend(verse_group_tokens)
    lemmas.extend(verse_group_lemmas)
    intertexts.extend(verse_group_intertexts)
    titles.extend([title] * len(verse_group_lemmas))

100%|██████████| 204/204 [00:15<00:00, 12.95it/s]


Now, we can load all the verse groups from all the texts, as well as the associated metdata into a single dataframe:

In [7]:
df = pd.DataFrame(zip(titles, tokens, lemmas, intertexts), columns=('title', 'tokens', 'lemmas', 'intertext'))

Finally, we can add the separate column for the lemmas in rhyming position:

In [8]:
def tokenizer(text):
    return text.replace(' / ', ' ').lower().strip().split()

def add_rhyme_column(df):
    rhyme_words = []
    for lemmas in df['lemmas']:
        rhymes = []
        for verse in lemmas.split(' / '):
            rhymes.append(verse.strip().split()[-1])
        rhyme_words.append(' '.join(rhymes))
    df['rhyme'] = rhyme_words
    return df

In [9]:
df = add_rhyme_column(df)
df.head()

Unnamed: 0,title,tokens,lemmas,intertext,rhyme
0,AB recht ende averecht,Aensiet dese vrouwen hoe si gaen / Besiet hoe haer tuten staen,aanzien deze vrouw hoe zij gaan / bezien hoe zij de staan,,gaan staan
1,AB recht ende averecht,Besiet hoe haer tuten staen / Claer dat si hen blanketten,bezien hoe zij de staan / klaar dat zij zij n(prop),,staan n(prop)
2,AB recht ende averecht,Claer dat si hen blanketten / Die cleeder soe lanc dat si hen letten,klaar dat zij zij n(prop) / de kleed zo lang dat zij zij letten,,n(prop) letten
3,AB recht ende averecht,Die cleeder soe lanc dat si hen letten / Ende sleypen hen nae al op die eerde,de kleed zo lang dat zij zij letten / en slapen zij na al op de aarde,,letten aarde
4,AB recht ende averecht,Ende sleypen hen nae al op die eerde / Fi diere vuylder hoverde,en slapen zij na al op de aarde / fi duur vouwer hovaardij,,aarde hovaardij


In the previous notebook, these turned out to be the optimal hyperparameters for the retrieval system:

In [10]:
optim_vs = 9000
optim_rw = 0.1693877551020408
optim_th = 0.4217159759743446

In [11]:
def batch(iterable, n=1):
    l = iterable.shape[0]
    for ndx in range(0, l, n):
        yield iterable[ndx:min(ndx + n, l)]

def pairwise(AX, BX, batch_size=5000):
    all_distances = None

    for ax in batch(AX, batch_size):
        if all_distances is None:
            all_distances = pairwise_distances(ax, BX, metric='cosine')
        else:
            all_distances = np.append(all_distances,
                                      pairwise_distances(ax, BX, metric='cosine'),
                                      axis=0)
    
    return all_distances

In [12]:
def retrieve_intertexts(title1, title2, distances, threshold):
    A = df[df['title'] == title1]
    B = df[df['title'] == title2]
    
    intertexts = []
    for idx1, idx2 in np.transpose(np.nonzero(distances < threshold)):
        tokens1 = A.iloc[idx1]['tokens']
        tokens2 = B.iloc[idx2]['tokens']
        distance = distances[idx1, idx2]
        intertexts.append((title1, title2, tokens1, tokens2, distance))
        
    return pd.DataFrame(intertexts, columns=['title1', 'title2', 'tokens1', 'tokens2', 'distance'])

def plot_min_distance_nn(t1, t2, min_dists, threshold, rolling_size=1000):
    mu = min_dists.rolling(rolling_size).mean()
    fig, ax = plt.subplots()
    mu.plot(ax=ax)
    ax.axhline(threshold, ls='--', c='grey')
    ax.set_ylim(0, 1)
    ax.set_xlabel(t1)
    ax.set_ylabel('Distance to NN in ' + t2)
    return ax

We well restrict the analysis to all narrative texts (in rhyming couplets), or the so-called "Epiek":

In [13]:
chiv_titles = meta_df[meta_df['genre'] == 'Epiek']['title'].unique()

In [14]:
vec = TfidfVectorizer(max_features=optim_vs, min_df=2,
                      tokenizer=tokenizer, token_pattern=None).fit(df['lemmas'])

We will exhaustively combine all texts, but we make sure to only combine two texts once (because our retrieval is non-directional):

In [15]:
combs = sorted([sorted(comb) for comb in combinations(chiv_titles, 2)])
combs

[['Alexanders geesten', 'Alexiuslegende'],
 ['Alexanders geesten', 'Anatomie van de mens'],
 ['Alexanders geesten', 'Arturs doet'],
 ['Alexanders geesten', 'Aubri de Borgengoen'],
 ['Alexanders geesten', 'Barlaam en Josaphat'],
 ['Alexanders geesten', 'Beatrijs'],
 ['Alexanders geesten', 'Bediedenisse van der missen'],
 ['Alexanders geesten', 'Beerte metten breden voeten'],
 ['Alexanders geesten', 'Beginsel der oorlogen van Luyck'],
 ['Alexanders geesten', 'Boec Exemplaer'],
 ['Alexanders geesten', 'Boec van den houte'],
 ['Alexanders geesten', 'Boec vander wraken'],
 ['Alexanders geesten', 'Boeve van Hamtone'],
 ['Alexanders geesten', 'Borchgrave van Couchi'],
 ['Alexanders geesten', 'Borchgravinne van Vergi (1)'],
 ['Alexanders geesten', 'Borchgravinne van Vergi (2)'],
 ['Alexanders geesten', 'Boudewijn van Seborch'],
 ['Alexanders geesten', 'Brabantsche yeesten (B1-5)'],
 ['Alexanders geesten', 'Brabantsche yeesten (B6)'],
 ['Alexanders geesten', 'Brabantsche yeesten (B7)'],
 ['Alex

In [16]:
lookup = {}
for t1, t2 in combs:
    try:
        lookup[t1].append(t2)
    except KeyError:
        lookup[t1] = [t2]
lookup

{'Alexanders geesten': ['Alexiuslegende',
  'Anatomie van de mens',
  'Arturs doet',
  'Aubri de Borgengoen',
  'Barlaam en Josaphat',
  'Beatrijs',
  'Bediedenisse van der missen',
  'Beerte metten breden voeten',
  'Beginsel der oorlogen van Luyck',
  'Boec Exemplaer',
  'Boec van den houte',
  'Boec vander wraken',
  'Boeve van Hamtone',
  'Borchgrave van Couchi',
  'Borchgravinne van Vergi (1)',
  'Borchgravinne van Vergi (2)',
  'Boudewijn van Seborch',
  'Brabantsche yeesten (B1-5)',
  'Brabantsche yeesten (B6)',
  'Brabantsche yeesten (B7)',
  "Chanson d'Aspremont",
  'Collectief lunarium I',
  'Collectief lunarium II',
  'Collectief lunarium III',
  'Commentaar op het Hooglied',
  'Cyromanchie van den pape van den Hamme',
  'Der leken spieghel',
  'Der mannen ende vrouwen heimelijcheit',
  'Der minnen loep',
  'Der naturen bloeme',
  'Der vrouwen heimelijcheit',
  'Der ystorien bloeme',
  'Dietsche Lucidarius',
  'Dietsche doctrinale',
  'Enaamse Codex',
  'Esopet',
  'Expositi

The following block (which takes a while to run) runs over all text combinations and calculates the distance between all of the respective verse groups. We keep track of the length of both texts in the pair and count the number of "hits", or combinations of verse groups that have a smaller distance than the threshold value, which we found to be optimal in the manually annotated data:

In [17]:
results, intertexts = [], []

for cnt, t1 in enumerate(lookup):
    print(t1, f'{cnt+1}/{len(lookup)}')
    a1 = meta_df[meta_df['title'] == t1]['author'].iloc[0]
    g1 = meta_df[meta_df['title'] == t1]['genre'].iloc[0]
    sg1 = meta_df[meta_df['title'] == t1]['subgenre'].iloc[0]
    
    A = df[df['title'] == t1]
    AX = vec.transform(A['lemmas']) + optim_rw * vec.transform(A['rhyme'])
    
    for t2 in tqdm(lookup[t1]):
        a2 = meta_df[meta_df['title'] == t2]['author'].iloc[0] 
        g2 = meta_df[meta_df['title'] == t2]['genre'].iloc[0]
        sg2 = meta_df[meta_df['title'] == t2]['subgenre'].iloc[0]
        
        B = df[df['title'] == t2]
        BX = vec.transform(B['lemmas']) + optim_rw * vec.transform(B['rhyme'])
        
        # get NNs for statistics
        distances = pairwise(AX, BX, batch_size=10000)
        l1, l2 = len(df[df['title'] == t1]), len(df[df['title'] == t2])
        hits = np.sum(distances <= optim_th)
        results.append((t1, t2, l1, l2, hits, g1, g2, sg1, sg2, a1, a2))
    
        # collect actual intertexts
        #intertexts.append(retrieve_intertexts(t1, t2, distances=distances, threshold=threshold))
    
#intertexts = pd.concat(intertexts)
#intertexts.to_excel('../figures/intertexts.xlsx', index=False)

results = pd.DataFrame(results, columns=['t1', 't2',
                                         'l1', 'l2', 'hits',
                                         'genre1', 'genre2',
                                         'subgenre1', 'subgenre2',
                                         'author1', 'author2'])
results['hit_ratio'] = results['hits'] / (results['l1'] * results['l2'])
results = results.sort_values(by='hit_ratio', ascending=False)
results.to_excel('../figures/statistics.xlsx', index=False)
results

Alexanders geesten 1/153


100%|██████████| 153/153 [02:35<00:00,  1.02s/it]


Alexiuslegende 2/153


100%|██████████| 152/152 [00:16<00:00,  9.40it/s]


Anatomie van de mens 3/153


100%|██████████| 151/151 [00:17<00:00,  8.80it/s]


Arturs doet 4/153


100%|██████████| 150/150 [02:18<00:00,  1.08it/s]


Aubri de Borgengoen 5/153


100%|██████████| 149/149 [00:18<00:00,  8.20it/s]


Barlaam en Josaphat 6/153


100%|██████████| 148/148 [00:16<00:00,  8.77it/s]


Beatrijs 7/153


100%|██████████| 147/147 [00:23<00:00,  6.37it/s]


Bediedenisse van der missen 8/153


100%|██████████| 146/146 [00:24<00:00,  5.84it/s]


Beerte metten breden voeten 9/153


100%|██████████| 145/145 [00:16<00:00,  8.92it/s]


Beginsel der oorlogen van Luyck 10/153


100%|██████████| 144/144 [00:19<00:00,  7.51it/s]


Boec Exemplaer 11/153


100%|██████████| 143/143 [00:17<00:00,  8.39it/s]


Boec van den houte 12/153


100%|██████████| 142/142 [00:21<00:00,  6.61it/s]


Boec vander wraken 13/153


100%|██████████| 141/141 [00:57<00:00,  2.45it/s]


Boeve van Hamtone 14/153


100%|██████████| 140/140 [00:15<00:00,  9.09it/s]


Borchgrave van Couchi 15/153


100%|██████████| 139/139 [00:44<00:00,  3.10it/s]


Borchgravinne van Vergi (1) 16/153


100%|██████████| 138/138 [00:23<00:00,  5.84it/s]


Borchgravinne van Vergi (2) 17/153


100%|██████████| 137/137 [00:18<00:00,  7.40it/s]


Boudewijn van Seborch 18/153


100%|██████████| 136/136 [00:17<00:00,  7.62it/s]


Brabantsche yeesten (B1-5) 19/153


100%|██████████| 135/135 [01:07<00:00,  2.00it/s]


Brabantsche yeesten (B6) 20/153


100%|██████████| 134/134 [01:58<00:00,  1.13it/s]


Brabantsche yeesten (B7) 21/153


100%|██████████| 133/133 [03:04<00:00,  1.39s/it]


Chanson d'Aspremont 22/153


100%|██████████| 132/132 [00:14<00:00,  9.20it/s]


Collectief lunarium I 23/153


100%|██████████| 131/131 [00:14<00:00,  8.96it/s]


Collectief lunarium II 24/153


100%|██████████| 130/130 [00:17<00:00,  7.62it/s]


Collectief lunarium III 25/153


100%|██████████| 129/129 [00:16<00:00,  7.66it/s]


Commentaar op het Hooglied 26/153


100%|██████████| 128/128 [01:43<00:00,  1.24it/s]


Cyromanchie van den pape van den Hamme 27/153


100%|██████████| 127/127 [00:17<00:00,  7.22it/s]


Der leken spieghel 28/153


100%|██████████| 126/126 [04:16<00:00,  2.03s/it]


Der mannen ende vrouwen heimelijcheit 29/153


100%|██████████| 125/125 [00:29<00:00,  4.24it/s]


Der minnen loep 30/153


100%|██████████| 124/124 [01:40<00:00,  1.24it/s]


Der naturen bloeme 31/153


100%|██████████| 123/123 [02:27<00:00,  1.20s/it]


Der vrouwen heimelijcheit 32/153


100%|██████████| 122/122 [00:23<00:00,  5.13it/s]


Der ystorien bloeme 33/153


100%|██████████| 121/121 [00:42<00:00,  2.82it/s]


Dietsche Lucidarius 34/153


100%|██████████| 120/120 [00:53<00:00,  2.25it/s]


Dietsche doctrinale 35/153


100%|██████████| 119/119 [00:55<00:00,  2.15it/s]


Enaamse Codex 36/153


100%|██████████| 118/118 [00:36<00:00,  3.25it/s]


Esopet 37/153


100%|██████████| 117/117 [00:22<00:00,  5.20it/s]


Expositie vanden viere vingheren ende vanden dume 38/153


100%|██████████| 116/116 [00:13<00:00,  8.60it/s]


Ferguut 39/153


100%|██████████| 115/115 [00:49<00:00,  2.32it/s]


Fierabras 40/153


100%|██████████| 114/114 [00:14<00:00,  8.01it/s]


Flandrijs 41/153


100%|██████████| 113/113 [00:24<00:00,  4.58it/s]


Florent ende Durant 42/153


100%|██████████| 112/112 [00:12<00:00,  8.63it/s]


Florigout 43/153


100%|██████████| 111/111 [00:19<00:00,  5.80it/s]


Florimont 44/153


100%|██████████| 110/110 [00:13<00:00,  7.98it/s]


Floris ende Blancefloer 45/153


100%|██████████| 109/109 [00:40<00:00,  2.71it/s]


Flovent 46/153


100%|██████████| 108/108 [00:15<00:00,  6.89it/s]


Floyris ende Blantseflur 47/153


100%|██████████| 107/107 [00:13<00:00,  8.09it/s]


Fysionomie 48/153


100%|██████████| 106/106 [00:13<00:00,  8.00it/s]


Gedicht over de hemeltekenen 49/153


100%|██████████| 105/105 [00:12<00:00,  8.57it/s]


Geraert van Viane 50/153


100%|██████████| 104/104 [00:12<00:00,  8.38it/s]


Godevaert metten baerde 51/153


100%|██████████| 103/103 [00:11<00:00,  8.89it/s]


Godevaerts kintshede 52/153


100%|██████████| 102/102 [00:11<00:00,  8.97it/s]


Grimbergse oorlog 53/153


100%|██████████| 101/101 [01:44<00:00,  1.04s/it]


Gwidekijn van Sassen 54/153


100%|██████████| 100/100 [00:12<00:00,  8.29it/s]


Heymelijchede der heymelijcheit 55/153


100%|██████████| 99/99 [00:24<00:00,  4.09it/s]


Historie van Gaver Capeel 56/153


100%|██████████| 98/98 [00:11<00:00,  8.26it/s]


Historie van Troyen 57/153


100%|██████████| 97/97 [09:35<00:00,  5.93s/it]


Huge van Bordeeus 58/153


100%|██████████| 96/96 [00:15<00:00,  6.30it/s]


Iechemas 59/153


100%|██████████| 95/95 [00:12<00:00,  7.62it/s]


Jan Splinters testament 60/153


100%|██████████| 94/94 [00:10<00:00,  8.58it/s]


Jans teesteye 61/153


100%|██████████| 93/93 [00:32<00:00,  2.83it/s]


Jonathas ende Rosafiere 62/153


100%|██████████| 92/92 [00:17<00:00,  5.27it/s]


Karel ende Elegast 63/153


100%|██████████| 91/91 [00:17<00:00,  5.26it/s]


Korte kroniek van Brabant (korte versie 1) 64/153


100%|██████████| 90/90 [00:11<00:00,  7.66it/s]


Korte kroniek van Brabant (lange versie) 65/153


100%|██████████| 89/89 [00:21<00:00,  4.16it/s]


Lanceloet en het hert met de witte voet 66/153


100%|██████████| 88/88 [00:14<00:00,  6.18it/s]


Lancelot 67/153


100%|██████████| 87/87 [09:25<00:00,  6.50s/it] 


Lantsloot van der Haghedochte 68/153


100%|██████████| 86/86 [00:42<00:00,  2.03it/s]


Lapidarijs 69/153


100%|██████████| 85/85 [00:10<00:00,  7.78it/s]


Leven van Sint Trudo (berijming) 70/153


100%|██████████| 84/84 [00:10<00:00,  7.92it/s]


Limborch 71/153


100%|██████████| 83/83 [03:15<00:00,  2.36s/it]


Limburgse Aiol 72/153


100%|██████████| 82/82 [00:11<00:00,  7.11it/s]


Lion van Bourges 73/153


100%|██████████| 81/81 [00:09<00:00,  8.76it/s]


Loyhier en Malaert 74/153


100%|██████████| 80/80 [00:10<00:00,  7.55it/s]


Madelgijs 75/153


100%|██████████| 79/79 [00:19<00:00,  4.06it/s]


Melibeus 76/153


100%|██████████| 78/78 [00:26<00:00,  2.97it/s]


Merlijn 77/153


100%|██████████| 77/77 [00:55<00:00,  1.38it/s]


Merlijn-continuatie 78/153


100%|██████████| 76/76 [04:25<00:00,  3.49s/it]


Moriaen 79/153


100%|██████████| 75/75 [00:29<00:00,  2.58it/s]


Natuurkunde van het geheelal 80/153


100%|██████████| 74/74 [00:15<00:00,  4.70it/s]


Nevelingenlied 81/153


100%|██████████| 73/73 [00:08<00:00,  8.46it/s]


Nieuwe doctrinael 82/153


100%|██████████| 72/72 [00:20<00:00,  3.49it/s]


Noch die tien ghebode 83/153


100%|██████████| 71/71 [00:08<00:00,  8.71it/s]


Noch meer van wiven 84/153


100%|██████████| 70/70 [00:08<00:00,  8.63it/s]


Noch van Salladine 85/153


100%|██████████| 69/69 [00:07<00:00,  8.98it/s]


O crux lignum triumphale 86/153


100%|██████████| 68/68 [00:10<00:00,  6.22it/s]


Ogier van Denemarken 87/153


100%|██████████| 67/67 [00:09<00:00,  6.95it/s]


Ongeïdentificeerd (1) 88/153


100%|██████████| 66/66 [00:07<00:00,  8.55it/s]


Ongeïdentificeerd (2) 89/153


100%|██████████| 65/65 [00:07<00:00,  8.97it/s]


Ongeïdentificeerd (3) 90/153


100%|██████████| 64/64 [00:08<00:00,  7.43it/s]


Ongeïdentificeerd (4) 91/153


100%|██████████| 63/63 [00:07<00:00,  8.19it/s]


Ongeïdentificeerd (6) 92/153


100%|██████████| 62/62 [00:07<00:00,  8.10it/s]


Ongeïdentificeerd fragment 93/153


100%|██████████| 61/61 [00:07<00:00,  8.09it/s]


Ons heren passie 94/153


100%|██████████| 60/60 [00:11<00:00,  5.37it/s]


Parthonopeus van Bloys 95/153


100%|██████████| 59/59 [00:48<00:00,  1.21it/s]


Perchevael 96/153


100%|██████████| 58/58 [00:23<00:00,  2.50it/s]


Queeste van den Grale 97/153


100%|██████████| 57/57 [01:02<00:00,  1.09s/it]


Reinaerts historie 98/153


100%|██████████| 56/56 [00:24<00:00,  2.26it/s]


Reinout van Montalbaen 99/153


100%|██████████| 55/55 [00:06<00:00,  8.32it/s]


Renout van Montalbaen 100/153


100%|██████████| 54/54 [00:16<00:00,  3.30it/s]


Riddere metter mouwen 101/153


100%|██████████| 53/53 [00:20<00:00,  2.56it/s]


Rijmbijbel 102/153


100%|██████████| 52/52 [07:34<00:00,  8.74s/it]


Rijmkroniek van Holland 103/153


100%|██████████| 51/51 [01:06<00:00,  1.30s/it]


Rijmkroniek van Woeringen 104/153


100%|██████████| 50/50 [00:39<00:00,  1.27it/s]


Roelantslied 105/153


100%|██████████| 49/49 [00:09<00:00,  5.12it/s]


Roman der Lorreinen I 106/153


100%|██████████| 48/48 [00:08<00:00,  5.73it/s]


Roman der Lorreinen II 107/153


100%|██████████| 47/47 [00:39<00:00,  1.20it/s]


Roman van Antiochië 108/153


100%|██████████| 46/46 [00:05<00:00,  8.01it/s]


Roman van Caesar 109/153


100%|██████████| 45/45 [00:08<00:00,  5.22it/s]


Roman van Cassamus (kort) 110/153


100%|██████████| 44/44 [00:11<00:00,  3.92it/s]


Roman van Cassant 111/153


100%|██████████| 43/43 [00:05<00:00,  8.53it/s]


Rose 112/153


100%|██████████| 42/42 [00:57<00:00,  1.37s/it]


Rubben 113/153


100%|██████████| 41/41 [00:05<00:00,  7.87it/s]


Saladin 114/153


100%|██████████| 40/40 [00:04<00:00,  8.26it/s]


Schepping 115/153


100%|██████████| 39/39 [00:05<00:00,  7.56it/s]


Segheliin van Jerusalem 116/153


100%|██████████| 38/38 [00:38<00:00,  1.03s/it]


Sint Patricius 117/153


100%|██████████| 37/37 [00:05<00:00,  6.99it/s]


Sint Servaes legende 118/153


100%|██████████| 36/36 [00:20<00:00,  1.75it/s]


Sinte Franciscus leven 119/153


100%|██████████| 35/35 [00:34<00:00,  1.02it/s]


Sinte Jans ewangelium alsoe 't Augustijnken gheexponeert heeft 120/153


100%|██████████| 34/34 [00:06<00:00,  5.24it/s]


Sinte Kerstine 121/153


100%|██████████| 33/33 [00:08<00:00,  3.69it/s]


Sinte Lutgard 122/153


100%|██████████| 32/32 [01:41<00:00,  3.18s/it]


Slag van Crecy 123/153


100%|██████████| 31/31 [00:03<00:00,  7.98it/s]


Spiegel der jongers 124/153


100%|██████████| 30/30 [00:04<00:00,  6.41it/s]


Spiegel der sonden 125/153


100%|██████████| 29/29 [00:51<00:00,  1.77s/it]


Spiegel historiael (P1-P4) 126/153


100%|██████████| 28/28 [09:12<00:00, 19.73s/it]  


Spiegel historiael (P2) 127/153


100%|██████████| 27/27 [00:42<00:00,  1.57s/it]


Spiegel historiael (P4) 128/153


100%|██████████| 26/26 [00:08<00:00,  3.19it/s]


Spiegel historiael (P5) 129/153


100%|██████████| 25/25 [00:15<00:00,  1.60it/s]


Spieghel der menscheliker behoudenesse 130/153


100%|██████████| 24/24 [00:09<00:00,  2.59it/s]


Theophilus 131/153


100%|██████████| 23/23 [00:02<00:00,  8.03it/s]


Tien plaghen ende die tien ghebode 132/153


100%|██████████| 22/22 [00:02<00:00,  7.67it/s]


Torec 133/153


100%|██████████| 21/21 [00:02<00:00,  7.26it/s]


Tristant 134/153


100%|██████████| 20/20 [00:01<00:00, 11.23it/s]


Valentijn en Nameloos 135/153


100%|██████████| 19/19 [00:01<00:00, 10.24it/s]


Van den derden Eduwaert 136/153


100%|██████████| 18/18 [00:02<00:00,  7.87it/s]


Van den neghen besten (kort) 137/153


100%|██████████| 17/17 [00:01<00:00, 11.37it/s]


Van den neghen besten (lang) 138/153


100%|██████████| 16/16 [00:01<00:00,  9.65it/s]


Van den vos Reynaerde 139/153


100%|██████████| 15/15 [00:02<00:00,  6.20it/s]


Van der manen zeden 140/153


100%|██████████| 14/14 [00:01<00:00, 10.77it/s]


Van der wive wonderlijcheit (lange versie) 141/153


100%|██████████| 13/13 [00:01<00:00, 10.84it/s]


Van sente Brandane 142/153


100%|██████████| 12/12 [00:01<00:00,  7.02it/s]


Van ses vaerwen ende twaelf outheyden 143/153


100%|██████████| 11/11 [00:01<00:00, 10.54it/s]


Van smeinscen lede 144/153


100%|██████████| 10/10 [00:01<00:00,  7.08it/s]


Vande Hertogen ende Heeren van Brabant 145/153


100%|██████████| 9/9 [00:00<00:00,  9.97it/s]


Vanden levene ons heren 146/153


100%|██████████| 8/8 [00:01<00:00,  4.37it/s]


Vlaamse Aiol 147/153


100%|██████████| 7/7 [00:00<00:00,  8.01it/s]


Vlaamse Rose 148/153


100%|██████████| 6/6 [00:01<00:00,  5.78it/s]


Walewein ende Keye 149/153


100%|██████████| 5/5 [00:00<00:00,  5.62it/s]


Walewein-Penninc 150/153


100%|██████████| 4/4 [00:00<00:00,  4.60it/s]


Walewein-Vostaert 151/153


100%|██████████| 3/3 [00:00<00:00,  8.51it/s]


Willem van Oringen 152/153


100%|██████████| 2/2 [00:00<00:00, 10.54it/s]


Wisselau 153/153


100%|██████████| 1/1 [00:00<00:00, 10.41it/s]


Unnamed: 0,t1,t2,l1,l2,hits,genre1,genre2,subgenre1,subgenre2,author1,author2,hit_ratio
9352,Noch meer van wiven,Van der wive wonderlijcheit (lange versie),111,225,149,Epiek,Epiek,Didactiek,Didactiek,,,0.005966
6767,Gwidekijn van Sassen,Ongeïdentificeerd (4),199,139,35,Epiek,Epiek,Karel,Karel,,,0.001265
9784,Ongeïdentificeerd (4),Roman van Cassant,139,29,5,Epiek,Epiek,Karel,Kruisvaart,,,0.001240
5296,Fierabras,Roman van Cassant,314,29,8,Epiek,Epiek,Karel,Kruisvaart,,,0.000879
6787,Gwidekijn van Sassen,Roman van Cassant,199,29,5,Epiek,Epiek,Karel,Kruisvaart,,,0.000866
...,...,...,...,...,...,...,...,...,...,...,...,...
9657,Ongeïdentificeerd (2),Roman van Cassant,2,29,0,Epiek,Epiek,Karel,Kruisvaart,,,0.000000
9658,Ongeïdentificeerd (2),Rose,2,14409,0,Epiek,Epiek,Karel,Didactiek,,,0.000000
9659,Ongeïdentificeerd (2),Rubben,2,243,0,Epiek,Epiek,Karel,Didactiek,,,0.000000
9660,Ongeïdentificeerd (2),Saladin,2,143,0,Epiek,Epiek,Karel,Kruisvaart,,,0.000000
