## Corpus testing

This notebook takes works with known labels and looks how good it is attributed to its author. 

In [1]:
import pandas as pd
import numpy as np

import re

from sklearn.preprocessing import StandardScaler

import logging

logging.basicConfig(level="INFO")
logger = logging.getLogger("ruzicka")

from ruzicka.BDIVerifier import BDIVerifier

### I. Authors to themselves

In [2]:
# load preprocessed data

corpus = pd.read_csv('03_tests/authors_themselves/to-test_2000_200mfw_rfreq.csv') # rel freq

corpus

Unnamed: 0,work,author,chunk_num,tag,de,la,les,vous,il,l,...,effet,mieux,donner,jamais,enfin,voir,rendre,ici,beaucoup,cependant
0,Avis,Baudeau,0,0__Baudeau_Avis,3.45,1.40,2.35,0.65,1.25,0.65,...,0.05,0.00,0.05,0.00,0.20,0.00,0.00,0.05,0.05,0.00
1,Eclaircissemens,Baudeau,0,0__Baudeau_Eclaircissemens,4.40,2.60,2.40,1.65,0.85,2.20,...,0.25,0.00,0.05,0.10,0.00,0.00,0.00,0.00,0.00,0.00
2,Explication,Baudeau,0,0__Baudeau_Explication,4.90,3.10,4.65,0.95,1.40,1.15,...,0.00,0.00,0.05,0.00,0.05,0.05,0.00,0.10,0.05,0.00
3,Idees sur les besoins 1,Baudeau,0,0__Baudeau_Idees sur les besoins 1,4.65,3.00,3.20,0.10,0.80,2.00,...,0.00,0.10,0.00,0.20,0.00,0.05,0.00,0.10,0.00,0.00
4,Idees sur les besoins 2,Baudeau,0,0__Baudeau_Idees sur les besoins 2,4.65,3.00,3.35,0.05,0.80,2.10,...,0.00,0.10,0.00,0.20,0.00,0.05,0.00,0.10,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2692,elements T3,Marmontel,94,94__Marmontel_elements T3,5.45,3.70,1.60,0.00,1.10,3.05,...,0.00,0.00,0.15,0.20,0.00,0.05,0.05,0.00,0.10,0.05
2693,elements T1,Marmontel,95,95__Marmontel_elements T1,4.35,3.25,0.95,0.10,2.05,3.45,...,0.05,0.05,0.05,0.05,0.05,0.20,0.35,0.00,0.00,0.05
2694,elements T3,Marmontel,95,95__Marmontel_elements T3,4.20,3.10,2.95,0.10,1.65,2.65,...,0.00,0.00,0.10,0.15,0.00,0.20,0.05,0.05,0.05,0.05
2695,elements T3,Marmontel,96,96__Marmontel_elements T3,4.15,3.00,1.15,0.00,1.45,2.75,...,0.05,0.05,0.05,0.05,0.05,0.00,0.10,0.00,0.00,0.00


In [3]:
raw_df = corpus.iloc[:, :4] # leaves metadata
X = corpus.iloc[:, 4:] # leaves only word columns

raw_df

Unnamed: 0,work,author,chunk_num,tag
0,Avis,Baudeau,0,0__Baudeau_Avis
1,Eclaircissemens,Baudeau,0,0__Baudeau_Eclaircissemens
2,Explication,Baudeau,0,0__Baudeau_Explication
3,Idees sur les besoins 1,Baudeau,0,0__Baudeau_Idees sur les besoins 1
4,Idees sur les besoins 2,Baudeau,0,0__Baudeau_Idees sur les besoins 2
...,...,...,...,...
2692,elements T3,Marmontel,94,94__Marmontel_elements T3
2693,elements T1,Marmontel,95,95__Marmontel_elements T1
2694,elements T3,Marmontel,95,95__Marmontel_elements T3
2695,elements T3,Marmontel,96,96__Marmontel_elements T3


In [4]:
# numer of rows and columns in the X (word frequencies subset)
X.shape

(2697, 200)

In [5]:
# create numeric author labels
labels, label_uniques = raw_df.author.factorize()
raw_df.insert(1, "author_label", labels)
raw_df

Unnamed: 0,work,author_label,author,chunk_num,tag
0,Avis,0,Baudeau,0,0__Baudeau_Avis
1,Eclaircissemens,0,Baudeau,0,0__Baudeau_Eclaircissemens
2,Explication,0,Baudeau,0,0__Baudeau_Explication
3,Idees sur les besoins 1,0,Baudeau,0,0__Baudeau_Idees sur les besoins 1
4,Idees sur les besoins 2,0,Baudeau,0,0__Baudeau_Idees sur les besoins 2
...,...,...,...,...,...
2692,elements T3,10,Marmontel,94,94__Marmontel_elements T3
2693,elements T1,10,Marmontel,95,95__Marmontel_elements T1
2694,elements T3,10,Marmontel,95,95__Marmontel_elements T3
2695,elements T3,10,Marmontel,96,96__Marmontel_elements T3


In [6]:
# check unique authors
label_uniques.values

array(['Baudeau', 'Chastellux', 'Condorcet', 'Deleyre', 'dHolbach',
       'Diderot', 'Guibert', 'Jaucourt', 'Jussieu', 'La Grange',
       'Marmontel', 'Meister', 'Morellet', 'Naigeon', 'Pechmeja',
       'Raynal', 'Rivière', 'Saint-Lambert'], dtype=object)

In [7]:
# set verifier
rng = np.random.default_rng(42)

bdi_mm = BDIVerifier(
    metric='minmax', nb_bootstrap_iter=1000, rnd_prop=0.35, random_state=rng
)

In [9]:
# Loop that takes a work from each author
# and looks whether its (known) author is a good much according to BDI;
# nb: not getting results for a work vs all authors
# (writing results in a .csv, visualisation done later with R)

# select an author, look for his unique works
for a in label_uniques.values:
    #print(author)
    
    # take a subset with only works by an author, find unique works
    unique_works = raw_df[raw_df.author == a]['work'].unique()
    # unique_works is a numpy array of works of each author
    
    for w in unique_works:
        print(f"\n\n#######################\n{a}: {w}\n")
        
        # select each work as a problem set (problems-metadata, problems_X-word freqs)
        problems = raw_df[raw_df.work == w].reset_index(drop=True).copy()
        problems_X = X[raw_df.work == w].reset_index(drop=True).copy()
        print(problems.iloc[1:2,:])
        
        # extract the rest of the corpus
        rest = raw_df[raw_df.work != w].reset_index(drop=True).copy()
        rest_X = X[raw_df.work != w].reset_index(drop=True).copy()
        #print(f"\nRest of the corpus:\n{rest.iloc[1:6,:]}")
        
        # scaling
        ss = StandardScaler(with_mean=False).fit(rest_X)
        rest_scaled_X = ss.transform(rest_X)
        problems_scaled_X = ss.transform(problems_X)
        
        #print(problems_scaled_X)
        
        # fit 
        bdi_mm.fit(rest_scaled_X, rest.author_label)
        
        # prediction
        bdi_mm.predict_proba(
            problems_scaled_X, 
            # get_loc - select the author of the work (? is that right??)
            [label_uniques.get_loc(a)] * problems_scaled_X.shape[0]
        )
        
        # output results
        res = pd.DataFrame(dict(zip(problems.tag, bdi_mm._dist_arrays)))
        
        # writing
        fh = '03_tests/authors_themselves/bdi_res/'+a+'_'+w+'.csv'
        res.to_csv(fh)
        print('Results written in:',fh)
        
        rest_scaled_X = None
        res = None

03/18/2025 01:23:23 [ruzicka:INFO] Fitting on 2672 documents...
03/18/2025 01:23:23 [ruzicka:INFO] Predicting on 25 documents




#######################
Baudeau: Avis 

    work  author_label   author  chunk_num               tag
1  Avis              0  Baudeau          1  1__Baudeau_Avis 


03/18/2025 01:23:27 [ruzicka:INFO] Fitting on 2678 documents...
03/18/2025 01:23:27 [ruzicka:INFO] Predicting on 19 documents


Results written in: 03_tests/authors_themselves/bdi_res/Baudeau_Avis .csv


#######################
Baudeau: Eclaircissemens

              work  author_label   author  chunk_num  \
1  Eclaircissemens             0  Baudeau          1   

                          tag  
1  1__Baudeau_Eclaircissemens  


03/18/2025 01:23:31 [ruzicka:INFO] Fitting on 2686 documents...
03/18/2025 01:23:31 [ruzicka:INFO] Predicting on 11 documents


Results written in: 03_tests/authors_themselves/bdi_res/Baudeau_Eclaircissemens.csv


#######################
Baudeau: Explication

          work  author_label   author  chunk_num                     tag
1  Explication             0  Baudeau          1  1__Baudeau_Explication


03/18/2025 01:23:33 [ruzicka:INFO] Fitting on 2661 documents...
03/18/2025 01:23:33 [ruzicka:INFO] Predicting on 36 documents


Results written in: 03_tests/authors_themselves/bdi_res/Baudeau_Explication.csv


#######################
Baudeau: Idees sur les besoins 1

                      work  author_label   author  chunk_num  \
1  Idees sur les besoins 1             0  Baudeau          1   

                                  tag  
1  1__Baudeau_Idees sur les besoins 1  


03/18/2025 01:23:39 [ruzicka:INFO] Fitting on 2661 documents...
03/18/2025 01:23:39 [ruzicka:INFO] Predicting on 36 documents


Results written in: 03_tests/authors_themselves/bdi_res/Baudeau_Idees sur les besoins 1.csv


#######################
Baudeau: Idees sur les besoins 2

                      work  author_label   author  chunk_num  \
1  Idees sur les besoins 2             0  Baudeau          1   

                                  tag  
1  1__Baudeau_Idees sur les besoins 2  


03/18/2025 01:23:45 [ruzicka:INFO] Fitting on 2696 documents...
03/18/2025 01:23:45 [ruzicka:INFO] Predicting on 1 documents


Results written in: 03_tests/authors_themselves/bdi_res/Baudeau_Idees sur les besoins 2.csv


#######################
Baudeau: Lettre du fermier

Empty DataFrame
Columns: [work, author_label, author, chunk_num, tag]
Index: []


03/18/2025 01:23:45 [ruzicka:INFO] Fitting on 2683 documents...
03/18/2025 01:23:45 [ruzicka:INFO] Predicting on 14 documents


Results written in: 03_tests/authors_themselves/bdi_res/Baudeau_Lettre du fermier.csv


#######################
Baudeau: Lettres  a un magistraTtxt

                         work  author_label   author  chunk_num  \
1  Lettres  a un magistraTtxt             0  Baudeau          1   

                                     tag  
1  1__Baudeau_Lettres  a un magistraTtxt  


03/18/2025 01:23:48 [ruzicka:INFO] Fitting on 2694 documents...
03/18/2025 01:23:48 [ruzicka:INFO] Predicting on 3 documents


Results written in: 03_tests/authors_themselves/bdi_res/Baudeau_Lettres  a un magistraTtxt.csv


#######################
Baudeau: Resultats

        work  author_label   author  chunk_num                   tag
1  Resultats             0  Baudeau          1  1__Baudeau_Resultats


03/18/2025 01:23:48 [ruzicka:INFO] Fitting on 2629 documents...
03/18/2025 01:23:48 [ruzicka:INFO] Predicting on 68 documents


Results written in: 03_tests/authors_themselves/bdi_res/Baudeau_Resultats.csv


#######################
Chastellux: De la felicite

             work  author_label      author  chunk_num  \
1  De la felicite             1  Chastellux          1   

                            tag  
1  1__Chastellux_De la felicite  


03/18/2025 01:24:02 [ruzicka:INFO] Fitting on 2679 documents...
03/18/2025 01:24:02 [ruzicka:INFO] Predicting on 18 documents


Results written in: 03_tests/authors_themselves/bdi_res/Chastellux_De la felicite.csv


#######################
Chastellux: eloge

    work  author_label    author  chunk_num                tag
1  eloge            14  Pechmeja          0  0__Pechmeja_eloge


03/18/2025 01:24:06 [ruzicka:INFO] Fitting on 2659 documents...
03/18/2025 01:24:06 [ruzicka:INFO] Predicting on 38 documents


Results written in: 03_tests/authors_themselves/bdi_res/Chastellux_eloge.csv


#######################
Chastellux: Voyage de Newport

                work  author_label      author  chunk_num  \
1  Voyage de Newport             1  Chastellux          1   

                               tag  
1  1__Chastellux_Voyage de Newport  


03/18/2025 01:24:14 [ruzicka:INFO] Fitting on 2656 documents...
03/18/2025 01:24:14 [ruzicka:INFO] Predicting on 41 documents


Results written in: 03_tests/authors_themselves/bdi_res/Chastellux_Voyage de Newport.csv


#######################
Chastellux: Voyages T1

         work  author_label      author  chunk_num                       tag
1  Voyages T1             1  Chastellux          1  1__Chastellux_Voyages T1


03/18/2025 01:24:23 [ruzicka:INFO] Fitting on 2660 documents...
03/18/2025 01:24:23 [ruzicka:INFO] Predicting on 37 documents


Results written in: 03_tests/authors_themselves/bdi_res/Chastellux_Voyages T1.csv


#######################
Chastellux: Voyages T2

         work  author_label      author  chunk_num                       tag
1  Voyages T2             1  Chastellux          1  1__Chastellux_Voyages T2


03/18/2025 01:24:31 [ruzicka:INFO] Fitting on 2663 documents...
03/18/2025 01:24:31 [ruzicka:INFO] Predicting on 34 documents


Results written in: 03_tests/authors_themselves/bdi_res/Chastellux_Voyages T2.csv


#######################
Condorcet: 12

  work  author_label     author  chunk_num              tag
1   12             2  Condorcet          1  1__Condorcet_12


03/18/2025 01:24:40 [ruzicka:INFO] Fitting on 2669 documents...
03/18/2025 01:24:40 [ruzicka:INFO] Predicting on 28 documents


Results written in: 03_tests/authors_themselves/bdi_res/Condorcet_12.csv


#######################
Condorcet: 13

  work  author_label     author  chunk_num              tag
1   13             2  Condorcet          1  1__Condorcet_13


03/18/2025 01:24:48 [ruzicka:INFO] Fitting on 2668 documents...
03/18/2025 01:24:48 [ruzicka:INFO] Predicting on 29 documents


Results written in: 03_tests/authors_themselves/bdi_res/Condorcet_13.csv


#######################
Condorcet: 15

  work  author_label     author  chunk_num              tag
1   15             2  Condorcet          1  1__Condorcet_15


03/18/2025 01:24:56 [ruzicka:INFO] Fitting on 2665 documents...
03/18/2025 01:24:56 [ruzicka:INFO] Predicting on 32 documents


Results written in: 03_tests/authors_themselves/bdi_res/Condorcet_15.csv


#######################
Condorcet: 16

  work  author_label     author  chunk_num              tag
1   16             2  Condorcet          1  1__Condorcet_16


03/18/2025 01:25:05 [ruzicka:INFO] Fitting on 2662 documents...
03/18/2025 01:25:05 [ruzicka:INFO] Predicting on 35 documents


Results written in: 03_tests/authors_themselves/bdi_res/Condorcet_16.csv


#######################
Condorcet: 17

  work  author_label     author  chunk_num              tag
1   17             2  Condorcet          1  1__Condorcet_17


03/18/2025 01:25:14 [ruzicka:INFO] Fitting on 2663 documents...
03/18/2025 01:25:14 [ruzicka:INFO] Predicting on 34 documents


Results written in: 03_tests/authors_themselves/bdi_res/Condorcet_17.csv


#######################
Condorcet: 19

  work  author_label     author  chunk_num              tag
1   19             2  Condorcet          1  1__Condorcet_19


03/18/2025 01:25:23 [ruzicka:INFO] Fitting on 2662 documents...
03/18/2025 01:25:23 [ruzicka:INFO] Predicting on 35 documents


Results written in: 03_tests/authors_themselves/bdi_res/Condorcet_19.csv


#######################
Condorcet: 20

  work  author_label     author  chunk_num              tag
1   20             2  Condorcet          1  1__Condorcet_20


03/18/2025 01:25:33 [ruzicka:INFO] Fitting on 2665 documents...
03/18/2025 01:25:33 [ruzicka:INFO] Predicting on 32 documents


Results written in: 03_tests/authors_themselves/bdi_res/Condorcet_20.csv


#######################
Condorcet: 5

  work  author_label     author  chunk_num             tag
1    5             2  Condorcet          1  1__Condorcet_5


03/18/2025 01:25:42 [ruzicka:INFO] Fitting on 2674 documents...
03/18/2025 01:25:42 [ruzicka:INFO] Predicting on 23 documents


Results written in: 03_tests/authors_themselves/bdi_res/Condorcet_5.csv


#######################
Deleyre: Analyse de la philosophie

                        work  author_label   author  chunk_num  \
1  Analyse de la philosophie             3  Deleyre          1   

                                    tag  
1  1__Deleyre_Analyse de la philosophie  


03/18/2025 01:25:45 [ruzicka:INFO] Fitting on 2691 documents...
03/18/2025 01:25:45 [ruzicka:INFO] Predicting on 6 documents


Results written in: 03_tests/authors_themselves/bdi_res/Deleyre_Analyse de la philosophie.csv


#######################
Deleyre: eloge de M Roux

              work  author_label   author  chunk_num  \
1  eloge de M Roux             3  Deleyre          1   

                          tag  
1  1__Deleyre_eloge de M Roux  


03/18/2025 01:25:46 [ruzicka:INFO] Fitting on 2695 documents...
03/18/2025 01:25:46 [ruzicka:INFO] Predicting on 2 documents


Results written in: 03_tests/authors_themselves/bdi_res/Deleyre_eloge de M Roux.csv


#######################
Deleyre: epingle

      work  author_label   author  chunk_num                 tag
1  epingle             3  Deleyre          1  1__Deleyre_epingle


03/18/2025 01:25:46 [ruzicka:INFO] Fitting on 2665 documents...
03/18/2025 01:25:46 [ruzicka:INFO] Predicting on 32 documents


Results written in: 03_tests/authors_themselves/bdi_res/Deleyre_epingle.csv


#######################
Deleyre: Essai sur la vie

               work  author_label   author  chunk_num  \
1  Essai sur la vie             3  Deleyre          1   

                           tag  
1  1__Deleyre_Essai sur la vie  


03/18/2025 01:25:50 [ruzicka:INFO] Fitting on 2689 documents...
03/18/2025 01:25:50 [ruzicka:INFO] Predicting on 8 documents


Results written in: 03_tests/authors_themselves/bdi_res/Deleyre_Essai sur la vie.csv


#######################
Deleyre: Idees sur l-education

                    work  author_label   author  chunk_num  \
1  Idees sur l-education             3  Deleyre          1   

                                tag  
1  1__Deleyre_Idees sur l-education  


03/18/2025 01:25:52 [ruzicka:INFO] Fitting on 2673 documents...
03/18/2025 01:25:52 [ruzicka:INFO] Predicting on 24 documents


Results written in: 03_tests/authors_themselves/bdi_res/Deleyre_Idees sur l-education.csv


#######################
Deleyre: L-esprit de Saint-evremont

                         work  author_label   author  chunk_num  \
1  L-esprit de Saint-evremont             3  Deleyre          1   

                                     tag  
1  1__Deleyre_L-esprit de Saint-evremont  


03/18/2025 01:25:55 [ruzicka:INFO] Fitting on 2696 documents...
03/18/2025 01:25:55 [ruzicka:INFO] Predicting on 1 documents
03/18/2025 01:25:55 [ruzicka:INFO] Fitting on 2695 documents...
03/18/2025 01:25:55 [ruzicka:INFO] Predicting on 2 documents


Results written in: 03_tests/authors_themselves/bdi_res/Deleyre_L-esprit de Saint-evremont.csv


#######################
Deleyre: Rapport pour des Corses expatries

Empty DataFrame
Columns: [work, author_label, author, chunk_num, tag]
Index: []
Results written in: 03_tests/authors_themselves/bdi_res/Deleyre_Rapport pour des Corses expatries.csv


#######################
Deleyre: Sur la question

              work  author_label   author  chunk_num  \
1  Sur la question             3  Deleyre          1   

                          tag  
1  1__Deleyre_Sur la question  


03/18/2025 01:25:55 [ruzicka:INFO] Fitting on 2687 documents...
03/18/2025 01:25:55 [ruzicka:INFO] Predicting on 10 documents


Results written in: 03_tests/authors_themselves/bdi_res/Deleyre_Sur la question.csv


#######################
dHolbach: elements de la morale universelle

                                work  author_label    author  chunk_num  \
1  elements de la morale universelle             4  dHolbach          1   

                                             tag  
1  1__dHolbach_elements de la morale universelle  


03/18/2025 01:25:57 [ruzicka:INFO] Fitting on 2696 documents...
03/18/2025 01:25:57 [ruzicka:INFO] Predicting on 1 documents


Results written in: 03_tests/authors_themselves/bdi_res/dHolbach_elements de la morale universelle.csv


#######################
dHolbach: Essai sur l-art de ramper

Empty DataFrame
Columns: [work, author_label, author, chunk_num, tag]
Index: []


03/18/2025 01:25:58 [ruzicka:INFO] Fitting on 2666 documents...
03/18/2025 01:25:58 [ruzicka:INFO] Predicting on 31 documents


Results written in: 03_tests/authors_themselves/bdi_res/dHolbach_Essai sur l-art de ramper.csv


#######################
dHolbach: ethocratie

         work  author_label    author  chunk_num                     tag
1  ethocratie             4  dHolbach          1  1__dHolbach_ethocratie


03/18/2025 01:26:03 [ruzicka:INFO] Fitting on 2686 documents...
03/18/2025 01:26:03 [ruzicka:INFO] Predicting on 11 documents


Results written in: 03_tests/authors_themselves/bdi_res/dHolbach_ethocratie.csv


#######################
dHolbach: La Morale 1

          work  author_label    author  chunk_num                      tag
1  La Morale 1             4  dHolbach          1  1__dHolbach_La Morale 1


03/18/2025 01:26:06 [ruzicka:INFO] Fitting on 2664 documents...
03/18/2025 01:26:06 [ruzicka:INFO] Predicting on 33 documents


Results written in: 03_tests/authors_themselves/bdi_res/dHolbach_La Morale 1.csv


#######################
dHolbach: La Morale 4

          work  author_label    author  chunk_num                      tag
1  La Morale 4             4  dHolbach          1  1__dHolbach_La Morale 4


03/18/2025 01:26:12 [ruzicka:INFO] Fitting on 2665 documents...
03/18/2025 01:26:12 [ruzicka:INFO] Predicting on 32 documents


Results written in: 03_tests/authors_themselves/bdi_res/dHolbach_La Morale 4.csv


#######################
dHolbach: La Morale 5

          work  author_label    author  chunk_num                      tag
1  La Morale 5             4  dHolbach          1  1__dHolbach_La Morale 5


03/18/2025 01:26:18 [ruzicka:INFO] Fitting on 2676 documents...
03/18/2025 01:26:18 [ruzicka:INFO] Predicting on 21 documents


Results written in: 03_tests/authors_themselves/bdi_res/dHolbach_La Morale 5.csv


#######################
dHolbach: Système Social 2

                work  author_label    author  chunk_num  \
1  Système Social 2             4  dHolbach          1   

                             tag  
1  1__dHolbach_Système Social 2  


03/18/2025 01:26:22 [ruzicka:INFO] Fitting on 2676 documents...
03/18/2025 01:26:22 [ruzicka:INFO] Predicting on 21 documents


Results written in: 03_tests/authors_themselves/bdi_res/dHolbach_Système Social 2.csv


#######################
dHolbach: Theologie portative

                  work  author_label    author  chunk_num  \
1  Theologie portative             4  dHolbach          1   

                               tag  
1  1__dHolbach_Theologie portative  


03/18/2025 01:26:26 [ruzicka:INFO] Fitting on 2695 documents...
03/18/2025 01:26:26 [ruzicka:INFO] Predicting on 2 documents


Results written in: 03_tests/authors_themselves/bdi_res/dHolbach_Theologie portative.csv


#######################
Diderot: De la suffisance

               work  author_label   author  chunk_num  \
1  De la suffisance             5  Diderot          1   

                           tag  
1  1__Diderot_De la suffisance  


03/18/2025 01:26:26 [ruzicka:INFO] Fitting on 2686 documents...
03/18/2025 01:26:26 [ruzicka:INFO] Predicting on 11 documents


Results written in: 03_tests/authors_themselves/bdi_res/Diderot_De la suffisance.csv


#######################
Diderot: Essai sur la Peinture

                    work  author_label   author  chunk_num  \
1  Essai sur la Peinture             5  Diderot          1   

                                tag  
1  1__Diderot_Essai sur la Peinture  


03/18/2025 01:26:28 [ruzicka:INFO] Fitting on 2693 documents...
03/18/2025 01:26:28 [ruzicka:INFO] Predicting on 4 documents


Results written in: 03_tests/authors_themselves/bdi_res/Diderot_Essai sur la Peinture.csv


#######################
Diderot: Fragments echappes (+FP1)

                        work  author_label   author  chunk_num  \
1  Fragments echappes (+FP1)             5  Diderot          1   

                                    tag  
1  1__Diderot_Fragments echappes (+FP1)  


03/18/2025 01:26:28 [ruzicka:INFO] Fitting on 2690 documents...
03/18/2025 01:26:28 [ruzicka:INFO] Predicting on 7 documents


Results written in: 03_tests/authors_themselves/bdi_res/Diderot_Fragments echappes (+FP1).csv


#######################
Diderot: Recherches philosophiques

                        work  author_label   author  chunk_num  \
1  Recherches philosophiques             5  Diderot          1   

                                    tag  
1  1__Diderot_Recherches philosophiques  


03/18/2025 01:26:29 [ruzicka:INFO] Fitting on 2666 documents...
03/18/2025 01:26:29 [ruzicka:INFO] Predicting on 31 documents


Results written in: 03_tests/authors_themselves/bdi_res/Diderot_Recherches philosophiques.csv


#######################
Diderot: Refutation suivie

                work  author_label   author  chunk_num  \
1  Refutation suivie             5  Diderot          1   

                            tag  
1  1__Diderot_Refutation suivie  


03/18/2025 01:26:33 [ruzicka:INFO] Fitting on 2685 documents...
03/18/2025 01:26:33 [ruzicka:INFO] Predicting on 12 documents


Results written in: 03_tests/authors_themselves/bdi_res/Diderot_Refutation suivie.csv


#######################
Diderot: Salon 1763

         work  author_label   author  chunk_num                    tag
1  Salon 1763             5  Diderot          1  1__Diderot_Salon 1763


03/18/2025 01:26:34 [ruzicka:INFO] Fitting on 2694 documents...
03/18/2025 01:26:34 [ruzicka:INFO] Predicting on 3 documents


Results written in: 03_tests/authors_themselves/bdi_res/Diderot_Salon 1763.csv


#######################
Diderot: Salon 1775

         work  author_label   author  chunk_num                    tag
1  Salon 1775             5  Diderot          1  1__Diderot_Salon 1775


03/18/2025 01:26:35 [ruzicka:INFO] Fitting on 2687 documents...
03/18/2025 01:26:35 [ruzicka:INFO] Predicting on 10 documents


Results written in: 03_tests/authors_themselves/bdi_res/Diderot_Salon 1775.csv


#######################
Diderot: Suite de l-apologie

                  work  author_label   author  chunk_num  \
1  Suite de l-apologie             5  Diderot          1   

                              tag  
1  1__Diderot_Suite de l-apologie  


03/18/2025 01:26:36 [ruzicka:INFO] Fitting on 2691 documents...
03/18/2025 01:26:36 [ruzicka:INFO] Predicting on 6 documents


Results written in: 03_tests/authors_themselves/bdi_res/Diderot_Suite de l-apologie.csv


#######################
Guibert: Discurs

      work  author_label   author  chunk_num                 tag
1  Discurs             6  Guibert          1  1__Guibert_Discurs


03/18/2025 01:26:38 [ruzicka:INFO] Fitting on 2678 documents...
03/18/2025 01:26:38 [ruzicka:INFO] Predicting on 19 documents


Results written in: 03_tests/authors_themselves/bdi_res/Guibert_Discurs.csv


#######################
Guibert: Eloge du roi

           work  author_label   author  chunk_num                      tag
1  Eloge du roi             6  Guibert          1  1__Guibert_Eloge du roi


03/18/2025 01:26:42 [ruzicka:INFO] Fitting on 2645 documents...
03/18/2025 01:26:42 [ruzicka:INFO] Predicting on 52 documents


Results written in: 03_tests/authors_themselves/bdi_res/Guibert_Eloge du roi.csv


#######################
Guibert: Essai generale

             work  author_label   author  chunk_num                        tag
1  Essai generale             6  Guibert          1  1__Guibert_Essai generale


03/18/2025 01:26:54 [ruzicka:INFO] Fitting on 2644 documents...
03/18/2025 01:26:54 [ruzicka:INFO] Predicting on 53 documents


Results written in: 03_tests/authors_themselves/bdi_res/Guibert_Essai generale.csv


#######################
Guibert: Oeuvres militaires T1

                    work  author_label   author  chunk_num  \
1  Oeuvres militaires T1             6  Guibert          1   

                                tag  
1  1__Guibert_Oeuvres militaires T1  


03/18/2025 01:27:06 [ruzicka:INFO] Fitting on 2664 documents...
03/18/2025 01:27:06 [ruzicka:INFO] Predicting on 33 documents


Results written in: 03_tests/authors_themselves/bdi_res/Guibert_Oeuvres militaires T1.csv


#######################
Guibert: Oeuvres militaires T2

                    work  author_label   author  chunk_num  \
1  Oeuvres militaires T2             6  Guibert          1   

                                tag  
1  1__Guibert_Oeuvres militaires T2  


03/18/2025 01:27:14 [ruzicka:INFO] Fitting on 2667 documents...
03/18/2025 01:27:14 [ruzicka:INFO] Predicting on 30 documents


Results written in: 03_tests/authors_themselves/bdi_res/Guibert_Oeuvres militaires T2.csv


#######################
Guibert: Oeuvres militaires T3

                    work  author_label   author  chunk_num  \
1  Oeuvres militaires T3             6  Guibert          1   

                                tag  
1  1__Guibert_Oeuvres militaires T3  


03/18/2025 01:27:21 [ruzicka:INFO] Fitting on 2667 documents...
03/18/2025 01:27:21 [ruzicka:INFO] Predicting on 30 documents


Results written in: 03_tests/authors_themselves/bdi_res/Guibert_Oeuvres militaires T3.csv


#######################
Guibert: Oeuvres militaires T4

                    work  author_label   author  chunk_num  \
1  Oeuvres militaires T4             6  Guibert          1   

                                tag  
1  1__Guibert_Oeuvres militaires T4  


03/18/2025 01:27:28 [ruzicka:INFO] Fitting on 2677 documents...
03/18/2025 01:27:28 [ruzicka:INFO] Predicting on 20 documents


Results written in: 03_tests/authors_themselves/bdi_res/Guibert_Oeuvres militaires T4.csv


#######################
Jaucourt: Ency 1-7

       work  author_label    author  chunk_num                   tag
1  Ency 1-7             7  Jaucourt          1  1__Jaucourt_Ency 1-7


03/18/2025 01:27:33 [ruzicka:INFO] Fitting on 2687 documents...
03/18/2025 01:27:33 [ruzicka:INFO] Predicting on 10 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jaucourt_Ency 1-7.csv


#######################
Jaucourt: Ency 10

      work  author_label    author  chunk_num                  tag
1  Ency 10             7  Jaucourt          1  1__Jaucourt_Ency 10


03/18/2025 01:27:35 [ruzicka:INFO] Fitting on 2689 documents...
03/18/2025 01:27:35 [ruzicka:INFO] Predicting on 8 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jaucourt_Ency 10.csv


#######################
Jaucourt: Ency 11

      work  author_label    author  chunk_num                  tag
1  Ency 11             7  Jaucourt          1  1__Jaucourt_Ency 11


03/18/2025 01:27:37 [ruzicka:INFO] Fitting on 2689 documents...
03/18/2025 01:27:37 [ruzicka:INFO] Predicting on 8 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jaucourt_Ency 11.csv


#######################
Jaucourt: Ency 12

      work  author_label    author  chunk_num                  tag
1  Ency 12             7  Jaucourt          1  1__Jaucourt_Ency 12


03/18/2025 01:27:39 [ruzicka:INFO] Fitting on 2688 documents...
03/18/2025 01:27:39 [ruzicka:INFO] Predicting on 9 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jaucourt_Ency 12.csv


#######################
Jaucourt: Ency 8

     work  author_label    author  chunk_num                 tag
1  Ency 8             7  Jaucourt          1  1__Jaucourt_Ency 8


03/18/2025 01:27:41 [ruzicka:INFO] Fitting on 2686 documents...
03/18/2025 01:27:41 [ruzicka:INFO] Predicting on 11 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jaucourt_Ency 8.csv


#######################
Jaucourt: Ency 9

     work  author_label    author  chunk_num                 tag
1  Ency 9             7  Jaucourt          1  1__Jaucourt_Ency 9


03/18/2025 01:27:43 [ruzicka:INFO] Fitting on 2637 documents...
03/18/2025 01:27:43 [ruzicka:INFO] Predicting on 60 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jaucourt_Ency 9.csv


#######################
Jaucourt: Essais de teodicee T1

                    work  author_label    author  chunk_num  \
1  Essais de teodicee T1             7  Jaucourt          1   

                                 tag  
1  1__Jaucourt_Essais de teodicee T1  


03/18/2025 01:27:54 [ruzicka:INFO] Fitting on 2637 documents...
03/18/2025 01:27:54 [ruzicka:INFO] Predicting on 60 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jaucourt_Essais de teodicee T1.csv


#######################
Jaucourt: Essais de teodicee T2

                    work  author_label    author  chunk_num  \
1  Essais de teodicee T2             7  Jaucourt          1   

                                 tag  
1  1__Jaucourt_Essais de teodicee T2  


03/18/2025 01:28:05 [ruzicka:INFO] Fitting on 2694 documents...
03/18/2025 01:28:05 [ruzicka:INFO] Predicting on 3 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jaucourt_Essais de teodicee T2.csv


#######################
Jussieu: Exposition d-un nouvel ordre

                           work  author_label   author  chunk_num  \
1  Exposition d-un nouvel ordre             8  Jussieu          1   

                                       tag  
1  1__Jussieu_Exposition d-un nouvel ordre  


03/18/2025 01:28:06 [ruzicka:INFO] Fitting on 2688 documents...
03/18/2025 01:28:06 [ruzicka:INFO] Predicting on 9 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jussieu_Exposition d-un nouvel ordre.csv


#######################
Jussieu: HDI T1-4

       work  author_label   author  chunk_num                  tag
1  HDI T1-4             8  Jussieu          1  1__Jussieu_HDI T1-4


03/18/2025 01:28:07 [ruzicka:INFO] Fitting on 2666 documents...
03/18/2025 01:28:07 [ruzicka:INFO] Predicting on 31 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jussieu_HDI T1-4.csv


#######################
Jussieu: Memoire caractères 1-14

                       work  author_label   author  chunk_num  \
1  Memoire caractères 1-14             8  Jussieu          1   

                                   tag  
1  1__Jussieu_Memoire caractères 1-14  


03/18/2025 01:28:12 [ruzicka:INFO] Fitting on 2693 documents...
03/18/2025 01:28:12 [ruzicka:INFO] Predicting on 4 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jussieu_Memoire caractères 1-14.csv


#######################
Jussieu: Memoire famille

              work  author_label   author  chunk_num  \
1  Memoire famille             8  Jussieu          1   

                          tag  
1  1__Jussieu_Memoire famille  


03/18/2025 01:28:13 [ruzicka:INFO] Fitting on 2690 documents...
03/18/2025 01:28:13 [ruzicka:INFO] Predicting on 7 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jussieu_Memoire famille.csv


#######################
Jussieu: Methode naturelle

                work  author_label   author  chunk_num  \
1  Methode naturelle             8  Jussieu          1   

                            tag  
1  1__Jussieu_Methode naturelle  


03/18/2025 01:28:14 [ruzicka:INFO] Fitting on 2684 documents...
03/18/2025 01:28:14 [ruzicka:INFO] Predicting on 13 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jussieu_Methode naturelle.csv


#######################
Jussieu: Notice Historique 1-6

                    work  author_label   author  chunk_num  \
1  Notice Historique 1-6             8  Jussieu          1   

                                tag  
1  1__Jussieu_Notice Historique 1-6  


03/18/2025 01:28:16 [ruzicka:INFO] Fitting on 2690 documents...
03/18/2025 01:28:16 [ruzicka:INFO] Predicting on 7 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jussieu_Notice Historique 1-6.csv


#######################
Jussieu: Principes

        work  author_label   author  chunk_num                   tag
1  Principes             8  Jussieu          1  1__Jussieu_Principes


03/18/2025 01:28:17 [ruzicka:INFO] Fitting on 2691 documents...
03/18/2025 01:28:17 [ruzicka:INFO] Predicting on 6 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jussieu_Principes.csv


#######################
Jussieu: RapporTtxt

         work  author_label   author  chunk_num                    tag
1  RapporTtxt             8  Jussieu          1  1__Jussieu_RapporTtxt


03/18/2025 01:28:18 [ruzicka:INFO] Fitting on 2656 documents...
03/18/2025 01:28:18 [ruzicka:INFO] Predicting on 41 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jussieu_RapporTtxt.csv


#######################
Jussieu: Traite des vertus

                work  author_label   author  chunk_num  \
1  Traite des vertus             8  Jussieu          1   

                            tag  
1  1__Jussieu_Traite des vertus  


03/18/2025 01:28:24 [ruzicka:INFO] Fitting on 2658 documents...
03/18/2025 01:28:24 [ruzicka:INFO] Predicting on 39 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jussieu_Traite des vertus.csv


#######################
La Grange: Lucrece T1

         work  author_label     author  chunk_num                      tag
1  Lucrece T1             9  La Grange          1  1__La Grange_Lucrece T1


03/18/2025 01:28:28 [ruzicka:INFO] Fitting on 2651 documents...
03/18/2025 01:28:28 [ruzicka:INFO] Predicting on 46 documents


Results written in: 03_tests/authors_themselves/bdi_res/La Grange_Lucrece T1.csv


#######################
La Grange: Lucrece T2

         work  author_label     author  chunk_num                      tag
1  Lucrece T2             9  La Grange          1  1__La Grange_Lucrece T2


03/18/2025 01:28:33 [ruzicka:INFO] Fitting on 2668 documents...
03/18/2025 01:28:33 [ruzicka:INFO] Predicting on 29 documents


Results written in: 03_tests/authors_themselves/bdi_res/La Grange_Lucrece T2.csv


#######################
Marmontel: Belisaire

        work  author_label     author  chunk_num                     tag
1  Belisaire            10  Marmontel          1  1__Marmontel_Belisaire


03/18/2025 01:28:45 [ruzicka:INFO] Fitting on 2667 documents...
03/18/2025 01:28:45 [ruzicka:INFO] Predicting on 30 documents


Results written in: 03_tests/authors_themselves/bdi_res/Marmontel_Belisaire.csv


#######################
Marmontel: Contes T1

        work  author_label     author  chunk_num                     tag
1  Contes T1            10  Marmontel          1  1__Marmontel_Contes T1


03/18/2025 01:28:58 [ruzicka:INFO] Fitting on 2665 documents...
03/18/2025 01:28:58 [ruzicka:INFO] Predicting on 32 documents


Results written in: 03_tests/authors_themselves/bdi_res/Marmontel_Contes T1.csv


#######################
Marmontel: Contes T2

        work  author_label     author  chunk_num                     tag
1  Contes T2            10  Marmontel          1  1__Marmontel_Contes T2


03/18/2025 01:29:12 [ruzicka:INFO] Fitting on 2670 documents...
03/18/2025 01:29:12 [ruzicka:INFO] Predicting on 27 documents


Results written in: 03_tests/authors_themselves/bdi_res/Marmontel_Contes T2.csv


#######################
Marmontel: Contes T3

        work  author_label     author  chunk_num                     tag
1  Contes T3            10  Marmontel          1  1__Marmontel_Contes T3


03/18/2025 01:29:24 [ruzicka:INFO] Fitting on 2601 documents...
03/18/2025 01:29:24 [ruzicka:INFO] Predicting on 96 documents


Results written in: 03_tests/authors_themselves/bdi_res/Marmontel_Contes T3.csv


#######################
Marmontel: elements T1

          work  author_label     author  chunk_num                       tag
1  elements T1            10  Marmontel          1  1__Marmontel_elements T1


03/18/2025 01:29:59 [ruzicka:INFO] Fitting on 2612 documents...
03/18/2025 01:29:59 [ruzicka:INFO] Predicting on 85 documents


Results written in: 03_tests/authors_themselves/bdi_res/Marmontel_elements T1.csv


#######################
Marmontel: elements T2

          work  author_label     author  chunk_num                       tag
1  elements T2            10  Marmontel          1  1__Marmontel_elements T2


03/18/2025 01:30:32 [ruzicka:INFO] Fitting on 2599 documents...
03/18/2025 01:30:32 [ruzicka:INFO] Predicting on 98 documents


Results written in: 03_tests/authors_themselves/bdi_res/Marmontel_elements T2.csv


#######################
Marmontel: elements T3

          work  author_label     author  chunk_num                       tag
1  elements T3            10  Marmontel          1  1__Marmontel_elements T3


03/18/2025 01:31:08 [ruzicka:INFO] Fitting on 2660 documents...
03/18/2025 01:31:08 [ruzicka:INFO] Predicting on 37 documents


Results written in: 03_tests/authors_themselves/bdi_res/Marmontel_elements T3.csv


#######################
Marmontel: Les Incas

        work  author_label     author  chunk_num                     tag
1  Les Incas            10  Marmontel          1  1__Marmontel_Les Incas


03/18/2025 01:31:24 [ruzicka:INFO] Fitting on 2693 documents...
03/18/2025 01:31:24 [ruzicka:INFO] Predicting on 4 documents


Results written in: 03_tests/authors_themselves/bdi_res/Marmontel_Les Incas.csv


#######################
Meister: De l-origine

           work  author_label   author  chunk_num                      tag
1  De l-origine            11  Meister          1  1__Meister_De l-origine


03/18/2025 01:31:24 [ruzicka:INFO] Fitting on 2688 documents...
03/18/2025 01:31:24 [ruzicka:INFO] Predicting on 9 documents


Results written in: 03_tests/authors_themselves/bdi_res/Meister_De l-origine.csv


#######################
Meister: De la morale

           work  author_label   author  chunk_num                      tag
1  De la morale            11  Meister          1  1__Meister_De la morale


03/18/2025 01:31:25 [ruzicka:INFO] Fitting on 2695 documents...
03/18/2025 01:31:25 [ruzicka:INFO] Predicting on 2 documents


Results written in: 03_tests/authors_themselves/bdi_res/Meister_De la morale.csv


#######################
Morellet: FragmenTtxt

          work  author_label    author  chunk_num                      tag
1  FragmenTtxt            12  Morellet          1  1__Morellet_FragmenTtxt


03/18/2025 01:31:25 [ruzicka:INFO] Fitting on 2693 documents...
03/18/2025 01:31:25 [ruzicka:INFO] Predicting on 4 documents


Results written in: 03_tests/authors_themselves/bdi_res/Morellet_FragmenTtxt.csv


#######################
Morellet: Le cri

     work  author_label    author  chunk_num                 tag
1  Le cri            12  Morellet          1  1__Morellet_Le cri


03/18/2025 01:31:26 [ruzicka:INFO] Fitting on 2694 documents...
03/18/2025 01:31:26 [ruzicka:INFO] Predicting on 3 documents


Results written in: 03_tests/authors_themselves/bdi_res/Morellet_Le cri.csv


#######################
Morellet: Nouvelles

        work  author_label    author  chunk_num                    tag
1  Nouvelles            12  Morellet          1  1__Morellet_Nouvelles


03/18/2025 01:31:26 [ruzicka:INFO] Fitting on 2692 documents...
03/18/2025 01:31:26 [ruzicka:INFO] Predicting on 5 documents


Results written in: 03_tests/authors_themselves/bdi_res/Morellet_Nouvelles.csv


#######################
Morellet: Observations

           work  author_label    author  chunk_num                       tag
1  Observations            12  Morellet          1  1__Morellet_Observations


03/18/2025 01:31:27 [ruzicka:INFO] Fitting on 2693 documents...
03/18/2025 01:31:27 [ruzicka:INFO] Predicting on 4 documents


Results written in: 03_tests/authors_themselves/bdi_res/Morellet_Observations.csv


#######################
Morellet: Quelques

       work  author_label    author  chunk_num                   tag
1  Quelques            12  Morellet          1  1__Morellet_Quelques


03/18/2025 01:31:27 [ruzicka:INFO] Fitting on 2683 documents...
03/18/2025 01:31:27 [ruzicka:INFO] Predicting on 14 documents


Results written in: 03_tests/authors_themselves/bdi_res/Morellet_Quelques.csv


#######################
Morellet: Reflexions

         work  author_label    author  chunk_num                     tag
1  Reflexions            12  Morellet          1  1__Morellet_Reflexions


03/18/2025 01:31:29 [ruzicka:INFO] Fitting on 2684 documents...
03/18/2025 01:31:29 [ruzicka:INFO] Predicting on 13 documents


Results written in: 03_tests/authors_themselves/bdi_res/Morellet_Reflexions.csv


#######################
Morellet: SupplemenTtxt

            work  author_label    author  chunk_num                        tag
1  SupplemenTtxt            12  Morellet          1  1__Morellet_SupplemenTtxt


03/18/2025 01:31:30 [ruzicka:INFO] Fitting on 2684 documents...
03/18/2025 01:31:30 [ruzicka:INFO] Predicting on 13 documents


Results written in: 03_tests/authors_themselves/bdi_res/Morellet_SupplemenTtxt.csv


#######################
Morellet: Theorie

      work  author_label    author  chunk_num                  tag
1  Theorie            12  Morellet          1  1__Morellet_Theorie


03/18/2025 01:31:32 [ruzicka:INFO] Fitting on 2678 documents...
03/18/2025 01:31:32 [ruzicka:INFO] Predicting on 19 documents


Results written in: 03_tests/authors_themselves/bdi_res/Morellet_Theorie.csv


#######################
Naigeon: Le militaire

           work  author_label   author  chunk_num                      tag
1  Le militaire            13  Naigeon          1  1__Naigeon_Le militaire


03/18/2025 01:31:34 [ruzicka:INFO] Fitting on 2691 documents...
03/18/2025 01:31:34 [ruzicka:INFO] Predicting on 6 documents


Results written in: 03_tests/authors_themselves/bdi_res/Naigeon_Le militaire.csv


#######################
Naigeon: Liberte

      work  author_label   author  chunk_num                 tag
1  Liberte            13  Naigeon          1  1__Naigeon_Liberte


03/18/2025 01:31:35 [ruzicka:INFO] Fitting on 2643 documents...
03/18/2025 01:31:35 [ruzicka:INFO] Predicting on 54 documents


Results written in: 03_tests/authors_themselves/bdi_res/Naigeon_Liberte.csv


#######################
Naigeon: Manuel d-epictete

                work  author_label   author  chunk_num  \
1  Manuel d-epictete            13  Naigeon          1   

                            tag  
1  1__Naigeon_Manuel d-epictete  


03/18/2025 01:31:41 [ruzicka:INFO] Fitting on 2691 documents...
03/18/2025 01:31:41 [ruzicka:INFO] Predicting on 6 documents


Results written in: 03_tests/authors_themselves/bdi_res/Naigeon_Manuel d-epictete.csv


#######################
Naigeon: Richesse

       work  author_label   author  chunk_num                  tag
1  Richesse            13  Naigeon          1  1__Naigeon_Richesse


03/18/2025 01:31:41 [ruzicka:INFO] Fitting on 2689 documents...
03/18/2025 01:31:41 [ruzicka:INFO] Predicting on 8 documents


Results written in: 03_tests/authors_themselves/bdi_res/Naigeon_Richesse.csv


#######################
Naigeon: Unitaires

        work  author_label   author  chunk_num                   tag
1  Unitaires            13  Naigeon          1  1__Naigeon_Unitaires


03/18/2025 01:31:43 [ruzicka:INFO] Fitting on 2679 documents...
03/18/2025 01:31:43 [ruzicka:INFO] Predicting on 18 documents


Results written in: 03_tests/authors_themselves/bdi_res/Naigeon_Unitaires.csv


#######################
Pechmeja: eloge

    work  author_label    author  chunk_num                tag
1  eloge            14  Pechmeja          0  0__Pechmeja_eloge


03/18/2025 01:31:44 [ruzicka:INFO] Fitting on 2669 documents...
03/18/2025 01:31:44 [ruzicka:INFO] Predicting on 28 documents


Results written in: 03_tests/authors_themselves/bdi_res/Pechmeja_eloge.csv


#######################
Pechmeja: Telephe

      work  author_label    author  chunk_num                  tag
1  Telephe            14  Pechmeja          1  1__Pechmeja_Telephe


03/18/2025 01:31:47 [ruzicka:INFO] Fitting on 2674 documents...
03/18/2025 01:31:47 [ruzicka:INFO] Predicting on 23 documents


Results written in: 03_tests/authors_themselves/bdi_res/Pechmeja_Telephe.csv


#######################
Raynal: Anecdotes historiques 1

                      work  author_label  author  chunk_num  \
1  Anecdotes historiques 1            15  Raynal          1   

                                 tag  
1  1__Raynal_Anecdotes historiques 1  


03/18/2025 01:31:52 [ruzicka:INFO] Fitting on 2668 documents...
03/18/2025 01:31:52 [ruzicka:INFO] Predicting on 29 documents


Results written in: 03_tests/authors_themselves/bdi_res/Raynal_Anecdotes historiques 1.csv


#######################
Raynal: Anecdotes historiques 2

                      work  author_label  author  chunk_num  \
1  Anecdotes historiques 2            15  Raynal          1   

                                 tag  
1  1__Raynal_Anecdotes historiques 2  


03/18/2025 01:31:58 [ruzicka:INFO] Fitting on 2675 documents...
03/18/2025 01:31:58 [ruzicka:INFO] Predicting on 22 documents


Results written in: 03_tests/authors_themselves/bdi_res/Raynal_Anecdotes historiques 2.csv


#######################
Raynal: Anecdotes litteraires2 1756

                          work  author_label  author  chunk_num  \
1  Anecdotes litteraires2 1756            15  Raynal          1   

                                     tag  
1  1__Raynal_Anecdotes litteraires2 1756  


03/18/2025 01:32:03 [ruzicka:INFO] Fitting on 2670 documents...
03/18/2025 01:32:03 [ruzicka:INFO] Predicting on 27 documents


Results written in: 03_tests/authors_themselves/bdi_res/Raynal_Anecdotes litteraires2 1756.csv


#######################
Raynal: ecole 2

      work  author_label  author  chunk_num                tag
1  ecole 2            15  Raynal          1  1__Raynal_ecole 2


03/18/2025 01:32:09 [ruzicka:INFO] Fitting on 2669 documents...
03/18/2025 01:32:09 [ruzicka:INFO] Predicting on 28 documents


Results written in: 03_tests/authors_themselves/bdi_res/Raynal_ecole 2.csv


#######################
Raynal: ecole 3

      work  author_label  author  chunk_num                tag
1  ecole 3            15  Raynal          1  1__Raynal_ecole 3


03/18/2025 01:32:15 [ruzicka:INFO] Fitting on 2670 documents...
03/18/2025 01:32:15 [ruzicka:INFO] Predicting on 27 documents


Results written in: 03_tests/authors_themselves/bdi_res/Raynal_ecole 3.csv


#######################
Raynal: Histoire du Parlement 1748

                         work  author_label  author  chunk_num  \
1  Histoire du Parlement 1748            15  Raynal          1   

                                    tag  
1  1__Raynal_Histoire du Parlement 1748  


03/18/2025 01:32:20 [ruzicka:INFO] Fitting on 2677 documents...
03/18/2025 01:32:20 [ruzicka:INFO] Predicting on 20 documents


Results written in: 03_tests/authors_themselves/bdi_res/Raynal_Histoire du Parlement 1748.csv


#######################
Raynal: Histoire du Parlement Band 1

                           work  author_label  author  chunk_num  \
1  Histoire du Parlement Band 1            15  Raynal          1   

                                      tag  
1  1__Raynal_Histoire du Parlement Band 1  


03/18/2025 01:32:25 [ruzicka:INFO] Fitting on 2680 documents...
03/18/2025 01:32:25 [ruzicka:INFO] Predicting on 17 documents


Results written in: 03_tests/authors_themselves/bdi_res/Raynal_Histoire du Parlement Band 1.csv


#######################
Raynal: Histoire du Parlement Band 2

                           work  author_label  author  chunk_num  \
1  Histoire du Parlement Band 2            15  Raynal          1   

                                      tag  
1  1__Raynal_Histoire du Parlement Band 2  


03/18/2025 01:32:29 [ruzicka:INFO] Fitting on 2687 documents...
03/18/2025 01:32:29 [ruzicka:INFO] Predicting on 10 documents


Results written in: 03_tests/authors_themselves/bdi_res/Raynal_Histoire du Parlement Band 2.csv


#######################
Rivière: De l-instruction publique  1775

                              work  author_label    author  chunk_num  \
1  De l-instruction publique  1775            16  Rivière          1   

                                           tag  
1  1__Rivière_De l-instruction publique  1775  


03/18/2025 01:32:31 [ruzicka:INFO] Fitting on 2657 documents...
03/18/2025 01:32:31 [ruzicka:INFO] Predicting on 40 documents


Results written in: 03_tests/authors_themselves/bdi_res/Rivière_De l-instruction publique  1775.csv


#######################
Rivière: L-Interet

        work  author_label    author  chunk_num                    tag
1  L-Interet            16  Rivière          1  1__Rivière_L-Interet


03/18/2025 01:32:36 [ruzicka:INFO] Fitting on 2618 documents...
03/18/2025 01:32:36 [ruzicka:INFO] Predicting on 79 documents


Results written in: 03_tests/authors_themselves/bdi_res/Rivière_L-Interet.csv


#######################
Rivière: L-ordre naturel

              work  author_label    author  chunk_num  \
1  L-ordre naturel            16  Rivière          1   

                           tag  
1  1__Rivière_L-ordre naturel  


03/18/2025 01:32:45 [ruzicka:INFO] Fitting on 2692 documents...
03/18/2025 01:32:45 [ruzicka:INFO] Predicting on 5 documents


Results written in: 03_tests/authors_themselves/bdi_res/Rivière_L-ordre naturel.csv


#######################
Saint-Lambert: Les deux Amis

            work  author_label         author  chunk_num  \
1  Les deux Amis            17  Saint-Lambert          1   

                              tag  
1  1__Saint-Lambert_Les deux Amis  


03/18/2025 01:32:46 [ruzicka:INFO] Fitting on 2662 documents...
03/18/2025 01:32:46 [ruzicka:INFO] Predicting on 35 documents


Results written in: 03_tests/authors_themselves/bdi_res/Saint-Lambert_Les deux Amis.csv


#######################
Saint-Lambert: Oeuvres Phil T1

              work  author_label         author  chunk_num  \
1  Oeuvres Phil T1            17  Saint-Lambert          1   

                                tag  
1  1__Saint-Lambert_Oeuvres Phil T1  


03/18/2025 01:32:54 [ruzicka:INFO] Fitting on 2658 documents...
03/18/2025 01:32:54 [ruzicka:INFO] Predicting on 39 documents


Results written in: 03_tests/authors_themselves/bdi_res/Saint-Lambert_Oeuvres Phil T1.csv


#######################
Saint-Lambert: Oeuvres Phil T2

              work  author_label         author  chunk_num  \
1  Oeuvres Phil T2            17  Saint-Lambert          1   

                                tag  
1  1__Saint-Lambert_Oeuvres Phil T2  


03/18/2025 01:33:02 [ruzicka:INFO] Fitting on 2653 documents...
03/18/2025 01:33:02 [ruzicka:INFO] Predicting on 44 documents


Results written in: 03_tests/authors_themselves/bdi_res/Saint-Lambert_Oeuvres Phil T2.csv


#######################
Saint-Lambert: Oeuvres Phil T3

              work  author_label         author  chunk_num  \
1  Oeuvres Phil T3            17  Saint-Lambert          1   

                                tag  
1  1__Saint-Lambert_Oeuvres Phil T3  


03/18/2025 01:33:10 [ruzicka:INFO] Fitting on 2652 documents...
03/18/2025 01:33:10 [ruzicka:INFO] Predicting on 45 documents


Results written in: 03_tests/authors_themselves/bdi_res/Saint-Lambert_Oeuvres Phil T3.csv


#######################
Saint-Lambert: Oeuvres Phil T4

              work  author_label         author  chunk_num  \
1  Oeuvres Phil T4            17  Saint-Lambert          1   

                                tag  
1  1__Saint-Lambert_Oeuvres Phil T4  


03/18/2025 01:33:19 [ruzicka:INFO] Fitting on 2675 documents...
03/18/2025 01:33:19 [ruzicka:INFO] Predicting on 22 documents


Results written in: 03_tests/authors_themselves/bdi_res/Saint-Lambert_Oeuvres Phil T4.csv


#######################
Saint-Lambert: Oeuvres Phil T5

              work  author_label         author  chunk_num  \
1  Oeuvres Phil T5            17  Saint-Lambert          1   

                                tag  
1  1__Saint-Lambert_Oeuvres Phil T5  
Results written in: 03_tests/authors_themselves/bdi_res/Saint-Lambert_Oeuvres Phil T5.csv


# FP1

Test FP1 chunks with BDI

In [52]:
# load preprocessed data

corpus = pd.read_csv('03_tests/fp1/2000_words/FP1_2000_200mfw_rfreq.csv') # rel freq

corpus

Unnamed: 0,work,author,chunk_num,tag,de,des,et,la,les,vous,...,voit,rendre,beaucoup,seul,ici,mieux,donner,enfin,voir,cependant
0,Avis,Baudeau,0,0__Baudeau_Avis,3.45,2.20,2.50,1.40,2.35,0.65,...,0.05,0.00,0.05,0.05,0.05,0.00,0.05,0.20,0.00,0.00
1,Eclaircissemens,Baudeau,0,0__Baudeau_Eclaircissemens,4.40,2.85,2.80,2.60,2.40,1.65,...,0.05,0.00,0.00,0.05,0.00,0.00,0.05,0.00,0.00,0.00
2,Explication,Baudeau,0,0__Baudeau_Explication,4.90,2.60,3.80,3.10,4.65,0.95,...,0.00,0.00,0.05,0.10,0.10,0.00,0.05,0.05,0.05,0.00
3,Idees sur l-administration,Baudeau,0,0__Baudeau_Idees sur l-administration,5.60,1.90,2.20,3.65,1.85,0.10,...,0.00,0.00,0.10,0.10,0.00,0.05,0.05,0.10,0.00,0.05
4,Idees sur la puissance,Baudeau,0,0__Baudeau_Idees sur la puissance,4.90,1.85,2.50,2.35,2.75,0.10,...,0.00,0.10,0.05,0.00,0.00,0.10,0.00,0.10,0.05,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4429,elements T3,Marmontel,96,96__Marmontel_elements T3,4.15,1.20,3.35,3.00,1.15,0.00,...,0.05,0.10,0.00,0.25,0.00,0.05,0.05,0.05,0.00,0.00
4430,Histoire Generale T19,Deleyre,97,97__Deleyre_Histoire Generale T19,5.25,1.85,0.75,3.75,2.65,0.00,...,0.00,0.05,0.05,0.00,0.00,0.00,0.00,0.00,0.00,0.00
4431,elements T3,Marmontel,97,97__Marmontel_elements T3,4.60,0.95,3.10,3.40,1.55,0.00,...,0.00,0.10,0.00,0.00,0.00,0.00,0.05,0.00,0.00,0.00
4432,Histoire Generale T19,Deleyre,98,98__Deleyre_Histoire Generale T19,4.90,2.95,0.75,3.25,3.20,0.05,...,0.05,0.00,0.20,0.00,0.00,0.05,0.00,0.00,0.05,0.05


In [53]:
raw_df = corpus.iloc[:, :4] # leaves metadata
X = corpus.iloc[:, 4:] # leaves only word columns

raw_df

Unnamed: 0,work,author,chunk_num,tag
0,Avis,Baudeau,0,0__Baudeau_Avis
1,Eclaircissemens,Baudeau,0,0__Baudeau_Eclaircissemens
2,Explication,Baudeau,0,0__Baudeau_Explication
3,Idees sur l-administration,Baudeau,0,0__Baudeau_Idees sur l-administration
4,Idees sur la puissance,Baudeau,0,0__Baudeau_Idees sur la puissance
...,...,...,...,...
4429,elements T3,Marmontel,96,96__Marmontel_elements T3
4430,Histoire Generale T19,Deleyre,97,97__Deleyre_Histoire Generale T19
4431,elements T3,Marmontel,97,97__Marmontel_elements T3
4432,Histoire Generale T19,Deleyre,98,98__Deleyre_Histoire Generale T19


In [54]:
# numer of rows and columns in the X (word frequencies subset)
X.shape

(4434, 200)

In [55]:
# create numeric author labels
labels, label_uniques = raw_df.author.factorize()
raw_df.insert(1, "author_label", labels)
raw_df

Unnamed: 0,work,author_label,author,chunk_num,tag
0,Avis,0,Baudeau,0,0__Baudeau_Avis
1,Eclaircissemens,0,Baudeau,0,0__Baudeau_Eclaircissemens
2,Explication,0,Baudeau,0,0__Baudeau_Explication
3,Idees sur l-administration,0,Baudeau,0,0__Baudeau_Idees sur l-administration
4,Idees sur la puissance,0,Baudeau,0,0__Baudeau_Idees sur la puissance
...,...,...,...,...,...
4429,elements T3,11,Marmontel,96,96__Marmontel_elements T3
4430,Histoire Generale T19,3,Deleyre,97,97__Deleyre_Histoire Generale T19
4431,elements T3,11,Marmontel,97,97__Marmontel_elements T3
4432,Histoire Generale T19,3,Deleyre,98,98__Deleyre_Histoire Generale T19


In [56]:
# select an "unknown" work

problems = raw_df[raw_df.work == "FP1"].reset_index(drop=True).copy()
problems_X = X[raw_df.work == "FP1"].reset_index(drop=True).copy()
problems

Unnamed: 0,work,author_label,author,chunk_num,tag
0,FP1,7,HDI,0,0__HDI_FP1
1,FP1,7,HDI,1,1__HDI_FP1
2,FP1,7,HDI,2,2__HDI_FP1


In [57]:
# extract the rest of the corpus

rest = raw_df[raw_df.work != "FP1"].reset_index(drop=True).copy()
rest_X = X[raw_df.work != "FP1"].reset_index(drop = True).copy()
rest

Unnamed: 0,work,author_label,author,chunk_num,tag
0,Avis,0,Baudeau,0,0__Baudeau_Avis
1,Eclaircissemens,0,Baudeau,0,0__Baudeau_Eclaircissemens
2,Explication,0,Baudeau,0,0__Baudeau_Explication
3,Idees sur l-administration,0,Baudeau,0,0__Baudeau_Idees sur l-administration
4,Idees sur la puissance,0,Baudeau,0,0__Baudeau_Idees sur la puissance
...,...,...,...,...,...
4426,elements T3,11,Marmontel,96,96__Marmontel_elements T3
4427,Histoire Generale T19,3,Deleyre,97,97__Deleyre_Histoire Generale T19
4428,elements T3,11,Marmontel,97,97__Marmontel_elements T3
4429,Histoire Generale T19,3,Deleyre,98,98__Deleyre_Histoire Generale T19


**Scaling**

In [58]:
ss = StandardScaler(with_mean=False).fit(rest_X)

In [59]:
rest_scaled_X = ss.transform(rest_X)
problems_scaled_X = ss.transform(problems_X)

**Verification**

In [60]:
# set verifier
rng = np.random.default_rng(42)

bdi_mm = BDIVerifier(
    metric='minmax', nb_bootstrap_iter=1000, rnd_prop=0.35, random_state=rng
)

In [61]:
# fit 
bdi_mm.fit(rest_scaled_X, rest.author_label)

03/18/2025 02:31:31 [ruzicka:INFO] Fitting on 4431 documents...


In [62]:
# check unique authors
label_uniques.values

array(['Baudeau', 'Chastellux', 'Condorcet', 'Deleyre', 'dHolbach',
       'Diderot', 'Guibert', 'HDI', 'Jaucourt', 'Jussieu', 'La Grange',
       'Marmontel', 'Meister', 'Morellet', 'Naigeon', 'Pechmeja',
       'Raynal', 'Rivière', 'Saint-Lambert'], dtype=object)

In [63]:
for label in label_uniques.values:
    print(f"Testing against {label}")
    code = label_uniques.get_loc(label)
    print(
        f"Bootstrap Match Strength (one per chunk, 0-1.0): {bdi_mm.predict_proba(problems_scaled_X, [code] * problems_scaled_X.shape[0])}"
    )

03/18/2025 02:31:35 [ruzicka:INFO] Predicting on 3 documents


Testing against Baudeau


03/18/2025 02:31:36 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.656 0.717 0.722]
Testing against Chastellux


03/18/2025 02:31:36 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.849 0.668 0.967]
Testing against Condorcet


03/18/2025 02:31:39 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.979 0.993 0.968]
Testing against Deleyre


03/18/2025 02:31:40 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.813 0.788 0.889]
Testing against dHolbach


03/18/2025 02:31:42 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.842 0.967 0.865]
Testing against Diderot


03/18/2025 02:31:43 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [1. 1. 1.]
Testing against Guibert


03/18/2025 02:31:44 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.734 0.614 0.768]
Testing against HDI


03/18/2025 02:31:44 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.316 0.408 0.45 ]
Testing against Jaucourt


03/18/2025 02:31:45 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.787 0.673 0.714]
Testing against Jussieu


03/18/2025 02:31:45 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.329 0.279 0.275]
Testing against La Grange


03/18/2025 02:31:46 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.101 0.261 0.297]
Testing against Marmontel


03/18/2025 02:31:47 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.967 0.925 0.969]
Testing against Meister


03/18/2025 02:31:47 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.305 0.288 0.103]
Testing against Morellet


03/18/2025 02:31:48 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.935 0.844 0.744]
Testing against Naigeon


03/18/2025 02:31:48 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.835 0.702 0.354]
Testing against Pechmeja


03/18/2025 02:31:49 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.216 0.555 0.605]
Testing against Raynal


03/18/2025 02:31:50 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.421 0.559 0.657]
Testing against Rivière


03/18/2025 02:31:50 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.572 0.51  0.407]
Testing against Saint-Lambert
Bootstrap Match Strength (one per chunk, 0-1.0): [0.689 0.92  0.89 ]


In [64]:
authors = label_uniques.values

fh = ''

for a in authors:

    print(a)
    
    bdi_mm.predict_proba(
        problems_scaled_X, [label_uniques.get_loc(a)] * problems_scaled_X.shape[0]
    )

    
    x = pd.DataFrame(dict(zip(problems.tag, bdi_mm._dist_arrays)))
    
    fh = '03_tests/fp1/2000_words/mfw200/fp1_vs_' + a + '.csv'

    x.to_csv(fh)

03/18/2025 02:31:57 [ruzicka:INFO] Predicting on 3 documents


Baudeau


03/18/2025 02:31:57 [ruzicka:INFO] Predicting on 3 documents


Chastellux


03/18/2025 02:31:58 [ruzicka:INFO] Predicting on 3 documents


Condorcet


03/18/2025 02:32:00 [ruzicka:INFO] Predicting on 3 documents


Deleyre


03/18/2025 02:32:01 [ruzicka:INFO] Predicting on 3 documents


dHolbach


03/18/2025 02:32:03 [ruzicka:INFO] Predicting on 3 documents


Diderot


03/18/2025 02:32:05 [ruzicka:INFO] Predicting on 3 documents


Guibert


03/18/2025 02:32:06 [ruzicka:INFO] Predicting on 3 documents


HDI


03/18/2025 02:32:06 [ruzicka:INFO] Predicting on 3 documents


Jaucourt


03/18/2025 02:32:07 [ruzicka:INFO] Predicting on 3 documents


Jussieu


03/18/2025 02:32:07 [ruzicka:INFO] Predicting on 3 documents


La Grange


03/18/2025 02:32:08 [ruzicka:INFO] Predicting on 3 documents


Marmontel


03/18/2025 02:32:09 [ruzicka:INFO] Predicting on 3 documents


Meister


03/18/2025 02:32:09 [ruzicka:INFO] Predicting on 3 documents


Morellet


03/18/2025 02:32:10 [ruzicka:INFO] Predicting on 3 documents


Naigeon


03/18/2025 02:32:10 [ruzicka:INFO] Predicting on 3 documents


Pechmeja


03/18/2025 02:32:11 [ruzicka:INFO] Predicting on 3 documents


Raynal


03/18/2025 02:32:12 [ruzicka:INFO] Predicting on 3 documents


Rivière


03/18/2025 02:32:12 [ruzicka:INFO] Predicting on 3 documents


Saint-Lambert


# III. Work vs all authors
Here random 2 works from each author are used to see how good BDI will recognise true author (same settings as in the actual analysis)

In [9]:
# load preprocessed data

corpus = pd.read_csv('03_tests/authors_vs_all/5-authors_5works_2k_200mfw_rfreq.csv') # rel freq

corpus

Unnamed: 0,work,author,chunk_num,tag,de,la,les,des,et,l,...,fort,encore,autant,seul,plusieurs,assez,enfin,presque,beaucoup,donner
0,Eclaircissemens,Baudeau,0,0__Baudeau_Eclaircissemens,4.40,2.60,2.40,2.85,2.80,2.20,...,0.10,0.05,0.05,0.05,0.10,0.00,0.00,0.05,0.00,0.05
1,Explication,Baudeau,0,0__Baudeau_Explication,4.90,3.10,4.65,2.60,3.80,1.15,...,0.05,0.10,0.00,0.10,0.20,0.10,0.05,0.05,0.05,0.05
2,Idees sur les besoins 2,Baudeau,0,0__Baudeau_Idees sur les besoins 2,2.00,1.25,1.50,2.15,1.15,1.00,...,0.00,0.05,0.00,0.00,0.00,0.00,0.00,0.05,0.00,0.00
3,Lettres a un magistraTtxt,Baudeau,0,0__Baudeau_Lettres a un magistraTtxt,4.15,3.60,4.00,2.55,3.20,1.00,...,0.05,0.25,0.05,0.20,0.05,0.10,0.05,0.00,0.10,0.00
4,Resultats,Baudeau,0,0__Baudeau_Resultats,4.60,2.45,2.95,3.30,3.05,2.20,...,0.00,0.15,0.00,0.00,0.00,0.00,0.00,0.05,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
531,Anecdotes historiques 1,Raynal,9,9__Raynal_Anecdotes historiques 1,4.40,3.10,3.35,1.15,3.15,1.20,...,0.15,0.10,0.05,0.05,0.00,0.00,0.00,0.00,0.05,0.05
532,Anecdotes litteraires1 1756,Raynal,9,9__Raynal_Anecdotes litteraires1 1756,4.55,2.05,1.10,0.40,1.90,1.25,...,0.20,0.10,0.00,0.00,0.00,0.00,0.00,0.05,0.15,0.05
533,Anecdotes litteraires2 1756,Raynal,9,9__Raynal_Anecdotes litteraires2 1756,3.85,1.15,1.30,0.85,1.45,1.10,...,0.20,0.05,0.00,0.00,0.05,0.15,0.05,0.00,0.15,0.05
534,ecole 2,Raynal,9,9__Raynal_ecole 2,2.90,0.90,0.85,0.55,1.70,0.45,...,0.00,0.05,0.05,0.10,0.10,0.00,0.00,0.00,0.05,0.05


In [10]:
raw_df = corpus.iloc[:, :4] # leaves metadata
X = corpus.iloc[:, 4:] # leaves only word columns

raw_df

Unnamed: 0,work,author,chunk_num,tag
0,Eclaircissemens,Baudeau,0,0__Baudeau_Eclaircissemens
1,Explication,Baudeau,0,0__Baudeau_Explication
2,Idees sur les besoins 2,Baudeau,0,0__Baudeau_Idees sur les besoins 2
3,Lettres a un magistraTtxt,Baudeau,0,0__Baudeau_Lettres a un magistraTtxt
4,Resultats,Baudeau,0,0__Baudeau_Resultats
...,...,...,...,...
531,Anecdotes historiques 1,Raynal,9,9__Raynal_Anecdotes historiques 1
532,Anecdotes litteraires1 1756,Raynal,9,9__Raynal_Anecdotes litteraires1 1756
533,Anecdotes litteraires2 1756,Raynal,9,9__Raynal_Anecdotes litteraires2 1756
534,ecole 2,Raynal,9,9__Raynal_ecole 2


In [11]:
# create numeric author labels
labels, label_uniques = raw_df.author.factorize()
raw_df.insert(1, "author_label", labels)
raw_df

Unnamed: 0,work,author_label,author,chunk_num,tag
0,Eclaircissemens,0,Baudeau,0,0__Baudeau_Eclaircissemens
1,Explication,0,Baudeau,0,0__Baudeau_Explication
2,Idees sur les besoins 2,0,Baudeau,0,0__Baudeau_Idees sur les besoins 2
3,Lettres a un magistraTtxt,0,Baudeau,0,0__Baudeau_Lettres a un magistraTtxt
4,Resultats,0,Baudeau,0,0__Baudeau_Resultats
...,...,...,...,...,...
531,Anecdotes historiques 1,4,Raynal,9,9__Raynal_Anecdotes historiques 1
532,Anecdotes litteraires1 1756,4,Raynal,9,9__Raynal_Anecdotes litteraires1 1756
533,Anecdotes litteraires2 1756,4,Raynal,9,9__Raynal_Anecdotes litteraires2 1756
534,ecole 2,4,Raynal,9,9__Raynal_ecole 2


In [12]:
# check unique authors
label_uniques.values

array(['Baudeau', 'Condorcet', 'dHolbach', 'Diderot', 'Raynal'],
      dtype=object)

In [13]:
print('N unique works:', len(list(set(raw_df.work))))

unique_works = list(set(raw_df.work))

# test
for w in unique_works[0:3]:
    print(w)

N unique works: 25
Pensees philosophiques
Explication
2


## NB: CHANGE PATH

In [14]:
# testing each individual work
for w in unique_works:
    
    problems = raw_df[raw_df.work == w].reset_index(drop=True).copy()
    problems_X = X[raw_df.work == w].reset_index(drop=True).copy()
    #print(problems)
    
    # extract true author
    a_true = problems.author[0]
    # print(a_true)
    
    # extract the rest of the corpus
    rest = raw_df[raw_df.work != w].reset_index(drop=True).copy()
    rest_X = X[raw_df.work != w].reset_index(drop = True).copy()
    #print(rest)
    
    
    print('\n\n########################## Working with:', w, '---- by', a_true)
    
    ### Scaling

    sts = StandardScaler(with_mean=False).fit(rest_X)
    rest_scaled_X = sts.transform(rest_X)
    problems_scaled_X = sts.transform(problems_X)


    # set verifier
    rng = np.random.default_rng(42)

    bdi_mm = BDIVerifier(
        metric='minmax', nb_bootstrap_iter=1000, rnd_prop=0.35, random_state=rng
    )

    # fit 
    bdi_mm.fit(rest_scaled_X, rest.author_label)
    
    for label in label_uniques.values:
        print(f"Testing against {label}")
        code = label_uniques.get_loc(label)
        print(
            f"Bootstrap Match Strength (one per chunk, 0-1.0): {bdi_mm.predict_proba(problems_scaled_X, [code] * problems_scaled_X.shape[0])}"
        )
    
    authors = label_uniques.values

    fh = ''
    
    # compare with each author and store results
    for a in authors:

        print(a)

        bdi_mm.predict_proba(
            problems_scaled_X, [label_uniques.get_loc(a)] * problems_scaled_X.shape[0]
        )


        x = pd.DataFrame(dict(zip(problems.tag, bdi_mm._dist_arrays)))

        fh = '03_tests/authors_vs_all/runs_5-authors/' + a_true + '-' + w + '_vs_' + a + '.csv'

        x.to_csv(fh)

03/19/2025 02:19:30 [ruzicka:INFO] Fitting on 530 documents...
03/19/2025 02:19:30 [ruzicka:INFO] Predicting on 6 documents




########################## Working with: Pensees philosophiques ---- by Diderot
Testing against Baudeau


03/19/2025 02:19:30 [ruzicka:INFO] Predicting on 6 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.152 0.231 0.355 0.22  0.484 0.318]
Testing against Condorcet


03/19/2025 02:19:32 [ruzicka:INFO] Predicting on 6 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.456 0.295 0.248 0.514 0.416 0.696]
Testing against dHolbach


03/19/2025 02:19:33 [ruzicka:INFO] Predicting on 6 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.466 0.269 0.576 0.726 0.404 0.712]
Testing against Diderot


03/19/2025 02:19:33 [ruzicka:INFO] Predicting on 6 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [1.    1.    1.    0.993 1.    0.996]
Testing against Raynal


03/19/2025 02:19:34 [ruzicka:INFO] Predicting on 6 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.224 0.183 0.434 0.63  0.248 0.45 ]
Baudeau


03/19/2025 02:19:35 [ruzicka:INFO] Predicting on 6 documents


Condorcet


03/19/2025 02:19:36 [ruzicka:INFO] Predicting on 6 documents


dHolbach


03/19/2025 02:19:37 [ruzicka:INFO] Predicting on 6 documents


Diderot


03/19/2025 02:19:38 [ruzicka:INFO] Predicting on 6 documents


Raynal


03/19/2025 02:19:39 [ruzicka:INFO] Fitting on 525 documents...
03/19/2025 02:19:39 [ruzicka:INFO] Predicting on 11 documents




########################## Working with: Explication ---- by Baudeau
Testing against Baudeau


03/19/2025 02:19:40 [ruzicka:INFO] Predicting on 11 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.993 1.    1.    0.994 1.    1.    0.994 0.985 0.992 0.999 1.   ]
Testing against Condorcet


03/19/2025 02:19:43 [ruzicka:INFO] Predicting on 11 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.495 0.372 0.246 0.416 0.081 0.213 0.301 0.391 0.367 0.371 0.32 ]
Testing against dHolbach


03/19/2025 02:19:45 [ruzicka:INFO] Predicting on 11 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.229 0.215 0.229 0.219 0.292 0.266 0.284 0.783 0.559 0.322 0.3  ]
Testing against Diderot


03/19/2025 02:19:46 [ruzicka:INFO] Predicting on 11 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.798 0.613 0.342 0.553 0.513 0.435 0.667 0.643 0.799 0.554 0.492]
Testing against Raynal


03/19/2025 02:19:48 [ruzicka:INFO] Predicting on 11 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.324 0.235 0.207 0.41  0.11  0.273 0.289 0.255 0.261 0.229 0.123]
Baudeau


03/19/2025 02:19:49 [ruzicka:INFO] Predicting on 11 documents


Condorcet


03/19/2025 02:19:51 [ruzicka:INFO] Predicting on 11 documents


dHolbach


03/19/2025 02:19:53 [ruzicka:INFO] Predicting on 11 documents


Diderot


03/19/2025 02:19:55 [ruzicka:INFO] Predicting on 11 documents


Raynal


03/19/2025 02:19:56 [ruzicka:INFO] Fitting on 507 documents...
03/19/2025 02:19:56 [ruzicka:INFO] Predicting on 29 documents




########################## Working with: 2 ---- by Condorcet
Testing against Baudeau


03/19/2025 02:20:00 [ruzicka:INFO] Predicting on 29 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.269 0.047 0.157 0.068 0.167 0.491 0.233 0.027 0.337 0.235 0.25  0.336
 0.133 0.097 0.32  0.086 0.171 0.394 0.324 0.18  0.589 0.326 0.261 0.048
 0.198 0.114 0.209 0.338 0.494]
Testing against Condorcet


03/19/2025 02:20:05 [ruzicka:INFO] Predicting on 29 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [1.    1.    0.999 1.    1.    1.    1.    1.    0.998 0.999 1.    1.
 1.    1.    0.999 0.999 0.995 1.    1.    0.999 0.998 0.997 0.997 1.
 1.    0.999 0.999 1.    0.999]
Testing against dHolbach


03/19/2025 02:20:10 [ruzicka:INFO] Predicting on 29 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.527 0.446 0.691 0.421 0.335 0.503 0.65  0.303 0.522 0.729 0.437 0.413
 0.384 0.467 0.409 0.62  0.645 0.29  0.559 0.467 0.639 0.754 0.763 0.331
 0.42  0.643 0.669 0.783 0.703]
Testing against Diderot


03/19/2025 02:20:14 [ruzicka:INFO] Predicting on 29 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.064 0.011 0.048 0.052 0.03  0.737 0.14  0.01  0.621 0.152 0.717 0.506
 0.087 0.057 0.107 0.098 0.635 0.504 0.033 0.764 0.681 0.121 0.711 0.024
 0.169 0.696 0.75  0.179 0.295]
Testing against Raynal


03/19/2025 02:20:18 [ruzicka:INFO] Predicting on 29 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.374 0.115 0.22  0.321 0.09  0.333 0.11  0.238 0.546 0.037 0.422 0.515
 0.069 0.248 0.09  0.381 0.556 0.545 0.084 0.297 0.449 0.131 0.369 0.4
 0.135 0.391 0.247 0.066 0.069]
Baudeau


03/19/2025 02:20:22 [ruzicka:INFO] Predicting on 29 documents


Condorcet


03/19/2025 02:20:27 [ruzicka:INFO] Predicting on 29 documents


dHolbach


03/19/2025 02:20:32 [ruzicka:INFO] Predicting on 29 documents


Diderot


03/19/2025 02:20:36 [ruzicka:INFO] Predicting on 29 documents


Raynal


03/19/2025 02:20:40 [ruzicka:INFO] Fitting on 528 documents...
03/19/2025 02:20:40 [ruzicka:INFO] Predicting on 8 documents




########################## Working with: Salon 1761 ---- by Diderot
Testing against Baudeau


03/19/2025 02:20:41 [ruzicka:INFO] Predicting on 8 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.288 0.39  0.275 0.224 0.363 0.229 0.306 0.535]
Testing against Condorcet


03/19/2025 02:20:43 [ruzicka:INFO] Predicting on 8 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.509 0.295 0.354 0.551 0.552 0.435 0.843 0.522]
Testing against dHolbach


03/19/2025 02:20:44 [ruzicka:INFO] Predicting on 8 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.18  0.29  0.188 0.214 0.36  0.306 0.393 0.246]
Testing against Diderot


03/19/2025 02:20:45 [ruzicka:INFO] Predicting on 8 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [1.    0.998 0.999 0.996 0.997 0.994 0.984 0.988]
Testing against Raynal


03/19/2025 02:20:46 [ruzicka:INFO] Predicting on 8 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.634 0.764 0.605 0.606 0.641 0.766 0.553 0.783]
Baudeau


03/19/2025 02:20:48 [ruzicka:INFO] Predicting on 8 documents


Condorcet


03/19/2025 02:20:49 [ruzicka:INFO] Predicting on 8 documents


dHolbach


03/19/2025 02:20:50 [ruzicka:INFO] Predicting on 8 documents


Diderot


03/19/2025 02:20:51 [ruzicka:INFO] Predicting on 8 documents


Raynal


03/19/2025 02:20:53 [ruzicka:INFO] Fitting on 513 documents...
03/19/2025 02:20:53 [ruzicka:INFO] Predicting on 23 documents




########################## Working with: Anecdotes historiques 1 ---- by Raynal
Testing against Baudeau


03/19/2025 02:20:56 [ruzicka:INFO] Predicting on 23 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.391 0.07  0.038 0.029 0.055 0.111 0.041 0.346 0.17  0.1   0.061 0.286
 0.269 0.045 0.089 0.117 0.207 0.159 0.155 0.052 0.241 0.308 0.191]
Testing against Condorcet


03/19/2025 02:21:00 [ruzicka:INFO] Predicting on 23 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.966 0.692 0.469 0.287 0.41  0.458 0.503 0.994 0.942 0.523 0.56  0.451
 0.992 0.523 0.34  0.567 0.947 0.628 0.55  0.528 0.936 0.366 0.396]
Testing against dHolbach


03/19/2025 02:21:04 [ruzicka:INFO] Predicting on 23 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.55  0.175 0.089 0.235 0.149 0.19  0.158 0.623 0.709 0.136 0.149 0.178
 0.846 0.192 0.253 0.143 0.869 0.064 0.123 0.24  0.833 0.251 0.228]
Testing against Diderot


03/19/2025 02:21:07 [ruzicka:INFO] Predicting on 23 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.042 0.187 0.117 0.09  0.067 0.138 0.176 0.05  0.163 0.095 0.183 0.249
 0.081 0.126 0.146 0.435 0.073 0.308 0.175 0.205 0.006 0.458 0.307]
Testing against Raynal


03/19/2025 02:21:10 [ruzicka:INFO] Predicting on 23 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.809 1.    1.    1.    1.    1.    1.    0.543 0.884 0.999 0.998 1.
 0.522 0.999 1.    0.996 0.843 0.999 1.    1.    0.838 0.999 1.   ]
Baudeau


03/19/2025 02:21:13 [ruzicka:INFO] Predicting on 23 documents


Condorcet


03/19/2025 02:21:18 [ruzicka:INFO] Predicting on 23 documents


dHolbach


03/19/2025 02:21:22 [ruzicka:INFO] Predicting on 23 documents


Diderot


03/19/2025 02:21:24 [ruzicka:INFO] Predicting on 23 documents


Raynal


03/19/2025 02:21:28 [ruzicka:INFO] Fitting on 509 documents...
03/19/2025 02:21:28 [ruzicka:INFO] Predicting on 27 documents




########################## Working with: La Morale 5 ---- by dHolbach
Testing against Baudeau


03/19/2025 02:21:31 [ruzicka:INFO] Predicting on 27 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.367 0.371 0.326 0.306 0.709 0.625 0.713 0.514 0.583 0.359 0.269 0.483
 0.797 0.329 0.331 0.215 0.271 0.295 0.793 0.72  0.628 0.736 0.68  0.746
 0.88  0.667 0.695]
Testing against Condorcet


03/19/2025 02:21:37 [ruzicka:INFO] Predicting on 27 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.868 0.57  0.453 0.358 0.954 0.928 0.818 0.348 0.885 0.442 0.35  0.993
 0.864 0.217 0.332 0.191 0.503 0.349 0.954 0.959 0.922 0.938 0.943 0.816
 0.86  0.871 0.826]
Testing against dHolbach


03/19/2025 02:21:40 [ruzicka:INFO] Predicting on 27 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.998 1.    1.    1.    0.917 0.944 0.992 1.    0.964 1.    1.    0.786
 0.95  1.    1.    1.    1.    1.    0.935 0.897 0.983 0.896 0.944 0.983
 0.953 0.976 0.989]
Testing against Diderot


03/19/2025 02:21:44 [ruzicka:INFO] Predicting on 27 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.546 0.481 0.467 0.62  0.493 0.346 0.631 0.47  0.316 0.486 0.475 0.188
 0.575 0.504 0.495 0.424 0.63  0.676 0.249 0.297 0.317 0.525 0.193 0.173
 0.228 0.248 0.15 ]
Testing against Raynal


03/19/2025 02:21:48 [ruzicka:INFO] Predicting on 27 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.241 0.124 0.208 0.157 0.061 0.206 0.148 0.105 0.282 0.135 0.22  0.179
 0.218 0.089 0.076 0.054 0.04  0.226 0.182 0.186 0.232 0.2   0.296 0.251
 0.085 0.119 0.167]
Baudeau


03/19/2025 02:21:52 [ruzicka:INFO] Predicting on 27 documents


Condorcet


03/19/2025 02:21:57 [ruzicka:INFO] Predicting on 27 documents


dHolbach


03/19/2025 02:22:01 [ruzicka:INFO] Predicting on 27 documents


Diderot


03/19/2025 02:22:04 [ruzicka:INFO] Predicting on 27 documents


Raynal


03/19/2025 02:22:09 [ruzicka:INFO] Fitting on 502 documents...
03/19/2025 02:22:09 [ruzicka:INFO] Predicting on 34 documents




########################## Working with: 1 ---- by Condorcet
Testing against Baudeau


03/19/2025 02:22:13 [ruzicka:INFO] Predicting on 34 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.629 0.52  0.239 0.019 0.102 0.167 0.096 0.066 0.201 0.12  0.179 0.19
 0.017 0.189 0.091 0.115 0.253 0.131 0.222 0.056 0.097 0.116 0.128 0.442
 0.444 0.063 0.054 0.219 0.299 0.049 0.404 0.671 0.053 0.416]
Testing against Condorcet


03/19/2025 02:22:19 [ruzicka:INFO] Predicting on 34 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.99  0.972 0.998 1.    0.995 1.    1.    0.997 0.997 1.    1.    0.996
 0.996 0.998 0.998 1.    0.995 1.    1.    0.996 0.996 1.    1.    0.992
 0.998 1.    1.    0.999 1.    1.    0.999 0.991 0.999 1.   ]
Testing against dHolbach


03/19/2025 02:22:25 [ruzicka:INFO] Predicting on 34 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.568 0.932 0.73  0.303 0.769 0.286 0.712 0.576 0.56  0.568 0.592 0.855
 0.736 0.797 0.596 0.425 0.833 0.265 0.666 0.703 0.584 0.336 0.39  0.63
 0.731 0.463 0.301 0.595 0.279 0.428 0.486 0.627 0.335 0.695]
Testing against Diderot


03/19/2025 02:22:29 [ruzicka:INFO] Predicting on 34 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.692 0.418 0.226 0.107 0.019 0.072 0.156 0.135 0.047 0.031 0.037 0.175
 0.064 0.146 0.109 0.04  0.076 0.458 0.105 0.021 0.034 0.396 0.038 0.256
 0.125 0.029 0.073 0.117 0.618 0.053 0.698 0.336 0.25  0.165]
Testing against Raynal


03/19/2025 02:22:34 [ruzicka:INFO] Predicting on 34 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.392 0.114 0.179 0.446 0.537 0.151 0.235 0.283 0.375 0.299 0.435 0.355
 0.417 0.128 0.374 0.17  0.363 0.515 0.367 0.319 0.44  0.485 0.165 0.147
 0.139 0.293 0.152 0.357 0.412 0.17  0.235 0.156 0.604 0.169]
Baudeau


03/19/2025 02:22:38 [ruzicka:INFO] Predicting on 34 documents


Condorcet


03/19/2025 02:22:44 [ruzicka:INFO] Predicting on 34 documents


dHolbach


03/19/2025 02:22:50 [ruzicka:INFO] Predicting on 34 documents


Diderot


03/19/2025 02:22:54 [ruzicka:INFO] Predicting on 34 documents


Raynal


03/19/2025 02:23:00 [ruzicka:INFO] Fitting on 506 documents...
03/19/2025 02:23:00 [ruzicka:INFO] Predicting on 30 documents




########################## Working with: 20 ---- by Condorcet
Testing against Baudeau


03/19/2025 02:23:04 [ruzicka:INFO] Predicting on 30 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.631 0.678 0.741 0.748 0.725 0.593 0.676 0.752 0.722 0.794 0.66  0.632
 0.694 0.606 0.67  0.568 0.65  0.522 0.499 0.666 0.505 0.505 0.577 0.339
 0.784 0.526 0.68  0.616 0.511 0.937]
Testing against Condorcet


03/19/2025 02:23:09 [ruzicka:INFO] Predicting on 30 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.957 0.956 0.992 0.978 0.986 1.    0.99  0.98  0.998 0.968 0.971 0.991
 0.994 0.997 0.904 0.899 0.998 0.953 0.948 0.839 0.995 0.984 0.75  0.998
 0.975 0.932 0.997 1.    0.997 0.724]
Testing against dHolbach


03/19/2025 02:23:14 [ruzicka:INFO] Predicting on 30 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.918 0.683 0.628 0.787 0.604 0.52  0.518 0.625 0.673 0.91  0.799 0.773
 0.746 0.678 0.464 0.258 0.617 0.801 0.856 0.84  0.578 0.525 0.544 0.596
 0.743 0.793 0.722 0.576 0.658 0.707]
Testing against Diderot


03/19/2025 02:23:18 [ruzicka:INFO] Predicting on 30 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.42  0.854 0.476 0.62  0.428 0.435 0.574 0.3   0.199 0.398 0.698 0.263
 0.445 0.254 0.959 0.973 0.246 0.904 0.892 0.899 0.542 0.611 0.993 0.441
 0.553 0.923 0.287 0.674 0.393 0.868]
Testing against Raynal


03/19/2025 02:23:23 [ruzicka:INFO] Predicting on 30 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.145 0.33  0.107 0.19  0.062 0.085 0.311 0.421 0.11  0.173 0.128 0.254
 0.184 0.242 0.203 0.372 0.312 0.125 0.194 0.336 0.057 0.458 0.248 0.378
 0.188 0.218 0.175 0.134 0.137 0.277]
Baudeau


03/19/2025 02:23:27 [ruzicka:INFO] Predicting on 30 documents


Condorcet


03/19/2025 02:23:32 [ruzicka:INFO] Predicting on 30 documents


dHolbach


03/19/2025 02:23:37 [ruzicka:INFO] Predicting on 30 documents


Diderot


03/19/2025 02:23:42 [ruzicka:INFO] Predicting on 30 documents


Raynal


03/19/2025 02:23:47 [ruzicka:INFO] Fitting on 521 documents...
03/19/2025 02:23:47 [ruzicka:INFO] Predicting on 15 documents




########################## Working with: Lettre sur les sourds et muets ---- by Diderot
Testing against Baudeau


03/19/2025 02:23:49 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.586 0.537 0.572 0.718 0.56  0.548 0.336 0.399 0.44  0.618 0.712 0.527
 0.292 0.431 0.362]
Testing against Condorcet


03/19/2025 02:23:52 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.692 0.736 0.659 0.619 0.647 0.347 0.593 0.558 0.591 0.745 0.808 0.692
 0.537 0.65  0.684]
Testing against dHolbach


03/19/2025 02:23:55 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.136 0.557 0.291 0.272 0.291 0.576 0.438 0.411 0.553 0.265 0.292 0.153
 0.252 0.267 0.382]
Testing against Diderot


03/19/2025 02:23:57 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.999 0.989 0.999 0.998 1.    0.999 0.997 1.    0.994 0.995 0.995 0.996
 0.998 0.999 0.999]
Testing against Raynal


03/19/2025 02:23:59 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.336 0.304 0.335 0.142 0.099 0.467 0.66  0.547 0.381 0.303 0.196 0.46
 0.68  0.37  0.314]
Baudeau


03/19/2025 02:24:01 [ruzicka:INFO] Predicting on 15 documents


Condorcet


03/19/2025 02:24:04 [ruzicka:INFO] Predicting on 15 documents


dHolbach


03/19/2025 02:24:06 [ruzicka:INFO] Predicting on 15 documents


Diderot


03/19/2025 02:24:08 [ruzicka:INFO] Predicting on 15 documents


Raynal


03/19/2025 02:24:11 [ruzicka:INFO] Fitting on 511 documents...
03/19/2025 02:24:11 [ruzicka:INFO] Predicting on 25 documents




########################## Working with: ecole 2 ---- by Raynal
Testing against Baudeau


03/19/2025 02:24:14 [ruzicka:INFO] Predicting on 25 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.083 0.233 0.228 0.264 0.08  0.285 0.125 0.178 0.367 0.418 0.062 0.191
 0.281 0.028 0.615 0.098 0.079 0.282 0.167 0.06  0.061 0.226 0.164 0.145
 0.335]
Testing against Condorcet


03/19/2025 02:24:19 [ruzicka:INFO] Predicting on 25 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.188 0.289 0.881 0.488 0.412 0.912 0.393 0.225 0.511 0.941 0.258 0.965
 0.824 0.211 0.823 0.132 0.382 0.54  0.97  0.899 0.314 0.298 0.942 0.178
 0.984]
Testing against dHolbach


03/19/2025 02:24:24 [ruzicka:INFO] Predicting on 25 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.076 0.12  0.698 0.287 0.237 0.839 0.11  0.242 0.095 0.878 0.05  0.695
 0.77  0.043 0.846 0.085 0.135 0.234 0.695 0.647 0.234 0.269 0.605 0.133
 0.745]
Testing against Diderot


03/19/2025 02:24:27 [ruzicka:INFO] Predicting on 25 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.416 0.707 0.188 0.858 0.517 0.142 0.634 0.798 0.462 0.267 0.414 0.177
 0.32  0.413 0.344 0.423 0.468 0.75  0.214 0.133 0.645 0.538 0.218 0.449
 0.122]
Testing against Raynal


03/19/2025 02:24:30 [ruzicka:INFO] Predicting on 25 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.999 0.992 0.987 0.979 0.999 0.937 0.996 0.999 0.996 0.818 1.    0.914
 0.969 1.    0.943 0.999 1.    0.996 0.837 0.98  0.999 1.    0.959 1.
 0.804]
Baudeau


03/19/2025 02:24:33 [ruzicka:INFO] Predicting on 25 documents


Condorcet


03/19/2025 02:24:38 [ruzicka:INFO] Predicting on 25 documents


dHolbach


03/19/2025 02:24:44 [ruzicka:INFO] Predicting on 25 documents


Diderot


03/19/2025 02:24:47 [ruzicka:INFO] Predicting on 25 documents


Raynal


03/19/2025 02:24:50 [ruzicka:INFO] Fitting on 514 documents...
03/19/2025 02:24:50 [ruzicka:INFO] Predicting on 22 documents




########################## Working with: Anecdotes litteraires1 1756 ---- by Raynal
Testing against Baudeau


03/19/2025 02:24:53 [ruzicka:INFO] Predicting on 22 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.066 0.04  0.005 0.157 0.023 0.033 0.111 0.016 0.023 0.071 0.032 0.365
 0.029 0.095 0.098 0.024 0.033 0.043 0.034 0.019 0.392 0.127]
Testing against Condorcet


03/19/2025 02:24:58 [ruzicka:INFO] Predicting on 22 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.021 0.151 0.056 0.062 0.077 0.098 0.012 0.066 0.317 0.466 0.048 0.694
 0.118 0.064 0.023 0.039 0.032 0.041 0.024 0.349 0.194 0.022]
Testing against dHolbach


03/19/2025 02:25:01 [ruzicka:INFO] Predicting on 22 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.053 0.175 0.153 0.123 0.123 0.292 0.194 0.192 0.355 0.482 0.123 0.794
 0.199 0.071 0.263 0.183 0.093 0.238 0.136 0.472 0.469 0.114]
Testing against Diderot


03/19/2025 02:25:04 [ruzicka:INFO] Predicting on 22 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.276 0.133 0.167 0.446 0.406 0.47  0.394 0.267 0.04  0.229 0.15  0.31
 0.135 0.596 0.292 0.308 0.324 0.213 0.286 0.344 0.745 0.428]
Testing against Raynal


03/19/2025 02:25:07 [ruzicka:INFO] Predicting on 22 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [1.    1.    1.    1.    1.    1.    1.    1.    1.    0.999 1.    0.984
 1.    1.    1.    1.    1.    1.    1.    1.    0.992 1.   ]
Baudeau


03/19/2025 02:25:10 [ruzicka:INFO] Predicting on 22 documents


Condorcet


03/19/2025 02:25:14 [ruzicka:INFO] Predicting on 22 documents


dHolbach


03/19/2025 02:25:18 [ruzicka:INFO] Predicting on 22 documents


Diderot


03/19/2025 02:25:21 [ruzicka:INFO] Predicting on 22 documents


Raynal


03/19/2025 02:25:24 [ruzicka:INFO] Fitting on 514 documents...
03/19/2025 02:25:24 [ruzicka:INFO] Predicting on 22 documents




########################## Working with: Anecdotes litteraires2 1756 ---- by Raynal
Testing against Baudeau


03/19/2025 02:25:27 [ruzicka:INFO] Predicting on 22 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.036 0.024 0.01  0.019 0.008 0.031 0.041 0.052 0.126 0.058 0.063 0.121
 0.109 0.118 0.072 0.017 0.099 0.019 0.011 0.27  0.064 0.06 ]
Testing against Condorcet


03/19/2025 02:25:31 [ruzicka:INFO] Predicting on 22 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.069 0.125 0.069 0.051 0.095 0.12  0.049 0.06  0.24  0.062 0.795 0.056
 0.033 0.012 0.089 0.064 0.785 0.796 0.058 0.56  0.033 0.021]
Testing against dHolbach


03/19/2025 02:25:35 [ruzicka:INFO] Predicting on 22 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.085 0.19  0.187 0.069 0.268 0.271 0.133 0.435 0.24  0.186 0.623 0.218
 0.214 0.048 0.176 0.205 0.633 0.75  0.183 0.531 0.187 0.228]
Testing against Diderot


03/19/2025 02:25:37 [ruzicka:INFO] Predicting on 22 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.262 0.149 0.263 0.315 0.211 0.358 0.405 0.264 0.227 0.457 0.205 0.312
 0.3   0.373 0.276 0.257 0.267 0.045 0.219 0.215 0.311 0.447]
Testing against Raynal


03/19/2025 02:25:40 [ruzicka:INFO] Predicting on 22 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [1.    1.    1.    1.    1.    1.    1.    1.    1.    1.    0.983 1.
 1.    1.    1.    1.    0.99  0.985 1.    0.998 1.    1.   ]
Baudeau


03/19/2025 02:25:43 [ruzicka:INFO] Predicting on 22 documents


Condorcet


03/19/2025 02:25:48 [ruzicka:INFO] Predicting on 22 documents


dHolbach


03/19/2025 02:25:52 [ruzicka:INFO] Predicting on 22 documents


Diderot


03/19/2025 02:25:54 [ruzicka:INFO] Predicting on 22 documents


Raynal


03/19/2025 02:26:01 [ruzicka:INFO] Fitting on 516 documents...
03/19/2025 02:26:01 [ruzicka:INFO] Predicting on 20 documents




########################## Working with: Histoire du Parlement Band 1 ---- by Raynal
Testing against Baudeau


03/19/2025 02:26:04 [ruzicka:INFO] Predicting on 20 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.72  0.774 0.189 0.224 0.02  0.202 0.252 0.16  0.169 0.039 0.424 0.217
 0.187 0.447 0.229 0.597 0.956 0.433 0.143 0.411]
Testing against Condorcet


03/19/2025 02:26:08 [ruzicka:INFO] Predicting on 20 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.859 0.724 0.358 0.816 0.545 0.598 0.602 0.636 0.794 0.451 0.618 0.391
 0.595 0.612 0.428 0.338 0.585 0.699 0.415 0.742]
Testing against dHolbach


03/19/2025 02:26:11 [ruzicka:INFO] Predicting on 20 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.601 0.717 0.156 0.449 0.474 0.228 0.61  0.235 0.617 0.216 0.477 0.521
 0.595 0.594 0.518 0.502 0.709 0.449 0.339 0.618]
Testing against Diderot


03/19/2025 02:26:14 [ruzicka:INFO] Predicting on 20 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.844 0.769 0.277 0.593 0.179 0.249 0.254 0.233 0.408 0.164 0.405 0.301
 0.232 0.244 0.223 0.63  0.923 0.892 0.175 0.341]
Testing against Raynal


03/19/2025 02:26:17 [ruzicka:INFO] Predicting on 20 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.868 0.923 1.    0.981 0.998 0.999 0.997 0.999 0.995 1.    0.994 1.
 0.999 0.995 0.999 0.996 0.4   0.933 0.999 0.999]
Baudeau


03/19/2025 02:26:19 [ruzicka:INFO] Predicting on 20 documents


Condorcet


03/19/2025 02:26:23 [ruzicka:INFO] Predicting on 20 documents


dHolbach


03/19/2025 02:26:27 [ruzicka:INFO] Predicting on 20 documents


Diderot


03/19/2025 02:26:29 [ruzicka:INFO] Predicting on 20 documents


Raynal


03/19/2025 02:26:32 [ruzicka:INFO] Fitting on 526 documents...
03/19/2025 02:26:32 [ruzicka:INFO] Predicting on 10 documents




########################## Working with: elements de la morale universelle ---- by dHolbach
Testing against Baudeau


03/19/2025 02:26:33 [ruzicka:INFO] Predicting on 10 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.351 0.299 0.283 0.372 0.249 0.349 0.305 0.264 0.354 0.344]
Testing against Condorcet


03/19/2025 02:26:35 [ruzicka:INFO] Predicting on 10 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.662 0.515 0.526 0.782 0.401 0.235 0.278 0.274 0.335 0.479]
Testing against dHolbach


03/19/2025 02:26:37 [ruzicka:INFO] Predicting on 10 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.828 0.987 0.998 0.978 1.    1.    0.996 1.    0.999 0.999]
Testing against Diderot


03/19/2025 02:26:38 [ruzicka:INFO] Predicting on 10 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.976 0.86  0.643 0.842 0.419 0.378 0.767 0.561 0.674 0.488]
Testing against Raynal


03/19/2025 02:26:40 [ruzicka:INFO] Predicting on 10 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.165 0.145 0.176 0.279 0.077 0.166 0.082 0.114 0.1   0.09 ]
Baudeau


03/19/2025 02:26:41 [ruzicka:INFO] Predicting on 10 documents


Condorcet


03/19/2025 02:26:43 [ruzicka:INFO] Predicting on 10 documents


dHolbach


03/19/2025 02:26:45 [ruzicka:INFO] Predicting on 10 documents


Diderot


03/19/2025 02:26:46 [ruzicka:INFO] Predicting on 10 documents


Raynal


03/19/2025 02:26:48 [ruzicka:INFO] Fitting on 502 documents...
03/19/2025 02:26:48 [ruzicka:INFO] Predicting on 34 documents




########################## Working with: Tableau 2 ---- by dHolbach
Testing against Baudeau


03/19/2025 02:26:52 [ruzicka:INFO] Predicting on 34 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.543 0.731 0.597 0.085 0.409 0.509 0.733 0.6   0.914 0.721 0.748 0.708
 0.74  0.607 0.794 0.922 0.754 0.561 0.681 0.513 0.615 0.76  0.778 0.635
 0.711 0.54  0.745 0.928 0.804 0.863 0.907 0.763 0.777 0.751]
Testing against Condorcet


03/19/2025 02:26:59 [ruzicka:INFO] Predicting on 34 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.824 0.953 0.952 0.755 0.966 0.86  0.934 0.933 0.915 0.945 0.684 0.898
 0.782 0.975 0.898 0.812 0.763 0.967 0.936 0.457 0.964 0.846 0.789 0.652
 0.75  0.935 0.682 0.784 0.946 0.934 0.892 0.927 0.861 0.876]
Testing against dHolbach


03/19/2025 02:27:04 [ruzicka:INFO] Predicting on 34 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.985 0.905 0.937 0.89  0.88  0.94  0.963 0.952 0.891 0.932 0.907 0.976
 0.793 0.888 0.942 0.972 0.986 0.802 0.821 0.999 0.881 0.98  0.98  0.986
 0.984 0.963 0.996 0.932 0.943 0.924 0.95  0.951 0.982 0.984]
Testing against Diderot


03/19/2025 02:27:08 [ruzicka:INFO] Predicting on 34 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.136 0.452 0.148 0.553 0.011 0.269 0.267 0.095 0.261 0.114 0.609 0.067
 0.282 0.028 0.112 0.038 0.188 0.046 0.064 0.657 0.095 0.1   0.463 0.57
 0.203 0.129 0.181 0.343 0.084 0.152 0.2   0.132 0.13  0.086]
Testing against Raynal


03/19/2025 02:27:13 [ruzicka:INFO] Predicting on 34 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.551 0.363 0.559 0.942 0.868 0.839 0.128 0.492 0.265 0.364 0.798 0.427
 0.919 0.53  0.695 0.564 0.365 0.72  0.853 0.419 0.603 0.405 0.309 0.485
 0.225 0.421 0.174 0.114 0.239 0.171 0.125 0.421 0.279 0.236]
Baudeau


03/19/2025 02:27:18 [ruzicka:INFO] Predicting on 34 documents


Condorcet


03/19/2025 02:27:25 [ruzicka:INFO] Predicting on 34 documents


dHolbach


03/19/2025 02:27:30 [ruzicka:INFO] Predicting on 34 documents


Diderot


03/19/2025 02:27:34 [ruzicka:INFO] Predicting on 34 documents


Raynal


03/19/2025 02:27:39 [ruzicka:INFO] Fitting on 506 documents...
03/19/2025 02:27:39 [ruzicka:INFO] Predicting on 30 documents




########################## Working with: Histoire ---- by dHolbach
Testing against Baudeau


03/19/2025 02:27:43 [ruzicka:INFO] Predicting on 30 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.586 0.058 0.24  0.167 0.305 0.164 0.2   0.751 0.266 0.382 0.38  0.304
 0.244 0.032 0.046 0.158 0.308 0.473 0.344 0.26  0.419 0.106 0.126 0.247
 0.259 0.191 0.23  0.243 0.163 0.179]
Testing against Condorcet


03/19/2025 02:27:49 [ruzicka:INFO] Predicting on 30 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.936 0.934 0.897 0.939 0.957 0.932 0.953 0.925 0.927 0.737 0.907 0.779
 0.875 0.741 0.852 0.881 0.938 0.849 0.741 0.865 0.704 0.927 0.943 0.861
 0.927 0.983 0.962 0.937 0.996 0.98 ]
Testing against dHolbach


03/19/2025 02:27:54 [ruzicka:INFO] Predicting on 30 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.961 0.657 0.857 0.744 0.774 0.851 0.808 0.85  0.895 0.392 0.916 0.524
 0.753 0.296 0.602 0.937 0.906 0.974 0.893 0.99  1.    0.923 0.885 0.601
 0.705 0.874 0.827 0.952 0.56  0.729]
Testing against Diderot


03/19/2025 02:27:57 [ruzicka:INFO] Predicting on 30 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.568 0.772 0.682 0.469 0.469 0.312 0.819 0.513 0.368 0.99  0.535 0.91
 0.826 0.97  0.644 0.26  0.416 0.502 0.944 0.149 0.025 0.247 0.207 0.945
 0.944 0.127 0.263 0.345 0.198 0.299]
Testing against Raynal


03/19/2025 02:28:02 [ruzicka:INFO] Predicting on 30 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.149 0.798 0.535 0.487 0.647 0.477 0.503 0.334 0.632 0.606 0.38  0.861
 0.819 0.825 0.925 0.452 0.552 0.387 0.431 0.142 0.213 0.393 0.061 0.703
 0.534 0.226 0.481 0.307 0.48  0.232]
Baudeau


03/19/2025 02:28:06 [ruzicka:INFO] Predicting on 30 documents


Condorcet


03/19/2025 02:28:12 [ruzicka:INFO] Predicting on 30 documents


dHolbach


03/19/2025 02:28:16 [ruzicka:INFO] Predicting on 30 documents


Diderot


03/19/2025 02:28:20 [ruzicka:INFO] Predicting on 30 documents


Raynal


03/19/2025 02:28:25 [ruzicka:INFO] Fitting on 506 documents...
03/19/2025 02:28:25 [ruzicka:INFO] Predicting on 30 documents




########################## Working with: 6 ---- by Condorcet
Testing against Baudeau


03/19/2025 02:28:28 [ruzicka:INFO] Predicting on 30 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.019 0.053 0.189 0.201 0.117 0.047 0.168 0.176 0.168 0.21  0.274 0.087
 0.091 0.138 0.374 0.126 0.524 0.16  0.23  0.325 0.468 0.324 0.223 0.511
 0.68  0.176 0.612 0.047 0.061 0.011]
Testing against Condorcet


03/19/2025 02:28:34 [ruzicka:INFO] Predicting on 30 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.993 1.    1.    1.    1.    0.997 0.978 0.996 0.999 0.997 0.996 0.996
 0.994 0.971 0.921 0.991 0.936 0.972 0.982 0.998 0.999 0.986 1.    0.997
 0.991 1.    0.997 1.    1.    1.   ]
Testing against dHolbach


03/19/2025 02:28:39 [ruzicka:INFO] Predicting on 30 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.657 0.554 0.386 0.556 0.699 0.66  0.566 0.544 0.597 0.663 0.826 0.614
 0.629 0.861 0.345 0.511 0.862 0.49  0.904 0.639 0.806 0.845 0.767 0.3
 0.604 0.618 0.739 0.375 0.381 0.445]
Testing against Diderot


03/19/2025 02:28:42 [ruzicka:INFO] Predicting on 30 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.323 0.014 0.726 0.184 0.008 0.005 0.511 0.557 0.088 0.184 0.031 0.039
 0.616 0.193 0.974 0.434 0.508 0.861 0.424 0.303 0.081 0.879 0.283 0.914
 0.328 0.088 0.152 0.399 0.138 0.004]
Testing against Raynal


03/19/2025 02:28:47 [ruzicka:INFO] Predicting on 30 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.814 0.37  0.198 0.374 0.373 0.464 0.909 0.757 0.115 0.213 0.306 0.287
 0.677 0.732 0.594 0.672 0.143 0.623 0.338 0.155 0.114 0.267 0.143 0.153
 0.058 0.151 0.239 0.624 0.261 0.446]
Baudeau


03/19/2025 02:28:51 [ruzicka:INFO] Predicting on 30 documents


Condorcet


03/19/2025 02:28:56 [ruzicka:INFO] Predicting on 30 documents


dHolbach


03/19/2025 02:29:01 [ruzicka:INFO] Predicting on 30 documents


Diderot


03/19/2025 02:29:05 [ruzicka:INFO] Predicting on 30 documents


Raynal


03/19/2025 02:29:10 [ruzicka:INFO] Fitting on 505 documents...
03/19/2025 02:29:10 [ruzicka:INFO] Predicting on 31 documents




########################## Working with: Idees sur les besoins 2 ---- by Baudeau
Testing against Baudeau


03/19/2025 02:29:13 [ruzicka:INFO] Predicting on 31 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.033 0.306 0.02  0.891 0.15  0.11  0.817 0.698 0.183 0.224 0.348 0.347
 0.098 0.744 0.093 0.257 0.068 0.184 0.088 0.945 0.276 0.932 0.188 0.237
 0.594 0.303 0.464 0.207 0.754 0.04  0.928]
Testing against Condorcet


03/19/2025 02:29:19 [ruzicka:INFO] Predicting on 31 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.947 0.957 0.988 0.874 0.971 0.946 0.77  0.854 0.978 0.957 0.908 0.928
 0.977 0.956 0.949 0.982 0.92  0.892 0.826 0.632 0.969 0.505 0.827 0.88
 0.604 0.95  0.917 0.987 0.756 0.931 0.841]
Testing against dHolbach


03/19/2025 02:29:24 [ruzicka:INFO] Predicting on 31 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.953 0.847 0.891 0.732 0.902 0.94  0.756 0.813 0.912 0.952 0.958 0.827
 0.965 0.796 0.912 0.76  0.989 0.954 0.996 0.369 0.857 0.72  0.99  0.98
 0.867 0.961 0.928 0.823 0.903 0.974 0.572]
Testing against Diderot


03/19/2025 02:29:28 [ruzicka:INFO] Predicting on 31 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.49  0.724 0.118 0.794 0.32  0.259 0.764 0.752 0.314 0.515 0.286 0.486
 0.18  0.82  0.461 0.282 0.261 0.394 0.255 0.927 0.48  0.705 0.251 0.373
 0.927 0.392 0.457 0.493 0.89  0.239 0.911]
Testing against Raynal


03/19/2025 02:29:33 [ruzicka:INFO] Predicting on 31 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.41  0.352 0.378 0.525 0.497 0.529 0.847 0.832 0.191 0.243 0.461 0.701
 0.107 0.32  0.664 0.324 0.226 0.513 0.173 0.423 0.297 0.821 0.355 0.32
 0.439 0.156 0.29  0.212 0.348 0.346 0.571]
Baudeau


03/19/2025 02:29:36 [ruzicka:INFO] Predicting on 31 documents


Condorcet


03/19/2025 02:29:43 [ruzicka:INFO] Predicting on 31 documents


dHolbach


03/19/2025 02:29:48 [ruzicka:INFO] Predicting on 31 documents


Diderot


03/19/2025 02:29:52 [ruzicka:INFO] Predicting on 31 documents


Raynal


03/19/2025 02:29:57 [ruzicka:INFO] Fitting on 522 documents...
03/19/2025 02:29:57 [ruzicka:INFO] Predicting on 14 documents




########################## Working with: Lettres  a un magistraTtxt ---- by Baudeau
Testing against Baudeau


03/19/2025 02:29:58 [ruzicka:INFO] Predicting on 14 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.995 1.    0.999 0.999 1.    0.993 1.    0.992 1.    1.    0.994 1.
 0.999 0.998]
Testing against Condorcet


03/19/2025 02:30:02 [ruzicka:INFO] Predicting on 14 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.299 0.168 0.303 0.211 0.344 0.272 0.239 0.398 0.525 0.378 0.619 0.302
 0.315 0.487]
Testing against dHolbach


03/19/2025 02:30:04 [ruzicka:INFO] Predicting on 14 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.148 0.128 0.14  0.102 0.068 0.288 0.155 0.603 0.346 0.294 0.365 0.199
 0.151 0.265]
Testing against Diderot


03/19/2025 02:30:06 [ruzicka:INFO] Predicting on 14 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.907 0.536 0.668 0.659 0.55  0.808 0.622 0.822 0.49  0.479 0.639 0.447
 0.638 0.408]
Testing against Raynal


03/19/2025 02:30:08 [ruzicka:INFO] Predicting on 14 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.228 0.239 0.215 0.233 0.101 0.474 0.173 0.268 0.146 0.077 0.317 0.262
 0.11  0.267]
Baudeau


03/19/2025 02:30:09 [ruzicka:INFO] Predicting on 14 documents


Condorcet


03/19/2025 02:30:12 [ruzicka:INFO] Predicting on 14 documents


dHolbach


03/19/2025 02:30:15 [ruzicka:INFO] Predicting on 14 documents


Diderot


03/19/2025 02:30:16 [ruzicka:INFO] Predicting on 14 documents


Raynal


03/19/2025 02:30:19 [ruzicka:INFO] Fitting on 520 documents...
03/19/2025 02:30:19 [ruzicka:INFO] Predicting on 16 documents




########################## Working with: Miscellanea Philosophiques ---- by Diderot
Testing against Baudeau


03/19/2025 02:30:21 [ruzicka:INFO] Predicting on 16 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.121 0.38  0.637 0.334 0.1   0.419 0.349 0.406 0.362 0.653 0.735 0.369
 0.573 0.365 0.361 0.656]
Testing against Condorcet


03/19/2025 02:30:24 [ruzicka:INFO] Predicting on 16 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.72  0.512 0.745 0.529 0.624 0.959 0.601 0.661 0.156 0.336 0.627 0.852
 0.693 0.606 0.56  0.504]
Testing against dHolbach


03/19/2025 02:30:27 [ruzicka:INFO] Predicting on 16 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.374 0.549 0.639 0.628 0.574 0.735 1.    0.995 0.26  0.644 0.418 0.32
 0.795 0.379 0.212 0.31 ]
Testing against Diderot


03/19/2025 02:30:28 [ruzicka:INFO] Predicting on 16 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.963 0.996 0.971 1.    0.997 0.912 0.641 0.675 1.    0.986 0.985 0.996
 0.983 1.    0.996 1.   ]
Testing against Raynal


03/19/2025 02:30:31 [ruzicka:INFO] Predicting on 16 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.776 0.368 0.666 0.201 0.475 0.253 0.185 0.092 0.542 0.712 0.728 0.297
 0.243 0.155 0.729 0.364]
Baudeau


03/19/2025 02:30:33 [ruzicka:INFO] Predicting on 16 documents


Condorcet


03/19/2025 02:30:36 [ruzicka:INFO] Predicting on 16 documents


dHolbach


03/19/2025 02:30:39 [ruzicka:INFO] Predicting on 16 documents


Diderot


03/19/2025 02:30:40 [ruzicka:INFO] Predicting on 16 documents


Raynal


03/19/2025 02:30:43 [ruzicka:INFO] Fitting on 514 documents...
03/19/2025 02:30:43 [ruzicka:INFO] Predicting on 22 documents




########################## Working with: Plan d-une universite ---- by Diderot
Testing against Baudeau


03/19/2025 02:30:46 [ruzicka:INFO] Predicting on 22 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.667 0.776 0.587 0.488 0.652 0.629 0.766 0.397 0.647 0.802 0.829 0.552
 0.604 0.305 0.144 0.795 0.875 0.761 0.648 0.717 0.812 0.68 ]
Testing against Condorcet


03/19/2025 02:30:50 [ruzicka:INFO] Predicting on 22 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.923 0.985 0.985 0.79  0.545 0.768 0.909 0.825 0.91  0.831 0.723 0.621
 0.881 0.254 0.477 0.735 0.882 0.981 0.987 0.816 0.915 0.467]
Testing against dHolbach


03/19/2025 02:30:54 [ruzicka:INFO] Predicting on 22 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.609 0.654 0.46  0.319 0.533 0.434 0.594 0.498 0.333 0.585 0.503 0.362
 0.779 0.367 0.384 0.398 0.253 0.561 0.832 0.567 0.91  0.361]
Testing against Diderot


03/19/2025 02:30:56 [ruzicka:INFO] Predicting on 22 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.916 0.737 0.233 0.995 0.995 0.994 0.893 0.986 0.982 0.971 0.956 0.996
 0.961 1.    1.    0.99  0.964 0.499 0.28  0.973 0.451 1.   ]
Testing against Raynal


03/19/2025 02:31:00 [ruzicka:INFO] Predicting on 22 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.57  0.494 0.793 0.421 0.159 0.255 0.488 0.564 0.167 0.365 0.548 0.515
 0.22  0.434 0.429 0.375 0.166 0.207 0.318 0.399 0.27  0.151]
Baudeau


03/19/2025 02:31:03 [ruzicka:INFO] Predicting on 22 documents


Condorcet


03/19/2025 02:31:07 [ruzicka:INFO] Predicting on 22 documents


dHolbach


03/19/2025 02:31:11 [ruzicka:INFO] Predicting on 22 documents


Diderot


03/19/2025 02:31:13 [ruzicka:INFO] Predicting on 22 documents


Raynal


03/19/2025 02:31:17 [ruzicka:INFO] Fitting on 517 documents...
03/19/2025 02:31:17 [ruzicka:INFO] Predicting on 19 documents




########################## Working with: Eclaircissemens ---- by Baudeau
Testing against Baudeau


03/19/2025 02:31:19 [ruzicka:INFO] Predicting on 19 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.857 0.905 0.969 0.99  0.972 0.993 0.954 0.962 0.545 0.875 0.931 0.989
 0.954 0.881 0.984 0.902 0.987 0.985 0.979]
Testing against Condorcet


03/19/2025 02:31:23 [ruzicka:INFO] Predicting on 19 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.704 0.566 0.531 0.412 0.298 0.365 0.521 0.372 0.674 0.438 0.354 0.534
 0.461 0.373 0.577 0.616 0.211 0.685 0.305]
Testing against dHolbach


03/19/2025 02:31:26 [ruzicka:INFO] Predicting on 19 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.399 0.845 0.473 0.427 0.6   0.534 0.369 0.314 0.528 0.294 0.375 0.215
 0.421 0.355 0.553 0.609 0.25  0.41  0.424]
Testing against Diderot


03/19/2025 02:31:28 [ruzicka:INFO] Predicting on 19 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.945 0.874 0.933 0.923 0.95  0.816 0.953 0.938 0.841 0.965 0.947 0.904
 0.967 0.988 0.929 0.981 0.861 0.852 0.926]
Testing against Raynal


03/19/2025 02:31:31 [ruzicka:INFO] Predicting on 19 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.513 0.361 0.49  0.536 0.221 0.472 0.285 0.428 0.927 0.806 0.674 0.34
 0.368 0.606 0.353 0.291 0.477 0.316 0.451]
Baudeau


03/19/2025 02:31:33 [ruzicka:INFO] Predicting on 19 documents


Condorcet


03/19/2025 02:31:37 [ruzicka:INFO] Predicting on 19 documents


dHolbach


03/19/2025 02:31:40 [ruzicka:INFO] Predicting on 19 documents


Diderot


03/19/2025 02:31:43 [ruzicka:INFO] Predicting on 19 documents


Raynal


03/19/2025 02:31:46 [ruzicka:INFO] Fitting on 500 documents...
03/19/2025 02:31:46 [ruzicka:INFO] Predicting on 36 documents




########################## Working with: 3 ---- by Condorcet
Testing against Baudeau


03/19/2025 02:31:51 [ruzicka:INFO] Predicting on 36 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.29  0.318 0.017 0.36  0.183 0.324 0.131 0.3   0.222 0.376 0.445 0.23
 0.086 0.17  0.324 0.145 0.116 0.124 0.153 0.063 0.299 0.154 0.373 0.338
 0.132 0.251 0.328 0.151 0.047 0.048 0.152 0.168 0.166 0.046 0.297 0.303]
Testing against Condorcet


03/19/2025 02:31:57 [ruzicka:INFO] Predicting on 36 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.999 1.    0.998 0.999 0.995 0.998 1.    1.    1.    1.    0.998 1.
 1.    1.    1.    0.997 1.    0.999 0.999 0.999 1.    1.    1.    0.999
 1.    1.    1.    0.999 0.999 1.    1.    1.    0.998 0.999 1.    1.   ]
Testing against dHolbach


03/19/2025 02:32:03 [ruzicka:INFO] Predicting on 36 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.596 0.58  0.449 0.676 0.804 0.609 0.603 0.689 0.482 0.63  0.701 0.606
 0.697 0.443 0.318 0.644 0.391 0.344 0.465 0.567 0.593 0.325 0.315 0.526
 0.479 0.635 0.557 0.67  0.599 0.351 0.35  0.468 0.737 0.599 0.351 0.526]
Testing against Diderot


03/19/2025 02:32:07 [ruzicka:INFO] Predicting on 36 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.1   0.249 0.019 0.219 0.052 0.058 0.111 0.065 0.204 0.063 0.034 0.397
 0.03  0.027 0.135 0.059 0.038 0.116 0.01  0.51  0.105 0.429 0.204 0.228
 0.008 0.105 0.52  0.659 0.044 0.572 0.043 0.04  0.049 0.051 0.147 0.112]
Testing against Raynal


03/19/2025 02:32:13 [ruzicka:INFO] Predicting on 36 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.412 0.153 0.637 0.16  0.352 0.539 0.237 0.221 0.166 0.219 0.298 0.2
 0.187 0.297 0.425 0.192 0.311 0.291 0.326 0.382 0.111 0.285 0.086 0.241
 0.305 0.298 0.148 0.226 0.396 0.256 0.223 0.596 0.267 0.359 0.194 0.171]
Baudeau


03/19/2025 02:32:18 [ruzicka:INFO] Predicting on 36 documents


Condorcet


03/19/2025 02:32:24 [ruzicka:INFO] Predicting on 36 documents


dHolbach


03/19/2025 02:32:30 [ruzicka:INFO] Predicting on 36 documents


Diderot


03/19/2025 02:32:34 [ruzicka:INFO] Predicting on 36 documents


Raynal


03/19/2025 02:32:40 [ruzicka:INFO] Fitting on 517 documents...
03/19/2025 02:32:40 [ruzicka:INFO] Predicting on 19 documents




########################## Working with: La Morale 3 ---- by dHolbach
Testing against Baudeau


03/19/2025 02:32:43 [ruzicka:INFO] Predicting on 19 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.467 0.389 0.404 0.271 0.386 0.462 0.427 0.355 0.43  0.313 0.361 0.429
 0.503 0.267 0.338 0.628 0.52  0.46  0.518]
Testing against Condorcet


03/19/2025 02:32:47 [ruzicka:INFO] Predicting on 19 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.577 0.603 0.441 0.648 0.841 0.718 0.496 0.654 0.371 0.493 0.492 0.47
 0.47  0.829 0.429 0.853 0.438 0.45  0.577]
Testing against dHolbach


03/19/2025 02:32:49 [ruzicka:INFO] Predicting on 19 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.999 0.999 1.    0.999 0.998 1.    0.999 1.    1.    1.    0.999 1.
 1.    0.991 1.    0.954 0.999 0.999 0.998]
Testing against Diderot


03/19/2025 02:32:52 [ruzicka:INFO] Predicting on 19 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.784 0.585 0.777 0.703 0.606 0.559 0.751 0.593 0.845 0.704 0.763 0.589
 0.598 0.679 0.599 0.806 0.575 0.74  0.747]
Testing against Raynal


03/19/2025 02:32:55 [ruzicka:INFO] Predicting on 19 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.156 0.278 0.12  0.18  0.078 0.292 0.098 0.067 0.124 0.104 0.082 0.351
 0.195 0.32  0.067 0.427 0.347 0.364 0.13 ]
Baudeau


03/19/2025 02:32:57 [ruzicka:INFO] Predicting on 19 documents


Condorcet


03/19/2025 02:33:01 [ruzicka:INFO] Predicting on 19 documents


dHolbach


03/19/2025 02:33:04 [ruzicka:INFO] Predicting on 19 documents


Diderot


03/19/2025 02:33:06 [ruzicka:INFO] Predicting on 19 documents


Raynal


03/19/2025 02:33:10 [ruzicka:INFO] Fitting on 533 documents...
03/19/2025 02:33:10 [ruzicka:INFO] Predicting on 3 documents




########################## Working with: Resultats ---- by Baudeau
Testing against Baudeau


03/19/2025 02:33:10 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.98  1.    0.992]
Testing against Condorcet


03/19/2025 02:33:11 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.695 0.408 0.607]
Testing against dHolbach


03/19/2025 02:33:11 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.489 0.309 0.387]
Testing against Diderot


03/19/2025 02:33:11 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.684 0.489 0.853]
Testing against Raynal


03/19/2025 02:33:12 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.532 0.38  0.582]
Baudeau


03/19/2025 02:33:12 [ruzicka:INFO] Predicting on 3 documents


Condorcet


03/19/2025 02:33:13 [ruzicka:INFO] Predicting on 3 documents


dHolbach


03/19/2025 02:33:13 [ruzicka:INFO] Predicting on 3 documents


Diderot


03/19/2025 02:33:14 [ruzicka:INFO] Predicting on 3 documents


Raynal
