## Corpus testing

This notebook takes works with known labels and looks how good it is attributed to its author. 

In [1]:
import pandas as pd
import numpy as np

import re

from sklearn.preprocessing import StandardScaler

import logging

logging.basicConfig(level="INFO")
logger = logging.getLogger("ruzicka")

from ruzicka.BDIVerifier import BDIVerifier

### I. Authors to themselves

In [2]:
# load preprocessed data

corpus = pd.read_csv('03_tests/authors_themselves/to-test_2000_200mfw_rfreq.csv') # rel freq

corpus

Unnamed: 0,work,author,chunk_num,tag,de,la,les,vous,il,l,...,effet,mieux,donner,jamais,enfin,voir,rendre,ici,beaucoup,cependant
0,Avis,Baudeau,0,0__Baudeau_Avis,3.45,1.40,2.35,0.65,1.25,0.65,...,0.05,0.00,0.05,0.00,0.20,0.00,0.00,0.05,0.05,0.00
1,Eclaircissemens,Baudeau,0,0__Baudeau_Eclaircissemens,4.40,2.60,2.40,1.65,0.85,2.20,...,0.25,0.00,0.05,0.10,0.00,0.00,0.00,0.00,0.00,0.00
2,Explication,Baudeau,0,0__Baudeau_Explication,4.90,3.10,4.65,0.95,1.40,1.15,...,0.00,0.00,0.05,0.00,0.05,0.05,0.00,0.10,0.05,0.00
3,Idees sur les besoins 1,Baudeau,0,0__Baudeau_Idees sur les besoins 1,4.65,3.00,3.20,0.10,0.80,2.00,...,0.00,0.10,0.00,0.20,0.00,0.05,0.00,0.10,0.00,0.00
4,Idees sur les besoins 2,Baudeau,0,0__Baudeau_Idees sur les besoins 2,4.65,3.00,3.35,0.05,0.80,2.10,...,0.00,0.10,0.00,0.20,0.00,0.05,0.00,0.10,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2692,elements T3,Marmontel,94,94__Marmontel_elements T3,5.45,3.70,1.60,0.00,1.10,3.05,...,0.00,0.00,0.15,0.20,0.00,0.05,0.05,0.00,0.10,0.05
2693,elements T1,Marmontel,95,95__Marmontel_elements T1,4.35,3.25,0.95,0.10,2.05,3.45,...,0.05,0.05,0.05,0.05,0.05,0.20,0.35,0.00,0.00,0.05
2694,elements T3,Marmontel,95,95__Marmontel_elements T3,4.20,3.10,2.95,0.10,1.65,2.65,...,0.00,0.00,0.10,0.15,0.00,0.20,0.05,0.05,0.05,0.05
2695,elements T3,Marmontel,96,96__Marmontel_elements T3,4.15,3.00,1.15,0.00,1.45,2.75,...,0.05,0.05,0.05,0.05,0.05,0.00,0.10,0.00,0.00,0.00


In [3]:
raw_df = corpus.iloc[:, :4] # leaves metadata
X = corpus.iloc[:, 4:] # leaves only word columns

raw_df

Unnamed: 0,work,author,chunk_num,tag
0,Avis,Baudeau,0,0__Baudeau_Avis
1,Eclaircissemens,Baudeau,0,0__Baudeau_Eclaircissemens
2,Explication,Baudeau,0,0__Baudeau_Explication
3,Idees sur les besoins 1,Baudeau,0,0__Baudeau_Idees sur les besoins 1
4,Idees sur les besoins 2,Baudeau,0,0__Baudeau_Idees sur les besoins 2
...,...,...,...,...
2692,elements T3,Marmontel,94,94__Marmontel_elements T3
2693,elements T1,Marmontel,95,95__Marmontel_elements T1
2694,elements T3,Marmontel,95,95__Marmontel_elements T3
2695,elements T3,Marmontel,96,96__Marmontel_elements T3


In [4]:
# numer of rows and columns in the X (word frequencies subset)
X.shape

(2697, 200)

In [5]:
# create numeric author labels
labels, label_uniques = raw_df.author.factorize()
raw_df.insert(1, "author_label", labels)
raw_df

Unnamed: 0,work,author_label,author,chunk_num,tag
0,Avis,0,Baudeau,0,0__Baudeau_Avis
1,Eclaircissemens,0,Baudeau,0,0__Baudeau_Eclaircissemens
2,Explication,0,Baudeau,0,0__Baudeau_Explication
3,Idees sur les besoins 1,0,Baudeau,0,0__Baudeau_Idees sur les besoins 1
4,Idees sur les besoins 2,0,Baudeau,0,0__Baudeau_Idees sur les besoins 2
...,...,...,...,...,...
2692,elements T3,10,Marmontel,94,94__Marmontel_elements T3
2693,elements T1,10,Marmontel,95,95__Marmontel_elements T1
2694,elements T3,10,Marmontel,95,95__Marmontel_elements T3
2695,elements T3,10,Marmontel,96,96__Marmontel_elements T3


In [6]:
# check unique authors
label_uniques.values

array(['Baudeau', 'Chastellux', 'Condorcet', 'Deleyre', 'dHolbach',
       'Diderot', 'Guibert', 'Jaucourt', 'Jussieu', 'La Grange',
       'Marmontel', 'Meister', 'Morellet', 'Naigeon', 'Pechmeja',
       'Raynal', 'Rivière', 'Saint-Lambert'], dtype=object)

In [7]:
# set verifier
rng = np.random.default_rng(42)

bdi_mm = BDIVerifier(
    metric='minmax', nb_bootstrap_iter=1000, rnd_prop=0.35, random_state=rng
)

In [9]:
# Loop that takes a work from each author
# and looks whether its (known) author is a good much according to BDI;
# nb: not getting results for a work vs all authors
# (writing results in a .csv, visualisation done later with R)

# select an author, look for his unique works
for a in label_uniques.values:
    #print(author)
    
    # take a subset with only works by an author, find unique works
    unique_works = raw_df[raw_df.author == a]['work'].unique()
    # unique_works is a numpy array of works of each author
    
    for w in unique_works:
        print(f"\n\n#######################\n{a}: {w}\n")
        
        # select each work as a problem set (problems-metadata, problems_X-word freqs)
        problems = raw_df[raw_df.work == w].reset_index(drop=True).copy()
        problems_X = X[raw_df.work == w].reset_index(drop=True).copy()
        print(problems.iloc[1:2,:])
        
        # extract the rest of the corpus
        rest = raw_df[raw_df.work != w].reset_index(drop=True).copy()
        rest_X = X[raw_df.work != w].reset_index(drop=True).copy()
        #print(f"\nRest of the corpus:\n{rest.iloc[1:6,:]}")
        
        # scaling
        ss = StandardScaler(with_mean=False).fit(rest_X)
        rest_scaled_X = ss.transform(rest_X)
        problems_scaled_X = ss.transform(problems_X)
        
        #print(problems_scaled_X)
        
        # fit 
        bdi_mm.fit(rest_scaled_X, rest.author_label)
        
        # prediction
        bdi_mm.predict_proba(
            problems_scaled_X, 
            # get_loc - select the author of the work (? is that right??)
            [label_uniques.get_loc(a)] * problems_scaled_X.shape[0]
        )
        
        # output results
        res = pd.DataFrame(dict(zip(problems.tag, bdi_mm._dist_arrays)))
        
        # writing
        fh = '03_tests/authors_themselves/bdi_res/'+a+'_'+w+'.csv'
        res.to_csv(fh)
        print('Results written in:',fh)
        
        rest_scaled_X = None
        res = None

03/18/2025 01:23:23 [ruzicka:INFO] Fitting on 2672 documents...
03/18/2025 01:23:23 [ruzicka:INFO] Predicting on 25 documents




#######################
Baudeau: Avis 

    work  author_label   author  chunk_num               tag
1  Avis              0  Baudeau          1  1__Baudeau_Avis 


03/18/2025 01:23:27 [ruzicka:INFO] Fitting on 2678 documents...
03/18/2025 01:23:27 [ruzicka:INFO] Predicting on 19 documents


Results written in: 03_tests/authors_themselves/bdi_res/Baudeau_Avis .csv


#######################
Baudeau: Eclaircissemens

              work  author_label   author  chunk_num  \
1  Eclaircissemens             0  Baudeau          1   

                          tag  
1  1__Baudeau_Eclaircissemens  


03/18/2025 01:23:31 [ruzicka:INFO] Fitting on 2686 documents...
03/18/2025 01:23:31 [ruzicka:INFO] Predicting on 11 documents


Results written in: 03_tests/authors_themselves/bdi_res/Baudeau_Eclaircissemens.csv


#######################
Baudeau: Explication

          work  author_label   author  chunk_num                     tag
1  Explication             0  Baudeau          1  1__Baudeau_Explication


03/18/2025 01:23:33 [ruzicka:INFO] Fitting on 2661 documents...
03/18/2025 01:23:33 [ruzicka:INFO] Predicting on 36 documents


Results written in: 03_tests/authors_themselves/bdi_res/Baudeau_Explication.csv


#######################
Baudeau: Idees sur les besoins 1

                      work  author_label   author  chunk_num  \
1  Idees sur les besoins 1             0  Baudeau          1   

                                  tag  
1  1__Baudeau_Idees sur les besoins 1  


03/18/2025 01:23:39 [ruzicka:INFO] Fitting on 2661 documents...
03/18/2025 01:23:39 [ruzicka:INFO] Predicting on 36 documents


Results written in: 03_tests/authors_themselves/bdi_res/Baudeau_Idees sur les besoins 1.csv


#######################
Baudeau: Idees sur les besoins 2

                      work  author_label   author  chunk_num  \
1  Idees sur les besoins 2             0  Baudeau          1   

                                  tag  
1  1__Baudeau_Idees sur les besoins 2  


03/18/2025 01:23:45 [ruzicka:INFO] Fitting on 2696 documents...
03/18/2025 01:23:45 [ruzicka:INFO] Predicting on 1 documents


Results written in: 03_tests/authors_themselves/bdi_res/Baudeau_Idees sur les besoins 2.csv


#######################
Baudeau: Lettre du fermier

Empty DataFrame
Columns: [work, author_label, author, chunk_num, tag]
Index: []


03/18/2025 01:23:45 [ruzicka:INFO] Fitting on 2683 documents...
03/18/2025 01:23:45 [ruzicka:INFO] Predicting on 14 documents


Results written in: 03_tests/authors_themselves/bdi_res/Baudeau_Lettre du fermier.csv


#######################
Baudeau: Lettres  a un magistraTtxt

                         work  author_label   author  chunk_num  \
1  Lettres  a un magistraTtxt             0  Baudeau          1   

                                     tag  
1  1__Baudeau_Lettres  a un magistraTtxt  


03/18/2025 01:23:48 [ruzicka:INFO] Fitting on 2694 documents...
03/18/2025 01:23:48 [ruzicka:INFO] Predicting on 3 documents


Results written in: 03_tests/authors_themselves/bdi_res/Baudeau_Lettres  a un magistraTtxt.csv


#######################
Baudeau: Resultats

        work  author_label   author  chunk_num                   tag
1  Resultats             0  Baudeau          1  1__Baudeau_Resultats


03/18/2025 01:23:48 [ruzicka:INFO] Fitting on 2629 documents...
03/18/2025 01:23:48 [ruzicka:INFO] Predicting on 68 documents


Results written in: 03_tests/authors_themselves/bdi_res/Baudeau_Resultats.csv


#######################
Chastellux: De la felicite

             work  author_label      author  chunk_num  \
1  De la felicite             1  Chastellux          1   

                            tag  
1  1__Chastellux_De la felicite  


03/18/2025 01:24:02 [ruzicka:INFO] Fitting on 2679 documents...
03/18/2025 01:24:02 [ruzicka:INFO] Predicting on 18 documents


Results written in: 03_tests/authors_themselves/bdi_res/Chastellux_De la felicite.csv


#######################
Chastellux: eloge

    work  author_label    author  chunk_num                tag
1  eloge            14  Pechmeja          0  0__Pechmeja_eloge


03/18/2025 01:24:06 [ruzicka:INFO] Fitting on 2659 documents...
03/18/2025 01:24:06 [ruzicka:INFO] Predicting on 38 documents


Results written in: 03_tests/authors_themselves/bdi_res/Chastellux_eloge.csv


#######################
Chastellux: Voyage de Newport

                work  author_label      author  chunk_num  \
1  Voyage de Newport             1  Chastellux          1   

                               tag  
1  1__Chastellux_Voyage de Newport  


03/18/2025 01:24:14 [ruzicka:INFO] Fitting on 2656 documents...
03/18/2025 01:24:14 [ruzicka:INFO] Predicting on 41 documents


Results written in: 03_tests/authors_themselves/bdi_res/Chastellux_Voyage de Newport.csv


#######################
Chastellux: Voyages T1

         work  author_label      author  chunk_num                       tag
1  Voyages T1             1  Chastellux          1  1__Chastellux_Voyages T1


03/18/2025 01:24:23 [ruzicka:INFO] Fitting on 2660 documents...
03/18/2025 01:24:23 [ruzicka:INFO] Predicting on 37 documents


Results written in: 03_tests/authors_themselves/bdi_res/Chastellux_Voyages T1.csv


#######################
Chastellux: Voyages T2

         work  author_label      author  chunk_num                       tag
1  Voyages T2             1  Chastellux          1  1__Chastellux_Voyages T2


03/18/2025 01:24:31 [ruzicka:INFO] Fitting on 2663 documents...
03/18/2025 01:24:31 [ruzicka:INFO] Predicting on 34 documents


Results written in: 03_tests/authors_themselves/bdi_res/Chastellux_Voyages T2.csv


#######################
Condorcet: 12

  work  author_label     author  chunk_num              tag
1   12             2  Condorcet          1  1__Condorcet_12


03/18/2025 01:24:40 [ruzicka:INFO] Fitting on 2669 documents...
03/18/2025 01:24:40 [ruzicka:INFO] Predicting on 28 documents


Results written in: 03_tests/authors_themselves/bdi_res/Condorcet_12.csv


#######################
Condorcet: 13

  work  author_label     author  chunk_num              tag
1   13             2  Condorcet          1  1__Condorcet_13


03/18/2025 01:24:48 [ruzicka:INFO] Fitting on 2668 documents...
03/18/2025 01:24:48 [ruzicka:INFO] Predicting on 29 documents


Results written in: 03_tests/authors_themselves/bdi_res/Condorcet_13.csv


#######################
Condorcet: 15

  work  author_label     author  chunk_num              tag
1   15             2  Condorcet          1  1__Condorcet_15


03/18/2025 01:24:56 [ruzicka:INFO] Fitting on 2665 documents...
03/18/2025 01:24:56 [ruzicka:INFO] Predicting on 32 documents


Results written in: 03_tests/authors_themselves/bdi_res/Condorcet_15.csv


#######################
Condorcet: 16

  work  author_label     author  chunk_num              tag
1   16             2  Condorcet          1  1__Condorcet_16


03/18/2025 01:25:05 [ruzicka:INFO] Fitting on 2662 documents...
03/18/2025 01:25:05 [ruzicka:INFO] Predicting on 35 documents


Results written in: 03_tests/authors_themselves/bdi_res/Condorcet_16.csv


#######################
Condorcet: 17

  work  author_label     author  chunk_num              tag
1   17             2  Condorcet          1  1__Condorcet_17


03/18/2025 01:25:14 [ruzicka:INFO] Fitting on 2663 documents...
03/18/2025 01:25:14 [ruzicka:INFO] Predicting on 34 documents


Results written in: 03_tests/authors_themselves/bdi_res/Condorcet_17.csv


#######################
Condorcet: 19

  work  author_label     author  chunk_num              tag
1   19             2  Condorcet          1  1__Condorcet_19


03/18/2025 01:25:23 [ruzicka:INFO] Fitting on 2662 documents...
03/18/2025 01:25:23 [ruzicka:INFO] Predicting on 35 documents


Results written in: 03_tests/authors_themselves/bdi_res/Condorcet_19.csv


#######################
Condorcet: 20

  work  author_label     author  chunk_num              tag
1   20             2  Condorcet          1  1__Condorcet_20


03/18/2025 01:25:33 [ruzicka:INFO] Fitting on 2665 documents...
03/18/2025 01:25:33 [ruzicka:INFO] Predicting on 32 documents


Results written in: 03_tests/authors_themselves/bdi_res/Condorcet_20.csv


#######################
Condorcet: 5

  work  author_label     author  chunk_num             tag
1    5             2  Condorcet          1  1__Condorcet_5


03/18/2025 01:25:42 [ruzicka:INFO] Fitting on 2674 documents...
03/18/2025 01:25:42 [ruzicka:INFO] Predicting on 23 documents


Results written in: 03_tests/authors_themselves/bdi_res/Condorcet_5.csv


#######################
Deleyre: Analyse de la philosophie

                        work  author_label   author  chunk_num  \
1  Analyse de la philosophie             3  Deleyre          1   

                                    tag  
1  1__Deleyre_Analyse de la philosophie  


03/18/2025 01:25:45 [ruzicka:INFO] Fitting on 2691 documents...
03/18/2025 01:25:45 [ruzicka:INFO] Predicting on 6 documents


Results written in: 03_tests/authors_themselves/bdi_res/Deleyre_Analyse de la philosophie.csv


#######################
Deleyre: eloge de M Roux

              work  author_label   author  chunk_num  \
1  eloge de M Roux             3  Deleyre          1   

                          tag  
1  1__Deleyre_eloge de M Roux  


03/18/2025 01:25:46 [ruzicka:INFO] Fitting on 2695 documents...
03/18/2025 01:25:46 [ruzicka:INFO] Predicting on 2 documents


Results written in: 03_tests/authors_themselves/bdi_res/Deleyre_eloge de M Roux.csv


#######################
Deleyre: epingle

      work  author_label   author  chunk_num                 tag
1  epingle             3  Deleyre          1  1__Deleyre_epingle


03/18/2025 01:25:46 [ruzicka:INFO] Fitting on 2665 documents...
03/18/2025 01:25:46 [ruzicka:INFO] Predicting on 32 documents


Results written in: 03_tests/authors_themselves/bdi_res/Deleyre_epingle.csv


#######################
Deleyre: Essai sur la vie

               work  author_label   author  chunk_num  \
1  Essai sur la vie             3  Deleyre          1   

                           tag  
1  1__Deleyre_Essai sur la vie  


03/18/2025 01:25:50 [ruzicka:INFO] Fitting on 2689 documents...
03/18/2025 01:25:50 [ruzicka:INFO] Predicting on 8 documents


Results written in: 03_tests/authors_themselves/bdi_res/Deleyre_Essai sur la vie.csv


#######################
Deleyre: Idees sur l-education

                    work  author_label   author  chunk_num  \
1  Idees sur l-education             3  Deleyre          1   

                                tag  
1  1__Deleyre_Idees sur l-education  


03/18/2025 01:25:52 [ruzicka:INFO] Fitting on 2673 documents...
03/18/2025 01:25:52 [ruzicka:INFO] Predicting on 24 documents


Results written in: 03_tests/authors_themselves/bdi_res/Deleyre_Idees sur l-education.csv


#######################
Deleyre: L-esprit de Saint-evremont

                         work  author_label   author  chunk_num  \
1  L-esprit de Saint-evremont             3  Deleyre          1   

                                     tag  
1  1__Deleyre_L-esprit de Saint-evremont  


03/18/2025 01:25:55 [ruzicka:INFO] Fitting on 2696 documents...
03/18/2025 01:25:55 [ruzicka:INFO] Predicting on 1 documents
03/18/2025 01:25:55 [ruzicka:INFO] Fitting on 2695 documents...
03/18/2025 01:25:55 [ruzicka:INFO] Predicting on 2 documents


Results written in: 03_tests/authors_themselves/bdi_res/Deleyre_L-esprit de Saint-evremont.csv


#######################
Deleyre: Rapport pour des Corses expatries

Empty DataFrame
Columns: [work, author_label, author, chunk_num, tag]
Index: []
Results written in: 03_tests/authors_themselves/bdi_res/Deleyre_Rapport pour des Corses expatries.csv


#######################
Deleyre: Sur la question

              work  author_label   author  chunk_num  \
1  Sur la question             3  Deleyre          1   

                          tag  
1  1__Deleyre_Sur la question  


03/18/2025 01:25:55 [ruzicka:INFO] Fitting on 2687 documents...
03/18/2025 01:25:55 [ruzicka:INFO] Predicting on 10 documents


Results written in: 03_tests/authors_themselves/bdi_res/Deleyre_Sur la question.csv


#######################
dHolbach: elements de la morale universelle

                                work  author_label    author  chunk_num  \
1  elements de la morale universelle             4  dHolbach          1   

                                             tag  
1  1__dHolbach_elements de la morale universelle  


03/18/2025 01:25:57 [ruzicka:INFO] Fitting on 2696 documents...
03/18/2025 01:25:57 [ruzicka:INFO] Predicting on 1 documents


Results written in: 03_tests/authors_themselves/bdi_res/dHolbach_elements de la morale universelle.csv


#######################
dHolbach: Essai sur l-art de ramper

Empty DataFrame
Columns: [work, author_label, author, chunk_num, tag]
Index: []


03/18/2025 01:25:58 [ruzicka:INFO] Fitting on 2666 documents...
03/18/2025 01:25:58 [ruzicka:INFO] Predicting on 31 documents


Results written in: 03_tests/authors_themselves/bdi_res/dHolbach_Essai sur l-art de ramper.csv


#######################
dHolbach: ethocratie

         work  author_label    author  chunk_num                     tag
1  ethocratie             4  dHolbach          1  1__dHolbach_ethocratie


03/18/2025 01:26:03 [ruzicka:INFO] Fitting on 2686 documents...
03/18/2025 01:26:03 [ruzicka:INFO] Predicting on 11 documents


Results written in: 03_tests/authors_themselves/bdi_res/dHolbach_ethocratie.csv


#######################
dHolbach: La Morale 1

          work  author_label    author  chunk_num                      tag
1  La Morale 1             4  dHolbach          1  1__dHolbach_La Morale 1


03/18/2025 01:26:06 [ruzicka:INFO] Fitting on 2664 documents...
03/18/2025 01:26:06 [ruzicka:INFO] Predicting on 33 documents


Results written in: 03_tests/authors_themselves/bdi_res/dHolbach_La Morale 1.csv


#######################
dHolbach: La Morale 4

          work  author_label    author  chunk_num                      tag
1  La Morale 4             4  dHolbach          1  1__dHolbach_La Morale 4


03/18/2025 01:26:12 [ruzicka:INFO] Fitting on 2665 documents...
03/18/2025 01:26:12 [ruzicka:INFO] Predicting on 32 documents


Results written in: 03_tests/authors_themselves/bdi_res/dHolbach_La Morale 4.csv


#######################
dHolbach: La Morale 5

          work  author_label    author  chunk_num                      tag
1  La Morale 5             4  dHolbach          1  1__dHolbach_La Morale 5


03/18/2025 01:26:18 [ruzicka:INFO] Fitting on 2676 documents...
03/18/2025 01:26:18 [ruzicka:INFO] Predicting on 21 documents


Results written in: 03_tests/authors_themselves/bdi_res/dHolbach_La Morale 5.csv


#######################
dHolbach: Système Social 2

                work  author_label    author  chunk_num  \
1  Système Social 2             4  dHolbach          1   

                             tag  
1  1__dHolbach_Système Social 2  


03/18/2025 01:26:22 [ruzicka:INFO] Fitting on 2676 documents...
03/18/2025 01:26:22 [ruzicka:INFO] Predicting on 21 documents


Results written in: 03_tests/authors_themselves/bdi_res/dHolbach_Système Social 2.csv


#######################
dHolbach: Theologie portative

                  work  author_label    author  chunk_num  \
1  Theologie portative             4  dHolbach          1   

                               tag  
1  1__dHolbach_Theologie portative  


03/18/2025 01:26:26 [ruzicka:INFO] Fitting on 2695 documents...
03/18/2025 01:26:26 [ruzicka:INFO] Predicting on 2 documents


Results written in: 03_tests/authors_themselves/bdi_res/dHolbach_Theologie portative.csv


#######################
Diderot: De la suffisance

               work  author_label   author  chunk_num  \
1  De la suffisance             5  Diderot          1   

                           tag  
1  1__Diderot_De la suffisance  


03/18/2025 01:26:26 [ruzicka:INFO] Fitting on 2686 documents...
03/18/2025 01:26:26 [ruzicka:INFO] Predicting on 11 documents


Results written in: 03_tests/authors_themselves/bdi_res/Diderot_De la suffisance.csv


#######################
Diderot: Essai sur la Peinture

                    work  author_label   author  chunk_num  \
1  Essai sur la Peinture             5  Diderot          1   

                                tag  
1  1__Diderot_Essai sur la Peinture  


03/18/2025 01:26:28 [ruzicka:INFO] Fitting on 2693 documents...
03/18/2025 01:26:28 [ruzicka:INFO] Predicting on 4 documents


Results written in: 03_tests/authors_themselves/bdi_res/Diderot_Essai sur la Peinture.csv


#######################
Diderot: Fragments echappes (+FP1)

                        work  author_label   author  chunk_num  \
1  Fragments echappes (+FP1)             5  Diderot          1   

                                    tag  
1  1__Diderot_Fragments echappes (+FP1)  


03/18/2025 01:26:28 [ruzicka:INFO] Fitting on 2690 documents...
03/18/2025 01:26:28 [ruzicka:INFO] Predicting on 7 documents


Results written in: 03_tests/authors_themselves/bdi_res/Diderot_Fragments echappes (+FP1).csv


#######################
Diderot: Recherches philosophiques

                        work  author_label   author  chunk_num  \
1  Recherches philosophiques             5  Diderot          1   

                                    tag  
1  1__Diderot_Recherches philosophiques  


03/18/2025 01:26:29 [ruzicka:INFO] Fitting on 2666 documents...
03/18/2025 01:26:29 [ruzicka:INFO] Predicting on 31 documents


Results written in: 03_tests/authors_themselves/bdi_res/Diderot_Recherches philosophiques.csv


#######################
Diderot: Refutation suivie

                work  author_label   author  chunk_num  \
1  Refutation suivie             5  Diderot          1   

                            tag  
1  1__Diderot_Refutation suivie  


03/18/2025 01:26:33 [ruzicka:INFO] Fitting on 2685 documents...
03/18/2025 01:26:33 [ruzicka:INFO] Predicting on 12 documents


Results written in: 03_tests/authors_themselves/bdi_res/Diderot_Refutation suivie.csv


#######################
Diderot: Salon 1763

         work  author_label   author  chunk_num                    tag
1  Salon 1763             5  Diderot          1  1__Diderot_Salon 1763


03/18/2025 01:26:34 [ruzicka:INFO] Fitting on 2694 documents...
03/18/2025 01:26:34 [ruzicka:INFO] Predicting on 3 documents


Results written in: 03_tests/authors_themselves/bdi_res/Diderot_Salon 1763.csv


#######################
Diderot: Salon 1775

         work  author_label   author  chunk_num                    tag
1  Salon 1775             5  Diderot          1  1__Diderot_Salon 1775


03/18/2025 01:26:35 [ruzicka:INFO] Fitting on 2687 documents...
03/18/2025 01:26:35 [ruzicka:INFO] Predicting on 10 documents


Results written in: 03_tests/authors_themselves/bdi_res/Diderot_Salon 1775.csv


#######################
Diderot: Suite de l-apologie

                  work  author_label   author  chunk_num  \
1  Suite de l-apologie             5  Diderot          1   

                              tag  
1  1__Diderot_Suite de l-apologie  


03/18/2025 01:26:36 [ruzicka:INFO] Fitting on 2691 documents...
03/18/2025 01:26:36 [ruzicka:INFO] Predicting on 6 documents


Results written in: 03_tests/authors_themselves/bdi_res/Diderot_Suite de l-apologie.csv


#######################
Guibert: Discurs

      work  author_label   author  chunk_num                 tag
1  Discurs             6  Guibert          1  1__Guibert_Discurs


03/18/2025 01:26:38 [ruzicka:INFO] Fitting on 2678 documents...
03/18/2025 01:26:38 [ruzicka:INFO] Predicting on 19 documents


Results written in: 03_tests/authors_themselves/bdi_res/Guibert_Discurs.csv


#######################
Guibert: Eloge du roi

           work  author_label   author  chunk_num                      tag
1  Eloge du roi             6  Guibert          1  1__Guibert_Eloge du roi


03/18/2025 01:26:42 [ruzicka:INFO] Fitting on 2645 documents...
03/18/2025 01:26:42 [ruzicka:INFO] Predicting on 52 documents


Results written in: 03_tests/authors_themselves/bdi_res/Guibert_Eloge du roi.csv


#######################
Guibert: Essai generale

             work  author_label   author  chunk_num                        tag
1  Essai generale             6  Guibert          1  1__Guibert_Essai generale


03/18/2025 01:26:54 [ruzicka:INFO] Fitting on 2644 documents...
03/18/2025 01:26:54 [ruzicka:INFO] Predicting on 53 documents


Results written in: 03_tests/authors_themselves/bdi_res/Guibert_Essai generale.csv


#######################
Guibert: Oeuvres militaires T1

                    work  author_label   author  chunk_num  \
1  Oeuvres militaires T1             6  Guibert          1   

                                tag  
1  1__Guibert_Oeuvres militaires T1  


03/18/2025 01:27:06 [ruzicka:INFO] Fitting on 2664 documents...
03/18/2025 01:27:06 [ruzicka:INFO] Predicting on 33 documents


Results written in: 03_tests/authors_themselves/bdi_res/Guibert_Oeuvres militaires T1.csv


#######################
Guibert: Oeuvres militaires T2

                    work  author_label   author  chunk_num  \
1  Oeuvres militaires T2             6  Guibert          1   

                                tag  
1  1__Guibert_Oeuvres militaires T2  


03/18/2025 01:27:14 [ruzicka:INFO] Fitting on 2667 documents...
03/18/2025 01:27:14 [ruzicka:INFO] Predicting on 30 documents


Results written in: 03_tests/authors_themselves/bdi_res/Guibert_Oeuvres militaires T2.csv


#######################
Guibert: Oeuvres militaires T3

                    work  author_label   author  chunk_num  \
1  Oeuvres militaires T3             6  Guibert          1   

                                tag  
1  1__Guibert_Oeuvres militaires T3  


03/18/2025 01:27:21 [ruzicka:INFO] Fitting on 2667 documents...
03/18/2025 01:27:21 [ruzicka:INFO] Predicting on 30 documents


Results written in: 03_tests/authors_themselves/bdi_res/Guibert_Oeuvres militaires T3.csv


#######################
Guibert: Oeuvres militaires T4

                    work  author_label   author  chunk_num  \
1  Oeuvres militaires T4             6  Guibert          1   

                                tag  
1  1__Guibert_Oeuvres militaires T4  


03/18/2025 01:27:28 [ruzicka:INFO] Fitting on 2677 documents...
03/18/2025 01:27:28 [ruzicka:INFO] Predicting on 20 documents


Results written in: 03_tests/authors_themselves/bdi_res/Guibert_Oeuvres militaires T4.csv


#######################
Jaucourt: Ency 1-7

       work  author_label    author  chunk_num                   tag
1  Ency 1-7             7  Jaucourt          1  1__Jaucourt_Ency 1-7


03/18/2025 01:27:33 [ruzicka:INFO] Fitting on 2687 documents...
03/18/2025 01:27:33 [ruzicka:INFO] Predicting on 10 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jaucourt_Ency 1-7.csv


#######################
Jaucourt: Ency 10

      work  author_label    author  chunk_num                  tag
1  Ency 10             7  Jaucourt          1  1__Jaucourt_Ency 10


03/18/2025 01:27:35 [ruzicka:INFO] Fitting on 2689 documents...
03/18/2025 01:27:35 [ruzicka:INFO] Predicting on 8 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jaucourt_Ency 10.csv


#######################
Jaucourt: Ency 11

      work  author_label    author  chunk_num                  tag
1  Ency 11             7  Jaucourt          1  1__Jaucourt_Ency 11


03/18/2025 01:27:37 [ruzicka:INFO] Fitting on 2689 documents...
03/18/2025 01:27:37 [ruzicka:INFO] Predicting on 8 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jaucourt_Ency 11.csv


#######################
Jaucourt: Ency 12

      work  author_label    author  chunk_num                  tag
1  Ency 12             7  Jaucourt          1  1__Jaucourt_Ency 12


03/18/2025 01:27:39 [ruzicka:INFO] Fitting on 2688 documents...
03/18/2025 01:27:39 [ruzicka:INFO] Predicting on 9 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jaucourt_Ency 12.csv


#######################
Jaucourt: Ency 8

     work  author_label    author  chunk_num                 tag
1  Ency 8             7  Jaucourt          1  1__Jaucourt_Ency 8


03/18/2025 01:27:41 [ruzicka:INFO] Fitting on 2686 documents...
03/18/2025 01:27:41 [ruzicka:INFO] Predicting on 11 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jaucourt_Ency 8.csv


#######################
Jaucourt: Ency 9

     work  author_label    author  chunk_num                 tag
1  Ency 9             7  Jaucourt          1  1__Jaucourt_Ency 9


03/18/2025 01:27:43 [ruzicka:INFO] Fitting on 2637 documents...
03/18/2025 01:27:43 [ruzicka:INFO] Predicting on 60 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jaucourt_Ency 9.csv


#######################
Jaucourt: Essais de teodicee T1

                    work  author_label    author  chunk_num  \
1  Essais de teodicee T1             7  Jaucourt          1   

                                 tag  
1  1__Jaucourt_Essais de teodicee T1  


03/18/2025 01:27:54 [ruzicka:INFO] Fitting on 2637 documents...
03/18/2025 01:27:54 [ruzicka:INFO] Predicting on 60 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jaucourt_Essais de teodicee T1.csv


#######################
Jaucourt: Essais de teodicee T2

                    work  author_label    author  chunk_num  \
1  Essais de teodicee T2             7  Jaucourt          1   

                                 tag  
1  1__Jaucourt_Essais de teodicee T2  


03/18/2025 01:28:05 [ruzicka:INFO] Fitting on 2694 documents...
03/18/2025 01:28:05 [ruzicka:INFO] Predicting on 3 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jaucourt_Essais de teodicee T2.csv


#######################
Jussieu: Exposition d-un nouvel ordre

                           work  author_label   author  chunk_num  \
1  Exposition d-un nouvel ordre             8  Jussieu          1   

                                       tag  
1  1__Jussieu_Exposition d-un nouvel ordre  


03/18/2025 01:28:06 [ruzicka:INFO] Fitting on 2688 documents...
03/18/2025 01:28:06 [ruzicka:INFO] Predicting on 9 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jussieu_Exposition d-un nouvel ordre.csv


#######################
Jussieu: HDI T1-4

       work  author_label   author  chunk_num                  tag
1  HDI T1-4             8  Jussieu          1  1__Jussieu_HDI T1-4


03/18/2025 01:28:07 [ruzicka:INFO] Fitting on 2666 documents...
03/18/2025 01:28:07 [ruzicka:INFO] Predicting on 31 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jussieu_HDI T1-4.csv


#######################
Jussieu: Memoire caractères 1-14

                       work  author_label   author  chunk_num  \
1  Memoire caractères 1-14             8  Jussieu          1   

                                   tag  
1  1__Jussieu_Memoire caractères 1-14  


03/18/2025 01:28:12 [ruzicka:INFO] Fitting on 2693 documents...
03/18/2025 01:28:12 [ruzicka:INFO] Predicting on 4 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jussieu_Memoire caractères 1-14.csv


#######################
Jussieu: Memoire famille

              work  author_label   author  chunk_num  \
1  Memoire famille             8  Jussieu          1   

                          tag  
1  1__Jussieu_Memoire famille  


03/18/2025 01:28:13 [ruzicka:INFO] Fitting on 2690 documents...
03/18/2025 01:28:13 [ruzicka:INFO] Predicting on 7 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jussieu_Memoire famille.csv


#######################
Jussieu: Methode naturelle

                work  author_label   author  chunk_num  \
1  Methode naturelle             8  Jussieu          1   

                            tag  
1  1__Jussieu_Methode naturelle  


03/18/2025 01:28:14 [ruzicka:INFO] Fitting on 2684 documents...
03/18/2025 01:28:14 [ruzicka:INFO] Predicting on 13 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jussieu_Methode naturelle.csv


#######################
Jussieu: Notice Historique 1-6

                    work  author_label   author  chunk_num  \
1  Notice Historique 1-6             8  Jussieu          1   

                                tag  
1  1__Jussieu_Notice Historique 1-6  


03/18/2025 01:28:16 [ruzicka:INFO] Fitting on 2690 documents...
03/18/2025 01:28:16 [ruzicka:INFO] Predicting on 7 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jussieu_Notice Historique 1-6.csv


#######################
Jussieu: Principes

        work  author_label   author  chunk_num                   tag
1  Principes             8  Jussieu          1  1__Jussieu_Principes


03/18/2025 01:28:17 [ruzicka:INFO] Fitting on 2691 documents...
03/18/2025 01:28:17 [ruzicka:INFO] Predicting on 6 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jussieu_Principes.csv


#######################
Jussieu: RapporTtxt

         work  author_label   author  chunk_num                    tag
1  RapporTtxt             8  Jussieu          1  1__Jussieu_RapporTtxt


03/18/2025 01:28:18 [ruzicka:INFO] Fitting on 2656 documents...
03/18/2025 01:28:18 [ruzicka:INFO] Predicting on 41 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jussieu_RapporTtxt.csv


#######################
Jussieu: Traite des vertus

                work  author_label   author  chunk_num  \
1  Traite des vertus             8  Jussieu          1   

                            tag  
1  1__Jussieu_Traite des vertus  


03/18/2025 01:28:24 [ruzicka:INFO] Fitting on 2658 documents...
03/18/2025 01:28:24 [ruzicka:INFO] Predicting on 39 documents


Results written in: 03_tests/authors_themselves/bdi_res/Jussieu_Traite des vertus.csv


#######################
La Grange: Lucrece T1

         work  author_label     author  chunk_num                      tag
1  Lucrece T1             9  La Grange          1  1__La Grange_Lucrece T1


03/18/2025 01:28:28 [ruzicka:INFO] Fitting on 2651 documents...
03/18/2025 01:28:28 [ruzicka:INFO] Predicting on 46 documents


Results written in: 03_tests/authors_themselves/bdi_res/La Grange_Lucrece T1.csv


#######################
La Grange: Lucrece T2

         work  author_label     author  chunk_num                      tag
1  Lucrece T2             9  La Grange          1  1__La Grange_Lucrece T2


03/18/2025 01:28:33 [ruzicka:INFO] Fitting on 2668 documents...
03/18/2025 01:28:33 [ruzicka:INFO] Predicting on 29 documents


Results written in: 03_tests/authors_themselves/bdi_res/La Grange_Lucrece T2.csv


#######################
Marmontel: Belisaire

        work  author_label     author  chunk_num                     tag
1  Belisaire            10  Marmontel          1  1__Marmontel_Belisaire


03/18/2025 01:28:45 [ruzicka:INFO] Fitting on 2667 documents...
03/18/2025 01:28:45 [ruzicka:INFO] Predicting on 30 documents


Results written in: 03_tests/authors_themselves/bdi_res/Marmontel_Belisaire.csv


#######################
Marmontel: Contes T1

        work  author_label     author  chunk_num                     tag
1  Contes T1            10  Marmontel          1  1__Marmontel_Contes T1


03/18/2025 01:28:58 [ruzicka:INFO] Fitting on 2665 documents...
03/18/2025 01:28:58 [ruzicka:INFO] Predicting on 32 documents


Results written in: 03_tests/authors_themselves/bdi_res/Marmontel_Contes T1.csv


#######################
Marmontel: Contes T2

        work  author_label     author  chunk_num                     tag
1  Contes T2            10  Marmontel          1  1__Marmontel_Contes T2


03/18/2025 01:29:12 [ruzicka:INFO] Fitting on 2670 documents...
03/18/2025 01:29:12 [ruzicka:INFO] Predicting on 27 documents


Results written in: 03_tests/authors_themselves/bdi_res/Marmontel_Contes T2.csv


#######################
Marmontel: Contes T3

        work  author_label     author  chunk_num                     tag
1  Contes T3            10  Marmontel          1  1__Marmontel_Contes T3


03/18/2025 01:29:24 [ruzicka:INFO] Fitting on 2601 documents...
03/18/2025 01:29:24 [ruzicka:INFO] Predicting on 96 documents


Results written in: 03_tests/authors_themselves/bdi_res/Marmontel_Contes T3.csv


#######################
Marmontel: elements T1

          work  author_label     author  chunk_num                       tag
1  elements T1            10  Marmontel          1  1__Marmontel_elements T1


03/18/2025 01:29:59 [ruzicka:INFO] Fitting on 2612 documents...
03/18/2025 01:29:59 [ruzicka:INFO] Predicting on 85 documents


Results written in: 03_tests/authors_themselves/bdi_res/Marmontel_elements T1.csv


#######################
Marmontel: elements T2

          work  author_label     author  chunk_num                       tag
1  elements T2            10  Marmontel          1  1__Marmontel_elements T2


03/18/2025 01:30:32 [ruzicka:INFO] Fitting on 2599 documents...
03/18/2025 01:30:32 [ruzicka:INFO] Predicting on 98 documents


Results written in: 03_tests/authors_themselves/bdi_res/Marmontel_elements T2.csv


#######################
Marmontel: elements T3

          work  author_label     author  chunk_num                       tag
1  elements T3            10  Marmontel          1  1__Marmontel_elements T3


03/18/2025 01:31:08 [ruzicka:INFO] Fitting on 2660 documents...
03/18/2025 01:31:08 [ruzicka:INFO] Predicting on 37 documents


Results written in: 03_tests/authors_themselves/bdi_res/Marmontel_elements T3.csv


#######################
Marmontel: Les Incas

        work  author_label     author  chunk_num                     tag
1  Les Incas            10  Marmontel          1  1__Marmontel_Les Incas


03/18/2025 01:31:24 [ruzicka:INFO] Fitting on 2693 documents...
03/18/2025 01:31:24 [ruzicka:INFO] Predicting on 4 documents


Results written in: 03_tests/authors_themselves/bdi_res/Marmontel_Les Incas.csv


#######################
Meister: De l-origine

           work  author_label   author  chunk_num                      tag
1  De l-origine            11  Meister          1  1__Meister_De l-origine


03/18/2025 01:31:24 [ruzicka:INFO] Fitting on 2688 documents...
03/18/2025 01:31:24 [ruzicka:INFO] Predicting on 9 documents


Results written in: 03_tests/authors_themselves/bdi_res/Meister_De l-origine.csv


#######################
Meister: De la morale

           work  author_label   author  chunk_num                      tag
1  De la morale            11  Meister          1  1__Meister_De la morale


03/18/2025 01:31:25 [ruzicka:INFO] Fitting on 2695 documents...
03/18/2025 01:31:25 [ruzicka:INFO] Predicting on 2 documents


Results written in: 03_tests/authors_themselves/bdi_res/Meister_De la morale.csv


#######################
Morellet: FragmenTtxt

          work  author_label    author  chunk_num                      tag
1  FragmenTtxt            12  Morellet          1  1__Morellet_FragmenTtxt


03/18/2025 01:31:25 [ruzicka:INFO] Fitting on 2693 documents...
03/18/2025 01:31:25 [ruzicka:INFO] Predicting on 4 documents


Results written in: 03_tests/authors_themselves/bdi_res/Morellet_FragmenTtxt.csv


#######################
Morellet: Le cri

     work  author_label    author  chunk_num                 tag
1  Le cri            12  Morellet          1  1__Morellet_Le cri


03/18/2025 01:31:26 [ruzicka:INFO] Fitting on 2694 documents...
03/18/2025 01:31:26 [ruzicka:INFO] Predicting on 3 documents


Results written in: 03_tests/authors_themselves/bdi_res/Morellet_Le cri.csv


#######################
Morellet: Nouvelles

        work  author_label    author  chunk_num                    tag
1  Nouvelles            12  Morellet          1  1__Morellet_Nouvelles


03/18/2025 01:31:26 [ruzicka:INFO] Fitting on 2692 documents...
03/18/2025 01:31:26 [ruzicka:INFO] Predicting on 5 documents


Results written in: 03_tests/authors_themselves/bdi_res/Morellet_Nouvelles.csv


#######################
Morellet: Observations

           work  author_label    author  chunk_num                       tag
1  Observations            12  Morellet          1  1__Morellet_Observations


03/18/2025 01:31:27 [ruzicka:INFO] Fitting on 2693 documents...
03/18/2025 01:31:27 [ruzicka:INFO] Predicting on 4 documents


Results written in: 03_tests/authors_themselves/bdi_res/Morellet_Observations.csv


#######################
Morellet: Quelques

       work  author_label    author  chunk_num                   tag
1  Quelques            12  Morellet          1  1__Morellet_Quelques


03/18/2025 01:31:27 [ruzicka:INFO] Fitting on 2683 documents...
03/18/2025 01:31:27 [ruzicka:INFO] Predicting on 14 documents


Results written in: 03_tests/authors_themselves/bdi_res/Morellet_Quelques.csv


#######################
Morellet: Reflexions

         work  author_label    author  chunk_num                     tag
1  Reflexions            12  Morellet          1  1__Morellet_Reflexions


03/18/2025 01:31:29 [ruzicka:INFO] Fitting on 2684 documents...
03/18/2025 01:31:29 [ruzicka:INFO] Predicting on 13 documents


Results written in: 03_tests/authors_themselves/bdi_res/Morellet_Reflexions.csv


#######################
Morellet: SupplemenTtxt

            work  author_label    author  chunk_num                        tag
1  SupplemenTtxt            12  Morellet          1  1__Morellet_SupplemenTtxt


03/18/2025 01:31:30 [ruzicka:INFO] Fitting on 2684 documents...
03/18/2025 01:31:30 [ruzicka:INFO] Predicting on 13 documents


Results written in: 03_tests/authors_themselves/bdi_res/Morellet_SupplemenTtxt.csv


#######################
Morellet: Theorie

      work  author_label    author  chunk_num                  tag
1  Theorie            12  Morellet          1  1__Morellet_Theorie


03/18/2025 01:31:32 [ruzicka:INFO] Fitting on 2678 documents...
03/18/2025 01:31:32 [ruzicka:INFO] Predicting on 19 documents


Results written in: 03_tests/authors_themselves/bdi_res/Morellet_Theorie.csv


#######################
Naigeon: Le militaire

           work  author_label   author  chunk_num                      tag
1  Le militaire            13  Naigeon          1  1__Naigeon_Le militaire


03/18/2025 01:31:34 [ruzicka:INFO] Fitting on 2691 documents...
03/18/2025 01:31:34 [ruzicka:INFO] Predicting on 6 documents


Results written in: 03_tests/authors_themselves/bdi_res/Naigeon_Le militaire.csv


#######################
Naigeon: Liberte

      work  author_label   author  chunk_num                 tag
1  Liberte            13  Naigeon          1  1__Naigeon_Liberte


03/18/2025 01:31:35 [ruzicka:INFO] Fitting on 2643 documents...
03/18/2025 01:31:35 [ruzicka:INFO] Predicting on 54 documents


Results written in: 03_tests/authors_themselves/bdi_res/Naigeon_Liberte.csv


#######################
Naigeon: Manuel d-epictete

                work  author_label   author  chunk_num  \
1  Manuel d-epictete            13  Naigeon          1   

                            tag  
1  1__Naigeon_Manuel d-epictete  


03/18/2025 01:31:41 [ruzicka:INFO] Fitting on 2691 documents...
03/18/2025 01:31:41 [ruzicka:INFO] Predicting on 6 documents


Results written in: 03_tests/authors_themselves/bdi_res/Naigeon_Manuel d-epictete.csv


#######################
Naigeon: Richesse

       work  author_label   author  chunk_num                  tag
1  Richesse            13  Naigeon          1  1__Naigeon_Richesse


03/18/2025 01:31:41 [ruzicka:INFO] Fitting on 2689 documents...
03/18/2025 01:31:41 [ruzicka:INFO] Predicting on 8 documents


Results written in: 03_tests/authors_themselves/bdi_res/Naigeon_Richesse.csv


#######################
Naigeon: Unitaires

        work  author_label   author  chunk_num                   tag
1  Unitaires            13  Naigeon          1  1__Naigeon_Unitaires


03/18/2025 01:31:43 [ruzicka:INFO] Fitting on 2679 documents...
03/18/2025 01:31:43 [ruzicka:INFO] Predicting on 18 documents


Results written in: 03_tests/authors_themselves/bdi_res/Naigeon_Unitaires.csv


#######################
Pechmeja: eloge

    work  author_label    author  chunk_num                tag
1  eloge            14  Pechmeja          0  0__Pechmeja_eloge


03/18/2025 01:31:44 [ruzicka:INFO] Fitting on 2669 documents...
03/18/2025 01:31:44 [ruzicka:INFO] Predicting on 28 documents


Results written in: 03_tests/authors_themselves/bdi_res/Pechmeja_eloge.csv


#######################
Pechmeja: Telephe

      work  author_label    author  chunk_num                  tag
1  Telephe            14  Pechmeja          1  1__Pechmeja_Telephe


03/18/2025 01:31:47 [ruzicka:INFO] Fitting on 2674 documents...
03/18/2025 01:31:47 [ruzicka:INFO] Predicting on 23 documents


Results written in: 03_tests/authors_themselves/bdi_res/Pechmeja_Telephe.csv


#######################
Raynal: Anecdotes historiques 1

                      work  author_label  author  chunk_num  \
1  Anecdotes historiques 1            15  Raynal          1   

                                 tag  
1  1__Raynal_Anecdotes historiques 1  


03/18/2025 01:31:52 [ruzicka:INFO] Fitting on 2668 documents...
03/18/2025 01:31:52 [ruzicka:INFO] Predicting on 29 documents


Results written in: 03_tests/authors_themselves/bdi_res/Raynal_Anecdotes historiques 1.csv


#######################
Raynal: Anecdotes historiques 2

                      work  author_label  author  chunk_num  \
1  Anecdotes historiques 2            15  Raynal          1   

                                 tag  
1  1__Raynal_Anecdotes historiques 2  


03/18/2025 01:31:58 [ruzicka:INFO] Fitting on 2675 documents...
03/18/2025 01:31:58 [ruzicka:INFO] Predicting on 22 documents


Results written in: 03_tests/authors_themselves/bdi_res/Raynal_Anecdotes historiques 2.csv


#######################
Raynal: Anecdotes litteraires2 1756

                          work  author_label  author  chunk_num  \
1  Anecdotes litteraires2 1756            15  Raynal          1   

                                     tag  
1  1__Raynal_Anecdotes litteraires2 1756  


03/18/2025 01:32:03 [ruzicka:INFO] Fitting on 2670 documents...
03/18/2025 01:32:03 [ruzicka:INFO] Predicting on 27 documents


Results written in: 03_tests/authors_themselves/bdi_res/Raynal_Anecdotes litteraires2 1756.csv


#######################
Raynal: ecole 2

      work  author_label  author  chunk_num                tag
1  ecole 2            15  Raynal          1  1__Raynal_ecole 2


03/18/2025 01:32:09 [ruzicka:INFO] Fitting on 2669 documents...
03/18/2025 01:32:09 [ruzicka:INFO] Predicting on 28 documents


Results written in: 03_tests/authors_themselves/bdi_res/Raynal_ecole 2.csv


#######################
Raynal: ecole 3

      work  author_label  author  chunk_num                tag
1  ecole 3            15  Raynal          1  1__Raynal_ecole 3


03/18/2025 01:32:15 [ruzicka:INFO] Fitting on 2670 documents...
03/18/2025 01:32:15 [ruzicka:INFO] Predicting on 27 documents


Results written in: 03_tests/authors_themselves/bdi_res/Raynal_ecole 3.csv


#######################
Raynal: Histoire du Parlement 1748

                         work  author_label  author  chunk_num  \
1  Histoire du Parlement 1748            15  Raynal          1   

                                    tag  
1  1__Raynal_Histoire du Parlement 1748  


03/18/2025 01:32:20 [ruzicka:INFO] Fitting on 2677 documents...
03/18/2025 01:32:20 [ruzicka:INFO] Predicting on 20 documents


Results written in: 03_tests/authors_themselves/bdi_res/Raynal_Histoire du Parlement 1748.csv


#######################
Raynal: Histoire du Parlement Band 1

                           work  author_label  author  chunk_num  \
1  Histoire du Parlement Band 1            15  Raynal          1   

                                      tag  
1  1__Raynal_Histoire du Parlement Band 1  


03/18/2025 01:32:25 [ruzicka:INFO] Fitting on 2680 documents...
03/18/2025 01:32:25 [ruzicka:INFO] Predicting on 17 documents


Results written in: 03_tests/authors_themselves/bdi_res/Raynal_Histoire du Parlement Band 1.csv


#######################
Raynal: Histoire du Parlement Band 2

                           work  author_label  author  chunk_num  \
1  Histoire du Parlement Band 2            15  Raynal          1   

                                      tag  
1  1__Raynal_Histoire du Parlement Band 2  


03/18/2025 01:32:29 [ruzicka:INFO] Fitting on 2687 documents...
03/18/2025 01:32:29 [ruzicka:INFO] Predicting on 10 documents


Results written in: 03_tests/authors_themselves/bdi_res/Raynal_Histoire du Parlement Band 2.csv


#######################
Rivière: De l-instruction publique  1775

                              work  author_label    author  chunk_num  \
1  De l-instruction publique  1775            16  Rivière          1   

                                           tag  
1  1__Rivière_De l-instruction publique  1775  


03/18/2025 01:32:31 [ruzicka:INFO] Fitting on 2657 documents...
03/18/2025 01:32:31 [ruzicka:INFO] Predicting on 40 documents


Results written in: 03_tests/authors_themselves/bdi_res/Rivière_De l-instruction publique  1775.csv


#######################
Rivière: L-Interet

        work  author_label    author  chunk_num                    tag
1  L-Interet            16  Rivière          1  1__Rivière_L-Interet


03/18/2025 01:32:36 [ruzicka:INFO] Fitting on 2618 documents...
03/18/2025 01:32:36 [ruzicka:INFO] Predicting on 79 documents


Results written in: 03_tests/authors_themselves/bdi_res/Rivière_L-Interet.csv


#######################
Rivière: L-ordre naturel

              work  author_label    author  chunk_num  \
1  L-ordre naturel            16  Rivière          1   

                           tag  
1  1__Rivière_L-ordre naturel  


03/18/2025 01:32:45 [ruzicka:INFO] Fitting on 2692 documents...
03/18/2025 01:32:45 [ruzicka:INFO] Predicting on 5 documents


Results written in: 03_tests/authors_themselves/bdi_res/Rivière_L-ordre naturel.csv


#######################
Saint-Lambert: Les deux Amis

            work  author_label         author  chunk_num  \
1  Les deux Amis            17  Saint-Lambert          1   

                              tag  
1  1__Saint-Lambert_Les deux Amis  


03/18/2025 01:32:46 [ruzicka:INFO] Fitting on 2662 documents...
03/18/2025 01:32:46 [ruzicka:INFO] Predicting on 35 documents


Results written in: 03_tests/authors_themselves/bdi_res/Saint-Lambert_Les deux Amis.csv


#######################
Saint-Lambert: Oeuvres Phil T1

              work  author_label         author  chunk_num  \
1  Oeuvres Phil T1            17  Saint-Lambert          1   

                                tag  
1  1__Saint-Lambert_Oeuvres Phil T1  


03/18/2025 01:32:54 [ruzicka:INFO] Fitting on 2658 documents...
03/18/2025 01:32:54 [ruzicka:INFO] Predicting on 39 documents


Results written in: 03_tests/authors_themselves/bdi_res/Saint-Lambert_Oeuvres Phil T1.csv


#######################
Saint-Lambert: Oeuvres Phil T2

              work  author_label         author  chunk_num  \
1  Oeuvres Phil T2            17  Saint-Lambert          1   

                                tag  
1  1__Saint-Lambert_Oeuvres Phil T2  


03/18/2025 01:33:02 [ruzicka:INFO] Fitting on 2653 documents...
03/18/2025 01:33:02 [ruzicka:INFO] Predicting on 44 documents


Results written in: 03_tests/authors_themselves/bdi_res/Saint-Lambert_Oeuvres Phil T2.csv


#######################
Saint-Lambert: Oeuvres Phil T3

              work  author_label         author  chunk_num  \
1  Oeuvres Phil T3            17  Saint-Lambert          1   

                                tag  
1  1__Saint-Lambert_Oeuvres Phil T3  


03/18/2025 01:33:10 [ruzicka:INFO] Fitting on 2652 documents...
03/18/2025 01:33:10 [ruzicka:INFO] Predicting on 45 documents


Results written in: 03_tests/authors_themselves/bdi_res/Saint-Lambert_Oeuvres Phil T3.csv


#######################
Saint-Lambert: Oeuvres Phil T4

              work  author_label         author  chunk_num  \
1  Oeuvres Phil T4            17  Saint-Lambert          1   

                                tag  
1  1__Saint-Lambert_Oeuvres Phil T4  


03/18/2025 01:33:19 [ruzicka:INFO] Fitting on 2675 documents...
03/18/2025 01:33:19 [ruzicka:INFO] Predicting on 22 documents


Results written in: 03_tests/authors_themselves/bdi_res/Saint-Lambert_Oeuvres Phil T4.csv


#######################
Saint-Lambert: Oeuvres Phil T5

              work  author_label         author  chunk_num  \
1  Oeuvres Phil T5            17  Saint-Lambert          1   

                                tag  
1  1__Saint-Lambert_Oeuvres Phil T5  
Results written in: 03_tests/authors_themselves/bdi_res/Saint-Lambert_Oeuvres Phil T5.csv


# FP1

Test FP1 chunks with BDI

In [52]:
# load preprocessed data

corpus = pd.read_csv('03_tests/fp1/2000_words/FP1_2000_200mfw_rfreq.csv') # rel freq

corpus

Unnamed: 0,work,author,chunk_num,tag,de,des,et,la,les,vous,...,voit,rendre,beaucoup,seul,ici,mieux,donner,enfin,voir,cependant
0,Avis,Baudeau,0,0__Baudeau_Avis,3.45,2.20,2.50,1.40,2.35,0.65,...,0.05,0.00,0.05,0.05,0.05,0.00,0.05,0.20,0.00,0.00
1,Eclaircissemens,Baudeau,0,0__Baudeau_Eclaircissemens,4.40,2.85,2.80,2.60,2.40,1.65,...,0.05,0.00,0.00,0.05,0.00,0.00,0.05,0.00,0.00,0.00
2,Explication,Baudeau,0,0__Baudeau_Explication,4.90,2.60,3.80,3.10,4.65,0.95,...,0.00,0.00,0.05,0.10,0.10,0.00,0.05,0.05,0.05,0.00
3,Idees sur l-administration,Baudeau,0,0__Baudeau_Idees sur l-administration,5.60,1.90,2.20,3.65,1.85,0.10,...,0.00,0.00,0.10,0.10,0.00,0.05,0.05,0.10,0.00,0.05
4,Idees sur la puissance,Baudeau,0,0__Baudeau_Idees sur la puissance,4.90,1.85,2.50,2.35,2.75,0.10,...,0.00,0.10,0.05,0.00,0.00,0.10,0.00,0.10,0.05,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4429,elements T3,Marmontel,96,96__Marmontel_elements T3,4.15,1.20,3.35,3.00,1.15,0.00,...,0.05,0.10,0.00,0.25,0.00,0.05,0.05,0.05,0.00,0.00
4430,Histoire Generale T19,Deleyre,97,97__Deleyre_Histoire Generale T19,5.25,1.85,0.75,3.75,2.65,0.00,...,0.00,0.05,0.05,0.00,0.00,0.00,0.00,0.00,0.00,0.00
4431,elements T3,Marmontel,97,97__Marmontel_elements T3,4.60,0.95,3.10,3.40,1.55,0.00,...,0.00,0.10,0.00,0.00,0.00,0.00,0.05,0.00,0.00,0.00
4432,Histoire Generale T19,Deleyre,98,98__Deleyre_Histoire Generale T19,4.90,2.95,0.75,3.25,3.20,0.05,...,0.05,0.00,0.20,0.00,0.00,0.05,0.00,0.00,0.05,0.05


In [53]:
raw_df = corpus.iloc[:, :4] # leaves metadata
X = corpus.iloc[:, 4:] # leaves only word columns

raw_df

Unnamed: 0,work,author,chunk_num,tag
0,Avis,Baudeau,0,0__Baudeau_Avis
1,Eclaircissemens,Baudeau,0,0__Baudeau_Eclaircissemens
2,Explication,Baudeau,0,0__Baudeau_Explication
3,Idees sur l-administration,Baudeau,0,0__Baudeau_Idees sur l-administration
4,Idees sur la puissance,Baudeau,0,0__Baudeau_Idees sur la puissance
...,...,...,...,...
4429,elements T3,Marmontel,96,96__Marmontel_elements T3
4430,Histoire Generale T19,Deleyre,97,97__Deleyre_Histoire Generale T19
4431,elements T3,Marmontel,97,97__Marmontel_elements T3
4432,Histoire Generale T19,Deleyre,98,98__Deleyre_Histoire Generale T19


In [54]:
# numer of rows and columns in the X (word frequencies subset)
X.shape

(4434, 200)

In [55]:
# create numeric author labels
labels, label_uniques = raw_df.author.factorize()
raw_df.insert(1, "author_label", labels)
raw_df

Unnamed: 0,work,author_label,author,chunk_num,tag
0,Avis,0,Baudeau,0,0__Baudeau_Avis
1,Eclaircissemens,0,Baudeau,0,0__Baudeau_Eclaircissemens
2,Explication,0,Baudeau,0,0__Baudeau_Explication
3,Idees sur l-administration,0,Baudeau,0,0__Baudeau_Idees sur l-administration
4,Idees sur la puissance,0,Baudeau,0,0__Baudeau_Idees sur la puissance
...,...,...,...,...,...
4429,elements T3,11,Marmontel,96,96__Marmontel_elements T3
4430,Histoire Generale T19,3,Deleyre,97,97__Deleyre_Histoire Generale T19
4431,elements T3,11,Marmontel,97,97__Marmontel_elements T3
4432,Histoire Generale T19,3,Deleyre,98,98__Deleyre_Histoire Generale T19


In [56]:
# select an "unknown" work

problems = raw_df[raw_df.work == "FP1"].reset_index(drop=True).copy()
problems_X = X[raw_df.work == "FP1"].reset_index(drop=True).copy()
problems

Unnamed: 0,work,author_label,author,chunk_num,tag
0,FP1,7,HDI,0,0__HDI_FP1
1,FP1,7,HDI,1,1__HDI_FP1
2,FP1,7,HDI,2,2__HDI_FP1


In [57]:
# extract the rest of the corpus

rest = raw_df[raw_df.work != "FP1"].reset_index(drop=True).copy()
rest_X = X[raw_df.work != "FP1"].reset_index(drop = True).copy()
rest

Unnamed: 0,work,author_label,author,chunk_num,tag
0,Avis,0,Baudeau,0,0__Baudeau_Avis
1,Eclaircissemens,0,Baudeau,0,0__Baudeau_Eclaircissemens
2,Explication,0,Baudeau,0,0__Baudeau_Explication
3,Idees sur l-administration,0,Baudeau,0,0__Baudeau_Idees sur l-administration
4,Idees sur la puissance,0,Baudeau,0,0__Baudeau_Idees sur la puissance
...,...,...,...,...,...
4426,elements T3,11,Marmontel,96,96__Marmontel_elements T3
4427,Histoire Generale T19,3,Deleyre,97,97__Deleyre_Histoire Generale T19
4428,elements T3,11,Marmontel,97,97__Marmontel_elements T3
4429,Histoire Generale T19,3,Deleyre,98,98__Deleyre_Histoire Generale T19


**Scaling**

In [58]:
ss = StandardScaler(with_mean=False).fit(rest_X)

In [59]:
rest_scaled_X = ss.transform(rest_X)
problems_scaled_X = ss.transform(problems_X)

**Verification**

In [60]:
# set verifier
rng = np.random.default_rng(42)

bdi_mm = BDIVerifier(
    metric='minmax', nb_bootstrap_iter=1000, rnd_prop=0.35, random_state=rng
)

In [61]:
# fit 
bdi_mm.fit(rest_scaled_X, rest.author_label)

03/18/2025 02:31:31 [ruzicka:INFO] Fitting on 4431 documents...


In [62]:
# check unique authors
label_uniques.values

array(['Baudeau', 'Chastellux', 'Condorcet', 'Deleyre', 'dHolbach',
       'Diderot', 'Guibert', 'HDI', 'Jaucourt', 'Jussieu', 'La Grange',
       'Marmontel', 'Meister', 'Morellet', 'Naigeon', 'Pechmeja',
       'Raynal', 'Rivière', 'Saint-Lambert'], dtype=object)

In [63]:
for label in label_uniques.values:
    print(f"Testing against {label}")
    code = label_uniques.get_loc(label)
    print(
        f"Bootstrap Match Strength (one per chunk, 0-1.0): {bdi_mm.predict_proba(problems_scaled_X, [code] * problems_scaled_X.shape[0])}"
    )

03/18/2025 02:31:35 [ruzicka:INFO] Predicting on 3 documents


Testing against Baudeau


03/18/2025 02:31:36 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.656 0.717 0.722]
Testing against Chastellux


03/18/2025 02:31:36 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.849 0.668 0.967]
Testing against Condorcet


03/18/2025 02:31:39 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.979 0.993 0.968]
Testing against Deleyre


03/18/2025 02:31:40 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.813 0.788 0.889]
Testing against dHolbach


03/18/2025 02:31:42 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.842 0.967 0.865]
Testing against Diderot


03/18/2025 02:31:43 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [1. 1. 1.]
Testing against Guibert


03/18/2025 02:31:44 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.734 0.614 0.768]
Testing against HDI


03/18/2025 02:31:44 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.316 0.408 0.45 ]
Testing against Jaucourt


03/18/2025 02:31:45 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.787 0.673 0.714]
Testing against Jussieu


03/18/2025 02:31:45 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.329 0.279 0.275]
Testing against La Grange


03/18/2025 02:31:46 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.101 0.261 0.297]
Testing against Marmontel


03/18/2025 02:31:47 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.967 0.925 0.969]
Testing against Meister


03/18/2025 02:31:47 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.305 0.288 0.103]
Testing against Morellet


03/18/2025 02:31:48 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.935 0.844 0.744]
Testing against Naigeon


03/18/2025 02:31:48 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.835 0.702 0.354]
Testing against Pechmeja


03/18/2025 02:31:49 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.216 0.555 0.605]
Testing against Raynal


03/18/2025 02:31:50 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.421 0.559 0.657]
Testing against Rivière


03/18/2025 02:31:50 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.572 0.51  0.407]
Testing against Saint-Lambert
Bootstrap Match Strength (one per chunk, 0-1.0): [0.689 0.92  0.89 ]


In [64]:
authors = label_uniques.values

fh = ''

for a in authors:

    print(a)
    
    bdi_mm.predict_proba(
        problems_scaled_X, [label_uniques.get_loc(a)] * problems_scaled_X.shape[0]
    )

    
    x = pd.DataFrame(dict(zip(problems.tag, bdi_mm._dist_arrays)))
    
    fh = '03_tests/fp1/2000_words/mfw200/fp1_vs_' + a + '.csv'

    x.to_csv(fh)

03/18/2025 02:31:57 [ruzicka:INFO] Predicting on 3 documents


Baudeau


03/18/2025 02:31:57 [ruzicka:INFO] Predicting on 3 documents


Chastellux


03/18/2025 02:31:58 [ruzicka:INFO] Predicting on 3 documents


Condorcet


03/18/2025 02:32:00 [ruzicka:INFO] Predicting on 3 documents


Deleyre


03/18/2025 02:32:01 [ruzicka:INFO] Predicting on 3 documents


dHolbach


03/18/2025 02:32:03 [ruzicka:INFO] Predicting on 3 documents


Diderot


03/18/2025 02:32:05 [ruzicka:INFO] Predicting on 3 documents


Guibert


03/18/2025 02:32:06 [ruzicka:INFO] Predicting on 3 documents


HDI


03/18/2025 02:32:06 [ruzicka:INFO] Predicting on 3 documents


Jaucourt


03/18/2025 02:32:07 [ruzicka:INFO] Predicting on 3 documents


Jussieu


03/18/2025 02:32:07 [ruzicka:INFO] Predicting on 3 documents


La Grange


03/18/2025 02:32:08 [ruzicka:INFO] Predicting on 3 documents


Marmontel


03/18/2025 02:32:09 [ruzicka:INFO] Predicting on 3 documents


Meister


03/18/2025 02:32:09 [ruzicka:INFO] Predicting on 3 documents


Morellet


03/18/2025 02:32:10 [ruzicka:INFO] Predicting on 3 documents


Naigeon


03/18/2025 02:32:10 [ruzicka:INFO] Predicting on 3 documents


Pechmeja


03/18/2025 02:32:11 [ruzicka:INFO] Predicting on 3 documents


Raynal


03/18/2025 02:32:12 [ruzicka:INFO] Predicting on 3 documents


Rivière


03/18/2025 02:32:12 [ruzicka:INFO] Predicting on 3 documents


Saint-Lambert


# III. Work vs all authors
Here random 2 works from each author are used to see how good BDI will recognise true author (same settings as in the actual analysis)

In [98]:
# load preprocessed data

corpus = pd.read_csv('03_tests/authors_vs_all/all_2works_2k_200mfw_rfreq.csv') # rel freq

corpus

Unnamed: 0,work,author,chunk_num,tag,de,a,et,la,que,les,...,peuvent,voit,assez,tant,lorsqu,autant,effet,fois,seul,donner
0,Idees sur la puissance,Baudeau,0,0__Baudeau_Idees sur la puissance,4.90,0.50,2.50,2.35,1.15,2.75,...,0.20,0.00,0.00,0.10,0.00,0.15,0.00,0.00,0.00,0.00
1,Voyage de Newport,Chastellux,0,0__Chastellux_Voyage de Newport,2.40,0.50,1.65,1.00,1.25,1.10,...,0.05,0.05,0.00,0.00,0.20,0.00,0.00,0.00,0.00,0.05
2,Voyages T1,Chastellux,0,0__Chastellux_Voyages T1,2.70,1.05,1.10,0.55,0.95,0.95,...,0.00,0.00,0.00,0.00,0.00,0.05,0.05,0.05,0.00,0.10
3,eloge de M Roux,Deleyre,0,0__Deleyre_eloge de M Roux,5.30,0.80,2.65,3.00,1.35,2.20,...,0.00,0.00,0.15,0.00,0.00,0.05,0.00,0.00,0.10,0.10
4,Essai sur la vie,Deleyre,0,0__Deleyre_Essai sur la vie,2.20,0.65,1.15,1.30,0.55,1.15,...,0.00,0.00,0.00,0.05,0.00,0.00,0.00,0.10,0.00,0.05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
453,Belisaire,Marmontel,9,9__Marmontel_Belisaire,2.05,0.35,2.05,1.45,0.85,0.75,...,0.00,0.05,0.00,0.00,0.00,0.05,0.00,0.10,0.05,0.05
454,eloge,Pechmeja,9,9__Pechmeja_eloge,1.70,0.15,0.85,1.00,0.70,1.45,...,0.10,0.05,0.00,0.00,0.05,0.15,0.10,0.05,0.00,0.00
455,ecole 1,Raynal,9,9__Raynal_ecole 1,2.50,0.60,1.30,1.20,0.40,1.15,...,0.00,0.05,0.00,0.00,0.05,0.00,0.05,0.00,0.00,0.00
456,De l-instruction publique 1775,Rivière,9,9__Rivière_De l-instruction publique 1775,5.00,0.40,1.90,3.30,1.05,2.75,...,0.05,0.00,0.00,0.00,0.10,0.10,0.20,0.00,0.05,0.10


In [99]:
raw_df = corpus.iloc[:, :4] # leaves metadata
X = corpus.iloc[:, 4:] # leaves only word columns

raw_df

Unnamed: 0,work,author,chunk_num,tag
0,Idees sur la puissance,Baudeau,0,0__Baudeau_Idees sur la puissance
1,Voyage de Newport,Chastellux,0,0__Chastellux_Voyage de Newport
2,Voyages T1,Chastellux,0,0__Chastellux_Voyages T1
3,eloge de M Roux,Deleyre,0,0__Deleyre_eloge de M Roux
4,Essai sur la vie,Deleyre,0,0__Deleyre_Essai sur la vie
...,...,...,...,...
453,Belisaire,Marmontel,9,9__Marmontel_Belisaire
454,eloge,Pechmeja,9,9__Pechmeja_eloge
455,ecole 1,Raynal,9,9__Raynal_ecole 1
456,De l-instruction publique 1775,Rivière,9,9__Rivière_De l-instruction publique 1775


In [100]:
# create numeric author labels
labels, label_uniques = raw_df.author.factorize()
raw_df.insert(1, "author_label", labels)
raw_df

Unnamed: 0,work,author_label,author,chunk_num,tag
0,Idees sur la puissance,0,Baudeau,0,0__Baudeau_Idees sur la puissance
1,Voyage de Newport,1,Chastellux,0,0__Chastellux_Voyage de Newport
2,Voyages T1,1,Chastellux,0,0__Chastellux_Voyages T1
3,eloge de M Roux,2,Deleyre,0,0__Deleyre_eloge de M Roux
4,Essai sur la vie,2,Deleyre,0,0__Deleyre_Essai sur la vie
...,...,...,...,...,...
453,Belisaire,17,Marmontel,9,9__Marmontel_Belisaire
454,eloge,11,Pechmeja,9,9__Pechmeja_eloge
455,ecole 1,12,Raynal,9,9__Raynal_ecole 1
456,De l-instruction publique 1775,13,Rivière,9,9__Rivière_De l-instruction publique 1775


In [101]:
# check unique authors
label_uniques.values

array(['Baudeau', 'Chastellux', 'Deleyre', 'dHolbach', 'Diderot',
       'Guibert', 'Jaucourt', 'Jussieu', 'Meister', 'Morellet', 'Naigeon',
       'Pechmeja', 'Raynal', 'Rivière', 'Saint-Lambert', 'Condorcet',
       'La Grange', 'Marmontel'], dtype=object)

In [109]:
print('N unique works:', len(list(set(raw_df.work))))

unique_works = list(set(raw_df.work))

for w in unique_works[0:3]:
    print(w)

N unique works: 36
2
De la Poesie Dramatique
Les Incas


In [114]:
# testing each individual work
for w in unique_works[0:3]:
    
    problems = raw_df[raw_df.work == w].reset_index(drop=True).copy()
    problems_X = X[raw_df.work == w].reset_index(drop=True).copy()
    #print(problems)
    
    # extract true author
    a_true = problems.author[0]
    # print(a_true)
    
    # extract the rest of the corpus
    rest = raw_df[raw_df.work != w].reset_index(drop=True).copy()
    rest_X = X[raw_df.work != w].reset_index(drop = True).copy()
    #print(rest)
    
    
    print('\n\n########################## Working with:', w, '---- by', a_true)
    
    ### Scaling

    sts = StandardScaler(with_mean=False).fit(rest_X)
    rest_scaled_X = sts.transform(rest_X)
    problems_scaled_X = sts.transform(problems_X)


    # set verifier
    rng = np.random.default_rng(42)

    bdi_mm = BDIVerifier(
        metric='minmax', nb_bootstrap_iter=1000, rnd_prop=0.35, random_state=rng
    )

    # fit 
    bdi_mm.fit(rest_scaled_X, rest.author_label)
    
    for label in label_uniques.values:
        print(f"Testing against {label}")
        code = label_uniques.get_loc(label)
        print(
            f"Bootstrap Match Strength (one per chunk, 0-1.0): {bdi_mm.predict_proba(problems_scaled_X, [code] * problems_scaled_X.shape[0])}"
        )
    
    authors = label_uniques.values

    fh = ''
    
    # compare with each author and store results
    for a in authors:

        print(a)

        bdi_mm.predict_proba(
            problems_scaled_X, [label_uniques.get_loc(a)] * problems_scaled_X.shape[0]
        )


        x = pd.DataFrame(dict(zip(problems.tag, bdi_mm._dist_arrays)))

        fh = '03_tests/authors_vs_all/runs/' + a_true + '-' + w + '_vs_' + a + '.csv'

        x.to_csv(fh)

03/18/2025 04:02:02 [ruzicka:INFO] Fitting on 443 documents...
03/18/2025 04:02:02 [ruzicka:INFO] Predicting on 15 documents




########################## Working with: 2 ---- by Condorcet
Testing against Baudeau


03/18/2025 04:02:03 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.197 0.036 0.028 0.589 0.356 0.383 0.018 0.701 0.479 0.313 0.092 0.403
 0.044 0.441 0.542]
Testing against Chastellux


03/18/2025 04:02:04 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.58  0.158 0.357 0.654 0.415 0.528 0.038 0.549 0.385 0.659 0.122 0.54
 0.206 0.158 0.271]
Testing against Deleyre


03/18/2025 04:02:06 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.485 0.784 0.782 0.463 0.206 0.227 0.869 0.375 0.341 0.428 0.904 0.187
 0.591 0.235 0.283]
Testing against dHolbach


03/18/2025 04:02:07 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.179 0.319 0.129 0.707 0.763 0.821 0.076 0.399 0.647 0.643 0.497 0.75
 0.325 0.734 0.411]
Testing against Diderot


03/18/2025 04:02:08 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.274 0.517 0.405 0.356 0.369 0.569 0.472 0.488 0.473 0.549 0.393 0.547
 0.606 0.671 0.386]
Testing against Guibert


03/18/2025 04:02:10 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.079 0.04  0.141 0.27  0.153 0.575 0.182 0.574 0.558 0.094 0.035 0.453
 0.294 0.497 0.765]
Testing against Jaucourt


03/18/2025 04:02:11 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.366 0.487 0.888 0.2   0.059 0.08  0.757 0.181 0.185 0.226 0.599 0.069
 0.775 0.202 0.321]
Testing against Jussieu


03/18/2025 04:02:12 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.048 0.336 0.541 0.091 0.046 0.069 0.51  0.088 0.191 0.058 0.34  0.022
 0.532 0.079 0.222]
Testing against Meister


03/18/2025 04:02:13 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.003 0.47  0.317 0.06  0.058 0.009 0.211 0.015 0.09  0.04  0.397 0.009
 0.442 0.028 0.003]
Testing against Morellet


03/18/2025 04:02:14 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.007 0.059 0.1   0.023 0.042 0.019 0.084 0.05  0.04  0.051 0.213 0.019
 0.243 0.04  0.081]
Testing against Naigeon


03/18/2025 04:02:15 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.    0.514 0.251 0.071 0.02  0.115 0.507 0.018 0.022 0.085 0.632 0.034
 0.448 0.258 0.114]
Testing against Pechmeja


03/18/2025 04:02:17 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.396 0.627 0.441 0.142 0.237 0.413 0.419 0.215 0.388 0.128 0.305 0.281
 0.322 0.408 0.474]
Testing against Raynal


03/18/2025 04:02:18 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.684 0.293 0.131 0.524 0.631 0.632 0.07  0.524 0.481 0.656 0.045 0.374
 0.031 0.147 0.416]
Testing against Rivière


03/18/2025 04:02:19 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.017 0.497 0.053 0.067 0.192 0.163 0.09  0.169 0.252 0.092 0.254 0.052
 0.235 0.358 0.327]
Testing against Saint-Lambert


03/18/2025 04:02:20 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.9   0.835 0.895 0.796 0.851 0.64  0.899 0.515 0.567 0.748 0.927 0.766
 0.787 0.339 0.595]
Testing against Condorcet


03/18/2025 04:02:21 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.218 0.226 0.122 0.483 0.474 0.485 0.195 0.786 0.629 0.412 0.286 0.401
 0.537 0.746 0.676]
Testing against La Grange


03/18/2025 04:02:23 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.025 0.003 0.    0.24  0.097 0.167 0.001 0.559 0.279 0.075 0.001 0.414
 0.    0.53  0.456]
Testing against Marmontel


03/18/2025 04:02:24 [ruzicka:INFO] Predicting on 15 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.843 0.344 0.079 0.399 0.708 0.502 0.134 0.328 0.475 0.728 0.101 0.771
 0.176 0.617 0.374]
Baudeau


03/18/2025 04:02:25 [ruzicka:INFO] Predicting on 15 documents


Chastellux


03/18/2025 04:02:27 [ruzicka:INFO] Predicting on 15 documents


Deleyre


03/18/2025 04:02:28 [ruzicka:INFO] Predicting on 15 documents


dHolbach


03/18/2025 04:02:29 [ruzicka:INFO] Predicting on 15 documents


Diderot


03/18/2025 04:02:31 [ruzicka:INFO] Predicting on 15 documents


Guibert


03/18/2025 04:02:32 [ruzicka:INFO] Predicting on 15 documents


Jaucourt


03/18/2025 04:02:33 [ruzicka:INFO] Predicting on 15 documents


Jussieu


03/18/2025 04:02:35 [ruzicka:INFO] Predicting on 15 documents


Meister


03/18/2025 04:02:36 [ruzicka:INFO] Predicting on 15 documents


Morellet


03/18/2025 04:02:37 [ruzicka:INFO] Predicting on 15 documents


Naigeon


03/18/2025 04:02:38 [ruzicka:INFO] Predicting on 15 documents


Pechmeja


03/18/2025 04:02:39 [ruzicka:INFO] Predicting on 15 documents


Raynal


03/18/2025 04:02:41 [ruzicka:INFO] Predicting on 15 documents


Rivière


03/18/2025 04:02:42 [ruzicka:INFO] Predicting on 15 documents


Saint-Lambert


03/18/2025 04:02:43 [ruzicka:INFO] Predicting on 15 documents


Condorcet


03/18/2025 04:02:44 [ruzicka:INFO] Predicting on 15 documents


La Grange


03/18/2025 04:02:46 [ruzicka:INFO] Predicting on 15 documents


Marmontel


03/18/2025 04:02:47 [ruzicka:INFO] Fitting on 445 documents...
03/18/2025 04:02:47 [ruzicka:INFO] Predicting on 13 documents




########################## Working with: De la Poesie Dramatique ---- by Diderot
Testing against Baudeau


03/18/2025 04:02:48 [ruzicka:INFO] Predicting on 13 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.545 0.047 0.019 0.208 0.066 0.269 0.028 0.049 0.127 0.004 0.011 0.328
 0.024]
Testing against Chastellux


03/18/2025 04:02:49 [ruzicka:INFO] Predicting on 13 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.477 0.26  0.39  0.333 0.087 0.726 0.337 0.58  0.415 0.485 0.269 0.297
 0.398]
Testing against Deleyre


03/18/2025 04:02:51 [ruzicka:INFO] Predicting on 13 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.416 0.352 0.369 0.47  0.175 0.391 0.4   0.467 0.171 0.663 0.136 0.057
 0.555]
Testing against dHolbach


03/18/2025 04:02:52 [ruzicka:INFO] Predicting on 13 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.457 0.115 0.281 0.828 0.169 0.448 0.088 0.256 0.719 0.065 0.105 0.475
 0.161]
Testing against Diderot


03/18/2025 04:02:53 [ruzicka:INFO] Predicting on 13 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.371 0.866 0.713 0.553 0.889 0.817 0.875 0.628 0.939 0.812 0.772 0.837
 0.73 ]
Testing against Guibert


03/18/2025 04:02:54 [ruzicka:INFO] Predicting on 13 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.622 0.302 0.306 0.389 0.116 0.629 0.286 0.287 0.42  0.523 0.074 0.402
 0.178]
Testing against Jaucourt


03/18/2025 04:02:55 [ruzicka:INFO] Predicting on 13 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.041 0.524 0.354 0.208 0.134 0.009 0.359 0.222 0.071 0.446 0.239 0.152
 0.352]
Testing against Jussieu


03/18/2025 04:02:56 [ruzicka:INFO] Predicting on 13 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.076 0.343 0.179 0.08  0.163 0.007 0.339 0.102 0.005 0.225 0.103 0.029
 0.22 ]
Testing against Meister


03/18/2025 04:02:57 [ruzicka:INFO] Predicting on 13 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.001 0.802 0.871 0.003 0.711 0.07  0.65  0.017 0.071 0.605 0.738 0.029
 0.653]
Testing against Morellet


03/18/2025 04:02:58 [ruzicka:INFO] Predicting on 13 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.098 0.309 0.303 0.005 0.467 0.128 0.652 0.103 0.449 0.654 0.743 0.494
 0.499]
Testing against Naigeon


03/18/2025 04:02:58 [ruzicka:INFO] Predicting on 13 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.002 0.847 0.891 0.063 0.985 0.011 0.812 0.049 0.21  0.715 0.885 0.312
 0.894]
Testing against Pechmeja


03/18/2025 04:03:00 [ruzicka:INFO] Predicting on 13 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.417 0.249 0.033 0.451 0.066 0.204 0.192 0.19  0.243 0.077 0.106 0.284
 0.137]
Testing against Raynal


03/18/2025 04:03:01 [ruzicka:INFO] Predicting on 13 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.686 0.123 0.138 0.578 0.101 0.522 0.22  0.625 0.238 0.194 0.283 0.491
 0.067]
Testing against Rivière


03/18/2025 04:03:02 [ruzicka:INFO] Predicting on 13 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.164 0.185 0.2   0.071 0.318 0.181 0.228 0.058 0.286 0.312 0.204 0.676
 0.409]
Testing against Saint-Lambert


03/18/2025 04:03:03 [ruzicka:INFO] Predicting on 13 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.261 0.041 0.12  0.138 0.08  0.062 0.086 0.24  0.109 0.072 0.066 0.019
 0.119]
Testing against Condorcet


03/18/2025 04:03:04 [ruzicka:INFO] Predicting on 13 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.768 0.631 0.431 0.626 0.457 0.399 0.527 0.552 0.536 0.553 0.293 0.608
 0.538]
Testing against La Grange


03/18/2025 04:03:06 [ruzicka:INFO] Predicting on 13 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.383 0.007 0.014 0.158 0.002 0.218 0.007 0.046 0.519 0.04  0.011 0.246
 0.004]
Testing against Marmontel


03/18/2025 04:03:08 [ruzicka:INFO] Predicting on 13 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.588 0.292 0.341 0.758 0.509 0.855 0.407 0.935 0.766 0.397 0.944 0.696
 0.6  ]
Baudeau


03/18/2025 04:03:09 [ruzicka:INFO] Predicting on 13 documents


Chastellux


03/18/2025 04:03:10 [ruzicka:INFO] Predicting on 13 documents


Deleyre


03/18/2025 04:03:11 [ruzicka:INFO] Predicting on 13 documents


dHolbach


03/18/2025 04:03:13 [ruzicka:INFO] Predicting on 13 documents


Diderot


03/18/2025 04:03:14 [ruzicka:INFO] Predicting on 13 documents


Guibert


03/18/2025 04:03:15 [ruzicka:INFO] Predicting on 13 documents


Jaucourt


03/18/2025 04:03:16 [ruzicka:INFO] Predicting on 13 documents


Jussieu


03/18/2025 04:03:17 [ruzicka:INFO] Predicting on 13 documents


Meister


03/18/2025 04:03:18 [ruzicka:INFO] Predicting on 13 documents


Morellet


03/18/2025 04:03:19 [ruzicka:INFO] Predicting on 13 documents


Naigeon


03/18/2025 04:03:20 [ruzicka:INFO] Predicting on 13 documents


Pechmeja


03/18/2025 04:03:21 [ruzicka:INFO] Predicting on 13 documents


Raynal


03/18/2025 04:03:22 [ruzicka:INFO] Predicting on 13 documents


Rivière


03/18/2025 04:03:23 [ruzicka:INFO] Predicting on 13 documents


Saint-Lambert


03/18/2025 04:03:24 [ruzicka:INFO] Predicting on 13 documents


Condorcet


03/18/2025 04:03:25 [ruzicka:INFO] Predicting on 13 documents


La Grange


03/18/2025 04:03:26 [ruzicka:INFO] Predicting on 13 documents


Marmontel


03/18/2025 04:03:28 [ruzicka:INFO] Fitting on 440 documents...
03/18/2025 04:03:28 [ruzicka:INFO] Predicting on 18 documents




########################## Working with: Les Incas ---- by Marmontel
Testing against Baudeau


03/18/2025 04:03:29 [ruzicka:INFO] Predicting on 18 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.273 0.039 0.042 0.233 0.171 0.205 0.308 0.024 0.118 0.028 0.022 0.005
 0.112 0.406 0.14  0.087 0.374 0.071]
Testing against Chastellux


03/18/2025 04:03:31 [ruzicka:INFO] Predicting on 18 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.429 0.428 0.247 0.648 0.733 0.493 0.349 0.225 0.528 0.574 0.604 0.391
 0.598 0.48  0.184 0.064 0.584 0.293]
Testing against Deleyre


03/18/2025 04:03:33 [ruzicka:INFO] Predicting on 18 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.36  0.404 0.181 0.177 0.26  0.603 0.649 0.25  0.622 0.295 0.253 0.347
 0.379 0.349 0.461 0.492 0.368 0.393]
Testing against dHolbach


03/18/2025 04:03:34 [ruzicka:INFO] Predicting on 18 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.469 0.257 0.283 0.305 0.309 0.215 0.206 0.084 0.18  0.091 0.235 0.104
 0.152 0.636 0.7   0.089 0.501 0.77 ]
Testing against Diderot


03/18/2025 04:03:36 [ruzicka:INFO] Predicting on 18 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.636 0.653 0.623 0.563 0.701 0.58  0.608 0.832 0.379 0.494 0.628 0.443
 0.803 0.421 0.672 0.781 0.622 0.448]
Testing against Guibert


03/18/2025 04:03:37 [ruzicka:INFO] Predicting on 18 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.312 0.045 0.104 0.302 0.421 0.168 0.589 0.019 0.325 0.088 0.098 0.036
 0.338 0.313 0.309 0.02  0.338 0.04 ]
Testing against Jaucourt


03/18/2025 04:03:39 [ruzicka:INFO] Predicting on 18 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.113 0.    0.008 0.048 0.17  0.085 0.055 0.696 0.305 0.222 0.002 0.001
 0.098 0.203 0.06  0.661 0.011 0.01 ]
Testing against Jussieu


03/18/2025 04:03:40 [ruzicka:INFO] Predicting on 18 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.018 0.    0.    0.    0.021 0.016 0.002 0.081 0.01  0.001 0.    0.
 0.009 0.009 0.    0.217 0.    0.   ]
Testing against Meister


03/18/2025 04:03:41 [ruzicka:INFO] Predicting on 18 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.002 0.132 0.028 0.002 0.002 0.008 0.012 0.233 0.002 0.039 0.005 0.034
 0.002 0.017 0.017 0.12  0.038 0.026]
Testing against Morellet


03/18/2025 04:03:43 [ruzicka:INFO] Predicting on 18 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.059 0.228 0.213 0.268 0.01  0.029 0.136 0.09  0.007 0.096 0.053 0.103
 0.049 0.011 0.015 0.082 0.043 0.035]
Testing against Naigeon


03/18/2025 04:03:44 [ruzicka:INFO] Predicting on 18 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.003 0.02  0.009 0.013 0.002 0.021 0.005 0.185 0.    0.001 0.012 0.
 0.002 0.012 0.029 0.395 0.02  0.023]
Testing against Pechmeja


03/18/2025 04:03:46 [ruzicka:INFO] Predicting on 18 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.809 0.732 0.895 0.577 0.728 0.657 0.527 0.764 0.631 0.77  0.8   0.982
 0.552 0.46  0.654 0.72  0.66  0.87 ]
Testing against Raynal


03/18/2025 04:03:47 [ruzicka:INFO] Predicting on 18 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.522 0.657 0.656 0.864 0.628 0.666 0.448 0.567 0.88  0.731 0.844 0.588
 0.478 0.448 0.738 0.407 0.66  0.786]
Testing against Rivière


03/18/2025 04:03:49 [ruzicka:INFO] Predicting on 18 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.026 0.059 0.014 0.088 0.014 0.032 0.105 0.063 0.004 0.002 0.01  0.002
 0.072 0.055 0.023 0.134 0.037 0.115]
Testing against Saint-Lambert


03/18/2025 04:03:50 [ruzicka:INFO] Predicting on 18 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.196 0.165 0.169 0.262 0.26  0.226 0.218 0.699 0.389 0.282 0.079 0.117
 0.384 0.439 0.033 0.83  0.417 0.249]
Testing against Condorcet


03/18/2025 04:03:52 [ruzicka:INFO] Predicting on 18 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.486 0.276 0.288 0.258 0.416 0.609 0.551 0.445 0.527 0.644 0.233 0.239
 0.804 0.843 0.397 0.737 0.413 0.426]
Testing against La Grange


03/18/2025 04:03:54 [ruzicka:INFO] Predicting on 18 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.263 0.112 0.1   0.386 0.339 0.63  0.346 0.004 0.327 0.042 0.091 0.126
 0.395 0.416 0.398 0.019 0.344 0.038]
Testing against Marmontel


03/18/2025 04:03:55 [ruzicka:INFO] Predicting on 18 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.891 0.995 0.984 0.86  0.806 0.805 0.928 0.793 0.609 0.955 0.969 0.915
 0.751 0.636 0.892 0.351 0.751 0.798]
Baudeau


03/18/2025 04:03:56 [ruzicka:INFO] Predicting on 18 documents


Chastellux


03/18/2025 04:03:58 [ruzicka:INFO] Predicting on 18 documents


Deleyre


03/18/2025 04:04:00 [ruzicka:INFO] Predicting on 18 documents


dHolbach


03/18/2025 04:04:01 [ruzicka:INFO] Predicting on 18 documents


Diderot


03/18/2025 04:04:03 [ruzicka:INFO] Predicting on 18 documents


Guibert


03/18/2025 04:04:04 [ruzicka:INFO] Predicting on 18 documents


Jaucourt


03/18/2025 04:04:06 [ruzicka:INFO] Predicting on 18 documents


Jussieu


03/18/2025 04:04:07 [ruzicka:INFO] Predicting on 18 documents


Meister


03/18/2025 04:04:08 [ruzicka:INFO] Predicting on 18 documents


Morellet


03/18/2025 04:04:10 [ruzicka:INFO] Predicting on 18 documents


Naigeon


03/18/2025 04:04:11 [ruzicka:INFO] Predicting on 18 documents


Pechmeja


03/18/2025 04:04:13 [ruzicka:INFO] Predicting on 18 documents


Raynal


03/18/2025 04:04:14 [ruzicka:INFO] Predicting on 18 documents


Rivière


03/18/2025 04:04:16 [ruzicka:INFO] Predicting on 18 documents


Saint-Lambert


03/18/2025 04:04:17 [ruzicka:INFO] Predicting on 18 documents


Condorcet


03/18/2025 04:04:19 [ruzicka:INFO] Predicting on 18 documents


La Grange


03/18/2025 04:04:21 [ruzicka:INFO] Predicting on 18 documents


Marmontel
