## Corpus testing

This notebook takes works with known labels and looks how good it is attributed to its author. 

In [1]:
import pandas as pd
import numpy as np

import re

from sklearn.preprocessing import StandardScaler

import logging

logging.basicConfig(level="INFO")
logger = logging.getLogger("ruzicka")

from ruzicka.BDIVerifier import BDIVerifier

In [13]:
# load preprocessed data

corpus = pd.read_csv('03_tests/test_1000_200mfw_rfreq.csv') # rel freq

corpus

Unnamed: 0,work,author,chunk_num,tag,de,la,et,les,vous,le,...,cela,beaucoup,voir,alors,ici,enfin,voit,rendre,cependant,seulement
0,Avis,Baudeau,0,0__Baudeau_Avis,3.7,1.7,2.9,2.7,0.2,2.0,...,0.2,0.1,0.0,0.0,0.0,0.3,0.0,0.0,0.0,0.0
1,Explication,Baudeau,0,0__Baudeau_Explication,4.8,3.9,3.1,3.9,0.9,1.0,...,0.0,0.0,0.1,0.0,0.1,0.1,0.0,0.0,0.0,0.0
2,Idees sur l-administration,Baudeau,0,0__Baudeau_Idees sur l-administration,5.6,2.5,1.7,1.4,0.0,2.3,...,0.0,0.2,0.0,0.0,0.0,0.1,0.0,0.0,0.1,0.1
3,Idees sur les besoins 1,Baudeau,0,0__Baudeau_Idees sur les besoins 1,4.1,2.5,1.8,2.7,0.2,2.6,...,0.0,0.0,0.1,0.0,0.2,0.0,0.0,0.0,0.0,0.0
4,Idees sur les besoins 2,Baudeau,0,0__Baudeau_Idees sur les besoins 2,4.0,2.5,2.3,3.0,0.1,2.5,...,0.0,0.0,0.1,0.0,0.2,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5403,elements T1,Marmontel,99,99__Marmontel_elements T1,6.4,3.2,3.1,2.1,0.1,1.8,...,0.2,0.0,0.0,0.0,0.1,0.0,0.1,0.1,0.0,0.1
5404,elements T2,Marmontel,99,99__Marmontel_elements T2,4.9,3.4,3.8,2.3,0.0,1.7,...,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.3
5405,elements T3,Marmontel,99,99__Marmontel_elements T3,5.3,2.8,4.2,0.4,0.0,2.7,...,0.0,0.0,0.0,0.2,0.0,0.0,0.1,0.0,0.0,0.1
5406,Manuel d-epictete,Naigeon,99,99__Naigeon_Manuel d-epictete,4.2,3.5,3.0,1.0,0.0,1.8,...,0.1,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.1


In [14]:
raw_df = corpus.iloc[:, :4] # leaves metadata
X = corpus.iloc[:, 4:] # leaves only word columns

raw_df

Unnamed: 0,work,author,chunk_num,tag
0,Avis,Baudeau,0,0__Baudeau_Avis
1,Explication,Baudeau,0,0__Baudeau_Explication
2,Idees sur l-administration,Baudeau,0,0__Baudeau_Idees sur l-administration
3,Idees sur les besoins 1,Baudeau,0,0__Baudeau_Idees sur les besoins 1
4,Idees sur les besoins 2,Baudeau,0,0__Baudeau_Idees sur les besoins 2
...,...,...,...,...
5403,elements T1,Marmontel,99,99__Marmontel_elements T1
5404,elements T2,Marmontel,99,99__Marmontel_elements T2
5405,elements T3,Marmontel,99,99__Marmontel_elements T3
5406,Manuel d-epictete,Naigeon,99,99__Naigeon_Manuel d-epictete


In [15]:
# numer of rows and columns in the X (word frequencies subset)
X.shape

(5408, 200)

In [16]:
# create numeric author labels
labels, label_uniques = raw_df.author.factorize()
raw_df.insert(1, "author_label", labels)
raw_df

Unnamed: 0,work,author_label,author,chunk_num,tag
0,Avis,0,Baudeau,0,0__Baudeau_Avis
1,Explication,0,Baudeau,0,0__Baudeau_Explication
2,Idees sur l-administration,0,Baudeau,0,0__Baudeau_Idees sur l-administration
3,Idees sur les besoins 1,0,Baudeau,0,0__Baudeau_Idees sur les besoins 1
4,Idees sur les besoins 2,0,Baudeau,0,0__Baudeau_Idees sur les besoins 2
...,...,...,...,...,...
5403,elements T1,10,Marmontel,99,99__Marmontel_elements T1
5404,elements T2,10,Marmontel,99,99__Marmontel_elements T2
5405,elements T3,10,Marmontel,99,99__Marmontel_elements T3
5406,Manuel d-epictete,13,Naigeon,99,99__Naigeon_Manuel d-epictete


In [17]:
# check unique authors
label_uniques.values

array(['Baudeau', 'Chastellux', 'Condorcet', 'Deleyre', 'dHolbach',
       'Diderot', 'Guibert', 'Jaucourt', 'Jussieu', 'La Grange',
       'Marmontel', 'Meister', 'Morellet', 'Naigeon', 'Pechmeja',
       'Raynal', 'Rivière', 'Saint-Lambert'], dtype=object)

In [18]:
# set verifier
rng = np.random.default_rng(42)

bdi_mm = BDIVerifier(
    metric='minmax', nb_bootstrap_iter=1000, rnd_prop=0.35, random_state=rng
)

In [80]:
# Loop that takes a work from each author
# and looks whether its (known) author is a good much according to BDI;
# nb: not getting results for a work vs all authors
# (writing results in a .csv, visualisation done later with R)

# select an author, look for his unique works
for a in label_uniques.values:
    #print(author)
    
    # take a subset with only works by an author, find unique works
    unique_works = raw_df[raw_df.author == a]['work'].unique()
    # unique_works is a numpy array of works of each author
    
    for w in unique_works:
        print(f"\n\n#######################\n{a}: {w}\n")
        
        # select each work as a problem set (problems-metadata, problems_X-word freqs)
        problems = raw_df[raw_df.work == w].reset_index(drop=True).copy()
        problems_X = X[raw_df.work == w].reset_index(drop=True).copy()
        print(problems.iloc[1:2,:])
        
        # extract the rest of the corpus
        rest = raw_df[raw_df.work != w].reset_index(drop=True).copy()
        rest_X = X[raw_df.work != w].reset_index(drop=True).copy()
        #print(f"\nRest of the corpus:\n{rest.iloc[1:6,:]}")
        
        # scaling
        ss = StandardScaler(with_mean=False).fit(rest_X)
        rest_scaled_X = ss.transform(rest_X)
        problems_scaled_X = ss.transform(problems_X)
        
        #print(problems_scaled_X)
        
        # fit 
        bdi_mm.fit(rest_scaled_X, rest.author_label)
        
        # prediction
        bdi_mm.predict_proba(
            problems_scaled_X, 
            # get_loc - select the author of the work (? is that right??)
            [label_uniques.get_loc(a)] * problems_scaled_X.shape[0]
        )
        
        # output results
        res = pd.DataFrame(dict(zip(problems.tag, bdi_mm._dist_arrays)))
        
        # writing
        fh = '03_tests/tests/ch1000/mfw200/'+a+'_'+w+'.csv'
        res.to_csv(fh)
        print('Results written in:',fh)
        
        rest_scaled_X = None
        res = None

12/19/2024 05:21:18 [ruzicka:INFO] Fitting on 5358 documents...
12/19/2024 05:21:18 [ruzicka:INFO] Predicting on 50 documents




#######################
Baudeau: Avis 

    work  author_label   author  chunk_num               tag
1  Avis              0  Baudeau          1  1__Baudeau_Avis 


12/19/2024 05:21:31 [ruzicka:INFO] Fitting on 5386 documents...
12/19/2024 05:21:31 [ruzicka:INFO] Predicting on 22 documents


Results written in: 03_tests/tests/ch1000/mfw200/Baudeau_Avis .csv


#######################
Baudeau: Explication

          work  author_label   author  chunk_num                     tag
1  Explication             0  Baudeau          1  1__Baudeau_Explication


12/19/2024 05:21:38 [ruzicka:INFO] Fitting on 5390 documents...
12/19/2024 05:21:38 [ruzicka:INFO] Predicting on 18 documents


Results written in: 03_tests/tests/ch1000/mfw200/Baudeau_Explication.csv


#######################
Baudeau: Idees sur l-administration

                         work  author_label   author  chunk_num  \
1  Idees sur l-administration             0  Baudeau          1   

                                     tag  
1  1__Baudeau_Idees sur l-administration  


12/19/2024 05:21:44 [ruzicka:INFO] Fitting on 5336 documents...
12/19/2024 05:21:44 [ruzicka:INFO] Predicting on 72 documents


Results written in: 03_tests/tests/ch1000/mfw200/Baudeau_Idees sur l-administration.csv


#######################
Baudeau: Idees sur les besoins 1

                      work  author_label   author  chunk_num  \
1  Idees sur les besoins 1             0  Baudeau          1   

                                  tag  
1  1__Baudeau_Idees sur les besoins 1  


12/19/2024 05:22:02 [ruzicka:INFO] Fitting on 5336 documents...
12/19/2024 05:22:02 [ruzicka:INFO] Predicting on 72 documents


Results written in: 03_tests/tests/ch1000/mfw200/Baudeau_Idees sur les besoins 1.csv


#######################
Baudeau: Idees sur les besoins 2

                      work  author_label   author  chunk_num  \
1  Idees sur les besoins 2             0  Baudeau          1   

                                  tag  
1  1__Baudeau_Idees sur les besoins 2  


12/19/2024 05:22:20 [ruzicka:INFO] Fitting on 5406 documents...
12/19/2024 05:22:20 [ruzicka:INFO] Predicting on 2 documents


Results written in: 03_tests/tests/ch1000/mfw200/Baudeau_Idees sur les besoins 2.csv


#######################
Baudeau: Lettre du fermier

                work  author_label   author  chunk_num  \
1  Lettre du fermier             0  Baudeau          1   

                            tag  
1  1__Baudeau_Lettre du fermier  


12/19/2024 05:22:21 [ruzicka:INFO] Fitting on 5380 documents...
12/19/2024 05:22:21 [ruzicka:INFO] Predicting on 28 documents


Results written in: 03_tests/tests/ch1000/mfw200/Baudeau_Lettre du fermier.csv


#######################
Baudeau: Lettres  a un magistraTtxt

                         work  author_label   author  chunk_num  \
1  Lettres  a un magistraTtxt             0  Baudeau          1   

                                     tag  
1  1__Baudeau_Lettres  a un magistraTtxt  


12/19/2024 05:22:29 [ruzicka:INFO] Fitting on 5402 documents...
12/19/2024 05:22:29 [ruzicka:INFO] Predicting on 6 documents


Results written in: 03_tests/tests/ch1000/mfw200/Baudeau_Lettres  a un magistraTtxt.csv


#######################
Baudeau: Resultats

        work  author_label   author  chunk_num                   tag
1  Resultats             0  Baudeau          1  1__Baudeau_Resultats


12/19/2024 05:22:31 [ruzicka:INFO] Fitting on 5271 documents...
12/19/2024 05:22:31 [ruzicka:INFO] Predicting on 137 documents


Results written in: 03_tests/tests/ch1000/mfw200/Baudeau_Resultats.csv


#######################
Chastellux: De la felicite

             work  author_label      author  chunk_num  \
1  De la felicite             1  Chastellux          1   

                            tag  
1  1__Chastellux_De la felicite  


12/19/2024 05:23:10 [ruzicka:INFO] Fitting on 5372 documents...
12/19/2024 05:23:10 [ruzicka:INFO] Predicting on 36 documents


Results written in: 03_tests/tests/ch1000/mfw200/Chastellux_De la felicite.csv


#######################
Chastellux: eloge

    work  author_label    author  chunk_num                tag
1  eloge            14  Pechmeja          0  0__Pechmeja_eloge


12/19/2024 05:23:25 [ruzicka:INFO] Fitting on 5331 documents...
12/19/2024 05:23:25 [ruzicka:INFO] Predicting on 77 documents


Results written in: 03_tests/tests/ch1000/mfw200/Chastellux_eloge.csv


#######################
Chastellux: Voyage de Newport

                work  author_label      author  chunk_num  \
1  Voyage de Newport             1  Chastellux          1   

                               tag  
1  1__Chastellux_Voyage de Newport  


12/19/2024 05:23:50 [ruzicka:INFO] Fitting on 5326 documents...
12/19/2024 05:23:50 [ruzicka:INFO] Predicting on 82 documents


Results written in: 03_tests/tests/ch1000/mfw200/Chastellux_Voyage de Newport.csv


#######################
Chastellux: Voyages T1

         work  author_label      author  chunk_num                       tag
1  Voyages T1             1  Chastellux          1  1__Chastellux_Voyages T1


12/19/2024 05:24:18 [ruzicka:INFO] Fitting on 5333 documents...
12/19/2024 05:24:18 [ruzicka:INFO] Predicting on 75 documents


Results written in: 03_tests/tests/ch1000/mfw200/Chastellux_Voyages T1.csv


#######################
Chastellux: Voyages T2

         work  author_label      author  chunk_num                       tag
1  Voyages T2             1  Chastellux          1  1__Chastellux_Voyages T2


12/19/2024 05:24:43 [ruzicka:INFO] Fitting on 5336 documents...
12/19/2024 05:24:43 [ruzicka:INFO] Predicting on 72 documents


Results written in: 03_tests/tests/ch1000/mfw200/Chastellux_Voyages T2.csv


#######################
Condorcet: 11

  work  author_label     author  chunk_num              tag
1   11             2  Condorcet          1  1__Condorcet_11


12/19/2024 05:25:15 [ruzicka:INFO] Fitting on 5340 documents...
12/19/2024 05:25:15 [ruzicka:INFO] Predicting on 68 documents


Results written in: 03_tests/tests/ch1000/mfw200/Condorcet_11.csv


#######################
Condorcet: 12

  work  author_label     author  chunk_num              tag
1   12             2  Condorcet          1  1__Condorcet_12


12/19/2024 05:25:46 [ruzicka:INFO] Fitting on 5352 documents...
12/19/2024 05:25:46 [ruzicka:INFO] Predicting on 56 documents


Results written in: 03_tests/tests/ch1000/mfw200/Condorcet_12.csv


#######################
Condorcet: 13

  work  author_label     author  chunk_num              tag
1   13             2  Condorcet          1  1__Condorcet_13


12/19/2024 05:26:12 [ruzicka:INFO] Fitting on 5350 documents...
12/19/2024 05:26:12 [ruzicka:INFO] Predicting on 58 documents


Results written in: 03_tests/tests/ch1000/mfw200/Condorcet_13.csv


#######################
Condorcet: 15

  work  author_label     author  chunk_num              tag
1   15             2  Condorcet          1  1__Condorcet_15


12/19/2024 05:26:39 [ruzicka:INFO] Fitting on 5338 documents...
12/19/2024 05:26:39 [ruzicka:INFO] Predicting on 70 documents


Results written in: 03_tests/tests/ch1000/mfw200/Condorcet_15.csv


#######################
Condorcet: 17

  work  author_label     author  chunk_num              tag
1   17             2  Condorcet          1  1__Condorcet_17


12/19/2024 05:27:11 [ruzicka:INFO] Fitting on 5339 documents...
12/19/2024 05:27:11 [ruzicka:INFO] Predicting on 69 documents


Results written in: 03_tests/tests/ch1000/mfw200/Condorcet_17.csv


#######################
Condorcet: 19

  work  author_label     author  chunk_num              tag
1   19             2  Condorcet          1  1__Condorcet_19


12/19/2024 05:27:42 [ruzicka:INFO] Fitting on 5338 documents...
12/19/2024 05:27:42 [ruzicka:INFO] Predicting on 70 documents


Results written in: 03_tests/tests/ch1000/mfw200/Condorcet_19.csv


#######################
Condorcet: 20

  work  author_label     author  chunk_num              tag
1   20             2  Condorcet          1  1__Condorcet_20


12/19/2024 05:28:13 [ruzicka:INFO] Fitting on 5362 documents...
12/19/2024 05:28:13 [ruzicka:INFO] Predicting on 46 documents


Results written in: 03_tests/tests/ch1000/mfw200/Condorcet_20.csv


#######################
Condorcet: 21

  work  author_label     author  chunk_num              tag
1   21             2  Condorcet          1  1__Condorcet_21


12/19/2024 05:28:35 [ruzicka:INFO] Fitting on 5361 documents...
12/19/2024 05:28:35 [ruzicka:INFO] Predicting on 47 documents


Results written in: 03_tests/tests/ch1000/mfw200/Condorcet_21.csv


#######################
Deleyre: Analyse de la philosophie

                        work  author_label   author  chunk_num  \
1  Analyse de la philosophie             3  Deleyre          1   

                                    tag  
1  1__Deleyre_Analyse de la philosophie  


12/19/2024 05:28:45 [ruzicka:INFO] Fitting on 5396 documents...
12/19/2024 05:28:45 [ruzicka:INFO] Predicting on 12 documents


Results written in: 03_tests/tests/ch1000/mfw200/Deleyre_Analyse de la philosophie.csv


#######################
Deleyre: eloge de M Roux

              work  author_label   author  chunk_num  \
1  eloge de M Roux             3  Deleyre          1   

                          tag  
1  1__Deleyre_eloge de M Roux  


12/19/2024 05:28:48 [ruzicka:INFO] Fitting on 5343 documents...
12/19/2024 05:28:48 [ruzicka:INFO] Predicting on 65 documents


Results written in: 03_tests/tests/ch1000/mfw200/Deleyre_eloge de M Roux.csv


#######################
Deleyre: Essai sur la vie

               work  author_label   author  chunk_num  \
1  Essai sur la vie             3  Deleyre          1   

                           tag  
1  1__Deleyre_Essai sur la vie  


12/19/2024 05:29:01 [ruzicka:INFO] Fitting on 5397 documents...
12/19/2024 05:29:01 [ruzicka:INFO] Predicting on 11 documents


Results written in: 03_tests/tests/ch1000/mfw200/Deleyre_Essai sur la vie.csv


#######################
Deleyre: Fanatisme

        work  author_label   author  chunk_num                   tag
1  Fanatisme             3  Deleyre          1  1__Deleyre_Fanatisme


12/19/2024 05:29:03 [ruzicka:INFO] Fitting on 5392 documents...
12/19/2024 05:29:03 [ruzicka:INFO] Predicting on 16 documents


Results written in: 03_tests/tests/ch1000/mfw200/Deleyre_Fanatisme.csv


#######################
Deleyre: Idees sur l-education

                    work  author_label   author  chunk_num  \
1  Idees sur l-education             3  Deleyre          1   

                                tag  
1  1__Deleyre_Idees sur l-education  


12/19/2024 05:29:07 [ruzicka:INFO] Fitting on 5360 documents...
12/19/2024 05:29:07 [ruzicka:INFO] Predicting on 48 documents


Results written in: 03_tests/tests/ch1000/mfw200/Deleyre_Idees sur l-education.csv


#######################
Deleyre: L-esprit de Saint-evremont

                         work  author_label   author  chunk_num  \
1  L-esprit de Saint-evremont             3  Deleyre          1   

                                     tag  
1  1__Deleyre_L-esprit de Saint-evremont  


12/19/2024 05:29:17 [ruzicka:INFO] Fitting on 5405 documents...
12/19/2024 05:29:17 [ruzicka:INFO] Predicting on 3 documents


Results written in: 03_tests/tests/ch1000/mfw200/Deleyre_L-esprit de Saint-evremont.csv


#######################
Deleyre: Rapport pour des Corses expatries

                                work  author_label   author  chunk_num  \
1  Rapport pour des Corses expatries             3  Deleyre          1   

                                            tag  
1  1__Deleyre_Rapport pour des Corses expatries  


12/19/2024 05:29:18 [ruzicka:INFO] Fitting on 5403 documents...
12/19/2024 05:29:18 [ruzicka:INFO] Predicting on 5 documents


Results written in: 03_tests/tests/ch1000/mfw200/Deleyre_Rapport pour des Corses expatries.csv


#######################
Deleyre: Sur la question

              work  author_label   author  chunk_num  \
1  Sur la question             3  Deleyre          1   

                          tag  
1  1__Deleyre_Sur la question  


12/19/2024 05:29:19 [ruzicka:INFO] Fitting on 5345 documents...
12/19/2024 05:29:19 [ruzicka:INFO] Predicting on 63 documents


Results written in: 03_tests/tests/ch1000/mfw200/Deleyre_Sur la question.csv


#######################
dHolbach: ethocratie

         work  author_label    author  chunk_num                     tag
1  ethocratie             4  dHolbach          1  1__dHolbach_ethocratie


12/19/2024 05:29:40 [ruzicka:INFO] Fitting on 5369 documents...
12/19/2024 05:29:40 [ruzicka:INFO] Predicting on 39 documents


Results written in: 03_tests/tests/ch1000/mfw200/dHolbach_ethocratie.csv


#######################
dHolbach: La contagion 2

             work  author_label    author  chunk_num  \
1  La contagion 2             4  dHolbach          1   

                          tag  
1  1__dHolbach_La contagion 2  


12/19/2024 05:29:54 [ruzicka:INFO] Fitting on 5385 documents...
12/19/2024 05:29:54 [ruzicka:INFO] Predicting on 23 documents


Results written in: 03_tests/tests/ch1000/mfw200/dHolbach_La contagion 2.csv


#######################
dHolbach: La Morale 1

          work  author_label    author  chunk_num                      tag
1  La Morale 1             4  dHolbach          1  1__dHolbach_La Morale 1


12/19/2024 05:30:03 [ruzicka:INFO] Fitting on 5387 documents...
12/19/2024 05:30:03 [ruzicka:INFO] Predicting on 21 documents


Results written in: 03_tests/tests/ch1000/mfw200/dHolbach_La Morale 1.csv


#######################
dHolbach: La Morale 2

          work  author_label    author  chunk_num                      tag
1  La Morale 2             4  dHolbach          1  1__dHolbach_La Morale 2


12/19/2024 05:30:10 [ruzicka:INFO] Fitting on 5342 documents...
12/19/2024 05:30:10 [ruzicka:INFO] Predicting on 66 documents


Results written in: 03_tests/tests/ch1000/mfw200/dHolbach_La Morale 2.csv


#######################
dHolbach: La Morale 4

          work  author_label    author  chunk_num                      tag
1  La Morale 4             4  dHolbach          1  1__dHolbach_La Morale 4


12/19/2024 05:30:32 [ruzicka:INFO] Fitting on 5343 documents...
12/19/2024 05:30:32 [ruzicka:INFO] Predicting on 65 documents


Results written in: 03_tests/tests/ch1000/mfw200/dHolbach_La Morale 4.csv


#######################
dHolbach: Le bon sens

          work  author_label    author  chunk_num                      tag
1  Le bon sens             4  dHolbach          1  1__dHolbach_Le bon sens


12/19/2024 05:30:54 [ruzicka:INFO] Fitting on 5366 documents...
12/19/2024 05:30:54 [ruzicka:INFO] Predicting on 42 documents


Results written in: 03_tests/tests/ch1000/mfw200/dHolbach_Le bon sens.csv


#######################
dHolbach: Le Christianisme

               work  author_label    author  chunk_num  \
1  Le Christianisme             4  dHolbach          1   

                            tag  
1  1__dHolbach_Le Christianisme  


12/19/2024 05:31:09 [ruzicka:INFO] Fitting on 5360 documents...
12/19/2024 05:31:09 [ruzicka:INFO] Predicting on 48 documents


Results written in: 03_tests/tests/ch1000/mfw200/dHolbach_Le Christianisme.csv


#######################
dHolbach: Système Social 3

                work  author_label    author  chunk_num  \
1  Système Social 3             4  dHolbach          1   

                             tag  
1  1__dHolbach_Système Social 3  


12/19/2024 05:31:26 [ruzicka:INFO] Fitting on 5404 documents...
12/19/2024 05:31:26 [ruzicka:INFO] Predicting on 4 documents


Results written in: 03_tests/tests/ch1000/mfw200/dHolbach_Système Social 3.csv


#######################
Diderot: De la suffisance

               work  author_label   author  chunk_num  \
1  De la suffisance             5  Diderot          1   

                           tag  
1  1__Diderot_De la suffisance  


12/19/2024 05:31:26 [ruzicka:INFO] Fitting on 5402 documents...
12/19/2024 05:31:26 [ruzicka:INFO] Predicting on 6 documents


Results written in: 03_tests/tests/ch1000/mfw200/Diderot_De la suffisance.csv


#######################
Diderot: eloge de Richardson

                  work  author_label   author  chunk_num  \
1  eloge de Richardson             5  Diderot          1   

                              tag  
1  1__Diderot_eloge de Richardson  


12/19/2024 05:31:28 [ruzicka:INFO] Fitting on 5392 documents...
12/19/2024 05:31:28 [ruzicka:INFO] Predicting on 16 documents


Results written in: 03_tests/tests/ch1000/mfw200/Diderot_eloge de Richardson.csv


#######################
Diderot: L-Histoire et le secret de

                         work  author_label   author  chunk_num  \
1  L-Histoire et le secret de             5  Diderot          1   

                                     tag  
1  1__Diderot_L-Histoire et le secret de  


12/19/2024 05:31:30 [ruzicka:INFO] Fitting on 5377 documents...
12/19/2024 05:31:30 [ruzicka:INFO] Predicting on 31 documents


Results written in: 03_tests/tests/ch1000/mfw200/Diderot_L-Histoire et le secret de.csv


#######################
Diderot: Lettre sur les sourds et muets

                             work  author_label   author  chunk_num  \
1  Lettre sur les sourds et muets             5  Diderot          1   

                                         tag  
1  1__Diderot_Lettre sur les sourds et muets  


12/19/2024 05:31:35 [ruzicka:INFO] Fitting on 5378 documents...
12/19/2024 05:31:35 [ruzicka:INFO] Predicting on 30 documents


Results written in: 03_tests/tests/ch1000/mfw200/Diderot_Lettre sur les sourds et muets.csv


#######################
Diderot: Pensees Detachees

                work  author_label   author  chunk_num  \
1  Pensees Detachees             5  Diderot          1   

                            tag  
1  1__Diderot_Pensees Detachees  


12/19/2024 05:31:40 [ruzicka:INFO] Fitting on 5406 documents...
12/19/2024 05:31:40 [ruzicka:INFO] Predicting on 2 documents


Results written in: 03_tests/tests/ch1000/mfw200/Diderot_Pensees Detachees.csv


#######################
Diderot: Principes Philosophiques sur la matière

                                       work  author_label   author  chunk_num  \
1  Principes Philosophiques sur la matière             5  Diderot          1   

                                                 tag  
1  1__Diderot_Principes Philosophiques sur la mat...  


12/19/2024 05:31:41 [ruzicka:INFO] Fitting on 5384 documents...
12/19/2024 05:31:41 [ruzicka:INFO] Predicting on 24 documents


Results written in: 03_tests/tests/ch1000/mfw200/Diderot_Principes Philosophiques sur la matière.csv


#######################
Diderot: Salon 1763

         work  author_label   author  chunk_num                    tag
1  Salon 1763             5  Diderot          1  1__Diderot_Salon 1763


12/19/2024 05:31:44 [ruzicka:INFO] Fitting on 5387 documents...
12/19/2024 05:31:44 [ruzicka:INFO] Predicting on 21 documents


Results written in: 03_tests/tests/ch1000/mfw200/Diderot_Salon 1763.csv


#######################
Diderot: Suite de l-apologie

                  work  author_label   author  chunk_num  \
1  Suite de l-apologie             5  Diderot          1   

                              tag  
1  1__Diderot_Suite de l-apologie  


12/19/2024 05:31:48 [ruzicka:INFO] Fitting on 5396 documents...
12/19/2024 05:31:48 [ruzicka:INFO] Predicting on 12 documents


Results written in: 03_tests/tests/ch1000/mfw200/Diderot_Suite de l-apologie.csv


#######################
Guibert: Discurs

      work  author_label   author  chunk_num                 tag
1  Discurs             6  Guibert          1  1__Guibert_Discurs


12/19/2024 05:31:53 [ruzicka:INFO] Fitting on 5370 documents...
12/19/2024 05:31:53 [ruzicka:INFO] Predicting on 38 documents


Results written in: 03_tests/tests/ch1000/mfw200/Guibert_Discurs.csv


#######################
Guibert: Eloge du roi

           work  author_label   author  chunk_num                      tag
1  Eloge du roi             6  Guibert          1  1__Guibert_Eloge du roi


12/19/2024 05:32:10 [ruzicka:INFO] Fitting on 5303 documents...
12/19/2024 05:32:10 [ruzicka:INFO] Predicting on 105 documents


Results written in: 03_tests/tests/ch1000/mfw200/Guibert_Eloge du roi.csv


#######################
Guibert: Essai generale

             work  author_label   author  chunk_num                        tag
1  Essai generale             6  Guibert          1  1__Guibert_Essai generale


12/19/2024 05:32:49 [ruzicka:INFO] Fitting on 5302 documents...
12/19/2024 05:32:49 [ruzicka:INFO] Predicting on 106 documents


Results written in: 03_tests/tests/ch1000/mfw200/Guibert_Essai generale.csv


#######################
Guibert: Oeuvres militaires T1

                    work  author_label   author  chunk_num  \
1  Oeuvres militaires T1             6  Guibert          1   

                                tag  
1  1__Guibert_Oeuvres militaires T1  


12/19/2024 05:33:28 [ruzicka:INFO] Fitting on 5341 documents...
12/19/2024 05:33:28 [ruzicka:INFO] Predicting on 67 documents


Results written in: 03_tests/tests/ch1000/mfw200/Guibert_Oeuvres militaires T1.csv


#######################
Guibert: Oeuvres militaires T2

                    work  author_label   author  chunk_num  \
1  Oeuvres militaires T2             6  Guibert          1   

                                tag  
1  1__Guibert_Oeuvres militaires T2  


12/19/2024 05:33:55 [ruzicka:INFO] Fitting on 5347 documents...
12/19/2024 05:33:55 [ruzicka:INFO] Predicting on 61 documents


Results written in: 03_tests/tests/ch1000/mfw200/Guibert_Oeuvres militaires T2.csv


#######################
Guibert: Oeuvres militaires T3

                    work  author_label   author  chunk_num  \
1  Oeuvres militaires T3             6  Guibert          1   

                                tag  
1  1__Guibert_Oeuvres militaires T3  


12/19/2024 05:34:20 [ruzicka:INFO] Fitting on 5347 documents...
12/19/2024 05:34:20 [ruzicka:INFO] Predicting on 61 documents


Results written in: 03_tests/tests/ch1000/mfw200/Guibert_Oeuvres militaires T3.csv


#######################
Guibert: Oeuvres militaires T4

                    work  author_label   author  chunk_num  \
1  Oeuvres militaires T4             6  Guibert          1   

                                tag  
1  1__Guibert_Oeuvres militaires T4  


12/19/2024 05:34:45 [ruzicka:INFO] Fitting on 5368 documents...
12/19/2024 05:34:45 [ruzicka:INFO] Predicting on 40 documents


Results written in: 03_tests/tests/ch1000/mfw200/Guibert_Oeuvres militaires T4.csv


#######################
Jaucourt: Ency 1-7

       work  author_label    author  chunk_num                   tag
1  Ency 1-7             7  Jaucourt          1  1__Jaucourt_Ency 1-7


12/19/2024 05:35:00 [ruzicka:INFO] Fitting on 5387 documents...
12/19/2024 05:35:00 [ruzicka:INFO] Predicting on 21 documents


Results written in: 03_tests/tests/ch1000/mfw200/Jaucourt_Ency 1-7.csv


#######################
Jaucourt: Ency 10

      work  author_label    author  chunk_num                  tag
1  Ency 10             7  Jaucourt          1  1__Jaucourt_Ency 10


12/19/2024 05:35:07 [ruzicka:INFO] Fitting on 5391 documents...
12/19/2024 05:35:07 [ruzicka:INFO] Predicting on 17 documents


Results written in: 03_tests/tests/ch1000/mfw200/Jaucourt_Ency 10.csv


#######################
Jaucourt: Ency 11

      work  author_label    author  chunk_num                  tag
1  Ency 11             7  Jaucourt          1  1__Jaucourt_Ency 11


12/19/2024 05:35:14 [ruzicka:INFO] Fitting on 5391 documents...
12/19/2024 05:35:14 [ruzicka:INFO] Predicting on 17 documents


Results written in: 03_tests/tests/ch1000/mfw200/Jaucourt_Ency 11.csv


#######################
Jaucourt: Ency 12

      work  author_label    author  chunk_num                  tag
1  Ency 12             7  Jaucourt          1  1__Jaucourt_Ency 12


12/19/2024 05:35:21 [ruzicka:INFO] Fitting on 5390 documents...
12/19/2024 05:35:21 [ruzicka:INFO] Predicting on 18 documents


Results written in: 03_tests/tests/ch1000/mfw200/Jaucourt_Ency 12.csv


#######################
Jaucourt: Ency 8

     work  author_label    author  chunk_num                 tag
1  Ency 8             7  Jaucourt          1  1__Jaucourt_Ency 8


12/19/2024 05:35:27 [ruzicka:INFO] Fitting on 5386 documents...
12/19/2024 05:35:27 [ruzicka:INFO] Predicting on 22 documents


Results written in: 03_tests/tests/ch1000/mfw200/Jaucourt_Ency 8.csv


#######################
Jaucourt: Ency 9

     work  author_label    author  chunk_num                 tag
1  Ency 9             7  Jaucourt          1  1__Jaucourt_Ency 9


12/19/2024 05:35:36 [ruzicka:INFO] Fitting on 5288 documents...
12/19/2024 05:35:36 [ruzicka:INFO] Predicting on 120 documents


Results written in: 03_tests/tests/ch1000/mfw200/Jaucourt_Ency 9.csv


#######################
Jaucourt: Essais de teodicee T1

                    work  author_label    author  chunk_num  \
1  Essais de teodicee T1             7  Jaucourt          1   

                                 tag  
1  1__Jaucourt_Essais de teodicee T1  


12/19/2024 05:36:11 [ruzicka:INFO] Fitting on 5287 documents...
12/19/2024 05:36:11 [ruzicka:INFO] Predicting on 121 documents


Results written in: 03_tests/tests/ch1000/mfw200/Jaucourt_Essais de teodicee T1.csv


#######################
Jaucourt: Essais de teodicee T2

                    work  author_label    author  chunk_num  \
1  Essais de teodicee T2             7  Jaucourt          1   

                                 tag  
1  1__Jaucourt_Essais de teodicee T2  


12/19/2024 05:36:46 [ruzicka:INFO] Fitting on 5401 documents...
12/19/2024 05:36:46 [ruzicka:INFO] Predicting on 7 documents


Results written in: 03_tests/tests/ch1000/mfw200/Jaucourt_Essais de teodicee T2.csv


#######################
Jussieu: Exposition d-un nouvel ordre

                           work  author_label   author  chunk_num  \
1  Exposition d-un nouvel ordre             8  Jussieu          1   

                                       tag  
1  1__Jussieu_Exposition d-un nouvel ordre  


12/19/2024 05:36:48 [ruzicka:INFO] Fitting on 5389 documents...
12/19/2024 05:36:48 [ruzicka:INFO] Predicting on 19 documents


Results written in: 03_tests/tests/ch1000/mfw200/Jussieu_Exposition d-un nouvel ordre.csv


#######################
Jussieu: HDI T1-4

       work  author_label   author  chunk_num                  tag
1  HDI T1-4             8  Jussieu          1  1__Jussieu_HDI T1-4


12/19/2024 05:36:53 [ruzicka:INFO] Fitting on 5346 documents...
12/19/2024 05:36:53 [ruzicka:INFO] Predicting on 62 documents


Results written in: 03_tests/tests/ch1000/mfw200/Jussieu_HDI T1-4.csv


#######################
Jussieu: Memoire caractères 1-14

                       work  author_label   author  chunk_num  \
1  Memoire caractères 1-14             8  Jussieu          1   

                                   tag  
1  1__Jussieu_Memoire caractères 1-14  


12/19/2024 05:37:08 [ruzicka:INFO] Fitting on 5399 documents...
12/19/2024 05:37:08 [ruzicka:INFO] Predicting on 9 documents


Results written in: 03_tests/tests/ch1000/mfw200/Jussieu_Memoire caractères 1-14.csv


#######################
Jussieu: Memoire famille

              work  author_label   author  chunk_num  \
1  Memoire famille             8  Jussieu          1   

                          tag  
1  1__Jussieu_Memoire famille  


12/19/2024 05:37:10 [ruzicka:INFO] Fitting on 5393 documents...
12/19/2024 05:37:10 [ruzicka:INFO] Predicting on 15 documents


Results written in: 03_tests/tests/ch1000/mfw200/Jussieu_Memoire famille.csv


#######################
Jussieu: Methode naturelle

                work  author_label   author  chunk_num  \
1  Methode naturelle             8  Jussieu          1   

                            tag  
1  1__Jussieu_Methode naturelle  


12/19/2024 05:37:14 [ruzicka:INFO] Fitting on 5382 documents...
12/19/2024 05:37:14 [ruzicka:INFO] Predicting on 26 documents


Results written in: 03_tests/tests/ch1000/mfw200/Jussieu_Methode naturelle.csv


#######################
Jussieu: Notice Historique 1-6

                    work  author_label   author  chunk_num  \
1  Notice Historique 1-6             8  Jussieu          1   

                                tag  
1  1__Jussieu_Notice Historique 1-6  


12/19/2024 05:37:21 [ruzicka:INFO] Fitting on 5393 documents...
12/19/2024 05:37:21 [ruzicka:INFO] Predicting on 15 documents


Results written in: 03_tests/tests/ch1000/mfw200/Jussieu_Notice Historique 1-6.csv


#######################
Jussieu: Principes

        work  author_label   author  chunk_num                   tag
1  Principes             8  Jussieu          1  1__Jussieu_Principes


12/19/2024 05:37:25 [ruzicka:INFO] Fitting on 5396 documents...
12/19/2024 05:37:25 [ruzicka:INFO] Predicting on 12 documents


Results written in: 03_tests/tests/ch1000/mfw200/Jussieu_Principes.csv


#######################
Jussieu: RapporTtxt

         work  author_label   author  chunk_num                    tag
1  RapporTtxt             8  Jussieu          1  1__Jussieu_RapporTtxt


12/19/2024 05:37:28 [ruzicka:INFO] Fitting on 5325 documents...
12/19/2024 05:37:28 [ruzicka:INFO] Predicting on 83 documents


Results written in: 03_tests/tests/ch1000/mfw200/Jussieu_RapporTtxt.csv


#######################
Jussieu: Traite des vertus

                work  author_label   author  chunk_num  \
1  Traite des vertus             8  Jussieu          1   

                            tag  
1  1__Jussieu_Traite des vertus  


12/19/2024 05:37:46 [ruzicka:INFO] Fitting on 5330 documents...
12/19/2024 05:37:46 [ruzicka:INFO] Predicting on 78 documents


Results written in: 03_tests/tests/ch1000/mfw200/Jussieu_Traite des vertus.csv


#######################
La Grange: Lucrece T1

         work  author_label     author  chunk_num                      tag
1  Lucrece T1             9  La Grange          1  1__La Grange_Lucrece T1


12/19/2024 05:37:58 [ruzicka:INFO] Fitting on 5315 documents...
12/19/2024 05:37:58 [ruzicka:INFO] Predicting on 93 documents


Results written in: 03_tests/tests/ch1000/mfw200/La Grange_Lucrece T1.csv


#######################
La Grange: Lucrece T2

         work  author_label     author  chunk_num                      tag
1  Lucrece T2             9  La Grange          1  1__La Grange_Lucrece T2


12/19/2024 05:38:11 [ruzicka:INFO] Fitting on 5350 documents...
12/19/2024 05:38:11 [ruzicka:INFO] Predicting on 58 documents


Results written in: 03_tests/tests/ch1000/mfw200/La Grange_Lucrece T2.csv


#######################
Marmontel: Belisaire

        work  author_label     author  chunk_num                     tag
1  Belisaire            10  Marmontel          1  1__Marmontel_Belisaire


12/19/2024 05:38:56 [ruzicka:INFO] Fitting on 5348 documents...
12/19/2024 05:38:56 [ruzicka:INFO] Predicting on 60 documents


Results written in: 03_tests/tests/ch1000/mfw200/Marmontel_Belisaire.csv


#######################
Marmontel: Contes T1

        work  author_label     author  chunk_num                     tag
1  Contes T1            10  Marmontel          1  1__Marmontel_Contes T1


12/19/2024 05:39:42 [ruzicka:INFO] Fitting on 5344 documents...
12/19/2024 05:39:42 [ruzicka:INFO] Predicting on 64 documents


Results written in: 03_tests/tests/ch1000/mfw200/Marmontel_Contes T1.csv


#######################
Marmontel: Contes T2

        work  author_label     author  chunk_num                     tag
1  Contes T2            10  Marmontel          1  1__Marmontel_Contes T2


12/19/2024 05:40:31 [ruzicka:INFO] Fitting on 5353 documents...
12/19/2024 05:40:31 [ruzicka:INFO] Predicting on 55 documents


Results written in: 03_tests/tests/ch1000/mfw200/Marmontel_Contes T2.csv


#######################
Marmontel: Contes T3

        work  author_label     author  chunk_num                     tag
1  Contes T3            10  Marmontel          1  1__Marmontel_Contes T3


12/19/2024 05:41:14 [ruzicka:INFO] Fitting on 5216 documents...
12/19/2024 05:41:14 [ruzicka:INFO] Predicting on 192 documents


Results written in: 03_tests/tests/ch1000/mfw200/Marmontel_Contes T3.csv


#######################
Marmontel: elements T1

          work  author_label     author  chunk_num                       tag
1  elements T1            10  Marmontel          1  1__Marmontel_elements T1


12/19/2024 05:43:21 [ruzicka:INFO] Fitting on 5238 documents...
12/19/2024 05:43:21 [ruzicka:INFO] Predicting on 170 documents


Results written in: 03_tests/tests/ch1000/mfw200/Marmontel_elements T1.csv


#######################
Marmontel: elements T2

          work  author_label     author  chunk_num                       tag
1  elements T2            10  Marmontel          1  1__Marmontel_elements T2


12/19/2024 05:45:17 [ruzicka:INFO] Fitting on 5211 documents...
12/19/2024 05:45:17 [ruzicka:INFO] Predicting on 197 documents


Results written in: 03_tests/tests/ch1000/mfw200/Marmontel_elements T2.csv


#######################
Marmontel: elements T3

          work  author_label     author  chunk_num                       tag
1  elements T3            10  Marmontel          1  1__Marmontel_elements T3


12/19/2024 05:47:27 [ruzicka:INFO] Fitting on 5333 documents...
12/19/2024 05:47:27 [ruzicka:INFO] Predicting on 75 documents


Results written in: 03_tests/tests/ch1000/mfw200/Marmontel_elements T3.csv


#######################
Marmontel: Les Incas

        work  author_label     author  chunk_num                     tag
1  Les Incas            10  Marmontel          1  1__Marmontel_Les Incas


12/19/2024 05:48:24 [ruzicka:INFO] Fitting on 5400 documents...
12/19/2024 05:48:24 [ruzicka:INFO] Predicting on 8 documents


Results written in: 03_tests/tests/ch1000/mfw200/Marmontel_Les Incas.csv


#######################
Meister: De l-origine

           work  author_label   author  chunk_num                      tag
1  De l-origine            11  Meister          1  1__Meister_De l-origine


12/19/2024 05:48:24 [ruzicka:INFO] Fitting on 5389 documents...
12/19/2024 05:48:24 [ruzicka:INFO] Predicting on 19 documents


Results written in: 03_tests/tests/ch1000/mfw200/Meister_De l-origine.csv


#######################
Meister: De la morale

           work  author_label   author  chunk_num                      tag
1  De la morale            11  Meister          1  1__Meister_De la morale


12/19/2024 05:48:26 [ruzicka:INFO] Fitting on 5404 documents...
12/19/2024 05:48:26 [ruzicka:INFO] Predicting on 4 documents


Results written in: 03_tests/tests/ch1000/mfw200/Meister_De la morale.csv


#######################
Morellet: FragmenTtxt

          work  author_label    author  chunk_num                      tag
1  FragmenTtxt            12  Morellet          1  1__Morellet_FragmenTtxt


12/19/2024 05:48:26 [ruzicka:INFO] Fitting on 5399 documents...
12/19/2024 05:48:26 [ruzicka:INFO] Predicting on 9 documents


Results written in: 03_tests/tests/ch1000/mfw200/Morellet_FragmenTtxt.csv


#######################
Morellet: Le cri

     work  author_label    author  chunk_num                 tag
1  Le cri            12  Morellet          1  1__Morellet_Le cri


12/19/2024 05:48:28 [ruzicka:INFO] Fitting on 5371 documents...
12/19/2024 05:48:28 [ruzicka:INFO] Predicting on 37 documents


Results written in: 03_tests/tests/ch1000/mfw200/Morellet_Le cri.csv


#######################
Morellet: Memoires

       work  author_label    author  chunk_num                   tag
1  Memoires            12  Morellet          1  1__Morellet_Memoires


12/19/2024 05:48:34 [ruzicka:INFO] Fitting on 5402 documents...
12/19/2024 05:48:34 [ruzicka:INFO] Predicting on 6 documents


Results written in: 03_tests/tests/ch1000/mfw200/Morellet_Memoires.csv


#######################
Morellet: Nouvelles

        work  author_label    author  chunk_num                    tag
1  Nouvelles            12  Morellet          1  1__Morellet_Nouvelles


12/19/2024 05:48:36 [ruzicka:INFO] Fitting on 5397 documents...
12/19/2024 05:48:36 [ruzicka:INFO] Predicting on 11 documents


Results written in: 03_tests/tests/ch1000/mfw200/Morellet_Nouvelles.csv


#######################
Morellet: Observations

           work  author_label    author  chunk_num                       tag
1  Observations            12  Morellet          1  1__Morellet_Observations


12/19/2024 05:48:38 [ruzicka:INFO] Fitting on 5379 documents...
12/19/2024 05:48:38 [ruzicka:INFO] Predicting on 29 documents


Results written in: 03_tests/tests/ch1000/mfw200/Morellet_Observations.csv


#######################
Morellet: Reflexions

         work  author_label    author  chunk_num                     tag
1  Reflexions            12  Morellet          1  1__Morellet_Reflexions


12/19/2024 05:48:43 [ruzicka:INFO] Fitting on 5382 documents...
12/19/2024 05:48:43 [ruzicka:INFO] Predicting on 26 documents


Results written in: 03_tests/tests/ch1000/mfw200/Morellet_Reflexions.csv


#######################
Morellet: SupplemenTtxt

            work  author_label    author  chunk_num                        tag
1  SupplemenTtxt            12  Morellet          1  1__Morellet_SupplemenTtxt


12/19/2024 05:48:47 [ruzicka:INFO] Fitting on 5381 documents...
12/19/2024 05:48:47 [ruzicka:INFO] Predicting on 27 documents


Results written in: 03_tests/tests/ch1000/mfw200/Morellet_SupplemenTtxt.csv


#######################
Morellet: Theorie

      work  author_label    author  chunk_num                  tag
1  Theorie            12  Morellet          1  1__Morellet_Theorie


12/19/2024 05:48:52 [ruzicka:INFO] Fitting on 5370 documents...
12/19/2024 05:48:52 [ruzicka:INFO] Predicting on 38 documents


Results written in: 03_tests/tests/ch1000/mfw200/Morellet_Theorie.csv


#######################
Naigeon: Le militaire

           work  author_label   author  chunk_num                      tag
1  Le militaire            13  Naigeon          1  1__Naigeon_Le militaire


12/19/2024 05:49:00 [ruzicka:INFO] Fitting on 5395 documents...
12/19/2024 05:49:00 [ruzicka:INFO] Predicting on 13 documents


Results written in: 03_tests/tests/ch1000/mfw200/Naigeon_Le militaire.csv


#######################
Naigeon: Liberte

      work  author_label   author  chunk_num                 tag
1  Liberte            13  Naigeon          1  1__Naigeon_Liberte


12/19/2024 05:49:03 [ruzicka:INFO] Fitting on 5300 documents...
12/19/2024 05:49:03 [ruzicka:INFO] Predicting on 108 documents


Results written in: 03_tests/tests/ch1000/mfw200/Naigeon_Liberte.csv


#######################
Naigeon: Manuel d-epictete

                work  author_label   author  chunk_num  \
1  Manuel d-epictete            13  Naigeon          1   

                            tag  
1  1__Naigeon_Manuel d-epictete  


12/19/2024 05:49:18 [ruzicka:INFO] Fitting on 5396 documents...
12/19/2024 05:49:18 [ruzicka:INFO] Predicting on 12 documents


Results written in: 03_tests/tests/ch1000/mfw200/Naigeon_Manuel d-epictete.csv


#######################
Naigeon: Richesse

       work  author_label   author  chunk_num                  tag
1  Richesse            13  Naigeon          1  1__Naigeon_Richesse


12/19/2024 05:49:20 [ruzicka:INFO] Fitting on 5391 documents...
12/19/2024 05:49:20 [ruzicka:INFO] Predicting on 17 documents


Results written in: 03_tests/tests/ch1000/mfw200/Naigeon_Richesse.csv


#######################
Naigeon: Unitaires

        work  author_label   author  chunk_num                   tag
1  Unitaires            13  Naigeon          1  1__Naigeon_Unitaires


12/19/2024 05:49:24 [ruzicka:INFO] Fitting on 5372 documents...
12/19/2024 05:49:24 [ruzicka:INFO] Predicting on 36 documents


Results written in: 03_tests/tests/ch1000/mfw200/Naigeon_Unitaires.csv


#######################
Pechmeja: eloge

    work  author_label    author  chunk_num                tag
1  eloge            14  Pechmeja          0  0__Pechmeja_eloge


12/19/2024 05:49:28 [ruzicka:INFO] Fitting on 5351 documents...
12/19/2024 05:49:28 [ruzicka:INFO] Predicting on 57 documents


Results written in: 03_tests/tests/ch1000/mfw200/Pechmeja_eloge.csv


#######################
Pechmeja: Telephe

      work  author_label    author  chunk_num                  tag
1  Telephe            14  Pechmeja          1  1__Pechmeja_Telephe


12/19/2024 05:49:34 [ruzicka:INFO] Fitting on 5361 documents...
12/19/2024 05:49:34 [ruzicka:INFO] Predicting on 47 documents


Results written in: 03_tests/tests/ch1000/mfw200/Pechmeja_Telephe.csv


#######################
Raynal: Anecdotes historiques 1

                      work  author_label  author  chunk_num  \
1  Anecdotes historiques 1            15  Raynal          1   

                                 tag  
1  1__Raynal_Anecdotes historiques 1  


12/19/2024 05:49:49 [ruzicka:INFO] Fitting on 5364 documents...
12/19/2024 05:49:49 [ruzicka:INFO] Predicting on 44 documents


Results written in: 03_tests/tests/ch1000/mfw200/Raynal_Anecdotes historiques 1.csv


#######################
Raynal: Anecdotes litteraires2 1756

                          work  author_label  author  chunk_num  \
1  Anecdotes litteraires2 1756            15  Raynal          1   

                                     tag  
1  1__Raynal_Anecdotes litteraires2 1756  


12/19/2024 05:50:03 [ruzicka:INFO] Fitting on 5369 documents...
12/19/2024 05:50:03 [ruzicka:INFO] Predicting on 39 documents


Results written in: 03_tests/tests/ch1000/mfw200/Raynal_Anecdotes litteraires2 1756.csv


#######################
Raynal: Anecdotes litteraires3 1756

                          work  author_label  author  chunk_num  \
1  Anecdotes litteraires3 1756            15  Raynal          1   

                                     tag  
1  1__Raynal_Anecdotes litteraires3 1756  


12/19/2024 05:50:16 [ruzicka:INFO] Fitting on 5404 documents...
12/19/2024 05:50:16 [ruzicka:INFO] Predicting on 4 documents


Results written in: 03_tests/tests/ch1000/mfw200/Raynal_Anecdotes litteraires3 1756.csv


#######################
Raynal: Considerations

             work  author_label  author  chunk_num                       tag
1  Considerations            15  Raynal          1  1__Raynal_Considerations


12/19/2024 05:50:17 [ruzicka:INFO] Fitting on 5353 documents...
12/19/2024 05:50:17 [ruzicka:INFO] Predicting on 55 documents


Results written in: 03_tests/tests/ch1000/mfw200/Raynal_Considerations.csv


#######################
Raynal: ecole 1

      work  author_label  author  chunk_num                tag
1  ecole 1            15  Raynal          1  1__Raynal_ecole 1


12/19/2024 05:50:35 [ruzicka:INFO] Fitting on 5352 documents...
12/19/2024 05:50:35 [ruzicka:INFO] Predicting on 56 documents


Results written in: 03_tests/tests/ch1000/mfw200/Raynal_ecole 1.csv


#######################
Raynal: ecole 3

      work  author_label  author  chunk_num                tag
1  ecole 3            15  Raynal          1  1__Raynal_ecole 3


12/19/2024 05:50:52 [ruzicka:INFO] Fitting on 5353 documents...
12/19/2024 05:50:52 [ruzicka:INFO] Predicting on 55 documents


Results written in: 03_tests/tests/ch1000/mfw200/Raynal_ecole 3.csv


#######################
Raynal: Histoire du Parlement 1748

                         work  author_label  author  chunk_num  \
1  Histoire du Parlement 1748            15  Raynal          1   

                                    tag  
1  1__Raynal_Histoire du Parlement 1748  


12/19/2024 05:51:09 [ruzicka:INFO] Fitting on 5374 documents...
12/19/2024 05:51:09 [ruzicka:INFO] Predicting on 34 documents


Results written in: 03_tests/tests/ch1000/mfw200/Raynal_Histoire du Parlement 1748.csv


#######################
Raynal: Histoire du Parlement Band 2

                           work  author_label  author  chunk_num  \
1  Histoire du Parlement Band 2            15  Raynal          1   

                                      tag  
1  1__Raynal_Histoire du Parlement Band 2  


12/19/2024 05:51:20 [ruzicka:INFO] Fitting on 5388 documents...
12/19/2024 05:51:20 [ruzicka:INFO] Predicting on 20 documents


Results written in: 03_tests/tests/ch1000/mfw200/Raynal_Histoire du Parlement Band 2.csv


#######################
Rivière: De l-instruction publique  1775

                              work  author_label    author  chunk_num  \
1  De l-instruction publique  1775            16  Rivière          1   

                                           tag  
1  1__Rivière_De l-instruction publique  1775  


12/19/2024 05:51:26 [ruzicka:INFO] Fitting on 5327 documents...
12/19/2024 05:51:26 [ruzicka:INFO] Predicting on 81 documents


Results written in: 03_tests/tests/ch1000/mfw200/Rivière_De l-instruction publique  1775.csv


#######################
Rivière: L-Interet

        work  author_label    author  chunk_num                    tag
1  L-Interet            16  Rivière          1  1__Rivière_L-Interet


12/19/2024 05:51:44 [ruzicka:INFO] Fitting on 5249 documents...
12/19/2024 05:51:44 [ruzicka:INFO] Predicting on 159 documents


Results written in: 03_tests/tests/ch1000/mfw200/Rivière_L-Interet.csv


#######################
Rivière: L-ordre naturel

              work  author_label    author  chunk_num  \
1  L-ordre naturel            16  Rivière          1   

                           tag  
1  1__Rivière_L-ordre naturel  


12/19/2024 05:52:10 [ruzicka:INFO] Fitting on 5398 documents...
12/19/2024 05:52:10 [ruzicka:INFO] Predicting on 10 documents


Results written in: 03_tests/tests/ch1000/mfw200/Rivière_L-ordre naturel.csv


#######################
Saint-Lambert: Les deux Amis

            work  author_label         author  chunk_num  \
1  Les deux Amis            17  Saint-Lambert          1   

                              tag  
1  1__Saint-Lambert_Les deux Amis  


12/19/2024 05:52:14 [ruzicka:INFO] Fitting on 5337 documents...
12/19/2024 05:52:14 [ruzicka:INFO] Predicting on 71 documents


Results written in: 03_tests/tests/ch1000/mfw200/Saint-Lambert_Les deux Amis.csv


#######################
Saint-Lambert: Oeuvres Phil T1

              work  author_label         author  chunk_num  \
1  Oeuvres Phil T1            17  Saint-Lambert          1   

                                tag  
1  1__Saint-Lambert_Oeuvres Phil T1  


12/19/2024 05:52:38 [ruzicka:INFO] Fitting on 5330 documents...
12/19/2024 05:52:38 [ruzicka:INFO] Predicting on 78 documents


Results written in: 03_tests/tests/ch1000/mfw200/Saint-Lambert_Oeuvres Phil T1.csv


#######################
Saint-Lambert: Oeuvres Phil T2

              work  author_label         author  chunk_num  \
1  Oeuvres Phil T2            17  Saint-Lambert          1   

                                tag  
1  1__Saint-Lambert_Oeuvres Phil T2  


12/19/2024 05:53:04 [ruzicka:INFO] Fitting on 5319 documents...
12/19/2024 05:53:04 [ruzicka:INFO] Predicting on 89 documents


Results written in: 03_tests/tests/ch1000/mfw200/Saint-Lambert_Oeuvres Phil T2.csv


#######################
Saint-Lambert: Oeuvres Phil T3

              work  author_label         author  chunk_num  \
1  Oeuvres Phil T3            17  Saint-Lambert          1   

                                tag  
1  1__Saint-Lambert_Oeuvres Phil T3  


12/19/2024 05:53:33 [ruzicka:INFO] Fitting on 5318 documents...
12/19/2024 05:53:33 [ruzicka:INFO] Predicting on 90 documents


Results written in: 03_tests/tests/ch1000/mfw200/Saint-Lambert_Oeuvres Phil T3.csv


#######################
Saint-Lambert: Oeuvres Phil T4

              work  author_label         author  chunk_num  \
1  Oeuvres Phil T4            17  Saint-Lambert          1   

                                tag  
1  1__Saint-Lambert_Oeuvres Phil T4  


12/19/2024 05:54:03 [ruzicka:INFO] Fitting on 5363 documents...
12/19/2024 05:54:03 [ruzicka:INFO] Predicting on 45 documents


Results written in: 03_tests/tests/ch1000/mfw200/Saint-Lambert_Oeuvres Phil T4.csv


#######################
Saint-Lambert: Oeuvres Phil T5

              work  author_label         author  chunk_num  \
1  Oeuvres Phil T5            17  Saint-Lambert          1   

                                tag  
1  1__Saint-Lambert_Oeuvres Phil T5  
Results written in: 03_tests/tests/ch1000/mfw200/Saint-Lambert_Oeuvres Phil T5.csv


# FP1

Test FP1 chunks with BDI

In [91]:
# load preprocessed data

corpus = pd.read_csv('03_tests/FP1_2000_100mfw_rfreq.csv') # rel freq

corpus

Unnamed: 0,work,author,chunk_num,tag,de,des,et,la,les,vous,...,toujours,après,cet,autre,donc,jamais,encore,avoir,ainsi,peu
0,Avis,Baudeau,0,0__Baudeau_Avis,3.45,2.20,2.50,1.40,2.35,0.65,...,0.10,0.00,0.05,0.05,0.40,0.00,0.00,0.30,0.00,0.05
1,Eclaircissemens,Baudeau,0,0__Baudeau_Eclaircissemens,4.40,2.85,2.80,2.60,2.40,1.65,...,0.10,0.10,0.05,0.15,0.10,0.10,0.05,0.15,0.05,0.05
2,Explication,Baudeau,0,0__Baudeau_Explication,4.90,2.60,3.80,3.10,4.65,0.95,...,0.15,0.10,0.00,0.00,0.10,0.00,0.10,0.10,0.05,0.20
3,Idees sur l-administration,Baudeau,0,0__Baudeau_Idees sur l-administration,5.60,1.90,2.20,3.65,1.85,0.10,...,0.15,0.00,0.10,0.00,0.10,0.25,0.10,0.20,0.05,0.00
4,Idees sur la puissance,Baudeau,0,0__Baudeau_Idees sur la puissance,4.90,1.85,2.50,2.35,2.75,0.10,...,0.20,0.05,0.05,0.10,0.15,0.15,0.10,0.10,0.05,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4422,elements T3,Marmontel,96,96__Marmontel_elements T3,4.15,1.20,3.35,3.00,1.15,0.00,...,0.00,0.00,0.15,0.20,0.00,0.05,0.05,0.20,0.00,0.00
4423,Histoire Generale T19,Deleyre,97,97__Deleyre_Histoire Generale T19,5.25,1.85,0.75,3.75,2.65,0.00,...,0.05,0.15,0.05,0.15,0.00,0.00,0.05,0.15,0.05,0.20
4424,elements T3,Marmontel,97,97__Marmontel_elements T3,4.60,0.95,3.10,3.40,1.55,0.00,...,0.00,0.05,0.00,0.30,0.05,0.15,0.05,0.10,0.10,0.00
4425,Histoire Generale T19,Deleyre,98,98__Deleyre_Histoire Generale T19,4.90,2.95,0.75,3.25,3.20,0.05,...,0.00,0.05,0.05,0.15,0.05,0.00,0.15,0.20,0.05,0.20


In [92]:
raw_df = corpus.iloc[:, :4] # leaves metadata
X = corpus.iloc[:, 4:] # leaves only word columns

raw_df

Unnamed: 0,work,author,chunk_num,tag
0,Avis,Baudeau,0,0__Baudeau_Avis
1,Eclaircissemens,Baudeau,0,0__Baudeau_Eclaircissemens
2,Explication,Baudeau,0,0__Baudeau_Explication
3,Idees sur l-administration,Baudeau,0,0__Baudeau_Idees sur l-administration
4,Idees sur la puissance,Baudeau,0,0__Baudeau_Idees sur la puissance
...,...,...,...,...
4422,elements T3,Marmontel,96,96__Marmontel_elements T3
4423,Histoire Generale T19,Deleyre,97,97__Deleyre_Histoire Generale T19
4424,elements T3,Marmontel,97,97__Marmontel_elements T3
4425,Histoire Generale T19,Deleyre,98,98__Deleyre_Histoire Generale T19


In [93]:
# numer of rows and columns in the X (word frequencies subset)
X.shape

(4427, 100)

In [94]:
# create numeric author labels
labels, label_uniques = raw_df.author.factorize()
raw_df.insert(1, "author_label", labels)
raw_df

Unnamed: 0,work,author_label,author,chunk_num,tag
0,Avis,0,Baudeau,0,0__Baudeau_Avis
1,Eclaircissemens,0,Baudeau,0,0__Baudeau_Eclaircissemens
2,Explication,0,Baudeau,0,0__Baudeau_Explication
3,Idees sur l-administration,0,Baudeau,0,0__Baudeau_Idees sur l-administration
4,Idees sur la puissance,0,Baudeau,0,0__Baudeau_Idees sur la puissance
...,...,...,...,...,...
4422,elements T3,11,Marmontel,96,96__Marmontel_elements T3
4423,Histoire Generale T19,3,Deleyre,97,97__Deleyre_Histoire Generale T19
4424,elements T3,11,Marmontel,97,97__Marmontel_elements T3
4425,Histoire Generale T19,3,Deleyre,98,98__Deleyre_Histoire Generale T19


In [95]:
# select an "unknown" work

problems = raw_df[raw_df.work == "FP1"].reset_index(drop=True).copy()
problems_X = X[raw_df.work == "FP1"].reset_index(drop=True).copy()
problems

Unnamed: 0,work,author_label,author,chunk_num,tag
0,FP1,7,HDI,0,0__HDI_FP1
1,FP1,7,HDI,1,1__HDI_FP1
2,FP1,7,HDI,2,2__HDI_FP1


In [96]:
# extract the rest of the corpus

rest = raw_df[raw_df.work != "FP1"].reset_index(drop=True).copy()
rest_X = X[raw_df.work != "FP1"].reset_index(drop = True).copy()
rest

Unnamed: 0,work,author_label,author,chunk_num,tag
0,Avis,0,Baudeau,0,0__Baudeau_Avis
1,Eclaircissemens,0,Baudeau,0,0__Baudeau_Eclaircissemens
2,Explication,0,Baudeau,0,0__Baudeau_Explication
3,Idees sur l-administration,0,Baudeau,0,0__Baudeau_Idees sur l-administration
4,Idees sur la puissance,0,Baudeau,0,0__Baudeau_Idees sur la puissance
...,...,...,...,...,...
4419,elements T3,11,Marmontel,96,96__Marmontel_elements T3
4420,Histoire Generale T19,3,Deleyre,97,97__Deleyre_Histoire Generale T19
4421,elements T3,11,Marmontel,97,97__Marmontel_elements T3
4422,Histoire Generale T19,3,Deleyre,98,98__Deleyre_Histoire Generale T19


**Scaling**

In [97]:
ss = StandardScaler(with_mean=False).fit(rest_X)

In [98]:
rest_scaled_X = ss.transform(rest_X)
problems_scaled_X = ss.transform(problems_X)

**Verification**

In [99]:
# set verifier
rng = np.random.default_rng(42)

bdi_mm = BDIVerifier(
    metric='minmax', nb_bootstrap_iter=1000, rnd_prop=0.35, random_state=rng
)

In [100]:
# fit 
bdi_mm.fit(rest_scaled_X, rest.author_label)

01/21/2025 05:16:38 [ruzicka:INFO] Fitting on 4424 documents...


In [101]:
# check unique authors
label_uniques.values

array(['Baudeau', 'Chastellux', 'Condorcet', 'Deleyre', 'dHolbach',
       'Diderot', 'Guibert', 'HDI', 'Jaucourt', 'Jussieu', 'La Grange',
       'Marmontel', 'Meister', 'Morellet', 'Naigeon', 'Pechmeja',
       'Raynal', 'Rivière', 'Saint-Lambert'], dtype=object)

In [102]:
for label in label_uniques.values:
    print(f"Testing against {label}")
    code = label_uniques.get_loc(label)
    print(
        f"Bootstrap Match Strength (one per chunk, 0-1.0): {bdi_mm.predict_proba(problems_scaled_X, [code] * problems_scaled_X.shape[0])}"
    )

01/21/2025 05:16:43 [ruzicka:INFO] Predicting on 3 documents


Testing against Baudeau


01/21/2025 05:16:43 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.775 0.63  0.674]
Testing against Chastellux


01/21/2025 05:16:44 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.948 0.834 0.971]
Testing against Condorcet


01/21/2025 05:16:46 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.974 0.988 0.992]
Testing against Deleyre


01/21/2025 05:16:47 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.875 0.804 0.877]
Testing against dHolbach


01/21/2025 05:16:48 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.821 0.951 0.726]
Testing against Diderot


01/21/2025 05:16:49 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [1. 1. 1.]
Testing against Guibert


01/21/2025 05:16:50 [ruzicka:INFO] Predicting on 3 documents
01/21/2025 05:16:50 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.915 0.751 0.908]
Testing against HDI
Bootstrap Match Strength (one per chunk, 0-1.0): [0.015 0.125 0.026]
Testing against Jaucourt


01/21/2025 05:16:51 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.833 0.678 0.825]
Testing against Jussieu


01/21/2025 05:16:51 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.532 0.406 0.531]
Testing against La Grange


01/21/2025 05:16:52 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.227 0.187 0.431]
Testing against Marmontel


01/21/2025 05:16:53 [ruzicka:INFO] Predicting on 3 documents
01/21/2025 05:16:53 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.98  0.967 0.95 ]
Testing against Meister
Bootstrap Match Strength (one per chunk, 0-1.0): [0.223 0.204 0.139]
Testing against Morellet


01/21/2025 05:16:54 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.946 0.871 0.802]
Testing against Naigeon


01/21/2025 05:16:54 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.789 0.614 0.462]
Testing against Pechmeja


01/21/2025 05:16:54 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.21  0.414 0.73 ]
Testing against Raynal


01/21/2025 05:16:55 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.685 0.574 0.6  ]
Testing against Rivière


01/21/2025 05:16:56 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.498 0.534 0.514]
Testing against Saint-Lambert
Bootstrap Match Strength (one per chunk, 0-1.0): [0.574 0.944 0.947]


In [103]:
authors = label_uniques.values

fh = ''

for a in authors:

    print(a)
    
    bdi_mm.predict_proba(
        problems_scaled_X, [label_uniques.get_loc(a)] * problems_scaled_X.shape[0]
    )

    
    x = pd.DataFrame(dict(zip(problems.tag, bdi_mm._dist_arrays)))
    
    fh = '03_tests/tests/fp1/fp1_vs_' + a + '.csv'

    x.to_csv(fh)

01/21/2025 05:16:58 [ruzicka:INFO] Predicting on 3 documents


Baudeau


01/21/2025 05:16:58 [ruzicka:INFO] Predicting on 3 documents


Chastellux


01/21/2025 05:16:59 [ruzicka:INFO] Predicting on 3 documents


Condorcet


01/21/2025 05:17:01 [ruzicka:INFO] Predicting on 3 documents


Deleyre


01/21/2025 05:17:02 [ruzicka:INFO] Predicting on 3 documents


dHolbach


01/21/2025 05:17:03 [ruzicka:INFO] Predicting on 3 documents


Diderot


01/21/2025 05:17:05 [ruzicka:INFO] Predicting on 3 documents


Guibert


01/21/2025 05:17:05 [ruzicka:INFO] Predicting on 3 documents
01/21/2025 05:17:06 [ruzicka:INFO] Predicting on 3 documents


HDI
Jaucourt


01/21/2025 05:17:06 [ruzicka:INFO] Predicting on 3 documents


Jussieu


01/21/2025 05:17:07 [ruzicka:INFO] Predicting on 3 documents


La Grange


01/21/2025 05:17:07 [ruzicka:INFO] Predicting on 3 documents


Marmontel


01/21/2025 05:17:08 [ruzicka:INFO] Predicting on 3 documents
01/21/2025 05:17:08 [ruzicka:INFO] Predicting on 3 documents


Meister
Morellet


01/21/2025 05:17:09 [ruzicka:INFO] Predicting on 3 documents


Naigeon


01/21/2025 05:17:09 [ruzicka:INFO] Predicting on 3 documents


Pechmeja


01/21/2025 05:17:10 [ruzicka:INFO] Predicting on 3 documents


Raynal


01/21/2025 05:17:10 [ruzicka:INFO] Predicting on 3 documents


Rivière


01/21/2025 05:17:11 [ruzicka:INFO] Predicting on 3 documents


Saint-Lambert
