This notebook performs BDI analysis (using the `BDIVerifier` from the `ruzicka` package) based on the relative frequencies of n-grams.

In [1]:
import logging
import re
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

logging.basicConfig(level="INFO")
logger = logging.getLogger("ruzicka")

from ruzicka.BDIVerifier import BDIVerifier

## FP

In [14]:
# Read the preprocessed table of relative n-gram frequencies for the FP test.
corpus = pd.read_csv(
    '06_tests/fp1/FP1_2000_500mfn_rfreq.csv'
)

# Display the loaded frame as the cell output.
corpus

Unnamed: 0,work,author,chunk_num,tag,_de_,les_,des_,_des,oit_,_et_,...,i_le,nt_i,nt_c,ns_s,_à_c,nt_à,re_a,t_da,et_p,ne_l
0,Avis,Baudeau,0,0__Baudeau_Avis,0.637944,0.554734,0.425296,0.425296,0.110947,0.462278,...,0.027737,0.036982,0.064719,0.018491,0.018491,0.046228,0.027737,0.027737,0.036982,0.009246
1,Eclaircissemens,Baudeau,0,0__Baudeau_Eclaircissemens,0.777385,0.609541,0.521201,0.521201,0.114841,0.494700,...,0.053004,0.026502,0.026502,0.008834,0.026502,0.000000,0.017668,0.035336,0.035336,0.070671
2,Explication,Baudeau,0,0__Baudeau_Explication,0.856419,1.127327,0.480643,0.480643,0.061173,0.664161,...,0.017478,0.026217,0.043695,0.026217,0.043695,0.026217,0.008739,0.026217,0.078651,0.000000
3,Idees sur l-administration,Baudeau,0,0__Baudeau_Idees sur l-administration,0.962696,0.386797,0.378202,0.395393,0.189101,0.378202,...,0.000000,0.042977,0.034382,0.060168,0.042977,0.051573,0.017191,0.060168,0.017191,0.008595
4,Idees sur la puissance,Baudeau,0,0__Baudeau_Idees sur la puissance,0.852470,0.748086,0.382742,0.356646,0.226166,0.434934,...,0.034795,0.043493,0.052192,0.017397,0.017397,0.043493,0.043493,0.026096,0.034795,0.052192
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4463,elements T3,Marmontel,96,96__Marmontel_elements T3,0.772668,0.297896,0.223422,0.214113,0.074474,0.623720,...,0.027928,0.009309,0.037237,0.027928,0.018619,0.027928,0.055856,0.121020,0.065165,0.065165
4464,Histoire Generale T19,Deleyre,97,97__Deleyre_Histoire Generale T19,0.988514,0.913199,0.451892,0.348334,0.169460,0.141216,...,0.160045,0.028243,0.065901,0.028243,0.009414,0.000000,0.018829,0.037658,0.000000,0.018829
4465,elements T3,Marmontel,97,97__Marmontel_elements T3,0.827950,0.373032,0.191065,0.236557,0.136475,0.564098,...,0.009098,0.009098,0.045492,0.000000,0.009098,0.009098,0.027295,0.063688,0.036393,0.072787
4466,Histoire Generale T19,Deleyre,98,98__Deleyre_Histoire Generale T19,0.916573,1.038159,0.617284,0.561167,0.112233,0.140292,...,0.046764,0.065470,0.056117,0.037411,0.046764,0.009353,0.037411,0.028058,0.000000,0.056117


In [15]:
# The leading columns of `corpus` (work, author, chunk_num, tag) are metadata;
# every column after them is an n-gram frequency feature.
N_META_COLS = 4

# .copy() detaches the metadata from `corpus`, so the in-place column insert
# done in a later cell operates on an independent frame rather than a slice.
raw_df = corpus.iloc[:, :N_META_COLS].copy()
X = corpus.iloc[:, N_META_COLS:]  # feature matrix: frequency columns only

raw_df

Unnamed: 0,work,author,chunk_num,tag
0,Avis,Baudeau,0,0__Baudeau_Avis
1,Eclaircissemens,Baudeau,0,0__Baudeau_Eclaircissemens
2,Explication,Baudeau,0,0__Baudeau_Explication
3,Idees sur l-administration,Baudeau,0,0__Baudeau_Idees sur l-administration
4,Idees sur la puissance,Baudeau,0,0__Baudeau_Idees sur la puissance
...,...,...,...,...
4463,elements T3,Marmontel,96,96__Marmontel_elements T3
4464,Histoire Generale T19,Deleyre,97,97__Deleyre_Histoire Generale T19
4465,elements T3,Marmontel,97,97__Marmontel_elements T3
4466,Histoire Generale T19,Deleyre,98,98__Deleyre_Histoire Generale T19


In [16]:
# number of rows and columns in X (the n-gram frequency subset of the corpus)
X.shape

(4468, 500)

In [17]:
# Create numeric author labels.
# `labels` holds one integer code per row; `label_uniques` is the Index of
# author names, where position == code.
labels, label_uniques = raw_df.author.factorize()

# DataFrame.insert raises ValueError when the column already exists, which
# made this cell crash on a second run. Dropping any earlier copy first
# (a no-op on the first run) keeps the cell idempotent.
raw_df = raw_df.drop(columns="author_label", errors="ignore")
raw_df.insert(1, "author_label", labels)
raw_df

Unnamed: 0,work,author_label,author,chunk_num,tag
0,Avis,0,Baudeau,0,0__Baudeau_Avis
1,Eclaircissemens,0,Baudeau,0,0__Baudeau_Eclaircissemens
2,Explication,0,Baudeau,0,0__Baudeau_Explication
3,Idees sur l-administration,0,Baudeau,0,0__Baudeau_Idees sur l-administration
4,Idees sur la puissance,0,Baudeau,0,0__Baudeau_Idees sur la puissance
...,...,...,...,...,...
4463,elements T3,11,Marmontel,96,96__Marmontel_elements T3
4464,Histoire Generale T19,3,Deleyre,97,97__Deleyre_Histoire Generale T19
4465,elements T3,11,Marmontel,97,97__Marmontel_elements T3
4466,Histoire Generale T19,3,Deleyre,98,98__Deleyre_Histoire Generale T19


In [18]:
# Pick out the chunks of the "unknown" work that will be verified.
is_problem = raw_df.work.eq("FP1")

# Metadata and features are filtered with the same mask and re-indexed from 0
# so that their rows stay aligned.
problems = raw_df.loc[is_problem].reset_index(drop=True).copy()
problems_X = X.loc[is_problem].reset_index(drop=True).copy()
problems

Unnamed: 0,work,author_label,author,chunk_num,tag
0,FP1,7,HDI,0,0__HDI_FP1
1,FP1,7,HDI,1,1__HDI_FP1
2,FP1,7,HDI,2,2__HDI_FP1


In [19]:
# Everything that is not the "unknown" work forms the reference corpus.
is_reference = raw_df.work.ne("FP1")

# Same mask for metadata and features; indices restart at 0 in both.
rest = raw_df.loc[is_reference].reset_index(drop=True).copy()
rest_X = X.loc[is_reference].reset_index(drop=True).copy()
rest

Unnamed: 0,work,author_label,author,chunk_num,tag
0,Avis,0,Baudeau,0,0__Baudeau_Avis
1,Eclaircissemens,0,Baudeau,0,0__Baudeau_Eclaircissemens
2,Explication,0,Baudeau,0,0__Baudeau_Explication
3,Idees sur l-administration,0,Baudeau,0,0__Baudeau_Idees sur l-administration
4,Idees sur la puissance,0,Baudeau,0,0__Baudeau_Idees sur la puissance
...,...,...,...,...,...
4460,elements T3,11,Marmontel,96,96__Marmontel_elements T3
4461,Histoire Generale T19,3,Deleyre,97,97__Deleyre_Histoire Generale T19
4462,elements T3,11,Marmontel,97,97__Marmontel_elements T3
4463,Histoire Generale T19,3,Deleyre,98,98__Deleyre_Histoire Generale T19


In [20]:
# Scaling: the scaler is fitted on the reference corpus only and then applied
# to both sets. with_mean=False means features are scaled but not centred.
ss = StandardScaler(with_mean=False)
ss.fit(rest_X)

rest_scaled_X, problems_scaled_X = (
    ss.transform(features) for features in (rest_X, problems_X)
)

In [21]:
# Configure the verifier; a seeded generator is handed in as random_state.
rng = np.random.default_rng(42)

verifier_options = dict(
    metric='minmax',
    nb_bootstrap_iter=1000,
    rnd_prop=0.35,
    random_state=rng,
)
bdi_mm = BDIVerifier(**verifier_options)

In [22]:
# Fit the verifier on the scaled reference corpus and its numeric author labels.
bdi_mm.fit(rest_scaled_X, rest["author_label"])

04/02/2025 02:08:20 [ruzicka:INFO] Fitting on 4465 documents...


In [23]:
# check unique authors: the position of a name in this array is its numeric
# author_label (as produced by factorize above)
label_uniques.values

array(['Baudeau', 'Chastellux', 'Condorcet', 'Deleyre', 'dHolbach',
       'Diderot', 'Guibert', 'HDI', 'Jaucourt', 'Jussieu', 'La Grange',
       'Marmontel', 'Meister', 'Morellet', 'Naigeon', 'Pechmeja',
       'Raynal', 'Rivière', 'Saint-Lambert'], dtype=object)

In [24]:
# Test the problem chunks against every candidate author in turn.
n_problem_chunks = problems_scaled_X.shape[0]

for code, label in enumerate(label_uniques):
    print(f"Testing against {label}")
    # One candidate label is passed per problem row (the same author for all).
    strengths = bdi_mm.predict_proba(problems_scaled_X, [code] * n_problem_chunks)
    print(f"Bootstrap Match Strength (one per chunk, 0-1.0): {strengths}")

04/02/2025 02:08:23 [ruzicka:INFO] Predicting on 3 documents


Testing against Baudeau


04/02/2025 02:08:24 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.181 0.195 0.517]
Testing against Chastellux


04/02/2025 02:08:25 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.911 0.94  0.956]
Testing against Condorcet


04/02/2025 02:08:29 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.988 0.994 0.99 ]
Testing against Deleyre


04/02/2025 02:08:30 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.593 0.763 0.745]
Testing against dHolbach


04/02/2025 02:08:33 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.64  0.946 0.923]
Testing against Diderot


04/02/2025 02:08:36 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [1. 1. 1.]
Testing against Guibert


04/02/2025 02:08:37 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.437 0.215 0.766]
Testing against HDI


04/02/2025 02:08:38 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.743 0.852 0.939]
Testing against Jaucourt


04/02/2025 02:08:39 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.565 0.668 0.451]
Testing against Jussieu


04/02/2025 02:08:40 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.047 0.286 0.293]
Testing against La Grange


04/02/2025 02:08:40 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.112 0.05  0.057]
Testing against Marmontel


04/02/2025 02:08:42 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.969 0.931 0.877]
Testing against Meister


04/02/2025 02:08:43 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.729 0.322 0.11 ]
Testing against Morellet


04/02/2025 02:08:44 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.639 0.811 0.442]
Testing against Naigeon


04/02/2025 02:08:44 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.859 0.498 0.491]
Testing against Pechmeja


04/02/2025 02:08:45 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.02  0.289 0.181]
Testing against Raynal


04/02/2025 02:08:46 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.137 0.41  0.26 ]
Testing against Rivière


04/02/2025 02:08:47 [ruzicka:INFO] Predicting on 3 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.388 0.181 0.076]
Testing against Saint-Lambert
Bootstrap Match Strength (one per chunk, 0-1.0): [0.329 0.845 0.923]


In [25]:
# Populate results & output distributions for each author:
# one CSV per candidate author, one column per problem chunk (keyed by tag).

out_dir = Path('06_tests/fp1/mfn500')
# to_csv does not create missing folders, so make sure the target exists.
out_dir.mkdir(parents=True, exist_ok=True)

authors = label_uniques.values
n_problem_chunks = problems_scaled_X.shape[0]

for code, author in enumerate(authors):

    print(author)

    # The return value is deliberately ignored: the call is made only so that
    # the verifier refreshes its internal `_dist_arrays` for this candidate.
    bdi_mm.predict_proba(problems_scaled_X, [code] * n_problem_chunks)

    # NOTE(review): `_dist_arrays` is a private attribute of BDIVerifier;
    # presumably it holds one bootstrap distribution per problem chunk from the
    # most recent predict_proba call - confirm against the installed ruzicka
    # version. This is a fresh bootstrap run, so the saved values need not
    # match the strengths printed by an earlier cell exactly.
    distributions = pd.DataFrame(dict(zip(problems.tag, bdi_mm._dist_arrays)))

    distributions.to_csv(out_dir / f'fp1_vs_{author}.csv')

04/02/2025 02:08:58 [ruzicka:INFO] Predicting on 3 documents


Baudeau


04/02/2025 02:08:59 [ruzicka:INFO] Predicting on 3 documents


Chastellux


04/02/2025 02:09:00 [ruzicka:INFO] Predicting on 3 documents


Condorcet


04/02/2025 02:09:04 [ruzicka:INFO] Predicting on 3 documents


Deleyre


04/02/2025 02:09:06 [ruzicka:INFO] Predicting on 3 documents


dHolbach


04/02/2025 02:09:09 [ruzicka:INFO] Predicting on 3 documents


Diderot


04/02/2025 02:09:11 [ruzicka:INFO] Predicting on 3 documents


Guibert


04/02/2025 02:09:13 [ruzicka:INFO] Predicting on 3 documents


HDI


04/02/2025 02:09:13 [ruzicka:INFO] Predicting on 3 documents


Jaucourt


04/02/2025 02:09:14 [ruzicka:INFO] Predicting on 3 documents


Jussieu


04/02/2025 02:09:15 [ruzicka:INFO] Predicting on 3 documents


La Grange


04/02/2025 02:09:16 [ruzicka:INFO] Predicting on 3 documents


Marmontel


04/02/2025 02:09:18 [ruzicka:INFO] Predicting on 3 documents


Meister


04/02/2025 02:09:19 [ruzicka:INFO] Predicting on 3 documents


Morellet


04/02/2025 02:09:20 [ruzicka:INFO] Predicting on 3 documents


Naigeon


04/02/2025 02:09:21 [ruzicka:INFO] Predicting on 3 documents


Pechmeja


04/02/2025 02:09:21 [ruzicka:INFO] Predicting on 3 documents


Raynal


04/02/2025 02:09:23 [ruzicka:INFO] Predicting on 3 documents


Rivière


04/02/2025 02:09:23 [ruzicka:INFO] Predicting on 3 documents


Saint-Lambert


## Editions

In [99]:
# Read the preprocessed table of relative n-gram frequencies for the editions test.
corpus = pd.read_csv(
    '06_tests/editions/ed1780_2k_500mfn_rfreq.csv'
)

# Display the loaded frame as the cell output.
corpus

Unnamed: 0,work,author,chunk_num,tag,_de_,les_,des_,_des,oit_,_et_,...,i_le,nt_i,nt_c,ns_s,_à_c,nt_à,re_a,t_da,et_p,ne_l
0,Avis,Baudeau,0,0__Baudeau_Avis,0.637944,0.554734,0.425296,0.425296,0.110947,0.462278,...,0.027737,0.036982,0.064719,0.018491,0.018491,0.046228,0.027737,0.027737,0.036982,0.009246
1,Eclaircissemens,Baudeau,0,0__Baudeau_Eclaircissemens,0.777385,0.609541,0.521201,0.521201,0.114841,0.494700,...,0.053004,0.026502,0.026502,0.008834,0.026502,0.000000,0.017668,0.035336,0.035336,0.070671
2,Explication,Baudeau,0,0__Baudeau_Explication,0.856419,1.127327,0.480643,0.480643,0.061173,0.664161,...,0.017478,0.026217,0.043695,0.026217,0.043695,0.026217,0.008739,0.026217,0.078651,0.000000
3,Idees sur l-administration,Baudeau,0,0__Baudeau_Idees sur l-administration,0.962696,0.386797,0.378202,0.395393,0.189101,0.378202,...,0.000000,0.042977,0.034382,0.060168,0.042977,0.051573,0.017191,0.060168,0.017191,0.008595
4,Idees sur la puissance,Baudeau,0,0__Baudeau_Idees sur la puissance,0.852470,0.748086,0.382742,0.356646,0.226166,0.434934,...,0.034795,0.043493,0.052192,0.017397,0.017397,0.043493,0.043493,0.026096,0.034795,0.052192
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4465,elements T3,Marmontel,96,96__Marmontel_elements T3,0.772668,0.297896,0.223422,0.214113,0.074474,0.623720,...,0.027928,0.009309,0.037237,0.027928,0.018619,0.027928,0.055856,0.121020,0.065165,0.065165
4466,Histoire Generale T19,Deleyre,97,97__Deleyre_Histoire Generale T19,0.988514,0.913199,0.451892,0.348334,0.169460,0.141216,...,0.160045,0.028243,0.065901,0.028243,0.009414,0.000000,0.018829,0.037658,0.000000,0.018829
4467,elements T3,Marmontel,97,97__Marmontel_elements T3,0.827950,0.373032,0.191065,0.236557,0.136475,0.564098,...,0.009098,0.009098,0.045492,0.000000,0.009098,0.009098,0.027295,0.063688,0.036393,0.072787
4468,Histoire Generale T19,Deleyre,98,98__Deleyre_Histoire Generale T19,0.916573,1.038159,0.617284,0.561167,0.112233,0.140292,...,0.046764,0.065470,0.056117,0.037411,0.046764,0.009353,0.037411,0.028058,0.000000,0.056117


In [100]:
# The leading columns of `corpus` (work, author, chunk_num, tag) are metadata;
# every column after them is an n-gram frequency feature.
N_META_COLS = 4

# .copy() detaches the metadata from `corpus`, so the in-place column insert
# done in a later cell operates on an independent frame rather than a slice.
raw_df = corpus.iloc[:, :N_META_COLS].copy()
X = corpus.iloc[:, N_META_COLS:]  # feature matrix: frequency columns only

raw_df

Unnamed: 0,work,author,chunk_num,tag
0,Avis,Baudeau,0,0__Baudeau_Avis
1,Eclaircissemens,Baudeau,0,0__Baudeau_Eclaircissemens
2,Explication,Baudeau,0,0__Baudeau_Explication
3,Idees sur l-administration,Baudeau,0,0__Baudeau_Idees sur l-administration
4,Idees sur la puissance,Baudeau,0,0__Baudeau_Idees sur la puissance
...,...,...,...,...
4465,elements T3,Marmontel,96,96__Marmontel_elements T3
4466,Histoire Generale T19,Deleyre,97,97__Deleyre_Histoire Generale T19
4467,elements T3,Marmontel,97,97__Marmontel_elements T3
4468,Histoire Generale T19,Deleyre,98,98__Deleyre_Histoire Generale T19


In [101]:
# number of rows and columns in X (the n-gram frequency subset of the corpus)
X.shape

(4470, 500)

In [102]:
# Create numeric author labels.
# `labels` holds one integer code per row; `label_uniques` is the Index of
# author names, where position == code.
labels, label_uniques = raw_df.author.factorize()

# DataFrame.insert raises ValueError when the column already exists, which
# made this cell crash on a second run. Dropping any earlier copy first
# (a no-op on the first run) keeps the cell idempotent.
raw_df = raw_df.drop(columns="author_label", errors="ignore")
raw_df.insert(1, "author_label", labels)
raw_df

Unnamed: 0,work,author_label,author,chunk_num,tag
0,Avis,0,Baudeau,0,0__Baudeau_Avis
1,Eclaircissemens,0,Baudeau,0,0__Baudeau_Eclaircissemens
2,Explication,0,Baudeau,0,0__Baudeau_Explication
3,Idees sur l-administration,0,Baudeau,0,0__Baudeau_Idees sur l-administration
4,Idees sur la puissance,0,Baudeau,0,0__Baudeau_Idees sur la puissance
...,...,...,...,...,...
4465,elements T3,11,Marmontel,96,96__Marmontel_elements T3
4466,Histoire Generale T19,3,Deleyre,97,97__Deleyre_Histoire Generale T19
4467,elements T3,11,Marmontel,97,97__Marmontel_elements T3
4468,Histoire Generale T19,3,Deleyre,98,98__Deleyre_Histoire Generale T19


In [103]:
# Pick out the chunks of the "unknown" work; choose one of:
# ed1770-nch1774-nch1780
# ed1770-CH1774-nch1780
# ed1770-nch1774-CH1780
# ed1770-CH1774-CH1780
# ed1774-nch1780
# ed1774-CH1780
# ed1780
is_problem = raw_df.work.eq("ed1780")

# Metadata and features are filtered with the same mask and re-indexed from 0
# so that their rows stay aligned.
problems = raw_df.loc[is_problem].reset_index(drop=True).copy()
problems_X = X.loc[is_problem].reset_index(drop=True).copy()
problems

Unnamed: 0,work,author_label,author,chunk_num,tag
0,ed1780,7,HDI,0,0__HDI_ed1780
1,ed1780,7,HDI,1,1__HDI_ed1780
2,ed1780,7,HDI,10,10__HDI_ed1780
3,ed1780,7,HDI,11,11__HDI_ed1780
4,ed1780,7,HDI,12,12__HDI_ed1780
5,ed1780,7,HDI,13,13__HDI_ed1780
6,ed1780,7,HDI,14,14__HDI_ed1780
7,ed1780,7,HDI,15,15__HDI_ed1780
8,ed1780,7,HDI,16,16__HDI_ed1780
9,ed1780,7,HDI,17,17__HDI_ed1780


In [104]:
# Everything that is not the "unknown" work forms the reference corpus.
is_reference = raw_df.work.ne("ed1780")

# Same mask for metadata and features; indices restart at 0 in both.
rest = raw_df.loc[is_reference].reset_index(drop=True).copy()
rest_X = X.loc[is_reference].reset_index(drop=True).copy()
rest

Unnamed: 0,work,author_label,author,chunk_num,tag
0,Avis,0,Baudeau,0,0__Baudeau_Avis
1,Eclaircissemens,0,Baudeau,0,0__Baudeau_Eclaircissemens
2,Explication,0,Baudeau,0,0__Baudeau_Explication
3,Idees sur l-administration,0,Baudeau,0,0__Baudeau_Idees sur l-administration
4,Idees sur la puissance,0,Baudeau,0,0__Baudeau_Idees sur la puissance
...,...,...,...,...,...
4422,elements T3,11,Marmontel,96,96__Marmontel_elements T3
4423,Histoire Generale T19,3,Deleyre,97,97__Deleyre_Histoire Generale T19
4424,elements T3,11,Marmontel,97,97__Marmontel_elements T3
4425,Histoire Generale T19,3,Deleyre,98,98__Deleyre_Histoire Generale T19


In [105]:
# Scaling: the scaler is fitted on the reference corpus only and then applied
# to both sets. with_mean=False means features are scaled but not centred.
sts = StandardScaler(with_mean=False)
sts.fit(rest_X)

rest_scaled_X, problems_scaled_X = (
    sts.transform(features) for features in (rest_X, problems_X)
)

In [106]:
# Verification candidates: the unique authors. The position of a name in this
# array is its numeric author_label (as produced by factorize above).
label_uniques.values

array(['Baudeau', 'Chastellux', 'Condorcet', 'Deleyre', 'dHolbach',
       'Diderot', 'Guibert', 'HDI', 'Jaucourt', 'Jussieu', 'La Grange',
       'Marmontel', 'Meister', 'Morellet', 'Naigeon', 'Pechmeja',
       'Raynal', 'Rivière', 'Saint-Lambert'], dtype=object)

In [107]:
# Configure the verifier; a seeded generator is handed in as random_state.
rng = np.random.default_rng(42)

verifier_options = dict(
    metric='minmax',
    nb_bootstrap_iter=1000,
    rnd_prop=0.35,
    random_state=rng,
)
bdi_mm = BDIVerifier(**verifier_options)

In [108]:
# Fit the verifier on the scaled reference corpus and its numeric author labels.
bdi_mm.fit(rest_scaled_X, rest["author_label"])

04/02/2025 03:43:49 [ruzicka:INFO] Fitting on 4427 documents...


In [109]:
# Test the problem chunks against every candidate author in turn.
n_problem_chunks = problems_scaled_X.shape[0]

for code, label in enumerate(label_uniques):
    print(f"Testing against {label}")
    # One candidate label is passed per problem row (the same author for all).
    strengths = bdi_mm.predict_proba(problems_scaled_X, [code] * n_problem_chunks)
    print(f"Bootstrap Match Strength (one per chunk, 0-1.0): {strengths}")

04/02/2025 03:43:50 [ruzicka:INFO] Predicting on 43 documents


Testing against Baudeau


04/02/2025 03:44:04 [ruzicka:INFO] Predicting on 43 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.778 0.948 0.942 0.88  0.645 0.889 0.541 0.704 0.865 0.669 0.83  0.5
 0.538 0.67  0.521 0.595 0.92  0.336 0.789 0.903 0.989 0.81  0.972 0.811
 0.705 0.435 0.505 0.61  0.902 0.546 0.74  0.818 0.888 0.938 0.496 0.815
 0.721 0.33  0.609 0.515 0.707 0.53  0.847]
Testing against Chastellux


04/02/2025 03:44:21 [ruzicka:INFO] Predicting on 43 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.954 0.99  0.991 0.949 0.989 0.993 0.984 0.993 0.966 0.982 0.992 0.97
 0.991 0.992 0.879 0.883 0.958 0.945 0.949 0.994 0.997 0.985 0.999 0.994
 0.972 0.955 0.912 0.929 0.927 0.884 0.87  0.881 0.945 0.992 0.955 0.85
 0.837 0.945 0.981 0.925 0.957 0.975 0.955]
Testing against Condorcet


04/02/2025 03:45:14 [ruzicka:INFO] Predicting on 43 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.991 1.    0.991 0.995 0.984 0.995 0.985 0.994 0.998 0.985 0.995 0.997
 0.981 0.985 1.    0.999 0.988 0.99  0.999 0.998 0.997 0.998 0.999 0.998
 1.    0.998 1.    0.992 0.996 0.999 0.992 0.983 1.    0.984 0.998 0.999
 1.    0.998 0.973 0.995 0.986 0.996 0.967]
Testing against Deleyre


04/02/2025 03:45:36 [ruzicka:INFO] Predicting on 43 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.98  0.758 0.975 0.975 0.845 0.952 0.956 0.885 0.953 0.933 0.634 0.861
 0.936 0.957 0.89  0.749 0.953 0.872 0.889 0.944 0.941 0.807 0.937 0.913
 0.972 0.895 0.851 0.803 0.98  0.936 0.936 0.953 0.908 0.879 0.817 0.971
 0.642 0.814 0.959 0.942 0.971 0.953 0.988]
Testing against dHolbach


04/02/2025 03:46:20 [ruzicka:INFO] Predicting on 43 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.995 0.976 0.963 0.997 0.993 0.967 0.984 0.943 0.989 0.989 0.995 0.961
 0.95  0.958 1.    1.    0.991 0.998 0.991 0.944 0.976 0.985 0.978 0.893
 0.918 0.976 0.991 0.988 0.996 0.961 0.993 0.991 0.98  0.997 0.996 0.797
 0.96  0.989 0.953 0.964 0.997 0.991 0.988]
Testing against Diderot


04/02/2025 03:46:56 [ruzicka:INFO] Predicting on 43 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.897 0.934 0.927 0.943 0.832 0.98  0.99  0.98  0.968 0.958 0.981 0.998
 0.986 0.994 0.902 0.962 0.959 0.996 0.99  0.947 0.786 0.864 0.503 0.993
 0.807 0.994 0.963 0.953 0.914 0.993 0.672 0.636 0.856 0.807 0.956 0.998
 0.972 0.997 0.999 0.994 0.942 0.965 0.96 ]
Testing against Guibert


04/02/2025 03:47:14 [ruzicka:INFO] Predicting on 43 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.941 0.967 0.919 0.86  0.791 0.882 0.719 0.827 0.756 0.994 0.765 0.699
 0.989 0.743 0.628 0.807 0.826 0.64  0.713 0.815 0.9   0.85  0.924 0.975
 0.874 0.858 0.69  0.779 0.949 0.646 0.951 0.863 0.818 0.831 0.918 0.898
 0.962 0.907 0.773 0.902 0.805 0.834 0.85 ]
Testing against HDI


04/02/2025 03:47:18 [ruzicka:INFO] Predicting on 43 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.993 0.956 0.985 0.985 0.991 0.966 0.998 1.    0.994 0.987 0.908 0.805
 0.895 0.964 0.925 0.85  0.986 0.988 0.783 0.822 0.994 0.999 0.986 0.94
 0.993 0.966 0.901 0.998 0.839 0.865 0.999 0.999 0.982 0.994 0.987 0.779
 0.862 0.832 0.576 0.9   0.996 0.991 0.998]
Testing against Jaucourt


04/02/2025 03:47:34 [ruzicka:INFO] Predicting on 43 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.921 0.558 0.762 0.697 0.624 0.645 0.809 0.519 0.851 0.693 0.8   0.864
 0.913 0.683 0.659 0.926 0.907 0.841 0.872 0.75  0.576 0.656 0.432 0.802
 0.7   0.837 0.745 0.434 0.905 0.935 0.66  0.738 0.541 0.467 0.647 0.848
 0.931 0.843 0.967 0.907 0.843 0.893 0.871]
Testing against Jussieu


04/02/2025 03:47:46 [ruzicka:INFO] Predicting on 43 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.728 0.51  0.54  0.332 0.329 0.231 0.276 0.445 0.126 0.499 0.262 0.275
 0.389 0.371 0.123 0.418 0.369 0.12  0.038 0.044 0.282 0.222 0.066 0.474
 0.657 0.192 0.135 0.09  0.735 0.163 0.396 0.383 0.398 0.16  0.056 0.409
 0.322 0.55  0.533 0.732 0.807 0.791 0.955]
Testing against La Grange


04/02/2025 03:47:56 [ruzicka:INFO] Predicting on 43 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.039 0.138 0.013 0.02  0.035 0.013 0.03  0.047 0.091 0.004 0.017 0.033
 0.01  0.048 0.012 0.003 0.001 0.038 0.008 0.065 0.014 0.021 0.003 0.129
 0.068 0.018 0.017 0.001 0.012 0.058 0.005 0.012 0.002 0.002 0.003 0.028
 0.015 0.016 0.274 0.038 0.002 0.054 0.059]
Testing against Marmontel


04/02/2025 03:48:27 [ruzicka:INFO] Predicting on 43 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.393 0.461 0.503 0.518 0.414 0.888 0.595 0.717 0.759 0.333 0.285 0.801
 0.841 0.807 0.188 0.67  0.31  0.714 0.566 0.62  0.255 0.56  0.114 0.797
 0.55  0.751 0.713 0.51  0.439 0.687 0.26  0.124 0.117 0.309 0.545 0.479
 0.407 0.816 0.93  0.841 0.385 0.64  0.21 ]
Testing against Meister


04/02/2025 03:48:32 [ruzicka:INFO] Predicting on 43 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.009 0.061 0.021 0.    0.012 0.03  0.045 0.101 0.07  0.048 0.04  0.231
 0.143 0.085 0.005 0.35  0.04  0.825 0.684 0.052 0.005 0.107 0.02  0.268
 0.008 0.09  0.024 0.041 0.    0.023 0.    0.    0.018 0.002 0.061 0.06
 0.234 0.724 0.277 0.156 0.01  0.054 0.011]
Testing against Morellet


04/02/2025 03:48:43 [ruzicka:INFO] Predicting on 43 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.452 0.808 0.684 0.449 0.639 0.677 0.632 0.281 0.934 0.764 0.885 0.88
 0.626 0.773 0.553 0.486 0.499 0.74  0.923 0.88  0.752 0.797 0.559 0.894
 0.879 0.435 0.872 0.788 0.374 0.712 0.763 0.462 0.798 0.737 0.763 0.964
 0.952 0.742 0.804 0.634 0.382 0.537 0.182]
Testing against Naigeon


04/02/2025 03:48:53 [ruzicka:INFO] Predicting on 43 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.386 0.86  0.641 0.382 0.751 0.802 0.385 0.262 0.29  0.308 0.745 0.811
 0.49  0.636 0.478 0.816 0.242 0.933 0.968 0.906 0.161 0.418 0.073 0.762
 0.087 0.384 0.698 0.521 0.134 0.502 0.172 0.038 0.297 0.41  0.395 0.688
 0.477 0.742 0.863 0.555 0.305 0.376 0.086]
Testing against Pechmeja


04/02/2025 03:49:00 [ruzicka:INFO] Predicting on 43 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.233 0.404 0.56  0.299 0.575 0.655 0.273 0.055 0.316 0.233 0.179 0.236
 0.451 0.178 0.104 0.103 0.213 0.252 0.196 0.608 0.369 0.449 0.428 0.359
 0.354 0.411 0.088 0.037 0.134 0.098 0.242 0.219 0.485 0.363 0.275 0.14
 0.479 0.197 0.069 0.122 0.243 0.03  0.601]
Testing against Raynal


04/02/2025 03:49:22 [ruzicka:INFO] Predicting on 43 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.927 0.702 0.79  0.911 0.725 0.951 0.949 0.662 0.713 0.957 0.697 0.867
 0.967 0.986 0.911 0.939 0.981 0.632 0.699 0.919 0.538 0.884 0.931 0.926
 0.889 0.954 0.939 0.756 0.931 0.891 0.979 0.992 0.859 0.983 0.772 0.879
 0.86  0.754 0.901 0.968 0.808 0.932 0.726]
Testing against Rivière


04/02/2025 03:49:34 [ruzicka:INFO] Predicting on 43 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.165 0.622 0.573 0.061 0.253 0.084 0.203 0.014 0.284 0.35  0.686 0.382
 0.23  0.329 0.09  0.216 0.211 0.751 0.804 0.871 0.602 0.756 0.449 0.208
 0.29  0.105 0.436 0.434 0.021 0.288 0.185 0.174 0.395 0.509 0.221 0.516
 0.523 0.418 0.236 0.133 0.19  0.176 0.093]
Testing against Saint-Lambert
Bootstrap Match Strength (one per chunk, 0-1.0): [0.951 0.955 0.927 0.967 0.963 0.976 0.905 0.941 0.796 0.91  0.85  0.857
 0.954 0.922 0.858 0.833 0.866 0.841 0.927 0.979 0.953 0.907 0.914 0.881
 0.577 0.955 0.905 0.932 0.976 0.739 0.924 0.971 0.858 0.82  0.881 0.731
 0.619 0.929 0.723 0.923 0.973 0.962 0.93 ]


# NB: change the output path in the next cell when a different edition is selected

In [110]:
# Output distributions for each author:
# one CSV per candidate author, one column per problem chunk (keyed by tag).
# Change the folder / file prefix below when a different edition is selected.

out_dir = Path('06_tests/editions/mfn500')
# to_csv does not create missing folders, so make sure the target exists.
out_dir.mkdir(parents=True, exist_ok=True)

authors = label_uniques.values
n_problem_chunks = problems_scaled_X.shape[0]

for code, author in enumerate(authors):

    print(author)

    # The return value is deliberately ignored: the call is made only so that
    # the verifier refreshes its internal `_dist_arrays` for this candidate.
    bdi_mm.predict_proba(problems_scaled_X, [code] * n_problem_chunks)

    # NOTE(review): `_dist_arrays` is a private attribute of BDIVerifier;
    # presumably it holds one bootstrap distribution per problem chunk from the
    # most recent predict_proba call - confirm against the installed ruzicka
    # version. This is a fresh bootstrap run, so the saved values need not
    # match the strengths printed by an earlier cell exactly.
    distributions = pd.DataFrame(dict(zip(problems.tag, bdi_mm._dist_arrays)))

    distributions.to_csv(out_dir / f'ed1780_vs_{author}.csv')

04/02/2025 03:50:17 [ruzicka:INFO] Predicting on 43 documents


Baudeau


04/02/2025 03:50:31 [ruzicka:INFO] Predicting on 43 documents


Chastellux


04/02/2025 03:50:47 [ruzicka:INFO] Predicting on 43 documents


Condorcet


04/02/2025 03:51:39 [ruzicka:INFO] Predicting on 43 documents


Deleyre


04/02/2025 03:52:01 [ruzicka:INFO] Predicting on 43 documents


dHolbach


04/02/2025 03:52:46 [ruzicka:INFO] Predicting on 43 documents


Diderot


04/02/2025 03:53:21 [ruzicka:INFO] Predicting on 43 documents


Guibert


04/02/2025 03:53:42 [ruzicka:INFO] Predicting on 43 documents


HDI


04/02/2025 03:53:46 [ruzicka:INFO] Predicting on 43 documents


Jaucourt


04/02/2025 03:54:02 [ruzicka:INFO] Predicting on 43 documents


Jussieu


04/02/2025 03:54:14 [ruzicka:INFO] Predicting on 43 documents


La Grange


04/02/2025 03:54:24 [ruzicka:INFO] Predicting on 43 documents


Marmontel


04/02/2025 03:54:55 [ruzicka:INFO] Predicting on 43 documents


Meister


04/02/2025 03:55:00 [ruzicka:INFO] Predicting on 43 documents


Morellet


04/02/2025 03:55:12 [ruzicka:INFO] Predicting on 43 documents


Naigeon


04/02/2025 03:55:22 [ruzicka:INFO] Predicting on 43 documents


Pechmeja


04/02/2025 03:55:29 [ruzicka:INFO] Predicting on 43 documents


Raynal


04/02/2025 03:55:50 [ruzicka:INFO] Predicting on 43 documents


Rivière


04/02/2025 03:56:03 [ruzicka:INFO] Predicting on 43 documents


Saint-Lambert


## Other groups of fragments
(The very same code as above, repeated in its own section only to keep the file paths simple.)

In [26]:
# Read the preprocessed table of relative n-gram frequencies for this fragment group.
corpus = pd.read_csv(
    '06_tests/other_tests/sauvage_2k_500mfn_rfreq.csv'
)

# Display the loaded frame as the cell output.
corpus

Unnamed: 0,work,author,chunk_num,tag,_de_,les_,des_,_des,oit_,_et_,...,se_d,i_le,nt_c,ns_s,_à_c,nt_à,re_a,t_da,et_p,ne_l
0,Avis,Baudeau,0,0__Baudeau_Avis,0.637944,0.554734,0.425296,0.425296,0.110947,0.462278,...,0.064719,0.027737,0.064719,0.018491,0.018491,0.046228,0.027737,0.027737,0.036982,0.009246
1,Eclaircissemens,Baudeau,0,0__Baudeau_Eclaircissemens,0.777385,0.609541,0.521201,0.521201,0.114841,0.494700,...,0.017668,0.053004,0.026502,0.008834,0.026502,0.000000,0.017668,0.035336,0.035336,0.070671
2,Explication,Baudeau,0,0__Baudeau_Explication,0.856419,1.127327,0.480643,0.480643,0.061173,0.664161,...,0.026217,0.017478,0.043695,0.026217,0.043695,0.026217,0.008739,0.026217,0.078651,0.000000
3,Idees sur l-administration,Baudeau,0,0__Baudeau_Idees sur l-administration,0.962696,0.386797,0.378202,0.395393,0.189101,0.378202,...,0.068764,0.000000,0.034382,0.060168,0.042977,0.051573,0.017191,0.060168,0.017191,0.008595
4,Idees sur la puissance,Baudeau,0,0__Baudeau_Idees sur la puissance,0.852470,0.748086,0.382742,0.356646,0.226166,0.434934,...,0.008699,0.034795,0.052192,0.017397,0.017397,0.043493,0.043493,0.026096,0.034795,0.052192
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4466,elements T3,Marmontel,96,96__Marmontel_elements T3,0.772668,0.297896,0.223422,0.214113,0.074474,0.623720,...,0.037237,0.027928,0.037237,0.027928,0.018619,0.027928,0.055856,0.121020,0.065165,0.065165
4467,Histoire Generale T19,Deleyre,97,97__Deleyre_Histoire Generale T19,0.988514,0.913199,0.451892,0.348334,0.169460,0.141216,...,0.000000,0.160045,0.065901,0.028243,0.009414,0.000000,0.018829,0.037658,0.000000,0.018829
4468,elements T3,Marmontel,97,97__Marmontel_elements T3,0.827950,0.373032,0.191065,0.236557,0.136475,0.564098,...,0.027295,0.009098,0.045492,0.000000,0.009098,0.009098,0.027295,0.063688,0.036393,0.072787
4469,Histoire Generale T19,Deleyre,98,98__Deleyre_Histoire Generale T19,0.916573,1.038159,0.617284,0.561167,0.112233,0.140292,...,0.018706,0.046764,0.056117,0.037411,0.046764,0.009353,0.037411,0.028058,0.000000,0.056117


In [27]:
# The leading columns of `corpus` (work, author, chunk_num, tag) are metadata;
# every column after them is an n-gram frequency feature.
N_META_COLS = 4

# .copy() detaches the metadata from `corpus`, so the in-place column insert
# done in a later cell operates on an independent frame rather than a slice.
raw_df = corpus.iloc[:, :N_META_COLS].copy()
X = corpus.iloc[:, N_META_COLS:]  # feature matrix: frequency columns only

raw_df

Unnamed: 0,work,author,chunk_num,tag
0,Avis,Baudeau,0,0__Baudeau_Avis
1,Eclaircissemens,Baudeau,0,0__Baudeau_Eclaircissemens
2,Explication,Baudeau,0,0__Baudeau_Explication
3,Idees sur l-administration,Baudeau,0,0__Baudeau_Idees sur l-administration
4,Idees sur la puissance,Baudeau,0,0__Baudeau_Idees sur la puissance
...,...,...,...,...
4466,elements T3,Marmontel,96,96__Marmontel_elements T3
4467,Histoire Generale T19,Deleyre,97,97__Deleyre_Histoire Generale T19
4468,elements T3,Marmontel,97,97__Marmontel_elements T3
4469,Histoire Generale T19,Deleyre,98,98__Deleyre_Histoire Generale T19


In [28]:
# number of rows and columns in X (the n-gram frequency subset of the corpus)
X.shape

(4471, 500)

In [29]:
# Create numeric author labels.
# `labels` holds one integer code per row; `label_uniques` is the Index of
# author names, where position == code.
labels, label_uniques = raw_df.author.factorize()

# DataFrame.insert raises ValueError when the column already exists, which
# made this cell crash on a second run. Dropping any earlier copy first
# (a no-op on the first run) keeps the cell idempotent.
raw_df = raw_df.drop(columns="author_label", errors="ignore")
raw_df.insert(1, "author_label", labels)
raw_df

Unnamed: 0,work,author_label,author,chunk_num,tag
0,Avis,0,Baudeau,0,0__Baudeau_Avis
1,Eclaircissemens,0,Baudeau,0,0__Baudeau_Eclaircissemens
2,Explication,0,Baudeau,0,0__Baudeau_Explication
3,Idees sur l-administration,0,Baudeau,0,0__Baudeau_Idees sur l-administration
4,Idees sur la puissance,0,Baudeau,0,0__Baudeau_Idees sur la puissance
...,...,...,...,...,...
4466,elements T3,11,Marmontel,96,96__Marmontel_elements T3
4467,Histoire Generale T19,3,Deleyre,97,97__Deleyre_Histoire Generale T19
4468,elements T3,11,Marmontel,97,97__Marmontel_elements T3
4469,Histoire Generale T19,3,Deleyre,98,98__Deleyre_Histoire Generale T19


In [30]:
# Pick out the chunks of the "unknown" work; choose one of:
# pencil-pensees-detachees
# ink-melanges
# fr-sauvage
is_problem = raw_df.work.eq("fr-sauvage")

# Metadata and features are filtered with the same mask and re-indexed from 0
# so that their rows stay aligned.
problems = raw_df.loc[is_problem].reset_index(drop=True).copy()
problems_X = X.loc[is_problem].reset_index(drop=True).copy()
problems

Unnamed: 0,work,author_label,author,chunk_num,tag
0,fr-sauvage,7,HDI,0,0__HDI_fr-sauvage
1,fr-sauvage,7,HDI,1,1__HDI_fr-sauvage
2,fr-sauvage,7,HDI,2,2__HDI_fr-sauvage
3,fr-sauvage,7,HDI,3,3__HDI_fr-sauvage


In [31]:
# Everything that is not the "unknown" work forms the reference corpus.
is_reference = raw_df.work.ne("fr-sauvage")

# Same mask for metadata and features; indices restart at 0 in both.
rest = raw_df.loc[is_reference].reset_index(drop=True).copy()
rest_X = X.loc[is_reference].reset_index(drop=True).copy()
rest

Unnamed: 0,work,author_label,author,chunk_num,tag
0,Avis,0,Baudeau,0,0__Baudeau_Avis
1,Eclaircissemens,0,Baudeau,0,0__Baudeau_Eclaircissemens
2,Explication,0,Baudeau,0,0__Baudeau_Explication
3,Idees sur l-administration,0,Baudeau,0,0__Baudeau_Idees sur l-administration
4,Idees sur la puissance,0,Baudeau,0,0__Baudeau_Idees sur la puissance
...,...,...,...,...,...
4462,elements T3,11,Marmontel,96,96__Marmontel_elements T3
4463,Histoire Generale T19,3,Deleyre,97,97__Deleyre_Histoire Generale T19
4464,elements T3,11,Marmontel,97,97__Marmontel_elements T3
4465,Histoire Generale T19,3,Deleyre,98,98__Deleyre_Histoire Generale T19


In [32]:
# Scaling
# The scaler is fitted on the reference corpus only and then applied to both
# sets, so the unknown chunks do not influence the scaling parameters.
# with_mean=False: features are scaled but not centred.
sts = StandardScaler(with_mean=False)
sts.fit(rest_X)

rest_scaled_X = sts.transform(rest_X)
problems_scaled_X = sts.transform(problems_X)

In [33]:
# Verification: show the author names in label order
# (an author's position in this array is its numeric `author_label`).
label_uniques.to_numpy()

array(['Baudeau', 'Chastellux', 'Condorcet', 'Deleyre', 'dHolbach',
       'Diderot', 'Guibert', 'HDI', 'Jaucourt', 'Jussieu', 'La Grange',
       'Marmontel', 'Meister', 'Morellet', 'Naigeon', 'Pechmeja',
       'Raynal', 'Rivière', 'Saint-Lambert'], dtype=object)

In [34]:
# Set up the verifier.
# A seeded generator is handed to the verifier as its random state.
rng = np.random.default_rng(seed=42)

# NOTE(review): presumably `nb_bootstrap_iter` is the number of bootstrap
# rounds and `rnd_prop` the proportion sampled in each round - confirm against
# the BDIVerifier documentation.
bdi_mm = BDIVerifier(
    random_state=rng,
    metric="minmax",
    rnd_prop=0.35,
    nb_bootstrap_iter=1000,
)

In [35]:
# Fit the verifier on the scaled reference chunks and their numeric author labels.
bdi_mm.fit(rest_scaled_X, rest["author_label"])

04/03/2025 09:12:34 [ruzicka:INFO] Fitting on 4467 documents...


In [36]:
# Test every unknown chunk against each candidate author in turn.
n_chunks = problems_scaled_X.shape[0]

# `label_uniques` comes from factorize(), so an author's position in it is
# that author's numeric code.
for code, label in enumerate(label_uniques.values):
    print(f"Testing against {label}")
    # The same candidate code is repeated once per unknown chunk.
    strengths = bdi_mm.predict_proba(problems_scaled_X, [code] * n_chunks)
    print(f"Bootstrap Match Strength (one per chunk, 0-1.0): {strengths}")

04/03/2025 09:12:39 [ruzicka:INFO] Predicting on 4 documents


Testing against Baudeau


04/03/2025 09:12:40 [ruzicka:INFO] Predicting on 4 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.719 0.914 0.902 0.752]
Testing against Chastellux


04/03/2025 09:12:41 [ruzicka:INFO] Predicting on 4 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.895 0.953 0.809 0.865]
Testing against Condorcet


04/03/2025 09:12:46 [ruzicka:INFO] Predicting on 4 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.997 0.923 0.99  1.   ]
Testing against Deleyre


04/03/2025 09:12:48 [ruzicka:INFO] Predicting on 4 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.877 0.767 0.915 0.808]
Testing against dHolbach


04/03/2025 09:12:52 [ruzicka:INFO] Predicting on 4 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.947 0.913 0.824 0.782]
Testing against Diderot


04/03/2025 09:12:56 [ruzicka:INFO] Predicting on 4 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.724 0.668 0.978 0.962]
Testing against Guibert


04/03/2025 09:12:57 [ruzicka:INFO] Predicting on 4 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.645 0.75  0.834 0.883]
Testing against HDI


04/03/2025 09:12:58 [ruzicka:INFO] Predicting on 4 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [1. 1. 1. 1.]
Testing against Jaucourt


04/03/2025 09:12:59 [ruzicka:INFO] Predicting on 4 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.466 0.472 0.802 0.833]
Testing against Jussieu


04/03/2025 09:13:00 [ruzicka:INFO] Predicting on 4 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.293 0.191 0.214 0.259]
Testing against La Grange


04/03/2025 09:13:01 [ruzicka:INFO] Predicting on 4 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.005 0.003 0.005 0.034]
Testing against Marmontel


04/03/2025 09:13:04 [ruzicka:INFO] Predicting on 4 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.131 0.29  0.348 0.427]
Testing against Meister


04/03/2025 09:13:05 [ruzicka:INFO] Predicting on 4 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.018 0.007 0.071 0.173]
Testing against Morellet


04/03/2025 09:13:06 [ruzicka:INFO] Predicting on 4 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.783 0.779 0.916 0.955]
Testing against Naigeon


04/03/2025 09:13:07 [ruzicka:INFO] Predicting on 4 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.233 0.488 0.522 0.549]
Testing against Pechmeja


04/03/2025 09:13:07 [ruzicka:INFO] Predicting on 4 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.59  0.328 0.075 0.472]
Testing against Raynal


04/03/2025 09:13:09 [ruzicka:INFO] Predicting on 4 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.742 0.941 0.928 0.63 ]
Testing against Rivière


04/03/2025 09:13:10 [ruzicka:INFO] Predicting on 4 documents


Bootstrap Match Strength (one per chunk, 0-1.0): [0.398 0.39  0.513 0.566]
Testing against Saint-Lambert
Bootstrap Match Strength (one per chunk, 0-1.0): [0.681 0.766 0.763 0.483]


In [37]:
# Export, for every candidate author, the verifier's per-chunk distance arrays
# for the unknown work to one CSV file per author.
import os  # used only here, to make sure the output directory exists

authors = label_uniques.values

# DataFrame.to_csv does not create missing directories and would fail with an
# OSError, so create the target directory up front (no-op if it exists).
out_dir = '06_tests/other_tests/mfn500'
os.makedirs(out_dir, exist_ok=True)

# Loop-invariant: number of unknown chunks being tested.
n_problems = problems_scaled_X.shape[0]

for a in authors:

    print(a)

    # The return value is not used; the call is made so that the verifier
    # refreshes `_dist_arrays` for this candidate.
    # NOTE(review): `_dist_arrays` is a private attribute, presumably holding
    # one array of bootstrap distances per chunk from the latest
    # predict_proba call - confirm against BDIVerifier.
    bdi_mm.predict_proba(
        problems_scaled_X, [label_uniques.get_loc(a)] * n_problems
    )

    # One column per unknown chunk, keyed by the chunk's tag.
    x = pd.DataFrame(dict(zip(problems.tag, bdi_mm._dist_arrays)))

    fh = out_dir + '/sauvage_vs_' + a + '.csv'

    x.to_csv(fh)

04/03/2025 09:13:25 [ruzicka:INFO] Predicting on 4 documents


Baudeau


04/03/2025 09:13:26 [ruzicka:INFO] Predicting on 4 documents


Chastellux


04/03/2025 09:13:28 [ruzicka:INFO] Predicting on 4 documents


Condorcet


04/03/2025 09:13:33 [ruzicka:INFO] Predicting on 4 documents


Deleyre


04/03/2025 09:13:35 [ruzicka:INFO] Predicting on 4 documents


dHolbach


04/03/2025 09:13:39 [ruzicka:INFO] Predicting on 4 documents


Diderot


04/03/2025 09:13:42 [ruzicka:INFO] Predicting on 4 documents


Guibert


04/03/2025 09:13:44 [ruzicka:INFO] Predicting on 4 documents


HDI


04/03/2025 09:13:44 [ruzicka:INFO] Predicting on 4 documents


Jaucourt


04/03/2025 09:13:46 [ruzicka:INFO] Predicting on 4 documents


Jussieu


04/03/2025 09:13:47 [ruzicka:INFO] Predicting on 4 documents


La Grange


04/03/2025 09:13:48 [ruzicka:INFO] Predicting on 4 documents


Marmontel


04/03/2025 09:13:51 [ruzicka:INFO] Predicting on 4 documents


Meister


04/03/2025 09:13:51 [ruzicka:INFO] Predicting on 4 documents


Morellet


04/03/2025 09:13:52 [ruzicka:INFO] Predicting on 4 documents


Naigeon


04/03/2025 09:13:53 [ruzicka:INFO] Predicting on 4 documents


Pechmeja


04/03/2025 09:13:54 [ruzicka:INFO] Predicting on 4 documents


Raynal


04/03/2025 09:13:56 [ruzicka:INFO] Predicting on 4 documents


Rivière


04/03/2025 09:13:57 [ruzicka:INFO] Predicting on 4 documents


Saint-Lambert
