# Imports

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import gensim
from gensim.models import Doc2Vec
from gensim.models.doc2vec import TaggedDocument
from sklearn import utils
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef
import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
import multiprocessing


In [2]:
from google.colab import drive
import glob

drive.mount('/content/drive')

Mounted at /content/drive


# Load and preprocess data

In [3]:
# CHANGE TO YOUR PATH
colab_resources_path = "/content/drive/My Drive/projectml"

In [4]:
data_files = glob.glob(colab_resources_path + "/*.csv")
data_files += glob.glob(colab_resources_path + "/*.py")
for data_file in data_files:
  print('Copying file {} to colab root.'.format(data_file))
  !cp "$data_file" .

Copying file /content/drive/My Drive/projectml/random.csv to colab root.
Copying file /content/drive/My Drive/projectml/nam.csv to colab root.
Copying file /content/drive/My Drive/projectml/am.csv to colab root.
Copying file /content/drive/My Drive/projectml/am_additional.csv to colab root.
Copying file /content/drive/My Drive/projectml/missclassified_roberta_mcc.csv to colab root.
Copying file /content/drive/My Drive/projectml/miss_5models.csv to colab root.
Copying file /content/drive/My Drive/projectml/missclassified_lr_robertas_ens_mcc.csv to colab root.
Copying file /content/drive/My Drive/projectml/data_preprocess.py to colab root.


In [5]:
from data_preprocess import getTrainData

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Load full data

In [6]:
train = getTrainData(include_random=True, no_title=False, n_sentences=-1)

Count amount of words in data

In [7]:
train['text'].apply(lambda x: len(x.split(' '))).sum()

1207120

Function `tokenize_text` tokenizes texts, removes stopwords, punctuation, digits and single letter words.

Arguments:


*   `text` - text from train data

Returns:


*   list of `TaggedDocument` objects consisting of 2 parts: tokens and tags





In [8]:
stemmer=SnowballStemmer('english')
s=stopwords.words('english')
def tokenize_text(text):
    tokens = []
    for sent in nltk.sent_tokenize(text):
        sent=re.sub('[^a-zA-Z]',' ',sent.lower())
        for word in nltk.word_tokenize(sent):
            if len(word) < 2 or word in s:
                continue
            tokens.append(stemmer.stem(word))
    return tokens

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Apply `tokenize_text` to data 

In [None]:
train_tagged = train.apply(
    lambda r: TaggedDocument(words=tokenize_text(r['text']), tags=[r.label]), axis=1)

Example of tokenized text

In [9]:
train_tagged[1]

TaggedDocument(words=['briberi', 'act', 'guidanc', 'detail', 'briberi', 'act', 'creat', 'new', 'offenc', 'section', 'commit', 'commerci', 'organis', 'fail', 'prevent', 'person', 'associ', 'bribe', 'anoth', 'person', 'behalf', 'organis', 'prove', 'adequ', 'procedur', 'place', 'prevent', 'person', 'associ', 'bribe', 'defenc', 'section', 'offenc', 'guidanc', 'publish', 'section', 'act', 'help', 'commerci', 'organis', 'size', 'sector', 'understand', 'sort', 'procedur', 'put', 'place', 'prevent', 'briberi', 'mention', 'section', 'quick', 'start', 'guid', 'also', 'publish', 'set', 'key', 'point', 'report', 'assess', 'impact', 'awar', 'briberi', 'act', 'among', 'small', 'medium', 'size', 'enterpris', 'export', 'good', 'oversea', 'also', 'avail', 'citi', 'london', 'polic', 'commiss', 'short', 'crime', 'prevent', 'video', 'design', 'use', 'compani', 'part', 'ongo', 'intern', 'train', 'staff', 'demonstr', 'impact', 'corrupt', 'investig', 'could', 'compani', 'individu', 'ignor', 'act', 'break', '

Split data on train and test

In [10]:
tr_tagged, ts_tagged = train_test_split(train_tagged, test_size=0.2, random_state=0)

# Model 1

In [12]:
cores = multiprocessing.cpu_count()

Create Doc2Vec model with distributed bag of words and train it

In [13]:
model_dbow = Doc2Vec(dm=0, vector_size=300, negative=5, hs=0, min_count=2, sample = 1e-6, workers=cores)
model_dbow.build_vocab([x for x in tqdm(tr_tagged.values)])

for epoch in range(20):
    model_dbow.train(utils.shuffle([x for x in tqdm(tr_tagged.values)]), total_examples=len(tr_tagged.values), epochs=1)
    model_dbow.alpha -= 0.002
    model_dbow.min_alpha = model_dbow.alpha

100%|██████████| 1276/1276 [00:00<00:00, 402824.92it/s]
100%|██████████| 1276/1276 [00:00<00:00, 796774.14it/s]
100%|██████████| 1276/1276 [00:00<00:00, 906953.38it/s]
100%|██████████| 1276/1276 [00:00<00:00, 1714813.17it/s]
100%|██████████| 1276/1276 [00:00<00:00, 293047.80it/s]
100%|██████████| 1276/1276 [00:00<00:00, 827447.73it/s]
100%|██████████| 1276/1276 [00:00<00:00, 1293361.99it/s]
100%|██████████| 1276/1276 [00:00<00:00, 301211.84it/s]
100%|██████████| 1276/1276 [00:00<00:00, 252057.26it/s]
100%|██████████| 1276/1276 [00:00<00:00, 352193.47it/s]
100%|██████████| 1276/1276 [00:00<00:00, 1277310.72it/s]
100%|██████████| 1276/1276 [00:00<00:00, 288856.43it/s]
100%|██████████| 1276/1276 [00:00<00:00, 1768064.72it/s]
100%|██████████| 1276/1276 [00:00<00:00, 1284669.20it/s]
100%|██████████| 1276/1276 [00:00<00:00, 1215519.40it/s]
100%|██████████| 1276/1276 [00:00<00:00, 401074.03it/s]
100%|██████████| 1276/1276 [00:00<00:00, 956727.19it/s]
100%|██████████| 1276/1276 [00:00<00:00, 1

Function `vec_for_learning` gets vectors from tokenized documents.

Arguments:


*   `model` - trained Doc2Vec model
*   `tagged_docs` - tokenized documents


Returns:


*   zip object of lists with labels and Doc2Vec vectors

In [11]:
def vec_for_learning(model, tagged_docs):
    sents = tagged_docs.values
    targets, regressors = zip(*[(doc.tags[0], model.infer_vector(doc.words, steps=20)) for doc in sents])
    return targets, regressors

Get train vectors from tokenized train and test data, train Logistic regression on train data and predict test data.

In [None]:
y_train_1, x_train_1 = vec_for_learning(model_dbow, tr_tagged)
y_test_1, x_test_1 = vec_for_learning(model_dbow, ts_tagged)

lr1 = LogisticRegression(solver='liblinear', penalty='l2', C=15, max_iter=1000, random_state=0)

lr1.fit(x_train_1, y_train_1)
predictions = lr1.predict(x_test_1)

In [14]:
print('Accuracy score: ', accuracy_score(y_test_1, predictions))
print('Precision score: ', precision_score(y_test_1, predictions))
print('Recall score: ', recall_score(y_test_1, predictions))
print('F1 score: ',f1_score(y_test_1, predictions))
print('MCC score: ', matthews_corrcoef(y_test_1, predictions))


Accuracy score:  0.6959247648902821
Precision score:  0.7090909090909091
Recall score:  0.7048192771084337
F1 score:  0.7069486404833837
MCC score:  0.39100142800519605


# Model 2

Create Doc2Vec model with distributed memory and train it

In [15]:
model_dbow2 = Doc2Vec(dm=1, vector_size=300, negative=5, hs=0, min_count=2, sample = 1e-6, workers=cores)
model_dbow2.build_vocab([x for x in tqdm(tr_tagged.values)])

for epoch in range(20):
    model_dbow2.train(utils.shuffle([x for x in tqdm(tr_tagged.values)]), total_examples=len(tr_tagged.values), epochs=1)
    model_dbow2.alpha -= 0.002
    model_dbow2.min_alpha = model_dbow2.alpha

100%|██████████| 1276/1276 [00:00<00:00, 150094.85it/s]
100%|██████████| 1276/1276 [00:00<00:00, 501117.22it/s]
100%|██████████| 1276/1276 [00:00<00:00, 1341672.58it/s]
100%|██████████| 1276/1276 [00:00<00:00, 1148483.24it/s]
100%|██████████| 1276/1276 [00:00<00:00, 1752433.50it/s]
100%|██████████| 1276/1276 [00:00<00:00, 493265.61it/s]
100%|██████████| 1276/1276 [00:00<00:00, 287769.22it/s]
100%|██████████| 1276/1276 [00:00<00:00, 1806864.25it/s]
100%|██████████| 1276/1276 [00:00<00:00, 1773925.06it/s]
100%|██████████| 1276/1276 [00:00<00:00, 1540567.62it/s]
100%|██████████| 1276/1276 [00:00<00:00, 726967.12it/s]
100%|██████████| 1276/1276 [00:00<00:00, 681774.76it/s]
100%|██████████| 1276/1276 [00:00<00:00, 430392.59it/s]
100%|██████████| 1276/1276 [00:00<00:00, 1068676.50it/s]
100%|██████████| 1276/1276 [00:00<00:00, 1081854.03it/s]
100%|██████████| 1276/1276 [00:00<00:00, 1803211.56it/s]
100%|██████████| 1276/1276 [00:00<00:00, 1353548.79it/s]
100%|██████████| 1276/1276 [00:00<00:0

CPU times: user 17.3 s, sys: 569 ms, total: 17.8 s
Wall time: 12.9 s


Get train vectors from tokenized train and test data, train Logistic regression on train data and predict test data.

In [16]:
y_train_2, x_train_2 = vec_for_learning(model_dbow2, tr_tagged)
y_test_2, x_test_2 = vec_for_learning(model_dbow2, ts_tagged)

lr2 = LogisticRegression(solver='liblinear', penalty='l2', C=15, max_iter=1000, random_state=0)

lr2.fit(x_train_2, y_train_2)
predictions = lr2.predict(x_test_2)

print('Accuracy score: ', accuracy_score(y_test_2, predictions))
print('Precision score: ', precision_score(y_test_2, predictions))
print('Recall score: ', recall_score(y_test_2, predictions))
print('F1 score: ',f1_score(y_test_2, predictions))
print('MCC score: ', matthews_corrcoef(y_test_2, predictions))


Accuracy score:  0.8683385579937304
Precision score:  0.8780487804878049
Recall score:  0.8674698795180723
F1 score:  0.8727272727272728
MCC score:  0.7364320419331226


# Model 3

Get train/test vectors by concatenating train/test vectors from Doc2Vec with distributed memory and Doc2Vec with distributed bag of words. Train Logistic regression on train data and predict test data.

1.   List item
2.   List item



In [17]:
x_train_3 = np.concatenate([x_train_1, x_train_2], axis=1)
x_test_3 =  np.concatenate([x_test_1, x_test_2], axis=1)

lr2 = LogisticRegression(solver='liblinear', penalty='l2', C=15, max_iter=1000, random_state=0)

lr2.fit(x_train_3, y_train_2)
predictions = lr2.predict(x_test_3)

print('Accuracy score: ', accuracy_score(y_test_2, predictions))
print('Precision score: ', precision_score(y_test_2, predictions))
print('Recall score: ', recall_score(y_test_2, predictions))
print('F1 score: ',f1_score(y_test_2, predictions))
print('MCC score: ', matthews_corrcoef(y_test_2, predictions))

Accuracy score:  0.8714733542319749
Precision score:  0.8980891719745223
Recall score:  0.8493975903614458
F1 score:  0.8730650154798762
MCC score:  0.744295118983762


# Cross-validation for model 2

Do 6-fold cross-validation with Doc2Vec model with distributed memory. 

In [41]:
n = 6
kf = KFold(n_splits=n, shuffle=True, random_state=0)

def cross_validation_for_2(kf_, model, X):

    i = 0

    accuracy = np.zeros(n)
    precision = np.zeros(n)
    recall = np.zeros(n)
    f1 = np.zeros(n)
    mcc = np.zeros(n)
    missclassified_idxs = []

    for train_index, test_index in kf_.split(X):
        X_train, X_val = X.iloc[train_index], X.iloc[test_index]

        train_tagged = X_train.apply(
    lambda r: TaggedDocument(words=tokenize_text(r['text']), tags=[r['label']]), axis=1)
        test_tagged = X_val.apply(
    lambda r: TaggedDocument(words=tokenize_text(r['text']), tags=[r['label']]), axis=1)

        model_dbow = Doc2Vec(dm=1, vector_size=300, negative=5, hs=0, min_count=2, sample = 1e-6, workers=cores)
        model_dbow.build_vocab([x for x in tqdm(train_tagged.values)])

        for epoch in range(20):
            model_dbow.train(utils.shuffle([x for x in tqdm(train_tagged.values)]), total_examples=len(train_tagged.values), epochs=1)
            model_dbow.alpha -= 0.002
            model_dbow.min_alpha = model_dbow.alpha
            
        y_train, X_train_cv = vec_for_learning(model_dbow,train_tagged)
        y_val, X_test_cv = vec_for_learning(model_dbow,test_tagged)
        
        model.fit(X_train_cv, y_train)
        predictions = model.predict(X_test_cv)

        accuracy[i] = accuracy_score(y_val, predictions)
        precision[i] = precision_score(y_val, predictions)
        recall[i] = recall_score(y_val, predictions)
        f1[i] = f1_score(y_val, predictions)
        mcc[i] = matthews_corrcoef(y_val, predictions)
        
        i += 1
    return np.mean(accuracy), np.mean(precision), np.mean(recall), np.mean(f1), np.mean(mcc), missclassified_idxs

In [20]:
lr = LogisticRegression(solver='liblinear', penalty='l2', C=15, max_iter=1000, random_state=0)
accuracy, precision, recall, f1, mcc, missclassified_idxs = cross_validation_for_2(kf, lr, train)

100%|██████████| 1329/1329 [00:00<00:00, 929038.34it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1070526.22it/s]
100%|██████████| 1329/1329 [00:00<00:00, 620392.88it/s]
100%|██████████| 1329/1329 [00:00<00:00, 449027.71it/s]
100%|██████████| 1329/1329 [00:00<00:00, 382714.04it/s]
100%|██████████| 1329/1329 [00:00<00:00, 595855.69it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1693265.50it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1241753.18it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1834231.66it/s]
100%|██████████| 1329/1329 [00:00<00:00, 391448.74it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1317162.10it/s]
100%|██████████| 1329/1329 [00:00<00:00, 531638.53it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1952444.84it/s]
100%|██████████| 1329/1329 [00:00<00:00, 591995.54it/s]
100%|██████████| 1329/1329 [00:00<00:00, 2109054.11it/s]
100%|██████████| 1329/1329 [00:00<00:00, 2086163.93it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1371611.72it/s]
100%|██████████| 1329/1329 [00:00<00:00

Fold index:  0
Accuracy score:  0.868421052631579
Precision score:  0.855072463768116
Recall score:  0.8872180451127819
F1 score:  0.8708487084870848
MCC score:  0.7373633501503364


100%|██████████| 1329/1329 [00:00<00:00, 455522.60it/s]
100%|██████████| 1329/1329 [00:00<00:00, 682029.86it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1352968.45it/s]
100%|██████████| 1329/1329 [00:00<00:00, 614782.18it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1997216.06it/s]
100%|██████████| 1329/1329 [00:00<00:00, 419999.25it/s]
100%|██████████| 1329/1329 [00:00<00:00, 310334.60it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1727372.18it/s]
100%|██████████| 1329/1329 [00:00<00:00, 795523.05it/s]
100%|██████████| 1329/1329 [00:00<00:00, 391889.06it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1393557.50it/s]
100%|██████████| 1329/1329 [00:00<00:00, 298039.35it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1364226.63it/s]
100%|██████████| 1329/1329 [00:00<00:00, 666694.18it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1109520.31it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1931472.63it/s]
100%|██████████| 1329/1329 [00:00<00:00, 653791.93it/s]
100%|██████████| 1329/1329 [00:00<00:00, 

Fold index:  1
Accuracy score:  0.8759398496240601
Precision score:  0.9230769230769231
Recall score:  0.8391608391608392
F1 score:  0.8791208791208791
MCC score:  0.7559071349320328


100%|██████████| 1329/1329 [00:00<00:00, 1015157.53it/s]
100%|██████████| 1329/1329 [00:00<00:00, 757162.46it/s]
100%|██████████| 1329/1329 [00:00<00:00, 2135720.31it/s]
100%|██████████| 1329/1329 [00:00<00:00, 2018186.10it/s]
100%|██████████| 1329/1329 [00:00<00:00, 683870.69it/s]
100%|██████████| 1329/1329 [00:00<00:00, 449679.74it/s]
100%|██████████| 1329/1329 [00:00<00:00, 800665.04it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1452757.37it/s]
100%|██████████| 1329/1329 [00:00<00:00, 2138998.47it/s]
100%|██████████| 1329/1329 [00:00<00:00, 441558.14it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1261138.01it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1063581.38it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1063784.35it/s]
100%|██████████| 1329/1329 [00:00<00:00, 604449.15it/s]
100%|██████████| 1329/1329 [00:00<00:00, 386991.81it/s]
100%|██████████| 1329/1329 [00:00<00:00, 284951.95it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1410483.30it/s]
100%|██████████| 1329/1329 [00:00<00:00

Fold index:  2
Accuracy score:  0.8421052631578947
Precision score:  0.8394160583941606
Recall score:  0.8518518518518519
F1 score:  0.8455882352941176
MCC score:  0.6841450702283616


100%|██████████| 1329/1329 [00:00<00:00, 939529.75it/s]
100%|██████████| 1329/1329 [00:00<00:00, 614646.60it/s]
100%|██████████| 1329/1329 [00:00<00:00, 742339.86it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1728443.42it/s]
100%|██████████| 1329/1329 [00:00<00:00, 802856.12it/s]
100%|██████████| 1329/1329 [00:00<00:00, 408697.85it/s]
100%|██████████| 1329/1329 [00:00<00:00, 437160.22it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1125652.26it/s]
100%|██████████| 1329/1329 [00:00<00:00, 745018.71it/s]
100%|██████████| 1329/1329 [00:00<00:00, 736163.50it/s]
100%|██████████| 1329/1329 [00:00<00:00, 265717.90it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1138062.48it/s]
100%|██████████| 1329/1329 [00:00<00:00, 381196.06it/s]
100%|██████████| 1329/1329 [00:00<00:00, 445831.40it/s]
100%|██████████| 1329/1329 [00:00<00:00, 391833.97it/s]
100%|██████████| 1329/1329 [00:00<00:00, 311339.92it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1363559.20it/s]
100%|██████████| 1329/1329 [00:00<00:00, 191

Fold index:  3
Accuracy score:  0.849624060150376
Precision score:  0.8134328358208955
Recall score:  0.8790322580645161
F1 score:  0.8449612403100775
MCC score:  0.7013843945170944


100%|██████████| 1329/1329 [00:00<00:00, 777437.94it/s]
100%|██████████| 1329/1329 [00:00<00:00, 981724.20it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1349365.77it/s]
100%|██████████| 1329/1329 [00:00<00:00, 367233.02it/s]
100%|██████████| 1329/1329 [00:00<00:00, 454038.45it/s]
100%|██████████| 1329/1329 [00:00<00:00, 2124325.46it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1111289.88it/s]
100%|██████████| 1329/1329 [00:00<00:00, 2304353.05it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1397050.13it/s]
100%|██████████| 1329/1329 [00:00<00:00, 721023.16it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1201083.82it/s]
100%|██████████| 1329/1329 [00:00<00:00, 413058.91it/s]
100%|██████████| 1329/1329 [00:00<00:00, 301440.08it/s]
100%|██████████| 1329/1329 [00:00<00:00, 2072975.09it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1484482.03it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1584488.35it/s]
100%|██████████| 1329/1329 [00:00<00:00, 2103483.02it/s]
100%|██████████| 1329/1329 [00:00<00:0

Fold index:  4
Accuracy score:  0.8796992481203008
Precision score:  0.8992248062015504
Recall score:  0.8592592592592593
F1 score:  0.8787878787878787
MCC score:  0.7602806026407116


100%|██████████| 1330/1330 [00:00<00:00, 755065.55it/s]
100%|██████████| 1330/1330 [00:00<00:00, 1900008.28it/s]
100%|██████████| 1330/1330 [00:00<00:00, 2232262.63it/s]
100%|██████████| 1330/1330 [00:00<00:00, 2109045.11it/s]
100%|██████████| 1330/1330 [00:00<00:00, 730160.25it/s]
100%|██████████| 1330/1330 [00:00<00:00, 1210332.90it/s]
100%|██████████| 1330/1330 [00:00<00:00, 2161342.24it/s]
100%|██████████| 1330/1330 [00:00<00:00, 749184.03it/s]
100%|██████████| 1330/1330 [00:00<00:00, 575985.99it/s]
100%|██████████| 1330/1330 [00:00<00:00, 282695.20it/s]
100%|██████████| 1330/1330 [00:00<00:00, 1042306.49it/s]
100%|██████████| 1330/1330 [00:00<00:00, 1352017.53it/s]
100%|██████████| 1330/1330 [00:00<00:00, 1318464.74it/s]
100%|██████████| 1330/1330 [00:00<00:00, 321004.97it/s]
100%|██████████| 1330/1330 [00:00<00:00, 1319088.28it/s]
100%|██████████| 1330/1330 [00:00<00:00, 2110641.06it/s]
100%|██████████| 1330/1330 [00:00<00:00, 1191461.84it/s]
100%|██████████| 1330/1330 [00:00<00:

Fold index:  5
Accuracy score:  0.8566037735849057
Precision score:  0.8560606060606061
Recall score:  0.8560606060606061
F1 score:  0.8560606060606061
MCC score:  0.7132034632034632


In [21]:
print('Accuracy score: ', accuracy)
print('Precision score: ', precision)
print('Recall score: ', recall)
print('F1 score: ', f1)
print('MCC score: ', mcc)      

Accuracy score:  0.8620655412115195
Precision score:  0.8643806155537086
Recall score:  0.8620971432516424
F1 score:  0.8625612580101074
MCC score:  0.7253806692786666


# Cross-validation for model 3


Do 6-fold cross-validation with concatenated vectors from Doc2Vec model with distributed memory and vactors from Doc2Vec model with distributed bag of words.

In [37]:
n = 6
kf = KFold(n_splits=n, shuffle=True, random_state=0)
def cross_validation_for_3(kf_, model, X):
    i = 0

    accuracy = np.zeros(n)
    precision = np.zeros(n)
    recall = np.zeros(n)
    f1 = np.zeros(n)
    mcc = np.zeros(n)
    missclassified_idxs = []

    for train_index, test_index in kf_.split(X):

        X_train, X_val = X.iloc[train_index], X.iloc[test_index]

        train_tagged = X_train.apply(
    lambda r: TaggedDocument(words=tokenize_text(r['text']), tags=[r['label']]), axis=1)
        test_tagged = X_val.apply(
    lambda r: TaggedDocument(words=tokenize_text(r['text']), tags=[r['label']]), axis=1)

        model_dbow = Doc2Vec(dm=1, vector_size=300, negative=5, hs=0, min_count=2, sample = 1e-6, workers=cores)
        model_dbow.build_vocab([x for x in tqdm(train_tagged.values)])

        model_dbow2 = Doc2Vec(dm=0, vector_size=300, negative=5, hs=0, min_count=2, sample = 1e-6, workers=cores)        
        model_dbow2.build_vocab([x for x in tqdm(train_tagged.values)])

        for epoch in range(20):

            model_dbow.train(utils.shuffle([x for x in tqdm(train_tagged.values)]), total_examples=len(train_tagged.values), epochs=1)
            model_dbow.alpha -= 0.002
            model_dbow.min_alpha = model_dbow.alpha

            model_dbow2.train(utils.shuffle([x for x in tqdm(train_tagged.values)]), total_examples=len(train_tagged.values), epochs=1)
            model_dbow2.alpha -= 0.002
            model_dbow2.min_alpha = model_dbow.alpha

        y_train1, X_train_cv1 = vec_for_learning(model_dbow,train_tagged)
        y_val, X_test_cv1 = vec_for_learning(model_dbow,test_tagged)
        y_train2, X_train_cv2 = vec_for_learning(model_dbow2,train_tagged)
        y_val, X_test_cv2 = vec_for_learning(model_dbow2,test_tagged)
        
        X_train_cv = np.concatenate([X_train_cv1, X_train_cv2], axis=1)
        X_test_cv =  np.concatenate([X_test_cv1, X_test_cv2], axis=1)

        model.fit(X_train_cv, y_train1)
        predictions = model.predict(X_test_cv)

        accuracy[i] = accuracy_score(y_val, predictions)
        precision[i] = precision_score(y_val, predictions)
        recall[i] = recall_score(y_val, predictions)
        f1[i] = f1_score(y_val, predictions)
        mcc[i] = matthews_corrcoef(y_val, predictions)
        
        i += 1
    return np.mean(accuracy), np.mean(precision), np.mean(recall), np.mean(f1), np.mean(mcc), missclassified_idxs

In [38]:
lr = LogisticRegression(solver='liblinear', penalty='l2', C=15, max_iter=1000, random_state=0)
accuracy, precision, recall, f1, mcc, missclassified_idxs = cross_validation_for_3(kf, lr, train)

100%|██████████| 1329/1329 [00:00<00:00, 1183488.33it/s]
100%|██████████| 1329/1329 [00:00<00:00, 464519.17it/s]
100%|██████████| 1329/1329 [00:00<00:00, 667972.44it/s]
100%|██████████| 1329/1329 [00:00<00:00, 457091.43it/s]
100%|██████████| 1329/1329 [00:00<00:00, 395475.70it/s]
100%|██████████| 1329/1329 [00:00<00:00, 462208.13it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1461901.39it/s]
100%|██████████| 1329/1329 [00:00<00:00, 983109.35it/s]
100%|██████████| 1329/1329 [00:00<00:00, 961903.37it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1352311.99it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1284680.81it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1847606.90it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1881279.11it/s]
100%|██████████| 1329/1329 [00:00<00:00, 436373.10it/s]
100%|██████████| 1329/1329 [00:00<00:00, 2106662.89it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1374995.07it/s]
100%|██████████| 1329/1329 [00:00<00:00, 845477.02it/s]
100%|██████████| 1329/1329 [00:00<00:00,

In [39]:
print('Accuracy score: ', accuracy)
print('Precision score: ', precision)
print('Recall score: ', recall)
print('F1 score: ', f1)
print('MCC score: ', mcc)      

Accuracy score:  0.8677211897668701
Precision score:  0.8838972274094824
Recall score:  0.8493790511192888
F1 score:  0.8657170722133856
MCC score:  0.7368339325852231


# Hyperparameter search

This could be used to tune hyperparameters of Logistic regression, but we decided not to tune hyperparameters, because


1.   Tuning logistic regression will not increase accuracy significantly
2.   We are not going to use Doc2Vec + LR for ensemble
3.   It takes long time to compute



In [None]:
C_range = np.arange(10, 22)

# liblinear
penalty=['none', 'l1', 'l2']
intercept_scaling = np.arange(0.8, 1.3, 0.1)
verbose = np.arange(0, 6)

# lbfgs
penalty=['none','l2']
verbose = np.arange(0, 6)

# saga
penalty=['none', 'l1', 'l2', 'elasticnet']
verbose = np.arange(0, 6)

solvers = {}
solvers['liblinear'] = {
    'penalty': ['_l1', 'l2'],
    'intercept_scaling': np.arange(0.8, 1.3, 0.1),
    'verbose': np.arange(0, 6)
}

solvers['lbfgs'] = {
    'verbose': np.arange(0, 6)
}

solvers['saga'] = {
    'penalty': ['none', 'l1', 'l2', 'elasticnet'],
    'l1_ratio': np.arange(0, 0.2, 0.02)
}

solvers['saga'] = {
    'penalty': ['elasticnet'],
    'l1_ratio': np.arange(0, 0.2, 0.02)
}

    
# Liblinear
results_array = []
mg = np.array(np.meshgrid(solvers['lbfgs']['verbose'], C_range)).T.reshape(-1,2)

for params in mg:
    verbose=int(params[0])
    C=int(params[1])

    name = f'saga-{verbose}-{C}'
    print(name)

    lr = LogisticRegression(solver='lbfgs', verbose=verbose, penalty='l2', C=C, max_iter=1000, multi_class='ovr', random_state=0)
    
    accuracy, precision, recall, f1, mcc, missclassified_idxs = cross_validation_for_2(kf, lr, train)
    
    results_array.append([name , accuracy, precision, recall, f1, mcc])
    print([name , accuracy, precision, recall, f1, mcc])
results = pd.DataFrame(results_array, columns = ['model', 'accuracy', 'precision', 'recall', 'f1', 'mcc'])
results

saga-0-10


100%|██████████| 1329/1329 [00:00<00:00, 1180230.79it/s]
100%|██████████| 1329/1329 [00:00<00:00, 950422.85it/s]
100%|██████████| 1329/1329 [00:00<00:00, 718236.05it/s]
100%|██████████| 1329/1329 [00:00<00:00, 406492.38it/s]
100%|██████████| 1329/1329 [00:00<00:00, 355998.85it/s]
100%|██████████| 1329/1329 [00:00<00:00, 666216.09it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1751800.76it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1740858.84it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1254327.19it/s]
100%|██████████| 1329/1329 [00:00<00:00, 2158044.92it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1057728.66it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1213371.79it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1345782.23it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1622302.10it/s]
100%|██████████| 1329/1329 [00:00<00:00, 729706.77it/s]
100%|██████████| 1329/1329 [00:00<00:00, 606421.89it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1404088.17it/s]
100%|██████████| 1329/1329 [00:00<00:0

['saga-0-10', 0.8601976639712489, 0.8698672112030824, 0.8492918413829562, 0.859030175336688, 0.7211909929230961]
saga-0-11


100%|██████████| 1329/1329 [00:00<00:00, 655791.77it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1009093.05it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1064190.53it/s]
100%|██████████| 1329/1329 [00:00<00:00, 416049.41it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1937514.78it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1541972.34it/s]
100%|██████████| 1329/1329 [00:00<00:00, 387880.45it/s]
100%|██████████| 1329/1329 [00:00<00:00, 372335.18it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1844549.97it/s]
100%|██████████| 1329/1329 [00:00<00:00, 316951.73it/s]
100%|██████████| 1329/1329 [00:00<00:00, 341600.07it/s]
100%|██████████| 1329/1329 [00:00<00:00, 636618.32it/s]
100%|██████████| 1329/1329 [00:00<00:00, 608407.55it/s]
100%|██████████| 1329/1329 [00:00<00:00, 480951.68it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1145311.28it/s]
100%|██████████| 1329/1329 [00:00<00:00, 353829.50it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1047000.38it/s]
100%|██████████| 1329/1329 [00:00<00:00, 

['saga-0-11', 0.8620773632193691, 0.8612027153437785, 0.8636540977938828, 0.862336117335424, 0.7240478868027552]
saga-0-12


100%|██████████| 1329/1329 [00:00<00:00, 1014418.57it/s]
100%|██████████| 1329/1329 [00:00<00:00, 983977.05it/s]
100%|██████████| 1329/1329 [00:00<00:00, 677470.83it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1163722.34it/s]
100%|██████████| 1329/1329 [00:00<00:00, 404986.20it/s]
100%|██████████| 1329/1329 [00:00<00:00, 405251.18it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1166645.04it/s]
100%|██████████| 1329/1329 [00:00<00:00, 475616.90it/s]
100%|██████████| 1329/1329 [00:00<00:00, 758192.33it/s]
100%|██████████| 1329/1329 [00:00<00:00, 554870.60it/s]
100%|██████████| 1329/1329 [00:00<00:00, 598414.39it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1331954.60it/s]
100%|██████████| 1329/1329 [00:00<00:00, 371937.68it/s]
100%|██████████| 1329/1329 [00:00<00:00, 397449.56it/s]
100%|██████████| 1329/1329 [00:00<00:00, 929503.09it/s]
100%|██████████| 1329/1329 [00:00<00:00, 322079.51it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1160572.56it/s]
100%|██████████| 1329/1329 [00:00<00:00, 17

['saga-0-12', 0.8689766870005201, 0.8836829643203309, 0.8516065721781625, 0.8668301712321475, 0.7389891755866714]
saga-0-13


100%|██████████| 1329/1329 [00:00<00:00, 801010.20it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1004003.97it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1252073.23it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1217878.53it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1635151.08it/s]
100%|██████████| 1329/1329 [00:00<00:00, 357712.25it/s]
100%|██████████| 1329/1329 [00:00<00:00, 681779.60it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1853751.25it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1394254.63it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1659490.92it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1413702.77it/s]
100%|██████████| 1329/1329 [00:00<00:00, 286843.51it/s]
100%|██████████| 1329/1329 [00:00<00:00, 665420.80it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1624666.28it/s]
100%|██████████| 1329/1329 [00:00<00:00, 430110.34it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1378054.39it/s]
100%|██████████| 1329/1329 [00:00<00:00, 346914.99it/s]
100%|██████████| 1329/1329 [00:00<00:0

['saga-0-13', 0.8821369461389322, 0.8809814813382305, 0.885101239911653, 0.8826841690178568, 0.764769045010667]
saga-0-14


100%|██████████| 1329/1329 [00:00<00:00, 679949.99it/s]
100%|██████████| 1329/1329 [00:00<00:00, 982070.12it/s]
100%|██████████| 1329/1329 [00:00<00:00, 581861.17it/s]
100%|██████████| 1329/1329 [00:00<00:00, 312684.69it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1271494.07it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1344808.21it/s]
100%|██████████| 1329/1329 [00:00<00:00, 626317.98it/s]
100%|██████████| 1329/1329 [00:00<00:00, 445083.84it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1306664.33it/s]
100%|██████████| 1329/1329 [00:00<00:00, 590928.66it/s]
100%|██████████| 1329/1329 [00:00<00:00, 393604.72it/s]
100%|██████████| 1329/1329 [00:00<00:00, 473958.85it/s]
100%|██████████| 1329/1329 [00:00<00:00, 295386.04it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1363892.83it/s]
100%|██████████| 1329/1329 [00:00<00:00, 675091.44it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1963448.40it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1973877.48it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1

['saga-0-14', 0.8702156334231805, 0.8747162152001726, 0.8681336348993395, 0.8700301218252657, 0.7428945560530026]
saga-0-15


100%|██████████| 1329/1329 [00:00<00:00, 900812.87it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1109520.31it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1613380.61it/s]
100%|██████████| 1329/1329 [00:00<00:00, 321670.61it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1296332.56it/s]
100%|██████████| 1329/1329 [00:00<00:00, 742241.01it/s]
100%|██████████| 1329/1329 [00:00<00:00, 308992.79it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1541545.91it/s]
100%|██████████| 1329/1329 [00:00<00:00, 344769.30it/s]
100%|██████████| 1329/1329 [00:00<00:00, 407651.75it/s]
100%|██████████| 1329/1329 [00:00<00:00, 339933.53it/s]
100%|██████████| 1329/1329 [00:00<00:00, 597068.34it/s]
100%|██████████| 1329/1329 [00:00<00:00, 769496.14it/s]
100%|██████████| 1329/1329 [00:00<00:00, 354301.79it/s]
100%|██████████| 1329/1329 [00:00<00:00, 567698.34it/s]
100%|██████████| 1329/1329 [00:00<00:00, 982070.12it/s]
100%|██████████| 1329/1329 [00:00<00:00, 308019.56it/s]
100%|██████████| 1329/1329 [00:00<00:00, 503

['saga-0-15', 0.8564359010734384, 0.8712062696611125, 0.8408846535049138, 0.8544474543677, 0.7153491124726593]
saga-0-16


100%|██████████| 1329/1329 [00:00<00:00, 1167622.54it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1005997.12it/s]
100%|██████████| 1329/1329 [00:00<00:00, 432681.05it/s]
100%|██████████| 1329/1329 [00:00<00:00, 624773.60it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1819265.67it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1635630.87it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1092557.82it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1920161.91it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1709362.16it/s]
100%|██████████| 1329/1329 [00:00<00:00, 404339.91it/s]
100%|██████████| 1329/1329 [00:00<00:00, 651651.86it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1682024.75it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1830016.42it/s]
100%|██████████| 1329/1329 [00:00<00:00, 403374.34it/s]
100%|██████████| 1329/1329 [00:00<00:00, 701867.29it/s]
100%|██████████| 1329/1329 [00:00<00:00, 296769.95it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1856220.45it/s]
100%|██████████| 1329/1329 [00:00<00:0

['saga-0-16', 0.8583132359199886, 0.864059136759726, 0.8528878125793802, 0.8574918692618191, 0.7182957988692867]
saga-0-17


100%|██████████| 1329/1329 [00:00<00:00, 320118.88it/s]
100%|██████████| 1329/1329 [00:00<00:00, 665103.21it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1926799.18it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1336745.81it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1161056.03it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1182734.99it/s]
100%|██████████| 1329/1329 [00:00<00:00, 688431.52it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1879376.27it/s]
100%|██████████| 1329/1329 [00:00<00:00, 2008731.54it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1352311.99it/s]
100%|██████████| 1329/1329 [00:00<00:00, 2037364.77it/s]
100%|██████████| 1329/1329 [00:00<00:00, 319513.36it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1345457.40it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1304523.76it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1315297.31it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1272364.76it/s]
100%|██████████| 1329/1329 [00:00<00:00, 621430.33it/s]
100%|██████████| 1329/1329 [00:00<00

['saga-0-17', 0.8539036269920083, 0.860390238899817, 0.8464904569743279, 0.8530382060959542, 0.7084392851733616]
saga-0-18


100%|██████████| 1329/1329 [00:00<00:00, 622123.89it/s]
100%|██████████| 1329/1329 [00:00<00:00, 880605.06it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1676460.16it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1653583.51it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1657024.38it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1289435.58it/s]
100%|██████████| 1329/1329 [00:00<00:00, 338611.96it/s]
100%|██████████| 1329/1329 [00:00<00:00, 637783.75it/s]
100%|██████████| 1329/1329 [00:00<00:00, 470478.56it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1596285.80it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1643830.73it/s]
100%|██████████| 1329/1329 [00:00<00:00, 712452.71it/s]
100%|██████████| 1329/1329 [00:00<00:00, 678460.32it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1290629.78it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1004546.77it/s]
100%|██████████| 1329/1329 [00:00<00:00, 347520.57it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1630368.53it/s]
100%|██████████| 1329/1329 [00:00<00:00

['saga-0-18', 0.8595781907599186, 0.8691522396591672, 0.8503927113151787, 0.8588264049382808, 0.7208480965334226]
saga-0-19


100%|██████████| 1329/1329 [00:00<00:00, 495135.02it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1197471.54it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1903117.11it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1614782.74it/s]
100%|██████████| 1329/1329 [00:00<00:00, 632931.76it/s]
100%|██████████| 1329/1329 [00:00<00:00, 415150.82it/s]
100%|██████████| 1329/1329 [00:00<00:00, 617917.08it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1219743.99it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1334505.63it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1272364.76it/s]
100%|██████████| 1329/1329 [00:00<00:00, 454779.31it/s]
100%|██████████| 1329/1329 [00:00<00:00, 715103.27it/s]
100%|██████████| 1329/1329 [00:00<00:00, 675582.36it/s]
100%|██████████| 1329/1329 [00:00<00:00, 877555.10it/s]
100%|██████████| 1329/1329 [00:00<00:00, 609471.90it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1534754.96it/s]
100%|██████████| 1329/1329 [00:00<00:00, 641748.79it/s]
100%|██████████| 1329/1329 [00:00<00:00, 

['saga-0-19', 0.8664751501394997, 0.8787821727828079, 0.8531245578500813, 0.8649292159469225, 0.7348222683205433]
saga-0-20


100%|██████████| 1329/1329 [00:00<00:00, 876589.09it/s]
100%|██████████| 1329/1329 [00:00<00:00, 523156.27it/s]
100%|██████████| 1329/1329 [00:00<00:00, 389234.69it/s]
100%|██████████| 1329/1329 [00:00<00:00, 391366.29it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1359899.98it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1159848.11it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1767913.10it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1263424.75it/s]
100%|██████████| 1329/1329 [00:00<00:00, 490128.38it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1417657.68it/s]
100%|██████████| 1329/1329 [00:00<00:00, 480288.65it/s]
100%|██████████| 1329/1329 [00:00<00:00, 420601.37it/s]
100%|██████████| 1329/1329 [00:00<00:00, 882557.00it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1049168.08it/s]
100%|██████████| 1329/1329 [00:00<00:00, 503225.60it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1304218.53it/s]
100%|██████████| 1329/1329 [00:00<00:00, 398529.35it/s]
100%|██████████| 1329/1329 [00:00<00:00, 

['saga-0-20', 0.8576866695039486, 0.868641608953585, 0.8483454623098087, 0.8569703828667654, 0.7181246444793817]
saga-0-21


100%|██████████| 1329/1329 [00:00<00:00, 751041.50it/s]
100%|██████████| 1329/1329 [00:00<00:00, 995753.84it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1680503.47it/s]
100%|██████████| 1329/1329 [00:00<00:00, 787877.03it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1297841.68it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1248987.23it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1854985.03it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1265719.80it/s]
100%|██████████| 1329/1329 [00:00<00:00, 372310.31it/s]
100%|██████████| 1329/1329 [00:00<00:00, 685468.52it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1156719.24it/s]
100%|██████████| 1329/1329 [00:00<00:00, 382032.08it/s]
100%|██████████| 1329/1329 [00:00<00:00, 621846.28it/s]
100%|██████████| 1329/1329 [00:00<00:00, 489010.44it/s]
100%|██████████| 1329/1329 [00:00<00:00, 387880.45it/s]
100%|██████████| 1329/1329 [00:00<00:00, 417201.56it/s]
100%|██████████| 1329/1329 [00:00<00:00, 221182.05it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1

['saga-0-21', 0.8620726344162293, 0.8681053934180025, 0.8566945264172202, 0.8615872568680069, 0.7255191829714089]
saga-1-10


100%|██████████| 1329/1329 [00:00<00:00, 968084.41it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1022419.30it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1283793.19it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1591727.59it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1254609.50it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1858696.24it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1586743.53it/s]
100%|██████████| 1329/1329 [00:00<00:00, 363639.51it/s]
100%|██████████| 1329/1329 [00:00<00:00, 434265.35it/s]
100%|██████████| 1329/1329 [00:00<00:00, 565681.96it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1971782.81it/s]
100%|██████████| 1329/1329 [00:00<00:00, 436304.79it/s]
100%|██████████| 1329/1329 [00:00<00:00, 354211.73it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1405149.99it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1955870.18it/s]
100%|██████████| 1329/1329 [00:00<00:00, 576266.93it/s]
100%|██████████| 1329/1329 [00:00<00:00, 414872.73it/s]
100%|██████████| 1329/1329 [00:00<00:00

['saga-1-10', 0.8727100770794912, 0.891363121070356, 0.8518601580146571, 0.8701811196234113, 0.747881556975714]
saga-1-11


100%|██████████| 1329/1329 [00:00<00:00, 445653.18it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1137597.96it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1168846.72it/s]
100%|██████████| 1329/1329 [00:00<00:00, 901541.33it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1294225.68it/s]
100%|██████████| 1329/1329 [00:00<00:00, 866505.52it/s]
100%|██████████| 1329/1329 [00:00<00:00, 449425.95it/s]
100%|██████████| 1329/1329 [00:00<00:00, 520664.12it/s]
100%|██████████| 1329/1329 [00:00<00:00, 989918.31it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1286756.70it/s]
100%|██████████| 1329/1329 [00:00<00:00, 905789.73it/s]
100%|██████████| 1329/1329 [00:00<00:00, 317891.65it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1349365.77it/s]
100%|██████████| 1329/1329 [00:00<00:00, 913958.03it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1289435.58it/s]
100%|██████████| 1329/1329 [00:00<00:00, 2012357.41it/s]
100%|██████████| 1329/1329 [00:00<00:00, 454408.58it/s]
100%|██████████| 1329/1329 [00:00<00:00, 

['saga-1-11', 0.8489076464746773, 0.8564722296689974, 0.842142692977441, 0.8483758927512492, 0.6995784116025933]
saga-1-12


100%|██████████| 1329/1329 [00:00<00:00, 812806.94it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1022044.37it/s]
100%|██████████| 1329/1329 [00:00<00:00, 927646.87it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1301477.94it/s]
100%|██████████| 1329/1329 [00:00<00:00, 814945.91it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1914227.34it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1256872.61it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1140857.56it/s]
100%|██████████| 1329/1329 [00:00<00:00, 258568.98it/s]
100%|██████████| 1329/1329 [00:00<00:00, 769283.74it/s]
100%|██████████| 1329/1329 [00:00<00:00, 721396.40it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1655547.97it/s]
100%|██████████| 1329/1329 [00:00<00:00, 447945.20it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1581790.58it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1933482.49it/s]
100%|██████████| 1329/1329 [00:00<00:00, 387961.44it/s]
100%|██████████| 1329/1329 [00:00<00:00, 598157.53it/s]
100%|██████████| 1329/1329 [00:00<00:00,

['saga-1-12', 0.8639381472549297, 0.871132569447803, 0.8566117521833424, 0.8633231242596947, 0.7287028869097275]
saga-1-13


100%|██████████| 1329/1329 [00:00<00:00, 1016268.01it/s]
100%|██████████| 1329/1329 [00:00<00:00, 517377.95it/s]
100%|██████████| 1329/1329 [00:00<00:00, 291768.12it/s]
100%|██████████| 1329/1329 [00:00<00:00, 265237.44it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1765673.11it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1241476.62it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1190566.00it/s]
100%|██████████| 1329/1329 [00:00<00:00, 838734.58it/s]
100%|██████████| 1329/1329 [00:00<00:00, 398216.17it/s]
100%|██████████| 1329/1329 [00:00<00:00, 319623.28it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1719380.02it/s]
100%|██████████| 1329/1329 [00:00<00:00, 727421.38it/s]
100%|██████████| 1329/1329 [00:00<00:00, 335413.08it/s]
100%|██████████| 1329/1329 [00:00<00:00, 391586.23it/s]
100%|██████████| 1329/1329 [00:00<00:00, 514702.68it/s]
100%|██████████| 1329/1329 [00:00<00:00, 456343.02it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1245637.99it/s]
100%|██████████| 1329/1329 [00:00<00:00, 6

['saga-1-13', 0.8620608124083794, 0.8813716693252983, 0.8393827388734011, 0.8592742084135355, 0.7261075774653701]
saga-1-14


100%|██████████| 1329/1329 [00:00<00:00, 345068.10it/s]
100%|██████████| 1329/1329 [00:00<00:00, 951233.79it/s]
100%|██████████| 1329/1329 [00:00<00:00, 492075.39it/s]
100%|██████████| 1329/1329 [00:00<00:00, 410292.21it/s]
100%|██████████| 1329/1329 [00:00<00:00, 892162.29it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1515148.14it/s]
100%|██████████| 1329/1329 [00:00<00:00, 319019.63it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1216284.10it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1778631.15it/s]
100%|██████████| 1329/1329 [00:00<00:00, 353627.48it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1823431.47it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1303303.72it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1189549.73it/s]
100%|██████████| 1329/1329 [00:00<00:00, 331819.16it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1307890.67it/s]
100%|██████████| 1329/1329 [00:00<00:00, 883816.40it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1411197.47it/s]
100%|██████████| 1329/1329 [00:00<00:00,

['saga-1-14', 0.8764978483945712, 0.8905900561814137, 0.8608309253470542, 0.8749131106824444, 0.7542876264313291]
saga-1-15


100%|██████████| 1329/1329 [00:00<00:00, 861683.42it/s]
100%|██████████| 1329/1329 [00:00<00:00, 455559.82it/s]
100%|██████████| 1329/1329 [00:00<00:00, 596238.10it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1606406.34it/s]
100%|██████████| 1329/1329 [00:00<00:00, 459086.64it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1218144.67it/s]
100%|██████████| 1329/1329 [00:00<00:00, 439920.29it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1235971.18it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1316229.05it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1007815.95it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1664446.11it/s]
100%|██████████| 1329/1329 [00:00<00:00, 661315.70it/s]
100%|██████████| 1329/1329 [00:00<00:00, 966908.94it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1340603.66it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1342864.37it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1281137.67it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1284976.95it/s]
100%|██████████| 1329/1329 [00:00<00:0

['saga-1-15', 0.873986853927271, 0.8705480731482945, 0.8818457083171288, 0.8755099334727584, 0.7488794401322442]
saga-1-16


100%|██████████| 1329/1329 [00:00<00:00, 634444.57it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1011289.92it/s]
100%|██████████| 1329/1329 [00:00<00:00, 626177.27it/s]
100%|██████████| 1329/1329 [00:00<00:00, 409749.34it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1304218.53it/s]
100%|██████████| 1329/1329 [00:00<00:00, 413365.22it/s]
100%|██████████| 1329/1329 [00:00<00:00, 602945.38it/s]
100%|██████████| 1329/1329 [00:00<00:00, 298982.52it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1038225.00it/s]
100%|██████████| 1329/1329 [00:00<00:00, 346871.81it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1263711.18it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1073205.62it/s]
100%|██████████| 1329/1329 [00:00<00:00, 484757.81it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1588099.72it/s]
100%|██████████| 1329/1329 [00:00<00:00, 602945.38it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1278199.96it/s]
100%|██████████| 1329/1329 [00:00<00:00, 406284.99it/s]
100%|██████████| 1329/1329 [00:00<00:00, 

['saga-1-16', 0.8614437035986192, 0.874116786691021, 0.8447625437297197, 0.8590095213437435, 0.723274014722238]
saga-1-17


100%|██████████| 1329/1329 [00:00<00:00, 291752.85it/s]
100%|██████████| 1329/1329 [00:00<00:00, 574662.89it/s]
100%|██████████| 1329/1329 [00:00<00:00, 397053.21it/s]
100%|██████████| 1329/1329 [00:00<00:00, 415429.28it/s]
100%|██████████| 1329/1329 [00:00<00:00, 353784.59it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1498851.85it/s]
100%|██████████| 1329/1329 [00:00<00:00, 304769.27it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1305440.28it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1667433.45it/s]
100%|██████████| 1329/1329 [00:00<00:00, 600347.87it/s]
100%|██████████| 1329/1329 [00:00<00:00, 883956.55it/s]
100%|██████████| 1329/1329 [00:00<00:00, 728848.07it/s]
100%|██████████| 1329/1329 [00:00<00:00, 386187.48it/s]
100%|██████████| 1329/1329 [00:00<00:00, 449498.43it/s]
100%|██████████| 1329/1329 [00:00<00:00, 426980.47it/s]
100%|██████████| 1329/1329 [00:00<00:00, 362292.34it/s]
100%|██████████| 1329/1329 [00:00<00:00, 554594.57it/s]
100%|██████████| 1329/1329 [00:00<00:00, 6635

['saga-1-17', 0.8495389416938574, 0.8592711695826525, 0.8396607422215797, 0.8486891233939775, 0.7004859022613633]
saga-1-18


100%|██████████| 1329/1329 [00:00<00:00, 751750.51it/s]
100%|██████████| 1329/1329 [00:00<00:00, 631927.22it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1627987.74it/s]
100%|██████████| 1329/1329 [00:00<00:00, 319147.49it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1870546.99it/s]
100%|██████████| 1329/1329 [00:00<00:00, 407502.74it/s]
100%|██████████| 1329/1329 [00:00<00:00, 519354.33it/s]
100%|██████████| 1329/1329 [00:00<00:00, 432580.32it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1244525.57it/s]
100%|██████████| 1329/1329 [00:00<00:00, 406403.47it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1982300.86it/s]
100%|██████████| 1329/1329 [00:00<00:00, 535777.59it/s]
100%|██████████| 1329/1329 [00:00<00:00, 548968.88it/s]
100%|██████████| 1329/1329 [00:00<00:00, 481741.42it/s]
100%|██████████| 1329/1329 [00:00<00:00, 310593.97it/s]
100%|██████████| 1329/1329 [00:00<00:00, 608341.16it/s]
100%|██████████| 1329/1329 [00:00<00:00, 540400.39it/s]
100%|██████████| 1329/1329 [00:00<00:00, 425

['saga-1-18', 0.8520357497517379, 0.8609925034845886, 0.8393357836906223, 0.8498461954649997, 0.7041717674216184]
saga-1-19


100%|██████████| 1329/1329 [00:00<00:00, 977249.30it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1037259.03it/s]
100%|██████████| 1329/1329 [00:00<00:00, 382740.32it/s]
100%|██████████| 1329/1329 [00:00<00:00, 501550.30it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1310042.31it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1891493.05it/s]
100%|██████████| 1329/1329 [00:00<00:00, 480785.75it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1704136.35it/s]
100%|██████████| 1329/1329 [00:00<00:00, 744322.34it/s]
100%|██████████| 1329/1329 [00:00<00:00, 314289.02it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1590819.07it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1322474.50it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1995071.59it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1282906.79it/s]
100%|██████████| 1329/1329 [00:00<00:00, 824834.27it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1093844.19it/s]
100%|██████████| 1329/1329 [00:00<00:00, 388502.23it/s]
100%|██████████| 1329/1329 [00:00<00:00

['saga-1-19', 0.8595734619567788, 0.8817384004884005, 0.8333611341392891, 0.8561402742668798, 0.721645384833537]
saga-1-20


100%|██████████| 1329/1329 [00:00<00:00, 1351328.49it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1027886.78it/s]
100%|██████████| 1329/1329 [00:00<00:00, 350096.09it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1300870.48it/s]
100%|██████████| 1329/1329 [00:00<00:00, 319623.28it/s]
100%|██████████| 1329/1329 [00:00<00:00, 422194.20it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1338993.52it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1321847.29it/s]
100%|██████████| 1329/1329 [00:00<00:00, 353829.50it/s]
100%|██████████| 1329/1329 [00:00<00:00, 443843.46it/s]
100%|██████████| 1329/1329 [00:00<00:00, 296517.37it/s]
100%|██████████| 1329/1329 [00:00<00:00, 630283.81it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1528023.58it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1424177.32it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1329096.33it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1876213.40it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1877477.27it/s]
100%|██████████| 1329/1329 [00:00<00:0

['saga-1-20', 0.8645765356788196, 0.8786234891040744, 0.8478598368241831, 0.8626041149648905, 0.7301012793269156]
saga-1-21


100%|██████████| 1329/1329 [00:00<00:00, 890879.02it/s]
100%|██████████| 1329/1329 [00:00<00:00, 528814.16it/s]
100%|██████████| 1329/1329 [00:00<00:00, 680115.91it/s]
100%|██████████| 1329/1329 [00:00<00:00, 552944.15it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1530120.78it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1040162.35it/s]
100%|██████████| 1329/1329 [00:00<00:00, 378298.61it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1572864.00it/s]
100%|██████████| 1329/1329 [00:00<00:00, 279298.03it/s]
100%|██████████| 1329/1329 [00:00<00:00, 775383.23it/s]
100%|██████████| 1329/1329 [00:00<00:00, 2039601.18it/s]
100%|██████████| 1329/1329 [00:00<00:00, 249071.94it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1273527.53it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1272655.25it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1160814.25it/s]
100%|██████████| 1329/1329 [00:00<00:00, 605302.42it/s]
100%|██████████| 1329/1329 [00:00<00:00, 390051.78it/s]
100%|██████████| 1329/1329 [00:00<00:00, 

['saga-1-21', 0.8595687331536389, 0.8674105431728399, 0.8518268574578705, 0.8589727772354311, 0.7201349446028571]
saga-2-10


100%|██████████| 1329/1329 [00:00<00:00, 762861.64it/s]
100%|██████████| 1329/1329 [00:00<00:00, 837474.46it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1280254.94it/s]
100%|██████████| 1329/1329 [00:00<00:00, 869072.34it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1684566.34it/s]
100%|██████████| 1329/1329 [00:00<00:00, 337280.21it/s]
100%|██████████| 1329/1329 [00:00<00:00, 822279.10it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1282906.79it/s]
100%|██████████| 1329/1329 [00:00<00:00, 650966.95it/s]
100%|██████████| 1329/1329 [00:00<00:00, 639099.98it/s]
100%|██████████| 1329/1329 [00:00<00:00, 316717.61it/s]
100%|██████████| 1329/1329 [00:00<00:00, 652796.58it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1638034.09it/s]
100%|██████████| 1329/1329 [00:00<00:00, 340244.77it/s]
100%|██████████| 1329/1329 [00:00<00:00, 404017.54it/s]
100%|██████████| 1329/1329 [00:00<00:00, 322975.26it/s]
100%|██████████| 1329/1329 [00:00<00:00, 301065.62it/s]
100%|██████████| 1329/1329 [00:00<00:00, 815

['saga-2-10', 0.8564122570577387, 0.8595198202886837, 0.8535435942623266, 0.8560462959575234, 0.7135009615433002]
saga-2-11


100%|██████████| 1329/1329 [00:00<00:00, 137836.11it/s]
100%|██████████| 1329/1329 [00:00<00:00, 412600.30it/s]
100%|██████████| 1329/1329 [00:00<00:00, 326016.49it/s]
100%|██████████| 1329/1329 [00:00<00:00, 313687.68it/s]
100%|██████████| 1329/1329 [00:00<00:00, 356089.82it/s]
100%|██████████| 1329/1329 [00:00<00:00, 365283.75it/s]
100%|██████████| 1329/1329 [00:00<00:00, 438777.55it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1938188.46it/s]
100%|██████████| 1329/1329 [00:00<00:00, 569321.83it/s]
100%|██████████| 1329/1329 [00:00<00:00, 292426.29it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1942917.40it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1047000.38it/s]
100%|██████████| 1329/1329 [00:00<00:00, 816976.41it/s]
100%|██████████| 1329/1329 [00:00<00:00, 2012357.41it/s]
100%|██████████| 1329/1329 [00:00<00:00, 358956.15it/s]
100%|██████████| 1329/1329 [00:00<00:00, 430708.55it/s]
100%|██████████| 1329/1329 [00:00<00:00, 325825.93it/s]
100%|██████████| 1329/1329 [00:00<00:00, 406

['saga-2-11', 0.8551662174303684, 0.8625102576195899, 0.8486532934042837, 0.8547650193966922, 0.7118816469421017]
saga-2-12


100%|██████████| 1329/1329 [00:00<00:00, 1134356.94it/s]
100%|██████████| 1329/1329 [00:00<00:00, 732680.08it/s]
100%|██████████| 1329/1329 [00:00<00:00, 652796.58it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1679490.82it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1920161.91it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1374995.07it/s]
100%|██████████| 1329/1329 [00:00<00:00, 989215.62it/s]
100%|██████████| 1329/1329 [00:00<00:00, 947675.96it/s]
100%|██████████| 1329/1329 [00:00<00:00, 900521.81it/s]
100%|██████████| 1329/1329 [00:00<00:00, 901395.54it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1940887.89it/s]
100%|██████████| 1329/1329 [00:00<00:00, 303640.38it/s]
100%|██████████| 1329/1329 [00:00<00:00, 361681.16it/s]
100%|██████████| 1329/1329 [00:00<00:00, 354843.08it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1148852.02it/s]
100%|██████████| 1329/1329 [00:00<00:00, 342818.57it/s]
100%|██████████| 1329/1329 [00:00<00:00, 533776.69it/s]
100%|██████████| 1329/1329 [00:00<00:00, 5

['saga-2-12', 0.8432401759114768, 0.8464201950239564, 0.840321074050559, 0.8430363532623342, 0.6869419315465474]
saga-2-13


100%|██████████| 1329/1329 [00:00<00:00, 867719.49it/s]
100%|██████████| 1329/1329 [00:00<00:00, 750636.95it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1303303.72it/s]
100%|██████████| 1329/1329 [00:00<00:00, 360744.89it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1703615.53it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1761766.76it/s]
100%|██████████| 1329/1329 [00:00<00:00, 708288.44it/s]
100%|██████████| 1329/1329 [00:00<00:00, 493906.61it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1085959.48it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1858696.24it/s]
100%|██████████| 1329/1329 [00:00<00:00, 307357.19it/s]
100%|██████████| 1329/1329 [00:00<00:00, 906968.76it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1511450.66it/s]
100%|██████████| 1329/1329 [00:00<00:00, 513469.97it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1800461.89it/s]
100%|██████████| 1329/1329 [00:00<00:00, 250482.16it/s]
100%|██████████| 1329/1329 [00:00<00:00, 496457.96it/s]
100%|██████████| 1329/1329 [00:00<00:00, 

['saga-2-13', 0.8708587506502105, 0.8878608897774759, 0.850776354270978, 0.8685666410561956, 0.7428962374218214]
saga-2-14


100%|██████████| 1329/1329 [00:00<00:00, 667013.28it/s]
100%|██████████| 1329/1329 [00:00<00:00, 949613.29it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1497643.74it/s]
100%|██████████| 1329/1329 [00:00<00:00, 460109.78it/s]
100%|██████████| 1329/1329 [00:00<00:00, 523992.29it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1863044.79it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1142728.58it/s]
100%|██████████| 1329/1329 [00:00<00:00, 914407.81it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1376353.09it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1296935.79it/s]
100%|██████████| 1329/1329 [00:00<00:00, 514370.21it/s]
100%|██████████| 1329/1329 [00:00<00:00, 751243.94it/s]
100%|██████████| 1329/1329 [00:00<00:00, 906968.76it/s]
100%|██████████| 1329/1329 [00:00<00:00, 324952.20it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1170564.89it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1125197.82it/s]
100%|██████████| 1329/1329 [00:00<00:00, 399901.72it/s]
100%|██████████| 1329/1329 [00:00<00:00, 

['saga-2-14', 0.8652149241027097, 0.8727425289091383, 0.8584213136420268, 0.864656487644662, 0.7321626127095318]
saga-2-15


100%|██████████| 1329/1329 [00:00<00:00, 1140390.76it/s]
100%|██████████| 1329/1329 [00:00<00:00, 850118.96it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1363559.20it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1258575.30it/s]
100%|██████████| 1329/1329 [00:00<00:00, 804826.74it/s]
100%|██████████| 1329/1329 [00:00<00:00, 839239.69it/s]
100%|██████████| 1329/1329 [00:00<00:00, 628719.83it/s]
100%|██████████| 1329/1329 [00:00<00:00, 300594.80it/s]
100%|██████████| 1329/1329 [00:00<00:00, 327549.07it/s]
100%|██████████| 1329/1329 [00:00<00:00, 489912.99it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1303608.52it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1703095.02it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1556178.12it/s]
100%|██████████| 1329/1329 [00:00<00:00, 336465.87it/s]
100%|██████████| 1329/1329 [00:00<00:00, 409177.86it/s]
100%|██████████| 1329/1329 [00:00<00:00, 558204.49it/s]
100%|██████████| 1329/1329 [00:00<00:00, 506932.52it/s]
100%|██████████| 1329/1329 [00:00<00:00, 4

['saga-2-15', 0.8457984584101764, 0.8636967163487045, 0.8268411758649451, 0.8433032173083769, 0.694925567815134]
saga-2-16


100%|██████████| 1329/1329 [00:00<00:00, 985891.41it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1003823.16it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1160814.25it/s]
100%|██████████| 1329/1329 [00:00<00:00, 273353.77it/s]
100%|██████████| 1329/1329 [00:00<00:00, 916964.96it/s]
100%|██████████| 1329/1329 [00:00<00:00, 765585.77it/s]
100%|██████████| 1329/1329 [00:00<00:00, 896899.44it/s]
100%|██████████| 1329/1329 [00:00<00:00, 863953.82it/s]
100%|██████████| 1329/1329 [00:00<00:00, 876313.48it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1756769.62it/s]
100%|██████████| 1329/1329 [00:00<00:00, 321614.93it/s]
100%|██████████| 1329/1329 [00:00<00:00, 487684.17it/s]
100%|██████████| 1329/1329 [00:00<00:00, 2000082.53it/s]
100%|██████████| 1329/1329 [00:00<00:00, 407353.85it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1767352.57it/s]
100%|██████████| 1329/1329 [00:00<00:00, 305437.26it/s]
100%|██████████| 1329/1329 [00:00<00:00, 337893.56it/s]
100%|██████████| 1329/1329 [00:00<00:00, 10

['saga-2-16', 0.8583179647231285, 0.8749655361247477, 0.8395169938322175, 0.8561694663712522, 0.7183335032838917]
saga-2-17


100%|██████████| 1329/1329 [00:00<00:00, 760364.21it/s]
100%|██████████| 1329/1329 [00:00<00:00, 916362.00it/s]
100%|██████████| 1329/1329 [00:00<00:00, 2047091.45it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1552710.31it/s]
100%|██████████| 1329/1329 [00:00<00:00, 909781.30it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1885734.11it/s]
100%|██████████| 1329/1329 [00:00<00:00, 283964.85it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1016638.70it/s]
100%|██████████| 1329/1329 [00:00<00:00, 848824.43it/s]
100%|██████████| 1329/1329 [00:00<00:00, 598993.12it/s]
100%|██████████| 1329/1329 [00:00<00:00, 542029.37it/s]
100%|██████████| 1329/1329 [00:00<00:00, 579261.15it/s]
100%|██████████| 1329/1329 [00:00<00:00, 884657.99it/s]
100%|██████████| 1329/1329 [00:00<00:00, 538729.10it/s]
100%|██████████| 1329/1329 [00:00<00:00, 381744.28it/s]
100%|██████████| 1329/1329 [00:00<00:00, 370553.08it/s]
100%|██████████| 1329/1329 [00:00<00:00, 551467.16it/s]
100%|██████████| 1329/1329 [00:00<00:00, 181

['saga-2-17', 0.8633281316498795, 0.8818853517140591, 0.8431877817418281, 0.8609269150255144, 0.7294396333994242]
saga-2-18


100%|██████████| 1329/1329 [00:00<00:00, 1005815.59it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1024674.64it/s]
100%|██████████| 1329/1329 [00:00<00:00, 676812.78it/s]
100%|██████████| 1329/1329 [00:00<00:00, 481783.06it/s]
100%|██████████| 1329/1329 [00:00<00:00, 479916.49it/s]
100%|██████████| 1329/1329 [00:00<00:00, 455783.32it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1254891.94it/s]
100%|██████████| 1329/1329 [00:00<00:00, 400505.10it/s]
100%|██████████| 1329/1329 [00:00<00:00, 417514.05it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1322474.50it/s]
100%|██████████| 1329/1329 [00:00<00:00, 778306.34it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1356920.65it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1328779.50it/s]
100%|██████████| 1329/1329 [00:00<00:00, 331109.59it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1238442.57it/s]
100%|██████████| 1329/1329 [00:00<00:00, 562769.31it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1348386.55it/s]
100%|██████████| 1329/1329 [00:00<00:00,

['saga-2-18', 0.8563980706483191, 0.8719996071113324, 0.8378502876946566, 0.854187818335521, 0.7136605055417203]
saga-2-19


100%|██████████| 1329/1329 [00:00<00:00, 750030.95it/s]
100%|██████████| 1329/1329 [00:00<00:00, 388393.95it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1589005.14it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1010923.11it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1144840.83it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1664943.25it/s]
100%|██████████| 1329/1329 [00:00<00:00, 539354.62it/s]
100%|██████████| 1329/1329 [00:00<00:00, 315355.85it/s]
100%|██████████| 1329/1329 [00:00<00:00, 425156.74it/s]
100%|██████████| 1329/1329 [00:00<00:00, 432144.35it/s]
100%|██████████| 1329/1329 [00:00<00:00, 595219.44it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1340926.15it/s]
100%|██████████| 1329/1329 [00:00<00:00, 531435.79it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1940212.33it/s]
100%|██████████| 1329/1329 [00:00<00:00, 853242.00it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1637071.96it/s]
100%|██████████| 1329/1329 [00:00<00:00, 690991.70it/s]
100%|██████████| 1329/1329 [00:00<00:00, 

['saga-2-19', 0.8532983401900979, 0.8774218948015741, 0.824141265680598, 0.8490942212187506, 0.7093652549869615]
saga-2-20


100%|██████████| 1329/1329 [00:00<00:00, 580407.12it/s]
100%|██████████| 1329/1329 [00:00<00:00, 690307.12it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1228345.09it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1978080.20it/s]
100%|██████████| 1329/1329 [00:00<00:00, 428786.92it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1279960.97it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1871174.90it/s]
100%|██████████| 1329/1329 [00:00<00:00, 279887.03it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1623719.78it/s]
100%|██████████| 1329/1329 [00:00<00:00, 725055.93it/s]
100%|██████████| 1329/1329 [00:00<00:00, 340265.54it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1530961.28it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1270335.01it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1361893.48it/s]
100%|██████████| 1329/1329 [00:00<00:00, 351775.21it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1673942.95it/s]
100%|██████████| 1329/1329 [00:00<00:00, 421874.67it/s]
100%|██████████| 1329/1329 [00:00<00:00

['saga-2-20', 0.8721000614744409, 0.8866099070852665, 0.8557647057505574, 0.8704872895578927, 0.7453642858061998]
saga-2-21


100%|██████████| 1329/1329 [00:00<00:00, 869479.02it/s]
100%|██████████| 1329/1329 [00:00<00:00, 828265.98it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1485273.12it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1896641.72it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1074032.76it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1274401.01it/s]
100%|██████████| 1329/1329 [00:00<00:00, 953838.13it/s]
100%|██████████| 1329/1329 [00:00<00:00, 628436.30it/s]
100%|██████████| 1329/1329 [00:00<00:00, 764116.52it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1868665.78it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1335784.81it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1657024.38it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1186006.39it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1479360.41it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1483691.78it/s]
100%|██████████| 1329/1329 [00:00<00:00, 654022.06it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1338671.95it/s]
100%|██████████| 1329/1329 [00:00<00:

['saga-2-21', 0.8639523336643496, 0.8680363698908592, 0.8608933982448413, 0.8638493317702265, 0.7289599766464344]
saga-3-10


100%|██████████| 1329/1329 [00:00<00:00, 1028076.36it/s]
100%|██████████| 1329/1329 [00:00<00:00, 991855.87it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1081744.62it/s]
100%|██████████| 1329/1329 [00:00<00:00, 2010180.32it/s]
100%|██████████| 1329/1329 [00:00<00:00, 502001.98it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1135743.69it/s]
100%|██████████| 1329/1329 [00:00<00:00, 993446.80it/s]
100%|██████████| 1329/1329 [00:00<00:00, 502545.08it/s]
100%|██████████| 1329/1329 [00:00<00:00, 861683.42it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1282611.60it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1730052.77it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1332272.95it/s]
100%|██████████| 1329/1329 [00:00<00:00, 367015.41it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1096426.05it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1673942.95it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1754557.76it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1315918.32it/s]
100%|██████████| 1329/1329 [00:00<00:

['saga-3-10', 0.8495507637017071, 0.8604566838624702, 0.8382580618149378, 0.848340203200095, 0.7010428893922062]
saga-3-11


100%|██████████| 1329/1329 [00:00<00:00, 969094.23it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1004365.77it/s]
100%|██████████| 1329/1329 [00:00<00:00, 425806.28it/s]
100%|██████████| 1329/1329 [00:00<00:00, 312072.00it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1359899.98it/s]
100%|██████████| 1329/1329 [00:00<00:00, 321466.55it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1371949.30it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1299657.27it/s]
100%|██████████| 1329/1329 [00:00<00:00, 307272.48it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1693780.01it/s]
100%|██████████| 1329/1329 [00:00<00:00, 914557.84it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1695841.20it/s]
100%|██████████| 1329/1329 [00:00<00:00, 321373.88it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1513091.75it/s]
100%|██████████| 1329/1329 [00:00<00:00, 2021113.13it/s]
100%|██████████| 1329/1329 [00:00<00:00, 718421.19it/s]
100%|██████████| 1329/1329 [00:00<00:00, 314697.12it/s]
100%|██████████| 1329/1329 [00:00<00:00,

['saga-3-11', 0.8670969877524, 0.8768782230429456, 0.8572345322203839, 0.866651387694778, 0.734598878568339]
saga-3-12


100%|██████████| 1329/1329 [00:00<00:00, 1110846.95it/s]
100%|██████████| 1329/1329 [00:00<00:00, 457616.78it/s]
100%|██████████| 1329/1329 [00:00<00:00, 519887.15it/s]
100%|██████████| 1329/1329 [00:00<00:00, 751243.94it/s]
100%|██████████| 1329/1329 [00:00<00:00, 549293.46it/s]
100%|██████████| 1329/1329 [00:00<00:00, 772374.95it/s]
100%|██████████| 1329/1329 [00:00<00:00, 462668.49it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1050354.25it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1885096.39it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1338993.52it/s]
100%|██████████| 1329/1329 [00:00<00:00, 358794.41it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1244525.57it/s]
100%|██████████| 1329/1329 [00:00<00:00, 342734.26it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1328779.50it/s]
100%|██████████| 1329/1329 [00:00<00:00, 415522.18it/s]
100%|██████████| 1329/1329 [00:00<00:00, 497166.43it/s]
100%|██████████| 1329/1329 [00:00<00:00, 619152.51it/s]
100%|██████████| 1329/1329 [00:00<00:00, 3

['saga-3-12', 0.8495389416938574, 0.8637984152236422, 0.8345342572705334, 0.8472272717592483, 0.702300737068013]
saga-3-13


100%|██████████| 1329/1329 [00:00<00:00, 954164.67it/s]
100%|██████████| 1329/1329 [00:00<00:00, 290264.01it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1836649.10it/s]
100%|██████████| 1329/1329 [00:00<00:00, 586946.41it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1446724.63it/s]
100%|██████████| 1329/1329 [00:00<00:00, 437331.71it/s]
100%|██████████| 1329/1329 [00:00<00:00, 330579.41it/s]
100%|██████████| 1329/1329 [00:00<00:00, 460756.32it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1237617.68it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1966218.70it/s]
100%|██████████| 1329/1329 [00:00<00:00, 667892.41it/s]
100%|██████████| 1329/1329 [00:00<00:00, 327375.93it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1085325.16it/s]
100%|██████████| 1329/1329 [00:00<00:00, 981033.09it/s]
100%|██████████| 1329/1329 [00:00<00:00, 343092.88it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1112176.78it/s]
100%|██████████| 1329/1329 [00:00<00:00, 350999.94it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1

['saga-3-13', 0.8677046389558803, 0.8703634741392179, 0.8666368148399841, 0.867887959054232, 0.7364231761958339]
saga-3-14


100%|██████████| 1329/1329 [00:00<00:00, 877969.76it/s]
100%|██████████| 1329/1329 [00:00<00:00, 829005.06it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1019241.18it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1603633.49it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1322160.82it/s]
100%|██████████| 1329/1329 [00:00<00:00, 2039601.18it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1942240.42it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1616656.04it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1325304.33it/s]
100%|██████████| 1329/1329 [00:00<00:00, 394747.54it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1149799.92it/s]
100%|██████████| 1329/1329 [00:00<00:00, 854026.35it/s]
100%|██████████| 1329/1329 [00:00<00:00, 467088.15it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1525097.13it/s]
100%|██████████| 1329/1329 [00:00<00:00, 558596.05it/s]
100%|██████████| 1329/1329 [00:00<00:00, 542451.34it/s]
100%|██████████| 1329/1329 [00:00<00:00, 414749.26it/s]
100%|██████████| 1329/1329 [00:00<00:00

['saga-3-14', 0.8651936444885799, 0.8718875771084811, 0.8575847395598387, 0.8643799158932216, 0.7306886579146156]
saga-3-15


100%|██████████| 1329/1329 [00:00<00:00, 911119.65it/s]
100%|██████████| 1329/1329 [00:00<00:00, 979309.56it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1312510.01it/s]
100%|██████████| 1329/1329 [00:00<00:00, 435793.14it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1809227.53it/s]
100%|██████████| 1329/1329 [00:00<00:00, 375571.35it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1930135.05it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1306358.10it/s]
100%|██████████| 1329/1329 [00:00<00:00, 806107.02it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1547967.24it/s]
100%|██████████| 1329/1329 [00:00<00:00, 399328.75it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1928799.31it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1282021.62it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1397400.35it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1909636.87it/s]
100%|██████████| 1329/1329 [00:00<00:00, 635457.14it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1675956.11it/s]
100%|██████████| 1329/1329 [00:00<00:0

['saga-3-15', 0.8526694093724879, 0.8587089096200301, 0.8486499718814376, 0.8529682164199729, 0.7064146474806273]
saga-3-16


100%|██████████| 1329/1329 [00:00<00:00, 982416.29it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1027318.47it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1355930.43it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1322474.50it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1292125.64it/s]
100%|██████████| 1329/1329 [00:00<00:00, 419430.40it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1337066.45it/s]
100%|██████████| 1329/1329 [00:00<00:00, 664785.93it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1595372.07it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1551413.86it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1591727.59it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1928799.31it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1228886.69it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1179232.07it/s]
100%|██████████| 1329/1329 [00:00<00:00, 518919.20it/s]
100%|██████████| 1329/1329 [00:00<00:00, 286283.70it/s]
100%|██████████| 1329/1329 [00:00<00:00, 316124.88it/s]
100%|██████████| 1329/1329 [00:00<00:

['saga-3-16', 0.8545443798174682, 0.8615166687033188, 0.8480508147033333, 0.8540242779594568, 0.7101609504819996]
saga-3-17


100%|██████████| 1329/1329 [00:00<00:00, 1124970.74it/s]
100%|██████████| 1329/1329 [00:00<00:00, 928574.05it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1042886.81it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1420547.91it/s]
100%|██████████| 1329/1329 [00:00<00:00, 944304.59it/s]
100%|██████████| 1329/1329 [00:00<00:00, 313581.80it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1687626.41it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1336104.99it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1487651.46it/s]
100%|██████████| 1329/1329 [00:00<00:00, 535057.59it/s]
100%|██████████| 1329/1329 [00:00<00:00, 371021.70it/s]
100%|██████████| 1329/1329 [00:00<00:00, 443525.62it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1945630.02it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1326250.30it/s]
100%|██████████| 1329/1329 [00:00<00:00, 405015.62it/s]
100%|██████████| 1329/1329 [00:00<00:00, 2043339.45it/s]
100%|██████████| 1329/1329 [00:00<00:00, 585159.56it/s]
100%|██████████| 1329/1329 [00:00<00:00

['saga-3-17', 0.8501678725114674, 0.8473624657289162, 0.8596559307510073, 0.8522760203361291, 0.7021849930934025]
saga-3-18


100%|██████████| 1329/1329 [00:00<00:00, 440999.21it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1701016.18it/s]
100%|██████████| 1329/1329 [00:00<00:00, 574071.06it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1105559.31it/s]
100%|██████████| 1329/1329 [00:00<00:00, 619772.07it/s]
100%|██████████| 1329/1329 [00:00<00:00, 429414.53it/s]
100%|██████████| 1329/1329 [00:00<00:00, 674274.83it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1917519.79it/s]
100%|██████████| 1329/1329 [00:00<00:00, 304919.32it/s]
100%|██████████| 1329/1329 [00:00<00:00, 892305.11it/s]
100%|██████████| 1329/1329 [00:00<00:00, 665341.37it/s]
100%|██████████| 1329/1329 [00:00<00:00, 465450.07it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1520935.88it/s]
100%|██████████| 1329/1329 [00:00<00:00, 516850.26it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1185754.10it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1889569.50it/s]
100%|██████████| 1329/1329 [00:00<00:00, 1924803.18it/s]
100%|██████████| 1329/1329 [00:00<00:00, 

In [None]:
results[results['mcc '] >= 0.80]

# Conclusion

Accuracy and MCC score are lower than accuracy and MCC of RoBERTa and logistic regression, therefore we decided not to use Doc2Vec in the ensembles.