In [None]:
from pymongo import MongoClient

import warnings
from sklearnex import patch_sklearn

patch_sklearn()
import numpy as np
import pandas as pd
from category_encoders import (
    BackwardDifferenceEncoder,
    BaseNEncoder,
    BinaryEncoder,
    CatBoostEncoder,
    CountEncoder,
    GLMMEncoder,
    HelmertEncoder,
    JamesSteinEncoder,
    LeaveOneOutEncoder,
    MEstimateEncoder,
    QuantileEncoder,
    SummaryEncoder,
    TargetEncoder,
    WOEEncoder,
)
from sklearn import set_config
from sklearn.base import clone as model_clone
from sklearn.cluster import *
from sklearn.compose import *
from sklearn.cross_decomposition import *
from sklearn.decomposition import *
from sklearn.ensemble import *
from sklearn.feature_selection import *
from sklearn.gaussian_process import *
from sklearn.linear_model import *
from sklearn.metrics import *
from sklearn.model_selection import *
from sklearn.multioutput import *
from sklearn.multiclass import *
from sklearn.naive_bayes import *
from sklearn.neighbors import *
from sklearn.neural_network import *
from sklearn.pipeline import *
from sklearn.preprocessing import *
from sklearn.svm import *
from sklearn.tree import *
from sklearn.utils import *
from sklearn.dummy import *
from sklearn.semi_supervised import *
from sklearn.discriminant_analysis import *
import sklearnex, daal4py

from tqdm import tqdm, trange
from xgboost import XGBClassifier, XGBRFClassifier
from BorutaShap import BorutaShap

from sklearn.calibration import *
pd.options.plotting.backend = "plotly"
pd.options.display.max_columns = 50
set_config(display="diagram")
warnings.filterwarnings("ignore")
import pickle
from collections import defaultdict

import matplotlib.pyplot as plt
import seaborn as sns
from joblib import parallel_backend
from joblib.memory import Memory

sns.set()
from pprint import pprint
from helpers import PolynomialWrapper as PWrapper
from helpers import NestedCVWrapper as NCVWrapper
from helpers import ColumnSelectors
import sklearn
from helpers import DFCollection
from helpers import plot_mean_std_max
from helpers import CustomMetrics

mem = Memory(location='../data/joblib_memory/')

In [None]:
cls_names = [
    sklearn.ensemble._weight_boosting.AdaBoostClassifier,
    sklearn.naive_bayes.BernoulliNB,
    # sklearn.naive_bayes.CategoricalNB,
    # sklearn.naive_bayes.ComplementNB,
    sklearn.tree._classes.DecisionTreeClassifier,
    sklearn.tree._classes.ExtraTreeClassifier,
    sklearn.ensemble._forest.ExtraTreesClassifier,
    sklearn.naive_bayes.GaussianNB,
    # sklearn.gaussian_process._gpc.GaussianProcessClassifier,
    sklearn.ensemble._gb.GradientBoostingClassifier,
    sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier,
    sklearn.neighbors.KNeighborsClassifier,
    sklearn.svm._classes.LinearSVC,
    sklearn.linear_model.LogisticRegression,
    # sklearn.linear_model._logistic.LogisticRegressionCV,
    # sklearn.neural_network._multilayer_perceptron.MLPClassifier,
    sklearn.naive_bayes.MultinomialNB,
    # sklearn.neighbors._nearest_centroid.NearestCentroid,
    sklearn.svm.NuSVC,
    sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier,
    sklearn.linear_model._perceptron.Perceptron,
    # sklearn.neighbors._classification.RadiusNeighborsClassifier,
    sklearn.ensemble._forest.RandomForestClassifier,
    sklearn.linear_model._ridge.RidgeClassifier,
    # sklearn.linear_model._ridge.RidgeClassifierCV,
    sklearn.linear_model._stochastic_gradient.SGDClassifier,
    sklearn.svm.SVC,
]
db = DFCollection()
column_selector = ColumnSelectors()
classifiers = [f() for f in cls_names]
dtype_info = column_selector.dtype_info
ordinal = column_selector.ordinal_cols
nominal = column_selector.nominal_cols
binary = column_selector.binary_cols
ratio = column_selector.ratio_cols


final_data = db.final_data
final_pred_data = db.final_pred_data
baseline_prediction_data = db.baseline_prediction_data
data_logit = db.data_logits
prediction_data = db.prediction_data
master_data = db.master
given_data = db.data

ordinal_data, nominal_data, binary_data, ratio_data = db.categorise_data()
nominal_categories = db.nominal_categories
ordinal_categories = db.ordinal_categories
class_labels, n_classes, class_priors = class_distribution(final_data.target.to_numpy().reshape(-1,1))

def gen_balanced_trained_test(data, p):
    Y = data.target
    X_2 = Y_2 = Y[Y == 2].index
    X_0 = Y_0 = Y[Y == 0].index
    X_1 = Y_1 = Y[Y == 1].index
    train_size = int(p * Y_2.shape[0])
    test_size = int((1 - p) * Y_2.shape[0])

    train_idx_2 = np.random.choice(Y_2, (train_size,))
    train_idx_1 = np.random.choice(Y_1, (train_size,))
    train_idx_0 = np.random.choice(Y_0, (train_size,))
    train_idx = np.r_[train_idx_0, train_idx_1, train_idx_2]
    # train_idx.shape

    test_idx_2 = np.random.choice(np.setdiff1d(Y_2, train_idx_2), (test_size,))
    test_idx_1 = np.random.choice(np.setdiff1d(Y_1, train_idx_1), (test_size,))
    test_idx_0 = np.random.choice(np.setdiff1d(Y_0, train_idx_0), (test_size,))
    test_idx = np.r_[test_idx_0, test_idx_1, test_idx_2]
    # test_idx.shape
    return train_idx, test_idx


def gen_nominal_maps(bs: pd.DataFrame = master_data) -> tuple[defaultdict, defaultdict]:
    nominal_master_db = bs.loc[:, nominal]
    nominal_cont_map = defaultdict(dict)
    nominal_indvl_map = defaultdict(dict)
    for c in nominal:
        un = sorted(nominal_master_db[c].unique().tolist())
        n = len(un)
        new_id = list(range(n))
        nominal_indvl_map[c] = dict(zip(un, new_id))
    start = 0
    for c in nominal:
        un = sorted(nominal_master_db[c].unique().tolist())
        n = len(un)
        new_id = list(range(start, start + n))
        nominal_cont_map[c] = dict(zip(un, new_id))
        start += n
    return nominal_indvl_map, nominal_cont_map


# nominal_indvl_map, nominal_cont_map = gen_nominal_maps()
# nominal_master_db = bs.loc[:, nominal]

# nominal_master_db_indvl = nominal_master_db.copy()
# nominal_master_db_cont = nominal_master_db.copy()


# nominal_indvl_map
def nm_indvl_data_trnsform(row):
    for c in nominal:
        curr = row[c]
        row[c] = nominal_indvl_map[c][curr]
    return row


# test1_nominal = nominal_master_db_indvl.apply(nm_indvl_data_trnsform, axis=1)


def nm_cont_data_trnsform(row):
    for c in nominal:
        curr = row[c]
        row[c] = nominal_cont_map[c][curr]
    return row


# test2_nominal = nominal_master_db_cont.apply(nm_cont_data_trnsform, axis=1)
# prediction_data = pd.read_pickle("../data/pred_data.pkl")
# est_ = [("cnb",CategoricalNB()),]


def wf_create(cat_encoder=TargetEncoder, model=None):
    """
    :param cat_encoder: category_encoders
    :param model: scikit-learn Model
    :return pipe: sklearn.pipeline.Pipline
    Examples of model param:

    model = ComplementNB(norm=True,fit_prior=True,)
    model = MultinomialNB()
    model = LogisticRegression(n_jobs=-1, max_iter=10000,random_state=19)
    """
    _steps = []
    encoder__name = cat_encoder.__class__.__name__
    _steps.append(
        ("PW" + encoder__name, PolynomialWrapper(feature_encoder=cat_encoder))
    )
    if model is None:
        passordinal_columns
    else:
        model__name = model.__class__.__name__
        _steps.append((model__name, model))
    pipe = Pipeline(steps=_steps)
    return pipe

In [None]:
classifiers = [
    AdaBoostClassifier(),
    BaggingClassifier(),
    BernoulliNB(),
    CalibratedClassifierCV(),
    CategoricalNB(),
    ComplementNB(),
    DecisionTreeClassifier(),
    DummyClassifier(),
    ExtraTreeClassifier(),
    ExtraTreesClassifier(),
    GaussianNB(),
    GaussianProcessClassifier(),
    GradientBoostingClassifier(),
    HistGradientBoostingClassifier(),
    KNeighborsClassifier(),
    LabelPropagation(),
    LabelSpreading(),
    LinearDiscriminantAnalysis(),
    LinearSVC(),
    LogisticRegression(),
    LogisticRegressionCV(),
    MLPClassifier(),
    MultinomialNB(),
    NearestCentroid(),
    NuSVC(),
    PassiveAggressiveClassifier(),
    Perceptron(),
    QuadraticDiscriminantAnalysis(),
    RadiusNeighborsClassifier(),
    RandomForestClassifier(),
    RidgeClassifier(),
    RidgeClassifierCV(),
    SGDClassifier(),
    SVC(),
]


# combiners = [sklearn.multioutput.ClassifierChain,
#  sklearn.multioutput.MultiOutputClassifier,
#  sklearn.multiclass.OneVsOneClassifier,
#  sklearn.multiclass.OneVsRestClassifier,
#  sklearn.multiclass.OutputCodeClassifier,
#  sklearn.ensemble._stacking.StackingClassifier,
#  sklearn.ensemble._voting.VotingClassifier
# ]

```python
Template
X = final_data.loc[:,categories]
y = final_data.target.to_numpy().reshape(-1,1)
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=10, test_size=0.3)
```

In [None]:
# ohe = OneHotEncoder(min_frequency=0.0001, handle_unknown='infrequent_if_exist', sparse=False,dtype=np.int32)
# X_train_t = ohe.fit_transform(nominal_data)
reports = []
fig, ax = plt.subplots(3,1,figsize=(10,10))
i = 0
for categories in [nominal, ordinal]:
    X = final_data.loc[:,categories]
    y = final_data.target.to_numpy().reshape(-1,1)
    X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=10, test_size=0.3)
    feature_names = categories
    model = QuadraticDiscriminantAnalysis(priors=class_priors,store_covariance=True,reg_param=0.0001 )
    # model = De
    # ovr_qda.fit(X_train, y_train)
    # y_pred = QDA_.fit(X_train, y_train)
    cv_= RepeatedStratifiedKFold(n_splits=3,n_repeats=5, random_state=10)
    pipe =  Pipeline(steps=[('polynomialwrapper',
                 PWrapper(feature_encoder=WOEEncoder())),
                (model.__class__.__name__,
                 QuadraticDiscriminantAnalysis(priors=class_priors,
                                               reg_param=0.0001,
                                               store_covariance=True))], memory=mem)
    def analyze_model(ax=ax,i = i,X=X,y=y, pipe=pipe, feature_names=categories):
        with parallel_backend('multiprocessing'):
            cv_model = cross_validate(pipe, X, y, cv = cv_,return_train_score=True,n_jobs=-1)
            ax[i].plot(np.arange(15), cv_model['test_score'], label=f'{feature_names[0].split("__")[0].upper()}  Data')
            ax[i].legend()
            ax[2].plot(np.arange(15), cv_model['test_score'], label=f'{feature_names[0].split("__")[0].upper()}  Data')
            ax[2].legend()
    analyze_model()
    y_pred = model.fit(X_train, y_train).predict(X_test)
    print(classification_report(y_test,y_pred, output_dict=False))
    c_report = classification_report(y_test,y_pred, output_dict=True)
    reports.append(c_report)
    # break
    i+=1

In [None]:
ordinal_proc_data_v2 = data_logit.drop(binary+nominal+ratio+['label'], axis=1)

In [None]:
# reports
ordinal_proc_data.corrwith(final_data.target,method='kendall').sort_index().plot()

In [None]:
# feature_correls = ordinal_proc_data.corrwith(final_data.target, method='kendall').sort_values(ascending=False).plot()
ordinal_proc_data.loc[:,ordinal] = ordinal_proc_data.loc[:,ordinal].astype('category')

In [None]:
# ordinal_data
cols = ordinal
reports = []
transformed_f = {}

category_encoders = [TargetEncoder(cols = ordinal), 
                     WOEEncoder(cols = ordinal),
                     JamesSteinEncoder(cols = ordinal),
                     HelmertEncoder(cols = ordinal),
                     # OrdinalEncoder(),
                     SummaryEncoder(cols = ordinal),
                     LeaveOneOutEncoder(cols = ordinal),
                     BackwardDifferenceEncoder(cols = ordinal)]
for ce in category_encoders:
    fail = 0
    encoder = PWrapper(ce)
    try:
        f_tra = encoder.fit_transform(ordinal_proc_data,final_data.target)
    except:
        print(f'{ce} failed')
        print('Trying to run Solo')
        fail=1
    try:
        if fail == 1:
            f_tra = ce.fit_transform(ordinal_proc_data,final_data.target)
    except:
        print("Even Solo Failed !!! SKIPPING!!!!!")
        fail=0
        continue
    fcorr = f_tra.corrwith(final_data.target,method='kendall').sort_values(ascending=False)
    reports.append(fcorr)
    fail=0
    transformed_f[ce.__class__.__name__]= f_tra 


In [None]:
sns.set()
threshold = 0.25
for i in range(7):
    p_l: pd.Series = reports[i]
    print(list(transformed_f.keys())[i])
    print(p_l[(p_l>threshold)|(p_l<-threshold)].index)
    print(p_l[(p_l>threshold)|(p_l<-threshold)].shape)
    print(p_l[(p_l>threshold)|(p_l<-threshold)].mean())
    print(p_l[(p_l>threshold)|(p_l<-threshold)])
    print()
    
    
    print('-'*40)
# plt.hlines(0,)

In [None]:
for i, ct in tqdm(enumerate(cat_ct)):
    pipe = make_pipeline(ct,
                             PowerTransformer(),
                             OneVsOneClassifier(LogisticRegressionCV(n_jobs=-1,
                                                                      fit_intercept=False,
                                                                      max_iter=10000000,
                                                                      random_state=0)))
    print(pipe)
    break

In [26]:
def chosen_metrics(y_pred,y_test,chosen_encoder=None,chosen_pipe=None):
    print('-'*90)
    print(chosen_encoder)
    print('-'*30)
    print(classification_report(y_test, y_pred))
    print('-'*90)
    print("cohen_kappa_score\t|")
    print('-'*30)
    print(sklearn.metrics.cohen_kappa_score(y_pred, y_test))
    print('-'*90)
    print("balanced_accuracy_score\t|")
    print('-'*30)
    print(sklearn.metrics.balanced_accuracy_score(y_test, y_pred))
    print('-'*90)
    print("accuracy_score\t|")
    print('-'*30)
    print(
        sklearn.metrics.accuracy_score(
            y_test,
            y_pred,
        )
        * 0.85
    )
    print('-'*90)
    print("f1_score_micro\t|")
    print('-'*30)
    print(sklearn.metrics.f1_score(y_test, y_pred, average="micro"))
    print('-'*90)
    print("f1_score_macro\t|")
    print('-'*30)
    print(sklearn.metrics.f1_score(y_test, y_pred, average="macro"))
    print('-'*90)
    print("f1_score_weighted\t|")
    print('-'*30)
    print(sklearn.metrics.f1_score(y_test, y_pred, average="weighted"))
    print('-'*90)

    
category_encoders = [
    PWrapper(TargetEncoder(cols = ordinal,smoothing=0)), 
    PWrapper(WOEEncoder(cols = ordinal,regularization=0.00000001)),
    PWrapper(JamesSteinEncoder(cols = ordinal,model='pooled')),
    HelmertEncoder(cols = ordinal),
    # OrdinalEncoder(),
    SummaryEncoder(cols = ordinal,quantiles=np.linspace(0.01,1,num=20),m=0),
    PWrapper(LeaveOneOutEncoder(cols = ordinal)),
    BackwardDifferenceEncoder(cols = ordinal)
]


cat_ct = []
for encoder in category_encoders:
    c_ct = ColumnTransformer([
        # ('cat_encoder', PWrapper(encoder),ordinal),
        ('cat_encoder', encoder,ordinal),
        ('numeric',MaxAbsScaler(),numeric_prob)
    ], remainder='drop',n_jobs=-1,sparse_threshold=0,)
    cat_ct.append(c_ct)


# model = LogisticRegressionCV(n_jobs=-1,fit_intercept=False,max_iter=10000000,random_state=0,scoring='f1_macro')
model = XGBRFClassifier(n_jobs=-1,max_bin=2048,verbosity=0,tree_method='approx',learning_rate=1, random_state=42,base_score=0.82002,importance_type='total_gain', num_parallel_trees=30,subsample=1.0)
# model = XGBRFClassifier(n_jobs=-1,max_bin=256,verbosity=0,tree_method='exact',)
ovo_model = OneVsOneClassifier(estimator=model, n_jobs=-1) 
ovr_model = OneVsRestClassifier(estimator=model, n_jobs=-1)
# Loading Data
X = data_logit.drop(binary+nominal+ratio+['label'], axis=1)
y = final_data.target.to_numpy().reshape(-1,1)
# Numeric Columns Isolator
numeric_prob =list( set(X.columns) - set(ordinal))
# c_ct
final_model = ovo_model
try:
    with parallel_backend('loky'):
        for i, ct in tqdm(enumerate(cat_ct)):
            curr_flow = Pipeline([("Column_transformer",ct),
                            ("model",final_model)], memory=Memory(location=',,/data/joblib_memory'))
            y_pred = curr_flow.fit(X_train, y_train).predict(X_test)
            chosen_metrics(y_test,y_pred,
                           chosen_encoder=category_encoders[i].__class__.__name__,
                          chosen_pipe=curr_flow)
            
except KeyboardInterrupt:
    print('STOPPED')
        

1it [00:02,  2.98s/it]

------------------------------------------------------------------------------------------
PolynomialWrapper
------------------------------
              precision    recall  f1-score   support

           0       0.72      0.71      0.71       256
           1       0.85      0.73      0.79       433
           2       0.23      0.44      0.30        71

    accuracy                           0.70       760
   macro avg       0.60      0.63      0.60       760
weighted avg       0.75      0.70      0.72       760

------------------------------------------------------------------------------------------
cohen_kappa_score	|
------------------------------
0.4872004580027808
------------------------------------------------------------------------------------------
balanced_accuracy_score	|
------------------------------
0.6252508616460116
------------------------------------------------------------------------------------------
accuracy_score	|
------------------------------
0.5916447368

2it [00:05,  2.55s/it]

------------------------------------------------------------------------------------------
PolynomialWrapper
------------------------------
              precision    recall  f1-score   support

           0       0.70      0.71      0.71       248
           1       0.85      0.73      0.79       437
           2       0.24      0.44      0.31        75

    accuracy                           0.69       760
   macro avg       0.60      0.63      0.60       760
weighted avg       0.74      0.69      0.71       760

------------------------------------------------------------------------------------------
cohen_kappa_score	|
------------------------------
0.48495647601799374
------------------------------------------------------------------------------------------
balanced_accuracy_score	|
------------------------------
0.6271328215349031
------------------------------------------------------------------------------------------
accuracy_score	|
------------------------------
0.590526315

3it [00:07,  2.39s/it]

------------------------------------------------------------------------------------------
PolynomialWrapper
------------------------------
              precision    recall  f1-score   support

           0       0.69      0.69      0.69       254
           1       0.83      0.72      0.77       427
           2       0.24      0.41      0.30        79

    accuracy                           0.68       760
   macro avg       0.59      0.60      0.59       760
weighted avg       0.72      0.68      0.69       760

------------------------------------------------------------------------------------------
cohen_kappa_score	|
------------------------------
0.45651794746572716
------------------------------------------------------------------------------------------
balanced_accuracy_score	|
------------------------------
0.6043364080423751
------------------------------------------------------------------------------------------
accuracy_score	|
------------------------------
0.574868421

4it [00:18,  5.70s/it]

------------------------------------------------------------------------------------------
HelmertEncoder
------------------------------
              precision    recall  f1-score   support

           0       0.73      0.72      0.73       257
           1       0.87      0.75      0.81       432
           2       0.24      0.45      0.31        71

    accuracy                           0.71       760
   macro avg       0.61      0.64      0.61       760
weighted avg       0.77      0.71      0.73       760

------------------------------------------------------------------------------------------
cohen_kappa_score	|
------------------------------
0.5140097408285544
------------------------------------------------------------------------------------------
balanced_accuracy_score	|
------------------------------
0.6401828611095889
------------------------------------------------------------------------------------------
accuracy_score	|
------------------------------
0.6050657894736

5it [00:21,  5.01s/it]

------------------------------------------------------------------------------------------
SummaryEncoder
------------------------------
              precision    recall  f1-score   support

           0       0.68      0.67      0.67       258
           1       0.80      0.72      0.76       412
           2       0.26      0.39      0.31        90

    accuracy                           0.66       760
   macro avg       0.58      0.59      0.58       760
weighted avg       0.70      0.66      0.68       760

------------------------------------------------------------------------------------------
cohen_kappa_score	|
------------------------------
0.4416272905382045
------------------------------------------------------------------------------------------
balanced_accuracy_score	|
------------------------------
0.5929521754764473
------------------------------------------------------------------------------------------
accuracy_score	|
------------------------------
0.5648026315789

6it [00:24,  4.01s/it]

------------------------------------------------------------------------------------------
PolynomialWrapper
------------------------------
              precision    recall  f1-score   support

           0       0.90      0.33      0.48       680
           1       0.07      0.55      0.13        49
           2       0.07      0.32      0.12        31

    accuracy                           0.35       760
   macro avg       0.35      0.40      0.24       760
weighted avg       0.81      0.35      0.45       760

------------------------------------------------------------------------------------------
cohen_kappa_score	|
------------------------------
0.0158313270591669
------------------------------------------------------------------------------------------
balanced_accuracy_score	|
------------------------------
0.40198466483367534
------------------------------------------------------------------------------------------
accuracy_score	|
------------------------------
0.294144736

7it [00:36,  5.18s/it]

------------------------------------------------------------------------------------------
BackwardDifferenceEncoder
------------------------------
              precision    recall  f1-score   support

           0       0.73      0.72      0.73       255
           1       0.87      0.75      0.81       430
           2       0.26      0.47      0.33        75

    accuracy                           0.71       760
   macro avg       0.62      0.65      0.62       760
weighted avg       0.76      0.71      0.73       760

------------------------------------------------------------------------------------------
cohen_kappa_score	|
------------------------------
0.517221283291567
------------------------------------------------------------------------------------------
balanced_accuracy_score	|
------------------------------
0.6464660282717739
------------------------------------------------------------------------------------------
accuracy_score	|
------------------------------
0.606





#### Logit Data
        ------------------------------------------------------------------------------------------
        HelmertEncoder
        ------------------------------
                      precision    recall  f1-score   support

                   0       0.67      0.66      0.67       252
                   1       0.77      0.86      0.81       372
                   2       0.34      0.24      0.28       136

            accuracy                           0.68       760
           macro avg       0.59      0.59      0.59       760
        weighted avg       0.66      0.68      0.67       760

        ------------------------------------------------------------------------------------------
        cohen_kappa_score	|
        ------------------------------
        0.47171683376212825
        ------------------------------------------------------------------------------------------
        balanced_accuracy_score	|
        ------------------------------
        0.5860691947029708
        ------------------------------------------------------------------------------------------
        accuracy_score	|
        ------------------------------
        0.5804605263157894
        ------------------------------------------------------------------------------------------
        f1_score_micro	|
        ------------------------------
        0.6828947368421052
        ------------------------------------------------------------------------------------------
        f1_score_macro	|
        ------------------------------
        0.5854230771592995
        ------------------------------------------------------------------------------------------
        f1_score_weighted	|
        ------------------------------
        0.6675997660948015
        ------------------------------------------------------------------------------------------
        ------------------------------------------------------------------------------------------
        SummaryEncoder
        ------------------------------
                      precision    recall  f1-score   support

                   0       0.66      0.65      0.66       252
                   1       0.74      0.81      0.77       372
                   2       0.32      0.24      0.27       136

            accuracy                           0.66       760
           macro avg       0.57      0.57      0.57       760
        weighted avg       0.64      0.66      0.64       760

        ------------------------------------------------------------------------------------------
        cohen_kappa_score	|
        ------------------------------
        0.4297562316715543
        ------------------------------------------------------------------------------------------
        balanced_accuracy_score	|
        ------------------------------
        0.5675268315211389
        ------------------------------------------------------------------------------------------
        accuracy_score	|
        ------------------------------
        0.5569736842105263
        ------------------------------------------------------------------------------------------
        f1_score_micro	|
        ------------------------------
        0.6552631578947369
        ------------------------------------------------------------------------------------------
        f1_score_macro	|
        ------------------------------
        0.5675982905982906
        ------------------------------------------------------------------------------------------
        f1_score_weighted	|
        ------------------------------
        0.6444995951417003
        ------------------------------------------------------------------------------------------
        ------------------------------------------------------------------------------------------
        BackwardDifferenceEncoder
        ------------------------------
                      precision    recall  f1-score   support

                   0       0.61      0.69      0.65       252
                   1       0.75      0.78      0.77       372
                   2       0.37      0.24      0.29       136

            accuracy                           0.66       760
           macro avg       0.58      0.57      0.57       760
        weighted avg       0.64      0.66      0.64       760

        ------------------------------------------------------------------------------------------
        cohen_kappa_score	|
        ------------------------------
        0.43063673067904973
        ------------------------------------------------------------------------------------------
        balanced_accuracy_score	|
        ------------------------------
        0.5713670772968685
        ------------------------------------------------------------------------------------------
        accuracy_score	|
        ------------------------------
        0.5569736842105263
        ------------------------------------------------------------------------------------------
        f1_score_micro	|
        ------------------------------
        0.6552631578947369
        ------------------------------------------------------------------------------------------
        f1_score_macro	|
        ------------------------------
        0.5695615697198475
        ------------------------------------------------------------------------------------------
        f1_score_weighted	|
        ------------------------------
        0.6429621528314725
        ------------------------------------------------------------------------------------------



        ------------------------------------------------------------------------------------------
        TargetEncoder                 |
        ------------------------------
                      precision    recall  f1-score   support

                   0       0.68      0.70      0.69       243
                   1       0.85      0.74      0.79       431
                   2       0.22      0.35      0.27        86

            accuracy                           0.68       760
           macro avg       0.58      0.60      0.58       760
        weighted avg       0.73      0.68      0.70       760

        ------------------------------------------------------------------------------------------
        cohen_kappa_score	            |
        ------------------------------
        0.4680776915570837
        ------------------------------------------------------------------------------------------
        balanced_accuracy_score	|
        ------------------------------
        0.5967866461760237
        ------------------------------------------------------------------------------------------
        accuracy_score	|
        ------------------------------
        0.5804605263157894
        ------------------------------------------------------------------------------------------
        f1_score_micro	|
        ------------------------------
        0.6828947368421052
        ------------------------------------------------------------------------------------------
        f1_score_macro	|
        ------------------------------
        0.58440308303322
        ------------------------------------------------------------------------------------------
        f1_score_weighted	|
        ------------------------------
        0.700656623720791
        ------------------------------------------------------------------------------------------


        ------------------------------------------------------------------------------------------
        WOEEncoder
        ------------------------------
                      precision    recall  f1-score   support

                   0       0.66      0.70      0.68       237
                   1       0.84      0.74      0.78       423
                   2       0.26      0.36      0.31       100

            accuracy                           0.68       760
           macro avg       0.59      0.60      0.59       760
        weighted avg       0.71      0.68      0.69       760

        ------------------------------------------------------------------------------------------
        cohen_kappa_score	|
        ------------------------------
        0.45889542257580995
        ------------------------------------------------------------------------------------------
        balanced_accuracy_score	|
        ------------------------------
        0.5985488424055619
        ------------------------------------------------------------------------------------------
        accuracy_score	|
        ------------------------------
        0.57375
        ------------------------------------------------------------------------------------------
        f1_score_micro	|
        ------------------------------
        0.675
        ------------------------------------------------------------------------------------------
        f1_score_macro	|
        ------------------------------
        0.5888037627288679
        ------------------------------------------------------------------------------------------
        f1_score_weighted	|
        ------------------------------
        0.6873255176745005
        ------------------------------------------------------------------------------------------


        ------------------------------------------------------------------------------------------
        JamesSteinEncoder
        ------------------------------
                      precision    recall  f1-score   support

                   0       0.69      0.69      0.69       252
                   1       0.83      0.74      0.78       416
                   2       0.27      0.40      0.32        92

            accuracy                           0.69       760
           macro avg       0.60      0.61      0.60       760
        weighted avg       0.72      0.69      0.70       760

        ------------------------------------------------------------------------------------------
        cohen_kappa_score	|
        ------------------------------
        0.47628823176638835
        ------------------------------------------------------------------------------------------
        balanced_accuracy_score	|
        ------------------------------
        0.613135606342128
        ------------------------------------------------------------------------------------------
        accuracy_score	|
        ------------------------------
        0.5826973684210526
        ------------------------------------------------------------------------------------------
        f1_score_micro	|
        ------------------------------
        0.6855263157894737
        ------------------------------------------------------------------------------------------
        f1_score_macro	|
        ------------------------------
        0.6010899357813598
        ------------------------------------------------------------------------------------------
        f1_score_weighted	|
        ------------------------------
        0.6988334950386921
        ------------------------------------------------------------------------------------------


        ------------------------------------------------------------------------------------------
        LeaveOneOutEncoder
        ------------------------------
                      precision    recall  f1-score   support

                   0       0.74      0.71      0.72       263
                   1       0.86      0.75      0.80       430
                   2       0.24      0.48      0.32        67

            accuracy                           0.71       760
           macro avg       0.61      0.64      0.61       760
        weighted avg       0.76      0.71      0.73       760

        ------------------------------------------------------------------------------------------
        cohen_kappa_score	|
        ------------------------------
        0.5092504937882029
        ------------------------------------------------------------------------------------------
        balanced_accuracy_score	|
        ------------------------------
        0.6437826342687483
        ------------------------------------------------------------------------------------------
        accuracy_score	|
        ------------------------------
        0.6028289473684211
        ------------------------------------------------------------------------------------------
        f1_score_micro	|
        ------------------------------
        0.7092105263157895
        ------------------------------------------------------------------------------------------
        f1_score_macro	|
        ------------------------------
        0.6126999287217255
        ------------------------------------------------------------------------------------------
        f1_score_weighted	|
        ------------------------------
        0.7306716211628146
        ------------------------------------------------------------------------------------------


In [None]:
pipe