In [None]:
from pymongo import MongoClient
import optuna
import os

os.environ["NEPTUNE_PROJECT"] = "mlop3n/SDP"
os.environ[
    "NEPTUNE_NOTEBOOK_PATH"
] = "PycharmProjects/sdpiit/notebooks/Pipeline_components_builder.ipynb"
import warnings
from sklearnex import patch_sklearn

patch_sklearn()
import numpy as np
import pandas as pd
from category_encoders import (
    BackwardDifferenceEncoder,
    BaseNEncoder,
    BinaryEncoder,
    CatBoostEncoder,
    CountEncoder,
    GLMMEncoder,
    HelmertEncoder,
    JamesSteinEncoder,
    LeaveOneOutEncoder,
    MEstimateEncoder,
    QuantileEncoder,
    SummaryEncoder,
    TargetEncoder,
    WOEEncoder,
)
from sklearn import set_config
from sklearn.base import clone as model_clone
from sklearn.cluster import *
from sklearn.compose import *
from sklearn.cross_decomposition import *
from sklearn.decomposition import *
from sklearn.ensemble import *
from sklearn.feature_selection import *
from sklearn.gaussian_process import *
from sklearn.linear_model import *
from sklearn.metrics import *
from sklearn.model_selection import *
from sklearn.multioutput import *
from sklearn.multiclass import *
from sklearn.naive_bayes import *
from sklearn.neighbors import *
from sklearn.neural_network import *
from sklearn.pipeline import *
from sklearn.preprocessing import *
from sklearn.svm import *
from sklearn.tree import *
from sklearn.utils import *
from sklearn.dummy import *
from sklearn.semi_supervised import *
from sklearn.discriminant_analysis import *
import sklearnex, daal4py

from tqdm import tqdm, trange
from xgboost import XGBClassifier, XGBRFClassifier
from BorutaShap import BorutaShap

from sklearn.calibration import *

pd.options.plotting.backend = "plotly"
pd.options.display.max_columns = 50
set_config(display="diagram")
warnings.filterwarnings("ignore")
import pickle
from collections import defaultdict

import matplotlib.pyplot as plt
import seaborn as sns
from joblib import parallel_backend
from joblib.memory import Memory

sns.set()
from pprint import pprint
from helpers import PolynomialWrapper as PWrapper
from helpers import NestedCVWrapper as NCVWrapper
from helpers import ColumnSelectors
import sklearn

from helpers import DFCollection
from helpers import plot_mean_std_max
from helpers import CustomMetrics
import gc

%matplotlib inline
# NOTE: despite its name, CACHE_DIR is a joblib ``Memory`` object rooted at
# ../data/joblib_memory/ -- it is not a directory path string.
CACHE_DIR = Memory(location="../data/joblib_memory/")
# Earlier Postgres storage URL, kept for reference. Credentials redacted --
# never commit real passwords; read them from the environment instead.
# OPTUNA_DB = "postgresql+psycopg2://<user>:<password>@localhost:5433/optuna"
from REDIS_CONFIG import REDIS_URL

# REDIS_URL is provided by the project-local REDIS_CONFIG module
# (presumably the Optuna storage URL -- confirm against its users).
OPTUN_DB = REDIS_URL


def allow_stopping(func):
    """Decorate ``func`` so a ``KeyboardInterrupt`` stops it gracefully.

    Parameters
    ----------
    func : callable
        The function to protect. Positional and keyword arguments given to
        the returned wrapper are forwarded to it unchanged.

    Returns
    -------
    callable
        A wrapper that returns ``func``'s result. If ``func`` is interrupted
        with ``KeyboardInterrupt`` the wrapper prints ``"Program Stopped"``
        and returns ``None`` instead of propagating the interrupt.

    Notes
    -----
    ``gc.collect()`` runs after every call (normal return, interrupt, or any
    other exception). Other exceptions are not swallowed.
    """
    import functools  # local import keeps the block self-contained

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except KeyboardInterrupt:
            print("Program Stopped")
            return None
        finally:
            # Previously unreachable on the success path because of the
            # early ``return``; ``finally`` makes the cleanup unconditional.
            gc.collect()

    return wrapper

In [None]:
# Project-local accessors for the prepared data frames and column groups.
db = DFCollection()
column_selector = ColumnSelectors()
# classifiers = [f() for f in cls_names]
dtype_info = column_selector.dtype_info
ordinal = column_selector.ordinal_cols
nominal = column_selector.nominal_cols
binary = column_selector.binary_cols
ratio = column_selector.ratio_cols


final_data = db.final_data
final_pred_data = db.final_pred_data
baseline_prediction_data = db.baseline_prediction_data
data_logit = db.data_logits
prediction_data = db.prediction_data
master_data = db.master
given_data = db.data

ordinal_data, nominal_data, binary_data, ratio_data = db.categorise_data()
nominal_categories = db.nominal_categories
ordinal_categories = db.ordinal_categories
# class_distribution expects a 2-D (n_samples, n_outputs) array, hence the reshape.
class_labels, n_classes, class_priors = class_distribution(
    final_data.target.to_numpy().reshape(-1, 1)
)

# One-hot encode the nominal columns, then drop near-constant dummies.
encoder = OneHotEncoder(sparse=False, drop="first")
variance_thr = VarianceThreshold(0.001)
nominal_ohe_pipe = Pipeline(
    steps=[("ohe", encoder), ("var_th", variance_thr)],
    # CACHE_DIR is already a joblib Memory. Wrapping it in another
    # Memory(location=...) makes joblib reject the location (with a warning
    # that is silenced in this notebook) and disables caching altogether.
    memory=CACHE_DIR,
)
ohe_nominal_data = nominal_ohe_pipe.fit_transform(
    nominal_data.drop(["nominal__v_12", "nominal__v_21"], axis=1)
)
n1df = pd.DataFrame(
    ohe_nominal_data,
    columns=nominal_ohe_pipe.get_feature_names_out(),
    index=nominal_data.index,
)
def train_test(X, y, test_size, random_state=10):
    """Split ``X`` and ``y`` into stratified train and test partitions.

    Parameters
    ----------
    X : array-like
        Feature matrix to split.
    y : array-like
        Targets; also used as the stratification labels.
    test_size : float or int
        Forwarded to ``train_test_split`` as ``test_size``.
    random_state : int, default=10
        Seed forwarded to ``train_test_split``. The default reproduces the
        previously hard-coded value.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=random_state, test_size=test_size, stratify=y
    )
    return X_train, X_test, y_train, y_test

In [117]:
target = final_data.target
# Categories rarer than min_frequency are grouped into one "infrequent" bucket.
encoder = OneHotEncoder(sparse=False, drop="first", min_frequency=0.0001)
# nominal_data_test = final_pred_data.loc[:,nominal]
variance_thr = VarianceThreshold(0.001)
nominal_ohe_pipe = Pipeline(
    steps=[("ohe", encoder), ("var_th", variance_thr)],
    # CACHE_DIR is already a joblib Memory. Passing it as the *location* of a
    # new Memory is rejected by joblib (warning suppressed in this notebook)
    # and leaves the pipeline without any caching.
    memory=CACHE_DIR,
)


# Score functions used with the p-value based selectors (SelectFpr/Fdr/Fwe).
scf = {
    "F": f_classif,
    "CHI": chi2,
}

# Score functions used with the rank based selectors (SelectKBest/Percentile).
scf2 = {
    "MIC": mutual_info_classif,
    "F": f_classif,
    "CHI": chi2,
}


# Defaults for update_selectors(); all_selectors maps "<criterion>-<Selector>"
# to a configured selector instance.
K_BEST = 40
ALPHA = 0.05
all_selectors = {}
PERCENTILE = 5


def update_selectors(alpha_= ALPHA, k_best = K_BEST, percentile=PERCENTILE):
    """(Re)populate the module-level ``all_selectors`` registry in place.

    For every score function in ``scf`` a ``SelectFpr``, ``SelectFdr`` and
    ``SelectFwe`` selector is registered; for every score function in ``scf2``
    a ``SelectKBest`` and ``SelectPercentile`` selector is registered. Keys
    have the form ``"<criterion>-<SelectorClassName>"``; existing entries with
    the same key are overwritten.

    Parameters
    ----------
    alpha_ : float
        Significance level passed to the p-value based selectors.
    k_best : int
        Number of features kept by ``SelectKBest``.
    percentile : int
        Percentile of features kept by ``SelectPercentile``.

    Returns
    -------
    None
    """
    for criterion, score_func in scf.items():
        p_value_selectors = (
            SelectFpr(score_func, alpha=alpha_),
            SelectFdr(score_func, alpha=alpha_),
            SelectFwe(score_func, alpha=alpha_),
        )
        for selector in p_value_selectors:
            all_selectors[criterion + '-' + selector.__class__.__name__] = selector

    for criterion, score_func in scf2.items():
        # Honour the arguments: the previous version read the K_BEST and
        # PERCENTILE globals here, so k_best/percentile were silently ignored.
        ranking_selectors = (
            SelectKBest(score_func, k=k_best),
            SelectPercentile(score_func, percentile=percentile),
        )
        for selector in ranking_selectors:
            all_selectors[criterion + '-' + selector.__class__.__name__] = selector
            

# Initialize the selectors
update_selectors(alpha_ = ALPHA,k_best=K_BEST)
ct = ColumnTransformer(transformers=[('one_hot_enc',nominal_ohe_pipe,nominal)], sparse_threshold=0,n_jobs=-1)
# One-hot encode the nominal columns, then concatenate the output of every
# registered univariate selector.
ohe_nominal_feature_selection = make_pipeline(
    ct,
    FeatureUnion(transformer_list=list(all_selectors.items()), n_jobs=-1),
)
# with open('../data/pipelines/ohe_nominal_features.pkl', 'wb') as fp:
#     pickle.dump(ohe_nominal_feature_selection, fp, protocol=-1)
# ohe_nominal_feature_selection
# n1df_test = pd.DataFrame

with parallel_backend('loky'):
    elite_ohe_nominal_features_train = ohe_nominal_feature_selection.fit_transform(final_data,target)
    elite_ohe_nominal_features_test = ohe_nominal_feature_selection.transform(final_pred_data)

f_names = ohe_nominal_feature_selection.get_feature_names_out()
elite_onf_df = pd.DataFrame(elite_ohe_nominal_features_train, columns=f_names)

# Several selectors keep the same dummy column; drop value-identical columns,
# keeping the first occurrence. The decision is made on the TRAINING data only.
kept_columns = elite_onf_df.transpose().drop_duplicates(ignore_index=False).index
elite_onf_df = elite_onf_df.loc[:, kept_columns]

# Strip the "<criterion>-<Selector>__" union prefix and, when present, the
# "one_hot_enc__" ColumnTransformer prefix so that only the encoded feature
# name remains. (Joining split('__')[1] and [2] mapped every four-part name to
# the same "one_hot_enc__nominal" label.)
ct_prefix = 'one_hot_enc__'
f_names_t = {}
for full_name in kept_columns:
    short_name = full_name.split('__', 1)[1]
    if short_name.startswith(ct_prefix):
        short_name = short_name[len(ct_prefix):]
    f_names_t[full_name] = short_name

elite_onf_df = elite_onf_df.rename(columns=f_names_t)

# Apply the SAME column selection to the prediction data. De-duplicating the
# test frame independently could keep a different set of columns and leave
# train and test with mismatched features.
elite_onf_df_test = pd.DataFrame(elite_ohe_nominal_features_test, columns=f_names)
elite_onf_df_test = elite_onf_df_test.loc[:, kept_columns].rename(columns=f_names_t)

# Base estimator for the multiclass wrappers. Alternatives that were tried:
# model = LogisticRegression(random_state=10,max_iter=10000, n_jobs=-1) # scoring='f1_macro', cv=RepeatedStratifiedKFold(n_repeats=5))
# model = SVC(random_state=10,break_ties=True,)
model = CategoricalNB(min_categories=2)
# model= RandomForestClassifier(random_state=20, n_jobs=-1)
# model = BernoulliNB(binarize=None)
# model= GaussianProcessClassifier(n_jobs=-1,random_state=10,n_restarts_optimizer=10)

# Use the explicitly imported ``model_clone`` (sklearn.base.clone). A bare
# ``clone`` is only in scope here as a side effect of the star imports.
ovo = OneVsOneClassifier(model_clone(model), n_jobs=-1)
ovr = OneVsRestClassifier(model_clone(model), n_jobs=-1)

# Classifier evaluated below; switch to ``ovr`` to compare strategies.
tmp = ovo

with parallel_backend('loky'):
    # Hold out 30% of the selected one-hot features for evaluation.
    X_train, X_test, y_train, y_test = train_test(elite_onf_df, target, test_size=0.3)
    y_pred2 = tmp.fit(X_train,y_train).predict(X_test)
    print(classification_report(y_test,y_pred2))

              precision    recall  f1-score   support

           0       0.53      0.42      0.47       373
           1       0.58      0.77      0.66       559
           2       0.40      0.19      0.26       207

    accuracy                           0.55      1139
   macro avg       0.50      0.46      0.46      1139
weighted avg       0.53      0.55      0.52      1139



In [123]:
# Alternative ("rival") nominal pipeline: one-hot encode -> variance filter ->
# recursive feature elimination with cross-validation.
encoder = OneHotEncoder(sparse=False, drop="first", min_frequency=0.0001,handle_unknown="infrequent_if_exist")
variance_thr = VarianceThreshold(0.001)
feature_selector = RFECV(
    estimator=LogisticRegression(max_iter=1000000, random_state=10),
    scoring='f1_macro',
    # Seed the splitter so the selected feature set is reproducible between runs.
    cv=RepeatedStratifiedKFold(n_repeats=2, random_state=10),
    step=6,
    n_jobs=-1,
)
rival_ohe = make_pipeline(encoder, variance_thr,feature_selector)
# NOTE(review): ``ct`` is not used by ``workflow`` below, which is fed the
# nominal columns directly; kept in case later cells rely on it.
ct = ColumnTransformer(transformers=[('one_hot_enc',rival_ohe,nominal)], sparse_threshold=0,n_jobs=-1)
# Clone the classifier so fitting this workflow does not refit (and thereby
# overwrite) the ``ovo`` instance fitted in the previous cell.
workflow = make_pipeline(rival_ohe, model_clone(ovo))
with parallel_backend('loky'):
    X_train, X_test, y_train, y_test = train_test(final_data.loc[:,nominal], target, test_size=0.3)
    y_pred2 = workflow.fit(X_train,y_train).predict(X_test)
    print(classification_report(y_test,y_pred2))

              precision    recall  f1-score   support

           0       0.53      0.38      0.44       373
           1       0.56      0.77      0.65       559
           2       0.24      0.12      0.16       207

    accuracy                           0.52      1139
   macro avg       0.44      0.42      0.42      1139
weighted avg       0.49      0.52      0.49      1139



In [None]:
# First family of supervised categorical encoders, each wrapped in the
# project's PolynomialWrapper (PWrapper) helper.
categorical_target_encoders_1 = FeatureUnion(transformer_list=[
    ('target_enc',PWrapper(TargetEncoder(cols=nominal+ordinal,drop_invariant=True, smoothing=0)) ),
    ('woe_enc', PWrapper(WOEEncoder(cols=nominal+ordinal,drop_invariant=True))),
    ('jame_enc',  PWrapper(JamesSteinEncoder(cols=nominal+ordinal,drop_invariant=True))),
],n_jobs=-1)

# Second family; only WOE is currently enabled.
# NOTE(review): ``woe_enc`` is also part of the first family, so the WOE
# features reach the selector twice -- confirm this is intended.
categorical_target_encoders_2 = FeatureUnion(transformer_list=[
      # ('summary_enc', PWrapper(SummaryEncoder(cols=nominal+ordinal, drop_invariant=True))),
    ('woe_enc', PWrapper(WOEEncoder(cols=nominal+ordinal,drop_invariant=True))),
    # ('backward_diff', PWrapper(BackwardDifferenceEncoder(cols=nominal+ordinal,drop_invariant=True))),
    # ('glmm_enc', PWrapper(GLMMEncoder(cols=nominal+ordinal, drop_invariant=True)))
            ],n_jobs=-1)
# feature_selector = RFECV(estimator=LogisticRegression(max_iter=1000000,random_state=10), scoring='f1_macro', cv=RepeatedStratifiedKFold(n_repeats=2), step=6,n_jobs=-1)
feature_selector = RFECV(
    estimator=LogisticRegression(random_state=10, max_iter=10000),
    scoring='f1_macro',
    # Seed the splitter so the elimination path is reproducible between runs.
    cv=RepeatedStratifiedKFold(n_repeats=2, random_state=10),
    step=1,
    n_jobs=-1,
)


# Restrict each encoder family to the nominal + ordinal columns.
cat_feature_embedding_1 = ColumnTransformer(transformers=[('cat_enc_1',categorical_target_encoders_1,nominal+ordinal),], sparse_threshold=0, n_jobs=-1)
cat_feature_embedding_2 = ColumnTransformer(transformers=[('cat_enc_2',categorical_target_encoders_2,nominal+ordinal),], sparse_threshold=0, n_jobs=-1)

# feature_selection = FeatureUnion(transformer_list=list(all_selectors.items()),n_jobs=-1)

# f_gen_workflow_1 = make_pipeline(cat_feature_embedding_1,clone(feature_selector))
# f_gen_workflow_2 = make_pipeline(cat_feature_embedding_2,clone(feature_selector))

cat_features_best = FeatureUnion(transformer_list = [
    ('set1_cat_encs',cat_feature_embedding_1),
    ('set2_cat_encs',cat_feature_embedding_2)],n_jobs=-1)

# Encoded features from both families, pruned by RFECV.
cat_features_la_creme = make_pipeline(cat_features_best,feature_selector)


# workflow
# TEST: fit on the training split only, then apply to the held-out split.
X_train, X_test, y_train, y_test = train_test(final_data, target, test_size=0.3)

with parallel_backend('loky'):
    X_train_enc = cat_features_la_creme.fit_transform(X_train,y_train)
    X_test_enc = cat_features_la_creme.transform(X_test)
# cat_features_la_creme

<html>
<style>#sk-container-id-19 {color: black;background-color: white;}#sk-container-id-19 pre{padding: 0;}#sk-container-id-19 div.sk-toggleable {background-color: white;}#sk-container-id-19 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-19 label.sk-toggleable__label-arrow:before {content: "▸";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-19 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-19 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-19 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-19 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-19 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-19 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: "▾";}#sk-container-id-19 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-19 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-19 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-19 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-19 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-19 div.sk-parallel-item::after {content: "";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-19 
div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-19 div.sk-serial::before {content: "";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-19 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-19 div.sk-item {position: relative;z-index: 1;}#sk-container-id-19 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-19 div.sk-item::before, #sk-container-id-19 div.sk-parallel-item::before {content: "";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-19 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-19 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-19 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-19 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-19 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-19 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-19 div.sk-label-container {text-align: center;}#sk-container-id-19 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-19 div.sk-text-repr-fallback {display: none;}</style><div id="sk-container-id-19" class="sk-top-container"><div class="sk-text-repr-fallback"><pre>Pipeline(steps=[(&#x27;featureunion&#x27;,
                 FeatureUnion(n_jobs=-1,
                              transformer_list=[(&#x27;set1_cat_encs&#x27;,
                                                 Pipeline(steps=[(&#x27;columntransformer&#x27;,
                                                                  ColumnTransformer(n_jobs=-1,
                                                                                    sparse_threshold=0,
                                                                                    transformers=[(&#x27;cat_enc_1&#x27;,
                                                                                                   FeatureUnion(n_jobs=-1,
                                                                                                                transformer_list=[(&#x27;target_enc&#x27;,
                                                                                                                                   PolynomialWrapper(feature_encoder=TargetEncoder(cols=[&#x27;nominal__v_12&#x27;,
                                                                                                                                                                                         &#x27;nominal__v_18&#x27;,
                                                                                                                                                                                         &#x27;nominal...
                                                                 (&#x27;rfecv&#x27;,
                                                                  RFECV(cv=RepeatedStratifiedKFold(n_repeats=2, n_splits=5, random_state=None),
                                                                        estimator=DecisionTreeClassifier(random_state=10),
                                                                        n_jobs=-1,
                                                                        scoring=&#x27;f1_macro&#x27;,
                                                                        step=6))]))])),
                (&#x27;rfecv&#x27;,
                 RFECV(cv=RepeatedStratifiedKFold(n_repeats=2, n_splits=5, random_state=None),
                       estimator=DecisionTreeClassifier(random_state=10),
                       n_jobs=-1, scoring=&#x27;f1_macro&#x27;, step=6))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class="sk-container" hidden><div class="sk-item sk-dashed-wrapped"><div class="sk-label-container"><div class="sk-label sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-313" type="checkbox" ><label for="sk-estimator-id-313" class="sk-toggleable__label sk-toggleable__label-arrow">Pipeline</label><div class="sk-toggleable__content"><pre>Pipeline(steps=[(&#x27;featureunion&#x27;,
                 FeatureUnion(n_jobs=-1,
                              transformer_list=[(&#x27;set1_cat_encs&#x27;,
                                                 Pipeline(steps=[(&#x27;columntransformer&#x27;,
                                                                  ColumnTransformer(n_jobs=-1,
                                                                                    sparse_threshold=0,
                                                                                    transformers=[(&#x27;cat_enc_1&#x27;,
                                                                                                   FeatureUnion(n_jobs=-1,
                                                                                                                transformer_list=[(&#x27;target_enc&#x27;,
                                                                                                                                   PolynomialWrapper(feature_encoder=TargetEncoder(cols=[&#x27;nominal__v_12&#x27;,
                                                                                                                                                                                         &#x27;nominal__v_18&#x27;,
                                                                                                                                                                                         &#x27;nominal...
                                                                 (&#x27;rfecv&#x27;,
                                                                  RFECV(cv=RepeatedStratifiedKFold(n_repeats=2, n_splits=5, random_state=None),
                                                                        estimator=DecisionTreeClassifier(random_state=10),
                                                                        n_jobs=-1,
                                                                        scoring=&#x27;f1_macro&#x27;,
                                                                        step=6))]))])),
                (&#x27;rfecv&#x27;,
                 RFECV(cv=RepeatedStratifiedKFold(n_repeats=2, n_splits=5, random_state=None),
                       estimator=DecisionTreeClassifier(random_state=10),
                       n_jobs=-1, scoring=&#x27;f1_macro&#x27;, step=6))])</pre></div></div></div><div class="sk-serial"><div class="sk-item sk-dashed-wrapped"><div class="sk-label-container"><div class="sk-label sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-314" type="checkbox" ><label for="sk-estimator-id-314" class="sk-toggleable__label sk-toggleable__label-arrow">featureunion: FeatureUnion</label><div class="sk-toggleable__content"><pre>FeatureUnion(n_jobs=-1,
             transformer_list=[(&#x27;set1_cat_encs&#x27;,
                                Pipeline(steps=[(&#x27;columntransformer&#x27;,
                                                 ColumnTransformer(n_jobs=-1,
                                                                   sparse_threshold=0,
                                                                   transformers=[(&#x27;cat_enc_1&#x27;,
                                                                                  FeatureUnion(n_jobs=-1,
                                                                                               transformer_list=[(&#x27;target_enc&#x27;,
                                                                                                                  PolynomialWrapper(feature_encoder=TargetEncoder(cols=[&#x27;nominal__v_12&#x27;,
                                                                                                                                                                        &#x27;nominal__v_18&#x27;,
                                                                                                                                                                        &#x27;nominal__v_20&#x27;,
                                                                                                                                                                        &#x27;nominal__v_21&#x27;,
                                                                                                                                                                        &#x27;nominal...
                                                                                   &#x27;ordinal__v_15&#x27;,
                                                                                   &#x27;ordinal__v_17&#x27;,
                                                                                   &#x27;ordinal__v_19&#x27;,
                                                                                   &#x27;ordinal__v_22&#x27;,
                                                                                   &#x27;ordinal__v_23&#x27;,
                                                                                   &#x27;ordinal__v_24&#x27;,
                                                                                   &#x27;ordinal__v_29&#x27;,
                                                                                   &#x27;ordinal__v_31&#x27;,
                                                                                   &#x27;ordinal__v_33&#x27;,
                                                                                   &#x27;ordinal__v_5&#x27;,
                                                                                   &#x27;ordinal__v_6&#x27;])])),
                                                (&#x27;rfecv&#x27;,
                                                 RFECV(cv=RepeatedStratifiedKFold(n_repeats=2, n_splits=5, random_state=None),
                                                       estimator=DecisionTreeClassifier(random_state=10),
                                                       n_jobs=-1,
                                                       scoring=&#x27;f1_macro&#x27;,
                                                       step=6))]))])</pre></div></div></div><div class="sk-parallel"><div class="sk-parallel-item"><div class="sk-item"><div class="sk-label-container"><div class="sk-label sk-toggleable"><label>set1_cat_encs</label></div></div><div class="sk-serial"><div class="sk-item"><div class="sk-serial"><div class="sk-item sk-dashed-wrapped"><div class="sk-label-container"><div class="sk-label sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-315" type="checkbox" ><label for="sk-estimator-id-315" class="sk-toggleable__label sk-toggleable__label-arrow">columntransformer: ColumnTransformer</label><div class="sk-toggleable__content"><pre>ColumnTransformer(n_jobs=-1, sparse_threshold=0,
                  transformers=[(&#x27;cat_enc_1&#x27;,
                                 FeatureUnion(n_jobs=-1,
                                              transformer_list=[(&#x27;target_enc&#x27;,
                                                                 PolynomialWrapper(feature_encoder=TargetEncoder(cols=[&#x27;nominal__v_12&#x27;,
                                                                                                                       &#x27;nominal__v_18&#x27;,
                                                                                                                       &#x27;nominal__v_20&#x27;,
                                                                                                                       &#x27;nominal__v_21&#x27;,
                                                                                                                       &#x27;nominal__v_25&#x27;,
                                                                                                                       &#x27;nominal__v_3&#x27;,
                                                                                                                       &#x27;nominal__v_32&#x27;,
                                                                                                                       &#x27;nominal__v_4&#x27;,
                                                                                                                       &#x27;ordinal__v_0&#x27;,
                                                                                                                       &#x27;ordinal__v_10&#x27;,
                                                                                                                       &#x27;ordinal__...
                                 [&#x27;nominal__v_12&#x27;, &#x27;nominal__v_18&#x27;,
                                  &#x27;nominal__v_20&#x27;, &#x27;nominal__v_21&#x27;,
                                  &#x27;nominal__v_25&#x27;, &#x27;nominal__v_3&#x27;,
                                  &#x27;nominal__v_32&#x27;, &#x27;nominal__v_4&#x27;,
                                  &#x27;ordinal__v_0&#x27;, &#x27;ordinal__v_10&#x27;,
                                  &#x27;ordinal__v_13&#x27;, &#x27;ordinal__v_15&#x27;,
                                  &#x27;ordinal__v_17&#x27;, &#x27;ordinal__v_19&#x27;,
                                  &#x27;ordinal__v_22&#x27;, &#x27;ordinal__v_23&#x27;,
                                  &#x27;ordinal__v_24&#x27;, &#x27;ordinal__v_29&#x27;,
                                  &#x27;ordinal__v_31&#x27;, &#x27;ordinal__v_33&#x27;,
                                  &#x27;ordinal__v_5&#x27;, &#x27;ordinal__v_6&#x27;])])</pre></div></div></div><div class="sk-parallel"><div class="sk-parallel-item"><div class="sk-item"><div class="sk-label-container"><div class="sk-label sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-316" type="checkbox" ><label for="sk-estimator-id-316" class="sk-toggleable__label sk-toggleable__label-arrow">cat_enc_1</label><div class="sk-toggleable__content"><pre>[&#x27;nominal__v_12&#x27;, &#x27;nominal__v_18&#x27;, &#x27;nominal__v_20&#x27;, &#x27;nominal__v_21&#x27;, &#x27;nominal__v_25&#x27;, &#x27;nominal__v_3&#x27;, &#x27;nominal__v_32&#x27;, &#x27;nominal__v_4&#x27;, &#x27;ordinal__v_0&#x27;, &#x27;ordinal__v_10&#x27;, &#x27;ordinal__v_13&#x27;, &#x27;ordinal__v_15&#x27;, &#x27;ordinal__v_17&#x27;, &#x27;ordinal__v_19&#x27;, &#x27;ordinal__v_22&#x27;, &#x27;ordinal__v_23&#x27;, &#x27;ordinal__v_24&#x27;, &#x27;ordinal__v_29&#x27;, &#x27;ordinal__v_31&#x27;, &#x27;ordinal__v_33&#x27;, &#x27;ordinal__v_5&#x27;, &#x27;ordinal__v_6&#x27;]</pre></div></div></div><div class="sk-serial"><div class="sk-item sk-dashed-wrapped"><div class="sk-parallel"><div class="sk-parallel-item"><div class="sk-item"><div class="sk-label-container"><div class="sk-label sk-toggleable"><label>target_enc</label></div></div><div class="sk-serial"><div class="sk-item sk-dashed-wrapped"><div class="sk-parallel"><div class="sk-parallel-item"><div class="sk-item"><div class="sk-label-container"><div class="sk-label sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-317" type="checkbox" ><label for="sk-estimator-id-317" class="sk-toggleable__label sk-toggleable__label-arrow">feature_encoder: TargetEncoder</label><div class="sk-toggleable__content"><pre>TargetEncoder(cols=[&#x27;nominal__v_12&#x27;, &#x27;nominal__v_18&#x27;, &#x27;nominal__v_20&#x27;,
                    &#x27;nominal__v_21&#x27;, &#x27;nominal__v_25&#x27;, &#x27;nominal__v_3&#x27;,
                    &#x27;nominal__v_32&#x27;, &#x27;nominal__v_4&#x27;, &#x27;ordinal__v_0&#x27;,
                    &#x27;ordinal__v_10&#x27;, &#x27;ordinal__v_13&#x27;, &#x27;ordinal__v_15&#x27;,
                    &#x27;ordinal__v_17&#x27;, &#x27;ordinal__v_19&#x27;, &#x27;ordinal__v_22&#x27;,
                    &#x27;ordinal__v_23&#x27;, &#x27;ordinal__v_24&#x27;, &#x27;ordinal__v_29&#x27;,
                    &#x27;ordinal__v_31&#x27;, &#x27;ordinal__v_33&#x27;, &#x27;ordinal__v_5&#x27;,
                    &#x27;ordinal__v_6&#x27;],
              drop_invariant=True, smoothing=0)</pre></div></div></div><div class="sk-serial"><div class="sk-item"><div class="sk-estimator sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-318" type="checkbox" ><label for="sk-estimator-id-318" class="sk-toggleable__label sk-toggleable__label-arrow">TargetEncoder</label><div class="sk-toggleable__content"><pre>TargetEncoder(cols=[&#x27;nominal__v_12&#x27;, &#x27;nominal__v_18&#x27;, &#x27;nominal__v_20&#x27;,
                    &#x27;nominal__v_21&#x27;, &#x27;nominal__v_25&#x27;, &#x27;nominal__v_3&#x27;,
                    &#x27;nominal__v_32&#x27;, &#x27;nominal__v_4&#x27;, &#x27;ordinal__v_0&#x27;,
                    &#x27;ordinal__v_10&#x27;, &#x27;ordinal__v_13&#x27;, &#x27;ordinal__v_15&#x27;,
                    &#x27;ordinal__v_17&#x27;, &#x27;ordinal__v_19&#x27;, &#x27;ordinal__v_22&#x27;,
                    &#x27;ordinal__v_23&#x27;, &#x27;ordinal__v_24&#x27;, &#x27;ordinal__v_29&#x27;,
                    &#x27;ordinal__v_31&#x27;, &#x27;ordinal__v_33&#x27;, &#x27;ordinal__v_5&#x27;,
                    &#x27;ordinal__v_6&#x27;],
              drop_invariant=True, smoothing=0)</pre></div></div></div></div></div></div></div></div></div></div></div><div class="sk-parallel-item"><div class="sk-item"><div class="sk-label-container"><div class="sk-label sk-toggleable"><label>woe_enc</label></div></div><div class="sk-serial"><div class="sk-item sk-dashed-wrapped"><div class="sk-parallel"><div class="sk-parallel-item"><div class="sk-item"><div class="sk-label-container"><div class="sk-label sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-319" type="checkbox" ><label for="sk-estimator-id-319" class="sk-toggleable__label sk-toggleable__label-arrow">feature_encoder: WOEEncoder</label><div class="sk-toggleable__content"><pre>WOEEncoder(cols=[&#x27;nominal__v_12&#x27;, &#x27;nominal__v_18&#x27;, &#x27;nominal__v_20&#x27;,
                 &#x27;nominal__v_21&#x27;, &#x27;nominal__v_25&#x27;, &#x27;nominal__v_3&#x27;,
                 &#x27;nominal__v_32&#x27;, &#x27;nominal__v_4&#x27;, &#x27;ordinal__v_0&#x27;,
                 &#x27;ordinal__v_10&#x27;, &#x27;ordinal__v_13&#x27;, &#x27;ordinal__v_15&#x27;,
                 &#x27;ordinal__v_17&#x27;, &#x27;ordinal__v_19&#x27;, &#x27;ordinal__v_22&#x27;,
                 &#x27;ordinal__v_23&#x27;, &#x27;ordinal__v_24&#x27;, &#x27;ordinal__v_29&#x27;,
                 &#x27;ordinal__v_31&#x27;, &#x27;ordinal__v_33&#x27;, &#x27;ordinal__v_5&#x27;,
                 &#x27;ordinal__v_6&#x27;],
           drop_invariant=True)</pre></div></div></div><div class="sk-serial"><div class="sk-item"><div class="sk-estimator sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-320" type="checkbox" ><label for="sk-estimator-id-320" class="sk-toggleable__label sk-toggleable__label-arrow">WOEEncoder</label><div class="sk-toggleable__content"><pre>WOEEncoder(cols=[&#x27;nominal__v_12&#x27;, &#x27;nominal__v_18&#x27;, &#x27;nominal__v_20&#x27;,
                 &#x27;nominal__v_21&#x27;, &#x27;nominal__v_25&#x27;, &#x27;nominal__v_3&#x27;,
                 &#x27;nominal__v_32&#x27;, &#x27;nominal__v_4&#x27;, &#x27;ordinal__v_0&#x27;,
                 &#x27;ordinal__v_10&#x27;, &#x27;ordinal__v_13&#x27;, &#x27;ordinal__v_15&#x27;,
                 &#x27;ordinal__v_17&#x27;, &#x27;ordinal__v_19&#x27;, &#x27;ordinal__v_22&#x27;,
                 &#x27;ordinal__v_23&#x27;, &#x27;ordinal__v_24&#x27;, &#x27;ordinal__v_29&#x27;,
                 &#x27;ordinal__v_31&#x27;, &#x27;ordinal__v_33&#x27;, &#x27;ordinal__v_5&#x27;,
                 &#x27;ordinal__v_6&#x27;],
           drop_invariant=True)</pre></div></div></div></div></div></div></div></div></div></div></div><div class="sk-parallel-item"><div class="sk-item"><div class="sk-label-container"><div class="sk-label sk-toggleable"><label>jame_enc</label></div></div><div class="sk-serial"><div class="sk-item sk-dashed-wrapped"><div class="sk-parallel"><div class="sk-parallel-item"><div class="sk-item"><div class="sk-label-container"><div class="sk-label sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-321" type="checkbox" ><label for="sk-estimator-id-321" class="sk-toggleable__label sk-toggleable__label-arrow">feature_encoder: JamesSteinEncoder</label><div class="sk-toggleable__content"><pre>JamesSteinEncoder(cols=[&#x27;nominal__v_12&#x27;, &#x27;nominal__v_18&#x27;, &#x27;nominal__v_20&#x27;,
                        &#x27;nominal__v_21&#x27;, &#x27;nominal__v_25&#x27;, &#x27;nominal__v_3&#x27;,
                        &#x27;nominal__v_32&#x27;, &#x27;nominal__v_4&#x27;, &#x27;ordinal__v_0&#x27;,
                        &#x27;ordinal__v_10&#x27;, &#x27;ordinal__v_13&#x27;, &#x27;ordinal__v_15&#x27;,
                        &#x27;ordinal__v_17&#x27;, &#x27;ordinal__v_19&#x27;, &#x27;ordinal__v_22&#x27;,
                        &#x27;ordinal__v_23&#x27;, &#x27;ordinal__v_24&#x27;, &#x27;ordinal__v_29&#x27;,
                        &#x27;ordinal__v_31&#x27;, &#x27;ordinal__v_33&#x27;, &#x27;ordinal__v_5&#x27;,
                        &#x27;ordinal__v_6&#x27;],
                  drop_invariant=True)</pre></div></div></div><div class="sk-serial"><div class="sk-item"><div class="sk-estimator sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-322" type="checkbox" ><label for="sk-estimator-id-322" class="sk-toggleable__label sk-toggleable__label-arrow">JamesSteinEncoder</label><div class="sk-toggleable__content"><pre>JamesSteinEncoder(cols=[&#x27;nominal__v_12&#x27;, &#x27;nominal__v_18&#x27;, &#x27;nominal__v_20&#x27;,
                        &#x27;nominal__v_21&#x27;, &#x27;nominal__v_25&#x27;, &#x27;nominal__v_3&#x27;,
                        &#x27;nominal__v_32&#x27;, &#x27;nominal__v_4&#x27;, &#x27;ordinal__v_0&#x27;,
                        &#x27;ordinal__v_10&#x27;, &#x27;ordinal__v_13&#x27;, &#x27;ordinal__v_15&#x27;,
                        &#x27;ordinal__v_17&#x27;, &#x27;ordinal__v_19&#x27;, &#x27;ordinal__v_22&#x27;,
                        &#x27;ordinal__v_23&#x27;, &#x27;ordinal__v_24&#x27;, &#x27;ordinal__v_29&#x27;,
                        &#x27;ordinal__v_31&#x27;, &#x27;ordinal__v_33&#x27;, &#x27;ordinal__v_5&#x27;,
                        &#x27;ordinal__v_6&#x27;],
                  drop_invariant=True)</pre></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div><div class="sk-item sk-dashed-wrapped"><div class="sk-label-container"><div class="sk-label sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-323" type="checkbox" ><label for="sk-estimator-id-323" class="sk-toggleable__label sk-toggleable__label-arrow">rfecv: RFECV</label><div class="sk-toggleable__content"><pre>RFECV(cv=RepeatedStratifiedKFold(n_repeats=2, n_splits=5, random_state=None),
      estimator=DecisionTreeClassifier(random_state=10), n_jobs=-1,
      scoring=&#x27;f1_macro&#x27;, step=6)</pre></div></div></div><div class="sk-parallel"><div class="sk-parallel-item"><div class="sk-item"><div class="sk-label-container"><div class="sk-label sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-324" type="checkbox" ><label for="sk-estimator-id-324" class="sk-toggleable__label sk-toggleable__label-arrow">estimator: DecisionTreeClassifier</label><div class="sk-toggleable__content"><pre>DecisionTreeClassifier(random_state=10)</pre></div></div></div><div class="sk-serial"><div class="sk-item"><div class="sk-estimator sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-325" type="checkbox" ><label for="sk-estimator-id-325" class="sk-toggleable__label sk-toggleable__label-arrow">DecisionTreeClassifier</label><div class="sk-toggleable__content"><pre>DecisionTreeClassifier(random_state=10)</pre></div></div></div></div></div></div></div></div></div></div></div></div></div><div class="sk-parallel-item"><div class="sk-item"><div class="sk-label-container"><div class="sk-label sk-toggleable"><label>set2_cat_encs</label></div></div><div class="sk-serial"><div class="sk-item"><div class="sk-serial"><div class="sk-item sk-dashed-wrapped"><div class="sk-label-container"><div class="sk-label sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-326" type="checkbox" ><label for="sk-estimator-id-326" class="sk-toggleable__label sk-toggleable__label-arrow">columntransformer: ColumnTransformer</label><div class="sk-toggleable__content"><pre>ColumnTransformer(n_jobs=-1, sparse_threshold=0,
                  transformers=[(&#x27;cat_enc_2&#x27;,
                                 FeatureUnion(n_jobs=-1,
                                              transformer_list=[(&#x27;summary_enc&#x27;,
                                                                 SummaryEncoder(cols=[&#x27;nominal__v_12&#x27;,
                                                                                      &#x27;nominal__v_18&#x27;,
                                                                                      &#x27;nominal__v_20&#x27;,
                                                                                      &#x27;nominal__v_21&#x27;,
                                                                                      &#x27;nominal__v_25&#x27;,
                                                                                      &#x27;nominal__v_3&#x27;,
                                                                                      &#x27;nominal__v_32&#x27;,
                                                                                      &#x27;nominal__v_4&#x27;,
                                                                                      &#x27;ordinal__v_0&#x27;,
                                                                                      &#x27;ordinal__v_10&#x27;,
                                                                                      &#x27;ordinal__v_13&#x27;,
                                                                                      &#x27;ordinal__v_15&#x27;,
                                                                                      &#x27;ordinal__...
                                 [&#x27;nominal__v_12&#x27;, &#x27;nominal__v_18&#x27;,
                                  &#x27;nominal__v_20&#x27;, &#x27;nominal__v_21&#x27;,
                                  &#x27;nominal__v_25&#x27;, &#x27;nominal__v_3&#x27;,
                                  &#x27;nominal__v_32&#x27;, &#x27;nominal__v_4&#x27;,
                                  &#x27;ordinal__v_0&#x27;, &#x27;ordinal__v_10&#x27;,
                                  &#x27;ordinal__v_13&#x27;, &#x27;ordinal__v_15&#x27;,
                                  &#x27;ordinal__v_17&#x27;, &#x27;ordinal__v_19&#x27;,
                                  &#x27;ordinal__v_22&#x27;, &#x27;ordinal__v_23&#x27;,
                                  &#x27;ordinal__v_24&#x27;, &#x27;ordinal__v_29&#x27;,
                                  &#x27;ordinal__v_31&#x27;, &#x27;ordinal__v_33&#x27;,
                                  &#x27;ordinal__v_5&#x27;, &#x27;ordinal__v_6&#x27;])])</pre></div></div></div><div class="sk-parallel"><div class="sk-parallel-item"><div class="sk-item"><div class="sk-label-container"><div class="sk-label sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-327" type="checkbox" ><label for="sk-estimator-id-327" class="sk-toggleable__label sk-toggleable__label-arrow">cat_enc_2</label><div class="sk-toggleable__content"><pre>[&#x27;nominal__v_12&#x27;, &#x27;nominal__v_18&#x27;, &#x27;nominal__v_20&#x27;, &#x27;nominal__v_21&#x27;, &#x27;nominal__v_25&#x27;, &#x27;nominal__v_3&#x27;, &#x27;nominal__v_32&#x27;, &#x27;nominal__v_4&#x27;, &#x27;ordinal__v_0&#x27;, &#x27;ordinal__v_10&#x27;, &#x27;ordinal__v_13&#x27;, &#x27;ordinal__v_15&#x27;, &#x27;ordinal__v_17&#x27;, &#x27;ordinal__v_19&#x27;, &#x27;ordinal__v_22&#x27;, &#x27;ordinal__v_23&#x27;, &#x27;ordinal__v_24&#x27;, &#x27;ordinal__v_29&#x27;, &#x27;ordinal__v_31&#x27;, &#x27;ordinal__v_33&#x27;, &#x27;ordinal__v_5&#x27;, &#x27;ordinal__v_6&#x27;]</pre></div></div></div><div class="sk-serial"><div class="sk-item sk-dashed-wrapped"><div class="sk-parallel"><div class="sk-parallel-item"><div class="sk-item"><div class="sk-label-container"><div class="sk-label sk-toggleable"><label>summary_enc</label></div></div><div class="sk-serial"><div class="sk-item"><div class="sk-estimator sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-328" type="checkbox" ><label for="sk-estimator-id-328" class="sk-toggleable__label sk-toggleable__label-arrow">SummaryEncoder</label><div class="sk-toggleable__content"><pre>SummaryEncoder(cols=[&#x27;nominal__v_12&#x27;, &#x27;nominal__v_18&#x27;, &#x27;nominal__v_20&#x27;,
                     &#x27;nominal__v_21&#x27;, &#x27;nominal__v_25&#x27;, &#x27;nominal__v_3&#x27;,
                     &#x27;nominal__v_32&#x27;, &#x27;nominal__v_4&#x27;, &#x27;ordinal__v_0&#x27;,
                     &#x27;ordinal__v_10&#x27;, &#x27;ordinal__v_13&#x27;, &#x27;ordinal__v_15&#x27;,
                     &#x27;ordinal__v_17&#x27;, &#x27;ordinal__v_19&#x27;, &#x27;ordinal__v_22&#x27;,
                     &#x27;ordinal__v_23&#x27;, &#x27;ordinal__v_24&#x27;, &#x27;ordinal__v_29&#x27;,
                     &#x27;ordinal__v_31&#x27;, &#x27;ordinal__v_33&#x27;, &#x27;ordinal__v_5&#x27;,
                     &#x27;ordinal__v_6&#x27;],
               drop_invariant=True)</pre></div></div></div></div></div></div><div class="sk-parallel-item"><div class="sk-item"><div class="sk-label-container"><div class="sk-label sk-toggleable"><label>backward_diff</label></div></div><div class="sk-serial"><div class="sk-item"><div class="sk-estimator sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-329" type="checkbox" ><label for="sk-estimator-id-329" class="sk-toggleable__label sk-toggleable__label-arrow">BackwardDifferenceEncoder</label><div class="sk-toggleable__content"><pre>BackwardDifferenceEncoder(cols=[&#x27;nominal__v_12&#x27;, &#x27;nominal__v_18&#x27;,
                                &#x27;nominal__v_20&#x27;, &#x27;nominal__v_21&#x27;,
                                &#x27;nominal__v_25&#x27;, &#x27;nominal__v_3&#x27;,
                                &#x27;nominal__v_32&#x27;, &#x27;nominal__v_4&#x27;, &#x27;ordinal__v_0&#x27;,
                                &#x27;ordinal__v_10&#x27;, &#x27;ordinal__v_13&#x27;,
                                &#x27;ordinal__v_15&#x27;, &#x27;ordinal__v_17&#x27;,
                                &#x27;ordinal__v_19&#x27;, &#x27;ordinal__v_22&#x27;,
                                &#x27;ordinal__v_23&#x27;, &#x27;ordinal__v_24&#x27;,
                                &#x27;ordinal__v_29&#x27;, &#x27;ordinal__v_31&#x27;,
                                &#x27;ordinal__v_33&#x27;, &#x27;ordinal__v_5&#x27;,
                                &#x27;ordinal__v_6&#x27;],
                          drop_invariant=True)</pre></div></div></div></div></div></div><div class="sk-parallel-item"><div class="sk-item"><div class="sk-label-container"><div class="sk-label sk-toggleable"><label>glmm_enc</label></div></div><div class="sk-serial"><div class="sk-item sk-dashed-wrapped"><div class="sk-parallel"><div class="sk-parallel-item"><div class="sk-item"><div class="sk-label-container"><div class="sk-label sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-330" type="checkbox" ><label for="sk-estimator-id-330" class="sk-toggleable__label sk-toggleable__label-arrow">feature_encoder: GLMMEncoder</label><div class="sk-toggleable__content"><pre>GLMMEncoder(cols=[&#x27;nominal__v_12&#x27;, &#x27;nominal__v_18&#x27;, &#x27;nominal__v_20&#x27;,
                  &#x27;nominal__v_21&#x27;, &#x27;nominal__v_25&#x27;, &#x27;nominal__v_3&#x27;,
                  &#x27;nominal__v_32&#x27;, &#x27;nominal__v_4&#x27;, &#x27;ordinal__v_0&#x27;,
                  &#x27;ordinal__v_10&#x27;, &#x27;ordinal__v_13&#x27;, &#x27;ordinal__v_15&#x27;,
                  &#x27;ordinal__v_17&#x27;, &#x27;ordinal__v_19&#x27;, &#x27;ordinal__v_22&#x27;,
                  &#x27;ordinal__v_23&#x27;, &#x27;ordinal__v_24&#x27;, &#x27;ordinal__v_29&#x27;,
                  &#x27;ordinal__v_31&#x27;, &#x27;ordinal__v_33&#x27;, &#x27;ordinal__v_5&#x27;,
                  &#x27;ordinal__v_6&#x27;],
            drop_invariant=True)</pre></div></div></div><div class="sk-serial"><div class="sk-item"><div class="sk-estimator sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-331" type="checkbox" ><label for="sk-estimator-id-331" class="sk-toggleable__label sk-toggleable__label-arrow">GLMMEncoder</label><div class="sk-toggleable__content"><pre>GLMMEncoder(cols=[&#x27;nominal__v_12&#x27;, &#x27;nominal__v_18&#x27;, &#x27;nominal__v_20&#x27;,
                  &#x27;nominal__v_21&#x27;, &#x27;nominal__v_25&#x27;, &#x27;nominal__v_3&#x27;,
                  &#x27;nominal__v_32&#x27;, &#x27;nominal__v_4&#x27;, &#x27;ordinal__v_0&#x27;,
                  &#x27;ordinal__v_10&#x27;, &#x27;ordinal__v_13&#x27;, &#x27;ordinal__v_15&#x27;,
                  &#x27;ordinal__v_17&#x27;, &#x27;ordinal__v_19&#x27;, &#x27;ordinal__v_22&#x27;,
                  &#x27;ordinal__v_23&#x27;, &#x27;ordinal__v_24&#x27;, &#x27;ordinal__v_29&#x27;,
                  &#x27;ordinal__v_31&#x27;, &#x27;ordinal__v_33&#x27;, &#x27;ordinal__v_5&#x27;,
                  &#x27;ordinal__v_6&#x27;],
            drop_invariant=True)</pre></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div><div class="sk-item sk-dashed-wrapped"><div class="sk-label-container"><div class="sk-label sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-332" type="checkbox" ><label for="sk-estimator-id-332" class="sk-toggleable__label sk-toggleable__label-arrow">rfecv: RFECV</label><div class="sk-toggleable__content"><pre>RFECV(cv=RepeatedStratifiedKFold(n_repeats=2, n_splits=5, random_state=None),
      estimator=DecisionTreeClassifier(random_state=10), n_jobs=-1,
      scoring=&#x27;f1_macro&#x27;, step=6)</pre></div></div></div><div class="sk-parallel"><div class="sk-parallel-item"><div class="sk-item"><div class="sk-label-container"><div class="sk-label sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-333" type="checkbox" ><label for="sk-estimator-id-333" class="sk-toggleable__label sk-toggleable__label-arrow">estimator: DecisionTreeClassifier</label><div class="sk-toggleable__content"><pre>DecisionTreeClassifier(random_state=10)</pre></div></div></div><div class="sk-serial"><div class="sk-item"><div class="sk-estimator sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-334" type="checkbox" ><label for="sk-estimator-id-334" class="sk-toggleable__label sk-toggleable__label-arrow">DecisionTreeClassifier</label><div class="sk-toggleable__content"><pre>DecisionTreeClassifier(random_state=10)</pre></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div><div class="sk-item sk-dashed-wrapped"><div class="sk-label-container"><div class="sk-label sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-335" type="checkbox" ><label for="sk-estimator-id-335" class="sk-toggleable__label sk-toggleable__label-arrow">rfecv: RFECV</label><div class="sk-toggleable__content"><pre>RFECV(cv=RepeatedStratifiedKFold(n_repeats=2, n_splits=5, random_state=None),
      estimator=DecisionTreeClassifier(random_state=10), n_jobs=-1,
      scoring=&#x27;f1_macro&#x27;, step=6)</pre></div></div></div><div class="sk-parallel"><div class="sk-parallel-item"><div class="sk-item"><div class="sk-label-container"><div class="sk-label sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-336" type="checkbox" ><label for="sk-estimator-id-336" class="sk-toggleable__label sk-toggleable__label-arrow">estimator: DecisionTreeClassifier</label><div class="sk-toggleable__content"><pre>DecisionTreeClassifier(random_state=10)</pre></div></div></div><div class="sk-serial"><div class="sk-item"><div class="sk-estimator sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-337" type="checkbox" ><label for="sk-estimator-id-337" class="sk-toggleable__label sk-toggleable__label-arrow">DecisionTreeClassifier</label><div class="sk-toggleable__content"><pre>DecisionTreeClassifier(random_state=10)</pre></div></div></div></div></div></div></div></div></div></div></div></div></html>

In [139]:
import joblib

# Bundle the encoded train/test features with their targets in one tuple so the
# whole split can be persisted in a single call (presumably for the XGBoost
# Optuna trials, judging by the name and the path below -- confirm).
XGBOOST_OPT_TRIAL_DATA = (
    X_train_enc,
    X_test_enc,
    y_train,
    y_test,
)
# Persisting is disabled; uncomment to write the bundle to disk.
# joblib.dump(XGBOOST_OPT_TRIAL_DATA, '../data/xgboost_optuna_trial_data/data.pkl')

In [None]:
# Base estimator wrapped by the one-vs-one meta-classifier below.
# Exactly one candidate is active; the others stay commented out as alternatives.
# (The DecisionTreeClassifier line used to be live but was immediately
# overwritten by the MLPClassifier assignment, so it never had any effect.)
# estim = DecisionTreeClassifier(random_state=21)
estim = MLPClassifier(solver='adam', max_iter=1000, random_state=21)
# estim = RandomForestClassifier(random_state=10)
# estim = ExtraTreesClassifier(random_state=19)
# estim = XGBClassifier(random_state=10)
model = OneVsOneClassifier(estim, n_jobs=-1)

# Fit on the pre-encoded training features and report per-class metrics on the
# pre-encoded test features.
with parallel_backend('loky'):
    y_pred = model.fit(X_train_enc, y_train).predict(X_test_enc)
    print(classification_report(y_test, y_pred))

#### ROC AUC CURVE FOR MULTICLASS
[scikit-learn example: Receiver Operating Characteristic (ROC)](https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html)

In [None]:
# Re-split the data (train_test is defined elsewhere in the notebook; it is
# called with test_size=0.3), then fit the full workflow on the training part.
X_train, X_test, y_train, y_test = train_test(final_data, target, test_size=0.3)

with parallel_backend('loky'):
    workflow.fit(X_train, y_train)
    y_pred = workflow.predict(X_test)
    print(classification_report(y_test, y_pred))

# Class-membership probabilities are the input roc_auc_score needs below.
with parallel_backend('loky'):
    y_prob = workflow.predict_proba(X_test)


def _multiclass_auc(scheme, weighting):
    """ROC AUC of the current y_test / y_prob for one multiclass scheme and averaging mode."""
    return roc_auc_score(y_test, y_prob, multi_class=scheme, average=weighting)


# One-vs-one and one-vs-rest AUC, each with macro and prevalence-weighted averaging.
macro_roc_auc_ovo = _multiclass_auc("ovo", "macro")
weighted_roc_auc_ovo = _multiclass_auc("ovo", "weighted")
macro_roc_auc_ovr = _multiclass_auc("ovr", "macro")
weighted_roc_auc_ovr = _multiclass_auc("ovr", "weighted")

print(
    "One-vs-One ROC AUC scores:\n{:.6f} (macro),\n{:.6f} "
    "(weighted by prevalence)".format(macro_roc_auc_ovo, weighted_roc_auc_ovo)
)
print(
    "One-vs-Rest ROC AUC scores:\n{:.6f} (macro),\n{:.6f} "
    "(weighted by prevalence)".format(macro_roc_auc_ovr, weighted_roc_auc_ovr)
)

#### End

In [None]:
# Per-class, micro-average and macro-average ROC curves for the 3-class problem,
# adapted from the scikit-learn multiclass ROC example.
import numpy as np
import matplotlib.pyplot as plt
from itertools import cycle

from sklearn import svm, datasets
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import roc_auc_score

# Binarize the output
n_classes = 3
# NOTE(review): `base_estimator` was renamed to `estimator` in newer scikit-learn
# releases -- confirm against the installed version before upgrading.
model = AdaBoostClassifier(base_estimator=RandomForestClassifier(n_jobs=-1),n_estimators=30, random_state=10)
# workflow = make_pipeline(cat_feature_embedding,StandardScaler(), model)

# shuffle and split training and test sets
## Do all Transformations Ahead then Apply ML Model
X_train, X_test, y_train, y_test = train_test(final_data, target, test_size=0.3)
# Fit the categorical encoders on the training part only, then reuse them on the test part.
X_train_enc = cat_feature_embedding.fit_transform(X_train,y_train)
X_test_enc = cat_feature_embedding.transform(X_test)
# After this point y_train / y_test are indicator matrices with one column per class (0, 1, 2).
y_train = label_binarize(y_train, classes=[0, 1, 2])
y_test = label_binarize(y_test, classes=[0,1,2])
workflow = make_pipeline(StandardScaler(), OneVsRestClassifier(model,n_jobs=-1))

# Learn to predict each class against the other
# classifier = OneVsRestClassifier(
#     svm.SVC(kernel="linear", probability=True, random_state=random_state)
# )
with parallel_backend('loky'):
    y_score = workflow.fit(X_train_enc, y_train).decision_function(X_test_enc)

# Compute ROC curve and ROC area for each class
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Compute micro-average ROC curve and ROC area
# (all class columns are flattened and scored as one binary problem)
fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])


# First aggregate all false positive rates
all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

# Then interpolate all ROC curves at this points
mean_tpr = np.zeros_like(all_fpr)
for i in range(n_classes):
    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])

# Finally average it and compute AUC
mean_tpr /= n_classes

fpr["macro"] = all_fpr
tpr["macro"] = mean_tpr
roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

# Line width for the per-class curves and the chance diagonal. The plotting code
# below reads `lw`, which this cell never defined, so it failed with a NameError
# on a fresh kernel; 2 is the value used by the scikit-learn example.
lw = 2

# Plot all ROC curves
plt.figure()
plt.plot(
    fpr["micro"],
    tpr["micro"],
    label="micro-average ROC curve (area = {0:0.2f})".format(roc_auc["micro"]),
    color="deeppink",
    linestyle=":",
    linewidth=4,
)

plt.plot(
    fpr["macro"],
    tpr["macro"],
    label="macro-average ROC curve (area = {0:0.2f})".format(roc_auc["macro"]),
    color="navy",
    linestyle=":",
    linewidth=4,
)

colors = cycle(["aqua", "darkorange", "cornflowerblue"])
for i, color in zip(range(n_classes), colors):
    plt.plot(
        fpr[i],
        tpr[i],
        color=color,
        lw=lw,
        label="ROC curve of class {0} (area = {1:0.2f})".format(i, roc_auc[i]),
    )

# Dashed diagonal: the ROC curve of a random-guess classifier, for reference.
plt.plot([0, 1], [0, 1], "k--", lw=lw)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("Some extension of Receiver operating characteristic to multiclass")
plt.legend(loc="lower right")
plt.show()


In [None]:
# ct = ColumnTransformer(transformers=[('target',
#                                   PWrapper(JamesSteinEncoder(cols=nominal+ordinal,drop_invariant=True)),
#                                   # JamesSteinEncoder(cols=nominal+ordinal,drop_invariant=True),
#                                       nominal+ordinal
#                                      )],
#                        sparse_threshold=0, 
#                        n_jobs=-1)


# # model = LogisticRegression(max_iter=100000,n_jobs=-1,random_state=0)
# model = AdaBoostClassifier(base_estimator=RandomForestClassifier(random_state=10, n_jobs=-1), random_state=10)
# # model = XGBRFClassifier(random_state=0)
# # model = RandomForestClassifier(random_state=0)
# # model = MLPClassifier(solver='adam',activation='tanh')
# # model.pr
# ovo = OneVsOneClassifier(clone(model), n_jobs=-1)
# ovr = OneVsRestClassifier(clone(model), n_jobs=-1)
# categorical_target_encoders
# # workflow = make_pipeline(cat_feature_embedding,StandardScaler(), model)

# workflow = make_pipeline(cat_feature_embedding,StandardScaler(), model)
# # X_train_enc = cat_feature_embedding.fit_transform(X_train,y_train)
# # X_test_enc = cat_feature_embedding.transform(X_test)
# with parallel_backend('loky'):
#     y_pred = workflow.fit(X_train,y_train).predict(X_test)
#     print(classification_report(y_test,y_pred))