In [104]:
from pymongo import MongoClient
import optuna
import os

os.environ["NEPTUNE_PROJECT"] = "mlop3n/SDP"
os.environ[
    "NEPTUNE_NOTEBOOK_PATH"
] = "PycharmProjects/sdpiit/notebooks/Pipeline_components_builder.ipynb"
import warnings
from sklearnex import patch_sklearn

patch_sklearn()
import numpy as np
import pandas as pd
from category_encoders import (
    BackwardDifferenceEncoder,
    BaseNEncoder,
    BinaryEncoder,
    CatBoostEncoder,
    CountEncoder,
    GLMMEncoder,
    HelmertEncoder,
    JamesSteinEncoder,
    LeaveOneOutEncoder,
    MEstimateEncoder,
    QuantileEncoder,
    SummaryEncoder,
    TargetEncoder,
    WOEEncoder,
)
from sklearn import set_config
from sklearn.base import clone as model_clone
from sklearn.cluster import *
from sklearn.compose import *
from sklearn.cross_decomposition import *
from sklearn.decomposition import *
from sklearn.ensemble import *
from sklearn.feature_selection import *
from sklearn.gaussian_process import *
from sklearn.linear_model import *
from sklearn.metrics import *
from sklearn.model_selection import *
from sklearn.multioutput import *
from sklearn.multiclass import *
from sklearn.naive_bayes import *
from sklearn.neighbors import *
from sklearn.neural_network import *
from sklearn.pipeline import *
from sklearn.preprocessing import *
from sklearn.svm import *
from sklearn.tree import *
from sklearn.utils import *
from sklearn.dummy import *
from sklearn.semi_supervised import *
from sklearn.discriminant_analysis import *
import sklearnex, daal4py

from tqdm import tqdm, trange
from xgboost import XGBClassifier, XGBRFClassifier
from BorutaShap import BorutaShap

from sklearn.calibration import *

pd.options.plotting.backend = "plotly"
pd.options.display.max_columns = 50
set_config(display="diagram")
warnings.filterwarnings("ignore")
import pickle
from collections import defaultdict

import matplotlib.pyplot as plt
import seaborn as sns
from joblib import parallel_backend
from joblib.memory import Memory

sns.set()
from pprint import pprint
from helpers import PolynomialWrapper as PWrapper
from helpers import NestedCVWrapper as NCVWrapper
from helpers import ColumnSelectors
import sklearn
from helpers import DFCollection
from helpers import plot_mean_std_max
from helpers import CustomMetrics
import gc

CACHE_DIR = Memory(location="../data/joblib_memory/")
OPTUNA_DB = "postgresql+psycopg2://postgres:302492@localhost:5433/optuna"


def allow_stopping(func):
    def wrapper():
        try:
            value = func()
            return value
            # gc.collect()
        except KeyboardInterrupt as e:
            print("Program Stopped")
        gc.collect()

    return wrapper

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [105]:
db = DFCollection()
column_selector = ColumnSelectors()
# classifiers = [f() for f in cls_names]
dtype_info = column_selector.dtype_info
ordinal = column_selector.ordinal_cols
nominal = column_selector.nominal_cols
binary = column_selector.binary_cols
ratio = column_selector.ratio_cols


final_data = db.final_data
final_pred_data = db.final_pred_data
baseline_prediction_data = db.baseline_prediction_data
data_logit = db.data_logits
prediction_data = db.prediction_data
master_data = db.master
given_data = db.data

ordinal_data, nominal_data, binary_data, ratio_data = db.categorise_data()
nominal_categories = db.nominal_categories
ordinal_categories = db.ordinal_categories
class_labels, n_classes, class_priors = class_distribution(
    final_data.target.to_numpy().reshape(-1, 1)
)


def gen_balanced_trained_test(data, p):
    Y = data.target
    X_2 = Y_2 = Y[Y == 2].index
    X_0 = Y_0 = Y[Y == 0].index
    X_1 = Y_1 = Y[Y == 1].index
    train_size = int(p * Y_2.shape[0])
    test_size = int((1 - p) * Y_2.shape[0])

    train_idx_2 = np.random.choice(Y_2, (train_size,))
    train_idx_1 = np.random.choice(Y_1, (train_size,))
    train_idx_0 = np.random.choice(Y_0, (train_size,))
    train_idx = np.r_[train_idx_0, train_idx_1, train_idx_2]
    # train_idx.shape

    test_idx_2 = np.random.choice(np.setdiff1d(Y_2, train_idx_2), (test_size,))
    test_idx_1 = np.random.choice(np.setdiff1d(Y_1, train_idx_1), (test_size,))
    test_idx_0 = np.random.choice(np.setdiff1d(Y_0, train_idx_0), (test_size,))
    test_idx = np.r_[test_idx_0, test_idx_1, test_idx_2]
    # test_idx.shape
    return train_idx, test_idx


def gen_nominal_maps(bs: pd.DataFrame = master_data) -> tuple[defaultdict, defaultdict]:
    nominal_master_db = bs.loc[:, nominal]
    nominal_cont_map = defaultdict(dict)
    nominal_indvl_map = defaultdict(dict)
    for c in nominal:
        un = sorted(nominal_master_db[c].unique().tolist())
        n = len(un)
        new_id = list(range(n))
        nominal_indvl_map[c] = dict(zip(un, new_id))
    start = 0
    for c in nominal:
        un = sorted(nominal_master_db[c].unique().tolist())
        n = len(un)
        new_id = list(range(start, start + n))
        nominal_cont_map[c] = dict(zip(un, new_id))
        start += n
    return nominal_indvl_map, nominal_cont_map


# nominal_indvl_map, nominal_cont_map = gen_nominal_maps()
# nominal_master_db = bs.loc[:, nominal]

# nominal_master_db_indvl = nominal_master_db.copy()
# nominal_master_db_cont = nominal_master_db.copy()


# nominal_indvl_map
def nm_indvl_data_trnsform(row):
    for c in nominal:
        curr = row[c]
        row[c] = nominal_indvl_map[c][curr]
    return row


# test1_nominal = nominal_master_db_indvl.apply(nm_indvl_data_trnsform, axis=1)


def nm_cont_data_trnsform(row):
    for c in nominal:
        curr = row[c]
        row[c] = nominal_cont_map[c][curr]
    return row


# test2_nominal = nominal_master_db_cont.apply(nm_cont_data_trnsform, axis=1)
# prediction_data = pd.read_pickle("../data/pred_data.pkl")
# est_ = [("cnb",CategoricalNB()),]


def wf_create(cat_encoder=TargetEncoder, model=None):
    """
    :param cat_encoder: category_encoders
    :param model: scikit-learn Model
    :return pipe: sklearn.pipeline.Pipline
    Examples of model param:

    model = ComplementNB(norm=True,fit_prior=True,)
    model = MultinomialNB()
    model = LogisticRegression(n_jobs=-1, max_iter=10000,random_state=19)
    """
    _steps = []
    encoder__name = cat_encoder.__class__.__name__
    _steps.append(
        ("PW" + encoder__name, PolynomialWrapper(feature_encoder=cat_encoder))
    )
    if model is None:
        passordinal_columns
    else:
        model__name = model.__class__.__name__
        _steps.append((model__name, model))
    pipe = Pipeline(steps=_steps)
    return pipe

#### Define Base Classifier

```python
{'n_layers': 9,
 'l_1': 290,
 'l_3': 230,
 'l_5': 70,
 'l_7': 28,
 'activation': 'tanh',
 'beta_1': 0.33005594239944347,
 'batch_size': 394,
 'n_estimators': 512,
 'solver': 'adam',
 'l_0': 535,
 'l_2': 233,
 'l_4': 126,
 'l_6': 79,
 'l_8': 27,
 'alpha': 0.0005037897305741738,
 'beta_2': 0.5311681482120456,
 'learning_rate_init': 2.485794716152278e-05,
 'power_t': 0.38,
 'm_class_method': 'ovr'}
```

In [107]:
model_params = {
    "activation": "tanh",
    "hidden_layer_sizes": (256, 128, 64, 32, 16, 8, 4),
    "early_stopping": True,
    "random_state": 42,
    "max_iter": 512,
    "batch_size": 1300,
    "solver": "adam",
    "validation_fraction": 0.1,
    "tol": 0.00001,
    "beta_2": 0.5311681482120456,
    "beta_1": 0.33005594239944347,
    "verbose": False,
    "alpha": 0.0005037897305741738,
    "learning_rate_init": 2.485794716152278e-05,
    "power_t": 0.38,
    "warm_start": False,
}
rbm = BernoulliRBM(
    n_components=15,
    # learning_rate=0.01,
    # batch_size=19,
    # n_iter=100,
    verbose=0,
    random_state=42,
)
clf = MLPClassifier(**model_params)

rbm_features_classifier = Pipeline(
    steps=[
        ("rbm", rbm),
        (
            "logistic",
            LogisticRegressionCV(
                max_iter=109000, random_state=43, scoring="f1_macro", n_jobs=-1
            ),
        ),
    ]
)
rbm_features_classifier = Pipeline(
    steps=[
        ("rbm", rbm),
        ("logistic", LogisticRegression(max_iter=109000, random_state=43)),
    ]
)
rbm_features_classifier = Pipeline(
    steps=[
        ("rbm", rbm),
        ("logistic", LogisticRegression(max_iter=109000, random_state=43)),
    ]
)
# rbm_features_classifier = Pipeline(steps=[("logistic",LogisticRegression(max_iter=109000,random_state=43))])
# rbm_features_classifier = Pipeline(steps=[("rbm", rbm), ("logistic",clf)])
# clf = LogisticRegression(random_state=42)
# mclass_clf = OneVsRestClassifier(clf,n_jobs=-1)
# mclass_clf
rbm_wf = OneVsOneClassifier(rbm_features_classifier, n_jobs=-1)
rbm_wf

#### Load Data 

In [108]:
# Load Data
encoder = OneHotEncoder(sparse=False, drop="first")
ohe_nominal_data = encoder.fit_transform(
    nominal_data.drop(["nominal__v_12", "nominal__v_21"], axis=1)
)
target = final_data.target


def convert_to_dfs(X_train, X_test, y_train, y_test, encoder):
    columns = encoder.get_feature_names_out()
    X_train = pd.DataFrame(X_train, columns=columns)
    X_test = pd.DataFrame(X_test, columns=columns)
    y_train = pd.DataFrame(y_train, columns=["target"])
    y_test = pd.DataFrame(y_test, columns=["target"])
    return X_train, X_test, y_train, y_test


def load_data(ohe_nominal_data, target, encoder=encoder):
    X_train, X_test, y_train, y_test = train_test_split(
        ohe_nominal_data, target, stratify=target, test_size=0.2
    )
    X_train, X_test, y_train, y_test = convert_to_dfs(
        X_train, X_test, y_train, y_test, encoder=encoder
    )
    return X_train, X_test, y_train, y_test


X_train, X_test, y_train, y_test = load_data(ohe_nominal_data, target)

In [121]:
from numba import jit

#### RBM Testing

In [326]:
%load_ext cython

The cython extension is already loaded. To reload it, use:
  %reload_ext cython


In [370]:
%%cython -+ 
from pymongo import MongoClient
import optuna
import os
os.environ['NEPTUNE_PROJECT']="mlop3n/SDP"
os.environ['NEPTUNE_NOTEBOOK_PATH']="PycharmProjects/sdpiit/notebooks/Pipeline_components_builder.ipynb"
import warnings
# from sklearnex import patch_sklearn

# patch_sklearn()
import numpy as np
import pandas as pd
from category_encoders import (
    BackwardDifferenceEncoder,
    BaseNEncoder,
    BinaryEncoder,
    CatBoostEncoder,
    CountEncoder,
    GLMMEncoder,
    HelmertEncoder,
    JamesSteinEncoder,
    LeaveOneOutEncoder,
    MEstimateEncoder,
    QuantileEncoder,
    SummaryEncoder,
    TargetEncoder,
    WOEEncoder,
)
from sklearn import set_config
from sklearn.base import clone as model_clone
from sklearn.cluster import *
from sklearn.compose import *
from sklearn.cross_decomposition import *
from sklearn.decomposition import *
from sklearn.ensemble import *
from sklearn.feature_selection import *
from sklearn.gaussian_process import *
from sklearn.linear_model import *
from sklearn.metrics import *
from sklearn.model_selection import *
from sklearn.multioutput import *
from sklearn.multiclass import *
from sklearn.naive_bayes import *
from sklearn.neighbors import *
from sklearn.neural_network import *
from sklearn.pipeline import *
from sklearn.preprocessing import *
from sklearn.svm import *
from sklearn.tree import *
from sklearn.utils import *
from sklearn.dummy import *
from sklearn.semi_supervised import *
from sklearn.discriminant_analysis import *
import sklearnex, daal4py

from tqdm import tqdm, trange
from xgboost import XGBClassifier, XGBRFClassifier
from BorutaShap import BorutaShap

from sklearn.calibration import *
pd.options.plotting.backend = "plotly"
pd.options.display.max_columns = 50
set_config(display="diagram")
warnings.filterwarnings("ignore")
import pickle
from collections import defaultdict

import matplotlib.pyplot as plt
import seaborn as sns
from joblib import parallel_backend
from joblib.memory import Memory

sns.set()
from pprint import pprint
from helpers import PolynomialWrapper as PWrapper
from helpers import NestedCVWrapper as NCVWrapper
from helpers import ColumnSelectors
import sklearn
from helpers import DFCollection
from helpers import plot_mean_std_max
from helpers import CustomMetrics
import gc

CACHE_DIR = Memory(location='../data/joblib_memory/')
OPTUNA_DB = "postgresql+psycopg2://postgres:302492@localhost:5433/optuna"
db = DFCollection()
column_selector = ColumnSelectors()
# classifiers = [f() for f in cls_names]
dtype_info = column_selector.dtype_info
ordinal = column_selector.ordinal_cols
nominal = column_selector.nominal_cols
binary = column_selector.binary_cols
ratio = column_selector.ratio_cols


final_data = db.final_data
final_pred_data = db.final_pred_data
baseline_prediction_data = db.baseline_prediction_data
data_logit = db.data_logits
prediction_data = db.prediction_data
master_data = db.master
given_data = db.data

ordinal_data, nominal_data, binary_data, ratio_data = db.categorise_data()
nominal_categories = db.nominal_categories
ordinal_categories = db.ordinal_categories
class_labels, n_classes, class_priors = class_distribution(final_data.target.to_numpy().reshape(-1,1))
def allow_stopping(func):
    def wrapper():
        try:
            value = func()
            return value
            # gc.collect()
        except KeyboardInterrupt as e:
            print("Program Stopped")
        gc.collect()
    return wrapper
model_params = {
    "activation": "tanh",
    "hidden_layer_sizes" : (256,128,64,32,16,8,4),
    "early_stopping": True,
    "random_state": 42,
    "max_iter":512,
    'batch_size': 1300,
    "solver": 'adam',
    "validation_fraction": 0.1,
    "tol": 0.00001,
    'beta_2': 0.5311681482120456,
    'beta_1': 0.33005594239944347,
    "verbose": False,
    'alpha': 0.0005037897305741738,
     'learning_rate_init': 2.485794716152278e-05,
     'power_t': 0.38,
    "warm_start": False,
}
# cpdef execute():
#     for i in range(10,50):
# rbm = BernoulliRBM(n_components=i,
#                    # learning_rate=0.01,
#                    # batch_size=19,
#                    # n_iter=100,
#                    verbose=0,
#                    random_state=42,)
# # clf = MLPClassifier(**model_params)

# rbm_features_classifier = Pipeline(steps=[("rbm", rbm), ("logistic",LogisticRegressionCV(max_iter=109000,random_state=43,scoring='f1_macro',n_jobs=-1))])
# rbm_features_classifier = Pipeline(steps=[("rbm", rbm), ("logistic",LogisticRegression(max_iter=109000,random_state=43))])
# rbm_features_classifier = Pipeline(steps=[("logistic",LogisticRegression(max_iter=109000,random_state=43))])
# rbm_features_classifier = Pipeline(steps=[("rbm", rbm), ("logistic",clf)])
# clf = LogisticRegression(random_state=42)
# mclass_clf = OneVsRestClassifier(clf,n_jobs=-1)
# mclass_clf
# rbm_wf = OneVsOneClassifier(rbm_features_classifier,n_jobs=-1)
# Load Data
encoder = OneHotEncoder(sparse=False, drop='first')
ohe_nominal_data = encoder.fit_transform(
    nominal_data.drop(["nominal__v_12", "nominal__v_21"], axis=1))
target = final_data.target

# cpdef convert_to_dfs(X_train, X_test, y_train, y_test, encoder):
cpdef convert_to_dfs(X_train, X_test, y_train, y_test, encoder):
    columns = encoder.get_feature_names_out()
    X_train = pd.DataFrame(X_train, columns=columns)
    X_test = pd.DataFrame(X_test, columns=columns)
    y_train = pd.DataFrame(y_train, columns=["target"])
    y_test = pd.DataFrame(y_test, columns=["target"])
    return X_train, X_test, y_train, y_test

# cpdef load_data(ohe_nominal_data, target, encoder = encoder):
cpdef load_data(ohe_nominal_data, target, encoder = encoder):
    X_train, X_test, y_train, y_test = train_test_split(ohe_nominal_data, target, stratify=target, test_size=0.3)    
    X_train, X_test, y_train, y_test = convert_to_dfs(X_train, X_test, y_train, y_test, encoder=encoder)
    return X_train, X_test, y_train, y_test
X_train, X_test, y_train, y_test  = load_data(ohe_nominal_data, target)

cpdef run(start: int, stop: int, step: int):
# def run():
    for i in range(start,stop,step):
        rbm = BernoulliRBM(n_components=i,
                       # learning_rate=0.01,
                       # batch_size=19,
                       # n_iter=100,
                       verbose=0,
                       random_state=42,)
        rbm_clf = OneVsOneClassifier(LogisticRegressionCV(max_iter=109000,n_jobs=-1,fit_intercept=True,random_state=43),n_jobs=-1)
        rbm_clf = OneVsRestClassifier(RandomForestClassifier(n_jobs=-1,random_state=42),n_jobs=-1)
        # rbm_features_classifier = Pipeline(steps=[("rbm", rbm), ("logistic",rbm_clf)])
        rbm_wf = Pipeline(steps=[("rbm", rbm), ("logistic",rbm_clf)])

        with parallel_backend('loky'):
            rbm_wf.fit(X_train, y_train)
            y_pred_test = rbm_wf.predict(X_test)
            y_pred_train = rbm_wf.predict(X_train)
            metric1 = f1_score(y_test, y_pred_test, average="macro", labels=[0, 1, 2])
            metric2 = f1_score(y_train, y_pred_train,
                               average="macro", labels=[0, 1, 2])

            print(metric1, metric2)

In [1]:
# %%cython -+
from pymongo import MongoClient
import optuna
import os

os.environ["NEPTUNE_PROJECT"] = "mlop3n/SDP"
os.environ[
    "NEPTUNE_NOTEBOOK_PATH"
] = "PycharmProjects/sdpiit/notebooks/Pipeline_components_builder.ipynb"
import warnings

# from sklearnex import patch_sklearn

# patch_sklearn()
import numpy as np
import pandas as pd
from category_encoders import (
    BackwardDifferenceEncoder,
    BaseNEncoder,
    BinaryEncoder,
    CatBoostEncoder,
    CountEncoder,
    GLMMEncoder,
    HelmertEncoder,
    JamesSteinEncoder,
    LeaveOneOutEncoder,
    MEstimateEncoder,
    QuantileEncoder,
    SummaryEncoder,
    TargetEncoder,
    WOEEncoder,
)
from sklearn import set_config
from sklearn.base import clone as model_clone
from sklearn.cluster import *
from sklearn.compose import *
from sklearn.cross_decomposition import *
from sklearn.decomposition import *
from sklearn.ensemble import *
from sklearn.feature_selection import *
from sklearn.gaussian_process import *
from sklearn.linear_model import *
from sklearn.metrics import *
from sklearn.model_selection import *
from sklearn.multioutput import *
from sklearn.multiclass import *
from sklearn.naive_bayes import *
from sklearn.neighbors import *
from sklearn.neural_network import *
from sklearn.pipeline import *
from sklearn.preprocessing import *
from sklearn.svm import *
from sklearn.tree import *
from sklearn.utils import *
from sklearn.dummy import *
from sklearn.semi_supervised import *
from sklearn.discriminant_analysis import *
import sklearnex, daal4py

from tqdm import tqdm, trange
from xgboost import XGBClassifier, XGBRFClassifier
from BorutaShap import BorutaShap

from sklearn.calibration import *

pd.options.plotting.backend = "plotly"
pd.options.display.max_columns = 50
set_config(display="diagram")
warnings.filterwarnings("ignore")
import pickle
from collections import defaultdict

import matplotlib.pyplot as plt
import seaborn as sns
from joblib import parallel_backend
from joblib.memory import Memory

sns.set()
from pprint import pprint
from helpers import PolynomialWrapper as PWrapper
from helpers import NestedCVWrapper as NCVWrapper
from helpers import ColumnSelectors
import sklearn
from helpers import DFCollection
from helpers import plot_mean_std_max
from helpers import CustomMetrics
import gc

CACHE_DIR = Memory(location="../data/joblib_memory/")
OPTUNA_DB = "postgresql+psycopg2://postgres:302492@localhost:5433/optuna"
db = DFCollection()
column_selector = ColumnSelectors()
# classifiers = [f() for f in cls_names]
dtype_info = column_selector.dtype_info
ordinal = column_selector.ordinal_cols
nominal = column_selector.nominal_cols
binary = column_selector.binary_cols
ratio = column_selector.ratio_cols


final_data = db.final_data
final_pred_data = db.final_pred_data
baseline_prediction_data = db.baseline_prediction_data
data_logit = db.data_logits
prediction_data = db.prediction_data
master_data = db.master
given_data = db.data

ordinal_data, nominal_data, binary_data, ratio_data = db.categorise_data()
nominal_categories = db.nominal_categories
ordinal_categories = db.ordinal_categories
class_labels, n_classes, class_priors = class_distribution(
    final_data.target.to_numpy().reshape(-1, 1)
)


def allow_stopping(func):
    def wrapper():
        try:
            value = func()
            return value
            # gc.collect()
        except KeyboardInterrupt as e:
            print("Program Stopped")
        gc.collect()

    return wrapper


encoder = OneHotEncoder(sparse=False, drop="first")
ohe_nominal_data = encoder.fit_transform(
    nominal_data.drop(["nominal__v_12", "nominal__v_21"], axis=1)
)
target = final_data.target

# cpdef convert_to_dfs(X_train, X_test, y_train, y_test, encoder):
def convert_to_dfs(X_train, X_test, y_train, y_test, encoder):
    columns = encoder.get_feature_names_out()
    X_train = pd.DataFrame(X_train, columns=columns)
    X_test = pd.DataFrame(X_test, columns=columns)
    y_train = pd.DataFrame(y_train, columns=["target"])
    y_test = pd.DataFrame(y_test, columns=["target"])
    return X_train, X_test, y_train, y_test


# cpdef load_data(ohe_nominal_data, target, encoder = encoder):
def load_data(ohe_nominal_data, target, encoder=encoder):
    X_train, X_test, y_train, y_test = train_test_split(
        ohe_nominal_data, target, stratify=target, test_size=0.3
    )
    X_train, X_test, y_train, y_test = convert_to_dfs(
        X_train, X_test, y_train, y_test, encoder=encoder
    )
    return X_train, X_test, y_train, y_test


X_train, X_test, y_train, y_test = load_data(ohe_nominal_data, target)

df_t = {}


def transformed_collecs(i: int):
    # for i in tqdm(range(start,stop,step)):
    global df_t
    rbm = BernoulliRBM(
        n_components=i,
        verbose=0,
        random_state=42,
    )
    X_train_t = rbm.fit_transform(
        X_train,
        y_train,
    )
    X_test_t = rbm.transform(X_test)
    X_train_t_df = pd.DataFrame(
        X_train_t, columns=rbm.get_feature_names_out(), index=X_train.index
    )
    X_test_t_df = pd.DataFrame(
        X_test_t, columns=rbm.get_feature_names_out(), index=X_test.index
    )
    df_t[i] = (X_train_t_df, X_test_t_df)
    # return df_t


# payload = transformed_collecs(7,400,1)
import joblib
from joblib import Parallel, delayed, parallel_backend
from tqdm import trange

with parallel_backend("threading"):
    results = Parallel(n_jobs=-1)(
        delayed(transformed_collecs)(j) for j in trange(7, 400, 1)
    )
payload = df_t
joblib.dump(
    payload,
    "../data/rbm_transforms.pkl",
    compress="lzma",
    protocol=pickle.HIGHEST_PROTOCOL,
)


def run(payload=payload):
    # def run():
    corrs_test = {}
    corrs_train = {}
    for i in payload:
        X_train_t_df, X_test_t_df = payload[i]

        corrs_test[i] = [
            X_test_t_df.corrwith(y_test, method="kendall"),
            X_test_t_df.corrwith(y_test, method="pearson"),
            X_test_t_df.corrwith(y_test, method="spearman"),
        ]
        corrs_train[i] = [
            X_train_t_df.corrwith(y_train, method="kendall"),
            X_train_t_df.corrwith(y_train, method="pearson"),
            X_train_t_df.corrwith(y_train, method="spearman"),
        ]

        return corrs_test, corrs_train

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 393/393 [1:22:21<00:00, 12.57s/it]


Unnamed: 0,bernoullirbm0,bernoullirbm1,bernoullirbm2,bernoullirbm3,bernoullirbm4,bernoullirbm5,bernoullirbm6,bernoullirbm7,bernoullirbm8,bernoullirbm9,bernoullirbm10,bernoullirbm11,bernoullirbm12,bernoullirbm13,bernoullirbm14,bernoullirbm15,bernoullirbm16,bernoullirbm17,bernoullirbm18,bernoullirbm19,bernoullirbm20,bernoullirbm21,bernoullirbm22,bernoullirbm23,bernoullirbm24,bernoullirbm25,bernoullirbm26,bernoullirbm27,bernoullirbm28,bernoullirbm29,bernoullirbm30,bernoullirbm31,bernoullirbm32,bernoullirbm33,bernoullirbm34,bernoullirbm35,bernoullirbm36,bernoullirbm37,bernoullirbm38,bernoullirbm39,bernoullirbm40,bernoullirbm41,bernoullirbm42,bernoullirbm43,bernoullirbm44,bernoullirbm45,bernoullirbm46,bernoullirbm47,bernoullirbm48,bernoullirbm49
0,0.825472,0.608966,0.798992,0.774390,0.830711,0.749698,0.856484,0.779862,0.864356,0.867819,0.798058,0.299421,0.881988,0.743503,0.799799,0.891836,0.825457,0.718358,0.801489,0.960058,0.747026,0.841879,0.716211,0.758597,0.803330,0.793328,0.899029,0.713150,0.741093,0.771880,0.765416,0.735979,0.873624,0.821118,0.782969,0.832031,0.670217,0.803215,0.844529,0.857595,0.871091,0.739720,0.863660,0.743367,0.766235,0.780084,0.845985,0.890287,0.883802,0.809965
1,0.743103,0.690061,0.730932,0.721668,0.746688,0.711166,0.751939,0.708754,0.751453,0.738091,0.720981,0.607389,0.737422,0.696702,0.722265,0.733562,0.729793,0.702526,0.722484,0.801794,0.711909,0.755495,0.701241,0.736257,0.736101,0.730511,0.739684,0.701281,0.734119,0.725847,0.716003,0.705702,0.728293,0.733875,0.734693,0.738132,0.680491,0.711841,0.737925,0.738036,0.734062,0.711877,0.743573,0.701711,0.734791,0.692757,0.735904,0.742348,0.738192,0.740315
2,0.823888,0.724630,0.818790,0.799370,0.824357,0.798775,0.823200,0.806903,0.828466,0.841347,0.814065,0.548873,0.843411,0.792736,0.816787,0.839940,0.820978,0.767521,0.817562,0.850620,0.790894,0.823296,0.773860,0.791779,0.813262,0.810285,0.823461,0.783645,0.788262,0.796945,0.801913,0.797448,0.850129,0.819731,0.805315,0.827443,0.754653,0.818600,0.838282,0.829130,0.829583,0.790544,0.833208,0.778244,0.787380,0.809522,0.829302,0.796430,0.834189,0.815977
3,0.779772,0.732357,0.775798,0.755117,0.779050,0.757169,0.786755,0.766147,0.788296,0.789118,0.772072,0.651963,0.784542,0.753902,0.762334,0.776061,0.786429,0.755741,0.766782,0.817536,0.757102,0.789724,0.747751,0.771465,0.777034,0.776349,0.767570,0.750285,0.769588,0.772969,0.766712,0.751647,0.777931,0.776201,0.778213,0.784888,0.740327,0.768754,0.779922,0.784017,0.783146,0.753901,0.784766,0.753787,0.780591,0.746942,0.785345,0.776011,0.777791,0.780189
4,0.808482,0.906626,0.843214,0.869831,0.789465,0.887802,0.637391,0.870334,0.475898,0.703935,0.851573,0.925988,0.577993,0.886502,0.864503,0.342013,0.808259,0.892648,0.849523,0.010393,0.880954,0.698688,0.898255,0.859799,0.836737,0.856339,0.234758,0.895732,0.874078,0.857269,0.867674,0.883294,0.667484,0.804763,0.855507,0.772608,0.906087,0.855820,0.769435,0.735344,0.469714,0.888762,0.688093,0.891402,0.850544,0.879263,0.758201,0.134694,0.391428,0.828017
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1134,0.799516,0.682348,0.795363,0.772212,0.800796,0.756506,0.822282,0.775466,0.837284,0.820533,0.778968,0.500582,0.834754,0.741697,0.784122,0.848630,0.800728,0.745071,0.783302,0.901789,0.742568,0.824573,0.730686,0.762784,0.796518,0.774389,0.851398,0.734057,0.761130,0.762132,0.760488,0.753605,0.825667,0.790136,0.770526,0.815060,0.697080,0.781868,0.804024,0.821965,0.840871,0.754924,0.821983,0.740100,0.781250,0.748716,0.810337,0.837423,0.838327,0.785962
1135,0.837271,0.534949,0.822503,0.767639,0.858538,0.751900,0.895097,0.785434,0.912387,0.878676,0.817174,0.185969,0.899859,0.682630,0.801447,0.925931,0.844913,0.723531,0.803238,0.987705,0.725893,0.890451,0.672271,0.762571,0.813495,0.779255,0.936078,0.680004,0.733116,0.772395,0.756220,0.734361,0.892071,0.839653,0.755984,0.863993,0.589930,0.795186,0.863122,0.879480,0.918094,0.719529,0.885686,0.706308,0.779971,0.723892,0.871905,0.942069,0.918849,0.813543
1136,0.821473,0.645394,0.788791,0.748073,0.825042,0.735999,0.846077,0.759491,0.866888,0.847126,0.779273,0.412529,0.853472,0.722980,0.776565,0.875622,0.816581,0.697073,0.782381,0.961668,0.729965,0.841463,0.698837,0.767936,0.784183,0.778375,0.889057,0.702512,0.756015,0.788700,0.760251,0.729622,0.842051,0.816071,0.782155,0.817061,0.658359,0.766022,0.823686,0.836270,0.872383,0.736698,0.847189,0.698844,0.785006,0.727349,0.827967,0.897244,0.874095,0.796297
1137,0.811351,0.636004,0.793719,0.775427,0.819633,0.753330,0.842186,0.781432,0.848972,0.852041,0.801737,0.348662,0.865468,0.743092,0.803560,0.869636,0.822394,0.739916,0.796205,0.939770,0.753489,0.833427,0.729923,0.763320,0.804554,0.793300,0.872692,0.715637,0.750508,0.772257,0.763391,0.742583,0.856835,0.816871,0.776505,0.838916,0.667112,0.799030,0.834118,0.845588,0.855285,0.752759,0.849893,0.747147,0.770184,0.771403,0.836992,0.866727,0.861378,0.808901
