In [3]:
import pickle
import warnings

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from tqdm import tqdm
from xgboost import XGBClassifier

from config import *
from preprocessing import PREPROCESSING_CONFIGURATIONS

warnings.simplefilter(action="ignore", category=Warning)


def _generate_config_key(config):
    return " + ".join(func.__name__ for func in config)


def _prepare_data(
    datapath,
    target_label,
    columns_to_ignore=None,
    labels_to_ignore=None,
    test_size=0.2,
):
    # Read the dataset
    df = pd.read_csv(datapath)

    # Drop specified columns
    if columns_to_ignore:
        df.drop(columns=columns_to_ignore, inplace=True)

    # Drop rows with invalid categories in the target label
    if labels_to_ignore:
        df = df[~df[target_label].isin(labels_to_ignore)]

    # Check if target variable is categorical and convert to numerical if true
    if df[target_label].dtype == "object":
        le = LabelEncoder()
        df[target_label] = le.fit_transform(df[target_label])

    # Split features and target variable
    X = df.drop(columns=target_label)
    y = df[target_label]

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=42, stratify=y
    )

    return X_train, y_train, X_test, y_test


def _apply_techniques(config, X_train_, y_train, X_test, y_test):
    outliers_detection_technique = config[0]
    features_selection_technique = config[1]
    oversampling_technique = config[2]

    # Apply outlier removal
    X_train_, y_train = outliers_detection_technique(X_train_, y_train)

    # Apply features selection
    X_train_, y_train, X_test, y_test = features_selection_technique(
        X_train_, y_train, X_test, y_test
    )

    # Apply oversampling
    X_train_, y_train = oversampling_technique(X_train_, y_train)

    return X_train_, y_train, X_test, y_test


def get_data_for_config(
    config,
    datapath,
    target,
    columns_to_ignore=None,
    labels_to_ignore=None,
    test_size=0.2,
):
    X_train, y_train, X_test, y_test = _prepare_data(
        datapath=datapath,
        columns_to_ignore=columns_to_ignore,
        target_label=target,
        labels_to_ignore=labels_to_ignore,
        test_size=test_size,
    )
    X_train, y_train, X_test, y_test = _apply_techniques(
        config, X_train, y_train, X_test, y_test
    )
    # print(f"Size of X_train: {X_train.shape}")
    # print(f"Size of y_train: {y_train.shape}")
    # print(f"Size of X_test: {X_test.shape}")
    # print(f"Size of y_test: {y_test.shape}")
    # # Printing proportions
    # train_counts = y_train.value_counts(normalize=True)
    # test_counts = y_test.value_counts(normalize=True)

    # print(
    #     f"Proportion in y_train (positive:negative): {train_counts.get(1, 0):.2f} : {train_counts.get(0, 0):.2f}"
    # )
    # print(
    #     f"Proportion in y_test (positive:negative): {test_counts.get(1, 0):.2f} : {test_counts.get(0, 0):.2f}"
    # )
    return X_train, y_train, X_test, y_test

In [4]:
# Define the metrics
def compute_metrics(y_true, y_pred):
    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred),
        "recall": recall_score(y_true, y_pred),
        "f1": f1_score(y_true, y_pred),
        "confusion_matrix": confusion_matrix(y_true, y_pred),
    }


# Training function
def train_classifiers(X_train, y_train, X_test, y_test, config_key):
    results = {}
    for classifier_name, classifier_info in classifiers.items():
        try:
            clf = classifier_info["model"]
            clf.fit(X_train, y_train)
            predictions = clf.predict(X_test)
            metrics = compute_metrics(y_test, predictions)

            results[classifier_name] = {
                "metrics": metrics,
                "data": {
                    "X_train": X_train,
                    "y_train": y_train,
                    "X_test": X_test,
                    "y_test": y_test,
                    "y_pred": predictions,
                },
            }
        except Exception as e:
            print(
                f"{classifier_name} failed to train with configuration {config_key} because: {e}"
            )
    return results


classifiers = {
    "RandomForest": {
        "model": RandomForestClassifier(),
    },
    "K-nearest-neighbor": {
        "model": KNeighborsClassifier(),
    },
    "Artificial Neural Network": {
        "model": MLPClassifier(),
    },
    "Decision Tree": {
        "model": DecisionTreeClassifier(),
    },
    "Logistic Regression": {
        "model": LogisticRegression(),
    },
    "Support Vector Machine": {
        "model": SVC(),  # https: //www.kaggle.com/code/sunayanagawde/ml-algorithms-usage-and-prediction?scriptVersionId=120249289&cellId=62
    },
    "Naive Bayes": {
        "model": GaussianNB(),
    },
    "XG-boost": {
        "model": XGBClassifier(),
    },
}

kaggle = (
    "kaggle",
    KAGGLE_DATA_PATH,
    KAGGLE_IGNORED_FEAT,
    KAGGLE_IGNORED_LABELS,
    KAGGLE_TARGET,
    0.8,
)
moodle = (
    "moodle",
    MOODLE_DATA_PATH,
    MOODLE_IGNORED_FEAT,
    MOODLE_IGNORED_LABELS,
    MOODLE_TARGET,
    0.2,
)
student_pred = (
    "student_pred",
    STUDENT_PRED_PATH,
    STUDENT_PRED_IGNORED_FEAT,
    STUDENT_PRED_IGNORED_LABELS,
    STUDENT_PRED_TARGET,
    0.2,
)
datasets = [
    kaggle,
    moodle,
    student_pred,
]

# Main dictionary to store results
all_results = {}

for dataset in datasets:
    dataset_name = dataset[0]
    datapath = dataset[1]
    columns_to_ignore = dataset[2]
    labels_to_ignore = dataset[3]
    target_label = dataset[4]
    test_size = dataset[5]
    for config in tqdm(PREPROCESSING_CONFIGURATIONS, desc=f"Dataset: {dataset_name}"):
        config_key = _generate_config_key(config)
        try:
            X_train, y_train, X_test, y_test = get_data_for_config(
                config,
                datapath=datapath,
                target=target_label,
                columns_to_ignore=columns_to_ignore,
                labels_to_ignore=labels_to_ignore,
                test_size=test_size,
            )
            all_results[config_key] = train_classifiers(
                X_train, y_train, X_test, y_test, config_key
            )
        except Exception as e:
            print(f"{config_key} is invalid for dataset {dataset_name} because: {e}")
        # Serialize results
        with open(f"all_results_{dataset_name}.pkl", "wb") as f:
            pickle.dump(all_results, f)

Dataset: kaggle:   0%|          | 0/180 [00:00<?, ?it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_none + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:   1%|          | 1/180 [00:01<03:15,  1.09s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_none + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:   1%|          | 2/180 [00:02<03:29,  1.18s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_none + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:   2%|▏         | 3/180 [00:03<03:35,  1.22s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_none + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:   2%|▏         | 4/180 [00:04<03:35,  1.23s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_none + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:   3%|▎         | 5/180 [00:06<03:34,  1.23s/it]

remove_outliers_none + features_selection_none + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: kaggle:   7%|▋         | 12/180 [00:12<02:31,  1.11it/s]

remove_outliers_none + features_selection_extratrees + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: kaggle:  10%|█         | 18/180 [00:18<02:25,  1.11it/s]

remove_outliers_none + features_selection_mutual_information + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_rfe + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  11%|█         | 19/180 [00:20<03:13,  1.20s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_rfe + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  11%|█         | 20/180 [00:22<03:51,  1.45s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_rfe + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  12%|█▏        | 21/180 [00:24<04:19,  1.63s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_rfe + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  12%|█▏        | 22/180 [00:26<04:35,  1.74s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_rfe + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  13%|█▎        | 24/180 [00:29<04:03,  1.56s/it]

remove_outliers_none + features_selection_rfe + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_variance_threshold + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  14%|█▍        | 25/180 [00:30<03:40,  1.42s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_variance_threshold + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  14%|█▍        | 26/180 [00:32<03:30,  1.37s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_variance_threshold + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  15%|█▌        | 27/180 [00:33<03:26,  1.35s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_variance_threshold + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  16%|█▌        | 28/180 [00:34<03:22,  1.33s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_variance_threshold + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  16%|█▌        | 29/180 [00:35<03:16,  1.30s/it]

remove_outliers_none + features_selection_variance_threshold + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_none + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  17%|█▋        | 31/180 [00:37<02:24,  1.03it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_none + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  18%|█▊        | 32/180 [00:38<02:37,  1.07s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_none + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  18%|█▊        | 33/180 [00:39<02:47,  1.14s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_none + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  19%|█▉        | 34/180 [00:41<02:53,  1.19s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_none + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  20%|██        | 36/180 [00:42<02:13,  1.08it/s]

remove_outliers_isolation_forest + features_selection_none + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: kaggle:  23%|██▎       | 42/180 [00:49<02:17,  1.00it/s]

remove_outliers_isolation_forest + features_selection_extratrees + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: kaggle:  27%|██▋       | 48/180 [00:56<02:12,  1.00s/it]

remove_outliers_isolation_forest + features_selection_mutual_information + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_rfe + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  27%|██▋       | 49/180 [00:58<02:47,  1.28s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_rfe + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  28%|██▊       | 50/180 [01:00<03:13,  1.49s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_rfe + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  28%|██▊       | 51/180 [01:02<03:32,  1.65s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_rfe + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  29%|██▉       | 52/180 [01:04<03:48,  1.78s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_rfe + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  30%|███       | 54/180 [01:08<03:34,  1.70s/it]

remove_outliers_isolation_forest + features_selection_rfe + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_variance_threshold + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  31%|███       | 55/180 [01:09<03:14,  1.55s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_variance_threshold + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  31%|███       | 56/180 [01:10<03:05,  1.50s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_variance_threshold + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  32%|███▏      | 57/180 [01:12<02:59,  1.46s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_variance_threshold + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  32%|███▏      | 58/180 [01:13<02:52,  1.42s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_variance_threshold + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  33%|███▎      | 60/180 [01:14<02:03,  1.03s/it]

remove_outliers_isolation_forest + features_selection_variance_threshold + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_none + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  34%|███▍      | 61/180 [01:15<02:01,  1.02s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_none + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  34%|███▍      | 62/180 [01:16<02:03,  1.04s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_none + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  35%|███▌      | 63/180 [01:18<02:05,  1.07s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_none + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  36%|███▌      | 64/180 [01:19<02:06,  1.09s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_none + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  36%|███▌      | 65/180 [01:20<02:06,  1.10s/it]

remove_outliers_zscore + features_selection_none + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: kaggle:  40%|████      | 72/180 [01:26<01:29,  1.20it/s]

remove_outliers_zscore + features_selection_extratrees + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: kaggle:  43%|████▎     | 78/180 [01:32<01:30,  1.13it/s]

remove_outliers_zscore + features_selection_mutual_information + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_rfe + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  44%|████▍     | 79/180 [01:34<01:56,  1.16s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_rfe + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  44%|████▍     | 80/180 [01:36<02:16,  1.37s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_rfe + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  45%|████▌     | 81/180 [01:38<02:31,  1.53s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_rfe + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  46%|████▌     | 82/180 [01:39<02:41,  1.64s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_rfe + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  47%|████▋     | 84/180 [01:42<02:20,  1.46s/it]

remove_outliers_zscore + features_selection_rfe + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_variance_threshold + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  47%|████▋     | 85/180 [01:43<02:05,  1.32s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_variance_threshold + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  48%|████▊     | 86/180 [01:44<01:59,  1.28s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_variance_threshold + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  48%|████▊     | 87/180 [01:46<01:56,  1.25s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_variance_threshold + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  49%|████▉     | 88/180 [01:47<01:53,  1.23s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_variance_threshold + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  50%|█████     | 90/180 [01:48<01:20,  1.12it/s]

remove_outliers_zscore + features_selection_variance_threshold + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_none + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  51%|█████     | 91/180 [01:49<01:15,  1.17it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_none + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  51%|█████     | 92/180 [01:50<01:15,  1.17it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_none + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  52%|█████▏    | 93/180 [01:50<01:10,  1.24it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_none + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  52%|█████▏    | 94/180 [01:51<01:11,  1.20it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_none + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  53%|█████▎    | 96/180 [01:52<00:51,  1.62it/s]

remove_outliers_iqr + features_selection_none + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: kaggle:  57%|█████▋    | 102/180 [01:57<00:54,  1.42it/s]

remove_outliers_iqr + features_selection_extratrees + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: kaggle:  60%|██████    | 108/180 [02:02<00:47,  1.51it/s]

remove_outliers_iqr + features_selection_mutual_information + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_rfe + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  61%|██████    | 109/180 [02:03<00:53,  1.34it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_rfe + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  61%|██████    | 110/180 [02:04<01:05,  1.07it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_rfe + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  62%|██████▏   | 111/180 [02:05<01:11,  1.03s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_rfe + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  62%|██████▏   | 112/180 [02:07<01:15,  1.11s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_rfe + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  63%|██████▎   | 114/180 [02:09<01:08,  1.03s/it]

remove_outliers_iqr + features_selection_rfe + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_variance_threshold + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  64%|██████▍   | 115/180 [02:09<00:59,  1.09it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_variance_threshold + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  64%|██████▍   | 116/180 [02:10<00:57,  1.12it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_variance_threshold + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  65%|██████▌   | 117/180 [02:11<00:54,  1.15it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_variance_threshold + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  66%|██████▌   | 118/180 [02:12<00:53,  1.15it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_variance_threshold + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  67%|██████▋   | 120/180 [02:13<00:38,  1.54it/s]

remove_outliers_iqr + features_selection_variance_threshold + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
RandomForest failed to train with configuration remove_outliers_dbscan + features_selection_none + oversampling_none because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by RandomForestClassifier.
K-nearest-neighbor failed to train with configuration remove_outliers_dbscan + features_selection_none + oversampling_none because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by KNeighborsClassifier.
Artificial Neural Network failed to train with configuration remove_outliers_dbscan + features_selection_none + oversampling_none because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by MLPClassifier.
Decision Tree failed to train with configuration remove_outliers_dbscan + 

Dataset: kaggle:  68%|██████▊   | 122/180 [02:13<00:23,  2.44it/s]

remove_outliers_dbscan + features_selection_none + oversampling_smote is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by SMOTE.
remove_outliers_dbscan + features_selection_none + oversampling_svm_smote is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by SVMSMOTE.


Dataset: kaggle:  69%|██████▉   | 124/180 [02:14<00:16,  3.39it/s]

remove_outliers_dbscan + features_selection_none + oversampling_adasyn is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by ADASYN.
remove_outliers_dbscan + features_selection_none + oversampling_smote_borderline is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by BorderlineSMOTE.


Dataset: kaggle:  70%|███████   | 126/180 [02:14<00:12,  4.42it/s]

remove_outliers_dbscan + features_selection_none + oversampling_nc_smote is invalid for dataset kaggle because: Found array with 0 sample(s) while a minimum of 1 is required.
remove_outliers_dbscan + features_selection_extratrees + oversampling_none is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by ExtraTreesClassifier.


Dataset: kaggle:  71%|███████   | 128/180 [02:14<00:11,  4.68it/s]

remove_outliers_dbscan + features_selection_extratrees + oversampling_smote is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by ExtraTreesClassifier.
remove_outliers_dbscan + features_selection_extratrees + oversampling_svm_smote is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by ExtraTreesClassifier.


Dataset: kaggle:  72%|███████▏  | 130/180 [02:15<00:09,  5.01it/s]

remove_outliers_dbscan + features_selection_extratrees + oversampling_adasyn is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by ExtraTreesClassifier.
remove_outliers_dbscan + features_selection_extratrees + oversampling_smote_borderline is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by ExtraTreesClassifier.


Dataset: kaggle:  73%|███████▎  | 132/180 [02:15<00:09,  5.08it/s]

remove_outliers_dbscan + features_selection_extratrees + oversampling_nc_smote is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by ExtraTreesClassifier.
remove_outliers_dbscan + features_selection_mutual_information + oversampling_none is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required.


Dataset: kaggle:  74%|███████▍  | 134/180 [02:16<00:08,  5.14it/s]

remove_outliers_dbscan + features_selection_mutual_information + oversampling_smote is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required.
remove_outliers_dbscan + features_selection_mutual_information + oversampling_svm_smote is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required.


Dataset: kaggle:  76%|███████▌  | 136/180 [02:16<00:07,  5.56it/s]

remove_outliers_dbscan + features_selection_mutual_information + oversampling_adasyn is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required.
remove_outliers_dbscan + features_selection_mutual_information + oversampling_smote_borderline is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required.


Dataset: kaggle:  77%|███████▋  | 138/180 [02:16<00:07,  5.82it/s]

remove_outliers_dbscan + features_selection_mutual_information + oversampling_nc_smote is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required.
remove_outliers_dbscan + features_selection_rfe + oversampling_none is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by RFE.


Dataset: kaggle:  77%|███████▋  | 139/180 [02:16<00:06,  5.93it/s]

remove_outliers_dbscan + features_selection_rfe + oversampling_smote is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by RFE.


Dataset: kaggle:  78%|███████▊  | 141/180 [02:17<00:07,  5.42it/s]

remove_outliers_dbscan + features_selection_rfe + oversampling_svm_smote is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by RFE.
remove_outliers_dbscan + features_selection_rfe + oversampling_adasyn is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by RFE.


Dataset: kaggle:  79%|███████▉  | 143/180 [02:17<00:06,  5.99it/s]

remove_outliers_dbscan + features_selection_rfe + oversampling_smote_borderline is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by RFE.
remove_outliers_dbscan + features_selection_rfe + oversampling_nc_smote is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by RFE.


Dataset: kaggle:  81%|████████  | 145/180 [02:17<00:05,  6.25it/s]

remove_outliers_dbscan + features_selection_variance_threshold + oversampling_none is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by VarianceThreshold.
remove_outliers_dbscan + features_selection_variance_threshold + oversampling_smote is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by VarianceThreshold.


Dataset: kaggle:  82%|████████▏ | 147/180 [02:18<00:05,  6.06it/s]

remove_outliers_dbscan + features_selection_variance_threshold + oversampling_svm_smote is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by VarianceThreshold.
remove_outliers_dbscan + features_selection_variance_threshold + oversampling_adasyn is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by VarianceThreshold.


Dataset: kaggle:  83%|████████▎ | 149/180 [02:18<00:05,  6.06it/s]

remove_outliers_dbscan + features_selection_variance_threshold + oversampling_smote_borderline is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by VarianceThreshold.
remove_outliers_dbscan + features_selection_variance_threshold + oversampling_nc_smote is invalid for dataset kaggle because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by VarianceThreshold.


Dataset: kaggle:  83%|████████▎ | 150/180 [02:18<00:04,  6.10it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_none + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  84%|████████▍ | 151/180 [02:20<00:15,  1.84it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_none + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  84%|████████▍ | 152/180 [02:21<00:21,  1.28it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_none + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  85%|████████▌ | 153/180 [02:22<00:25,  1.05it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_none + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  86%|████████▌ | 154/180 [02:24<00:28,  1.09s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_none + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  87%|████████▋ | 156/180 [02:25<00:21,  1.14it/s]

remove_outliers_lof + features_selection_none + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: kaggle:  90%|█████████ | 162/180 [02:33<00:18,  1.02s/it]

remove_outliers_lof + features_selection_extratrees + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: kaggle:  93%|█████████▎| 168/180 [02:40<00:12,  1.05s/it]

remove_outliers_lof + features_selection_mutual_information + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_rfe + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  94%|█████████▍| 169/180 [02:42<00:14,  1.35s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_rfe + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  94%|█████████▍| 170/180 [02:44<00:15,  1.56s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_rfe + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  95%|█████████▌| 171/180 [02:46<00:15,  1.77s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_rfe + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  96%|█████████▌| 172/180 [02:48<00:15,  1.89s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_rfe + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  97%|█████████▋| 174/180 [02:52<00:10,  1.73s/it]

remove_outliers_lof + features_selection_rfe + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_variance_threshold + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  97%|█████████▋| 175/180 [02:53<00:07,  1.58s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_variance_threshold + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  98%|█████████▊| 176/180 [02:54<00:06,  1.51s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_variance_threshold + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  98%|█████████▊| 177/180 [02:56<00:04,  1.51s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_variance_threshold + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle:  99%|█████████▉| 178/180 [02:57<00:03,  1.51s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_variance_threshold + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


Dataset: kaggle: 100%|██████████| 180/180 [02:59<00:00,  1.00it/s]


remove_outliers_lof + features_selection_variance_threshold + oversampling_nc_smote is invalid for dataset kaggle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: moodle:   3%|▎         | 6/180 [00:02<01:03,  2.74it/s]

remove_outliers_none + features_selection_none + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: moodle:   7%|▋         | 12/180 [00:06<01:27,  1.92it/s]

remove_outliers_none + features_selection_extratrees + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
remove_outliers_none + features_selection_mutual_information + oversampling_none is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:   8%|▊         | 14/180 [00:06<00:54,  3.05it/s]

remove_outliers_none + features_selection_mutual_information + oversampling_smote is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.
remove_outliers_none + features_selection_mutual_information + oversampling_svm_smote is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:   9%|▉         | 16/180 [00:06<00:37,  4.33it/s]

remove_outliers_none + features_selection_mutual_information + oversampling_adasyn is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.
remove_outliers_none + features_selection_mutual_information + oversampling_smote_borderline is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:  10%|█         | 18/180 [00:07<00:29,  5.53it/s]

remove_outliers_none + features_selection_mutual_information + oversampling_nc_smote is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:  13%|█▎        | 24/180 [00:10<01:21,  1.91it/s]

remove_outliers_none + features_selection_rfe + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: moodle:  17%|█▋        | 30/180 [00:13<00:57,  2.59it/s]

remove_outliers_none + features_selection_variance_threshold + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: moodle:  20%|██        | 36/180 [00:16<01:04,  2.24it/s]

remove_outliers_isolation_forest + features_selection_none + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: moodle:  23%|██▎       | 42/180 [00:20<01:28,  1.55it/s]

remove_outliers_isolation_forest + features_selection_extratrees + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: moodle:  24%|██▍       | 43/180 [00:21<01:10,  1.93it/s]

remove_outliers_isolation_forest + features_selection_mutual_information + oversampling_none is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:  24%|██▍       | 44/180 [00:21<00:58,  2.34it/s]

remove_outliers_isolation_forest + features_selection_mutual_information + oversampling_smote is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:  25%|██▌       | 45/180 [00:21<00:49,  2.70it/s]

remove_outliers_isolation_forest + features_selection_mutual_information + oversampling_svm_smote is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:  26%|██▌       | 46/180 [00:21<00:44,  3.00it/s]

remove_outliers_isolation_forest + features_selection_mutual_information + oversampling_adasyn is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:  26%|██▌       | 47/180 [00:22<00:39,  3.38it/s]

remove_outliers_isolation_forest + features_selection_mutual_information + oversampling_smote_borderline is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:  27%|██▋       | 48/180 [00:22<00:35,  3.68it/s]

remove_outliers_isolation_forest + features_selection_mutual_information + oversampling_nc_smote is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:  30%|███       | 54/180 [00:26<01:19,  1.59it/s]

remove_outliers_isolation_forest + features_selection_rfe + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: moodle:  33%|███▎      | 60/180 [00:29<00:53,  2.26it/s]

remove_outliers_isolation_forest + features_selection_variance_threshold + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: moodle:  36%|███▌      | 65/180 [00:32<00:57,  2.02it/s]

remove_outliers_zscore + features_selection_none + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: moodle:  40%|████      | 72/180 [00:35<00:53,  2.03it/s]

remove_outliers_zscore + features_selection_extratrees + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
remove_outliers_zscore + features_selection_mutual_information + oversampling_none is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.
remove_outliers_zscore + features_selection_mutual_information + oversampling_smote is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:  42%|████▏     | 75/180 [00:36<00:29,  3.61it/s]

remove_outliers_zscore + features_selection_mutual_information + oversampling_svm_smote is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.
remove_outliers_zscore + features_selection_mutual_information + oversampling_adasyn is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:  43%|████▎     | 78/180 [00:36<00:19,  5.29it/s]

remove_outliers_zscore + features_selection_mutual_information + oversampling_smote_borderline is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.
remove_outliers_zscore + features_selection_mutual_information + oversampling_nc_smote is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:  47%|████▋     | 84/180 [00:40<00:53,  1.78it/s]

remove_outliers_zscore + features_selection_rfe + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: moodle:  49%|████▉     | 89/180 [00:43<00:46,  1.97it/s]

remove_outliers_zscore + features_selection_variance_threshold + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: moodle:  53%|█████▎    | 95/180 [00:45<00:37,  2.28it/s]

remove_outliers_iqr + features_selection_none + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: moodle:  57%|█████▋    | 102/180 [00:48<00:36,  2.13it/s]

remove_outliers_iqr + features_selection_extratrees + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
remove_outliers_iqr + features_selection_mutual_information + oversampling_none is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.
remove_outliers_iqr + features_selection_mutual_information + oversampling_smote is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:  58%|█████▊    | 105/180 [00:49<00:18,  3.97it/s]

remove_outliers_iqr + features_selection_mutual_information + oversampling_svm_smote is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.
remove_outliers_iqr + features_selection_mutual_information + oversampling_adasyn is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.
remove_outliers_iqr + features_selection_mutual_information + oversampling_smote_borderline is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:  59%|█████▉    | 107/180 [00:49<00:13,  5.44it/s]

remove_outliers_iqr + features_selection_mutual_information + oversampling_nc_smote is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:  63%|██████▎   | 114/180 [00:53<00:33,  1.97it/s]

remove_outliers_iqr + features_selection_rfe + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: moodle:  66%|██████▌   | 119/180 [00:55<00:26,  2.30it/s]

remove_outliers_iqr + features_selection_variance_threshold + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: moodle:  68%|██████▊   | 123/180 [00:56<00:12,  4.48it/s]

remove_outliers_dbscan + features_selection_none + oversampling_smote is invalid for dataset moodle because: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 11
remove_outliers_dbscan + features_selection_none + oversampling_svm_smote is invalid for dataset moodle because: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 11
remove_outliers_dbscan + features_selection_none + oversampling_adasyn is invalid for dataset moodle because: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 11


Dataset: moodle:  69%|██████▉   | 125/180 [00:56<00:10,  5.41it/s]

remove_outliers_dbscan + features_selection_none + oversampling_smote_borderline is invalid for dataset moodle because: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 11
remove_outliers_dbscan + features_selection_none + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: moodle:  71%|███████   | 128/180 [00:57<00:11,  4.70it/s]

remove_outliers_dbscan + features_selection_extratrees + oversampling_smote is invalid for dataset moodle because: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 11


Dataset: moodle:  72%|███████▏  | 129/180 [00:57<00:10,  4.66it/s]

remove_outliers_dbscan + features_selection_extratrees + oversampling_svm_smote is invalid for dataset moodle because: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 11


Dataset: moodle:  72%|███████▏  | 130/180 [00:57<00:10,  4.58it/s]

remove_outliers_dbscan + features_selection_extratrees + oversampling_adasyn is invalid for dataset moodle because: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 11


Dataset: moodle:  73%|███████▎  | 131/180 [00:57<00:10,  4.51it/s]

remove_outliers_dbscan + features_selection_extratrees + oversampling_smote_borderline is invalid for dataset moodle because: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 11


Dataset: moodle:  73%|███████▎  | 132/180 [00:57<00:10,  4.57it/s]

remove_outliers_dbscan + features_selection_extratrees + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
remove_outliers_dbscan + features_selection_mutual_information + oversampling_none is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:  74%|███████▍  | 134/180 [00:58<00:09,  5.02it/s]

remove_outliers_dbscan + features_selection_mutual_information + oversampling_smote is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.
remove_outliers_dbscan + features_selection_mutual_information + oversampling_svm_smote is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:  76%|███████▌  | 136/180 [00:58<00:08,  5.29it/s]

remove_outliers_dbscan + features_selection_mutual_information + oversampling_adasyn is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.
remove_outliers_dbscan + features_selection_mutual_information + oversampling_smote_borderline is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:  77%|███████▋  | 138/180 [00:59<00:09,  4.67it/s]

remove_outliers_dbscan + features_selection_mutual_information + oversampling_nc_smote is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:  78%|███████▊  | 140/180 [00:59<00:10,  3.68it/s]

remove_outliers_dbscan + features_selection_rfe + oversampling_smote is invalid for dataset moodle because: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 11


Dataset: moodle:  78%|███████▊  | 141/180 [00:59<00:09,  3.94it/s]

remove_outliers_dbscan + features_selection_rfe + oversampling_svm_smote is invalid for dataset moodle because: Found array with 0 sample(s) (shape=(0, 4)) while a minimum of 1 is required by NearestNeighbors.


Dataset: moodle:  79%|███████▉  | 142/180 [01:00<00:09,  4.04it/s]

remove_outliers_dbscan + features_selection_rfe + oversampling_adasyn is invalid for dataset moodle because: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 11


Dataset: moodle:  80%|████████  | 144/180 [01:00<00:10,  3.52it/s]

remove_outliers_dbscan + features_selection_rfe + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: moodle:  82%|████████▏ | 147/180 [01:01<00:06,  5.16it/s]

remove_outliers_dbscan + features_selection_variance_threshold + oversampling_smote is invalid for dataset moodle because: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 11
remove_outliers_dbscan + features_selection_variance_threshold + oversampling_svm_smote is invalid for dataset moodle because: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 11
remove_outliers_dbscan + features_selection_variance_threshold + oversampling_adasyn is invalid for dataset moodle because: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 11


Dataset: moodle:  83%|████████▎ | 149/180 [01:01<00:04,  6.87it/s]

remove_outliers_dbscan + features_selection_variance_threshold + oversampling_smote_borderline is invalid for dataset moodle because: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 11
remove_outliers_dbscan + features_selection_variance_threshold + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: moodle:  86%|████████▌ | 155/180 [01:04<00:09,  2.69it/s]

remove_outliers_lof + features_selection_none + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: moodle:  90%|█████████ | 162/180 [01:07<00:08,  2.22it/s]

remove_outliers_lof + features_selection_extratrees + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
remove_outliers_lof + features_selection_mutual_information + oversampling_none is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:  92%|█████████▏| 165/180 [01:07<00:03,  4.14it/s]

remove_outliers_lof + features_selection_mutual_information + oversampling_smote is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.
remove_outliers_lof + features_selection_mutual_information + oversampling_svm_smote is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.
remove_outliers_lof + features_selection_mutual_information + oversampling_adasyn is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:  93%|█████████▎| 167/180 [01:07<00:02,  5.60it/s]

remove_outliers_lof + features_selection_mutual_information + oversampling_smote_borderline is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.
remove_outliers_lof + features_selection_mutual_information + oversampling_nc_smote is invalid for dataset moodle because: k should be <= n_features = 9; got 10. Use k='all' to return all features.


Dataset: moodle:  97%|█████████▋| 174/180 [01:12<00:03,  1.81it/s]

remove_outliers_lof + features_selection_rfe + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: moodle: 100%|██████████| 180/180 [01:14<00:00,  2.41it/s]


remove_outliers_lof + features_selection_variance_threshold + oversampling_nc_smote is invalid for dataset moodle because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


Dataset: student_pred:   1%|          | 2/180 [00:00<00:10, 16.87it/s]

remove_outliers_none + features_selection_none + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_none + features_selection_none + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_none + features_selection_none + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_none + features_selection_none + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:   2%|▏         | 4/180 [00:00<00:10, 16.74it/s]

remove_outliers_none + features_selection_none + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_none + features_selection_none + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:   3%|▎         | 6/180 [00:00<00:10, 16.93it/s]

remove_outliers_none + features_selection_extratrees + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_none + features_selection_extratrees + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:   4%|▍         | 8/180 [00:00<00:10, 17.14it/s]

remove_outliers_none + features_selection_extratrees + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_none + features_selection_extratrees + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:   7%|▋         | 12/180 [00:00<00:11, 14.38it/s]

remove_outliers_none + features_selection_extratrees + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_none + features_selection_extratrees + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_none + features_selection_mutual_information + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_none + features_selection_mutual_information + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:   9%|▉         | 16/180 [00:01<00:10, 15.70it/s]

remove_outliers_none + features_selection_mutual_information + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_none + features_selection_mutual_information + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_none + features_selection_mutual_information + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_none + features_selection_mutual_information + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  11%|█         | 20/180 [00:01<00:09, 17.00it/s]

remove_outliers_none + features_selection_rfe + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_none + features_selection_rfe + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_none + features_selection_rfe + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_none + features_selection_rfe + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  12%|█▏        | 22/180 [00:01<00:09, 17.51it/s]

remove_outliers_none + features_selection_rfe + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  13%|█▎        | 24/180 [00:01<00:11, 13.92it/s]

remove_outliers_none + features_selection_rfe + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  14%|█▍        | 26/180 [00:01<00:10, 14.80it/s]

remove_outliers_none + features_selection_variance_threshold + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_none + features_selection_variance_threshold + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_none + features_selection_variance_threshold + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_none + features_selection_variance_threshold + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  17%|█▋        | 30/180 [00:01<00:09, 16.28it/s]

remove_outliers_none + features_selection_variance_threshold + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_none + features_selection_variance_threshold + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_isolation_forest + features_selection_none + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_isolation_forest + features_selection_none + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  19%|█▉        | 34/180 [00:02<00:08, 17.21it/s]

remove_outliers_isolation_forest + features_selection_none + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_isolation_forest + features_selection_none + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_isolation_forest + features_selection_none + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_isolation_forest + features_selection_none + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  21%|██        | 38/180 [00:02<00:09, 15.06it/s]

remove_outliers_isolation_forest + features_selection_extratrees + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_isolation_forest + features_selection_extratrees + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_isolation_forest + features_selection_extratrees + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_isolation_forest + features_selection_extratrees + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  23%|██▎       | 42/180 [00:02<00:08, 16.86it/s]

remove_outliers_isolation_forest + features_selection_extratrees + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_isolation_forest + features_selection_extratrees + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_isolation_forest + features_selection_mutual_information + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_isolation_forest + features_selection_mutual_information + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_isolation_forest + features_selection_mutual_information + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  26%|██▌       | 47/180 [00:02<00:07, 18.18it/s]

remove_outliers_isolation_forest + features_selection_mutual_information + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_isolation_forest + features_selection_mutual_information + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_isolation_forest + features_selection_mutual_information + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_isolation_forest + features_selection_rfe + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  27%|██▋       | 49/180 [00:03<00:07, 18.59it/s]

remove_outliers_isolation_forest + features_selection_rfe + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_isolation_forest + features_selection_rfe + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  31%|███       | 55/180 [00:03<00:07, 16.36it/s]

remove_outliers_isolation_forest + features_selection_rfe + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_isolation_forest + features_selection_rfe + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_isolation_forest + features_selection_rfe + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_isolation_forest + features_selection_variance_threshold + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  32%|███▏      | 57/180 [00:03<00:07, 17.00it/s]

remove_outliers_isolation_forest + features_selection_variance_threshold + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_isolation_forest + features_selection_variance_threshold + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_isolation_forest + features_selection_variance_threshold + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_isolation_forest + features_selection_variance_threshold + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  34%|███▍      | 61/180 [00:03<00:06, 17.49it/s]

remove_outliers_isolation_forest + features_selection_variance_threshold + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_zscore + features_selection_none + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_zscore + features_selection_none + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_zscore + features_selection_none + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  36%|███▌      | 65/180 [00:04<00:08, 14.20it/s]

remove_outliers_zscore + features_selection_none + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_zscore + features_selection_none + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  37%|███▋      | 67/180 [00:04<00:07, 15.22it/s]

remove_outliers_zscore + features_selection_none + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_zscore + features_selection_extratrees + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_zscore + features_selection_extratrees + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_zscore + features_selection_extratrees + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  39%|███▉      | 71/180 [00:04<00:07, 15.46it/s]

remove_outliers_zscore + features_selection_extratrees + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_zscore + features_selection_extratrees + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_zscore + features_selection_extratrees + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_zscore + features_selection_mutual_information + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  42%|████▏     | 75/180 [00:04<00:06, 17.11it/s]

remove_outliers_zscore + features_selection_mutual_information + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_zscore + features_selection_mutual_information + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_zscore + features_selection_mutual_information + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_zscore + features_selection_mutual_information + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  44%|████▍     | 79/180 [00:04<00:07, 13.94it/s]

remove_outliers_zscore + features_selection_mutual_information + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_zscore + features_selection_rfe + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  45%|████▌     | 81/180 [00:05<00:06, 15.28it/s]

remove_outliers_zscore + features_selection_rfe + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_zscore + features_selection_rfe + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_zscore + features_selection_rfe + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_zscore + features_selection_rfe + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  47%|████▋     | 85/180 [00:05<00:06, 15.64it/s]

remove_outliers_zscore + features_selection_rfe + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_zscore + features_selection_variance_threshold + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_zscore + features_selection_variance_threshold + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_zscore + features_selection_variance_threshold + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  49%|████▉     | 89/180 [00:05<00:05, 17.26it/s]

remove_outliers_zscore + features_selection_variance_threshold + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_zscore + features_selection_variance_threshold + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_zscore + features_selection_variance_threshold + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_iqr + features_selection_none + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  51%|█████     | 92/180 [00:05<00:06, 14.31it/s]

remove_outliers_iqr + features_selection_none + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_iqr + features_selection_none + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  53%|█████▎    | 96/180 [00:06<00:05, 15.76it/s]

remove_outliers_iqr + features_selection_none + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_iqr + features_selection_none + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_iqr + features_selection_none + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_iqr + features_selection_extratrees + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  56%|█████▌    | 100/180 [00:06<00:04, 16.65it/s]

remove_outliers_iqr + features_selection_extratrees + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_iqr + features_selection_extratrees + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_iqr + features_selection_extratrees + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_iqr + features_selection_extratrees + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  58%|█████▊    | 104/180 [00:06<00:04, 17.07it/s]

remove_outliers_iqr + features_selection_extratrees + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_iqr + features_selection_mutual_information + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_iqr + features_selection_mutual_information + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_iqr + features_selection_mutual_information + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  59%|█████▉    | 106/180 [00:06<00:04, 17.27it/s]

remove_outliers_iqr + features_selection_mutual_information + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_iqr + features_selection_mutual_information + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  61%|██████    | 110/180 [00:06<00:04, 14.42it/s]

remove_outliers_iqr + features_selection_mutual_information + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_iqr + features_selection_rfe + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_iqr + features_selection_rfe + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_iqr + features_selection_rfe + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  63%|██████▎   | 114/180 [00:07<00:04, 15.48it/s]

remove_outliers_iqr + features_selection_rfe + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_iqr + features_selection_rfe + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_iqr + features_selection_rfe + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  64%|██████▍   | 116/180 [00:07<00:04, 15.80it/s]

remove_outliers_iqr + features_selection_variance_threshold + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_iqr + features_selection_variance_threshold + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_iqr + features_selection_variance_threshold + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_iqr + features_selection_variance_threshold + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  67%|██████▋   | 120/180 [00:07<00:03, 16.70it/s]

remove_outliers_iqr + features_selection_variance_threshold + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_iqr + features_selection_variance_threshold + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_dbscan + features_selection_none + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  69%|██████▉   | 124/180 [00:07<00:03, 14.81it/s]

remove_outliers_dbscan + features_selection_none + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_dbscan + features_selection_none + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_dbscan + features_selection_none + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_dbscan + features_selection_none + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_dbscan + features_selection_none + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  71%|███████   | 128/180 [00:08<00:03, 16.21it/s]

remove_outliers_dbscan + features_selection_extratrees + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_dbscan + features_selection_extratrees + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_dbscan + features_selection_extratrees + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_dbscan + features_selection_extratrees + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  73%|███████▎  | 132/180 [00:08<00:02, 16.69it/s]

remove_outliers_dbscan + features_selection_extratrees + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_dbscan + features_selection_extratrees + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_dbscan + features_selection_mutual_information + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_dbscan + features_selection_mutual_information + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  74%|███████▍  | 134/180 [00:08<00:02, 17.22it/s]

remove_outliers_dbscan + features_selection_mutual_information + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_dbscan + features_selection_mutual_information + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  77%|███████▋  | 138/180 [00:08<00:03, 13.90it/s]

remove_outliers_dbscan + features_selection_mutual_information + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_dbscan + features_selection_mutual_information + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_dbscan + features_selection_rfe + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_dbscan + features_selection_rfe + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  79%|███████▉  | 142/180 [00:08<00:02, 15.68it/s]

remove_outliers_dbscan + features_selection_rfe + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_dbscan + features_selection_rfe + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_dbscan + features_selection_rfe + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_dbscan + features_selection_rfe + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  81%|████████  | 146/180 [00:09<00:02, 16.01it/s]

remove_outliers_dbscan + features_selection_variance_threshold + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_dbscan + features_selection_variance_threshold + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_dbscan + features_selection_variance_threshold + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_dbscan + features_selection_variance_threshold + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  82%|████████▏ | 148/180 [00:09<00:01, 16.55it/s]

remove_outliers_dbscan + features_selection_variance_threshold + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_dbscan + features_selection_variance_threshold + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  84%|████████▍ | 152/180 [00:09<00:02, 13.86it/s]

remove_outliers_lof + features_selection_none + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_lof + features_selection_none + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_lof + features_selection_none + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_lof + features_selection_none + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  87%|████████▋ | 156/180 [00:09<00:01, 15.77it/s]

remove_outliers_lof + features_selection_none + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_lof + features_selection_none + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_lof + features_selection_extratrees + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_lof + features_selection_extratrees + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  89%|████████▉ | 160/180 [00:10<00:01, 15.58it/s]

remove_outliers_lof + features_selection_extratrees + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_lof + features_selection_extratrees + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_lof + features_selection_extratrees + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_lof + features_selection_extratrees + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  91%|█████████ | 164/180 [00:10<00:00, 16.43it/s]

remove_outliers_lof + features_selection_mutual_information + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_lof + features_selection_mutual_information + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_lof + features_selection_mutual_information + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  93%|█████████▎| 168/180 [00:10<00:00, 13.69it/s]

remove_outliers_lof + features_selection_mutual_information + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_lof + features_selection_mutual_information + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_lof + features_selection_mutual_information + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  94%|█████████▍| 170/180 [00:10<00:00, 14.58it/s]

remove_outliers_lof + features_selection_rfe + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_lof + features_selection_rfe + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_lof + features_selection_rfe + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_lof + features_selection_rfe + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  97%|█████████▋| 174/180 [00:11<00:00, 15.70it/s]

remove_outliers_lof + features_selection_rfe + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_lof + features_selection_rfe + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_lof + features_selection_variance_threshold + oversampling_none is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_lof + features_selection_variance_threshold + oversampling_smote is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred:  99%|█████████▉| 178/180 [00:11<00:00, 14.97it/s]

remove_outliers_lof + features_selection_variance_threshold + oversampling_svm_smote is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_lof + features_selection_variance_threshold + oversampling_adasyn is invalid for dataset student_pred because: Input y contains NaN.
remove_outliers_lof + features_selection_variance_threshold + oversampling_smote_borderline is invalid for dataset student_pred because: Input y contains NaN.


Dataset: student_pred: 100%|██████████| 180/180 [00:11<00:00, 15.52it/s]

remove_outliers_lof + features_selection_variance_threshold + oversampling_nc_smote is invalid for dataset student_pred because: Input y contains NaN.





# TO-DO

- implementar cross-validation
- limpiar los NaN de los datasets
- en feat_selection se usa k=10. cambiar esto a tomar los mas relevantes calculando k en funcion de cada escenario Y/O hacer k = min(n_feat, 10) pq hay datasets que tienen menos de 10 feats
- añadir modelos: algo de ensemble (boosting y bagging) y red neuronal mas turbia
- graficas para los mejores resultados de cada dataset
- graficas de aprendizaje
- añadir roc_auc a las metricas