In [7]:
from preprocessing import get_all_configurations, PREPROCESSING_CONFIGURATIONS
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from config import KAGGLE_DATA_PATH, KAGGLE_IGNORED_LABELS, KAGGLE_TARGET
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
)
from tqdm import tqdm
import numpy as np
from sklearn.discriminant_analysis import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.model_selection import GridSearchCV, learning_curve, train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
import pickle
import warnings

warnings.simplefilter(action="ignore", category=Warning)


def _generate_config_key(config):
    return " + ".join(func.__name__ for func in config)


def _prepare_data(
    datapath,
    target_label,
    columns_to_ignore=None,
    labels_to_ignore=None,
    test_size=0.2,
    random_state=42,
):
    # Read the dataset
    df = pd.read_csv(datapath)

    # Drop specified columns
    if columns_to_ignore:
        df.drop(columns=columns_to_ignore, inplace=True)

    # Drop rows with invalid categories in the target label
    if labels_to_ignore:
        df = df[~df[target_label].isin(labels_to_ignore)]

    # Check if target variable is categorical and convert to numerical if true
    if df[target_label].dtype == "object":
        le = LabelEncoder()
        df[target_label] = le.fit_transform(df[target_label])

    # Split features and target variable
    X = df.drop(columns=target_label)
    y = df[target_label]

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    return X_train, y_train, X_test, y_test


def _apply_techniques(config, X_train_, y_train, X_test, y_test):
    outliers_detection_technique = config[0]
    features_selection_technique = config[1]
    oversampling_technique = config[2]

    # Apply outlier removal
    X_train_, y_train = outliers_detection_technique(X_train_, y_train)

    # Apply features selection
    X_train_, y_train, X_test, y_test = features_selection_technique(
        X_train_, y_train, X_test, y_test
    )

    # Apply oversampling
    X_train_, y_train = oversampling_technique(X_train_, y_train)

    return X_train_, y_train, X_test, y_test


def get_data_for_config(config):
    X_train, y_train, X_test, y_test = _prepare_data(
        KAGGLE_DATA_PATH,
        KAGGLE_TARGET,
        labels_to_ignore=KAGGLE_IGNORED_LABELS,
        test_size=0.8,
    )
    X_train, y_train, X_test, y_test = _apply_techniques(
        config, X_train, y_train, X_test, y_test
    )
    # print(f"Size of X_train: {X_train.shape}")
    # print(f"Size of y_train: {y_train.shape}")
    # print(f"Size of X_test: {X_test.shape}")
    # print(f"Size of y_test: {y_test.shape}")
    # # Printing proportions
    # train_counts = y_train.value_counts(normalize=True)
    # test_counts = y_test.value_counts(normalize=True)

    # print(
    #     f"Proportion in y_train (positive:negative): {train_counts.get(1, 0):.2f} : {train_counts.get(0, 0):.2f}"
    # )
    # print(
    #     f"Proportion in y_test (positive:negative): {test_counts.get(1, 0):.2f} : {test_counts.get(0, 0):.2f}"
    # )
    return X_train, y_train, X_test, y_test

In [8]:
# Define the metrics
def compute_metrics(y_true, y_pred):
    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred),
        "recall": recall_score(y_true, y_pred),
        "f1": f1_score(y_true, y_pred),
        "confusion_matrix": confusion_matrix(y_true, y_pred),
    }


# Training function
def train_classifiers(X_train, y_train, X_test, y_test, config_key):
    results = {}
    for classifier_name, classifier_info in classifiers.items():
        try:
            clf = classifier_info["model"]
            clf.fit(X_train, y_train)
            predictions = clf.predict(X_test)
            metrics = compute_metrics(y_test, predictions)

            results[classifier_name] = {
                "metrics": metrics,
                "data": {
                    "X_train": X_train,
                    "y_train": y_train,
                    "X_test": X_test,
                    "y_test": y_test,
                    "y_pred": predictions,
                },
            }
        except Exception as e:
            print(
                f"{classifier_name} failed to train with configuration {config_key} because: {e}"
            )
    return results


classifiers = {
    "RandomForest": {
        "model": RandomForestClassifier(),
    },
    "K-nearest-neighbor": {
        "model": KNeighborsClassifier(),
    },
    "Artificial Neural Network": {
        "model": MLPClassifier(),
    },
    "Decision Tree": {
        "model": DecisionTreeClassifier(),
    },
    "Logistic Regression": {
        "model": LogisticRegression(),
    },
    "Support Vector Machine": {
        "model": SVC(),  # https: //www.kaggle.com/code/sunayanagawde/ml-algorithms-usage-and-prediction?scriptVersionId=120249289&cellId=62
    },
    "Naive Bayes": {
        "model": GaussianNB(),
    },
    "XG-boost": {
        "model": XGBClassifier(),
    },
}

# Main dictionary to store results
all_results = {}

for config in tqdm(PREPROCESSING_CONFIGURATIONS):
    try:
        config_key = _generate_config_key(config)
        X_train, y_train, X_test, y_test = get_data_for_config(config)
        all_results[config_key] = train_classifiers(
            X_train, y_train, X_test, y_test, config_key
        )
    except Exception as e:
        print(f"{config_key} is invalid for this dataset because: {e}")

# Serialize results
with open(f"all_results.pkl", "wb") as f:
    pickle.dump(all_results, f)

  0%|          | 0/180 [00:00<?, ?it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_none + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


  1%|          | 1/180 [00:01<03:04,  1.03s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_none + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


  1%|          | 2/180 [00:02<03:25,  1.15s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_none + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


  2%|▏         | 3/180 [00:03<03:27,  1.17s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_none + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


  2%|▏         | 4/180 [00:04<03:28,  1.19s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_none + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


  3%|▎         | 5/180 [00:05<03:29,  1.20s/it]

remove_outliers_none + features_selection_none + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


  7%|▋         | 12/180 [00:12<02:27,  1.14it/s]

remove_outliers_none + features_selection_extratrees + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


 10%|█         | 18/180 [00:19<02:39,  1.01it/s]

remove_outliers_none + features_selection_mutual_information + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_rfe + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


 11%|█         | 19/180 [00:21<03:32,  1.32s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_rfe + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


 11%|█         | 20/180 [00:23<04:01,  1.51s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_rfe + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


 12%|█▏        | 21/180 [00:25<04:25,  1.67s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_rfe + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


 12%|█▏        | 22/180 [00:27<04:39,  1.77s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_rfe + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


 13%|█▎        | 24/180 [00:30<04:06,  1.58s/it]

remove_outliers_none + features_selection_rfe + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_variance_threshold + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


 14%|█▍        | 25/180 [00:31<03:41,  1.43s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_variance_threshold + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


 14%|█▍        | 26/180 [00:32<03:30,  1.36s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_variance_threshold + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


 15%|█▌        | 27/180 [00:33<03:21,  1.32s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_variance_threshold + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


 16%|█▌        | 28/180 [00:35<03:17,  1.30s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_none + features_selection_variance_threshold + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


 16%|█▌        | 29/180 [00:36<03:14,  1.29s/it]

remove_outliers_none + features_selection_variance_threshold + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_none + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


 17%|█▋        | 31/180 [00:37<02:23,  1.04it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_none + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


 18%|█▊        | 32/180 [00:38<02:35,  1.05s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_none + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


 18%|█▊        | 33/180 [00:40<02:45,  1.13s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_none + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


 19%|█▉        | 34/180 [00:41<02:51,  1.18s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_none + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


 20%|██        | 36/180 [00:43<02:11,  1.10it/s]

remove_outliers_isolation_forest + features_selection_none + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


 23%|██▎       | 42/180 [00:49<02:15,  1.02it/s]

remove_outliers_isolation_forest + features_selection_extratrees + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


 27%|██▋       | 48/180 [00:56<02:12,  1.00s/it]

remove_outliers_isolation_forest + features_selection_mutual_information + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_rfe + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


 27%|██▋       | 49/180 [00:59<02:55,  1.34s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_rfe + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


 28%|██▊       | 50/180 [01:01<03:33,  1.65s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_rfe + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


 28%|██▊       | 51/180 [01:03<03:53,  1.81s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_rfe + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


 29%|██▉       | 52/180 [01:05<04:03,  1.90s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_rfe + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


 30%|███       | 54/180 [01:08<03:30,  1.67s/it]

remove_outliers_isolation_forest + features_selection_rfe + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_variance_threshold + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


 31%|███       | 55/180 [01:09<03:08,  1.51s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_variance_threshold + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


 31%|███       | 56/180 [01:11<02:59,  1.45s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_variance_threshold + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


 32%|███▏      | 57/180 [01:12<02:54,  1.42s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_variance_threshold + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


 32%|███▏      | 58/180 [01:13<02:50,  1.39s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_isolation_forest + features_selection_variance_threshold + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


 33%|███▎      | 60/180 [01:15<02:00,  1.01s/it]

remove_outliers_isolation_forest + features_selection_variance_threshold + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_none + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


 34%|███▍      | 61/180 [01:16<01:54,  1.04it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_none + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


 34%|███▍      | 62/180 [01:17<01:54,  1.03it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_none + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


 35%|███▌      | 63/180 [01:18<01:54,  1.02it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_none + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


 36%|███▌      | 64/180 [01:19<01:52,  1.03it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_none + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


 36%|███▌      | 65/180 [01:20<01:51,  1.03it/s]

remove_outliers_zscore + features_selection_none + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


 40%|████      | 72/180 [01:26<01:26,  1.24it/s]

remove_outliers_zscore + features_selection_extratrees + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


 43%|████▎     | 78/180 [01:31<01:22,  1.24it/s]

remove_outliers_zscore + features_selection_mutual_information + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_rfe + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


 44%|████▍     | 79/180 [01:33<01:42,  1.02s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_rfe + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


 44%|████▍     | 80/180 [01:34<02:00,  1.21s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_rfe + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


 45%|████▌     | 81/180 [01:36<02:13,  1.35s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_rfe + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


 46%|████▌     | 82/180 [01:38<02:29,  1.52s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_rfe + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


 47%|████▋     | 84/180 [01:41<02:08,  1.34s/it]

remove_outliers_zscore + features_selection_rfe + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_variance_threshold + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


 47%|████▋     | 85/180 [01:41<01:54,  1.21s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_variance_threshold + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


 48%|████▊     | 86/180 [01:42<01:48,  1.15s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_variance_threshold + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


 48%|████▊     | 87/180 [01:44<01:44,  1.12s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_variance_threshold + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


 49%|████▉     | 88/180 [01:44<01:38,  1.07s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_zscore + features_selection_variance_threshold + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


 49%|████▉     | 89/180 [01:46<01:36,  1.06s/it]

remove_outliers_zscore + features_selection_variance_threshold + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


 51%|█████     | 91/180 [01:46<00:59,  1.49it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_none + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'
K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_none + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


 51%|█████     | 92/180 [01:47<01:02,  1.42it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_none + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


 52%|█████▏    | 93/180 [01:47<01:00,  1.43it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_none + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


 52%|█████▏    | 94/180 [01:48<01:01,  1.39it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_none + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


 53%|█████▎    | 95/180 [01:49<01:02,  1.36it/s]

remove_outliers_iqr + features_selection_none + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


 57%|█████▋    | 102/180 [01:54<00:49,  1.58it/s]

remove_outliers_iqr + features_selection_extratrees + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


 60%|██████    | 108/180 [01:58<00:43,  1.67it/s]

remove_outliers_iqr + features_selection_mutual_information + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_rfe + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


 61%|██████    | 109/180 [01:59<00:50,  1.40it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_rfe + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


 61%|██████    | 110/180 [02:00<01:00,  1.15it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_rfe + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


 62%|██████▏   | 111/180 [02:01<01:07,  1.02it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_rfe + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


 62%|██████▏   | 112/180 [02:02<01:09,  1.02s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_rfe + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


 63%|██████▎   | 114/180 [02:04<01:01,  1.08it/s]

remove_outliers_iqr + features_selection_rfe + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_variance_threshold + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


 64%|██████▍   | 115/180 [02:04<00:52,  1.25it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_variance_threshold + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


 64%|██████▍   | 116/180 [02:05<00:50,  1.28it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_variance_threshold + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


 65%|██████▌   | 117/180 [02:06<00:46,  1.36it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_variance_threshold + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


 66%|██████▌   | 118/180 [02:07<00:45,  1.35it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_iqr + features_selection_variance_threshold + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


 69%|██████▉   | 125/180 [02:08<00:10,  5.02it/s]

remove_outliers_iqr + features_selection_variance_threshold + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
RandomForest failed to train with configuration remove_outliers_dbscan + features_selection_none + oversampling_none because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by RandomForestClassifier.
K-nearest-neighbor failed to train with configuration remove_outliers_dbscan + features_selection_none + oversampling_none because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by KNeighborsClassifier.
Artificial Neural Network failed to train with configuration remove_outliers_dbscan + features_selection_none + oversampling_none because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by MLPClassifier.
Decision Tree failed to train with configuration remove_outliers_dbscan + fe

 71%|███████   | 128/180 [02:08<00:07,  7.13it/s]

remove_outliers_dbscan + features_selection_none + oversampling_nc_smote is invalid for this dataset because: Found array with 0 sample(s) while a minimum of 1 is required.
remove_outliers_dbscan + features_selection_extratrees + oversampling_none is invalid for this dataset because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by ExtraTreesClassifier.
remove_outliers_dbscan + features_selection_extratrees + oversampling_smote is invalid for this dataset because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by ExtraTreesClassifier.
remove_outliers_dbscan + features_selection_extratrees + oversampling_svm_smote is invalid for this dataset because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by ExtraTreesClassifier.
remove_outliers_dbscan + features_selection_extratrees + oversampling_adasyn is invalid for this dataset because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum o

 76%|███████▌  | 136/180 [02:08<00:03, 14.45it/s]

remove_outliers_dbscan + features_selection_extratrees + oversampling_nc_smote is invalid for this dataset because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by ExtraTreesClassifier.
remove_outliers_dbscan + features_selection_mutual_information + oversampling_none is invalid for this dataset because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required.
remove_outliers_dbscan + features_selection_mutual_information + oversampling_smote is invalid for this dataset because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required.
remove_outliers_dbscan + features_selection_mutual_information + oversampling_svm_smote is invalid for this dataset because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required.
remove_outliers_dbscan + features_selection_mutual_information + oversampling_adasyn is invalid for this dataset because: Found array with 0 sample(s) (shape=(0, 34)) while a min

 80%|████████  | 144/180 [02:08<00:01, 21.44it/s]

remove_outliers_dbscan + features_selection_rfe + oversampling_smote is invalid for this dataset because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by RFE.
remove_outliers_dbscan + features_selection_rfe + oversampling_svm_smote is invalid for this dataset because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by RFE.
remove_outliers_dbscan + features_selection_rfe + oversampling_adasyn is invalid for this dataset because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by RFE.
remove_outliers_dbscan + features_selection_rfe + oversampling_smote_borderline is invalid for this dataset because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by RFE.
remove_outliers_dbscan + features_selection_rfe + oversampling_nc_smote is invalid for this dataset because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by RFE.
remove_outliers_dbsca

 82%|████████▏ | 148/180 [02:08<00:01, 24.43it/s]

remove_outliers_dbscan + features_selection_variance_threshold + oversampling_adasyn is invalid for this dataset because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by VarianceThreshold.
remove_outliers_dbscan + features_selection_variance_threshold + oversampling_smote_borderline is invalid for this dataset because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by VarianceThreshold.
remove_outliers_dbscan + features_selection_variance_threshold + oversampling_nc_smote is invalid for this dataset because: Found array with 0 sample(s) (shape=(0, 34)) while a minimum of 1 is required by VarianceThreshold.
K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_none + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'
K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_none + oversampling_smote because: 'Flags' object h

 84%|████████▍ | 152/180 [02:11<00:06,  4.57it/s]

K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_none + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'
K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_none + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'
K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_none + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


 86%|████████▌ | 155/180 [02:14<00:11,  2.16it/s]

remove_outliers_lof + features_selection_none + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


 90%|█████████ | 162/180 [02:21<00:13,  1.35it/s]

remove_outliers_lof + features_selection_extratrees + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.


 93%|█████████▎| 168/180 [02:28<00:11,  1.07it/s]

remove_outliers_lof + features_selection_mutual_information + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_rfe + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


 94%|█████████▍| 169/180 [02:30<00:15,  1.38s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_rfe + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


 94%|█████████▍| 170/180 [02:32<00:16,  1.60s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_rfe + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


 95%|█████████▌| 171/180 [02:34<00:15,  1.72s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_rfe + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


 96%|█████████▌| 172/180 [02:36<00:14,  1.81s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_rfe + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


 97%|█████████▋| 174/180 [02:39<00:09,  1.59s/it]

remove_outliers_lof + features_selection_rfe + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_variance_threshold + oversampling_none because: 'Flags' object has no attribute 'c_contiguous'


 97%|█████████▋| 175/180 [02:40<00:07,  1.44s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_variance_threshold + oversampling_smote because: 'Flags' object has no attribute 'c_contiguous'


 98%|█████████▊| 176/180 [02:42<00:05,  1.40s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_variance_threshold + oversampling_svm_smote because: 'Flags' object has no attribute 'c_contiguous'


 98%|█████████▊| 177/180 [02:43<00:04,  1.36s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_variance_threshold + oversampling_adasyn because: 'Flags' object has no attribute 'c_contiguous'


 99%|█████████▉| 178/180 [02:44<00:02,  1.32s/it]

K-nearest-neighbor failed to train with configuration remove_outliers_lof + features_selection_variance_threshold + oversampling_smote_borderline because: 'Flags' object has no attribute 'c_contiguous'


100%|██████████| 180/180 [02:45<00:00,  1.09it/s]


remove_outliers_lof + features_selection_variance_threshold + oversampling_nc_smote is invalid for this dataset because: SMOTE-NC is not designed to work only with numerical features. It requires some categorical features.
