In [1]:
!pip install imbalanced-learn
from imblearn.datasets import fetch_datasets
X, Y = [], []
# dataset_names = ['ecoli']
dataset_names = ['ecoli', 'optical_digits',
                 'satimage', 'pen_digits',
                 'abalone', 'sick_euthyroid', 'spectrometer',
                 'car_eval_34', 'isolet', 'us_crime', 'yeast_ml8',
                 'scene']
#dataset_names = ['ecoli']
for ds_name in dataset_names:
    var = fetch_datasets()[ds_name]
    X.append(var['data'])
    Y.append(var['target'])



In [2]:
from sklearn.model_selection import RepeatedStratifiedKFold
# from ipynb.fs.full.data_wrangling import * #Data preprocessing notebook
#from ipynb.fs.full.data_preparation import *
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import make_scorer
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
import statistics
from itertools import chain
from collections import Counter
from sklearn.model_selection import GridSearchCV
import pandas as pd

SCORINGS = {
    'f1': make_scorer(f1_score, average = None),
    'precision': make_scorer(precision_score, average = None),
    'recall': make_scorer(recall_score, average = None),
    'roc_auc': make_scorer(roc_auc_score, average = None)
}

"""
Config individual experiment
"""

MODELS_TO_OPTIMIZE = ["K-Nearest Neighbors", "Logistic Regression", "Support Vector Machines",
                     "Naive Bayes", "Decision Tree", "AdaBoost", "Random Forest",  "Bagging (DT)",
                     "Extra Trees"]

POSSIBLE_ESTIMATORS_FOR_ADABOOST = ["Decision Tree", "Random Forest", "Support Vector Machines"]

cv_splits = 10
repetitions = 1
RANDOM_STATE = 42
cross_validation_setting = RepeatedStratifiedKFold(n_splits=cv_splits,
                                                   n_repeats=repetitions,
                                                   random_state= RANDOM_STATE)
        
def cross_validate(model, X_train, X_test, y_train, y_test, metric):
    for index in range(len(X_train)):
        model.fit(X_train, y_train)
        Y_pred = model.predict(X_test)


def model_evaluation(model, features, target, 
                     cv = cross_validation_setting):
    scores = dict()
    formatted_scores = dict()
    formatted_scores['model'] = model
    for scoring_name, scoring_function in SCORINGS.items():
        scores[score_metric] = cross_validate(model, X, Y, 
                                              scoring = scoring_function,
                                              cv = cross_validation_setting)
        return scores[score_metric]

def get_scores(Y_pred, Y_true):
    f1 = f1_score(Y_true, Y_pred, average=None)
    precision = precision_score(Y_true, Y_pred,
                                average = None)
    recall = recall_score(Y_true, Y_pred, 
                          average = None)
    roc = roc_auc_score(Y_true, Y_pred, 
                        average = None)
    acc = accuracy_score(Y_true, Y_pred)
    
    return f1, precision, recall, roc, acc

def convert_df(X, Y):
    return X.to_numpy(), Y.to_numpy()

def fault_cases(predictions, answers, indexes):
    failed_cases = []
    for i in range(len(predictions)):
        if (predictions[i] != answers[i]):
            failed_cases.append(indexes[i])
    return failed_cases

def get_best_estimator(model, param_grid, X, Y):
    model_gridsearch = GridSearchCV(model, param_grid#,
                                    #refit = True
                                   )
    return model_gridsearch.fit(X, Y).best_estimator_
    
def evaluate_model(dataset_name, models,
                   hyperparameters_grid, X, Y, sk_fold,
                   metric = "F1 Average B", sampling = ''):
    
    best_metric_score = 0
    best_failed_cases = []
    estimators = dict()
    folds = sk_fold.split(X, Y)
    model_names = list(models.keys())
    aux_table = pd.DataFrame()
    for model_name in model_names:
        
        if model_name in MODELS_TO_OPTIMIZE:
            estimator = get_best_estimator(models[model_name],
                                           hyperparameters_grid[model_name],
                                           X, Y)
        else:
            estimator = model[model_name]

        f1_list, precision_list, recall_list, auc_list, acc_list = [], [], [], [], []
        aux_failed_cases_index, failed_cases_index = [], []
        for train_index, test_index in sk_fold.split(X, Y):

            model_estimator = estimator
            
            X_train, X_test = X[train_index], X[test_index]
            Y_train, Y_test = Y[train_index], Y[test_index]
            
            model_estimator.fit(X_train, Y_train)
            Y_pred = model_estimator.predict(X_test)
            
            f1, precision, recall, roc, acc = get_scores(Y_pred, Y_test)

            failed_cases_index.append(fault_cases(Y_pred, Y_test, test_index))
            f1_list.append(f1)
            precision_list.append(precision)
            recall_list.append(recall)
            auc_list.append(roc)
            acc_list.append(acc)
            
        estimators[model_name] = estimator
            
        aux_table, aux_failed_cases_index =  format_return(dataset_name, model_name,
                                                           f1_list, precision_list,
                                                           recall_list, auc_list,
                                                           acc_list, metric,
                                                           sampling), failed_cases_index[0]
        if aux_table.iloc[0][metric] > best_metric_score:
            best_model_table = aux_table
            best_metric_score = aux_table.iloc[0][metric]
            best_failed_cases = aux_failed_cases_index
            best_estimator = estimators[model_name]
            best_estimator_name = model_name
            
    return estimators, best_model_table, best_failed_cases, best_estimator, best_estimator_name

WEAK_LEARNER_COLUMNS = ["Dataset Name", "Base Model Name", "F1 Average B", "F1 Class 0 B", "F1 Class 1 B",
           "Recall Average B", "Recall Class 0 B", "Recall Class 1 B",
           "Precision Average B", "Precision Class 0 B", "Precision Class 1 B",
           "AUC B", "Accuracy B"]

ENSEMBLE_COLUMNS = ["Dataset Name", "Ensemble Model Name", "F1 Average E", "F1 Class 0 E", "F1 Class 1 E",
                   "Recall Average E", "Recall Class 0 E", "Recall Class 1 E",
                   "Precision Average E", "Precision Class 0 E", "Precision Class 1 E",
                   "AUC E", "Accuracy E"]

RESULT = ["Dataset Name", "Base Model Name", "F1 Average B", "F1 Class 0 B", "F1 Class 1 B",
           "Recall Average B", "Recall Class 0 B", "Recall Class 1 B",
           "Precision Average B", "Precision Class 0 B", "Precision Class 1 B",
           "AUC B", "Accuracy B", "Ensemble Model Name", "F1 Average E", "F1 Class 0 E", "F1 Class 1 E",
           "Recall Average E", "Recall Class 0 E", "Recall Class 1 E",
           "Precision Average E", "Precision Class 0 E", "Precision Class 1 E",
           "AUC E", "Accuracy E"]

def format_return(dataset_name, model_name, f1_list, precision_list,
                  recall_list, auc_list, acc_list, metric, sampling = ''):
    if (metric == WEAK_LEARNER_METRIC):
        COLUMNS = WEAK_LEARNER_COLUMNS
    else:
        COLUMNS = ENSEMBLE_COLUMNS
    dataframe = pd.DataFrame(columns = COLUMNS)
    dataframe_line = []
    dataframe_line.append(dataset_name + sampling)
    dataframe_line.append(model_name)
    aux = []
    lists_of_score_list = []
    lists_of_score_list.append(f1_list)
    lists_of_score_list.append(precision_list)
    lists_of_score_list.append(recall_list)
    for score_list in lists_of_score_list:
        dataframe_line.extend((statistics.mean(flatten_list(score_list)),
                               statistics.mean([score[0] for score in score_list]),
                               statistics.mean([score[1] for score in score_list])))

    dataframe_line.append(statistics.mean(auc_list))
    dataframe_line.append(statistics.mean(acc_list))
    return dataframe.append(pd.Series(dataframe_line, index = COLUMNS),
                            ignore_index = True)

def flatten_list(lista):
    return [value for sublist in lista for value in sublist]
    
    

    
        
        
        


    
    
    

In [3]:
"""
Config for every experiment
"""
RANDOM_STATE = 0
VOTING_METHOD = 'hard'
#results = pd.DataFrame(columns = COLUMNS)

param_grids = dict()
weak_learners_base_models = dict()


In [4]:

"""
Weak Learners
"""

"""
KNN
"""
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

k_range = list(range(1,31))
weight_options = ["uniform", "distance"]

knn_grid = dict(n_neighbors = k_range, 
                weights = weight_options)

param_grids["K-Nearest Neighbors"] = knn_grid
knn = KNeighborsClassifier()
weak_learners_base_models["K-Nearest Neighbors"] = knn

"""
LR
"""
from sklearn.linear_model import LogisticRegression

log_reg_grid={'C':[0.001,0.01,.09,1,5,10],
              "penalty":["l1","l2"]} #l1 lasso l2 ridge
lr = LogisticRegression(random_state=RANDOM_STATE)

param_grids["Logistic Regression"] = log_reg_grid
weak_learners_base_models["Logistic Regression"] = lr

"""
Support Vector Machines
"""
from sklearn.svm import SVC
from sklearn.svm import LinearSVC

svm_grid = {'C': [0.1, 1, 10, 100, 1000], 
            'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
            'kernel': ['rbf']}

param_grids["Support Vector Machines"] = svm_grid
weak_learners_base_models["Support Vector Machines"] = SVC()
"""
Naive Bayes
"""
from sklearn.naive_bayes import GaussianNB
nb = GaussianNB()
nb_params = {'var_smoothing': np.logspace(0,-9, num=100)}

weak_learners_base_models["Naive Bayes"] = nb
param_grids["Naive Bayes"] = nb_params

"""
DECISION TREES
"""
from sklearn.tree import DecisionTreeClassifier
tree_parameters = {'criterion':['gini','entropy'],
             'max_depth':[4,5,10]}

param_grids['Decision Tree'] = tree_parameters

dt = DecisionTreeClassifier(random_state=RANDOM_STATE)
weak_learners_base_models['Decision Tree'] = dt




In [5]:
def generate_ensembles(best_base_estimators, best_estimator, best_estimator_name):
    """
    ENSEMBLES
    """
    #AdaBoost
    from sklearn.ensemble import AdaBoostClassifier

    ensembles = dict()
    ensembles_param_grids = dict()

    ab_param_grid = {
        'n_estimators' : [400, 600],
        'learning_rate' : [1e-3, 1e-2, 1e-1, 1],
        'algorithm' : ['SAMME']
    }
    if best_estimator_name in POSSIBLE_ESTIMATORS_FOR_ADABOOST:
        ab_model = AdaBoostClassifier(base_estimator = best_estimator,
                                      random_state = RANDOM_STATE)
    else:
        ab_model = AdaBoostClassifier(base_estimator = best_base_estimators['Support Vector Machines'],
                                      random_state = RANDOM_STATE)

    ensembles_param_grids['AdaBoost'] = ab_param_grid
    ensembles['AdaBoost'] = ab_model


    #RandomForest
    from sklearn.ensemble import RandomForestClassifier

    rf_grid = {
        'criterion' : ['gini', 'entropy'],
#         'max_depth': [4, 5, 6],
        'min_samples_leaf': [3, 5, 10],
#         'min_samples_split': [8, 10, 12],
        'n_estimators': [200, 400, 600],
        'max_features' : ['auto', 'log2']
    }

    rf = RandomForestClassifier(random_state = RANDOM_STATE)

    ensembles_param_grids['Random Forest'] = rf_grid
    ensembles['Random Forest'] = rf

    #Bagging
    from sklearn.ensemble import BaggingClassifier

    bagging_param_grid = {
        #'base_estimator__max_depth' : [ 4, 5],
        'max_samples' : [0.05, 0.1, 0.2, 0.5]
    }

    bg_clf = BaggingClassifier(base_estimator=best_estimator,
                               random_state = RANDOM_STATE)

    ensembles_param_grids['Bagging (DT)'] = bagging_param_grid
    ensembles['Bagging (DT)'] = bg_clf

    #Extra Trees
    from sklearn.ensemble import ExtraTreesClassifier

    et_grid = {
        'max_depth': [4, 5, 6],
        'min_samples_leaf': [3, 4, 5],
        'min_samples_split': [8, 10, 12],
        'n_estimators': [200, 400],
        'oob_score': [True, False]
    }

    et_clf = ExtraTreesClassifier(random_state = RANDOM_STATE)

    ensembles_param_grids['Extra Trees'] = et_grid

    ensembles['Extra Trees'] = et_clf

    #Stacking 

    """Stacking Ensemble"""
    from sklearn.ensemble import StackingClassifier

    stacking_estimators = [
        ('lr', best_base_estimators['Logistic Regression']),
        ('knn', best_base_estimators['K-Nearest Neighbors']),
        ('svm', best_base_estimators['Support Vector Machines']),
        ('gnb', best_base_estimators['Naive Bayes']),
        ('dt',  best_base_estimators['Decision Tree'])
    ]

    final_estimator = best_estimator

    stacking_model = StackingClassifier(estimators = stacking_estimators,
                                        final_estimator = final_estimator)

    """Voting Ensemble"""
    from sklearn.ensemble import VotingClassifier

    voting_estimators = [
        ('lr', best_base_estimators['Logistic Regression']),
        ('knn', best_base_estimators['K-Nearest Neighbors']),
        ('svm', best_base_estimators['Support Vector Machines']),
        ('gnb', best_base_estimators['Naive Bayes']),
        ('dt', best_base_estimators['Decision Tree'])
    ]

    VOTING_METHOD = 'hard'

    voting_classifier = VotingClassifier(voting_estimators,
                                         voting=VOTING_METHOD)
    
    return ensembles, ensembles_param_grids

In [6]:
dataframe = pd.DataFrame(columns = WEAK_LEARNER_COLUMNS)
dataframe2 = pd.DataFrame(columns = ENSEMBLE_COLUMNS)
result = pd.DataFrame(columns = RESULT)
cases = []
WEAK_LEARNER_METRIC = "F1 Average B"
ENSEMBLE_LEARNER_METRIC = "F1 Average E"
best_base_estimators = dict()
best_base_clfs = []
for index in range(len(dataset_names)):
    data = X[index]
    target = Y[index]
    
    best_estimators, model_scores, _, current_best_clf, best_estimator_name = evaluate_model(
                   dataset_names[index], weak_learners_base_models, param_grids, data, target,
                   cross_validation_setting, metric = WEAK_LEARNER_METRIC, sampling = '')
    
    #cases.append(current_case)
    print(current_best_clf)
    
    ensembles, ensembles_param_grids = generate_ensembles(best_estimators,
                                                          current_best_clf,
                                                          best_estimator_name)
    _, ensemble_scores, ensemble_error_cases, _, _ = evaluate_model(dataset_names[index], 
                   ensembles, ensembles_param_grids, data, target,
                   cross_validation_setting, metric = ENSEMBLE_LEARNER_METRIC)
    print(ensemble_scores)
    dataframe = dataframe.append(model_scores)
    dataframe2 = dataframe2.append(ensemble_scores)
    
# aux_df = dataframe
# aux_df = aux_df.merge(dataframe2, on = 'Dataset Name', how = 'outer')
# result = aux_df
# print(result)
    

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File

        nan 0.8928446         nan 0.90763828        nan 0.90759438]
  _warn_prf(average, modifier, msg_start, len(result))


DecisionTreeClassifier(criterion='entropy', max_depth=10, random_state=0)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent c

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

        nan 0.89582968        nan 0.89582968        nan 0.89582968
        nan 0.89582968        nan 0.89582968        nan 0.89582968
        nan 0.89582968        nan 0.89582968        nan 0.89582968
        nan 0.89582968        nan 0.89582968        nan 0.89582968
        nan 0.89582968        nan 0.89582968        nan 0.89582968
        nan 0.89582968        nan 0.89582968        nan 0.89582968
        nan 0.89582968        nan 0.89582968        nan 0.89582968
        nan 0.89582968        nan 0.89582968        nan 0.89582968
        nan 0.89582968        nan 0.89582968        nan 0.89582968
        nan 0.89582968        nan 0.89582968        nan 0.89582968
        nan 0.89582968        nan 0.89582968        nan 0.89582968
        nan 0.8928446         nan 0.8928446         nan 0.8928446
        nan 0.89582968        nan 0.89582968        nan 0.89582968
        nan 0.89582968        nan 0.89582968        nan 0.89881475
        nan 0.89881475        nan 0.89582968        nan 0.89881

  Dataset Name Ensemble Model Name  F1 Average E  F1 Class 0 E  F1 Class 1 E  \
0        ecoli            AdaBoost      0.791944      0.958332      0.625556   

   Recall Average E  Recall Class 0 E  Recall Class 1 E  Precision Average E  \
0          0.820578          0.961156              0.68             0.803387   

   Precision Class 0 E  Precision Class 1 E     AUC E  Accuracy E  
0             0.956774                 0.65  0.803387    0.925579  


  _warn_prf(average, modifier, msg_start, len(result))


In [10]:
dataframe

Unnamed: 0,Dataset Name,Base Model Name,F1 Average B,F1 Class 0 B,F1 Class 1 B,Recall Average B,Recall Class 0 B,Recall Class 1 B,Precision Average B,Precision Class 0 B,Precision Class 1 B,AUC B,Accuracy B
0,ecoli,Decision Tree,0.800676,0.961828,0.639524,0.862507,0.958347,0.766667,0.795887,0.966774,0.625,0.795887,0.931551


In [11]:
#UNDERSAMPLING
from imblearn.under_sampling import RandomUnderSampler
undersample = RandomUnderSampler(sampling_strategy='majority')

for index in range(len(dataset_names)):
    data = X[index]
    target = Y[index]
    X_under, y_under = undersample.fit_resample(data, target)
    best_estimators, model_scores, _, current_best_clf, best_estimator_name = evaluate_model(
                   dataset_names[index], weak_learners_base_models, param_grids, X_under, y_under,
                   cross_validation_setting, metric = WEAK_LEARNER_METRIC, sampling = ' Undersampled')
    
    print(current_best_clf)
    
    ensembles, ensembles_param_grids = generate_ensembles(best_estimators,
                                                          current_best_clf,
                                                          best_estimator_name)
    _, ensemble_scores, ensemble_error_cases, _, _ = evaluate_model(dataset_names[index], 
                   ensembles, ensembles_param_grids, X_under, y_under,
                   cross_validation_setting, metric = ENSEMBLE_LEARNER_METRIC)
    print(ensemble_scores)
    dataframe = dataframe.append(model_scores)
    dataframe2 = dataframe2.append(ensemble_scores)
    
# aux_df = dataframe
# aux_df = aux_df.merge(dataframe2, on = 'Dataset Name', how = 'outer')
# result = aux_df
# print(result)

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


SVC(C=1, gamma=1)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_bagging.py", line 244, in fit
    return self._fit(X, y, self.max_samples, sample_weight=sample_weight)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9
 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9
 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9
 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9
 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9 nan 0.9]


  Dataset Name Ensemble Model Name  F1 Average E  F1 Class 0 E  F1 Class 1 E  \
0        ecoli        Bagging (DT)       0.89754      0.893095      0.901984   

   Recall Average E  Recall Class 0 E  Recall Class 1 E  Precision Average E  \
0          0.915833              0.95          0.881667             0.904167   

   Precision Class 0 E  Precision Class 1 E     AUC E  Accuracy E  
0             0.866667             0.941667  0.904167         0.9  


In [12]:
#OVERSAMPLING
from imblearn.over_sampling import RandomOverSampler
oversample = RandomOverSampler()

for index in range(len(dataset_names)):
    data = X[index]
    target = Y[index]
    X_over, y_over = oversample.fit_resample(data, target)
    best_estimators, model_scores, _, current_best_clf, best_estimator_name = evaluate_model(
                   dataset_names[index], weak_learners_base_models, param_grids, X_over, y_over,
                   cross_validation_setting, metric = WEAK_LEARNER_METRIC, sampling = ' Oversampled')
    
    #cases.append(current_case)
    print(current_best_clf)
    
    ensembles, ensembles_param_grids = generate_ensembles(best_estimators,
                                                          current_best_clf,
                                                          best_estimator_name)
    _, ensemble_scores, ensemble_error_cases, _, _ = evaluate_model(dataset_names[index], 
                   ensembles, ensembles_param_grids, X_over, y_over,
                   cross_validation_setting, metric = ENSEMBLE_LEARNER_METRIC)
    print(ensemble_scores)
    dataframe = dataframe.append(model_scores)
    dataframe2 = dataframe2.append(ensemble_scores)
    
aux_df = dataframe.reset_index()
dataframe2 = dataframe2.reset_index()
aux_df = aux_df.join(dataframe2, lsuffix='', rsuffix='_to_delete')
result = aux_df
print(result)

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 444, in _check_solver
    "got %s penalty." % (solver, penalty))
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File

        nan 0.87348485        nan 0.85688705        nan 0.85523416]


KNeighborsClassifier(n_neighbors=1)


Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 352, in fit
    raise ValueError("Out of bag estimation only available"
ValueError: Out of bag estimation only available if bootstrap=True

Traceback (most recent call last):
  File "C:\Users\Nathan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 5

        nan 0.88338843        nan 0.88338843        nan 0.88338843
        nan 0.8850551         nan 0.88338843        nan 0.88338843
        nan 0.88338843        nan 0.88338843        nan 0.88338843
        nan 0.88172176        nan 0.88172176        nan 0.88172176
        nan 0.88172176        nan 0.87838843        nan 0.88172176
        nan 0.88672176        nan 0.88672176        nan 0.88672176
        nan 0.88672176        nan 0.88672176        nan 0.88672176
        nan 0.88672176        nan 0.8850551         nan 0.8850551
        nan 0.88338843        nan 0.88672176        nan 0.8850551
        nan 0.88338843        nan 0.88172176        nan 0.88338843
        nan 0.88172176        nan 0.88338843        nan 0.88172176
        nan 0.90169421        nan 0.905             nan 0.90336088
        nan 0.90333333        nan 0.90169421        nan 0.90333333
        nan 0.89172176        nan 0.89836088        nan 0.89338843
        nan 0.89836088        nan 0.8900551         nan 0.898360

  Dataset Name Ensemble Model Name  F1 Average E  F1 Class 0 E  F1 Class 1 E  \
0        ecoli       Random Forest      0.963349      0.961632      0.965066   

   Recall Average E  Recall Class 0 E  Recall Class 1 E  Precision Average E  \
0          0.966493               1.0          0.932986             0.963387   

   Precision Class 0 E  Precision Class 1 E     AUC E  Accuracy E  
0             0.926774                  1.0  0.963387    0.963443  
   index        Dataset Name          Base Model Name  F1 Average B  \
0      0               ecoli            Decision Tree      0.800676   
1      0  ecoli Undersampled  Support Vector Machines      0.897540   
2      0   ecoli Oversampled      K-Nearest Neighbors      0.971640   

   F1 Class 0 B  F1 Class 1 B  Recall Average B  Recall Class 0 B  \
0      0.961828      0.639524          0.862507          0.958347   
1      0.893095      0.901984          0.915833          0.950000   
2      0.970330      0.972949          0.974048   

In [None]:
"""
ENSEMBLES
"""

#AdaBoost
from sklearn.ensemble import AdaBoostClassifier

ensembles = dict()
ensembles_param_grids = dict()

ab_param_grid = {
    'n_estimators' : [100, 300, 500],
    'learning_rate' : [1e-3, 1e-2, 1e-1, 1]
}
ab_model = AdaBoostClassifier(random_state = RANDOM_STATE)

ensembles_param_grids['AdaBoost'] = ab_param_grid
ensembles['AdaBoost'] = ab_model


#RandomForest
from sklearn.ensemble import RandomForestClassifier

rf_grid = {
    'max_depth': [4, 5, 6],
    'min_samples_leaf': [3, 4, 5],
    'min_samples_split': [8, 10, 12],
    'n_estimators': [200, 400]
}

rf = RandomForestClassifier(random_state = RANDOM_STATE)

ensembles_param_grids['Random Forest'] = rf_grid
ensembles['Random Forest'] = rf

#Bagging
from sklearn.ensemble import BaggingClassifier

bagging_param_grid = {
    'base_estimator__max_depth' : [1, 2, 3, 4, 5],
    'max_samples' : [0.05, 0.1, 0.2, 0.5]
}

bg_clf = BaggingClassifier(base_estimator=best_base_estimators['Decision Tree'],
                           random_state = RANDOM_STATE)

ensembles_param_grids['Bagging (DT)'] = bagging_param_grid
ensembles['Bagging (DT)'] = bg_clf

#Extra Trees
from sklearn.ensemble import ExtraTreesClassifier

et_grid = {
    'max_depth': [4, 5, 6],
    'min_samples_leaf': [3, 4, 5],
    'min_samples_split': [8, 10, 12],
    'n_estimators': [200, 400],
    'oob_score': [True, False]
}

et_clf = ExtraTreesClassifier(random_state = RANDOM_STATE)

ensembles_param_grids['Extra Trees'] = et_grid

ensembles['Extra Trees'] = et_clf

#Stacking 

"""Stacking Ensemble"""
from sklearn.ensemble import StackingClassifier

stacking_estimators = [
    ('lr', best_base_estimators['Logistic Regression']),
    ('knn', best_base_estimators['K-Nearest Neighbors']),
    ('svm', best_base_estimators['Support Vector Machines']),
    ('gnb', best_base_estimators['Naive Bayes']),
    ('dt',  best_base_estimators['Decision Tree'])
]

final_estimator = best_base_estimators['Logistic Regression']

stacking_model = StackingClassifier(estimators = stacking_estimators,
                                    final_estimator = final_estimator)

"""Voting Ensemble"""
from sklearn.ensemble import VotingClassifier

voting_estimators = [
    ('lr', best_base_estimators['Logistic Regression']),
    ('knn', best_base_estimators['K-Nearest Neighbors']),
    ('svm', best_base_estimators['Support Vector Machines']),
    ('gnb', best_base_estimators['Naive Bayes']),
    ('dt', best_base_estimators['Decision Tree'])
]

VOTING_METHOD = 'hard'

voting_classifier = VotingClassifier(voting_estimators,
                                     voting=VOTING_METHOD)

In [None]:

dataframe_ensembles = pd.DataFrame(columns = ENSEMBLE_COLUMNS)
cases_ensembles = []
for index in range(len(dataset_names)):
    data = X[index]
    target = Y[index]
    _, model_scores, current_case = evaluate_model(dataset_names[index], 
                   ensembles, ensembles_param_grids, data, target,
                   cross_validation_setting, metric = "F1 Average E")
    cases_ensembles.append(current_case)
    dataframe_ensembles = dataframe_ensembles.append(model_scores)