<a href="https://colab.research.google.com/github/stepthom/sandbox/blob/master/slides_ensemble_study.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Playground for Ensemble Slides - Study on Many Datasets

- Stephen W. Thomas
- Used for MMA 869, MMAI 869, and GMMA 869

This code is going to evaluate lots of ensemble classifiers (as well as some regular ones) on a lot of datasets. BOOM.

Also, we'll let autosklearn crank for 5 minutes per dataset to see how it fairs as well.

In [1]:
import datetime
print(datetime.datetime.now())

2020-07-13 20:27:23.956403


In [2]:
import pandas as pd
pd.show_versions(as_json=False)

import sklearn
sklearn.__version__

  """)



INSTALLED VERSIONS
------------------
commit           : None
python           : 3.6.9.final.0
python-bits      : 64
OS               : Linux
OS-release       : 4.19.104+
machine          : x86_64
processor        : x86_64
byteorder        : little
LC_ALL           : None
LANG             : en_US.UTF-8
LOCALE           : en_US.UTF-8

pandas           : 0.25.3
numpy            : 1.18.5
pytz             : 2018.9
dateutil         : 2.8.1
pip              : 19.3.1
setuptools       : 49.1.0
Cython           : 0.29.20
pytest           : 5.4.3
hypothesis       : None
sphinx           : 1.8.5
blosc            : None
feather          : 0.4.1
xlsxwriter       : None
lxml.etree       : 4.2.6
html5lib         : 1.0.1
pymysql          : None
psycopg2         : 2.7.6.1 (dt dec pq3 ext lo64)
jinja2           : 2.11.2
IPython          : 5.5.0
pandas_datareader: 0.8.1
bs4              : 4.6.3
bottleneck       : 1.3.2
fastparquet      : None
gcsfs            : None
lxml.etree       : 4.2.6
matplotlib  

'0.22.2.post1'

In [3]:
!apt-get install swig -y
!pip install Cython numpy

# sometimes you have to run the next command twice on colab
# I haven't figured out why
!pip install auto-sklearn

Reading package lists... Done
Building dependency tree       
Reading state information... Done
swig is already the newest version (3.0.12-1).
The following package was automatically installed and is no longer required:
  libnvidia-common-440
Use 'apt autoremove' to remove it.
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


In [4]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns 

from sklearn.model_selection import train_test_split

import itertools

import scipy

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Uncle Steve's Amazing Do-All function

It slices! It dices!

In order to streamline the evaluation of each dataset, let's create an function that takes in a dataset, the name of the target column, and the name of any columsn to drop (because that's decided by the human), and then automate the rest:

- Converting datatypes of the target column if necessary
- OHE any categorical features
- Splitting data into training and testing
- Training and evaluating all the models/ensembles
- Returning a list of the performance of all the models

In [5]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.ensemble import RandomForestClassifier, StackingClassifier, VotingClassifier, BaggingClassifier, ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier

from sklearn.experimental import enable_hist_gradient_boosting  # noqa

from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, recall_score, precision_score, roc_auc_score

import autosklearn.classification

import time

# Helper function
def do_all_for_dataset(dataset_name, df, target_col, drop_cols=[]):

    # If target_col is an object, convert to numbers
    if df[target_col].dtype == 'object':
      df[target_col] =  df[target_col].astype('category').cat.codes

    # OHE all categorical columns
    cat_cols = list(df.select_dtypes(include=['object']).columns) 
    if target_col in cat_cols: cat_cols.remove(targe_col)
    if len(cat_cols) > 0:
      df = pd.concat([df,pd.get_dummies(df[cat_cols])],axis=1)

    # Split into X and y
    X = df.drop(drop_cols + cat_cols + [target_col], axis=1)
    y = df[target_col]

    # Split into training and testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

    print('Y (train) counts:')
    print(y_train.value_counts())
    print('Y (test) counts:')
    print(y_test.value_counts())
    
    nb = GaussianNB()   
    lr = LogisticRegression(random_state=42, solver='lbfgs', max_iter=5000)
    dt = DecisionTreeClassifier(random_state=42)
    knn = KNeighborsClassifier(n_neighbors=7)

    rf = RandomForestClassifier(random_state=42, n_estimators=200)
    ada = AdaBoostClassifier(random_state=42, n_estimators=200)

    scorer = autosklearn.metrics.make_scorer(
        'f1_score',
        sklearn.metrics.f1_score
    )    
    automl = autosklearn.classification.AutoSklearnClassifier(
          time_left_for_this_task=100, # run auto-sklearn for at most X secs
          per_run_time_limit=15, # spend at most 60 sec for each model training
          metric=scorer
          )


    est_list = [('DT', dt), ('LR', lr), ('NB', nb), ('RF', rf), ('ADA', ada)]
       
    dict_classifiers = {
        "LR": lr, 
        "NB": nb,
        "DT": dt,
        "KNN": knn,
        "Voting": VotingClassifier(estimators = est_list, voting='soft'),
        "Bagging": BaggingClassifier(DecisionTreeClassifier(), n_estimators=200, random_state=42),
        "RF": rf,
        "ExtraTrees": ExtraTreesClassifier(random_state=42, n_estimators=200),
        "Adaboost": ada,
        "GBC": GradientBoostingClassifier(random_state=42, n_estimators=200),
        "Stacking": StackingClassifier(estimators=est_list, final_estimator=LogisticRegression()),
        "automl": automl,
    }
    
    model_results = list()
    
    for model_name, model in dict_classifiers.items():
        start = time.time()
        y_pred = model.fit(X_train, y_train).predict(X_test)
        end = time.time()
        total = end - start
        
        accuracy       = accuracy_score(y_test, y_pred)
        f1             = f1_score(y_test, y_pred)
        recall         = recall_score(y_test, y_pred)
        precision      = precision_score(y_test, y_pred)
        roc_auc        = roc_auc_score(y_test, y_pred)
    
        df = pd.DataFrame({"Dataset"   : [dataset_name],
                           "Method"    : [model_name],
                           "Time"      : [total],
                           "Accuracy"  : [accuracy],
                           "Recall"    : [recall],
                           "Precision" : [precision],
                           "F1"        : [f1],
                           "AUC"       : [roc_auc],
                          })
        model_results.append(df)
   

    dataset_results = pd.concat([m for m in model_results], axis = 0).reset_index()

    dataset_results = dataset_results.drop(columns = "index",axis =1)
    dataset_results = dataset_results.sort_values(by=['F1'], ascending=False)
    dataset_results['Rank'] = range(1, len(dataset_results)+1)
    
    return dataset_results

In [6]:
# We're going to save the results of each dataset into a big list, to analyze later.
results = list()

# Diabetes

In [7]:
df = pd.read_csv('https://raw.githubusercontent.com/stepthom/869_course/main/data/diabetes_orig.csv')
r = do_all_for_dataset( 'Diabetes', df, target_col='diabetes', drop_cols=['Id'])
results.append(r)
r

Y (train) counts:
0    400
1    214
Name: diabetes, dtype: int64
Y (test) counts:
0    100
1     54
Name: diabetes, dtype: int64


Unnamed: 0,Dataset,Method,Time,Accuracy,Recall,Precision,F1,AUC,Rank
8,Diabetes,Adaboost,0.409679,0.772727,0.648148,0.686275,0.666667,0.744074,1
9,Diabetes,GBC,0.321875,0.753247,0.62963,0.653846,0.641509,0.724815,2
4,Diabetes,Voting,1.009843,0.746753,0.592593,0.653061,0.621359,0.711296,3
6,Diabetes,RF,0.407973,0.746753,0.592593,0.653061,0.621359,0.711296,4
5,Diabetes,Bagging,0.682466,0.746753,0.574074,0.659574,0.613861,0.707037,5
1,Diabetes,NB,0.002595,0.707792,0.648148,0.57377,0.608696,0.694074,6
11,Diabetes,automl,94.601545,0.74026,0.555556,0.652174,0.6,0.697778,7
10,Diabetes,Stacking,4.753475,0.733766,0.555556,0.638298,0.594059,0.692778,8
7,Diabetes,ExtraTrees,0.307972,0.727273,0.555556,0.625,0.588235,0.687778,9
2,Diabetes,DT,0.005634,0.727273,0.5,0.642857,0.5625,0.675,10


# German Credit

In [8]:
df = pd.read_csv('https://raw.githubusercontent.com/stepthom/869_course/main/data/GermanCredit.csv')
r = do_all_for_dataset('GermanCredit', df, target_col='Class', drop_cols=[])
results.append(r)
r

Y (train) counts:
1    560
0    240
Name: Class, dtype: int64
Y (test) counts:
1    140
0     60
Name: Class, dtype: int64


Unnamed: 0,Dataset,Method,Time,Accuracy,Recall,Precision,F1,AUC,Rank
9,GermanCredit,GBC,0.452885,0.76,0.857143,0.810811,0.833333,0.695238,1
11,GermanCredit,automl,95.494524,0.745,0.878571,0.783439,0.828283,0.655952,2
7,GermanCredit,ExtraTrees,0.381405,0.745,0.857143,0.794702,0.824742,0.670238,3
6,GermanCredit,RF,0.424692,0.735,0.885714,0.770186,0.82392,0.634524,4
10,GermanCredit,Stacking,6.789234,0.725,0.828571,0.789116,0.808362,0.655952,5
5,GermanCredit,Bagging,1.137309,0.72,0.828571,0.783784,0.805556,0.647619,6
3,GermanCredit,KNN,0.028221,0.69,0.85,0.74375,0.793333,0.583333,7
4,GermanCredit,Voting,1.199796,0.71,0.785714,0.797101,0.791367,0.659524,8
0,GermanCredit,LR,0.137589,0.7,0.792857,0.78169,0.787234,0.638095,9
8,GermanCredit,Adaboost,0.493485,0.695,0.778571,0.784173,0.781362,0.639286,10


# Los Angeles Heart Disease

In [9]:
df = pd.read_csv('https://raw.githubusercontent.com/stepthom/869_course/main/data/laheart.csv')
r = do_all_for_dataset('LA Heart', df, target_col='DEATH', drop_cols=['ID', 'DEATH_YR'])
results.append(r)
r

Y (train) counts:
0    109
1     51
Name: DEATH, dtype: int64
Y (test) counts:
0    27
1    13
Name: DEATH, dtype: int64


Unnamed: 0,Dataset,Method,Time,Accuracy,Recall,Precision,F1,AUC,Rank
8,LA Heart,Adaboost,0.321829,0.8,0.692308,0.692308,0.692308,0.77208,1
11,LA Heart,automl,95.261597,0.75,0.384615,0.714286,0.5,0.655271,2
1,LA Heart,NB,0.003416,0.725,0.384615,0.625,0.47619,0.636752,3
5,LA Heart,Bagging,0.384849,0.7,0.384615,0.555556,0.454545,0.618234,4
9,LA Heart,GBC,0.177967,0.675,0.384615,0.5,0.434783,0.599715,5
6,LA Heart,RF,0.31692,0.725,0.307692,0.666667,0.421053,0.616809,6
4,LA Heart,Voting,0.904772,0.7,0.307692,0.571429,0.4,0.598291,7
2,LA Heart,DT,0.004762,0.6,0.384615,0.384615,0.384615,0.54416,8
7,LA Heart,ExtraTrees,0.216772,0.725,0.230769,0.75,0.352941,0.596866,9
0,LA Heart,LR,0.253523,0.675,0.230769,0.5,0.315789,0.559829,10


# HR Churn

In [10]:
df = pd.read_csv('https://raw.githubusercontent.com/stepthom/869_course/main/data/HR_comma_sep.csv')
r = do_all_for_dataset('HR Churn', df, target_col='left', drop_cols=[])
results.append(r)
r

Y (train) counts:
0    9142
1    2857
Name: left, dtype: int64
Y (test) counts:
0    2286
1     714
Name: left, dtype: int64


Unnamed: 0,Dataset,Method,Time,Accuracy,Recall,Precision,F1,AUC,Rank
10,HR Churn,Stacking,26.474826,0.990333,0.966387,0.992806,0.979418,0.9821,1
6,HR Churn,RF,1.948336,0.99,0.963585,0.99422,0.978663,0.980918,2
7,HR Churn,ExtraTrees,1.726488,0.988,0.963585,0.985673,0.974504,0.979605,3
5,HR Churn,Bagging,6.435993,0.987667,0.967787,0.980142,0.973925,0.980831,4
11,HR Churn,automl,94.908843,0.986,0.95098,0.989796,0.97,0.973959,5
4,HR Churn,Voting,5.131726,0.978333,0.948179,0.960284,0.954193,0.967965,6
9,HR Churn,GBC,2.744781,0.976667,0.938375,0.962644,0.950355,0.963501,7
2,HR Churn,DT,0.066668,0.973333,0.964986,0.926075,0.94513,0.970463,8
8,HR Churn,Adaboost,2.103505,0.960333,0.917367,0.916084,0.916725,0.94556,9
3,HR Churn,KNN,0.170436,0.946333,0.932773,0.854942,0.892163,0.941671,10


# US Census Data

In [11]:
df = pd.read_csv('https://raw.githubusercontent.com/stepthom/869_course/main/data/adult.csv')
r = do_all_for_dataset('Adult', df, target_col='high_salary', drop_cols=['education'])
results.append(r)
r

Y (train) counts:
0    19775
1     6273
Name: high_salary, dtype: int64
Y (test) counts:
0    4945
1    1568
Name: high_salary, dtype: int64


Unnamed: 0,Dataset,Method,Time,Accuracy,Recall,Precision,F1,AUC,Rank
8,Adult,Adaboost,9.22371,0.871488,0.652423,0.777947,0.709677,0.796687,1
9,Adult,GBC,13.845171,0.872563,0.642857,0.788732,0.708363,0.794128,2
6,Adult,RF,9.886277,0.856902,0.63648,0.733824,0.681694,0.781637,3
5,Adult,Bagging,50.486441,0.854445,0.637755,0.724638,0.678426,0.780455,4
10,Adult,Stacking,102.303363,0.857362,0.60523,0.753773,0.671383,0.77127,5
11,Adult,automl,95.460762,0.818517,0.720663,0.602988,0.656595,0.785104,6
7,Adult,ExtraTrees,12.199482,0.836634,0.616709,0.676224,0.645097,0.76154,7
2,Adult,DT,0.380956,0.814525,0.635204,0.610294,0.6225,0.753295,8
4,Adult,Voting,21.091315,0.83817,0.417092,0.823678,0.553768,0.69439,9
1,Adult,NB,0.107367,0.799324,0.317602,0.677551,0.432479,0.634837,10


# Orange Juice Sales

In [12]:
df = pd.read_csv('https://raw.githubusercontent.com/stepthom/869_course/main/data/OJ.csv')
r = do_all_for_dataset('OJ', df, target_col='Purchase', drop_cols=['ID'])
results.append(r)
r

Y (train) counts:
0    522
1    334
Name: Purchase, dtype: int64
Y (test) counts:
0    131
1     83
Name: Purchase, dtype: int64


Unnamed: 0,Dataset,Method,Time,Accuracy,Recall,Precision,F1,AUC,Rank
8,OJ,Adaboost,0.412627,0.85514,0.771084,0.842105,0.805031,0.839741,1
9,OJ,GBC,0.341138,0.845794,0.746988,0.837838,0.789809,0.827692,2
10,OJ,Stacking,6.078939,0.841121,0.746988,0.826667,0.78481,0.823876,3
4,OJ,Voting,1.154752,0.831776,0.759036,0.797468,0.777778,0.818449,4
0,OJ,LR,0.19048,0.831776,0.722892,0.821918,0.769231,0.811827,5
11,OJ,automl,94.765673,0.82243,0.759036,0.777778,0.768293,0.810816,6
7,OJ,ExtraTrees,0.336726,0.808411,0.722892,0.769231,0.745342,0.792743,7
6,OJ,RF,0.426854,0.808411,0.698795,0.783784,0.738854,0.788329,8
5,OJ,Bagging,0.745412,0.799065,0.722892,0.75,0.736196,0.78511,9
2,OJ,DT,0.008439,0.78972,0.698795,0.74359,0.720497,0.773062,10


# Bank

In [13]:
df = pd.read_csv('https://raw.githubusercontent.com/stepthom/869_course/main/data/bank.csv')
r = do_all_for_dataset('Bank', df, target_col='y', drop_cols=[])
results.append(r)
r

Y (train) counts:
0    3199
1     417
Name: y, dtype: int64
Y (test) counts:
0    801
1    104
Name: y, dtype: int64


Unnamed: 0,Dataset,Method,Time,Accuracy,Recall,Precision,F1,AUC,Rank
4,Bank,Voting,8.002571,0.895028,0.413462,0.558442,0.475138,0.685507,1
11,Bank,automl,94.526651,0.79337,0.788462,0.331984,0.467236,0.791235,2
9,Bank,GBC,1.6505,0.889503,0.394231,0.525641,0.450549,0.674019,3
5,Bank,Bagging,5.624545,0.887293,0.394231,0.5125,0.445652,0.672771,4
10,Bank,Stacking,26.435482,0.895028,0.365385,0.567164,0.444444,0.66459,5
2,Bank,DT,0.053082,0.857459,0.480769,0.4,0.436681,0.693568,6
8,Bank,Adaboost,1.236406,0.887293,0.336538,0.514706,0.406977,0.64767,7
0,Bank,LR,3.510823,0.893923,0.307692,0.571429,0.4,0.638865,8
1,Bank,NB,0.011552,0.81768,0.480769,0.310559,0.377358,0.671096,9
7,Bank,ExtraTrees,0.964086,0.890608,0.240385,0.555556,0.33557,0.607708,10


# Credit Data (from ISLR)

In [14]:
df = pd.read_csv('https://raw.githubusercontent.com/stepthom/sandbox/869_course/main/islr_credit.csv')
r = do_all_for_dataset('Credit', df, target_col='default', drop_cols=['ID'])
results.append(r)
r

Y (train) counts:
0    7734
1     266
Name: default, dtype: int64
Y (test) counts:
0    1933
1      67
Name: default, dtype: int64


Unnamed: 0,Dataset,Method,Time,Accuracy,Recall,Precision,F1,AUC,Rank
11,Credit,automl,95.552348,0.971,0.358209,0.615385,0.45283,0.675224,1
10,Credit,Stacking,15.910119,0.9705,0.313433,0.617647,0.415842,0.653354,2
8,Credit,Adaboost,1.256315,0.97,0.313433,0.6,0.411765,0.653095,3
5,Credit,Bagging,2.49538,0.968,0.313433,0.538462,0.396226,0.65206,4
7,Credit,ExtraTrees,0.720486,0.967,0.313433,0.512195,0.388889,0.651543,5
6,Credit,RF,1.743766,0.9685,0.298507,0.555556,0.38835,0.645115,6
4,Credit,Voting,3.141324,0.968,0.283582,0.542857,0.372549,0.637652,7
1,Credit,NB,0.005281,0.9665,0.268657,0.5,0.349515,0.629672,8
2,Credit,DT,0.029368,0.956,0.328358,0.338462,0.333333,0.653056,9
9,Credit,GBC,1.60503,0.963,0.268657,0.418605,0.327273,0.627862,10


# UCI Credit Approval
From: https://archive.ics.uci.edu/ml/datasets/Credit+Approval

In [15]:
df = pd.read_csv('https://raw.githubusercontent.com/stepthom/869_course/main/data/crx.csv')
r = do_all_for_dataset('UCI Credit', df, target_col='class', drop_cols=[])
results.append(r)
r

Y (train) counts:
1    306
0    246
Name: class, dtype: int64
Y (test) counts:
1    77
0    61
Name: class, dtype: int64


Unnamed: 0,Dataset,Method,Time,Accuracy,Recall,Precision,F1,AUC,Rank
4,UCI Credit,Voting,2.846164,0.847826,0.857143,0.868421,0.862745,0.846604,1
10,UCI Credit,Stacking,12.236892,0.847826,0.844156,0.878378,0.860927,0.848307,2
1,UCI Credit,NB,0.014273,0.833333,0.922078,0.806818,0.860606,0.821695,3
5,UCI Credit,Bagging,1.970659,0.847826,0.831169,0.888889,0.85906,0.850011,4
7,UCI Credit,ExtraTrees,0.427059,0.847826,0.831169,0.888889,0.85906,0.850011,5
8,UCI Credit,Adaboost,0.978684,0.847826,0.818182,0.9,0.857143,0.851714,6
6,UCI Credit,RF,0.496387,0.84058,0.831169,0.876712,0.853333,0.841814,7
11,UCI Credit,automl,95.071934,0.826087,0.857143,0.835443,0.846154,0.822014,8
3,UCI Credit,KNN,0.073794,0.797101,0.922078,0.763441,0.835294,0.780711,9
9,UCI Credit,GBC,0.945455,0.826087,0.779221,0.895522,0.833333,0.832233,10


# King-Rook vs. King

From: https://archive.ics.uci.edu/ml/datasets/Chess+%28King-Rook+vs.+King%29

In [16]:
df = pd.read_csv('https://raw.githubusercontent.com/stepthom/869_course/main/data/krkopt.csv')
df['result'] = df['result'] != 'draw' # Change to binary classification problem
r = do_all_for_dataset('King Rook', df, target_col='result', drop_cols=[])
results.append(r)
r

Y (train) counts:
True     20207
False     2237
Name: result, dtype: int64
Y (test) counts:
True     5053
False     559
Name: result, dtype: int64


Unnamed: 0,Dataset,Method,Time,Accuracy,Recall,Precision,F1,AUC,Rank
5,King Rook,Bagging,6.672816,0.995723,0.997427,0.997822,0.997625,0.988875,1
10,King Rook,Stacking,34.802662,0.992338,0.997427,0.994083,0.995752,0.97188,2
2,King Rook,DT,0.060836,0.988774,0.993865,0.993668,0.993767,0.96831,3
11,King Rook,automl,95.70465,0.981468,0.996834,0.982829,0.989782,0.919705,4
9,King Rook,GBC,3.809935,0.96846,0.998219,0.967767,0.982757,0.848841,5
6,King Rook,RF,2.882421,0.956522,0.984762,0.967529,0.976069,0.843007,6
3,King Rook,KNN,1.487516,0.955809,0.997823,0.955105,0.975997,0.786926,7
7,King Rook,ExtraTrees,3.099249,0.945474,0.969721,0.969721,0.969721,0.848009,8
4,King Rook,Voting,6.729951,0.941376,1.0,0.93887,0.968471,0.705725,9
0,King Rook,LR,0.409874,0.900392,1.0,0.900392,0.947586,0.5,10


# Credit Card Fraud

In [17]:
#df = pd.read_csv('https://raw.githubusercontent.com/stepthom/sandbox/master/data/creditcard_sample.csv')
# This dataset is huge, so let's take a sample to speed things up
#df = df.sample(frac=0.3, replace=False, random_state=1, axis=0)
#r = do_all_for_dataset('Credit Card', df, target_col='Class', drop_cols=[])
#results.append(r)
#r

# Overall Results

In [18]:
r = pd.concat([m for m in results], axis = 0).reset_index()
r = r.drop(columns = "index",axis =1)
r.groupby('Method').describe().Rank.sort_values(by='mean')
r.groupby('Method').describe().Time.sort_values(by='mean')

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
automl,10.0,4.3,2.451757,1.0,2.0,4.5,6.0,8.0
Stacking,10.0,4.4,3.134042,1.0,2.0,4.0,5.0,11.0
Bagging,10.0,4.5,2.013841,1.0,4.0,4.0,4.75,9.0
GBC,10.0,4.7,3.335,1.0,2.0,4.0,6.5,10.0
Adaboost,10.0,5.0,4.082483,1.0,1.0,4.5,8.5,11.0
Voting,10.0,5.5,3.064129,1.0,3.25,6.5,7.75,9.0
RF,10.0,5.7,2.626785,2.0,4.0,6.0,6.75,11.0
ExtraTrees,10.0,6.6,2.503331,3.0,5.0,7.0,8.75,10.0
DT,10.0,8.5,2.592725,3.0,8.0,8.5,10.0,12.0
NB,10.0,8.5,3.439961,3.0,6.5,9.5,11.0,12.0


Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
NB,10.0,0.019692,0.031509,0.002595,0.005162,0.011045,0.013637,0.107367
DT,10.0,0.06519,0.113343,0.004762,0.009441,0.02954,0.058897,0.380956
KNN,10.0,0.38127,0.687203,0.004526,0.01903,0.073627,0.151124,1.855113
LR,10.0,0.709857,1.04117,0.04035,0.150811,0.331699,0.825899,3.510823
RF,10.0,1.961118,2.914727,0.31692,0.425232,0.786971,1.897193,9.886277
Adaboost,10.0,1.980205,2.715555,0.321829,0.432842,1.107545,1.891708,9.22371
ExtraTrees,10.0,2.037973,3.680456,0.216772,0.347896,0.573773,1.535887,12.199482
GBC,10.0,2.589474,4.127622,0.177967,0.369075,1.275242,2.471211,13.845171
Voting,10.0,5.121221,6.153792,0.904772,1.166013,2.993744,6.330395,21.091315
Bagging,10.0,7.663587,15.245154,0.384849,0.843386,2.23302,6.233131,50.486441
