# Playground for Ensemble Slides - Study on Many Datasets

- Stephen W. Thomas
- Used for MMA 869, MMAI 869, and GMMA 869

This code is going to evaluate lots of ensemble classifiers (as well as some regular ones) on a lot of datasets. BOOM.

In [1]:
import datetime
print(datetime.datetime.now())

2020-07-10 14:32:53.295157


In [2]:
import pandas as pd
pd.show_versions(as_json=False)

import sklearn
sklearn.__version__

  """)
  from pandas.util.testing import assert_frame_equal



INSTALLED VERSIONS
------------------
commit           : None
python           : 3.6.9.final.0
python-bits      : 64
OS               : Linux
OS-release       : 4.19.104+
machine          : x86_64
processor        : x86_64
byteorder        : little
LC_ALL           : None
LANG             : en_US.UTF-8
LOCALE           : en_US.UTF-8

pandas           : 1.0.5
numpy            : 1.18.5
pytz             : 2018.9
dateutil         : 2.8.1
pip              : 19.3.1
setuptools       : 47.3.1
Cython           : 0.29.20
pytest           : 3.6.4
hypothesis       : None
sphinx           : 1.8.5
blosc            : None
feather          : 0.4.1
xlsxwriter       : None
lxml.etree       : 4.2.6
html5lib         : 1.0.1
pymysql          : None
psycopg2         : 2.7.6.1 (dt dec pq3 ext lo64)
jinja2           : 2.11.2
IPython          : 5.5.0
pandas_datareader: 0.8.1
bs4              : 4.6.3
bottleneck       : 1.3.2
fastparquet      : None
gcsfs            : None
lxml.etree       : 4.2.6
matplotlib   

'0.22.2.post1'

In [3]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns 

from sklearn.model_selection import train_test_split

import itertools

import scipy

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Uncle Steve's Amazing Do-All function

It slices! It dices!

In order to streamline the evaluation of each dataset, let's create an function that takes in a dataset, the name of the target column, and the name of any columsn to drop (because that's decided by the human), and then automate the rest:

- Converting datatypes of the target column if necessary
- OHE any categorical features
- Splitting data into training and testing
- Training and evaluating all the models/ensembles
- Returning a list of the performance of all the models

In [4]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.ensemble import RandomForestClassifier, StackingClassifier, VotingClassifier, BaggingClassifier, ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier

from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.ensemble import HistGradientBoostingClassifier


from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, recall_score, precision_score, roc_auc_score

import time

# Helper function
def do_all_for_dataset(dataset_name, df, target_col, drop_cols=[]):

    # If target_col is an object, convert to numbers
    if df[target_col].dtype == 'object':
      df[target_col] =  df[target_col].astype('category').cat.codes

    # OHE all categorical columns
    cat_cols = list(df.select_dtypes(include=['object']).columns) 
    if target_col in cat_cols: cat_cols.remove(targe_col)
    if len(cat_cols) > 0:
      df = pd.concat([df,pd.get_dummies(df[cat_cols])],axis=1)

    # Split into X and y
    X = df.drop(drop_cols + cat_cols + [target_col], axis=1)
    y = df[target_col]

    # Split into training and testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

    print('Y (train) counts:')
    print(y_train.value_counts())
    print('Y (test) counts:')
    print(y_test.value_counts())
    
    nb = GaussianNB()   
    lr = LogisticRegression(random_state=42, solver='lbfgs', max_iter=5000)
    dt = DecisionTreeClassifier(random_state=42)
    rf = RandomForestClassifier(random_state=42, n_estimators=200)
    ada = AdaBoostClassifier(random_state=42, n_estimators=200)
       
    dict_classifiers = {
        "LR": lr, 
        "NB": nb,
        "DT": dt,
        "Voting": VotingClassifier(estimators = [('DT', dt), ('LR', lr), ('NB', nb)], voting='soft'),
        "Bagging": BaggingClassifier(DecisionTreeClassifier(), n_estimators=200, random_state=42),
        "RF": rf,
        "ExtraTrees": ExtraTreesClassifier(random_state=42, n_estimators=200),
        "Adaboost": ada,
        "GBC": GradientBoostingClassifier(random_state=42, n_estimators=200),
        "Stacking": StackingClassifier(estimators=[('DT', dt), ('LR', lr), ('NB', nb), ('RF', rf), ('ADA', ada)], final_estimator=LogisticRegression())
    }
    
    model_results = list()
    
    for model_name, model in dict_classifiers.items():
        start = time.time()
        y_pred = model.fit(X_train, y_train).predict(X_test)
        end = time.time()
        total = end - start
        
        accuracy       = accuracy_score(y_test, y_pred)
        f1             = f1_score(y_test, y_pred)
        recall         = recall_score(y_test, y_pred)
        precision      = precision_score(y_test, y_pred)
        roc_auc        = roc_auc_score(y_test, y_pred)
    
        df = pd.DataFrame({"Dataset"   : [dataset_name],
                           "Method"    : [model_name],
                           "Time"      : [total],
                           "Accuracy"  : [accuracy],
                           "Recall"    : [recall],
                           "Precision" : [precision],
                           "F1"        : [f1],
                           "AUC"       : [roc_auc],
                          })
        model_results.append(df)
   

    dataset_results = pd.concat([m for m in model_results], axis = 0).reset_index()

    dataset_results = dataset_results.drop(columns = "index",axis =1)
    dataset_results = dataset_results.sort_values(by=['F1'], ascending=False)
    dataset_results['Rank'] = range(1, len(dataset_results)+1)
    
    return dataset_results

In [5]:
# We're going to save the results of each dataset into a big list, to analyze later.
results = list()

# Diabetes

In [6]:
df = pd.read_csv('https://raw.githubusercontent.com/stepthom/sandbox/master/data/diabetes_orig.csv')
r = do_all_for_dataset( 'Diabetes', df, target_col='diabetes', drop_cols=['Id'])
results.append(r)
r

Y (train) counts:
0    400
1    214
Name: diabetes, dtype: int64
Y (test) counts:
0    100
1     54
Name: diabetes, dtype: int64


Unnamed: 0,Dataset,Method,Time,Accuracy,Recall,Precision,F1,AUC,Rank
7,Diabetes,Adaboost,0.367171,0.772727,0.648148,0.686275,0.666667,0.744074,1
8,Diabetes,GBC,0.30058,0.753247,0.62963,0.653846,0.641509,0.724815,2
3,Diabetes,Voting,0.045212,0.746753,0.592593,0.653061,0.621359,0.711296,3
5,Diabetes,RF,0.377052,0.746753,0.592593,0.653061,0.621359,0.711296,4
4,Diabetes,Bagging,0.619293,0.746753,0.574074,0.659574,0.613861,0.707037,5
1,Diabetes,NB,0.002234,0.707792,0.648148,0.57377,0.608696,0.694074,6
9,Diabetes,Stacking,4.499238,0.733766,0.555556,0.638298,0.594059,0.692778,7
6,Diabetes,ExtraTrees,0.288043,0.727273,0.555556,0.625,0.588235,0.687778,8
2,Diabetes,DT,0.006047,0.727273,0.5,0.642857,0.5625,0.675,9
0,Diabetes,LR,0.03845,0.714286,0.518519,0.608696,0.56,0.669259,10


# German Credit

In [7]:
df = pd.read_csv('https://raw.githubusercontent.com/stepthom/sandbox/master/data/GermanCredit.csv')
r = do_all_for_dataset('GermanCredit', df, target_col='Class', drop_cols=[])
results.append(r)
r

Y (train) counts:
1    560
0    240
Name: Class, dtype: int64
Y (test) counts:
1    140
0     60
Name: Class, dtype: int64


Unnamed: 0,Dataset,Method,Time,Accuracy,Recall,Precision,F1,AUC,Rank
8,GermanCredit,GBC,0.431021,0.76,0.857143,0.810811,0.833333,0.695238,1
6,GermanCredit,ExtraTrees,0.359332,0.745,0.857143,0.794702,0.824742,0.670238,2
5,GermanCredit,RF,0.40851,0.735,0.885714,0.770186,0.82392,0.634524,3
9,GermanCredit,Stacking,6.353294,0.725,0.828571,0.789116,0.808362,0.655952,4
4,GermanCredit,Bagging,1.137991,0.72,0.828571,0.783784,0.805556,0.647619,5
0,GermanCredit,LR,0.125056,0.7,0.792857,0.78169,0.787234,0.638095,6
3,GermanCredit,Voting,0.166601,0.705,0.771429,0.8,0.785455,0.660714,7
7,GermanCredit,Adaboost,0.482291,0.695,0.778571,0.784173,0.781362,0.639286,8
2,GermanCredit,DT,0.011358,0.675,0.742857,0.781955,0.761905,0.629762,9
1,GermanCredit,NB,0.004378,0.685,0.714286,0.813008,0.760456,0.665476,10


# Los Angeles Heart Disease

In [8]:
df = pd.read_csv('https://raw.githubusercontent.com/stepthom/sandbox/master/data/laheart.csv')
r = do_all_for_dataset('LA Heart', df, target_col='DEATH', drop_cols=['ID', 'DEATH_YR'])
results.append(r)
r

Y (train) counts:
0    109
1     51
Name: DEATH, dtype: int64
Y (test) counts:
0    27
1    13
Name: DEATH, dtype: int64


Unnamed: 0,Dataset,Method,Time,Accuracy,Recall,Precision,F1,AUC,Rank
7,LA Heart,Adaboost,0.294612,0.8,0.692308,0.692308,0.692308,0.77208,1
1,LA Heart,NB,0.001997,0.725,0.384615,0.625,0.47619,0.636752,2
4,LA Heart,Bagging,0.371966,0.7,0.384615,0.555556,0.454545,0.618234,3
8,LA Heart,GBC,0.170561,0.675,0.384615,0.5,0.434783,0.599715,4
5,LA Heart,RF,0.284079,0.725,0.307692,0.666667,0.421053,0.616809,5
2,LA Heart,DT,0.00303,0.6,0.384615,0.384615,0.384615,0.54416,6
3,LA Heart,Voting,0.206264,0.675,0.307692,0.5,0.380952,0.579772,7
6,LA Heart,ExtraTrees,0.208411,0.725,0.230769,0.75,0.352941,0.596866,8
0,LA Heart,LR,0.183591,0.675,0.230769,0.5,0.315789,0.559829,9
9,LA Heart,Stacking,4.492594,0.7,0.153846,0.666667,0.25,0.558405,10


# HR Churn

In [9]:
df = pd.read_csv('https://raw.githubusercontent.com/stepthom/sandbox/master/data/HR_comma_sep.csv')
r = do_all_for_dataset('HR Churn', df, target_col='left', drop_cols=[])
results.append(r)
r

Y (train) counts:
0    9142
1    2857
Name: left, dtype: int64
Y (test) counts:
0    2286
1     714
Name: left, dtype: int64


Unnamed: 0,Dataset,Method,Time,Accuracy,Recall,Precision,F1,AUC,Rank
9,HR Churn,Stacking,24.749431,0.990333,0.966387,0.992806,0.979418,0.9821,1
5,HR Churn,RF,1.860441,0.99,0.963585,0.99422,0.978663,0.980918,2
6,HR Churn,ExtraTrees,1.675256,0.988,0.963585,0.985673,0.974504,0.979605,3
4,HR Churn,Bagging,6.200312,0.987667,0.967787,0.980142,0.973925,0.980831,4
8,HR Churn,GBC,2.646512,0.976667,0.938375,0.962644,0.950355,0.963501,5
2,HR Churn,DT,0.066567,0.973333,0.964986,0.926075,0.94513,0.970463,6
7,HR Churn,Adaboost,1.960874,0.960333,0.917367,0.916084,0.916725,0.94556,7
3,HR Churn,Voting,0.925462,0.930333,0.865546,0.845417,0.855363,0.908057,8
1,HR Churn,NB,0.010525,0.680667,0.829132,0.414566,0.552754,0.731714,9
0,HR Churn,LR,0.894115,0.791667,0.343137,0.610973,0.439462,0.637448,10


# US Census Data

In [10]:
df = pd.read_csv('https://raw.githubusercontent.com/stepthom/sandbox/master/data/adult.csv')
r = do_all_for_dataset('Adult', df, target_col='high_salary', drop_cols=['education'])
results.append(r)
r

Y (train) counts:
0    19775
1     6273
Name: high_salary, dtype: int64
Y (test) counts:
0    4945
1    1568
Name: high_salary, dtype: int64


Unnamed: 0,Dataset,Method,Time,Accuracy,Recall,Precision,F1,AUC,Rank
7,Adult,Adaboost,8.848156,0.871488,0.652423,0.777947,0.709677,0.796687,1
8,Adult,GBC,13.33388,0.872563,0.642857,0.788732,0.708363,0.794128,2
5,Adult,RF,8.524249,0.856902,0.63648,0.733824,0.681694,0.781637,3
4,Adult,Bagging,46.175066,0.854445,0.637755,0.724638,0.678426,0.780455,4
9,Adult,Stacking,93.58883,0.857362,0.60523,0.753773,0.671383,0.77127,5
6,Adult,ExtraTrees,10.342284,0.836634,0.616709,0.676224,0.645097,0.76154,6
2,Adult,DT,0.331754,0.814525,0.635204,0.610294,0.6225,0.753295,7
3,Adult,Voting,0.965517,0.809304,0.30676,0.756289,0.436479,0.637708,8
1,Adult,NB,0.089855,0.799324,0.317602,0.677551,0.432479,0.634837,9
0,Adult,LR,0.504623,0.799478,0.274235,0.719064,0.397045,0.62013,10


# Orange Juice Sales

In [11]:
df = pd.read_csv('https://raw.githubusercontent.com/stepthom/sandbox/master/data/OJ.csv')
r = do_all_for_dataset('OJ', df, target_col='Purchase', drop_cols=['ID'])
results.append(r)
r

Y (train) counts:
0    522
1    334
Name: Purchase, dtype: int64
Y (test) counts:
0    131
1     83
Name: Purchase, dtype: int64


Unnamed: 0,Dataset,Method,Time,Accuracy,Recall,Precision,F1,AUC,Rank
7,OJ,Adaboost,0.409162,0.85514,0.771084,0.842105,0.805031,0.839741,1
8,OJ,GBC,0.329845,0.845794,0.746988,0.837838,0.789809,0.827692,2
9,OJ,Stacking,5.763381,0.841121,0.746988,0.826667,0.78481,0.823876,3
3,OJ,Voting,0.209088,0.827103,0.759036,0.7875,0.773006,0.814633,4
0,OJ,LR,0.197225,0.831776,0.722892,0.821918,0.769231,0.811827,5
6,OJ,ExtraTrees,0.342271,0.808411,0.722892,0.769231,0.745342,0.792743,6
5,OJ,RF,0.40562,0.808411,0.698795,0.783784,0.738854,0.788329,7
4,OJ,Bagging,0.732738,0.799065,0.722892,0.75,0.736196,0.78511,8
2,OJ,DT,0.00796,0.78972,0.698795,0.74359,0.720497,0.773062,9
1,OJ,NB,0.004149,0.738318,0.795181,0.628571,0.702128,0.748735,10


# Bank

In [12]:
df = pd.read_csv('https://raw.githubusercontent.com/stepthom/sandbox/master/data/bank.csv')
r = do_all_for_dataset('Bank', df, target_col='y', drop_cols=[])
results.append(r)
r

Y (train) counts:
0    3199
1     417
Name: y, dtype: int64
Y (test) counts:
0    801
1    104
Name: y, dtype: int64


Unnamed: 0,Dataset,Method,Time,Accuracy,Recall,Precision,F1,AUC,Rank
3,Bank,Voting,3.364869,0.888398,0.413462,0.518072,0.459893,0.681762,1
8,Bank,GBC,1.554164,0.889503,0.394231,0.525641,0.450549,0.674019,2
4,Bank,Bagging,4.610549,0.887293,0.394231,0.5125,0.445652,0.672771,3
9,Bank,Stacking,25.107987,0.895028,0.365385,0.567164,0.444444,0.66459,4
2,Bank,DT,0.054316,0.857459,0.480769,0.4,0.436681,0.693568,5
7,Bank,Adaboost,1.151769,0.887293,0.336538,0.514706,0.406977,0.64767,6
0,Bank,LR,3.379655,0.893923,0.307692,0.571429,0.4,0.638865,7
1,Bank,NB,0.009133,0.81768,0.480769,0.310559,0.377358,0.671096,8
6,Bank,ExtraTrees,0.885231,0.890608,0.240385,0.555556,0.33557,0.607708,9
5,Bank,RF,0.978818,0.889503,0.240385,0.543478,0.333333,0.607084,10


# Credit Data (from ISLR)

In [13]:
df = pd.read_csv('https://raw.githubusercontent.com/stepthom/sandbox/master/data/islr_credit.csv')
r = do_all_for_dataset('Credit', df, target_col='default', drop_cols=['ID'])
results.append(r)
r

Y (train) counts:
0    7734
1     266
Name: default, dtype: int64
Y (test) counts:
0    1933
1      67
Name: default, dtype: int64


Unnamed: 0,Dataset,Method,Time,Accuracy,Recall,Precision,F1,AUC,Rank
9,Credit,Stacking,14.97624,0.9705,0.313433,0.617647,0.415842,0.653354,1
7,Credit,Adaboost,1.157727,0.97,0.313433,0.6,0.411765,0.653095,2
4,Credit,Bagging,2.447967,0.968,0.313433,0.538462,0.396226,0.65206,3
6,Credit,ExtraTrees,0.670394,0.967,0.313433,0.512195,0.388889,0.651543,4
5,Credit,RF,1.632787,0.9685,0.298507,0.555556,0.38835,0.645115,5
1,Credit,NB,0.004548,0.9665,0.268657,0.5,0.349515,0.629672,6
3,Credit,Voting,0.114445,0.9665,0.268657,0.5,0.349515,0.629672,7
2,Credit,DT,0.029254,0.956,0.328358,0.338462,0.333333,0.653056,8
8,Credit,GBC,1.560421,0.963,0.268657,0.418605,0.327273,0.627862,9
0,Credit,LR,0.076984,0.964,0.134328,0.391304,0.2,0.563543,10


# UCI Credit Approval
From: https://archive.ics.uci.edu/ml/datasets/Credit+Approval

In [14]:
df = pd.read_csv('https://raw.githubusercontent.com/stepthom/sandbox/master/data/crx.csv')
r = do_all_for_dataset('UCI Credit', df, target_col='class', drop_cols=[])
results.append(r)
r

Y (train) counts:
1    306
0    246
Name: class, dtype: int64
Y (test) counts:
1    77
0    61
Name: class, dtype: int64


Unnamed: 0,Dataset,Method,Time,Accuracy,Recall,Precision,F1,AUC,Rank
9,UCI Credit,Stacking,11.59267,0.847826,0.844156,0.878378,0.860927,0.848307,1
1,UCI Credit,NB,0.016544,0.833333,0.922078,0.806818,0.860606,0.821695,2
4,UCI Credit,Bagging,1.655983,0.847826,0.831169,0.888889,0.85906,0.850011,3
6,UCI Credit,ExtraTrees,0.406104,0.847826,0.831169,0.888889,0.85906,0.850011,4
7,UCI Credit,Adaboost,0.899091,0.847826,0.818182,0.9,0.857143,0.851714,5
5,UCI Credit,RF,0.451134,0.84058,0.831169,0.876712,0.853333,0.841814,6
3,UCI Credit,Voting,0.89035,0.833333,0.857143,0.846154,0.851613,0.830211,7
8,UCI Credit,GBC,0.888338,0.826087,0.779221,0.895522,0.833333,0.832233,8
0,UCI Credit,LR,0.960651,0.818841,0.779221,0.882353,0.827586,0.824037,9
2,UCI Credit,DT,0.024555,0.789855,0.844156,0.792683,0.81761,0.782734,10


# King-Rook vs. King

From: https://archive.ics.uci.edu/ml/datasets/Chess+%28King-Rook+vs.+King%29

In [15]:
df = pd.read_csv('https://raw.githubusercontent.com/stepthom/sandbox/master/data/krkopt.csv')
df['result'] = df['result'] != 'draw' # Change to binary classification problem
r = do_all_for_dataset('King Rook', df, target_col='result', drop_cols=[])
results.append(r)
r

Y (train) counts:
True     20207
False     2237
Name: result, dtype: int64
Y (test) counts:
True     5053
False     559
Name: result, dtype: int64


Unnamed: 0,Dataset,Method,Time,Accuracy,Recall,Precision,F1,AUC,Rank
4,King Rook,Bagging,6.404598,0.995723,0.997427,0.997822,0.997625,0.988875,1
9,King Rook,Stacking,32.819147,0.992338,0.997427,0.994083,0.995752,0.97188,2
2,King Rook,DT,0.061002,0.988774,0.993865,0.993668,0.993767,0.96831,3
8,King Rook,GBC,3.724569,0.96846,0.998219,0.967767,0.982757,0.848841,4
5,King Rook,RF,2.704429,0.956522,0.984762,0.967529,0.976069,0.843007,5
6,King Rook,ExtraTrees,2.948201,0.945474,0.969721,0.969721,0.969721,0.848009,6
3,King Rook,Voting,0.423724,0.927655,1.0,0.925627,0.961377,0.636852,7
0,King Rook,LR,0.379135,0.900392,1.0,0.900392,0.947586,0.5,8
7,King Rook,Adaboost,3.170238,0.900392,1.0,0.900392,0.947586,0.5,9
1,King Rook,NB,0.021783,0.855488,0.934692,0.907571,0.920932,0.537114,10


# Kiva Loans

In [16]:
df = pd.read_csv('https://raw.githubusercontent.com/stepthom/sandbox/master/data/kiva_df_bow_tfidf.csv')
r = do_all_for_dataset('Kiva', df, target_col='status', drop_cols=['loan_id'])
results.append(r)
r

Y (train) counts:
1    2753
0    2688
Name: status, dtype: int64
Y (test) counts:
1    689
0    672
Name: status, dtype: int64


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

Unnamed: 0,Dataset,Method,Time,Accuracy,Recall,Precision,F1,AUC,Rank
6,Kiva,ExtraTrees,3.396668,0.930933,0.969521,0.901484,0.934266,0.930445,1
9,Kiva,Stacking,190.067491,0.927994,0.953556,0.908714,0.930595,0.927671,2
5,Kiva,RF,4.706191,0.92432,0.963716,0.894879,0.928022,0.923822,3
8,Kiva,GBC,22.464525,0.925055,0.94775,0.908206,0.927557,0.924768,4
4,Kiva,Bagging,114.992814,0.92432,0.941945,0.911517,0.926481,0.924097,5
0,Kiva,LR,23.816978,0.91036,0.923077,0.902128,0.912482,0.910199,6
3,Kiva,Voting,24.277507,0.906686,0.953556,0.87367,0.911867,0.906093,7
7,Kiva,Adaboost,11.382839,0.909625,0.908563,0.912536,0.910545,0.909639,8
2,Kiva,DT,0.867897,0.895665,0.910015,0.886846,0.898281,0.895483,9
1,Kiva,NB,0.087344,0.741367,0.833091,0.707768,0.765333,0.740206,10


# Credit Card Fraud

In [17]:
df = pd.read_csv('https://raw.githubusercontent.com/stepthom/sandbox/master/data/creditcard_sample.csv')
# This dataset is huge, so let's take a sample to speed things up
df = df.sample(frac=0.3, replace=False, random_state=1, axis=0)
r = do_all_for_dataset('Credit Card', df, target_col='Class', drop_cols=[])
results.append(r)
r

Y (train) counts:
0    34113
1       63
Name: Class, dtype: int64
Y (test) counts:
0    8529
1      16
Name: Class, dtype: int64


Unnamed: 0,Dataset,Method,Time,Accuracy,Recall,Precision,F1,AUC,Rank
5,Credit Card,RF,27.195963,0.999649,0.875,0.933333,0.903226,0.937441,1
6,Credit Card,ExtraTrees,3.486423,0.999649,0.875,0.933333,0.903226,0.937441,2
3,Credit Card,Voting,3.288296,0.999532,0.875,0.875,0.875,0.937383,3
9,Credit Card,Stacking,320.393715,0.999532,0.8125,0.928571,0.866667,0.906191,4
4,Credit Card,Bagging,140.105755,0.999415,0.9375,0.789474,0.857143,0.968516,5
7,Credit Card,Adaboost,34.736051,0.999415,0.75,0.923077,0.827586,0.874941,6
2,Credit Card,DT,1.282202,0.99883,0.8125,0.65,0.722222,0.90584,7
0,Credit Card,LR,2.012916,0.998947,0.5,0.888889,0.64,0.749941,8
8,Credit Card,GBC,88.876732,0.998011,0.125,0.4,0.190476,0.562324,9
1,Credit Card,NB,0.02745,0.984201,0.875,0.095238,0.171779,0.929703,10


# Overall Results

In [18]:
r = pd.concat([m for m in results], axis = 0).reset_index()
r = r.drop(columns = "index",axis =1)
r.groupby('Method').describe().Rank.sort_values(by='mean')
r.groupby('Method').describe().Time.sort_values(by='mean')

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Stacking,12.0,3.666667,2.708013,1.0,1.75,3.5,4.25,10.0
Bagging,12.0,4.083333,1.729862,1.0,3.0,4.0,5.0,8.0
GBC,12.0,4.333333,2.870962,1.0,2.0,4.0,5.75,9.0
RF,12.0,4.5,2.430862,1.0,3.0,4.5,5.25,10.0
Adaboost,12.0,4.583333,3.175426,1.0,1.0,5.5,7.25,9.0
ExtraTrees,12.0,4.916667,2.644319,1.0,2.75,5.0,6.5,9.0
Voting,12.0,5.75,2.340357,1.0,3.75,7.0,7.0,8.0
DT,12.0,7.333333,2.059715,3.0,6.0,7.5,9.0,10.0
NB,12.0,7.666667,3.025147,2.0,6.0,9.0,10.0,10.0
LR,12.0,8.166667,1.800673,5.0,6.75,8.5,10.0,10.0


Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
NB,12.0,0.023328,0.031528,0.001997,0.004321,0.009829,0.023199,0.089855
DT,12.0,0.228829,0.414707,0.00303,0.010509,0.041785,0.132864,1.282202
ExtraTrees,12.0,2.084052,2.888931,0.208411,0.355067,0.777813,3.060318,10.342284
LR,12.0,2.714115,6.718642,0.03845,0.168958,0.441879,1.223718,23.816978
Voting,12.0,2.906445,6.82944,0.045212,0.196348,0.657037,1.546212,24.277507
RF,12.0,4.127439,7.658385,0.284079,0.407788,1.305802,3.20487,27.195963
Adaboost,12.0,5.404998,9.917278,0.294612,0.464008,1.154748,4.589717,34.736051
GBC,12.0,11.356762,25.335455,0.170561,0.405727,1.557293,6.126897,88.876732
Bagging,12.0,27.121253,48.869182,0.371966,1.036678,3.529258,16.347215,140.105755
Stacking,12.0,61.200335,97.87915,4.492594,6.205815,19.862836,48.011568,320.393715
