In [26]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import  RobustScaler
from sklearn.utils import shuffle
from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.under_sampling import RandomUnderSampler, ClusterCentroids, TomekLinks
from imblearn.combine import SMOTEENN
from sklearn.model_selection import cross_validate
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.linear_model import LogisticRegression, LinearRegression, SGDClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from imblearn.ensemble import RUSBoostClassifier
from sklearn.metrics import make_scorer, recall_score, precision_score, accuracy_score, roc_auc_score


%matplotlib inline

### Load train and test dataset

In [2]:
AbsenteeismAtWork = pd.read_csv('data/train_data.csv', index_col=0)
AbsenteeismAtWork['Work load Average/day '] = [x.replace(',', '') for x in AbsenteeismAtWork['Work load Average/day ']]
AbsenteeismAtWork['Work load Average/day '] = AbsenteeismAtWork['Work load Average/day '].astype(int)

X_train = AbsenteeismAtWork.drop('Absent', 1)
y_train =  AbsenteeismAtWork['Absent']
X_test = pd.read_csv('data/test_data.csv', index_col=0)
y_test = pd.read_csv('data/sample_submission.csv', index_col=0)

# Balance Dataset

### Up-sample minority class

#### Resample with replacement

Método mais simples que consiste em replicar aleatoriamente (com reposição) dados da classe minoritária até atingir ratio de 1:1

In [3]:
def overSampler(X_train, y_train):
    ros = RandomOverSampler()
    X_balanced, y_train = ros.fit_sample(X_train, y_train)
    X_balanced, y_train = shuffle(X_balanced, y_train)
    return X_balanced, y_train;

#### SMOTE - Synthetic Minority Over-sampling Technique

In [4]:
def smoteSampler(X_train, y_train):
    smote = SMOTE(sampling_strategy='minority')
    X_balanced, y_train = smote.fit_sample(X_train, y_train)
    X_balanced, y_train = shuffle(X_balanced, y_train)
    return X_balanced, y_train;

### Down-sample majority class

#### Resample without replacement

In [5]:
def underSampler(X_train, y_train):
    rus = RandomUnderSampler()
    X_balanced, y_train = rus.fit_sample(X_train, y_train)
    X_balanced, y_train = shuffle(X_balanced, y_train)
    return X_balanced, y_train;

#### Cluster Centroids


In [6]:
def centroidSampler(X_train, y_train):
    cc = ClusterCentroids(sampling_strategy='majority')
    X_balanced, y_train = cc.fit_sample(X_train, y_train)
    X_balanced, y_train = shuffle(X_balanced, y_train)
    return X_balanced, y_train;

#### Tomek links

In [23]:
def tomekSampler(X_train, y_train):
    cc = TomekLinks(sampling_strategy='majority')
    X_balanced, y_train = cc.fit_sample(X_train, y_train)
    X_balanced, y_train = shuffle(X_balanced, y_train)
    return X_balanced, y_train;

### Combination of over- and under-sampling

#### SMOTE-ENN

In [7]:
def smoteeenSampler(X_train, y_train):
    smote_enn = SMOTEENN(random_state=0)
    X_balanced, y_train = smote_enn.fit_sample(X_train, y_train)
    X_balanced, y_train = shuffle(X_balanced, y_train)
    return X_balanced, y_train;

## Avaliação das diferentes técnicas

In [8]:
def robustScaling(X_train):
    scaler = RobustScaler()
    scaled_data = scaler.fit_transform( X_train )
    return scaled_data;

def evaluateTechnique(balancer):
    X_train = AbsenteeismAtWork.drop('Absent', 1)
    y_train =  AbsenteeismAtWork['Absent']

    X_train = robustScaling(X_train)
    
    X_train, y_train = balancer(X_train, y_train)
    
    classifiers = [
        LogisticRegression(class_weight='balanced'),
        SGDClassifier(class_weight='balanced'),
        KNeighborsClassifier(n_neighbors=5),
        SVC(class_weight='balanced'),
        LinearSVC(max_iter=10000, class_weight='balanced'),
        GaussianNB(),
        GaussianProcessClassifier(),
        DecisionTreeClassifier(class_weight='balanced'),
        MLPClassifier(max_iter=10000),
        AdaBoostClassifier(),
        RandomForestClassifier(class_weight='balanced'),
    ]
    
    
    names = [
             "Logistic regression", "SGDClassifier",
             "KNearest Neighbors (5)", 
             "SVM-rbf", "SMV-linear", 
             "Gaussian naive bayes",
             "Gaussian Process", 
             "Decision Tree", 
             "Multi-layer Perceptron", 
             "AdaBoost", "Random Forest"]


    metrics = {'recall0': make_scorer(recall_score, pos_label = 0), 
               'recall1': make_scorer(recall_score, pos_label = 1),
               'precision0': make_scorer(precision_score, pos_label = 0),
               'precision1': make_scorer(precision_score, pos_label = 0),
               'accuracy' : 'accuracy',
               'roc_auc': 'roc_auc'
              }

    for name, clf in zip(names, classifiers):
        scores = cross_validate(clf, X_train, y_train, cv=10, scoring=metrics)
        print("Accuracy: %0.3f || AUROC %0.3f || (Accuracy, Precision) 0:( %0.3f, %0.3f)  1:( %0.3f, %0.3f) ->" 
              % (scores['test_accuracy'].mean(), scores['test_roc_auc'].mean(),
                scores['test_recall0'].mean(), scores['test_precision0'].mean(),
                scores['test_recall1'].mean(), scores['test_precision1'].mean()), name)
        
    return;

In [9]:
evaluateTechnique(overSampler)

Accuracy: 0.725 || AUROC 0.800 || (Accuracy, Precision) 0:( 0.721, 0.730)  1:( 0.729, 0.730) -> Logistic regression
Accuracy: 0.638 || AUROC 0.698 || (Accuracy, Precision) 0:( 0.591, 0.665)  1:( 0.687, 0.665) -> SGDClassifier
Accuracy: 0.763 || AUROC 0.851 || (Accuracy, Precision) 0:( 0.856, 0.722)  1:( 0.671, 0.722) -> KNearest Neighbors (5)
Accuracy: 0.746 || AUROC 0.834 || (Accuracy, Precision) 0:( 0.739, 0.750)  1:( 0.752, 0.750) -> SVM-rbf
Accuracy: 0.710 || AUROC 0.803 || (Accuracy, Precision) 0:( 0.718, 0.707)  1:( 0.702, 0.707) -> SMV-linear
Accuracy: 0.634 || AUROC 0.685 || (Accuracy, Precision) 0:( 0.268, 1.000)  1:( 1.000, 1.000) -> Gaussian naive bayes
Accuracy: 0.824 || AUROC 0.903 || (Accuracy, Precision) 0:( 0.881, 0.792)  1:( 0.767, 0.792) -> Gaussian Process
Accuracy: 0.922 || AUROC 0.927 || (Accuracy, Precision) 0:( 0.980, 0.879)  1:( 0.863, 0.879) -> Decision Tree
Accuracy: 0.916 || AUROC 0.957 || (Accuracy, Precision) 0:( 0.972, 0.878)  1:( 0.861, 0.878) -> Multi-la

In [10]:
evaluateTechnique(smoteSampler)

Accuracy: 0.699 || AUROC 0.800 || (Accuracy, Precision) 0:( 0.663, 0.718)  1:( 0.734, 0.718) -> Logistic regression
Accuracy: 0.659 || AUROC 0.743 || (Accuracy, Precision) 0:( 0.688, 0.652)  1:( 0.631, 0.652) -> SGDClassifier
Accuracy: 0.762 || AUROC 0.859 || (Accuracy, Precision) 0:( 0.893, 0.707)  1:( 0.631, 0.707) -> KNearest Neighbors (5)
Accuracy: 0.752 || AUROC 0.841 || (Accuracy, Precision) 0:( 0.731, 0.764)  1:( 0.772, 0.764) -> SVM-rbf
Accuracy: 0.706 || AUROC 0.803 || (Accuracy, Precision) 0:( 0.680, 0.721)  1:( 0.731, 0.721) -> SMV-linear
Accuracy: 0.678 || AUROC 0.730 || (Accuracy, Precision) 0:( 0.356, 1.000)  1:( 1.000, 1.000) -> Gaussian naive bayes
Accuracy: 0.810 || AUROC 0.892 || (Accuracy, Precision) 0:( 0.896, 0.765)  1:( 0.724, 0.765) -> Gaussian Process
Accuracy: 0.839 || AUROC 0.848 || (Accuracy, Precision) 0:( 0.850, 0.835)  1:( 0.828, 0.835) -> Decision Tree
Accuracy: 0.904 || AUROC 0.956 || (Accuracy, Precision) 0:( 0.937, 0.880)  1:( 0.871, 0.880) -> Multi-la

In [11]:
evaluateTechnique(underSampler)

Accuracy: 0.638 || AUROC 0.735 || (Accuracy, Precision) 0:( 0.589, 0.664)  1:( 0.688, 0.664) -> Logistic regression
Accuracy: 0.643 || AUROC 0.687 || (Accuracy, Precision) 0:( 0.685, 0.663)  1:( 0.600, 0.663) -> SGDClassifier
Accuracy: 0.624 || AUROC 0.680 || (Accuracy, Precision) 0:( 0.516, 0.693)  1:( 0.735, 0.693) -> KNearest Neighbors (5)
Accuracy: 0.652 || AUROC 0.649 || (Accuracy, Precision) 0:( 0.504, 0.733)  1:( 0.799, 0.733) -> SVM-rbf
Accuracy: 0.648 || AUROC 0.758 || (Accuracy, Precision) 0:( 0.608, 0.672)  1:( 0.686, 0.672) -> SMV-linear
Accuracy: 0.648 || AUROC 0.667 || (Accuracy, Precision) 0:( 0.304, 0.980)  1:( 0.990, 0.980) -> Gaussian naive bayes
Accuracy: 0.614 || AUROC 0.645 || (Accuracy, Precision) 0:( 0.580, 0.631)  1:( 0.647, 0.631) -> Gaussian Process
Accuracy: 0.676 || AUROC 0.675 || (Accuracy, Precision) 0:( 0.702, 0.678)  1:( 0.648, 0.678) -> Decision Tree
Accuracy: 0.681 || AUROC 0.742 || (Accuracy, Precision) 0:( 0.695, 0.686)  1:( 0.667, 0.686) -> Multi-la

In [12]:
evaluateTechnique(centroidSampler)

Accuracy: 0.662 || AUROC 0.716 || (Accuracy, Precision) 0:( 0.575, 0.715)  1:( 0.756, 0.715) -> Logistic regression
Accuracy: 0.643 || AUROC 0.685 || (Accuracy, Precision) 0:( 0.571, 0.670)  1:( 0.710, 0.670) -> SGDClassifier
Accuracy: 0.571 || AUROC 0.627 || (Accuracy, Precision) 0:( 0.505, 0.586)  1:( 0.642, 0.586) -> KNearest Neighbors (5)
Accuracy: 0.600 || AUROC 0.597 || (Accuracy, Precision) 0:( 0.500, 0.680)  1:( 0.710, 0.680) -> SVM-rbf
Accuracy: 0.686 || AUROC 0.764 || (Accuracy, Precision) 0:( 0.639, 0.712)  1:( 0.737, 0.712) -> SMV-linear
Accuracy: 0.652 || AUROC 0.687 || (Accuracy, Precision) 0:( 0.308, 1.000)  1:( 1.000, 1.000) -> Gaussian naive bayes
Accuracy: 0.538 || AUROC 0.530 || (Accuracy, Precision) 0:( 0.525, 0.541)  1:( 0.557, 0.541) -> Gaussian Process
Accuracy: 0.800 || AUROC 0.800 || (Accuracy, Precision) 0:( 0.791, 0.820)  1:( 0.809, 0.820) -> Decision Tree
Accuracy: 0.571 || AUROC 0.651 || (Accuracy, Precision) 0:( 0.584, 0.585)  1:( 0.565, 0.585) -> Multi-la

In [13]:
evaluateTechnique(smoteeenSampler)

Accuracy: 0.826 || AUROC 0.911 || (Accuracy, Precision) 0:( 0.801, 0.896)  1:( 0.864, 0.896) -> Logistic regression
Accuracy: 0.796 || AUROC 0.891 || (Accuracy, Precision) 0:( 0.787, 0.861)  1:( 0.809, 0.861) -> SGDClassifier
Accuracy: 0.955 || AUROC 0.988 || (Accuracy, Precision) 0:( 0.986, 0.943)  1:( 0.909, 0.943) -> KNearest Neighbors (5)
Accuracy: 0.926 || AUROC 0.982 || (Accuracy, Precision) 0:( 0.897, 0.978)  1:( 0.970, 0.978) -> SVM-rbf
Accuracy: 0.845 || AUROC 0.929 || (Accuracy, Precision) 0:( 0.835, 0.899)  1:( 0.858, 0.899) -> SMV-linear
Accuracy: 0.652 || AUROC 0.857 || (Accuracy, Precision) 0:( 0.429, 0.969)  1:( 0.980, 0.969) -> Gaussian naive bayes
Accuracy: 0.969 || AUROC 0.997 || (Accuracy, Precision) 0:( 0.986, 0.964)  1:( 0.944, 0.964) -> Gaussian Process
Accuracy: 0.945 || AUROC 0.943 || (Accuracy, Precision) 0:( 0.952, 0.956)  1:( 0.934, 0.956) -> Decision Tree
Accuracy: 0.973 || AUROC 0.993 || (Accuracy, Precision) 0:( 0.987, 0.970)  1:( 0.954, 0.970) -> Multi-la

## Avaliar com os dados de teste

In [25]:
def robustScaling2(X_train, X_test):
    scaler = RobustScaler()
    scaled_data = scaler.fit_transform( X_train )
    scaled_test = scaler.transform( X_test )
    return scaled_data, scaled_test;

def discretize2(X_train, X_test):
    featuresToDiscretize = ['Transportation expense', 'Distance from Residence to Work', 'Service time', 'Age', 'Work load Average/day ', 'Hit target', 'Weight', 'Height', 'Body mass index']
    discretizer = KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='uniform')
    X_train[featuresToDiscretize] = discretizer.fit_transform(X_train[featuresToDiscretize])
    X_test[featuresToDiscretize] = discretizer.transform(X_test[featuresToDiscretize])
    return X_train, X_test;

def evaluateBalancerAgaintTestData(balancer):
    X_train = AbsenteeismAtWork.drop('Absent', 1)
    y_train =  AbsenteeismAtWork['Absent']
    X_test = pd.read_csv('data/test_data.csv', index_col=0)
    y_test = pd.read_csv('data/sample_submission.csv', index_col=0)
    
    X_train, X_test = robustScaling2(X_train, X_test)
    
    X_train, y_train = balancer(X_train, y_train)
    
    classifiers = [
        LogisticRegression(class_weight='balanced'),
        SGDClassifier(class_weight='balanced'),
        KNeighborsClassifier(n_neighbors=5),
        SVC(class_weight='balanced'),
        LinearSVC(max_iter=10000, class_weight='balanced'),
        GaussianNB(),
        GaussianProcessClassifier(),
        DecisionTreeClassifier(class_weight='balanced'),
        MLPClassifier(max_iter=10000),
        AdaBoostClassifier(),
        RandomForestClassifier(class_weight='balanced'),
    ]

    names = [
             "Logistic regression", "SGDClassifier",
             "KNearest Neighbors (5)", 
             "SVM-rbf", "SMV-linear", 
             "Gaussian naive bayes",
             "Gaussian Process", 
             "Decision Tree", 
             "Multi-layer Perceptron", 
             "AdaBoost", "Random Forest"]

    for name, clf in zip(names, classifiers):
        clf.fit(X_train, y_train)
        predicted = clf.predict(X_test)
        print("Accuracy: %0.3f || AUROC %0.3f || (Accuracy, Precision) 0:( %0.3f, %0.3f)  1:( %0.3f, %0.3f) ->" 
              % (accuracy_score(y_test,predicted), roc_auc_score(y_test, predicted),
                recall_score(y_test,predicted,pos_label=0), precision_score(y_test,predicted,pos_label=0),
                recall_score(y_test,predicted,pos_label=1), precision_score(y_test,predicted,pos_label=1)), name)
        
    return;


In [32]:
evaluateBalancerAgaintTestData(overSampler)

Accuracy: 0.671 || AUROC 0.525 || (Accuracy, Precision) 0:( 0.295, 0.213)  1:( 0.755, 0.827) -> Logistic regression
Accuracy: 0.717 || AUROC 0.553 || (Accuracy, Precision) 0:( 0.295, 0.260)  1:( 0.811, 0.837) -> SGDClassifier
Accuracy: 0.604 || AUROC 0.493 || (Accuracy, Precision) 0:( 0.318, 0.177)  1:( 0.668, 0.814) -> KNearest Neighbors (5)
Accuracy: 0.633 || AUROC 0.502 || (Accuracy, Precision) 0:( 0.295, 0.186)  1:( 0.709, 0.818) -> SVM-rbf
Accuracy: 0.642 || AUROC 0.525 || (Accuracy, Precision) 0:( 0.341, 0.208)  1:( 0.709, 0.827) -> SMV-linear
Accuracy: 0.804 || AUROC 0.519 || (Accuracy, Precision) 0:( 0.068, 0.333)  1:( 0.969, 0.823) -> Gaussian naive bayes
Accuracy: 0.692 || AUROC 0.503 || (Accuracy, Precision) 0:( 0.205, 0.188)  1:( 0.801, 0.818) -> Gaussian Process
Accuracy: 0.637 || AUROC 0.575 || (Accuracy, Precision) 0:( 0.477, 0.247)  1:( 0.673, 0.852) -> Decision Tree
Accuracy: 0.738 || AUROC 0.540 || (Accuracy, Precision) 0:( 0.227, 0.256)  1:( 0.852, 0.831) -> Multi-la

In [33]:
evaluateBalancerAgaintTestData(smoteSampler)

Accuracy: 0.654 || AUROC 0.542 || (Accuracy, Precision) 0:( 0.364, 0.225)  1:( 0.719, 0.834) -> Logistic regression
Accuracy: 0.629 || AUROC 0.544 || (Accuracy, Precision) 0:( 0.409, 0.222)  1:( 0.679, 0.836) -> SGDClassifier
Accuracy: 0.613 || AUROC 0.516 || (Accuracy, Precision) 0:( 0.364, 0.198)  1:( 0.668, 0.824) -> KNearest Neighbors (5)
Accuracy: 0.683 || AUROC 0.524 || (Accuracy, Precision) 0:( 0.273, 0.214)  1:( 0.776, 0.826) -> SVM-rbf
Accuracy: 0.667 || AUROC 0.567 || (Accuracy, Precision) 0:( 0.409, 0.250)  1:( 0.724, 0.845) -> SMV-linear
Accuracy: 0.808 || AUROC 0.521 || (Accuracy, Precision) 0:( 0.068, 0.375)  1:( 0.974, 0.823) -> Gaussian naive bayes
Accuracy: 0.679 || AUROC 0.513 || (Accuracy, Precision) 0:( 0.250, 0.200)  1:( 0.776, 0.822) -> Gaussian Process
Accuracy: 0.646 || AUROC 0.519 || (Accuracy, Precision) 0:( 0.318, 0.203)  1:( 0.719, 0.825) -> Decision Tree
Accuracy: 0.713 || AUROC 0.480 || (Accuracy, Precision) 0:( 0.114, 0.143)  1:( 0.847, 0.810) -> Multi-la

In [34]:
evaluateBalancerAgaintTestData(underSampler)

Accuracy: 0.579 || AUROC 0.540 || (Accuracy, Precision) 0:( 0.477, 0.212)  1:( 0.602, 0.837) -> Logistic regression
Accuracy: 0.438 || AUROC 0.541 || (Accuracy, Precision) 0:( 0.705, 0.203)  1:( 0.378, 0.851) -> SGDClassifier
Accuracy: 0.579 || AUROC 0.469 || (Accuracy, Precision) 0:( 0.295, 0.157)  1:( 0.643, 0.803) -> KNearest Neighbors (5)
Accuracy: 0.583 || AUROC 0.525 || (Accuracy, Precision) 0:( 0.432, 0.202)  1:( 0.617, 0.829) -> SVM-rbf
Accuracy: 0.512 || AUROC 0.525 || (Accuracy, Precision) 0:( 0.545, 0.198)  1:( 0.505, 0.832) -> SMV-linear
Accuracy: 0.808 || AUROC 0.530 || (Accuracy, Precision) 0:( 0.091, 0.400)  1:( 0.969, 0.826) -> Gaussian naive bayes
Accuracy: 0.604 || AUROC 0.529 || (Accuracy, Precision) 0:( 0.409, 0.207)  1:( 0.648, 0.830) -> Gaussian Process
Accuracy: 0.596 || AUROC 0.559 || (Accuracy, Precision) 0:( 0.500, 0.227)  1:( 0.617, 0.846) -> Decision Tree
Accuracy: 0.604 || AUROC 0.511 || (Accuracy, Precision) 0:( 0.364, 0.193)  1:( 0.658, 0.822) -> Multi-la

In [35]:
evaluateBalancerAgaintTestData(centroidSampler)

Accuracy: 0.567 || AUROC 0.567 || (Accuracy, Precision) 0:( 0.568, 0.227)  1:( 0.566, 0.854) -> Logistic regression
Accuracy: 0.642 || AUROC 0.507 || (Accuracy, Precision) 0:( 0.295, 0.191)  1:( 0.719, 0.820) -> SGDClassifier
Accuracy: 0.592 || AUROC 0.468 || (Accuracy, Precision) 0:( 0.273, 0.154)  1:( 0.663, 0.802) -> KNearest Neighbors (5)
Accuracy: 0.637 || AUROC 0.487 || (Accuracy, Precision) 0:( 0.250, 0.169)  1:( 0.724, 0.811) -> SVM-rbf
Accuracy: 0.533 || AUROC 0.556 || (Accuracy, Precision) 0:( 0.591, 0.217)  1:( 0.520, 0.850) -> SMV-linear
Accuracy: 0.808 || AUROC 0.521 || (Accuracy, Precision) 0:( 0.068, 0.375)  1:( 0.974, 0.823) -> Gaussian naive bayes
Accuracy: 0.675 || AUROC 0.484 || (Accuracy, Precision) 0:( 0.182, 0.160)  1:( 0.786, 0.811) -> Gaussian Process
Accuracy: 0.571 || AUROC 0.535 || (Accuracy, Precision) 0:( 0.477, 0.208)  1:( 0.592, 0.835) -> Decision Tree
Accuracy: 0.604 || AUROC 0.529 || (Accuracy, Precision) 0:( 0.409, 0.207)  1:( 0.648, 0.830) -> Multi-la

In [36]:
evaluateBalancerAgaintTestData(smoteeenSampler)

Accuracy: 0.629 || AUROC 0.614 || (Accuracy, Precision) 0:( 0.591, 0.268)  1:( 0.638, 0.874) -> Logistic regression
Accuracy: 0.650 || AUROC 0.557 || (Accuracy, Precision) 0:( 0.409, 0.237)  1:( 0.704, 0.841) -> SGDClassifier
Accuracy: 0.529 || AUROC 0.535 || (Accuracy, Precision) 0:( 0.545, 0.205)  1:( 0.526, 0.837) -> KNearest Neighbors (5)
Accuracy: 0.575 || AUROC 0.528 || (Accuracy, Precision) 0:( 0.455, 0.204)  1:( 0.602, 0.831) -> SVM-rbf
Accuracy: 0.637 || AUROC 0.602 || (Accuracy, Precision) 0:( 0.545, 0.264)  1:( 0.658, 0.866) -> SMV-linear
Accuracy: 0.771 || AUROC 0.542 || (Accuracy, Precision) 0:( 0.182, 0.296)  1:( 0.903, 0.831) -> Gaussian naive bayes
Accuracy: 0.537 || AUROC 0.523 || (Accuracy, Precision) 0:( 0.500, 0.198)  1:( 0.546, 0.829) -> Gaussian Process
Accuracy: 0.579 || AUROC 0.540 || (Accuracy, Precision) 0:( 0.477, 0.212)  1:( 0.602, 0.837) -> Decision Tree
Accuracy: 0.600 || AUROC 0.526 || (Accuracy, Precision) 0:( 0.409, 0.205)  1:( 0.643, 0.829) -> Multi-la

In [27]:
evaluateBalancerAgaintTestData(tomekSampler)

Accuracy: 0.646 || AUROC 0.545 || (Accuracy, Precision) 0:( 0.386, 0.227)  1:( 0.704, 0.836) -> Logistic regression
Accuracy: 0.600 || AUROC 0.526 || (Accuracy, Precision) 0:( 0.409, 0.205)  1:( 0.643, 0.829) -> SGDClassifier
Accuracy: 0.800 || AUROC 0.507 || (Accuracy, Precision) 0:( 0.045, 0.250)  1:( 0.969, 0.819) -> KNearest Neighbors (5)
Accuracy: 0.613 || AUROC 0.525 || (Accuracy, Precision) 0:( 0.386, 0.205)  1:( 0.663, 0.828) -> SVM-rbf
Accuracy: 0.633 || AUROC 0.546 || (Accuracy, Precision) 0:( 0.409, 0.225)  1:( 0.684, 0.838) -> SMV-linear
Accuracy: 0.804 || AUROC 0.519 || (Accuracy, Precision) 0:( 0.068, 0.333)  1:( 0.969, 0.823) -> Gaussian naive bayes
Accuracy: 0.779 || AUROC 0.512 || (Accuracy, Precision) 0:( 0.091, 0.235)  1:( 0.934, 0.821) -> Gaussian Process
Accuracy: 0.613 || AUROC 0.551 || (Accuracy, Precision) 0:( 0.455, 0.225)  1:( 0.648, 0.841) -> Decision Tree
Accuracy: 0.721 || AUROC 0.521 || (Accuracy, Precision) 0:( 0.205, 0.220)  1:( 0.837, 0.824) -> Multi-la