In [1]:
import pandas as pd

from pprint import pprint
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression as LR
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.ensemble import RandomForestClassifier as RF
from sklearn.metrics import roc_auc_score as AUC, accuracy_score as accuracy
from sklearn.svm import SVC as SVM
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

In [2]:
# Locations of the pre-split training and test sets, relative to the notebook.
train_path = "data/train_file.csv"
test_path = "data/test_file.csv"

# Load both splits into DataFrames.
train = pd.read_csv( train_path )
test = pd.read_csv( test_path )

In [3]:
# Features are every column except the label column 'y'; labels are 'y' itself.
# Both are converted to NumPy arrays via .values.
x_train = train.drop( columns = 'y' ).values
y_train = train[ 'y' ].values

x_test = test.drop( columns = 'y' ).values
y_test = test[ 'y' ].values

In [4]:
# Sanity check: display the (rows, columns) shape of the test feature matrix.
x_test.shape

(161, 5)

In [5]:
# Fixed seed so the stochastic estimators (SVC's probability calibration and the
# random forest) produce the same scores on every Restart & Run All.
SEED = 42

# Candidate models to compare. Scalers are wrapped in pipelines together with
# their estimator, so scaling is fitted inside clf.fit() on the training data.
classifiers = [
    make_pipeline( StandardScaler(), LR()),
    make_pipeline( MinMaxScaler(), LR()),
    make_pipeline( MinMaxScaler(), SVM( gamma = 'auto', probability = True, random_state = SEED )),
    RF( n_estimators = 100, min_samples_leaf = 5, random_state = SEED )
]

# Fit each candidate on the training split and report its metrics on the test split.
for clf in classifiers:

    clf.fit( x_train, y_train )

    # Second column of predict_proba is used as the score for AUC
    # (presumably the positive class -- confirm against clf.classes_).
    p = clf.predict_proba( x_test )[:,1]
    # Hard class predictions for the threshold-based metrics.
    p_bin = clf.predict( x_test )

    auc = AUC( y_test, p )
    acc = accuracy( y_test, p_bin )
    # NOTE(review): with average='weighted', recall is the support-weighted mean of
    # per-class recall, which is always equal to plain accuracy. If positive-class
    # precision/recall is what is wanted, consider average='binary' instead.
    precision = precision_score( y_test, p_bin, average = 'weighted' )
    recall = recall_score( y_test, p_bin, average = 'weighted' )

    print( "AUC: {:.2%}, accuracy: {:.2%} \n\n{}\n\n".format( auc, acc, clf ))
    print("Precision Score :" + str(precision))
    print("Recall Score : " + str(recall))

AUC: 72.56%, accuracy: 65.22% 

Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('logisticregression',
                 LogisticRegression(C=1.0, class_weight=None, dual=False,
                                    fit_intercept=True, intercept_scaling=1,
                                    l1_ratio=None, max_iter=100,
                                    multi_class='warn', n_jobs=None,
                                    penalty='l2', random_state=None,
                                    solver='warn', tol=0.0001, verbose=0,
                                    warm_start=False))],
         verbose=False)


Precision Score :0.6551891237044016
Recall Score : 0.6521739130434783
AUC: 73.16%, accuracy: 66.46% 

Pipeline(memory=None,
         steps=[('minmaxscaler', MinMaxScaler(copy=True, feature_range=(0, 1))),
                ('logisticregression',
                 LogisticRegression(C=1.0

