In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from pprint import pprint
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression as LR
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.ensemble import RandomForestClassifier as RF
from sklearn.metrics import roc_auc_score as AUC, accuracy_score as accuracy
from sklearn.svm import SVC as SVM
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score,f1_score
from sklearn.model_selection import cross_val_score,KFold
from sklearn.linear_model import SGDClassifier as SGD
from sklearn.gaussian_process import GaussianProcessClassifier as GPC
from sklearn.ensemble import AdaBoostClassifier as ABC
from sklearn.naive_bayes import GaussianNB as NB
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier as DTC

In [2]:
# Read the training table from a path relative to the notebook's working directory.
# NOTE(review): the recorded preview lists a leading "Unnamed: 0" header. If that is
# a real column (a row index written into the CSV) rather than a rendering artifact,
# it will be carried into the feature matrix downstream -- TODO confirm.
data = pd.read_csv(filepath_or_buffer="data/train_file.csv")

# Show the first five rows as the cell's rich output.
data.head(n=5)

Unnamed: 0,0__spkt_welch_density__coeff_2,"0__fft_coefficient__coeff_1__attr_""abs""",0__partial_autocorrelation__lag_1,0__autocorrelation__lag_1,0__autocorrelation__lag_2,y
0,14323.358128,1141.209142,0.973038,0.973038,0.923763,1.0
1,208.004862,468.304575,0.889677,0.889677,0.75552,0.0
2,4685.229641,660.325661,0.992158,0.992158,0.961695,1.0
3,11895.273616,804.60459,0.964971,0.964971,0.897135,1.0
4,4328.383081,1086.189694,0.982644,0.982644,0.935608,1.0


In [3]:
# Separate the target column 'y' from the remaining columns, which become the features.
# `data` itself is left untouched: `drop` returns a new frame.
train_data = data.drop('y', axis=1)
labels = data.loc[:, 'y']

In [10]:
# Candidate models, compared under the same cross-validation loop.
# The order here must match `classifiers_names` one-to-one (11 entries).

# Single seed shared by every estimator that draws random numbers, so that a
# fresh "Restart & Run All" reproduces the same scores.
SEED = 42

classifiers = [
    # Logistic regression under two different feature scalings.
    make_pipeline(StandardScaler(), LR()),
    make_pipeline(MinMaxScaler(), LR()),
    # SVC with its default kernel; probability=True additionally fits a
    # probability calibration so predict_proba is available.
    make_pipeline(MinMaxScaler(), SVM(gamma='auto', probability=True)),
    make_pipeline(StandardScaler(), SVM(kernel='poly', degree=3, coef0=1, C=5)),
    # Kernel SVMs are not scale invariant, so the RBF variant is standardised
    # like the other SVM entries instead of being fed raw features.
    # Hyper-parameters (gamma, C) are kept as originally chosen.
    make_pipeline(StandardScaler(), SVM(kernel='rbf', gamma=5, C=0.01)),
    RF(n_estimators=100, min_samples_leaf=5, random_state=SEED),
    # SGD is sensitive to feature scale as well; standardise before fitting.
    make_pipeline(StandardScaler(), SGD(random_state=SEED)),
    GPC(1.0 * RBF(1.0)),
    ABC(random_state=SEED),
    NB(),
    DTC(max_depth=5, random_state=SEED),
]

In [11]:
# Display labels for the evaluation report, listed in the same order as `classifiers`.
classifiers_names = [
    "Logistic Regression(Standard Scalar)",
    "Logistic Regression(MinMax Scalar)",
    "SVM",
    "SVM(polynomial kernel)",
    "SVM(RBF kernel)",
    "RandomForest",
    "Stocahastic gradient descent",
    "Gaussian Process Classifier",
    "Ada boost",
    "Naive bayes",
    "Decision Trees",
]

In [12]:
# Evaluate every candidate model with 5-fold cross-validation and print the
# fold-averaged accuracy, precision, recall and F1 for each one.

# Names and models are paired positionally; fail early if the lists drift apart
# instead of mislabelling (or silently skipping) a result.
assert len(classifiers) == len(classifiers_names), \
    "classifiers and classifiers_names must have the same length"

# Without shuffling the splitter is deterministic (contiguous folds), so a single
# instance can be shared by every model and all models see identical folds.
# NOTE(review): contiguous folds assume the rows are not ordered by class -- TODO confirm.
kf = KFold(n_splits=5)

for name, clf in zip(classifiers_names, classifiers):

    # Per-fold scores for the current model.
    acc = []
    precision = []
    recall = []
    f1_list = []

    for train_index, test_index in kf.split(train_data):
        # KFold yields positional indices, so features AND labels are both
        # sliced with .iloc; label-based `labels[...]` only agrees with this
        # when the Series happens to carry a default 0..n-1 index.
        X_train_split = train_data.iloc[train_index]
        X_test_split = train_data.iloc[test_index]
        y_train_split = labels.iloc[train_index]
        y_test_split = labels.iloc[test_index]

        # fit() re-estimates the model from scratch on this fold's training part.
        clf.fit(X_train_split, y_train_split)
        y_pred = clf.predict(X_test_split)

        acc.append(accuracy(y_test_split, y_pred))
        # With warnings globally silenced at the top of the notebook, a fold in
        # which the positive class is never predicted shows up only as a 0.0 here.
        precision.append(precision_score(y_test_split, y_pred))
        recall.append(recall_score(y_test_split, y_pred))
        f1_list.append(f1_score(y_test_split, y_pred))

    print("\n\nFor Classifier:", name)
    print("--------------------------------------")
    print("Accuracy:", sum(acc)/len(acc))
    print("Precision:", sum(precision)/len(precision))
    print("Recall:", sum(recall)/len(recall))
    print("F1 - Score:", sum(f1_list)/len(f1_list))



For Classifier: Logistic Regression(Standard Scalar)
--------------------------------------
Accuracy: 0.633846153846154
Precision: 0.615806007226739
Recall: 0.5981151897169694
F1 - Score: 0.5987556280998904


For Classifier: Logistic Regression(MinMax Scalar)
--------------------------------------
Accuracy: 0.6061538461538462
Precision: 0.5841310541310542
Recall: 0.5612038066988012
F1 - Score: 0.5658289499318137


For Classifier: SVM
--------------------------------------
Accuracy: 0.6153846153846153
Precision: 0.599175717070454
Recall: 0.5661166728463725
F1 - Score: 0.5717814226003881


For Classifier: SVM(polynomial kernel)
--------------------------------------
Accuracy: 0.64
Precision: 0.6715852130325815
Recall: 0.4846866889136076
F1 - Score: 0.5564888106791802


For Classifier: SVM(RBF kernel)
--------------------------------------
Accuracy: 0.5200000000000001
Precision: 0.0
Recall: 0.0
F1 - Score: 0.0


For Classifier: RandomForest
--------------------------------------
Accurac