## 1. Load Datasets

In [1]:
## Utils
import re
import numpy as np
import pandas as pd
import itertools

## Classical Learner
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, cross_validate

In [2]:
"""Dataset Parameter Setting"""
"""Load Dataset"""
r6g_dset = pd.read_csv("C:/Users/sypark/Desktop/Projects/w_MinSeok/1SERSNet/2data/_preprocessed/sersnet_devset.csv")
r6g_tset1 = pd.read_csv("C:/Users/sypark/Desktop/Projects/w_MinSeok/1SERSNet/2data/_preprocessed/sersnet_batch1_devset.csv")
r6g_tset2 = pd.read_csv("C:/Users/sypark/Desktop/Projects/w_MinSeok/1SERSNet/2data/_preprocessed/sersnet_devse_batch3_1.csv")
r6g_tset3 = pd.read_csv("C:/Users/sypark/Desktop/Projects/w_MinSeok/1SERSNet/2data/_preprocessed/sersnet_devse_batch3_2.csv")
r6g_tset4 = pd.read_csv("C:/Users/sypark/Desktop/Projects/w_MinSeok/1SERSNet/2data/_preprocessed/sersnet_devse_batch3_3.csv")

"""Set Output Path"""
fileout = "C:/Users/sypark/Desktop/Projects/w_MinSeok/1SERSNet/3results/R6G_Classification/raw_data/baseline_r6g_model_output_dev_to_test.csv"

In [3]:
r6g_dset = r6g_dset.iloc[:,1:]
r6g_tset1 = r6g_tset1.iloc[:,2:]

In [4]:
X_dset = r6g_dset.iloc[:, 1:].to_numpy(dtype='float32')
X_tset1 = r6g_tset1.iloc[:, 1:].to_numpy(dtype='float32')
X_tset2 = r6g_tset2.iloc[:, 1:].to_numpy(dtype='float32')
X_tset3 = r6g_tset3.iloc[:, 1:].to_numpy(dtype='float32')
X_tset4 = r6g_tset4.iloc[:, 1:].to_numpy(dtype='float32')

y_dset = (r6g_dset.label>=1).to_numpy(dtype='int64') ## 0: DI-water vs 1:R6G 10 mM, 10 uM
y_tset1 = r6g_tset1.iloc[:,0].to_numpy()              ## 0: DI-water vs 1:R6G 10 mM, 10 uM
y_tset2 = r6g_tset2.iloc[:,0].to_numpy()
y_tset3 = r6g_tset3.iloc[:,0].to_numpy()
y_tset4 = r6g_tset4.iloc[:,0].to_numpy()

In [5]:
# from sklearn.decomposition import PCA
# pca = PCA(n_components=10)
# pca.fit(X_dset)
# X_dset = pca.transform(X_dset)
# X_tset1 = pca.transform(X_tset1)
# X_tset2 = pca.transform(X_tset2)
# X_tset3 = pca.transform(X_tset3)
# X_tset4 = pca.transform(X_tset4)

## 2. Train Test Split

In [6]:
## 4.4 train and test split for Classical Learner
X_train, X_test, y_train, y_test = train_test_split(X_dset, y_dset, test_size = 0.2, 
                                                    random_state=123)

## 3. Classical Learner

### 3.1 Naive Bayes

In [7]:
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import BernoulliNB

In [8]:
## Learn Classifier
scaler = StandardScaler()
clf = BernoulliNB()
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True)

In [9]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score 
from sklearn.metrics import roc_auc_score, precision_score, recall_score, matthews_corrcoef, average_precision_score

yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_test, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_test, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_test, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_test, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_test, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_test, yp_test),3)))

[1mConfusion Matrix[0m
[[ 94   0]
 [ 91 115]]
[1mMetrics[0m
ACC: 0.697
BACC: 0.779
F1: 0.717
AUROC: 0.782
AP: 0.863
MCC: 0.533
Precision: 1.0
Recall: 0.558


In [10]:
NB_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test),3), round(roc_auc_score(y_test, ys_test),3),
                 round(average_precision_score(y_test, ys_test),3), round(matthews_corrcoef(y_test, yp_test),3),
                 round(precision_score(y_test, yp_test),3), round(recall_score(y_test, yp_test),3)]

In [11]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset1)
y_tset = y_tset1
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[500   0]
 [142 409]]
[1mMetrics[0m
ACC: 0.865
BACC: 0.871
F1: 0.852
AUROC: 0.896
AP: 0.901
MCC: 0.76
Precision: 1.0
Recall: 0.742


In [12]:
NB_tset1_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [13]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset2)
y_tset = y_tset2
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[499   1]
 [  0 500]]
[1mMetrics[0m
ACC: 0.999
BACC: 0.999
F1: 0.999
AUROC: 0.999
AP: 0.998
MCC: 0.998
Precision: 0.998
Recall: 1.0


In [14]:
NB_tset2_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [15]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset3)
y_tset = y_tset3
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[499   1]
 [500   0]]
[1mMetrics[0m
ACC: 0.499
BACC: 0.499
F1: 0.0
AUROC: 0.499
AP: 0.5
MCC: -0.032
Precision: 0.0
Recall: 0.0


In [16]:
NB_tset3_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [17]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset4)
y_tset = y_tset4
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[499   1]
 [500   0]]
[1mMetrics[0m
ACC: 0.499
BACC: 0.499
F1: 0.0
AUROC: 0.499
AP: 0.5
MCC: -0.032
Precision: 0.0
Recall: 0.0


In [18]:
NB_tset4_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [19]:
outF = open(fileout, "a")
outF.write("Naive Bayes, ")
outF.write("ACC, BACC, F1, AUROC, Average_Precision, MCC, Precision, Recall\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, NB_devset_res)))
outF.write('\n')
outF.write('IndSet1, ')
outF.write(', '.join(map(str, NB_tset1_res)))
outF.write('\n')
outF.write('IndSet2, ')
outF.write(', '.join(map(str, NB_tset2_res)))
outF.write('\n')
outF.write('IndSet3, ')
outF.write(', '.join(map(str, NB_tset3_res)))
outF.write('\n')
outF.write('IndSet4, ')
outF.write(', '.join(map(str, NB_tset4_res)))
outF.write('\n')
outF.close()

### 3.2 Decision Tree

In [20]:
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

In [21]:
## Learn Classifier
scaler = StandardScaler()
clf = DecisionTreeClassifier()
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

In [22]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score 
from sklearn.metrics import roc_auc_score, precision_score, recall_score, matthews_corrcoef, average_precision_score

yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_test, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_test, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_test, yp_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_test, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_test, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_test, yp_test),3)))

[1mConfusion Matrix[0m
[[ 94   0]
 [  1 205]]
[1mMetrics[0m
ACC: 0.997
BACC: 0.998
F1: 0.998
AUROC: 0.998
AP: 0.998
MCC: 0.992
Precision: 1.0
Recall: 0.995


In [23]:
DT_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test),3), round(roc_auc_score(y_test, ys_test),3),
                 round(average_precision_score(y_test, ys_test),3), round(matthews_corrcoef(y_test, yp_test),3),
                 round(precision_score(y_test, yp_test),3), round(recall_score(y_test, yp_test),3)]

In [24]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset1)
y_tset = y_tset1
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[500   0]
 [ 81 470]]
[1mMetrics[0m
ACC: 0.923
BACC: 0.926
F1: 0.921
AUROC: 0.926
AP: 0.93
MCC: 0.857
Precision: 1.0
Recall: 0.853


In [25]:
DT_tset1_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [26]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset2)
y_tset = y_tset2
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[485  15]
 [  0 500]]
[1mMetrics[0m
ACC: 0.985
BACC: 0.985
F1: 0.985
AUROC: 0.985
AP: 0.971
MCC: 0.97
Precision: 0.971
Recall: 1.0


In [27]:
DT_tset2_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [28]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset3)
y_tset = y_tset3
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[485  15]
 [  0 500]]
[1mMetrics[0m
ACC: 0.985
BACC: 0.985
F1: 0.985
AUROC: 0.985
AP: 0.971
MCC: 0.97
Precision: 0.971
Recall: 1.0


In [29]:
DT_tset3_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [30]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset4)
y_tset = y_tset4
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[485  15]
 [  0 500]]
[1mMetrics[0m
ACC: 0.985
BACC: 0.985
F1: 0.985
AUROC: 0.985
AP: 0.971
MCC: 0.97
Precision: 0.971
Recall: 1.0


In [31]:
DT_tset4_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [32]:
outF = open(fileout, "a")
outF.write("Decision_Tree, ")
outF.write("ACC, BACC, F1, AUROC, Average_Precision, MCC, Precision, Recall\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, DT_devset_res)))
outF.write('\n')
outF.write('IndSet1, ')
outF.write(', '.join(map(str, DT_tset1_res)))
outF.write('\n')
outF.write('IndSet2, ')
outF.write(', '.join(map(str, DT_tset2_res)))
outF.write('\n')
outF.write('IndSet3, ')
outF.write(', '.join(map(str, DT_tset3_res)))
outF.write('\n')
outF.write('IndSet4, ')
outF.write(', '.join(map(str, DT_tset4_res)))
outF.write('\n')
outF.close()

### 3.3 Logistic Regression Classifier

In [33]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [34]:
## Learn Classifier
scaler = StandardScaler()
clf = LogisticRegression()
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [35]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score 
from sklearn.metrics import roc_auc_score, precision_score, recall_score, matthews_corrcoef, average_precision_score

yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_test, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_test, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_test, yp_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_test, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_test, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_test, yp_test),3)))

[1mConfusion Matrix[0m
[[ 94   0]
 [  0 206]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1: 1.0
AUROC: 1.0
AP: 1.0
MCC: 1.0
Precision: 1.0
Recall: 1.0


In [36]:
LR_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test),3), round(roc_auc_score(y_test, ys_test),3),
                 round(average_precision_score(y_test, ys_test),3), round(matthews_corrcoef(y_test, yp_test),3),
                 round(precision_score(y_test, yp_test),3), round(recall_score(y_test, yp_test),3)]

In [37]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset1)
y_tset = y_tset1
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[500   0]
 [  0 551]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1: 1.0
AUROC: 1.0
AP: 1.0
MCC: 1.0
Precision: 1.0
Recall: 1.0


In [38]:
LR_tset1_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [39]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset2)
y_tset = y_tset2
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[500   0]
 [  0 500]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1: 1.0
AUROC: 1.0
AP: 1.0
MCC: 1.0
Precision: 1.0
Recall: 1.0


In [40]:
LR_tset2_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [41]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset3)
y_tset = y_tset3
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[500   0]
 [  0 500]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1: 1.0
AUROC: 1.0
AP: 1.0
MCC: 1.0
Precision: 1.0
Recall: 1.0


In [42]:
LR_tset3_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [43]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset4)
y_tset = y_tset4
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[500   0]
 [  0 500]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1: 1.0
AUROC: 1.0
AP: 1.0
MCC: 1.0
Precision: 1.0
Recall: 1.0


In [44]:
LR_tset4_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [45]:
outF = open(fileout, "a")
outF.write("Logistic_Regression, ")
outF.write("ACC, BACC, F1, AUROC, Average_Precision, MCC, Precision, Recall\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, LR_devset_res)))
outF.write('\n')
outF.write('IndSet1, ')
outF.write(', '.join(map(str, LR_tset1_res)))
outF.write('\n')
outF.write('IndSet2, ')
outF.write(', '.join(map(str, LR_tset2_res)))
outF.write('\n')
outF.write('IndSet3, ')
outF.write(', '.join(map(str, LR_tset3_res)))
outF.write('\n')
outF.write('IndSet4, ')
outF.write(', '.join(map(str, LR_tset4_res)))
outF.write('\n')
outF.close()

### 3.3 Logistic Regression with CV

In [46]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegressionCV

In [47]:
## Learn Classifier
scaler = StandardScaler()
clf = LogisticRegressionCV()
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

LogisticRegressionCV(Cs=10, class_weight=None, cv=None, dual=False,
                     fit_intercept=True, intercept_scaling=1.0, l1_ratios=None,
                     max_iter=100, multi_class='auto', n_jobs=None,
                     penalty='l2', random_state=None, refit=True, scoring=None,
                     solver='lbfgs', tol=0.0001, verbose=0)

In [48]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score 
from sklearn.metrics import roc_auc_score, precision_score, recall_score, matthews_corrcoef, average_precision_score

yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_test, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_test, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_test, yp_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_test, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_test, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_test, yp_test),3)))

[1mConfusion Matrix[0m
[[ 94   0]
 [  0 206]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1: 1.0
AUROC: 1.0
AP: 1.0
MCC: 1.0
Precision: 1.0
Recall: 1.0


In [49]:
LR_CV_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test),3), round(roc_auc_score(y_test, ys_test),3),
                 round(average_precision_score(y_test, ys_test),3), round(matthews_corrcoef(y_test, yp_test),3),
                 round(precision_score(y_test, yp_test),3), round(recall_score(y_test, yp_test),3)]

In [50]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset1)
y_tset = y_tset1
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[500   0]
 [ 14 537]]
[1mMetrics[0m
ACC: 0.987
BACC: 0.987
F1: 0.987
AUROC: 0.999
AP: 0.999
MCC: 0.974
Precision: 1.0
Recall: 0.975


In [51]:
LR_CV_tset1_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [52]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset2)
y_tset = y_tset2
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[498   2]
 [  0 500]]
[1mMetrics[0m
ACC: 0.998
BACC: 0.998
F1: 0.998
AUROC: 1.0
AP: 1.0
MCC: 0.996
Precision: 0.996
Recall: 1.0


In [53]:
LR_CV_tset2_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [54]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset3)
y_tset = y_tset3
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[498   2]
 [290 210]]
[1mMetrics[0m
ACC: 0.708
BACC: 0.708
F1: 0.59
AUROC: 0.996
AP: 0.994
MCC: 0.509
Precision: 0.991
Recall: 0.42


In [55]:
LR_CV_tset3_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [56]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset3)
y_tset = y_tset4
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[498   2]
 [290 210]]
[1mMetrics[0m
ACC: 0.708
BACC: 0.708
F1: 0.59
AUROC: 0.996
AP: 0.994
MCC: 0.509
Precision: 0.991
Recall: 0.42


In [57]:
LR_CV_tset4_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [58]:
outF = open(fileout, "a")
outF.write("Logistic_Regression_CV, ")
outF.write("ACC, BACC, F1, AUROC, Average_Precision, MCC, Precision, Recall\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, LR_CV_devset_res)))
outF.write('\n')
outF.write('IndSet1, ')
outF.write(', '.join(map(str, LR_CV_tset1_res)))
outF.write('\n')
outF.write('IndSet2, ')
outF.write(', '.join(map(str, LR_CV_tset2_res)))
outF.write('\n')
outF.write('IndSet3, ')
outF.write(', '.join(map(str, LR_CV_tset3_res)))
outF.write('\n')
outF.write('IndSet4, ')
outF.write(', '.join(map(str, LR_CV_tset4_res)))
outF.write('\n')
outF.close()

### 3.4 MLP

In [59]:
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier

In [60]:
## Learn Classifier
scaler = StandardScaler()
clf = MLPClassifier()
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [61]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score 
from sklearn.metrics import roc_auc_score, precision_score, recall_score, matthews_corrcoef, average_precision_score

yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_test, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_test, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_test, yp_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_test, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_test, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_test, yp_test),3)))

[1mConfusion Matrix[0m
[[ 94   0]
 [  0 206]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1: 1.0
AUROC: 1.0
AP: 1.0
MCC: 1.0
Precision: 1.0
Recall: 1.0


In [62]:
MLP_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test),3), round(roc_auc_score(y_test, ys_test),3),
                 round(average_precision_score(y_test, ys_test),3), round(matthews_corrcoef(y_test, yp_test),3),
                 round(precision_score(y_test, yp_test),3), round(recall_score(y_test, yp_test),3)]

In [63]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset1)
y_tset = y_tset1
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[499   1]
 [ 22 529]]
[1mMetrics[0m
ACC: 0.978
BACC: 0.979
F1: 0.979
AUROC: 1.0
AP: 1.0
MCC: 0.957
Precision: 0.998
Recall: 0.96


In [64]:
MLP_tset1_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [65]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset2)
y_tset = y_tset2
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[498   2]
 [124 376]]
[1mMetrics[0m
ACC: 0.874
BACC: 0.874
F1: 0.856
AUROC: 0.998
AP: 0.997
MCC: 0.771
Precision: 0.995
Recall: 0.752


In [66]:
MLP_tset2_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [67]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset3)
y_tset = y_tset3
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[498   2]
 [  0 500]]
[1mMetrics[0m
ACC: 0.998
BACC: 0.998
F1: 0.998
AUROC: 1.0
AP: 1.0
MCC: 0.996
Precision: 0.996
Recall: 1.0


In [68]:
MLP_tset3_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [69]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset4)
y_tset = y_tset4
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[498   2]
 [  0 500]]
[1mMetrics[0m
ACC: 0.998
BACC: 0.998
F1: 0.998
AUROC: 1.0
AP: 1.0
MCC: 0.996
Precision: 0.996
Recall: 1.0


In [70]:
MLP_tset4_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [71]:
outF = open(fileout, "a")
outF.write("MLP, ")
outF.write("ACC, BACC, F1, AUROC, Average_Precision, MCC, Precision, Recall\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, MLP_devset_res)))
outF.write('\n')
outF.write('IndSet1, ')
outF.write(', '.join(map(str, MLP_tset1_res)))
outF.write('\n')
outF.write('IndSet2, ')
outF.write(', '.join(map(str, MLP_tset2_res)))
outF.write('\n')
outF.write('IndSet3, ')
outF.write(', '.join(map(str, MLP_tset3_res)))
outF.write('\n')
outF.write('IndSet4, ')
outF.write(', '.join(map(str, MLP_tset4_res)))
outF.write('\n')
outF.close()

### 3.5 Random Forest

In [72]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

In [73]:
## Learn Classifier
scaler = StandardScaler()
clf = RandomForestClassifier()
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [74]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score 
from sklearn.metrics import roc_auc_score, precision_score, recall_score, matthews_corrcoef, average_precision_score

yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_test, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_test, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_test, yp_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_test, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_test, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_test, yp_test),3)))

[1mConfusion Matrix[0m
[[ 93   1]
 [  0 206]]
[1mMetrics[0m
ACC: 0.997
BACC: 0.995
F1: 0.998
AUROC: 1.0
AP: 0.995
MCC: 0.992
Precision: 0.995
Recall: 1.0


In [75]:
RF_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test),3), round(roc_auc_score(y_test, ys_test),3),
                 round(average_precision_score(y_test, ys_test),3), round(matthews_corrcoef(y_test, yp_test),3),
                 round(precision_score(y_test, yp_test),3), round(recall_score(y_test, yp_test),3)]

In [76]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset1)
y_tset = y_tset1
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[499   1]
 [ 23 528]]
[1mMetrics[0m
ACC: 0.977
BACC: 0.978
F1: 0.978
AUROC: 1.0
AP: 1.0
MCC: 0.955
Precision: 0.998
Recall: 0.958


In [77]:
RF_tset1_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [78]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset2)
y_tset = y_tset2
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[494   6]
 [  0 500]]
[1mMetrics[0m
ACC: 0.994
BACC: 0.994
F1: 0.994
AUROC: 1.0
AP: 1.0
MCC: 0.988
Precision: 0.988
Recall: 1.0


In [79]:
RF_tset2_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [80]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset3)
y_tset = y_tset3
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[494   6]
 [  0 500]]
[1mMetrics[0m
ACC: 0.994
BACC: 0.994
F1: 0.994
AUROC: 1.0
AP: 1.0
MCC: 0.988
Precision: 0.988
Recall: 1.0


In [81]:
RF_tset3_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [82]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset4)
y_tset = y_tset4
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[494   6]
 [  0 500]]
[1mMetrics[0m
ACC: 0.994
BACC: 0.994
F1: 0.994
AUROC: 1.0
AP: 1.0
MCC: 0.988
Precision: 0.988
Recall: 1.0


In [83]:
RF_tset4_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [84]:
outF = open(fileout, "a")
outF.write("RandomForest, ")
outF.write("ACC, BACC, F1, AUROC, Average_Precision, MCC, Precision, Recall\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, RF_devset_res)))
outF.write('\n')
outF.write('IndSet1, ')
outF.write(', '.join(map(str, RF_tset1_res)))
outF.write('\n')
outF.write('IndSet2, ')
outF.write(', '.join(map(str, RF_tset2_res)))
outF.write('\n')
outF.write('IndSet3, ')
outF.write(', '.join(map(str, RF_tset3_res)))
outF.write('\n')
outF.write('IndSet4, ')
outF.write(', '.join(map(str, RF_tset4_res)))
outF.write('\n')
outF.close()

### 3.6 Linear SVM

In [85]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [86]:
## Learn Classifier
scaler = StandardScaler()
clf = SVC(kernel = 'linear', probability=True)
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [87]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score 
from sklearn.metrics import roc_auc_score, precision_score, recall_score, matthews_corrcoef, average_precision_score

yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_test, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_test, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_test, yp_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_test, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_test, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_test, yp_test),3)))

[1mConfusion Matrix[0m
[[ 94   0]
 [  0 206]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1: 1.0
AUROC: 1.0
AP: 1.0
MCC: 1.0
Precision: 1.0
Recall: 1.0


In [88]:
LinSVM_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test),3), round(roc_auc_score(y_test, ys_test),3),
                 round(average_precision_score(y_test, ys_test),3), round(matthews_corrcoef(y_test, yp_test),3),
                 round(precision_score(y_test, yp_test),3), round(recall_score(y_test, yp_test),3)]

In [89]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset1)
y_tset = y_tset1
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[500   0]
 [ 45 506]]
[1mMetrics[0m
ACC: 0.957
BACC: 0.959
F1: 0.957
AUROC: 1.0
AP: 1.0
MCC: 0.918
Precision: 1.0
Recall: 0.918


In [90]:
LinSVM_tset1_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [91]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset2)
y_tset = y_tset2
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[500   0]
 [  0 500]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1: 1.0
AUROC: 1.0
AP: 1.0
MCC: 1.0
Precision: 1.0
Recall: 1.0


In [92]:
LinSVM_tset2_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [93]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset3)
y_tset = y_tset3
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[500   0]
 [  0 500]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1: 1.0
AUROC: 1.0
AP: 1.0
MCC: 1.0
Precision: 1.0
Recall: 1.0


In [94]:
LinSVM_tset3_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [95]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset4)
y_tset = y_tset4
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[500   0]
 [  0 500]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1: 1.0
AUROC: 1.0
AP: 1.0
MCC: 1.0
Precision: 1.0
Recall: 1.0


In [96]:
LinSVM_tset4_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [97]:
outF = open(fileout, "a")
outF.write("Lin_SVM, ")
outF.write("ACC, BACC, F1, AUROC, Average_Precision, MCC, Precision, Recall\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, LinSVM_devset_res)))
outF.write('\n')
outF.write('IndSet1, ')
outF.write(', '.join(map(str, LinSVM_tset1_res)))
outF.write('\n')
outF.write('IndSet2, ')
outF.write(', '.join(map(str, LinSVM_tset2_res)))
outF.write('\n')
outF.write('IndSet3, ')
outF.write(', '.join(map(str, LinSVM_tset3_res)))
outF.write('\n')
outF.write('IndSet4, ')
outF.write(', '.join(map(str, LinSVM_tset4_res)))
outF.write('\n')
outF.close()

### 3.7 RBF SVM (Nonlinear SVM)

In [98]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [99]:
## Learn Classifier
scaler = StandardScaler()
clf = SVC(kernel = 'rbf', probability=True)
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [100]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score 
from sklearn.metrics import roc_auc_score, precision_score, recall_score, matthews_corrcoef, average_precision_score

yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_test, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_test, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_test, yp_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_test, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_test, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_test, yp_test),3)))

[1mConfusion Matrix[0m
[[ 94   0]
 [  0 206]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1: 1.0
AUROC: 1.0
AP: 1.0
MCC: 1.0
Precision: 1.0
Recall: 1.0


In [101]:
RBFSVM_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test),3), round(roc_auc_score(y_test, ys_test),3),
                 round(average_precision_score(y_test, ys_test),3), round(matthews_corrcoef(y_test, yp_test),3),
                 round(precision_score(y_test, yp_test),3), round(recall_score(y_test, yp_test),3)]

In [102]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset1)
y_tset = y_tset1
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[499   1]
 [ 44 507]]
[1mMetrics[0m
ACC: 0.957
BACC: 0.959
F1: 0.958
AUROC: 0.999
AP: 0.999
MCC: 0.918
Precision: 0.998
Recall: 0.92


In [103]:
RBFSVM_tset1_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [104]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset2)
y_tset = y_tset2
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[498   2]
 [382 118]]
[1mMetrics[0m
ACC: 0.616
BACC: 0.616
F1: 0.381
AUROC: 0.996
AP: 0.979
MCC: 0.357
Precision: 0.983
Recall: 0.236


In [105]:
RBFSVM_tset2_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [106]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset3)
y_tset = y_tset3
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[498   2]
 [  0 500]]
[1mMetrics[0m
ACC: 0.998
BACC: 0.998
F1: 0.998
AUROC: 1.0
AP: 1.0
MCC: 0.996
Precision: 0.996
Recall: 1.0


In [107]:
RBFSVM_tset3_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [108]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset4)
y_tset = y_tset4
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)
ys_test = ys_test[:,1]

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1: {}'.format(round(f1_score(y_tset, yp_test),3)))
print('AUROC: {}'.format(round(roc_auc_score(y_tset, ys_test),3)))
print('AP: {}'.format(round(average_precision_score(y_tset, ys_test),3)))
print('MCC: {}'.format(round(matthews_corrcoef(y_tset, yp_test),3)))
print('Precision: {}'.format(round(precision_score(y_tset, yp_test),3)))
print('Recall: {}'.format(round(recall_score(y_tset, yp_test),3)))

[1mConfusion Matrix[0m
[[498   2]
 [  0 500]]
[1mMetrics[0m
ACC: 0.998
BACC: 0.998
F1: 0.998
AUROC: 1.0
AP: 1.0
MCC: 0.996
Precision: 0.996
Recall: 1.0


In [109]:
RBFSVM_tset4_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test),3), round(roc_auc_score(y_tset, ys_test),3),
                 round(average_precision_score(y_tset, ys_test),3), round(matthews_corrcoef(y_tset, yp_test),3),
                 round(precision_score(y_tset, yp_test),3), round(recall_score(y_tset, yp_test),3)]

In [110]:
outF = open(fileout, "a")
outF.write("RBF_SVM, ")
outF.write("ACC, BACC, F1, AUROC, Average_Precision, MCC, Precision, Recall\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, RBFSVM_devset_res)))
outF.write('\n')
outF.write('IndSet1, ')
outF.write(', '.join(map(str, RBFSVM_tset1_res)))
outF.write('\n')
outF.write('IndSet2, ')
outF.write(', '.join(map(str, RBFSVM_tset2_res)))
outF.write('\n')
outF.write('IndSet3, ')
outF.write(', '.join(map(str, RBFSVM_tset3_res)))
outF.write('\n')
outF.write('IndSet4, ')
outF.write(', '.join(map(str, RBFSVM_tset4_res)))
outF.write('\n')
outF.close()