## 1. Load Datasets

In [1]:
## Utils
import re
import numpy as np
import pandas as pd
import itertools

## Classical Learner
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, cross_validate

In [2]:
"""Dataset Parameter Setting"""
"""Load Dataset"""
hm_tset = pd.read_csv("C:/Users/sypark/Desktop/Projects/w_MinSeok/1SERSNet/2data/_preprocessed/sers_net_hm_dev.csv")
hm_dset = pd.read_csv("C:/Users/sypark/Desktop/Projects/w_MinSeok/1SERSNet/2data/_preprocessed/sers_net_hm_test.csv")

"""Set Output Path"""
fileout = "C:/Users/sypark/Desktop/Projects/w_MinSeok/1SERSNet/3results/HM_Classification/raw_data/baseline_hm_model_output_test_to_dev.csv"

In [3]:
X_dset = hm_dset.iloc[:,1:].to_numpy()
X_tset = hm_tset.iloc[:,1:].to_numpy()
y_dset = hm_dset.iloc[:,0].to_numpy()
y_tset = hm_tset.iloc[:,0].to_numpy()

In [4]:
# from sklearn.decomposition import PCA
# pca = PCA(n_components=10)
# pca.fit(X_dset)
# X_dset = pca.transform(X_dset)
# X_tset = pca.transform(X_tset)

In [5]:
## One-hot-encoding by pandas way
y = pd.get_dummies(hm_dset.label, prefix='label')

In [6]:
from sklearn.preprocessing import LabelEncoder
enc = LabelEncoder()
enc.fit(y_dset)

LabelEncoder()

In [7]:
y_dset = enc.transform(y_dset)
y_tset = enc.transform(y_tset)

## 2. Train Test split

In [8]:
## 4.4 train and test split for Classical Learner
X_train, X_test, y_train, y_test = train_test_split(X_dset, y_dset, test_size = 0.2, 
                                                    random_state=123)

## 3. Classical Learner

### 3.1 Naive Bayes

In [9]:
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import BernoulliNB

In [10]:
## Learn Classifier
scaler = StandardScaler()
clf = BernoulliNB()
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True)

In [11]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[260  21 129]
 [171  49 183]
 [  0   8 379]]
[1mMetrics[0m
ACC: 0.573
BACC: 0.578
F1_micro: 0.573
F1_macro: 0.508
AUROC_OVR: 0.727
AUROC_OVO: 0.728
Precisio_micro: 0.573
Precisio_macro: 0.593
Recall_micro: 0.573
Recall_macro: 0.578


In [12]:
NB_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [13]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset)
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[ 614  136  750]
 [   6  878  616]
 [   0    0 1500]]
[1mMetrics[0m
ACC: 0.665
BACC: 0.665
F1_micro: 0.665
F1_macro: 0.655
AUROC_OVR: 0.775
AUROC_OVO: 0.775
Precisio_micro: 0.665
Precisio_macro: 0.793
Recall_micro: 0.665
Recall_macro: 0.665


In [14]:
NB_indset_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [15]:
outF = open(fileout, "w")
outF.write("Naive Bayes, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, NB_devset_res)))
outF.write('\n')
outF.write('IndSet, ')
outF.write(', '.join(map(str, NB_indset_res)))
outF.write('\n')
outF.close()

### 3.2 Decision Tree

In [16]:
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

In [17]:
## Learn Classifier
scaler = StandardScaler()
clf = DecisionTreeClassifier()
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

In [18]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[392  12   6]
 [  9 387   7]
 [  2   7 378]]
[1mMetrics[0m
ACC: 0.964
BACC: 0.964
F1_micro: 0.964
F1_macro: 0.964
AUROC_OVR: 0.973
AUROC_OVO: 0.973
Precisio_micro: 0.964
Precisio_macro: 0.964
Recall_micro: 0.964
Recall_macro: 0.964


In [19]:
DT_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [20]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset)
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[1020  204  276]
 [1052  347  101]
 [   1    3 1496]]
[1mMetrics[0m
ACC: 0.636
BACC: 0.636
F1_micro: 0.636
F1_macro: 0.599
AUROC_OVR: 0.727
AUROC_OVO: 0.727
Precisio_micro: 0.636
Precisio_macro: 0.639
Recall_micro: 0.636
Recall_macro: 0.636


In [21]:
DT_indset_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [22]:
outF = open(fileout, "a")
outF.write("Decision_Tree, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, DT_devset_res)))
outF.write('\n')
outF.write('IndSet, ')
outF.write(', '.join(map(str, DT_indset_res)))
outF.write('\n')
outF.close()

### 3.3 Logistic Regression Classifier

In [23]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [24]:
## Learn Classifier
scaler = StandardScaler()
clf = LogisticRegression(multi_class = 'multinomial')
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='multinomial', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [25]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[381  28   1]
 [ 11 392   0]
 [  4   2 381]]
[1mMetrics[0m
ACC: 0.962
BACC: 0.962
F1_micro: 0.962
F1_macro: 0.962
AUROC_OVR: 0.995
AUROC_OVO: 0.995
Precisio_micro: 0.962
Precisio_macro: 0.963
Recall_micro: 0.962
Recall_macro: 0.962


In [26]:
LR_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [27]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset)
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[ 741  641  118]
 [1162  338    0]
 [   8    0 1492]]
[1mMetrics[0m
ACC: 0.571
BACC: 0.571
F1_micro: 0.571
F1_macro: 0.556
AUROC_OVR: 0.765
AUROC_OVO: 0.765
Precisio_micro: 0.571
Precisio_macro: 0.553
Recall_micro: 0.571
Recall_macro: 0.571


In [28]:
LR_indset_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [29]:
outF = open(fileout, "a")
outF.write("Logistic_Regression, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, LR_devset_res)))
outF.write('\n')
outF.write('IndSet, ')
outF.write(', '.join(map(str, LR_indset_res)))
outF.write('\n')
outF.close()

### 3.3 Logistic Regression CV Classifier

In [30]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegressionCV

In [31]:
## Learn Classifier
scaler = StandardScaler()
clf = LogisticRegressionCV(multi_class = 'multinomial')
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

LogisticRegressionCV(Cs=10, class_weight=None, cv=None, dual=False,
                     fit_intercept=True, intercept_scaling=1.0, l1_ratios=None,
                     max_iter=100, multi_class='multinomial', n_jobs=None,
                     penalty='l2', random_state=None, refit=True, scoring=None,
                     solver='lbfgs', tol=0.0001, verbose=0)

In [32]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[405   5   0]
 [  0 403   0]
 [  1   0 386]]
[1mMetrics[0m
ACC: 0.995
BACC: 0.995
F1_micro: 0.995
F1_macro: 0.995
AUROC_OVR: 0.999
AUROC_OVO: 0.999
Precisio_micro: 0.995
Precisio_macro: 0.995
Recall_micro: 0.995
Recall_macro: 0.995


In [33]:
LR_CV_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [34]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset)
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[ 713  175  612]
 [ 798  702    0]
 [   1    0 1499]]
[1mMetrics[0m
ACC: 0.648
BACC: 0.648
F1_micro: 0.648
F1_macro: 0.631
AUROC_OVR: 0.867
AUROC_OVO: 0.867
Precisio_micro: 0.648
Precisio_macro: 0.661
Recall_micro: 0.648
Recall_macro: 0.648


In [35]:
LR_CV_indset_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [36]:
outF = open(fileout, "a")
outF.write("Logistic_Regression_CV, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, LR_CV_devset_res)))
outF.write('\n')
outF.write('IndSet, ')
outF.write(', '.join(map(str, LR_CV_indset_res)))
outF.write('\n')
outF.close()

### 3.4 MLP

In [37]:
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier

In [38]:
## Learn Classifier
scaler = StandardScaler()
clf = MLPClassifier()
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [39]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[407   3   0]
 [  0 402   1]
 [  0   0 387]]
[1mMetrics[0m
ACC: 0.997
BACC: 0.997
F1_micro: 0.997
F1_macro: 0.997
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 0.997
Precisio_macro: 0.997
Recall_micro: 0.997
Recall_macro: 0.997


In [40]:
MLP_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [41]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset)
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[ 754  406  340]
 [1140  360    0]
 [   3    0 1497]]
[1mMetrics[0m
ACC: 0.58
BACC: 0.58
F1_micro: 0.58
F1_macro: 0.553
AUROC_OVR: 0.827
AUROC_OVO: 0.827
Precisio_micro: 0.58
Precisio_macro: 0.561
Recall_micro: 0.58
Recall_macro: 0.58


In [42]:
MLP_indset_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [43]:
outF = open(fileout, "a")
outF.write("MLP, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, MLP_devset_res)))
outF.write('\n')
outF.write('IndSet, ')
outF.write(', '.join(map(str, MLP_indset_res)))
outF.write('\n')
outF.close()

### 3.5 Random Forest

In [44]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

In [45]:
## Learn Classifier
scaler = StandardScaler()
clf = RandomForestClassifier()
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [46]:
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[406   4   0]
 [  1 402   0]
 [  0   0 387]]
[1mMetrics[0m
ACC: 0.996
BACC: 0.996
F1_micro: 0.996
F1_macro: 0.996
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 0.996
Precisio_macro: 0.996
Recall_micro: 0.996
Recall_macro: 0.996


In [47]:
RF_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [48]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset)
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[ 882    5  613]
 [1040  416   44]
 [   0    0 1500]]
[1mMetrics[0m
ACC: 0.622
BACC: 0.622
F1_micro: 0.622
F1_macro: 0.59
AUROC_OVR: 0.897
AUROC_OVO: 0.897
Precisio_micro: 0.622
Precisio_macro: 0.714
Recall_micro: 0.622
Recall_macro: 0.622


In [49]:
RF_indset_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [50]:
outF = open(fileout, "a")
outF.write("RandomForest, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, RF_devset_res)))
outF.write('\n')
outF.write('IndSet, ')
outF.write(', '.join(map(str, RF_indset_res)))
outF.write('\n')
outF.close()

### 3.6 Linear SVM

In [51]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [52]:
## Learn Classifier
scaler = StandardScaler()
clf = SVC(kernel = 'linear', probability=True)
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [53]:
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[408   2   0]
 [  0 403   0]
 [  1   0 386]]
[1mMetrics[0m
ACC: 0.998
BACC: 0.998
F1_micro: 0.998
F1_macro: 0.998
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 0.998
Precisio_macro: 0.998
Recall_micro: 0.998
Recall_macro: 0.998


In [54]:
LinSVM_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [55]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset)
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[ 935  201  364]
 [ 701  799    0]
 [   1    0 1499]]
[1mMetrics[0m
ACC: 0.718
BACC: 0.718
F1_micro: 0.718
F1_macro: 0.709
AUROC_OVR: 0.872
AUROC_OVO: 0.872
Precisio_micro: 0.718
Precisio_macro: 0.725
Recall_micro: 0.718
Recall_macro: 0.718


In [56]:
LinSVM_indset_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [57]:
outF = open(fileout, "a")
outF.write("Linear_SVM, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, LinSVM_devset_res)))
outF.write('\n')
outF.write('IndSet, ')
outF.write(', '.join(map(str, LinSVM_indset_res)))
outF.write('\n')
outF.close()

### 3.7 RBF SVM (Nonlinear SVM)

In [58]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [59]:
## Learn Classifier
scaler = StandardScaler()
clf = SVC(kernel = 'rbf', probability=True)
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [60]:
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[342  38  30]
 [ 14 366  23]
 [  5  17 365]]
[1mMetrics[0m
ACC: 0.894
BACC: 0.895
F1_micro: 0.894
F1_macro: 0.894
AUROC_OVR: 0.978
AUROC_OVO: 0.978
Precisio_micro: 0.894
Precisio_macro: 0.897
Recall_micro: 0.894
Recall_macro: 0.895


In [61]:
RBFSVM_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [62]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset)
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[ 735  386  379]
 [ 744  756    0]
 [   0    0 1500]]
[1mMetrics[0m
ACC: 0.665
BACC: 0.665
F1_micro: 0.665
F1_macro: 0.651
AUROC_OVR: 0.748
AUROC_OVO: 0.748
Precisio_micro: 0.665
Precisio_macro: 0.652
Recall_micro: 0.665
Recall_macro: 0.665


In [63]:
RBFSVM_indset_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [64]:
outF = open(fileout, "a")
outF.write("RBF_SVM, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, RBFSVM_devset_res)))
outF.write('\n')
outF.write('IndSet, ')
outF.write(', '.join(map(str, RBFSVM_indset_res)))
outF.write('\n')
outF.close()