## 1. Load Datasets

In [1]:
## Utils
import re
import numpy as np
import pandas as pd
import itertools

## Classical Learner
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, cross_validate

In [2]:
from sklearn.base import clone


class OrdinalClassifier():
    
    def __init__(self, clf):
        self.clf = clf
        self.clfs = {}
    
    def fit(self, X, y):
        self.unique_class = np.sort(np.unique(y))
        if self.unique_class.shape[0] > 2:
            for i in range(self.unique_class.shape[0]-1):
                # for each k - 1 ordinal value we fit a binary classification problem
                binary_y = (y > self.unique_class[i]).astype(np.uint8)
                clf = clone(self.clf)
                clf.fit(X, binary_y)
                self.clfs[i] = clf
    
    def predict_proba(self, X):
        clfs_predict = {k:self.clfs[k].predict_proba(X) for k in self.clfs}
        predicted = []
        for i,y in enumerate(self.unique_class):
            if i == 0:
                # V1 = 1 - Pr(y > V1)
                predicted.append(1 - clfs_predict[y][:,1])
            elif y in clfs_predict:
                # Vi = Pr(y > Vi-1) - Pr(y > Vi)
                 predicted.append(clfs_predict[y-1][:,1] - clfs_predict[y][:,1])
            else:
                # Vk = Pr(y > Vk-1)
                predicted.append(clfs_predict[y-1][:,1])
        return np.vstack(predicted).T
    
    def predict(self, X):
        return np.argmax(self.predict_proba(X), axis=1)

In [3]:
"""Dataset Parameter Setting"""
"""Load Dataset"""
r6g_dset = pd.read_csv("C:/Users/sypark/Desktop/Projects/w_MinSeok/1SERSNet/2data/_preprocessed/sersnet_devset.csv")
r6g_tset1 = pd.read_csv("C:/Users/sypark/Desktop/Projects/w_MinSeok/1SERSNet/2data/_preprocessed/sersnet_batch1_devset.csv")
r6g_tset2 = pd.read_csv("C:/Users/sypark/Desktop/Projects/w_MinSeok/1SERSNet/2data/_preprocessed/sersnet_devse_batch3_1.csv")
r6g_tset3 = pd.read_csv("C:/Users/sypark/Desktop/Projects/w_MinSeok/1SERSNet/2data/_preprocessed/sersnet_devse_batch3_2.csv")
r6g_tset4 = pd.read_csv("C:/Users/sypark/Desktop/Projects/w_MinSeok/1SERSNet/2data/_preprocessed/sersnet_devse_batch3_3.csv")

"""Set Output Path"""
fileout = "C:/Users/sypark/Desktop/Projects/w_MinSeok/1SERSNet/3results/R6G_Classification/raw_data/baseline_r6g_ordinal_model_output_dev_to_test.csv"

## Class Definition
## 2: Conc. >= 10 mM  
## 1: 10 uM <= Conc. < 10 mM
## 0: Conc. < 10 uM

In [4]:
r6g_dset = r6g_dset.iloc[:,1:]
r6g_tset1 = r6g_tset1.iloc[:,2:]

In [5]:
r6g_dset.loc[r6g_dset.label==2, 'label'] = int(3)
r6g_dset.loc[r6g_dset.label==1, 'label'] = int(2)
r6g_dset.loc[r6g_dset.label==3, 'label'] = int(1)
dummy_label1 = r6g_dset[r6g_dset.label == 1]
dummy_label1 = dummy_label1.iloc[0:1,:].reset_index(drop=True)
dummy_label2 = r6g_dset[r6g_dset.label == 2]
dummy_label2 = dummy_label2.iloc[0:1,:].reset_index(drop=True)

In [6]:
r6g_tset1.loc[r6g_tset1.label==1, 'label'] = int(2)
r6g_tset3.loc[r6g_tset3.label==1, 'label'] = int(0)
r6g_tset4.loc[r6g_tset4.label==1, 'label'] = int(0)

In [7]:
r6g_tset1 = pd.concat([r6g_tset1, dummy_label1]).reset_index(drop=True)
r6g_tset2 = pd.concat([r6g_tset2, dummy_label2]).reset_index(drop=True)
r6g_tset3 = pd.concat([r6g_tset3, dummy_label1, dummy_label2]).reset_index(drop=True)
r6g_tset4 = pd.concat([r6g_tset4, dummy_label1, dummy_label2]).reset_index(drop=True)

In [8]:
X_dset = r6g_dset.iloc[:, 1:].to_numpy(dtype='float32')
X_tset1 = r6g_tset1.iloc[:, 1:].to_numpy(dtype='float32')
X_tset2 = r6g_tset2.iloc[:, 1:].to_numpy(dtype='float32')
X_tset3 = r6g_tset3.iloc[:, 1:].to_numpy(dtype='float32')
X_tset4 = r6g_tset4.iloc[:, 1:].to_numpy(dtype='float32')

y_dset = r6g_dset.iloc[:,0].to_numpy(dtype='int64') 
y_tset1 = r6g_tset1.iloc[:,0].to_numpy()            
y_tset2 = r6g_tset2.iloc[:,0].to_numpy()
y_tset3 = r6g_tset3.iloc[:,0].to_numpy()
y_tset4 = r6g_tset4.iloc[:,0].to_numpy()

## 2. Train Test Split

In [9]:
## 4.4 train and test split for Classical Learner
X_train, X_test, y_train, y_test = train_test_split(X_dset, y_dset, test_size = 0.2, 
                                                    random_state=123)

## 3. Classical Learner

### 3.1 Naive Bayes

In [10]:
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import BernoulliNB

In [11]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(BernoulliNB())
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

In [12]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[ 94   0   0]
 [ 91   1   0]
 [  0   0 114]]
[1mMetrics[0m
ACC: 0.697
BACC: 0.67
F1_micro: 0.697
F1_macro: 0.565
AUROC_OVR: 0.763
AUROC_OVO: 0.755
Precisio_micro: 0.697
Precisio_macro: 0.836
Recall_micro: 0.697
Recall_macro: 0.67


In [13]:
NB_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [14]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset1)
y_tset = y_tset1
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[500   0   0]
 [  1   0   0]
 [142  15 394]]
[1mMetrics[0m
ACC: 0.85
BACC: 0.572
F1_micro: 0.85
F1_macro: 0.57
AUROC_OVR: 0.901
AUROC_OVO: 0.84
Precisio_micro: 0.85
Precisio_macro: 0.593
Recall_micro: 0.85
Recall_macro: 0.572


In [15]:
NB_tset1_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [16]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset2)
y_tset = y_tset2
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[499   1   0]
 [  0  90 410]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.589
BACC: 0.726
F1_micro: 0.589
F1_macro: 0.436
AUROC_OVR: 0.795
AUROC_OVO: 0.795
Precisio_micro: 0.589
Precisio_macro: 0.664
Recall_micro: 0.589
Recall_macro: 0.726


In [17]:
NB_tset2_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [18]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset3)
y_tset = y_tset3
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[999   1   0]
 [  1   0   0]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.998
BACC: 0.666
F1_micro: 0.998
F1_macro: 0.666
AUROC_OVR: 0.916
AUROC_OVO: 0.916
Precisio_micro: 0.998
Precisio_macro: 0.666
Recall_micro: 0.998
Recall_macro: 0.666


In [19]:
NB_tset3_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [20]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset4)
y_tset = y_tset4
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[999   1   0]
 [  1   0   0]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.998
BACC: 0.666
F1_micro: 0.998
F1_macro: 0.666
AUROC_OVR: 0.916
AUROC_OVO: 0.916
Precisio_micro: 0.998
Precisio_macro: 0.666
Recall_micro: 0.998
Recall_macro: 0.666


In [21]:
NB_tset4_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [22]:
outF = open(fileout, "w")
outF.write("Naive_Bayes, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, NB_devset_res)))
outF.write('\n')
outF.write('IndSet1, ')
outF.write(', '.join(map(str, NB_tset1_res)))
outF.write('\n')
outF.write('IndSet2, ')
outF.write(', '.join(map(str, NB_tset2_res)))
outF.write('\n')
outF.write('IndSet3, ')
outF.write(', '.join(map(str, NB_tset3_res)))
outF.write('\n')
outF.write('IndSet4, ')
outF.write(', '.join(map(str, NB_tset4_res)))
outF.write('\n')
outF.close()

### 3.2 Decision Tree

In [23]:
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

In [24]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(DecisionTreeClassifier())
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

In [25]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[ 94   0   0]
 [  1  91   0]
 [  0   0 114]]
[1mMetrics[0m
ACC: 0.997
BACC: 0.996
F1_micro: 0.997
F1_macro: 0.996
AUROC_OVR: 0.997
AUROC_OVO: 0.997
Precisio_micro: 0.997
Precisio_macro: 0.996
Recall_micro: 0.997
Recall_macro: 0.996


In [26]:
DT_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [27]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset1)
y_tset = y_tset1
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[500   0   0]
 [  0   1   0]
 [ 50 178 323]]
[1mMetrics[0m
ACC: 0.783
BACC: 0.862
F1_micro: 0.783
F1_macro: 0.568
AUROC_OVR: 0.888
AUROC_OVO: 0.897
Precisio_micro: 0.783
Precisio_macro: 0.638
Recall_micro: 0.783
Recall_macro: 0.862


In [28]:
DT_tset1_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [29]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset2)
y_tset = y_tset2
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[485  15   0]
 [  0  91 409]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.576
BACC: 0.717
F1_micro: 0.576
F1_macro: 0.43
AUROC_OVR: 0.786
AUROC_OVO: 0.788
Precisio_micro: 0.576
Precisio_macro: 0.62
Recall_micro: 0.576
Recall_macro: 0.717


In [30]:
DT_tset2_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [31]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset3)
y_tset = y_tset3
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[485 515   0]
 [  0   1   0]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.486
BACC: 0.828
F1_micro: 0.486
F1_macro: 0.552
AUROC_OVR: 0.828
AUROC_OVO: 0.871
Precisio_micro: 0.486
Precisio_macro: 0.667
Recall_micro: 0.486
Recall_macro: 0.828


In [32]:
DT_tset3_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [33]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset4)
y_tset = y_tset4
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[485 515   0]
 [  0   1   0]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.486
BACC: 0.828
F1_micro: 0.486
F1_macro: 0.552
AUROC_OVR: 0.828
AUROC_OVO: 0.871
Precisio_micro: 0.486
Precisio_macro: 0.667
Recall_micro: 0.486
Recall_macro: 0.828


In [34]:
DT_tset4_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [35]:
outF = open(fileout, "a")
outF.write("Decision_Tree, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, DT_devset_res)))
outF.write('\n')
outF.write('IndSet1, ')
outF.write(', '.join(map(str, DT_tset1_res)))
outF.write('\n')
outF.write('IndSet2, ')
outF.write(', '.join(map(str, DT_tset2_res)))
outF.write('\n')
outF.write('IndSet3, ')
outF.write(', '.join(map(str, DT_tset3_res)))
outF.write('\n')
outF.write('IndSet4, ')
outF.write(', '.join(map(str, DT_tset4_res)))
outF.write('\n')
outF.close()

### 3.3 Logistic Regression Classifier

In [36]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [37]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(LogisticRegression())
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [38]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[ 94   0   0]
 [  0  92   0]
 [  0   0 114]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1_micro: 1.0
F1_macro: 1.0
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 1.0
Precisio_macro: 1.0
Recall_micro: 1.0
Recall_macro: 1.0


In [39]:
LR_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [40]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset1)
y_tset = y_tset1
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[500   0   0]
 [  0   1   0]
 [  0 196 355]]
[1mMetrics[0m
ACC: 0.814
BACC: 0.881
F1_micro: 0.814
F1_macro: 0.598
AUROC_OVR: 0.952
AUROC_OVO: 0.936
Precisio_micro: 0.814
Precisio_macro: 0.668
Recall_micro: 0.814
Recall_macro: 0.881


In [41]:
LR_tset1_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [42]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset2)
y_tset = y_tset2
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[500   0   0]
 [  0 422  78]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.922
BACC: 0.948
F1_micro: 0.922
F1_macro: 0.647
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 0.922
Precisio_macro: 0.671
Recall_micro: 0.922
Recall_macro: 0.948


In [43]:
LR_tset2_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [44]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset3)
y_tset = y_tset3
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[500 500   0]
 [  0   1   0]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.501
BACC: 0.833
F1_micro: 0.501
F1_macro: 0.557
AUROC_OVR: 0.998
AUROC_OVO: 0.999
Precisio_micro: 0.501
Precisio_macro: 0.667
Recall_micro: 0.501
Recall_macro: 0.833


In [45]:
LR_tset3_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [46]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset4)
y_tset = y_tset4
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[500 500   0]
 [  0   1   0]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.501
BACC: 0.833
F1_micro: 0.501
F1_macro: 0.557
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 0.501
Precisio_macro: 0.667
Recall_micro: 0.501
Recall_macro: 0.833


In [47]:
LR_tset4_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [48]:
outF = open(fileout, "a")
outF.write("Logistic_Regression, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, LR_devset_res)))
outF.write('\n')
outF.write('IndSet1, ')
outF.write(', '.join(map(str, LR_tset1_res)))
outF.write('\n')
outF.write('IndSet2, ')
outF.write(', '.join(map(str, LR_tset2_res)))
outF.write('\n')
outF.write('IndSet3, ')
outF.write(', '.join(map(str, LR_tset3_res)))
outF.write('\n')
outF.write('IndSet4, ')
outF.write(', '.join(map(str, LR_tset4_res)))
outF.write('\n')
outF.close()

### 3.3 Logistic Regresssion CV

In [49]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegressionCV

In [50]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(LogisticRegressionCV())
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

In [51]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[ 94   0   0]
 [  0  92   0]
 [  0   0 114]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1_micro: 1.0
F1_macro: 1.0
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 1.0
Precisio_macro: 1.0
Recall_micro: 1.0
Recall_macro: 1.0


In [52]:
LR_CV_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [53]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset1)
y_tset = y_tset1
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[500   0   0]
 [  0   1   0]
 [ 15 182 354]]
[1mMetrics[0m
ACC: 0.813
BACC: 0.881
F1_micro: 0.813
F1_macro: 0.593
AUROC_OVR: 0.954
AUROC_OVO: 0.936
Precisio_micro: 0.813
Precisio_macro: 0.659
Recall_micro: 0.813
Recall_macro: 0.881


In [54]:
LR_CV_tset1_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [55]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset2)
y_tset = y_tset2
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[498   2   0]
 [  0 446  54]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.944
BACC: 0.963
F1_micro: 0.944
F1_macro: 0.658
AUROC_OVR: 0.998
AUROC_OVO: 0.999
Precisio_micro: 0.944
Precisio_macro: 0.671
Recall_micro: 0.944
Recall_macro: 0.963


In [56]:
LR_CV_tset2_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [57]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset3)
y_tset = y_tset3
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[790 210   0]
 [  0   1   0]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.79
BACC: 0.93
F1_micro: 0.79
F1_macro: 0.631
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 0.79
Precisio_macro: 0.668
Recall_micro: 0.79
Recall_macro: 0.93


In [58]:
LR_CV_tset3_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [59]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset4)
y_tset = y_tset4
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[985  15   0]
 [  0   1   0]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.985
BACC: 0.995
F1_micro: 0.985
F1_macro: 0.703
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 0.985
Precisio_macro: 0.688
Recall_micro: 0.985
Recall_macro: 0.995


In [60]:
LR_CV_tset4_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [61]:
outF = open(fileout, "a")
outF.write("Logistic_Regression_CV, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, LR_CV_devset_res)))
outF.write('\n')
outF.write('IndSet1, ')
outF.write(', '.join(map(str, LR_CV_tset1_res)))
outF.write('\n')
outF.write('IndSet2, ')
outF.write(', '.join(map(str, LR_CV_tset2_res)))
outF.write('\n')
outF.write('IndSet3, ')
outF.write(', '.join(map(str, LR_CV_tset3_res)))
outF.write('\n')
outF.write('IndSet4, ')
outF.write(', '.join(map(str, LR_CV_tset4_res)))
outF.write('\n')
outF.close()

### 3.4 MLP

In [62]:
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier

In [63]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(MLPClassifier())
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

In [64]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[ 94   0   0]
 [  0  92   0]
 [  0   0 114]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1_micro: 1.0
F1_macro: 1.0
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 1.0
Precisio_macro: 1.0
Recall_micro: 1.0
Recall_macro: 1.0


In [65]:
MLP_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [66]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset1)
y_tset = y_tset1
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[500   0   0]
 [  0   1   0]
 [ 27 130 394]]
[1mMetrics[0m
ACC: 0.851
BACC: 0.905
F1_micro: 0.851
F1_macro: 0.608
AUROC_OVR: 0.953
AUROC_OVO: 0.935
Precisio_micro: 0.851
Precisio_macro: 0.652
Recall_micro: 0.851
Recall_macro: 0.905


In [67]:
MLP_tset1_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [68]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset2)
y_tset = y_tset2
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precision_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precision_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[500   0   0]
 [ 70   0 430]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.5
BACC: 0.667
F1_micro: 0.5
F1_macro: 0.313
AUROC_OVR: 0.684
AUROC_OVO: 0.684
Precision_micro: 0.5
Precision_macro: 0.293
Recall_micro: 0.5
Recall_macro: 0.667


  _warn_prf(average, modifier, msg_start, len(result))


In [69]:
MLP_tset2_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

  _warn_prf(average, modifier, msg_start, len(result))


In [70]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset3)
y_tset = y_tset3
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precision_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precision_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[500 500   0]
 [  0   1   0]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.501
BACC: 0.833
F1_micro: 0.501
F1_macro: 0.557
AUROC_OVR: 0.994
AUROC_OVO: 0.996
Precision_micro: 0.501
Precision_macro: 0.667
Recall_micro: 0.501
Recall_macro: 0.833


In [71]:
MLP_tset3_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [72]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset4)
y_tset = y_tset4
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precision_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precision_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[500 500   0]
 [  0   1   0]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.501
BACC: 0.833
F1_micro: 0.501
F1_macro: 0.557
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precision_micro: 0.501
Precision_macro: 0.667
Recall_micro: 0.501
Recall_macro: 0.833


In [73]:
MLP_tset4_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [74]:
outF = open(fileout, "a")
outF.write("MLP, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, MLP_devset_res)))
outF.write('\n')
outF.write('IndSet1, ')
outF.write(', '.join(map(str, MLP_tset1_res)))
outF.write('\n')
outF.write('IndSet2, ')
outF.write(', '.join(map(str, MLP_tset2_res)))
outF.write('\n')
outF.write('IndSet3, ')
outF.write(', '.join(map(str, MLP_tset3_res)))
outF.write('\n')
outF.write('IndSet4, ')
outF.write(', '.join(map(str, MLP_tset4_res)))
outF.write('\n')
outF.close()

### 3.5 Random Forest

In [75]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

In [76]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(RandomForestClassifier())
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

In [77]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[ 93   1   0]
 [  0  92   0]
 [  0   0 114]]
[1mMetrics[0m
ACC: 0.997
BACC: 0.996
F1_micro: 0.997
F1_macro: 0.996
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 0.997
Precisio_macro: 0.996
Recall_micro: 0.997
Recall_macro: 0.996


In [78]:
RF_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [79]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset1)
y_tset = y_tset1
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precision_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precision_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[499   1   0]
 [  0   1   0]
 [ 38 156 357]]
[1mMetrics[0m
ACC: 0.815
BACC: 0.882
F1_micro: 0.815
F1_macro: 0.587
AUROC_OVR: 0.967
AUROC_OVO: 0.967
Precision_micro: 0.815
Precision_macro: 0.645
Recall_micro: 0.815
Recall_macro: 0.882


In [80]:
RF_tset1_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [81]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset2)
y_tset = y_tset2
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precision_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precision_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[493   7   0]
 [  0 448  52]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.941
BACC: 0.961
F1_micro: 0.941
F1_macro: 0.656
AUROC_OVR: 0.997
AUROC_OVO: 0.998
Precision_micro: 0.941
Precision_macro: 0.668
Recall_micro: 0.941
Recall_macro: 0.961


In [82]:
RF_tset2_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [83]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset3)
y_tset = y_tset3
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precision_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precision_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[493 507   0]
 [  0   1   0]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.494
BACC: 0.831
F1_micro: 0.494
F1_macro: 0.555
AUROC_OVR: 0.835
AUROC_OVO: 0.876
Precision_micro: 0.494
Precision_macro: 0.667
Recall_micro: 0.494
Recall_macro: 0.831


In [84]:
RF_tset3_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [85]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset4)
y_tset = y_tset4
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precision_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precision_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[493 507   0]
 [  0   1   0]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.494
BACC: 0.831
F1_micro: 0.494
F1_macro: 0.555
AUROC_OVR: 0.847
AUROC_OVO: 0.885
Precision_micro: 0.494
Precision_macro: 0.667
Recall_micro: 0.494
Recall_macro: 0.831


In [86]:
RF_tset4_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [87]:
outF = open(fileout, "a")
outF.write("Random_Forest, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, RF_devset_res)))
outF.write('\n')
outF.write('IndSet1, ')
outF.write(', '.join(map(str, RF_tset1_res)))
outF.write('\n')
outF.write('IndSet2, ')
outF.write(', '.join(map(str, RF_tset2_res)))
outF.write('\n')
outF.write('IndSet3, ')
outF.write(', '.join(map(str, RF_tset3_res)))
outF.write('\n')
outF.write('IndSet4, ')
outF.write(', '.join(map(str, RF_tset4_res)))
outF.write('\n')
outF.close()

### 3.6 Linear SVM

In [88]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [89]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(SVC(kernel = 'linear', probability=True))
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

In [90]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[ 94   0   0]
 [  0  92   0]
 [  0   0 114]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1_micro: 1.0
F1_macro: 1.0
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 1.0
Precisio_macro: 1.0
Recall_micro: 1.0
Recall_macro: 1.0


In [91]:
LinSVM_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [92]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset1)
y_tset = y_tset1
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precision_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precision_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[500   0   0]
 [  0   1   0]
 [  0 197 354]]
[1mMetrics[0m
ACC: 0.813
BACC: 0.881
F1_micro: 0.813
F1_macro: 0.597
AUROC_OVR: 0.922
AUROC_OVO: 0.91
Precision_micro: 0.813
Precision_macro: 0.668
Recall_micro: 0.813
Recall_macro: 0.881


In [93]:
LinSVM_tset1_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [94]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset2)
y_tset = y_tset2
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precision_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precision_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[500   0   0]
 [  0 432  68]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.932
BACC: 0.955
F1_micro: 0.932
F1_macro: 0.652
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precision_micro: 0.932
Precision_macro: 0.671
Recall_micro: 0.932
Recall_macro: 0.955


In [95]:
LinSVM_tset2_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [96]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset3)
y_tset = y_tset3
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precision_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precision_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[500 500   0]
 [  0   1   0]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.501
BACC: 0.833
F1_micro: 0.501
F1_macro: 0.557
AUROC_OVR: 0.76
AUROC_OVO: 0.844
Precision_micro: 0.501
Precision_macro: 0.667
Recall_micro: 0.501
Recall_macro: 0.833


In [97]:
LinSVM_tset3_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [98]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset4)
y_tset = y_tset4
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precision_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precision_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[500 500   0]
 [  0   1   0]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.501
BACC: 0.833
F1_micro: 0.501
F1_macro: 0.557
AUROC_OVR: 0.828
AUROC_OVO: 0.911
Precision_micro: 0.501
Precision_macro: 0.667
Recall_micro: 0.501
Recall_macro: 0.833


In [99]:
LinSVM_tset4_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [100]:
outF = open(fileout, "a")
outF.write("Linear_SVM, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, LinSVM_devset_res)))
outF.write('\n')
outF.write('IndSet1, ')
outF.write(', '.join(map(str, LinSVM_tset1_res)))
outF.write('\n')
outF.write('IndSet2, ')
outF.write(', '.join(map(str, LinSVM_tset2_res)))
outF.write('\n')
outF.write('IndSet3, ')
outF.write(', '.join(map(str, LinSVM_tset3_res)))
outF.write('\n')
outF.write('IndSet4, ')
outF.write(', '.join(map(str, LinSVM_tset4_res)))
outF.write('\n')
outF.close()

### 3.7 RBF SVM (Nonlinear SVM)

In [101]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [102]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(SVC(kernel = 'rbf', probability=True))
scaler.fit(X_train)
X_train_sds = scaler.transform(X_train)
X_test_sds = scaler.transform(X_test)
clf.fit(X_train_sds, y_train)

In [103]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[ 94   0   0]
 [  0  92   0]
 [  0   0 114]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1_micro: 1.0
F1_macro: 1.0
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 1.0
Precisio_macro: 1.0
Recall_micro: 1.0
Recall_macro: 1.0


In [104]:
RBFSVM_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [105]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset1)
y_tset = y_tset1
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precision_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precision_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[499   1   0]
 [  0   1   0]
 [ 43 202 306]]
[1mMetrics[0m
ACC: 0.766
BACC: 0.851
F1_micro: 0.766
F1_macro: 0.561
AUROC_OVR: 0.96
AUROC_OVO: 0.954
Precision_micro: 0.766
Precision_macro: 0.642
Recall_micro: 0.766
Recall_macro: 0.851


In [106]:
RBFSVM_tset1_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [107]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset2)
y_tset = y_tset2
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precision_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precision_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[498   2   0]
 [402  27  71]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.525
BACC: 0.683
F1_micro: 0.525
F1_macro: 0.28
AUROC_OVR: 0.857
AUROC_OVO: 0.854
Precision_micro: 0.525
Precision_macro: 0.499
Recall_micro: 0.525
Recall_macro: 0.683


In [108]:
RBFSVM_tset2_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [109]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset3)
y_tset = y_tset3
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precision_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precision_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[498 502   0]
 [  0   1   0]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.499
BACC: 0.833
F1_micro: 0.499
F1_macro: 0.556
AUROC_OVR: 0.69
AUROC_OVO: 0.762
Precision_micro: 0.499
Precision_macro: 0.667
Recall_micro: 0.499
Recall_macro: 0.833


In [110]:
RBFSVM_tset3_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [111]:
### Independent Test Result
X_tset_sds = scaler.transform(X_tset4)
y_tset = y_tset4
yp_test = clf.predict(X_tset_sds)
ys_test = clf.predict_proba(X_tset_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_tset, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_tset, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_tset, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_tset, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_tset, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3)))
print('Precision_micro: {}'.format(round(precision_score(y_tset, yp_test, average='micro'),3)))
print('Precision_macro: {}'.format(round(precision_score(y_tset, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_tset, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_tset, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[498 502   0]
 [  0   1   0]
 [  0   0   1]]
[1mMetrics[0m
ACC: 0.499
BACC: 0.833
F1_micro: 0.499
F1_macro: 0.556
AUROC_OVR: 0.826
AUROC_OVO: 0.829
Precision_micro: 0.499
Precision_macro: 0.667
Recall_micro: 0.499
Recall_macro: 0.833


In [112]:
RBFSVM_tset4_res = [round(accuracy_score(y_tset, yp_test), 3), round(balanced_accuracy_score(y_tset, yp_test), 3),
                 round(f1_score(y_tset, yp_test, average='micro'),3), round(f1_score(y_tset, yp_test, average='macro'),3),
                 round(roc_auc_score(y_tset, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_tset, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_tset, yp_test, average='micro'),3), round(precision_score(y_tset, yp_test, average='macro'),3),
                 round(recall_score(y_tset, yp_test, average='micro'),3), round(recall_score(y_tset, yp_test, average='macro'),3)]

In [113]:
outF = open(fileout, "a")
outF.write("RBF_SVM, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('DevSet, ')
outF.write(', '.join(map(str, RBFSVM_devset_res)))
outF.write('\n')
outF.write('IndSet1, ')
outF.write(', '.join(map(str, RBFSVM_tset1_res)))
outF.write('\n')
outF.write('IndSet2, ')
outF.write(', '.join(map(str, RBFSVM_tset2_res)))
outF.write('\n')
outF.write('IndSet3, ')
outF.write(', '.join(map(str, RBFSVM_tset3_res)))
outF.write('\n')
outF.write('IndSet4, ')
outF.write(', '.join(map(str, RBFSVM_tset4_res)))
outF.write('\n')
outF.close()