## 1. Load Datasets

In [1]:
## Utils
import re
import numpy as np
import pandas as pd
import itertools

## Classical Learner
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, cross_validate

In [2]:
from sklearn.base import clone


class OrdinalClassifier():
    
    def __init__(self, clf):
        self.clf = clf
        self.clfs = {}
    
    def fit(self, X, y):
        self.unique_class = np.sort(np.unique(y))
        if self.unique_class.shape[0] > 2:
            for i in range(self.unique_class.shape[0]-1):
                # for each k - 1 ordinal value we fit a binary classification problem
                binary_y = (y > self.unique_class[i]).astype(np.uint8)
                clf = clone(self.clf)
                clf.fit(X, binary_y)
                self.clfs[i] = clf
    
    def predict_proba(self, X):
        clfs_predict = {k:self.clfs[k].predict_proba(X) for k in self.clfs}
        predicted = []
        for i,y in enumerate(self.unique_class):
            if i == 0:
                # V1 = 1 - Pr(y > V1)
                predicted.append(1 - clfs_predict[y][:,1])
            elif y in clfs_predict:
                # Vi = Pr(y > Vi-1) - Pr(y > Vi)
                 predicted.append(clfs_predict[y-1][:,1] - clfs_predict[y][:,1])
            else:
                # Vk = Pr(y > Vk-1)
                predicted.append(clfs_predict[y-1][:,1])
        return np.vstack(predicted).T
    
    def predict(self, X):
        return np.argmax(self.predict_proba(X), axis=1)

In [3]:
"""Dataset Parameter Setting"""
"""Load Dataset"""
cdcl2_b1_dset = pd.read_csv("C:/Users/sypark/Desktop/Projects/w_MinSeok/1SERSNet/2data/_preprocessed/sersnet_cdcl2_b1_bn_bl_corrected.csv")
cdcl2_b2_dset = pd.read_csv("C:/Users/sypark/Desktop/Projects/w_MinSeok/1SERSNet/2data/_preprocessed/sersnet_cdcl2_b2_bn_bl_corrected.csv")

pbcl2_b1_dset = pd.read_csv("C:/Users/sypark/Desktop/Projects/w_MinSeok/1SERSNet/2data/_preprocessed/sersnet_pbcl2_b1_bn_bl_corrected.csv")
pbcl2_b2_dset = pd.read_csv("C:/Users/sypark/Desktop/Projects/w_MinSeok/1SERSNet/2data/_preprocessed/sersnet_pbcl2_b2_bn_bl_corrected.csv")

pbno32_b1_dset = pd.read_csv("C:/Users/sypark/Desktop/Projects/w_MinSeok/1SERSNet/2data/_preprocessed/sersnet_pbno32_b1_bn_bl_corrected.csv")
pbno32_b2_dset = pd.read_csv("C:/Users/sypark/Desktop/Projects/w_MinSeok/1SERSNet/2data/_preprocessed/sersnet_pbno32_b2_bn_bl_corrected.csv")


"""Set Output Path"""
fileout = "C:/Users/sypark/Desktop/Projects/w_MinSeok/1SERSNet/3results/HM_Classification/raw_data/baseline_ordinal_hm_model_output_b2_to_b1_bn_bl_corrected_comb.csv"

In [4]:
cdcl2_b1_dset = pd.concat([pd.DataFrame(cdcl2_b1_dset.iloc[:, 3]).rename(columns={'Concentration_uM': 'label'}), 
                           cdcl2_b1_dset.iloc[:, 5:]], axis=1)
cdcl2_b2_dset = pd.concat([pd.DataFrame(cdcl2_b2_dset.iloc[:, 3]).rename(columns={'Concentration_uM': 'label'}), 
                           cdcl2_b2_dset.iloc[:, 5:]], axis=1)
cdcl2_all_dset = pd.concat([cdcl2_b1_dset, cdcl2_b2_dset], axis=0).reset_index(drop=True)

pbcl2_b1_dset = pd.concat([pd.DataFrame(pbcl2_b1_dset.iloc[:, 3]).rename(columns={'Concentration_uM': 'label'}), 
                           pbcl2_b1_dset.iloc[:, 5:]], axis=1)
pbcl2_b2_dset = pd.concat([pd.DataFrame(pbcl2_b2_dset.iloc[:, 3]).rename(columns={'Concentration_uM': 'label'}), 
                           pbcl2_b2_dset.iloc[:, 5:]], axis=1)
pbcl2_all_dset = pd.concat([pbcl2_b1_dset, pbcl2_b2_dset], axis=0).reset_index(drop=True)

pbno32_b1_dset = pd.concat([pd.DataFrame(pbno32_b1_dset.iloc[:, 3]).rename(columns={'Concentration_uM': 'label'}), 
                            pbno32_b1_dset.iloc[:, 5:]], axis=1)
pbno32_b2_dset = pd.concat([pd.DataFrame(pbno32_b2_dset.iloc[:, 3]).rename(columns={'Concentration_uM': 'label'}),
                            pbno32_b2_dset.iloc[:, 5:]], axis=1)
pbno32_all_dset = pd.concat([pbno32_b1_dset, pbno32_b2_dset], axis=0).reset_index(drop=True)

In [5]:
hm_all_dset = pd.concat([cdcl2_all_dset, pbcl2_all_dset, pbno32_all_dset], axis=0).reset_index(drop=True)

In [6]:
## Get Label Encoder
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
le.fit(hm_all_dset.iloc[:,0])

LabelEncoder()

In [7]:
## CdCl2 Dataset
tmp1 = le.transform(cdcl2_b1_dset.iloc[:,0])
tmp2 = le.transform(cdcl2_b2_dset.iloc[:,0])

cdcl2_b1_dset = pd.concat([pd.DataFrame(tmp1).rename(columns={0:'label'}), cdcl2_b1_dset.iloc[:,1:]], axis=1)
cdcl2_b2_dset = pd.concat([pd.DataFrame(tmp2).rename(columns={0:'label'}), cdcl2_b2_dset.iloc[:,1:]], axis=1)
cdcl2_dset = pd.concat([cdcl2_b1_dset, cdcl2_b2_dset], axis=0).reset_index(drop=True)

In [8]:
print(np.unique(cdcl2_b1_dset.label))
print(np.unique(cdcl2_b2_dset.label))

[0 2 3]
[0 1 2 3]


In [9]:
## Pbcl2 Dataset
tmp1 = le.transform(pbcl2_b1_dset.iloc[:,0])
tmp2 = le.transform(pbcl2_b2_dset.iloc[:,0])

pbcl2_b1_dset = pd.concat([pd.DataFrame(tmp1).rename(columns={0:'label'}), pbcl2_b1_dset.iloc[:,1:]], axis=1)
pbcl2_b2_dset = pd.concat([pd.DataFrame(tmp2).rename(columns={0:'label'}), pbcl2_b2_dset.iloc[:,1:]], axis=1)
pbcl2_dset = pd.concat([pbcl2_b1_dset, pbcl2_b2_dset], axis=0).reset_index(drop=True)

In [10]:
print(np.unique(pbcl2_b1_dset.label))
print(np.unique(pbcl2_b2_dset.label))

[1 2 3]
[0 1 2 3]


In [11]:
## Pb(NO3)2 Dataset
tmp1 = le.transform(pbno32_b1_dset.iloc[:,0])
tmp2 = le.transform(pbno32_b2_dset.iloc[:,0])

pbno32_b1_dset = pd.concat([pd.DataFrame(tmp1).rename(columns={0:'label'}), pbno32_b1_dset.iloc[:,1:]], axis=1)
pbno32_b2_dset = pd.concat([pd.DataFrame(tmp2).rename(columns={0:'label'}), pbno32_b2_dset.iloc[:,1:]], axis=1)
pbno32_dset = pd.concat([pbno32_b1_dset, pbno32_b2_dset], axis=0).reset_index(drop=True)

In [12]:
print(np.unique(pbno32_b1_dset.label))
print(np.unique(pbno32_b2_dset.label))

[1 2 3]
[0 1 2 3]


In [13]:
cdcl2_dset = cdcl2_dset.iloc[:,:-1]
pbcl2_dset = pbcl2_dset.iloc[:,:-1]
pbno32_dset = pbno32_dset.iloc[:,:-1]

In [14]:
X_cdcl2_dset = cdcl2_dset.iloc[:, 1:].to_numpy(dtype='float32')
y_cdcl2_dset = cdcl2_dset.iloc[:,0].to_numpy(dtype='int64') 

X_pbcl2_dset = pbcl2_dset.iloc[:, 1:].to_numpy(dtype='float32')
y_pbcl2_dset = pbcl2_dset.iloc[:,0].to_numpy(dtype='int64') 

X_pbno32_dset = pbno32_dset.iloc[:, 1:].to_numpy(dtype='float32')
y_pbno32_dset = pbno32_dset.iloc[:,0].to_numpy(dtype='int64') 

## 2. Train Test Split

In [15]:
## 4.4 train and test split for Classical Learner
X_cdcl2_train, X_cdcl2_test, y_cdcl2_train, y_cdcl2_test = train_test_split(X_cdcl2_dset, y_cdcl2_dset, test_size = 0.2, 
                                                    random_state=123)

X_pbcl2_train, X_pbcl2_test, y_pbcl2_train, y_pbcl2_test = train_test_split(X_pbcl2_dset, y_pbcl2_dset, test_size = 0.2, 
                                                    random_state=123)

X_pbno32_train, X_pbno32_test, y_pbno32_train, y_pbno32_test = train_test_split(X_pbno32_dset, y_pbno32_dset, test_size = 0.2, 
                                                    random_state=123)

## 3. Classical Learner

### 3.1 Naive Bayes

In [16]:
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import BernoulliNB

In [17]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(BernoulliNB())
scaler.fit(X_cdcl2_train)
X_train_sds = scaler.transform(X_cdcl2_train)
X_test_sds = scaler.transform(X_cdcl2_test)
y_train = y_cdcl2_train
y_test = y_cdcl2_test
clf.fit(X_train_sds, y_train)

In [18]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[122  64   4   0]
 [  3 105   0   0]
 [ 71 148   0   0]
 [ 74  99   5   5]]
[1mMetrics[0m
ACC: 0.331
BACC: 0.41
F1_micro: 0.331
F1_macro: 0.246
AUROC_OVR: 0.68
AUROC_OVO: 0.686
Precisio_micro: 0.331
Precisio_macro: 0.426
Recall_micro: 0.331
Recall_macro: 0.41


In [19]:
NB_cdcl2_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [20]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(BernoulliNB())
scaler.fit(X_cdcl2_train)
X_train_sds = scaler.transform(X_pbcl2_train)
X_test_sds = scaler.transform(X_pbcl2_test)
y_train = y_pbcl2_train
y_test = y_pbcl2_test
clf.fit(X_train_sds, y_train)

In [21]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[108   0   0   0]
 [ 68  96   5   3]
 [  0  61 164   0]
 [  0   0   0 195]]
[1mMetrics[0m
ACC: 0.804
BACC: 0.822
F1_micro: 0.804
F1_macro: 0.792
AUROC_OVR: 0.954
AUROC_OVO: 0.955
Precisio_micro: 0.804
Precisio_macro: 0.795
Recall_micro: 0.804
Recall_macro: 0.822


In [22]:
NB_pbcl2_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [23]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(BernoulliNB())
scaler.fit(X_pbno32_train)
X_train_sds = scaler.transform(X_pbno32_train)
X_test_sds = scaler.transform(X_pbno32_test)
y_train = y_pbno32_train
y_test = y_pbno32_test
clf.fit(X_train_sds, y_train)

In [24]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[103   5   0   0]
 [ 89  95  11   0]
 [ 13  16 161  35]
 [  0   0   1 171]]
[1mMetrics[0m
ACC: 0.757
BACC: 0.788
F1_micro: 0.757
F1_macro: 0.746
AUROC_OVR: 0.922
AUROC_OVO: 0.925
Precisio_micro: 0.757
Precisio_macro: 0.771
Recall_micro: 0.757
Recall_macro: 0.788


In [25]:
NB_pbno32_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [26]:
outF = open(fileout, "w")
outF.write("Naive_Bayes, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('Cdcl2 DevSet, ')
outF.write(', '.join(map(str, NB_cdcl2_devset_res)))
outF.write('\n')
outF.write('Pbcl2 DevSet, ')
outF.write(', '.join(map(str, NB_pbcl2_devset_res)))
outF.write('\n')
outF.write('Pb(No3)2 DevSet, ')
outF.write(', '.join(map(str, NB_pbno32_devset_res)))
outF.write('\n')
outF.close()

### 3.2 Decision Tree

In [27]:
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

In [28]:
## Cdcl2 Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(DecisionTreeClassifier())
scaler.fit(X_cdcl2_train)
X_train_sds = scaler.transform(X_cdcl2_train)
X_test_sds = scaler.transform(X_cdcl2_test)
y_train = y_cdcl2_train
y_test = y_cdcl2_test
clf.fit(X_train_sds, y_train)

In [29]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[181   9   0   0]
 [  1 105   2   0]
 [  9   7 201   2]
 [  2   3   7 171]]
[1mMetrics[0m
ACC: 0.94
BACC: 0.944
F1_micro: 0.94
F1_macro: 0.937
AUROC_OVR: 0.965
AUROC_OVO: 0.966
Precisio_micro: 0.94
Precisio_macro: 0.933
Recall_micro: 0.94
Recall_macro: 0.944


In [30]:
DT_cdcl2_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [31]:
## Pbcl2 Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(DecisionTreeClassifier())
scaler.fit(X_cdcl2_train)
X_train_sds = scaler.transform(X_pbcl2_train)
X_test_sds = scaler.transform(X_pbcl2_test)
y_train = y_pbcl2_train
y_test = y_pbcl2_test
clf.fit(X_train_sds, y_train)

In [32]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[108   0   0   0]
 [  2 165   5   0]
 [  1   2 222   0]
 [  0   0   0 195]]
[1mMetrics[0m
ACC: 0.986
BACC: 0.986
F1_micro: 0.986
F1_macro: 0.986
AUROC_OVR: 0.991
AUROC_OVO: 0.991
Precisio_micro: 0.986
Precisio_macro: 0.985
Recall_micro: 0.986
Recall_macro: 0.986


In [33]:
DT_pbcl2_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [34]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(DecisionTreeClassifier())
scaler.fit(X_pbno32_train)
X_train_sds = scaler.transform(X_pbno32_train)
X_test_sds = scaler.transform(X_pbno32_test)
y_train = y_pbno32_train
y_test = y_pbno32_test
clf.fit(X_train_sds, y_train)

In [35]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[104   4   0   0]
 [  1 192   2   0]
 [  3   1 221   0]
 [  0   1   1 170]]
[1mMetrics[0m
ACC: 0.981
BACC: 0.98
F1_micro: 0.981
F1_macro: 0.98
AUROC_OVR: 0.989
AUROC_OVO: 0.988
Precisio_micro: 0.981
Precisio_macro: 0.98
Recall_micro: 0.981
Recall_macro: 0.98


In [36]:
DT_pbno32_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [37]:
outF = open(fileout, "a")
outF.write("Decision_Tree, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('Cdcl2 DevSet, ')
outF.write(', '.join(map(str, DT_cdcl2_devset_res)))
outF.write('\n')
outF.write('Pbcl2 DevSet, ')
outF.write(', '.join(map(str, DT_pbcl2_devset_res)))
outF.write('\n')
outF.write('Pb(No3)2 DevSet, ')
outF.write(', '.join(map(str, DT_pbno32_devset_res)))
outF.write('\n')
outF.close()

### 3.3 Logistic Regression

In [38]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [39]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(LogisticRegression())
scaler.fit(X_cdcl2_train)
X_train_sds = scaler.transform(X_cdcl2_train)
X_test_sds = scaler.transform(X_cdcl2_test)
y_train = y_cdcl2_train
y_test = y_cdcl2_test
clf.fit(X_train_sds, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

In [40]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[190   0   0   0]
 [  0 108   0   0]
 [  3   1 215   0]
 [  0   0   0 183]]
[1mMetrics[0m
ACC: 0.994
BACC: 0.995
F1_micro: 0.994
F1_macro: 0.995
AUROC_OVR: 0.998
AUROC_OVO: 0.998
Precisio_micro: 0.994
Precisio_macro: 0.994
Recall_micro: 0.994
Recall_macro: 0.995


In [41]:
LR_cdcl2_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [42]:
## Pbcl2 Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(LogisticRegression())
scaler.fit(X_cdcl2_train)
X_train_sds = scaler.transform(X_pbcl2_train)
X_test_sds = scaler.transform(X_pbcl2_test)
y_train = y_pbcl2_train
y_test = y_pbcl2_test
clf.fit(X_train_sds, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [43]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[108   0   0   0]
 [  0 172   0   0]
 [  0   0 225   0]
 [  0   0   0 195]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1_micro: 1.0
F1_macro: 1.0
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 1.0
Precisio_macro: 1.0
Recall_micro: 1.0
Recall_macro: 1.0


In [44]:
LR_pbcl2_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [45]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(LogisticRegression())
scaler.fit(X_pbno32_train)
X_train_sds = scaler.transform(X_pbno32_train)
X_test_sds = scaler.transform(X_pbno32_test)
y_train = y_pbno32_train
y_test = y_pbno32_test
clf.fit(X_train_sds, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [46]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[108   0   0   0]
 [  0 195   0   0]
 [  0   0 225   0]
 [  0   0   0 172]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1_micro: 1.0
F1_macro: 1.0
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 1.0
Precisio_macro: 1.0
Recall_micro: 1.0
Recall_macro: 1.0


In [47]:
LR_pbno32_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [48]:
outF = open(fileout, "a")
outF.write("Logistic_Regression, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('Cdcl2 DevSet, ')
outF.write(', '.join(map(str, LR_cdcl2_devset_res)))
outF.write('\n')
outF.write('Pbcl2 DevSet, ')
outF.write(', '.join(map(str, LR_pbcl2_devset_res)))
outF.write('\n')
outF.write('Pb(No3)2 DevSet, ')
outF.write(', '.join(map(str, LR_pbno32_devset_res)))
outF.write('\n')
outF.close()

### 3.3 Logistic Regression with CV

In [49]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegressionCV

In [50]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(LogisticRegressionCV())
scaler.fit(X_cdcl2_train)
X_train_sds = scaler.transform(X_cdcl2_train)
X_test_sds = scaler.transform(X_cdcl2_test)
y_train = y_cdcl2_train
y_test = y_cdcl2_test
clf.fit(X_train_sds, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

In [51]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[190   0   0   0]
 [  0 108   0   0]
 [  3   1 215   0]
 [  0   0   0 183]]
[1mMetrics[0m
ACC: 0.994
BACC: 0.995
F1_micro: 0.994
F1_macro: 0.995
AUROC_OVR: 0.999
AUROC_OVO: 0.999
Precisio_micro: 0.994
Precisio_macro: 0.994
Recall_micro: 0.994
Recall_macro: 0.995


In [52]:
LR_CV_cdcl2_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [53]:
## Pbcl2 Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(LogisticRegressionCV())
scaler.fit(X_cdcl2_train)
X_train_sds = scaler.transform(X_pbcl2_train)
X_test_sds = scaler.transform(X_pbcl2_test)
y_train = y_pbcl2_train
y_test = y_pbcl2_test
clf.fit(X_train_sds, y_train)

In [54]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[108   0   0   0]
 [  0 172   0   0]
 [  0   0 225   0]
 [  0   0   0 195]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1_micro: 1.0
F1_macro: 1.0
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 1.0
Precisio_macro: 1.0
Recall_micro: 1.0
Recall_macro: 1.0


In [55]:
LR_CV_pbcl2_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [56]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(LogisticRegressionCV())
scaler.fit(X_pbno32_train)
X_train_sds = scaler.transform(X_pbno32_train)
X_test_sds = scaler.transform(X_pbno32_test)
y_train = y_pbno32_train
y_test = y_pbno32_test
clf.fit(X_train_sds, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

In [57]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[108   0   0   0]
 [  0 195   0   0]
 [  0   0 225   0]
 [  0   0   0 172]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1_micro: 1.0
F1_macro: 1.0
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 1.0
Precisio_macro: 1.0
Recall_micro: 1.0
Recall_macro: 1.0


In [58]:
LR_CV_pbno32_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [59]:
outF = open(fileout, "a")
outF.write("Logistic_Regression_CV, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('Cdcl2 DevSet, ')
outF.write(', '.join(map(str, LR_CV_cdcl2_devset_res)))
outF.write('\n')
outF.write('Pbcl2 DevSet, ')
outF.write(', '.join(map(str, LR_CV_pbcl2_devset_res)))
outF.write('\n')
outF.write('Pb(No3)2 DevSet, ')
outF.write(', '.join(map(str, LR_CV_pbno32_devset_res)))
outF.write('\n')
outF.close()

### 3.4 MLP

In [60]:
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier

In [61]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(MLPClassifier())
scaler.fit(X_cdcl2_train)
X_train_sds = scaler.transform(X_cdcl2_train)
X_test_sds = scaler.transform(X_cdcl2_test)
y_train = y_cdcl2_train
y_test = y_cdcl2_test
clf.fit(X_train_sds, y_train)

In [62]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[190   0   0   0]
 [  0 108   0   0]
 [  2   0 217   0]
 [  0   1   0 182]]
[1mMetrics[0m
ACC: 0.996
BACC: 0.996
F1_micro: 0.996
F1_macro: 0.996
AUROC_OVR: 0.999
AUROC_OVO: 0.999
Precisio_micro: 0.996
Precisio_macro: 0.995
Recall_micro: 0.996
Recall_macro: 0.996


In [63]:
MLP_cdcl2_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [64]:
## Pbcl2 Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(MLPClassifier())
scaler.fit(X_cdcl2_train)
X_train_sds = scaler.transform(X_pbcl2_train)
X_test_sds = scaler.transform(X_pbcl2_test)
y_train = y_pbcl2_train
y_test = y_pbcl2_test
clf.fit(X_train_sds, y_train)

In [65]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[108   0   0   0]
 [  0 172   0   0]
 [  0   0 225   0]
 [  0   0   0 195]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1_micro: 1.0
F1_macro: 1.0
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 1.0
Precisio_macro: 1.0
Recall_micro: 1.0
Recall_macro: 1.0


In [66]:
MLP_pbcl2_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [67]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(MLPClassifier())
scaler.fit(X_pbno32_train)
X_train_sds = scaler.transform(X_pbno32_train)
X_test_sds = scaler.transform(X_pbno32_test)
y_train = y_pbno32_train
y_test = y_pbno32_test
clf.fit(X_train_sds, y_train)

In [68]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[108   0   0   0]
 [  0 195   0   0]
 [  0   0 224   1]
 [  0   0   0 172]]
[1mMetrics[0m
ACC: 0.999
BACC: 0.999
F1_micro: 0.999
F1_macro: 0.999
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 0.999
Precisio_macro: 0.999
Recall_micro: 0.999
Recall_macro: 0.999


In [69]:
MLP_pbno32_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [70]:
outF = open(fileout, "a")
outF.write("MLP, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('Cdcl2 DevSet, ')
outF.write(', '.join(map(str, MLP_cdcl2_devset_res)))
outF.write('\n')
outF.write('Pbcl2 DevSet, ')
outF.write(', '.join(map(str, MLP_pbcl2_devset_res)))
outF.write('\n')
outF.write('Pb(No3)2 DevSet, ')
outF.write(', '.join(map(str, MLP_pbno32_devset_res)))
outF.write('\n')
outF.close()

### Random Forest

In [71]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

In [72]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(RandomForestClassifier())
scaler.fit(X_cdcl2_train)
X_train_sds = scaler.transform(X_cdcl2_train)
X_test_sds = scaler.transform(X_cdcl2_test)
y_train = y_cdcl2_train
y_test = y_cdcl2_test
clf.fit(X_train_sds, y_train)

In [73]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[190   0   0   0]
 [  0 108   0   0]
 [  4   0 215   0]
 [  0   0   0 183]]
[1mMetrics[0m
ACC: 0.994
BACC: 0.995
F1_micro: 0.994
F1_macro: 0.995
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 0.994
Precisio_macro: 0.995
Recall_micro: 0.994
Recall_macro: 0.995


In [74]:
RF_cdcl2_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [75]:
## Pbcl2 Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(RandomForestClassifier())
scaler.fit(X_cdcl2_train)
X_train_sds = scaler.transform(X_pbcl2_train)
X_test_sds = scaler.transform(X_pbcl2_test)
y_train = y_pbcl2_train
y_test = y_pbcl2_test
clf.fit(X_train_sds, y_train)

In [76]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[108   0   0   0]
 [  0 172   0   0]
 [  0   0 225   0]
 [  0   0   0 195]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1_micro: 1.0
F1_macro: 1.0
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 1.0
Precisio_macro: 1.0
Recall_micro: 1.0
Recall_macro: 1.0


In [77]:
RF_pbcl2_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [78]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(RandomForestClassifier())
scaler.fit(X_pbno32_train)
X_train_sds = scaler.transform(X_pbno32_train)
X_test_sds = scaler.transform(X_pbno32_test)
y_train = y_pbno32_train
y_test = y_pbno32_test
clf.fit(X_train_sds, y_train)

In [79]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[108   0   0   0]
 [  0 193   2   0]
 [  1   0 224   0]
 [  0   0   1 171]]
[1mMetrics[0m
ACC: 0.994
BACC: 0.995
F1_micro: 0.994
F1_macro: 0.995
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 0.994
Precisio_macro: 0.994
Recall_micro: 0.994
Recall_macro: 0.995


In [80]:
RF_pbno32_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [81]:
outF = open(fileout, "a")
outF.write("Random_Forest, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('Cdcl2 DevSet, ')
outF.write(', '.join(map(str, RF_cdcl2_devset_res)))
outF.write('\n')
outF.write('Pbcl2 DevSet, ')
outF.write(', '.join(map(str, RF_pbcl2_devset_res)))
outF.write('\n')
outF.write('Pb(No3)2 DevSet, ')
outF.write(', '.join(map(str, RF_pbno32_devset_res)))
outF.write('\n')
outF.close()

### 3.6 Linear SVM

In [82]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [83]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(SVC(kernel = 'linear', probability=True))
scaler.fit(X_cdcl2_train)
X_train_sds = scaler.transform(X_cdcl2_train)
X_test_sds = scaler.transform(X_cdcl2_test)
y_train = y_cdcl2_train
y_test = y_cdcl2_test
clf.fit(X_train_sds, y_train)

In [84]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[190   0   0   0]
 [  0 108   0   0]
 [  2   1 216   0]
 [  0   0   0 183]]
[1mMetrics[0m
ACC: 0.996
BACC: 0.997
F1_micro: 0.996
F1_macro: 0.996
AUROC_OVR: 0.998
AUROC_OVO: 0.998
Precisio_micro: 0.996
Precisio_macro: 0.995
Recall_micro: 0.996
Recall_macro: 0.997


In [85]:
LinSVM_cdcl2_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [86]:
## Pbcl2 Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(SVC(kernel = 'linear', probability=True))
scaler.fit(X_cdcl2_train)
X_train_sds = scaler.transform(X_pbcl2_train)
X_test_sds = scaler.transform(X_pbcl2_test)
y_train = y_pbcl2_train
y_test = y_pbcl2_test
clf.fit(X_train_sds, y_train)

In [87]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[108   0   0   0]
 [  0 172   0   0]
 [  0   0 225   0]
 [  0   0   0 195]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1_micro: 1.0
F1_macro: 1.0
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 1.0
Precisio_macro: 1.0
Recall_micro: 1.0
Recall_macro: 1.0


In [88]:
LinSVM_pbcl2_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [89]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(SVC(kernel = 'linear', probability=True))
scaler.fit(X_pbno32_train)
X_train_sds = scaler.transform(X_pbno32_train)
X_test_sds = scaler.transform(X_pbno32_test)
y_train = y_pbno32_train
y_test = y_pbno32_test
clf.fit(X_train_sds, y_train)

In [90]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[108   0   0   0]
 [  0 195   0   0]
 [  0   0 225   0]
 [  0   0   0 172]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1_micro: 1.0
F1_macro: 1.0
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 1.0
Precisio_macro: 1.0
Recall_micro: 1.0
Recall_macro: 1.0


In [91]:
LinSVM_pbno32_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [92]:
outF = open(fileout, "a")
outF.write("Linear_SVM, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('Cdcl2 DevSet, ')
outF.write(', '.join(map(str, LinSVM_cdcl2_devset_res)))
outF.write('\n')
outF.write('Pbcl2 DevSet, ')
outF.write(', '.join(map(str, LinSVM_pbcl2_devset_res)))
outF.write('\n')
outF.write('Pb(No3)2 DevSet, ')
outF.write(', '.join(map(str, LinSVM_pbno32_devset_res)))
outF.write('\n')
outF.close()

### 3.7 RBF SVM ( Nonlinear SVM)

In [93]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [94]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(SVC(kernel = 'rbf', probability=True))
scaler.fit(X_cdcl2_train)
X_train_sds = scaler.transform(X_cdcl2_train)
X_test_sds = scaler.transform(X_cdcl2_test)
y_train = y_cdcl2_train
y_test = y_cdcl2_test
clf.fit(X_train_sds, y_train)

In [95]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[190   0   0   0]
 [  1 104   3   0]
 [  3   1 215   0]
 [  0   0   1 182]]
[1mMetrics[0m
ACC: 0.987
BACC: 0.985
F1_micro: 0.987
F1_macro: 0.986
AUROC_OVR: 0.999
AUROC_OVO: 0.998
Precisio_micro: 0.987
Precisio_macro: 0.988
Recall_micro: 0.987
Recall_macro: 0.985


In [96]:
RBFSVM_cdcl2_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [97]:
## Pbcl2 Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(SVC(kernel = 'rbf', probability=True))
scaler.fit(X_cdcl2_train)
X_train_sds = scaler.transform(X_pbcl2_train)
X_test_sds = scaler.transform(X_pbcl2_test)
y_train = y_pbcl2_train
y_test = y_pbcl2_test
clf.fit(X_train_sds, y_train)

In [98]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[107   1   0   0]
 [  0 172   0   0]
 [  0   0 225   0]
 [  0   0   0 195]]
[1mMetrics[0m
ACC: 0.999
BACC: 0.998
F1_micro: 0.999
F1_macro: 0.998
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 0.999
Precisio_macro: 0.999
Recall_micro: 0.999
Recall_macro: 0.998


In [99]:
RBFSVM_pbcl2_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [100]:
## Learn Classifier
scaler = StandardScaler()
clf = OrdinalClassifier(SVC(kernel = 'rbf', probability=True))
scaler.fit(X_pbno32_train)
X_train_sds = scaler.transform(X_pbno32_train)
X_test_sds = scaler.transform(X_pbno32_test)
y_train = y_pbno32_train
y_test = y_pbno32_test
clf.fit(X_train_sds, y_train)

In [101]:
### Test within batch
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
yp_test = clf.predict(X_test_sds)
ys_test = clf.predict_proba(X_test_sds)

print('\033[1m' + 'Confusion Matrix' + '\033[0m')
print(confusion_matrix(y_test, yp_test))

print('\033[1m' + 'Metrics' + '\033[0m')
print('ACC: {}'.format(round(accuracy_score(y_test, yp_test), 3)))
print('BACC: {}'.format(round(balanced_accuracy_score(y_test, yp_test), 3)))
print('F1_micro: {}'.format(round(f1_score(y_test, yp_test, average='micro'),3)))
print('F1_macro: {}'.format(round(f1_score(y_test, yp_test, average='macro'),3)))
print('AUROC_OVR: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3)))
print('AUROC_OVO: {}'.format(round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3)))
print('Precisio_micro: {}'.format(round(precision_score(y_test, yp_test, average='micro'),3)))
print('Precisio_macro: {}'.format(round(precision_score(y_test, yp_test, average='macro'),3)))
print('Recall_micro: {}'.format(round(recall_score(y_test, yp_test, average='micro'),3)))
print('Recall_macro: {}'.format(round(recall_score(y_test, yp_test, average='macro'),3)))

[1mConfusion Matrix[0m
[[108   0   0   0]
 [  0 195   0   0]
 [  0   0 225   0]
 [  0   0   0 172]]
[1mMetrics[0m
ACC: 1.0
BACC: 1.0
F1_micro: 1.0
F1_macro: 1.0
AUROC_OVR: 1.0
AUROC_OVO: 1.0
Precisio_micro: 1.0
Precisio_macro: 1.0
Recall_micro: 1.0
Recall_macro: 1.0


In [102]:
RBFSVM_pbno32_devset_res = [round(accuracy_score(y_test, yp_test), 3), round(balanced_accuracy_score(y_test, yp_test), 3),
                 round(f1_score(y_test, yp_test, average='micro'),3), round(f1_score(y_test, yp_test, average='macro'),3),
                 round(roc_auc_score(y_test, ys_test, multi_class='ovr'),3), round(roc_auc_score(y_test, ys_test, multi_class='ovo'),3),
                 round(precision_score(y_test, yp_test, average='micro'),3), round(precision_score(y_test, yp_test, average='macro'),3),
                 round(recall_score(y_test, yp_test, average='micro'),3), round(recall_score(y_test, yp_test, average='macro'),3)]

In [103]:
outF = open(fileout, "a")
outF.write("RBF_SVM, ")
outF.write("ACC, BACC, F1_micro, F1_macro, AUROC_OVR, AUROC_OVO, Precision_micro, Precision_macro, Recall_micro, Recall_macro\n")
outF.write('Cdcl2 DevSet, ')
outF.write(', '.join(map(str, RBFSVM_cdcl2_devset_res)))
outF.write('\n')
outF.write('Pbcl2 DevSet, ')
outF.write(', '.join(map(str, RBFSVM_pbcl2_devset_res)))
outF.write('\n')
outF.write('Pb(No3)2 DevSet, ')
outF.write(', '.join(map(str, RBFSVM_pbno32_devset_res)))
outF.write('\n')
outF.close()