In [1]:
import itertools
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
import pandas as pd
import math

def categorical_probas_to_classes(p):
    '''Return, for every row of ``p``, the column index of its largest entry.

    Ties resolve to the lowest column index, as ``np.argmax`` does.
    '''
    winning_columns = np.argmax(p, axis=1)
    return winning_columns

def to_categorical(y, nb_classes=None):
    '''Convert a vector of integer class labels into a one-hot matrix.

    Parameters
    ----------
    y : array-like of int
        Class labels in the range 0 .. nb_classes - 1. Either a flat
        vector or an (n, k) array is accepted; for an (n, k) array every
        label found in row ``i`` is switched on in output row ``i``.
    nb_classes : int, optional
        Number of output columns. When omitted (or falsy) it is inferred
        as ``max(y) + 1``; an empty ``y`` then yields zero columns.

    Returns
    -------
    numpy.ndarray
        Float matrix of shape ``(len(y), nb_classes)`` holding 1.0 at the
        labelled positions and 0.0 elsewhere.

    Raises
    ------
    IndexError
        If a label is not a valid column index for ``nb_classes`` columns.
    '''
    y = np.array(y, dtype='int')
    n_samples = len(y)
    if not nb_classes:
        # np.max raises on an empty array, so an empty label vector is
        # mapped to a matrix with no columns instead of crashing.
        nb_classes = int(np.max(y)) + 1 if y.size else 0
    Y = np.zeros((n_samples, nb_classes))
    if y.size:
        # Pair each row index with all labels of that row. Reshaping to
        # (n, -1) makes a flat vector and an (n, 1) column behave the same
        # and keeps the row/label pairing for (n, k) input.
        row_index = np.arange(n_samples).reshape(-1, 1)
        Y[row_index, y.reshape(n_samples, -1)] = 1.
    return Y


def calculate_performace(test_num, pred_y, labels):
    '''Compute binary-classification metrics over the first ``test_num`` samples.

    A sample counts as positive when its label equals 1; every other label
    value is treated as negative. A prediction is correct when it equals
    the label.

    Parameters
    ----------
    test_num : int
        Number of leading samples to evaluate.
    pred_y : indexable
        Predicted class for each sample.
    labels : indexable
        True class for each sample.

    Returns
    -------
    tuple of float
        ``(acc, precision, npv, sensitivity, specificity, mcc, f1)``.
        All values are 0.0 when ``test_num`` is 0.

    Raises
    ------
    IndexError
        If ``test_num`` exceeds the length of ``pred_y`` or ``labels``.
    '''
    tp = fp = tn = fn = 0
    for index in range(test_num):
        actual = labels[index]
        is_correct = actual == pred_y[index]
        if actual == 1:
            if is_correct:
                tp += 1
            else:
                fn += 1
        elif is_correct:
            tn += 1
        else:
            fp += 1

    # Small constant added to denominators so that an empty confusion-matrix
    # margin yields 0.0 instead of a division error.
    eps = 1e-06
    # Accuracy divides by the sample count directly; guard the empty case so
    # it agrees with the other (epsilon-protected) metrics.
    acc = float(tp + tn) / test_num if test_num else 0.0
    precision = float(tp) / (tp + fp + eps)
    npv = float(tn) / (tn + fn + eps)
    sensitivity = float(tp) / (tp + fn + eps)
    specificity = float(tn) / (tn + fp + eps)
    mcc = float(tp * tn - fp * fn) / (math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)) + eps)
    f1 = float(tp * 2) / (tp * 2 + fp + fn + eps)
    return acc, precision, npv, sensitivity, specificity, mcc, f1

**Independent data test**

In [7]:
from sklearn.preprocessing import scale, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

#############################################################
# Independent-test evaluation: fit an SVC on the training split, score the
# held-out test split, print the metrics and export everything to CSV.
path = "/content/drive/MyDrive/QSAR/Update_2/Other_classifiers/"
# Seed for the SVC. With probability=True the estimator shuffles data
# internally, so an unseeded run is not reproducible.
RANDOM_STATE = 0

# ---- training split (CSV files without a header row) ----
data_ = pd.read_csv(path+'data/X_train_rfe_EN.csv', header=None)
data = np.array(data_)
[m1, n1] = np.shape(data)
X = data
labels = pd.read_csv(path+'data/y_train_rfe_EN.csv', header=None).values
y = labels

# ---- independent test split ----
data_t = pd.read_csv(path+'data/X_test_rfe_EN.csv', header=None)
dataTest = np.array(data_t)
[m1, n1] = np.shape(dataTest)
label = pd.read_csv(path+'data/y_test_rfe_EN.csv', header=None).values
Xt = dataTest
yt = label

# ---- fit and score ----
sepscores = []

cv_clf = SVC(C=0.05, gamma=0.001, probability=True, random_state=RANDOM_STATE)

# The label file loads as a 2-D array; flatten it for fit() so sklearn does
# not emit its column_or_1d conversion warning.
cv_clf = cv_clf.fit(X, y.ravel())

y_score = cv_clf.predict_proba(Xt)
y_test = to_categorical(yt)
# The ROC curve is built from column 0 of the one-hot labels and column 0 of
# the probability matrix.
fpr, tpr, _ = roc_curve(y_test[:, 0], y_score[:, 0])
roc_auc = auc(fpr, tpr)
# NOTE(review): the argmax column index is used directly as the predicted
# class, which assumes cv_clf.classes_ is [0, 1] -- confirm for this data.
y_class = categorical_probas_to_classes(y_score)
y_test_tmp = yt
acc, precision, npv, sensitivity, specificity, mcc, f1 = calculate_performace(len(y_class), y_class,
                                                                                    y_test_tmp)
sepscores.append([acc, precision, npv, sensitivity, specificity, mcc, f1, roc_auc])
# The classifier evaluated here is an SVC, so the summary line is labelled
# accordingly.
print('SVC:acc=%f,precision=%f,npv=%f,sensitivity=%f,specificity=%f,mcc=%f,f1=%f,roc_auc=%f'
      % (acc, precision, npv, sensitivity, specificity, mcc, f1, roc_auc))

# ---- mean / spread per metric (a single run, so the spread is zero) ----
scores = np.array(sepscores)
metric_mean = np.mean(scores, axis=0)
metric_std = np.std(scores, axis=0)
metric_names = ['acc', 'precision', 'npv', 'sensitivity', 'specificity', 'mcc', 'f1', 'roc_auc']
for idx, metric in enumerate(metric_names):
    print("%s=%.2f%% (+/- %.2f%%)" % (metric, metric_mean[idx] * 100, metric_std[idx] * 100))

# The exported table holds the individual run followed by the mean row.
result1 = metric_mean
H1 = result1.tolist()
sepscores.append(H1)
result = sepscores

# ---- export predicted probabilities, one-hot labels and metrics ----
yscore = y_score
yscore_sum = pd.DataFrame(data=yscore)
yscore_sum.to_csv(path+'DL_results/SVC_yscore_test.csv')
ytest = y_test
ytest_sum = pd.DataFrame(data=ytest)
ytest_sum.to_csv(path+'DL_results/SVC_ytest_test.csv')

colum = ['ACC', 'precision', 'npv', 'Sn', 'Sp', 'MCC', 'F1', 'AUC']
data_csv = pd.DataFrame(columns=colum, data=result)
data_csv.to_csv(path+'DL_results/SVC_test_results.csv')




  y = column_or_1d(y, warn=True)


MLP:acc=0.864629,precision=0.959184,npv=0.793893,sensitivity=0.776859,specificity=0.962963,mcc=0.746420,f1=0.858447,roc_auc=0.960132
acc=86.46% (+/- 0.00%)
precision=95.92% (+/- 0.00%)
npv=79.39% (+/- 0.00%)
sensitivity=77.69% (+/- 0.00%)
specificity=96.30% (+/- 0.00%)
mcc=74.64% (+/- 0.00%)
f1=85.84% (+/- 0.00%)
roc_auc=96.01% (+/- 0.00%)
