-
Notifications
You must be signed in to change notification settings - Fork 0
/
PSSM_EDT-lag6-915.py
103 lines (95 loc) · 3.62 KB
/
PSSM_EDT-lag6-915.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
from featureGenerator import *
from readToMatrix import *
import numpy as np
import re
import os
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, LeaveOneOut, cross_val_score, KFold
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, roc_curve ,confusion_matrix
def _pssm_sort_key(filename):
    """Return a sort key that orders numerically named files by their number.

    The name is parsed with ``int()`` instead of ``eval()`` so that a file
    name can never be executed as code. The whole name is tried first, then
    the name without its extension. Names that are not integers sort after
    all numeric ones, in alphabetical order, instead of aborting the sort.
    """
    stem = os.path.splitext(filename)[0]
    for candidate in (filename, stem):
        try:
            return (0, int(candidate), filename)
        except ValueError:
            continue
    return (1, 0, filename)


def getMatrix(dirname):
    """Build the feature matrix for every PSSM file found in ``dirname``.

    Each directory entry is read with ``readToMatrix(path, 'pssm')``,
    passed through ``autoNorm(matrix, 'pssm')`` and converted to a feature
    row with ``getEDT(matrix, 6)``. Entries are processed in the numeric
    order of their file names.

    Args:
        dirname: Directory whose entries are all PSSM profile files.

    Returns:
        A float ``numpy`` array with one row per file and ``400 * 6``
        columns. An empty directory yields an array of shape ``(0, 2400)``.

    Side effects:
        Prints the shape of the resulting matrix.
    """
    pssmList = sorted(os.listdir(dirname), key=_pssm_sort_key)
    m = len(pssmList)
    # Row width must match the length of the flattened getEDT(matrix, 6)
    # output; a mismatch raises on assignment below.
    reMatrix = np.zeros((m, 400 * 6))
    for row, name in enumerate(pssmList):
        matrix = readToMatrix(dirname + '/' + name, 'pssm')
        matrix = autoNorm(matrix, 'pssm')
        reMatrix[row, :] = np.hstack(getEDT(matrix, 6))
    print(reMatrix.shape)
    return reMatrix
def _load_dataset(negative_dir, positive_dir):
    """Load both classes and return ``(features, labels)``.

    Negative rows come first and are labelled ``-1``; positive rows follow
    and are labelled ``1``.
    """
    negatives = getMatrix(negative_dir)
    positives = getMatrix(positive_dir)
    features = np.vstack((negatives, positives))
    labels = np.array([-1] * negatives.shape[0] + [1] * positives.shape[0])
    return features, labels


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when the denominator is 0."""
    if denominator == 0:
        return float("nan")
    return numerator / denominator


def _print_binary_metrics(true_labels, predicted_labels, positive_scores):
    """Print ROC-AUC, SP, SN, PRE, MCC, F-score and ACC for a -1/+1 problem.

    Args:
        true_labels: Ground-truth labels (-1 or 1).
        predicted_labels: Predicted labels (-1 or 1).
        positive_scores: Score of the positive class for every sample,
            forwarded to ``roc_auc_score``.
    """
    # Fixing the label order keeps the matrix 2x2 (row/column 0 = -1,
    # row/column 1 = +1) even if one class is absent from the inputs.
    confusion = confusion_matrix(true_labels, predicted_labels, labels=[-1, 1])
    # Plain Python ints: the four-way product below cannot overflow.
    tn, fp = int(confusion[0, 0]), int(confusion[0, 1])
    fn, tp = int(confusion[1, 0]), int(confusion[1, 1])

    print("ROC:{}".format(roc_auc_score(true_labels, positive_scores)))
    print("SP:{}".format(_safe_ratio(tn, tn + fp)))
    print("SN:{}".format(_safe_ratio(tp, tp + fn)))
    mcc_denominator = ((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)) ** 0.5
    mcc = _safe_ratio(tp * tn - fp * fn, mcc_denominator)
    print("PRE:{}".format(_safe_ratio(tp, tp + fp)))
    print("MCC:{}".format(mcc))
    print("F-score:{}".format(_safe_ratio(2 * tp, 2 * tp + fp + fn)))
    print("ACC:{}".format(_safe_ratio(tp + tn, tp + fp + tn + fn)))


def main():
    """Tune an SVC on the Train915 set and report LOO and Test850 metrics.

    Steps:
        1. Load the training features and -1/+1 labels.
        2. Grid-search ``C`` and ``gamma`` with 10-fold cross-validation.
        3. Evaluate the best estimator with leave-one-out on the training
           set and print the metrics.
        4. Refit on the full training set, predict the Test850 set and
           print the same metrics.
    """
    x, y = _load_dataset(
        "data/Train915/result/negative/pssm_profile_uniref50",
        "data/Train915/result/positive/pssm_profile_uniref50",
    )

    # Hyper-parameter search: C in 2^-5 .. 2^15, gamma in 2^3 .. 2^-15,
    # both stepping the exponent by 2.
    param_grid = {
        "C": [2 ** exponent for exponent in range(-5, 16, 2)],
        "gamma": [2 ** exponent for exponent in range(3, -16, -2)],
    }
    gs = GridSearchCV(SVC(probability=True), param_grid, cv=10)
    gs.fit(x, y)
    print(gs.best_estimator_)
    print(gs.best_score_)

    # Leave-one-out accuracy of the selected model.
    # NOTE(review): the manual loop below repeats the same LOO fits to
    # collect predictions and probabilities; this call looks redundant.
    clf = gs.best_estimator_
    loo = LeaveOneOut()
    score = cross_val_score(clf, x, y, cv=loo).mean()
    print("LOO:{}".format(score))

    # Manual LOO pass, keeping per-sample predictions and probabilities.
    # This refits gs.best_estimator_ in place on every split.
    loo_probas_y = []
    loo_test_y = []
    loo_predict_y = []
    for train_idx, test_idx in loo.split(x):
        clf.fit(x[train_idx], y[train_idx])
        loo_predict_y.extend(clf.predict(x[test_idx]))
        loo_probas_y.extend(clf.predict_proba(x[test_idx]))
        loo_test_y.extend(y[test_idx])
    loo_probas_y = np.array(loo_probas_y)
    loo_test_y = np.array(loo_test_y)
    print(loo_probas_y.shape)

    _print_binary_metrics(loo_test_y, loo_predict_y, loo_probas_y[:, 1])

    # Independent test: refit on the whole training set first, because the
    # LOO loop left the estimator fitted on its last training split.
    test_x, test_y = _load_dataset(
        "data/Test850/result/negative/pssm_profile_uniref50",
        "data/Test850/result/positive/pssm_profile_uniref50",
    )
    clf = gs.best_estimator_
    clf.fit(x, y)
    predict_y = clf.predict(test_x)
    probas_y = clf.predict_proba(test_x)
    print("IND:{}".format(accuracy_score(test_y, predict_y)))

    _print_binary_metrics(test_y, predict_y, probas_y[:, 1])
# Run the experiment only when executed as a script, so importing this
# module (e.g. by multiprocessing workers) does not retrain the model.
if __name__ == "__main__":
    main()