-
Notifications
You must be signed in to change notification settings - Fork 0
/
backwards.py
38 lines (34 loc) · 1.3 KB
/
backwards.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from loadData import TCGAData
from sklearn.svm import SVC
from sklearn.cross_validation import StratifiedKFold
from sklearn.feature_selection import RFECV
from testUtils import print_genes_nonzero_coeff
from sklearn.metrics import zero_one
from sklearn.feature_selection import RFE
def rec_feature_elim(data, num_features=17700):
    """Run recursive feature elimination (RFE) around a linear SVM.

    Fits ``RFE`` with ``SVC(kernel="linear", C=1)`` as the estimator,
    dropping one feature per iteration (``step=1``) until ``num_features``
    remain, then passes a 0/1 selection mask to
    ``print_genes_nonzero_coeff``.

    Args:
        data: Object providing ``get_gene_exp_matrix()`` (used as the
            feature matrix X) and ``get_labels()`` (used as the targets y).
        num_features: Value forwarded to RFE's ``n_features_to_select``.
            Defaults to 17700.

    Returns:
        None.  The result is handed to
        ``print_genes_nonzero_coeff(data, mask)``.
    """
    X = data.get_gene_exp_matrix()
    y = data.get_labels()

    svc = SVC(kernel="linear", C=1)
    rfe = RFE(estimator=svc, n_features_to_select=num_features, step=1)
    selector = rfe.fit(X, y)

    # ``support_`` is a NumPy boolean array, so its elements are
    # ``numpy.bool_`` objects and never the ``True`` singleton: an
    # ``is True`` identity test would flag every feature as unselected.
    # Rely on truthiness instead.  Building a real list (rather than a lazy
    # ``map`` object) keeps the mask indexable and re-iterable on Python 3.
    mask = [1 if selected else 0 for selected in selector.support_]
    print_genes_nonzero_coeff(data, mask)
def rec_feature_elim_with_KFold(data):
    """Recursive feature elimination with a cross-validated feature count.

    FIXME: How to pick a kernel?
    WARNING: ridiculously slow?

    Fits ``RFECV`` around a linear ``SVC``, removing one feature per
    iteration (``step=1``).  Cross-validation uses ``StratifiedKFold(y, 2)``
    and ``zero_one`` as the loss function.  A 0/1 selection mask is passed
    to ``print_genes_nonzero_coeff`` and the number of features RFECV
    settled on is printed.

    Args:
        data: Object providing ``get_gene_exp_matrix()`` (used as the
            feature matrix X) and ``get_labels()`` (used as the targets y).

    Returns:
        None.  Results are reported via ``print_genes_nonzero_coeff`` and
        standard output.
    """
    X = data.get_gene_exp_matrix()
    y = data.get_labels()

    # Create the RFE object and compute a cross-validated score.
    svc = SVC(kernel="linear")
    rfecv = RFECV(
        estimator=svc,
        step=1,
        cv=StratifiedKFold(y, 2),
        loss_func=zero_one,
    )
    selector = rfecv.fit(X, y)

    # ``support_`` is a NumPy boolean array, so its elements are
    # ``numpy.bool_`` objects and never the ``True`` singleton: an
    # ``is True`` identity test would flag every feature as unselected.
    # Rely on truthiness instead.  Building a real list (rather than a lazy
    # ``map`` object) keeps the mask indexable and re-iterable on Python 3.
    mask = [1 if selected else 0 for selected in selector.support_]
    print_genes_nonzero_coeff(data, mask)

    # Parenthesized single-argument form prints the same text whether
    # ``print`` is the Python 2 statement or the Python 3 function.
    print("Optimal number of features : %d" % rfecv.n_features_)
def main():
    """Build a ``TCGAData`` instance and run ``rec_feature_elim`` on it.

    ``rec_feature_elim`` is called with its default ``num_features``.
    """
    rec_feature_elim(TCGAData())
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()