-
Notifications
You must be signed in to change notification settings - Fork 0
/
ex_marray.py
116 lines (88 loc) · 3.93 KB
/
ex_marray.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import numpy as np
from pylab import *
import datetime
from gather_sda import Gather_sda
from knn import knn
"""
Data layout: rows are samples, columns are features (samples x features).
"""
# Load the data matrix; the original author notes its shape as (102, 293),
# laid out as samples x features.
X = np.loadtxt('marray.txt')  # (102, 293)

# Standardise every feature column to zero mean and unit variance.
# A constant column has a standard deviation of 0; dividing by it would turn
# the whole column into NaN and contaminate every later error sum, so such
# columns are divided by 1 instead (they simply become all zeros).
feature_std = np.std(X, axis=0)
feature_std = np.where(feature_std == 0, 1.0, feature_std)
dataset = (X - np.mean(X, axis=0)) / feature_std
print(dataset.shape)

# Split by row order (no shuffling): the first 80% of the rows form the
# training pool, the remaining 20% the test set.
percent = int(dataset.shape[0] * 0.8)
train, test_set = dataset[:percent], dataset[percent:]

# The training pool is split again 80/20 into the actual training set and a
# validation set.
percent_valid = int(train.shape[0] * 0.8)
train_set, valid_set = train[:percent_valid], train[percent_valid:]

# One error value is appended to each list per missing-data level.
b_error = []      # SDA trained on fully observed training rows
mean_error = []   # column-mean imputation baseline
knn_error = []    # kNN imputation baseline
sdaw = []         # SDA trained on masked training rows

# Fractions of entries to hide: 0.1, 0.2, ..., 0.9.
missing_percent = np.linspace(0.1, 0.9, 9)
# For a quick single-level run use instead: missing_percent = [0.9]
def _masked_error(truth, estimate, observed_mask):
    """Return the row-averaged squared error over the hidden entries.

    ``observed_mask`` is 1 where a value was available and 0 where it was
    hidden, so ``1 - observed_mask`` keeps only the entries that had to be
    imputed.  Squared errors are summed along each row (axis 1) and the
    per-row sums are then averaged.

    ``np.sum`` is called explicitly: the builtin ``sum`` has no ``axis``
    argument, and relying on ``from pylab import *`` to shadow it is fragile.
    """
    hidden = 1 - observed_mask
    return np.sum(hidden * (truth - estimate) ** 2, axis=1).mean()


# The mean-imputation estimate does not depend on the mask: compute it once.
# NOTE(review): these column means are taken over the complete data,
# including the entries that are later hidden -- confirm this is intended.
column_mean = dataset.mean(axis=0)

for mis in missing_percent:
    print('missing percentage: ',mis)

    # 1 = entry observed, 0 = entry hidden; each entry is kept with
    # probability 1 - mis.
    available_mask = np.random.binomial(n=1, p=1 - mis, size=dataset.shape)
    rest_mask, test_mask = available_mask[:percent], available_mask[percent:]
    valid_mask = rest_mask[percent_valid:]

    #### SDA
    # With p = 1 the mask is all ones, so no training entry is hidden in this
    # first run (the masked alternative, rest_mask[:percent_valid], is used
    # by the second run below).
    train_mask = np.random.binomial(n=1, p=1, size=train_set.shape)
    data = (train_set * train_mask, valid_set * valid_mask, test_set * test_mask)
    mask = train_mask, valid_mask, test_mask
    # method = 'rmsprop' 'adam' 'nes_mom' 'adadelta'
    gather = Gather_sda(dataset=test_set * test_mask,
                        portion_data=data,
                        problem='regression',
                        available_mask=mask,
                        method='nes_mom',
                        pretraining_epochs=30,
                        pretrain_lr=0.0001,
                        training_epochs=200,
                        finetune_lr=0.0001,
                        batch_size=5,
                        hidden_size=[400, 100, 20],
                        corruption_da=[0.1, 0.1, 0.1],
                        dA_initiall=True,
                        error_known=True)
    gather.finetuning()

    knn_result = knn(dataset, available_mask)

    ######### run the result for test
    # NOTE(review): the SDA error is measured on the test split only, while
    # the mean and kNN baselines are measured on every row of `dataset` --
    # confirm that this comparison is intended.
    b_error.append(_masked_error(test_set, gather.gather_out(), test_mask))
    mean_error.append(_masked_error(dataset, column_mean, available_mask))
    knn_error.append(_masked_error(dataset, knn_result, available_mask))
    plot(mis, b_error[-1], 'ro')
    plot(mis, mean_error[-1], 'bo')
    plot(mis, knn_error[-1], 'g*')

    #### SDA with corruption in training
    # Same validation/test masks, but the training rows are now masked too.
    train_mask = rest_mask[:percent_valid]
    data = (train_set * train_mask, valid_set * valid_mask, test_set * test_mask)
    mask = train_mask, valid_mask, test_mask
    gather = Gather_sda(dataset=test_set * test_mask,
                        portion_data=data,
                        problem='regression',
                        available_mask=mask,
                        method='adam',
                        pretraining_epochs=10,
                        pretrain_lr=0.0001,
                        training_epochs=200,
                        finetune_lr=0.00001,
                        batch_size=5,
                        hidden_size=[300, 200, 100],
                        corruption_da=[0.1, 0.1, 0.1],
                        dA_initiall=True,
                        error_known=True)
    gather.finetuning()
    sdaw.append(_masked_error(test_set, gather.gather_out(), test_mask))
    plot(mis, sdaw[-1], 'm+')

    print(mis,b_error[-1],mean_error[-1],knn_error[-1],sdaw[-1])
# Footer caption recording hyper-parameters.
# NOTE(review): the text lists a single configuration (method 'nes_mom');
# it is a hand-written literal and is not derived from the actual calls.
caption = "problem = regression,available_mask = mask,method = 'nes_mom',pretraining_epochs = 30,pretrain_lr = 0.0001,training_epochs = 200,finetune_lr = 0.0001,batch_size = 5,hidden_size = [400,100,20],corruption_da = [0.1, 0.1, 0.1],dA_initiall = True,error_known = True "
figtext(.02, .02, caption)

# One line per method across all missing-data levels; the order here fixes
# the order of the legend entries.
curves = (
    (mean_error, 'b', 'mean_row'),
    (knn_error, 'g', 'knn'),
    (b_error, 'r', 'sda'),
    (sdaw, 'm', 'sdaw'),
)
for series, colour, legend_name in curves:
    plot(missing_percent, series, colour, label=legend_name)

xlabel('corruption percentage')
ylabel('MSE')
title('dataset: Ovarian Cancer Samples')  ###RNA
legend(loc=4, prop=dict(size=9))  # loc=4 places the legend at the lower right
show()