 from util import relu, error_rate, getKaggleMNIST, init_weights
 
 
+def T_shared_zeros_like32(p):
+    # p is itself a Theano shared variable; return a float32 zero buffer of the same shape
+    return theano.shared(np.zeros_like(p.get_value(), dtype=np.float32))
+
+def momentum_updates(cost, params, mu, learning_rate):
+    # one velocity ("momentum change") shared variable per parameter
+    dparams = [T_shared_zeros_like32(p) for p in params]
+
+    updates = []
+    grads = T.grad(cost, params)
+    for p, dp, g in zip(params, dparams, grads):
+        dp_update = mu*dp - learning_rate*g
+        p_update = p + dp_update
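+        # Theano applies all updates simultaneously, so p_update is built from
+        # the dp_update expression (the new velocity) rather than the stale dp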
+
+        updates.append((dp, dp_update))
+        updates.append((p, p_update))
+    return updates
+
+
 class AutoEncoder(object):
     def __init__(self, M, an_id):
         self.M = M
         self.id = an_id
 
     def fit(self, X, learning_rate=0.5, mu=0.99, epochs=1, batch_sz=100, show_fig=False):
+        # cast hyperparameters to float32 so they match the float32 shared variables
+        mu = np.float32(mu)
+        learning_rate = np.float32(learning_rate)
+
         N, D = X.shape
         n_batches = N // batch_sz
 
         W0 = init_weights((D, self.M))
         self.W = theano.shared(W0, 'W_%s' % self.id)
-        self.bh = theano.shared(np.zeros(self.M), 'bh_%s' % self.id)
-        self.bo = theano.shared(np.zeros(D), 'bo_%s' % self.id)
+        self.bh = theano.shared(np.zeros(self.M, dtype=np.float32), 'bh_%s' % self.id)
+        self.bo = theano.shared(np.zeros(D, dtype=np.float32), 'bo_%s' % self.id)
         self.params = [self.W, self.bh, self.bo]
         self.forward_params = [self.W, self.bh]
 
@@ -61,18 +84,17 @@ def fit(self, X, learning_rate=0.5, mu=0.99, epochs=1, batch_sz=100, show_fig=Fa
             outputs=cost,
         )
 
-        updates = [
-            (p, p + mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, self.dparams)
-        ] + [
-            (dp, mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, self.dparams)
-        ]
+
+
+        updates = momentum_updates(cost, self.params, mu, learning_rate)
         train_op = theano.function(
             inputs=[X_in],
             updates=updates,
         )
 
         costs = []
         print("training autoencoder: %s" % self.id)
+        print("epochs to do:", epochs)
         for i in range(epochs):
             print("epoch:", i)
             X = shuffle(X)
@@ -117,9 +139,22 @@ def __init__(self, hidden_layer_sizes, UnsupervisedModel=AutoEncoder):
             count += 1
 
 
-    def fit(self, X, Y, Xtest, Ytest, pretrain=True, learning_rate=0.01, mu=0.99, reg=0.1, epochs=1, batch_sz=100):
+    def fit(self, X, Y, Xtest, Ytest,
+            pretrain=True,
+            train_head_only=False,
+            learning_rate=0.1,
+            mu=0.99,
+            reg=0.0,
+            epochs=1,
+            batch_sz=100):
+
+        # cast to float32
+        learning_rate = np.float32(learning_rate)
+        mu = np.float32(mu)
+        reg = np.float32(reg)
+
         # greedy layer-wise training of autoencoders
-        pretrain_epochs = 1
+        pretrain_epochs = 2
         if not pretrain:
             pretrain_epochs = 0
 
@@ -135,38 +170,27 @@ def fit(self, X, Y, Xtest, Ytest, pretrain=True, learning_rate=0.01, mu=0.99, re
         K = len(set(Y))
         W0 = init_weights((self.hidden_layers[-1].M, K))
         self.W = theano.shared(W0, "W_logreg")
-        self.b = theano.shared(np.zeros(K), "b_logreg")
+        self.b = theano.shared(np.zeros(K, dtype=np.float32), "b_logreg")
 
         self.params = [self.W, self.b]
-        for ae in self.hidden_layers:
-            self.params += ae.forward_params
-
-        # for momentum
-        self.dW = theano.shared(np.zeros(W0.shape), "dW_logreg")
-        self.db = theano.shared(np.zeros(K), "db_logreg")
-        self.dparams = [self.dW, self.db]
-        for ae in self.hidden_layers:
-            self.dparams += ae.forward_dparams
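+        # with train_head_only=True the pretrained layers stay frozen and only
+        # the logistic-regression head (W, b) receives gradient updates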
+        if not train_head_only:
+            for ae in self.hidden_layers:
+                self.params += ae.forward_params
 
         X_in = T.matrix('X_in')
         targets = T.ivector('Targets')
         pY = self.forward(X_in)
 
-        # squared_magnitude = [(p*p).sum() for p in self.params]
-        # reg_cost = T.sum(squared_magnitude)
-        cost = -T.mean( T.log(pY[T.arange(pY.shape[0]), targets]) ) # + reg*reg_cost
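+        # cost = mean negative log-probability of the target class + L2 penalty on all trained params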
+        squared_magnitude = [(p*p).sum() for p in self.params]
+        reg_cost = T.sum(squared_magnitude)
+        cost = -T.mean( T.log(pY[T.arange(pY.shape[0]), targets]) ) + reg*reg_cost
         prediction = self.predict(X_in)
         cost_predict_op = theano.function(
             inputs=[X_in, targets],
             outputs=[cost, prediction],
         )
 
-        updates = [
-            (p, p + mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, self.dparams)
-        ] + [
-            (dp, mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, self.dparams)
-        ]
-        # updates = [(p, p - learning_rate*T.grad(cost, p)) for p in self.params]
+        updates = momentum_updates(cost, self.params, mu, learning_rate)
         train_op = theano.function(
             inputs=[X_in, targets],
             updates=updates,
@@ -209,7 +233,8 @@ def main():
     # dnn.fit(Xtrain, Ytrain, Xtest, Ytest, epochs=3)
     # vs
     dnn = DNN([1000, 750, 500])
-    dnn.fit(Xtrain, Ytrain, Xtest, Ytest, pretrain=False, epochs=10)
+    dnn.fit(Xtrain, Ytrain, Xtest, Ytest, pretrain=True, train_head_only=False, epochs=3)
+    # note: try training the head only too! what does that mean?
 
 
 def test_single_autoencoder():
@@ -239,5 +264,5 @@ def test_single_autoencoder():
 
 
 if __name__ == '__main__':
-    # main()
-    test_single_autoencoder()
+    main()
+    # test_single_autoencoder()
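
For a quick sanity check, momentum_updates also works outside the autoencoder class. A minimal sketch, assuming T_shared_zeros_like32 and momentum_updates from this commit are in scope; the toy parameter w and the quadratic cost are hypothetical, not part of the commit:

import numpy as np
import theano
import theano.tensor as T

# toy parameter and cost: minimize sum((w - 2)^2), so w should approach 2
w = theano.shared(np.array([5.0, -3.0], dtype=np.float32), 'w')
cost = ((w - np.float32(2.0))**2).sum()

# compile one momentum-SGD step using the helper introduced above
step = theano.function(
    inputs=[],
    outputs=cost,
    updates=momentum_updates(cost, [w], np.float32(0.9), np.float32(0.01)),
)

for _ in range(200):
    step()
print(w.get_value())  # both entries approach 2.0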