Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
138 lines (115 sloc) 3.64 KB
import theano
import theano.tensor as T
import numpy as np
from theano_toolkit import utils as U
from theano_toolkit import updates
from numpy_hinton import print_arr
from theano.printing import Print
import adadelta
def unroll(final_rep,W1_i,W1_m,b2_m,b2_i,n_steps):
    """
    Decode a run of input reconstructions starting from one representation.

    Every scan iteration applies two affine maps (no nonlinearity) to the
    current representation:
      * `W1_m.T` and `b2_m` give the representation handed to the next
        iteration;
      * `W1_i.T` and `b2_i` give the input reconstruction emitted by this
        iteration.

    Parameters:
        final_rep  -- representation the first iteration starts from
        W1_i, b2_i -- weights (used transposed) and bias of the input
                      reconstruction
        W1_m, b2_m -- weights (used transposed) and bias of the carried
                      representation
        n_steps    -- number of scan iterations to run

    Returns the per-iteration input reconstructions collected by
    theano.scan; the first entry is the one computed directly from
    `final_rep`.
    """
    def decode_step(rep,W_mem,b_mem,W_in,b_in):
        # theano.scan passes the recurrent output first, followed by the
        # non_sequences in the order they are listed in the call below.
        carried = T.dot(rep,W_mem.T) + b_mem
        emitted = T.dot(rep,W_in.T) + b_in
        return carried,emitted

    scan_outputs,_updates = theano.scan(
        decode_step,
        # Only the carried representation is fed back; the emitted
        # reconstruction is a plain (non-recurrent) output.
        outputs_info = [final_rep,None],
        non_sequences = [W1_m,b2_m,W1_i,b2_i],
        n_steps = n_steps
    )
    return scan_outputs[1]
def make_rae(inputs,W1_i,W1_m,b_h,i_h,b2_m,b2_i):
    """
    Build the recurrent auto-encoder pass over `inputs` with theano.scan.

    For each element of `inputs` (scanned along its leading axis) the step
      1. computes a new hidden state
         tanh(previous_hidden . W1_m + element . W1_i + b_h),
      2. maps that hidden state back through `W1_m.T` + `b2_m`
         (reproduction on the hidden side), and
      3. maps it back through `W1_i.T` + `b2_i` (reproduction on the input
         side).

    Parameters:
        inputs     -- sequence to encode
        W1_i       -- input-to-hidden weights (reused transposed for decoding)
        W1_m       -- hidden-to-hidden weights (reused transposed for decoding)
        b_h        -- bias added before the tanh
        i_h        -- hidden state used for the first step
        b2_m, b2_i -- biases of the hidden-side and input-side reproductions

    Returns a tuple (hidden states, hidden-side reproductions, input-side
    reproductions), each stacked over the scan steps.
    """
    def encode_step(x_t,h_prev,W_mem,W_in,b_enc,b_mem,b_in):
        # Argument order is fixed by theano.scan: sequence element, then the
        # recurrent output, then the non_sequences as listed below.
        pre_activation = T.dot(h_prev,W_mem) + T.dot(x_t,W_in) + b_enc
        h_t = T.tanh(pre_activation)
        # Inactive alternative kept for reference -- a rectifier in place of
        # the tanh:
        #     h_t = (pre_activation > 0) * pre_activation
        recon_hidden = T.dot(h_t,W_mem.T) + b_mem
        recon_input = T.dot(h_t,W_in.T) + b_in
        return h_t,recon_hidden,recon_input

    scan_outputs,_updates = theano.scan(
        encode_step,
        sequences = [inputs],
        # Only the hidden state is recurrent; both reproductions are plain
        # per-step outputs.
        outputs_info = [i_h,None,None],
        non_sequences = [W1_m,W1_i,b_h,b2_m,b2_i]
    )
    hidden_seq,recon_hidden_seq,recon_input_seq = scan_outputs
    return hidden_seq,recon_hidden_seq,recon_input_seq
def build_network(input_size,hidden_size):
    """
    Create the symbolic input, the shared parameters and the model outputs.

    Parameters:
        input_size  -- size passed to U.initial_weights for the input side
        hidden_size -- size passed to U.initial_weights for the hidden side

    Returns the tuple
        (X, parameters, hidden, hidden1_reproduction, input_reproduction,
         unrolled)
    where `X` is a double-precision symbolic matrix, `parameters` is the list
    of shared variables, the next three entries are the outputs of
    `make_rae`, and `unrolled` is the output of `unroll` started from the
    last hidden state and run for as many steps as there are hidden states.
    """
    def new_param(*shape):
        # One shared variable initialised by the toolkit's weight initialiser.
        return U.create_shared(U.initial_weights(*shape))

    X = T.dmatrix('X')

    # Creation order is kept fixed: if U.initial_weights draws from a global
    # random state (not visible from here), reordering these calls would
    # change the initial values.
    W_in = new_param(input_size,hidden_size)
    W_mem = new_param(hidden_size,hidden_size)
    h_init = new_param(hidden_size)
    b_enc = new_param(hidden_size)
    b_hidden_recon = new_param(hidden_size)
    b_input_recon = new_param(input_size)

    # Note the list order differs from the creation order above.
    parameters = [W_in,W_mem,b_enc,h_init,b_hidden_recon,b_input_recon]

    hidden,hidden1_reproduction,input_reproduction = make_rae(
        inputs = X,
        W1_i = W_in,
        W1_m = W_mem,
        b_h = b_enc,
        i_h = h_init,
        b2_m = b_hidden_recon,
        b2_i = b_input_recon
    )

    # Decode again starting from the last hidden state, one step per
    # encoded element.
    unrolled = unroll(
        final_rep = hidden[-1],
        W1_i = W_in,
        W1_m = W_mem,
        b2_m = b_hidden_recon,
        b2_i = b_input_recon,
        n_steps = hidden.shape[0]
    )

    return X,parameters,hidden,hidden1_reproduction,input_reproduction,unrolled
def build_error(X,hidden,hidden1_reproduction,input_reproduction):
    """
    Return the reconstruction error as the sum of two mean squared errors.

    The first term compares `X` with `input_reproduction`. The second
    compares each hidden state with the hidden-side reproduction produced
    one step later, i.e. `hidden[:-1]` against `hidden1_reproduction[1:]`;
    the first reproduction and the last hidden state take no part in it.
    """
    input_residual = X - input_reproduction
    # Shift by one step so entry t of the reproduction lines up with
    # hidden state t-1.
    hidden_residual = hidden[:-1] - hidden1_reproduction[1:]

    input_term = T.mean(input_residual**2)
    hidden_term = T.mean(hidden_residual**2)
    return input_term + hidden_term
if __name__ == '__main__':
    # Demo: fit the network (input size 8, hidden size 64) to the rows of an
    # 8x8 identity matrix, presented in a freshly shuffled order on every
    # update, then print the example, its unrolled reconstruction and the
    # hidden-to-hidden weights.
    X,parameters,hidden,hidden1_reproduction,input_reproduction,unrolled = build_network(8,64)

    # Forward pass only: returns every symbolic output for inspection.
    f = theano.function(
        inputs = [X],
        outputs = [hidden,hidden1_reproduction,input_reproduction,unrolled]
    )

    error = build_error(X,hidden,hidden1_reproduction,input_reproduction)
    # No regulariser is applied; an L1 penalty such as
    # 1e-6*sum(T.sum(abs(p)) for p in parameters) could be added here.
    cost = error
    gradients = T.grad(cost,wrt=parameters)

    eps = T.dscalar('eps')
    mu = T.dscalar('mu')
    # One call performs one adadelta update and returns the error.
    train = theano.function(
        inputs = [X,eps,mu],
        updates = updates.adadelta(parameters,gradients,mu,eps),
        outputs = error
    )

    example = np.eye(8)

    # The numeric error gets its own name so the symbolic `error` above is
    # not shadowed. The loop has no iteration cap: it runs until the error
    # is no longer above the threshold.
    current_error = 10
    while current_error > 0.0001:
        np.random.shuffle(example)
        current_error = train(example,1e-6,0.95)
        # Parenthesised single-argument form prints the same on Python 2
        # and is valid on Python 3.
        print(current_error)

    # Evaluate on one more shuffled ordering; only the unrolled
    # reconstruction is displayed.
    np.random.shuffle(example)
    _hidden_values,_hidden_recon,_input_recon,unrolled_values = f(example)
    print_arr(example)
    print_arr(unrolled_values)
    print_arr(parameters[1].get_value())
You can’t perform that action at this time.