In [1]:
import numpy
import theano
import theano.tensor as T
rng = numpy.random

N = 400                                   # training sample size
feats = 784                               # number of input variables
# generate a dataset: D = (input_values, target_class)
D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))
training_steps = 10000

In [2]:
D[0].shape

(400, 784)

In [4]:
# Declare Theano symbolic variables
x = T.dmatrix("x")
y = T.dvector("y")

# initialize the weight vector w randomly
#
# this and the following bias variable b
# are shared so they keep their values
# between training iterations (updates)
w = theano.shared(rng.randn(feats), name="w")

# initialize the bias term
b = theano.shared(0., name="b")

print("Initial model:")
print(w.get_value())
print(b.get_value())

Initial model:
[ -3.82899850e-01  -2.85553801e-01  -1.45376555e-01  -1.05117445e+00
  -1.95773824e-01   1.37857050e+00  -1.16927337e+00  -1.14789003e+00
   9.31331343e-01  -1.20545245e+00  -4.82487313e-01  -5.67714072e-01
  -7.96182235e-01  -3.96439435e-01  -4.87726897e-01   4.31025849e-01
  -3.74904050e-01   2.05393362e+00  -1.02006622e+00  -5.85660874e-01
  -8.75456754e-01   1.96724866e-01   2.11633373e+00  -4.33824109e-01
  -1.03884009e-01   7.48635154e-02   1.69307354e+00  -4.68313028e-01
   1.50974058e-01   6.97187661e-01   1.07214846e+00   5.73609348e-01
  -8.94736505e-01   8.16332566e-01  -2.95824658e-01   5.53250359e-01
  -1.70771856e-01   3.30328055e-01   1.43156163e+00  -7.52309086e-01
  -2.98092534e-01  -6.14155136e-01  -1.18231252e-01   1.57324473e+00
  -1.82397611e+00  -6.75457248e-02  -6.51923606e-01   3.40602459e-01
  -1.24441234e+00   6.36506313e-01  -6.44539407e-01   9.99454350e-01
  -2.37604846e+00   7.15216970e-01   3.47282641e-01  -1.66204936e+00
   8.75093666e-02  

In [5]:
# Construct Theano expression graph
p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b))   # Probability that target = 1
prediction = p_1 > 0.5                    # The prediction thresholded
xent = -y * T.log(p_1) - (1-y) * T.log(1-p_1) # Cross-entropy loss function
cost = xent.mean() + 0.01 * (w ** 2).sum()# The cost to minimize
gw, gb = T.grad(cost, [w, b])  # Compute the gradient of the cost
                                          # w.r.t weight vector w and
                                          # bias term b
                                          # (we shall return to this in a
                                          # following section of this tutorial)


In [6]:
# Compile
train = theano.function(
          inputs=[x,y],
          outputs=[prediction, xent],
          updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb)))
predict = theano.function(inputs=[x], outputs=prediction)

# Train
for i in range(training_steps):
    pred, err = train(D[0], D[1])

#print("Final model:")
#print(w.get_value())
#print(b.get_value())
#print("target values for D:")
#print(D[1])
#print("prediction on D:")


In [7]:
print(predict(D[0]))

[0 1 0 1 1 1 0 1 0 1 1 0 0 0 1 0 1 1 1 0 0 1 1 1 0 0 1 0 0 0 1 1 0 1 0 0 1
 1 1 0 1 1 1 1 0 1 1 0 1 0 0 1 1 0 1 0 1 1 1 1 0 0 1 0 1 0 0 0 0 0 1 1 1 0
 1 1 1 0 0 1 0 0 0 1 0 0 0 1 0 0 1 0 1 0 0 1 0 0 0 0 0 1 0 1 0 1 1 0 0 1 0
 0 0 0 0 0 1 1 1 0 1 1 0 0 0 1 0 1 1 1 0 1 0 0 1 1 0 1 0 0 1 1 1 1 1 1 1 1
 0 1 0 0 1 1 0 0 0 1 1 0 0 1 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0 1 1 0 1
 0 0 1 0 0 0 1 1 0 1 1 1 1 0 0 1 1 0 1 1 1 1 1 1 0 1 0 0 0 1 0 1 0 1 1 1 1
 1 0 1 0 0 1 1 1 1 1 1 0 1 1 0 1 1 0 0 1 0 0 0 0 1 1 1 1 1 1 0 0 1 0 0 0 1
 1 0 1 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 1 0 1 1 1 0 0 1 0 0 0
 0 1 1 0 0 0 1 1 1 1 0 0 1 1 0 0 1 1 1 0 1 0 0 1 1 0 0 1 1 0 0 1 0 1 1 0 1
 1 1 1 1 1 0 1 1 0 0 1 0 1 0 0 1 0 0 0 1 1 0 0 1 0 0 0 0 1 0 0 0 1 1 0 1 0
 1 1 0 1 1 0 1 0 1 1 0 0 1 1 0 1 1 0 1 0 0 1 1 1 1 1 0 1 1 1]


In [12]:
# Finding the derivative
x = T.dmatrix('x')
s = T.sum(1 / (1 + T.exp(-x)))
gs = T.grad(s, x)

In [10]:
dlogistic = theano.function([x], gs)
dlogistic([[0, 1], [-1, -2]])

array([[ 0.25      ,  0.19661193],
       [ 0.19661193,  0.10499359]])

In [30]:
import numpy as np
#scan Example: Computing tanh(x(t).dot(W) + b) elementwise
# defining the tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")

results, updates = theano.scan(lambda v: T.dot(v,W)+ b_sym, sequences=X)
compute_elementwise = theano.function(inputs=[X, W, b_sym], outputs=results)

# test values
x = np.random.randn(2,2)
w = np.ones((2, 2), dtype=theano.config.floatX)
b = np.ones((2), dtype=theano.config.floatX)
b[1] = 2

print(compute_elementwise(x, w, b))

# comparison with numpy
print(np.tanh(x.dot(w) + b))
x
w
b

[[ 2.45684203  3.45684203]
 [-0.99082286  0.00917714]]
[[ 0.98541637  0.9980138 ]
 [-0.75771297  0.00917689]]


array([ 1.,  2.])

In [31]:
x

array([[ 0.54846717,  0.90837486],
       [-0.13125812, -1.85956473]])

In [24]:
w

array([[ 1.,  1.],
       [ 1.,  1.]])

In [25]:
b

array([ 1.,  2.])

In [33]:
np.dot(x[0],w[0])

1.456842026032199

(2,)