# Theano Tutorial Notes

Personal notes for the official tutorial [http://deeplearning.net/software/theano/tutorial/](http://deeplearning.net/software/theano/tutorial/)

In [1]:
import theano
import theano.tensor as T
from theano import pp
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# numpy arrays: a handful of construction variants, printed one after another
array_examples = [
    np.array([1, 2, 3, 4]),
    np.array([1, 2, 3, 4], copy=False),
    np.array([[1, 2], [3, 4]], copy=False, order='C'),  # A (default), F (Fortran), C (contiguous)
    np.asarray([1, 2, 3, 4]),
    np.array([1, 2, 3, 4], dtype=complex),
    np.array([1, 2, 3, 4], ndmin=2),
    np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
    np.matrix('1 2; 3 4'),  # subtype of ndarray (convenience method)
]
for example in array_examples:
    print(example)

[1 2 3 4]
[1 2 3 4]
[[1 2]
 [3 4]]
[1 2 3 4]
[ 1.+0.j  2.+0.j  3.+0.j  4.+0.j]
[[1 2 3 4]]
[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[1 2]
 [3 4]]


In [3]:
# theano types: build a named float32 scalar and inspect it in several ways
x = T.fscalar('x')
print(pp(x))                                 # pretty-printed form of the variable
print(pp(T.fscalar()))                       # same, for a variable created without a name
print(type(x))                               # Python class of the symbolic variable
print(x.type())                              # calling the type yields a fresh variable of that type
print(T.fscalars('a', 'b', 'c', 'd', 'e'))   # several named scalars in one call
print(T.fscalar(name='foo'))                 # the name can also be given as a keyword

x
<TensorType(float32, scalar)>
<class 'theano.tensor.var.TensorVariable'>
<TensorType(float32, scalar)>
[a, b, c, d, e]
foo


In [4]:
# function & gradient: y = 2x + 4 and its derivative with respect to x
y = 2 * x + 4
dy = theano.grad(cost=y, wrt=x)

# Two equivalent spellings of the same compiled function (positional vs. keyword).
y_f = theano.function([x], y)
y_f = theano.function(inputs=[x], outputs=y)  # same as above
dy_f = theano.function(inputs=[x], outputs=dy)

print(y_f(1))           # evaluate through the compiled function
print(y.eval({x: 1}))   # evaluate the expression directly
print(dy_f(1))          # evaluate the compiled gradient

6.0
6.0
2.0


In [5]:
# matrix & vector: symbolic product y = A.dot(x), evaluated on concrete numpy inputs
A = T.dmatrix('A')
x = T.dvector('x')
y = A.dot(x)

# Map each symbolic variable to the value it takes for this evaluation.
matvec_inputs = {
    A: np.array([[1, 2], [3, 4]]),
    x: np.array([2, 0]),
}
y.eval(matvec_inputs)


array([ 2.,  6.])

In [6]:
# More functions: apply a few elementwise ops to the same scalar and evaluate at x = 1
x = T.scalar('x')
for elementwise_op in (T.exp, T.tanh, T.log):
    print(elementwise_op(x).eval({x: 1}))

2.71828182846
0.761594155956
0.0


In [7]:
# computing more at the same time: one compiled function, a list of outputs
x = T.scalar('x')
exp_and_tanh = theano.function(inputs=[x], outputs=[T.exp(x), T.tanh(x)])
print(exp_and_tanh(1))

[array(2.718281828459045), array(0.7615941559557649)]


In [8]:
# default & named parameters
# theano.In wraps an input to give it a default value (`value=`) and, optionally,
# a keyword name (`name=`). It replaces theano.Param, which is deprecated and
# no longer available in later Theano releases.
x, y, w = T.scalars('x', 'y', 'w')
f = theano.function([x,
                     theano.In(y, value=1),
                     theano.In(w, value=1, name='foo')],
                    w*(x+y))
print(f(1))             # y and w fall back to their defaults
print(f(1, 2))          # w falls back to its default
print(f(1, 2, 10))      # everything passed positionally
print(f(1, 2, foo=10))  # w passed through its keyword name 'foo'


2.0
3.0
30.0
30.0


In [9]:
# shared variables: show the variable itself, its evaluated value and its stored value
s = theano.shared(np.float32(0))
shared_views = (s, s.eval(), s.get_value())
print(' '.join(str(view) for view in shared_views))

<TensorType(float32, scalar)> 0.0 0.0


In [10]:
# function with side effects: each call outputs s and schedules the update s <- s + x
x = T.fscalar('x')
function_of_state = s + x
f = theano.function(
    inputs=[x],
    outputs=s,
    updates=[(s, function_of_state)],
)

In [11]:
# function evaluation returns previous state. state access with #get_value()
s.set_value(0)           # reset the shared state before the demonstration
print(s.get_value())
for call_index in range(2):
    print(f(1))          # prints the state as it was before this call's update
print(s.get_value())     # state after both updates

0.0
0.0
1.0
2.0


In [12]:
# skip shared variables: `givens` substitutes s with an ordinary input in the graph
s_alt = T.fscalar()
g = theano.function(
    inputs=[x, s_alt],
    outputs=function_of_state,
    givens=[(s, s_alt)],
)
print(g(1, 2))

3.0


In [72]:
# Generating sequences

# compute recursive function: x_{t+1} = x_t.dot([[1,1],[1,0]]) with x0=[1,0]

# numpy
n = 4                                                        # number of steps to generate
x0 = np.array([1, 0], dtype=theano.config.floatX)            # initial state
a = np.array([[1, 1], [1, 0]], dtype=theano.config.floatX)   # matrix applied at every step

def fibonacci_numpy(X0, A, N, dtype=None):
    """Iterate x_{t+1} = x_t.dot(A) for N steps, starting from X0.

    Parameters
    ----------
    X0 : 1-D array-like
        Initial state vector.
    A : 2-D array
        Matrix applied (on the right) at every step.
    N : int
        Number of steps to compute.
    dtype : numpy dtype, optional
        dtype of the returned array. Defaults to ``theano.config.floatX``.

    Returns
    -------
    numpy.ndarray of shape (N, len(X0))
        Row ``i`` holds the state after ``i + 1`` steps.
    """
    if dtype is None:
        dtype = theano.config.floatX
    X0 = np.asarray(X0)
    # Size the buffer from the arguments; it was previously sized from the
    # notebook-level global `n`, which broke any call where N != n.
    X = np.zeros((N, X0.shape[0]), dtype=dtype)
    prev = X0
    for i in range(N):
        X[i] = prev.dot(A)
        prev = X[i]  # the next step starts from the row just written
    return X

print(fibonacci_numpy(x0, a, n))


# theano
def fib_step(x_tm1, A_t):
    """One scan step: previous state times the (non-sequence) matrix."""
    return x_tm1.dot(A_t)


X = T.vector('X')
A = T.matrix('A')
N = T.iscalar('N')
result, updates = theano.scan(
    fn=fib_step,
    outputs_info=[X],   # initialization
    non_sequences=A,    # put non sequence items here for optimizing memory access of A
    n_steps=N,
)
fibonacci_theano = theano.function(inputs=[X, A, N], outputs=result, updates=updates)

print(fibonacci_theano(x0, a, n))

[[ 1.  1.]
 [ 2.  1.]
 [ 3.  2.]
 [ 5.  3.]]
[[ 1.  1.]
 [ 2.  1.]
 [ 3.  2.]
 [ 5.  3.]]


In [90]:
# Mapping sequences to sequences (no side effects)
x = np.arange(1, 11, dtype=theano.config.floatX)
print(x ** 2)   # numpy reference result

X = T.vector('X')
# scan hands each element of X to the step function in turn
result, updates = theano.scan(fn=lambda x_t: x_t ** 2, sequences=X)
f = theano.function(inputs=[X], outputs=result, updates=updates)
print(f(x))

[   1.    4.    9.   16.   25.   36.   49.   64.   81.  100.]
[   1.    4.    9.   16.   25.   36.   49.   64.   81.  100.]


In [91]:
# Mapping sequences to sequences (no side effects) and accumulate
x = np.arange(1, 11, dtype=theano.config.floatX)
print(np.sum(x ** 2))   # numpy reference result

X = T.vector('X')
result, updates = theano.scan(fn=lambda x_t: x_t ** 2, sequences=X)
# Sum the per-element squares symbolically so the function returns a single number.
sum_of_squares = result.sum()
f = theano.function(inputs=[X], outputs=sum_of_squares, updates=updates)
print(f(x))

385.0
385.0


In [95]:
# Mapping sequences to sequences with shared variable
x = np.arange(1, 11, dtype=theano.config.floatX).reshape(5, 2)
print(x * 2)   # numpy reference result

X = T.matrix('X')
A = theano.shared(2 * np.eye(2))   # the shared matrix applied to every row
# scan iterates over the rows of X; A is passed unchanged to every step
result, updates = theano.scan(
    fn=lambda x_row, A_t: x_row.dot(A_t),
    sequences=X,
    non_sequences=A,
)
f = theano.function(inputs=[X], outputs=result, updates=updates)
print(f(x))

[[  2.   4.]
 [  6.   8.]
 [ 10.  12.]
 [ 14.  16.]
 [ 18.  20.]]
[[  2.   4.]
 [  6.   8.]
 [ 10.  12.]
 [ 14.  16.]
 [ 18.  20.]]


In [109]:
# Mapping sequences to sequences with shared variable and reduce
# NOTE(review): the heading mentions a reduce step, but no reduction over
# `result` is performed in this cell yet.

# Noisy inputs x and noisy targets y ~ 2*x. Adding float64 noise promotes the
# array, so cast back to floatX to match the dtype of T.matrix below.
x = (np.arange(1, 11, dtype=theano.config.floatX).reshape(5, 2)
     + np.random.randn(5, 2) * 0.01).astype(theano.config.floatX)
y = 2*x + np.random.randn(5, 2) * 0.01
print('%s %s' % (x, y))
print(np.mean(2*x - y))

X = T.matrix('X')
# theano.shared requires an initial value; calling it with no argument fails.
# Start A from a random 2x2 matrix.
A = theano.shared(np.random.randn(2, 2).astype(theano.config.floatX))
result, updates = theano.scan(lambda x_tm1, A_t: x_tm1.dot(A_t),
                              sequences=X,
                              non_sequences=A)
f = theano.function(inputs=[X], outputs=result, updates=updates)
print(f(x))

[[  1.01369532   2.00094874]
 [  2.99101545   3.99925445]
 [  4.99999175   6.01572802]
 [  7.01620135   7.99215864]
 [  9.01413549  10.01187349]] [[  2.02737478   3.99988745]
 [  5.97583899   7.98836623]
 [  9.99072336  12.03666211]
 [ 14.0464243   15.98242209]
 [ 18.03866795  20.02779045]]
-0.000415230225648
[[  2.22767021   3.59648528]
 [  4.54811112   7.68616266]
 [  6.89100542  11.82008057]
 [  9.19204923  15.8962263 ]
 [ 11.53729394  20.02942472]]
