# CONTENT

* **1. Basics**
* **2. Derivatives**
* **3. Gradient (Use Demos)**

# Basics

### A. Algebra

In [1]:
import numpy
import theano.tensor as T
from theano import function

In [10]:
# SCALAR ARITHMETIC
x = T.dscalar('x')
y = T.dscalar('y')
    # define type of variables.
    # optionally assign variable name.
z = x + y
f = function([x,y],z)
    # define function.
print f(2,3)

5.0


In [16]:
# MATRIX ARITHMETIC I: VECTOR
v = T.vector()
w = T.vector()
r = v**2 + w**2 + 2*v*w
f = function([w, v], r)
print f([1,2],[4,5])

[ 25.  49.]


In [7]:
# MATRIX ARITHMETIC II: MATRIX
x = T.dmatrix('x')
y = T.dmatrix('y')
z = x + y
f = function([x,y],z)
print f([[1,2],[3,4]], [[1,2],[3,4]])

[[ 2.  4.]
 [ 6.  8.]]


In [109]:
# DOT (MANUAL)
x = T.fvector('x')
W = theano.shared(np.asarray([.2,.7]), 'W')
y = (x * W).sum()
    # * for Hadamard Product.
f = function([x], y)
print f([1.,1.]) # expected: 0.2*1.0 + 0.7*1.0 = 0.9.

0.9


In [None]:
# COMMONLY USED VARIABLE TYPES
#  - fvector: float32, ndim=1.
#  - ivector: int32, ndim=1.
#  - fscalar: float32, ndim=0.
#  - fmatrix: float32, ndim=2.
#  - ftensor3: float32, ndim=3.
#  - dtensor3: float64, ndim=3.

### B. Function 

In [22]:
import theano.tensor as T
from theano import function

In [17]:
x = T.dmatrix()
s = 1 / (1 + T.exp(-x))
logistic = function([x], s)
print logistic([[0,1],[-1,-2]])

[[ 0.5         0.73105858]
 [ 0.26894142  0.11920292]]


### C. Compute Multiple Things

In [23]:
import theano.tensor as T
from theano import function

In [21]:
a, b = T.dmatrices('a','b')
    # assigning names is necessary here to indicate unpacking multiple vars.
add = a + b
subtract = a - b
dot = T.dot(a,b)
f = function([a, b], [add, subtract, dot])
ret = f([[1,2],[3,4]], [[1,2],[3,4]])
print ret[0]
print ret[1]
print ret[2]

[[ 2.  4.]
 [ 6.  8.]]
[[ 0.  0.]
 [ 0.  0.]]
[[  7.  10.]
 [ 15.  22.]]


### D. Set Default Arg-Value

In [24]:
import theano.tensor as T
from theano import function, In

In [25]:
x, y = T.dscalars('x','y')
z = x + y
f = function([x, In(y, value=1)], z)
print f(10)
print f(10,2)

11.0
12.0


In [30]:
x, y, z = T.dscalars('x','y','z')
w = (x+y) * z
f = function([x, In(y,value=1), In(z,value=2,name='zzz')], w)
print f(10)
print f(10,zzz=1)
print f(10,1,2)

22.0
11.0
22.0


### E. Shared Variables

In [38]:
# COUNTER
from theano import shared
    # @ param 'givens' omitted.
state = shared(0)
    # initialize counter.
i = T.iscalar('int')
accumulator = function([i], state, updates=[(state, state+i)])
    #                         ^               ^      ^
    #                         |               |      |
    #       shared var. (returned on print)  current next (change by any arbitrary value)
decrementor = function([i], state, updates=[(state, state-i)])
print state.get_value()
accumulator(10)
print state.get_value()
decrementor(5)
print state.get_value()

0
10
5


In [54]:
# COPY -> NEW COUNTER
#  purpuse: avoiding compiling the same function again.
newState = shared(-100)
newAccumulator = accumulator.copy(swap={state:newState})
    # old function _accumulator_ usus 'state' as counter, 
    #  we'd like copied function to use a new state 'newState'.
print state.get_value()
newAccumulator(10)
print state.get_value, newState.get_value()

5
<bound method ScalarSharedVariable.get_value of <TensorType(int64, scalar)>> -90


### F. Random Numbers

In [82]:
from theano.tensor.shared_randomstreams import RandomStreams
from theano import function
rs = RandomStreams(seed=0)
randomFloat = rs.normal()
randomMatrix = rs.uniform((2,2))
f = function([], randomFloat)
g = function([], randomMatrix, no_default_updates=True)
    # no_default_updates: keep the generated random the first time.
print f()
print f()
print g()
print g()

1.98197037268
-0.148906614065
[[ 0.53096416  0.67607951]
 [ 0.62270975  0.64667994]]
[[ 0.53096416  0.67607951]
 [ 0.62270975  0.64667994]]


# Derivatives

### A. Standard Gradient

In [85]:
# BASICS
import numpy as np
import theano
import theano.tensor as T
from theano import pp, function
x = T.dscalar('x')
y = x ** 2
    # y = f(x) = x^2
yGrad = T.grad(y, x)
    # dy/dx = 2x
    # this can be shown using: print pp(f.maker.fgraph.outputs[0]).
    #  (TensorConstant{2.0} * x)
f = function([x], yGrad)
print f(4) # expected: 2x, i.e. 8.

8.0


In [96]:
# SIGMOID I: SINGLE NUMBER
x = T.dscalar('x')
sigmoid = 1 / (1 + T.exp(-x))
sigmoidGrad = T.grad(sigmoid, x)
f = function([x], sigmoidGrad)
print f(0) # dsigmoid = sigmoid*(1-sigmoid), expected: .5*(1-.5) = .25.

0.25


In [98]:
# SIGMOID II: MATRIX
x = T.dmatrix('x')
sigmoid = T.sum(1 / (1 + T.exp(-x)))
    # .sum is a must, because the first arg in .grad must be a scalar.
sigmoidGrad = T.grad(sigmoid, x)
f = function([x], sigmoidGrad)
print f([[0,1],[-1,-2]])

[[ 0.25        0.19661193]
 [ 0.19661193  0.10499359]]


### B. Jacobian

In [103]:
import theano
import theano.tensor as T
x = T.dvector('x')
y = x**2
grad = theano.gradient.jacobian(y, x)
f = theano.function([x], grad)
print f([0,1,2,3]) # 0-terms: partial deriv. wrt. other x's returns 0.

[[ 0.  0.  0.  0.]
 [ 0.  2.  0.  0.]
 [ 0.  0.  4.  0.]
 [ 0.  0.  0.  6.]]


# Gradient

In [111]:
# INPUT -- WEIGHTS -- OUTPUT
import theano
from theano import function, shared
import theano.tensor as T
import numpy as np

x = T.fvector('x')
target = T.fscalar('target')

W = shared(np.array([.2,.7]), 'W')
y = (x*W).sum()

cost = T.sqr(target-y) # .sqr: ^2
grads = T.grad(cost, [W]) # dcost/dW
WUpdated = W - (.1 * grads[0])
updates = [(W, WUpdated)]

f = function([x, target], y, updates=updates)

for i in xrange(10):
    output = f([1.,1.], 20.)
    print output

0.9
8.54
13.124
15.8744
17.52464
18.514784
19.1088704
19.46532224
19.679193344
19.8075160064


In [133]:
# .eval: GET NUMERICAL VALUE OF A VAR.
W = shared(np.array([.2,.7]), 'W')
print W
print W.eval()
tmp = W*2
print tmp
print tmp.eval()

W
[ 0.2  0.7]
Elemwise{mul,no_inplace}.0
[ 0.4  1.4]


In [138]:
# ACTIVATION FUNCTIONS
x = shared(np.array([-3.,-2.,-1.,0.,1.,2.,3.]), 'x')
print x.eval()
sigmoidX = T.nnet.sigmoid(x)
tanhX = T.tanh(x)
softmaxX = T.nnet.softmax(x)
print sigmoidX.eval()
print tanhX.eval()
print softmaxX.eval()

[-3. -2. -1.  0.  1.  2.  3.]
[ 0.04742587  0.11920292  0.26894142  0.5         0.73105858  0.88079708
  0.95257413]
[-0.99505475 -0.96402758 -0.76159416  0.          0.76159416  0.96402758
  0.99505475]
[[ 0.0015683   0.00426308  0.01158826  0.03150015  0.0856263   0.2327564
   0.6326975 ]]


In [168]:
# OTHER FUNCTIONS
x = shared(np.array([-3.,-2.,-1.,0.,1.,2.,3.,4.]), 'x')
print x.sum().eval()
print x.max().eval()
print x.argmax().eval()
print x.reshape((2,4)).eval()
print x.zeros_like(x).eval()
print x.shape.eval()

4.0
4.0
7
[[-3. -2. -1.  0.]
 [ 1.  2.  3.  4.]]
[ 0.  0.  0.  0.  0.  0.  0.  0.]
[8]


In [173]:
# INDEXING
x = shared(np.array([-3.,-2.,-1.,0.,1.,2.,3.,4.]), 'x')
idx = [1,1,1,6,6,6]
print x[idx].eval() # retrieve value by index.
xNew = T.set_subtensor(x[idx],0.) # set value by index.
print xNew.eval()

[-2. -2. -2.  3.  3.  3.]
[-3.  0. -1.  0.  1.  2.  0.  4.]
