In [1]:
import theano
import theano.tensor as T
from theano import function

# Logistic (sigmoid) function: s(x) = 1 / (1 + exp(-x)).
x = T.dmatrix('x')
exp_neg_x = T.exp(-x)
s = 1 / (1 + exp_neg_x)
# Compile the symbolic expression into a callable; the arithmetic above is
# applied to every entry of the input matrix independently (elementwise).
logistic = function(inputs=[x], outputs=s)
logistic([[-2, -1], [1, 2]])

array([[ 0.11920292,  0.26894142],
       [ 0.73105858,  0.88079708]])

In [2]:
# A single compiled function can return several outputs at once:
# here the difference, its absolute value and its square.
a, b = T.dmatrices('a', 'b')
diff = a - b
abs_diff = abs(diff)
diff_squared = diff ** 2
f = function(inputs=[a, b], outputs=[diff, abs_diff, diff_squared])
left = [[1, 2], [5, 1]]
right = [[10, -2], [3, 1]]
f(left, right)

[array([[-9.,  4.],
        [ 2.,  0.]]), array([[ 9.,  4.],
        [ 2.,  0.]]), array([[ 81.,  16.],
        [  4.,   0.]])]

In [3]:
# setting default value
from theano import Param
x, y = T.dscalars('x', 'y')
z = x + y
f = function([x, Param(y, default = 1)], z)
f(33)

array(34.0)

In [4]:
f(31,2)

array(33.0)

In [5]:
# naming paramators
x, y = T.dscalars('x', 'y')
z = x + y
f = function([x, Param(y, default = 1, name = 'y_value')], z)
f(33, y_value = 5)

array(38.0)

In [6]:
# Shared variables keep their value between calls of compiled functions.
# `state` is the shared value; `accumulator` returns it and, through
# `updates`, replaces it with state + inc on each call.
from theano import shared
state = shared(0)
inc = T.iscalar('inc')
next_state = state + inc
accumulator = function(inputs=[inc], outputs=state, updates=[(state, next_state)])

In [7]:
state.get_value()

array(0)

In [8]:
# Returns the state as it was before this call; the update (state + inc) is
# only visible through state.get_value() afterwards.
accumulator(1)

array(0)

In [9]:
state.get_value()

array(1)

In [10]:
accumulator(300)

array(1)

In [11]:
state.get_value()

array(301)

In [12]:
# The shared value can also be overwritten directly, outside any compiled function.
state.set_value(-1)

In [13]:
state.get_value()

array(-1)

In [14]:
accumulator(3)

array(-1)

In [15]:
state.get_value()

array(2)

In [16]:
# A second compiled function operating on the same shared `state`:
# it subtracts inc instead of adding it.
prev_state = state - inc
decrementor = function(inputs=[inc], outputs=state, updates=[(state, prev_state)])
decrementor(2)

array(2)

In [17]:
state.get_value()

array(0)

In [18]:
# givens parameter: substitute another variable for `state` inside one
# particular compiled function, leaving the shared variable itself untouched.
up_state = state * 2 + inc
foo = T.scalar(dtype=state.dtype)  # stand-in with the same dtype as state
skip_shared = function(inputs=[inc, foo], outputs=up_state, givens=[(state, foo)])
skip_shared(1, 3)

array(7)

In [19]:
skip_shared(1,3)

array(7)

In [20]:
# The shared state still exists and is unchanged: skip_shared neither read
# it (foo was substituted for it) nor updated it.
state.get_value()

array(0)

In [21]:
accumulator(300)

array(0)

In [22]:
state.get_value()

array(300)

In [23]:
# The output is the same even though state is now 300, because givens makes
# the function use foo in place of state.
skip_shared(1,3)

array(7)

In [3]:
# random variable
# basic example
from theano.tensor.shared_randomstreams import RandomStreams
from theano import function
# Seeded symbolic random source. The uniform and normal variables are created
# in this order on purpose; keep it so the draws stay the same.
srng = RandomStreams(seed=234)
matrix_shape = (2, 2)
rv_u = srng.uniform(matrix_shape)
rv_n = srng.normal(matrix_shape)
# f uses the default behaviour: the random state advances on every call.
f = function(inputs=[], outputs=rv_u)
# g opts out of the default updates, so its random state never advances.
g = function(inputs=[], outputs=rv_n, no_default_updates=True)
# rv_u appears three times in one expression.
nearly_zeros = function(inputs=[], outputs=rv_u + rv_u - 2 * rv_u)

In [4]:
# f updates its random state on every call, so each call returns different values
f_val1 = f()
f_val1

array([[ 0.12672381,  0.97091597],
       [ 0.13989098,  0.88754825]])

In [5]:
f_val2 = f()
f_val2

array([[ 0.31971415,  0.47584377],
       [ 0.24129163,  0.42046081]])

In [6]:
# g was compiled with no_default_updates=True, so it returns the same values on every call
g_val1 = g()
g_val1

array([[ 0.37328447, -0.65746672],
       [-0.36302373, -0.97484625]])

In [7]:
g_val2 = g()
g_val2

array([[ 0.37328447, -0.65746672],
       [-0.36302373, -0.97484625]])

In [8]:
# rv_u + rv_u - 2 * rv_u comes out as zero: all three occurrences of rv_u
# use the same draw within a single function call.
nearly_zeros()

array([[ 0.,  0.],
       [ 0.,  0.]])

In [2]:
# numpy
# randn: draw samples from the standard normal distribution;
# the argument is the number of samples (here a length-2 vector)
import numpy
a = numpy.random
a.randn(2)

array([ 0.13547117,  0.52158931])

In [26]:
# randint
# first argument: exclusive upper bound (values are drawn from 0..4);
# size: shape of the resulting matrix
a.randint(5, size = (3,4))

array([[0, 4, 2, 4],
       [4, 0, 0, 3],
       [4, 2, 2, 3]])

In [6]:
# A comparison expression evaluates to a plain Python bool.
# NOTE(review): this rebinds `a`, which earlier referred to numpy.random.
a = 9 < 20
a

True

In [1]:
# example
# logistic regression
# (a supervised-learning example)
# TODO: I do not fully understand this yet; come back to it later

import numpy 
import theano
import theano.tensor as T
# --- Synthetic data ----------------------------------------------------------
# Use a seeded generator so that the data, the initial weights and therefore
# the trained model are identical on every Restart & Run All.
SEED = 0
rng = numpy.random.RandomState(SEED)

N = 400      # number of samples
feats = 784  # number of input features per sample
# D is a tuple (inputs, targets):
#   D[0]: N x feats matrix, every entry drawn from the standard normal distribution
#   D[1]: length-N vector of random 0/1 labels, one per sample
D = (rng.randn(N, feats), rng.randint(size = N, low = 0, high = 2))
training_steps = 10000
learning_rate = 0.1  # gradient-descent step size
l2_penalty = 0.01    # weight of the L2 regularisation term

# --- Symbolic model ----------------------------------------------------------
x = T.matrix("x") # placeholder for the input features
y = T.vector("y") # placeholder for the target labels
w = theano.shared(rng.randn(feats), name = "w") # coefficients, one per feature
b = theano.shared(0., name = "b") # bias (intercept) term

print("Initial Value:")
print(w.get_value())
print(b.get_value())

# Probability that the target is 1; T.dot is the inner product of x and w.
p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b))
# Hard decision: true (1) wherever p_1 exceeds 0.5, otherwise false (0).
prediction = p_1 > 0.5
# Per-sample cross-entropy, i.e. the negative log-likelihood of the label.
xent = -y * T.log(p_1) - (1 - y) * T.log(1 - p_1)
# Cost to minimise: mean cross-entropy plus an L2 penalty on the weights.
cost = xent.mean() + l2_penalty * (w ** 2).sum()
# Gradients of the cost with respect to w and b, used for gradient descent.
gw, gb = T.grad(cost, [w,b])

# compile
# Each call to train returns (prediction, xent) and then moves w and b one
# gradient-descent step of size learning_rate.
train = theano.function(inputs = [x, y], outputs = [prediction, xent]
                       ,updates = ((w, w - learning_rate * gw), (b, b - learning_rate * gb)))
predict = theano.function(inputs = [x], outputs = prediction)

# train
for i in range(training_steps):
    # pred and err receive train's two outputs; they are not used any further here.
    pred, err = train(D[0], D[1])

print("Final model:") # the fitted parameters
print(w.get_value())
print(b.get_value())
print("target values for D:") # the true labels
print(D[1])
print("prediction on D:") # 0/1 classification of the training inputs by the fitted model
print(predict(D[0]))

Initial Value:
[ -1.61315041e-01  -6.61784179e-01   3.24272490e-01   1.97061432e-01
   9.78836431e-02   8.77386873e-01   9.79034116e-01   1.70020461e+00
  -6.10468327e-01   2.78172885e+00   1.16681592e+00   2.77971335e+00
   2.40265164e+00   8.38259382e-01  -8.10641231e-01   4.30013511e-01
  -8.45109439e-01   1.02578100e+00  -1.03961683e+00   1.13794689e+00
   2.65996544e-01  -1.55484002e+00  -7.47099820e-01   5.91907846e-01
   7.11044075e-01  -3.55631017e-01  -1.57548150e+00  -2.46551426e-01
   4.46848079e-02   1.58111444e+00  -2.81996891e-01   1.00521669e+00
   1.59985801e+00   1.81851610e+00   2.95760368e-01  -9.62497565e-01
  -1.10618939e+00  -1.94280530e+00  -1.42403471e+00  -2.11243725e+00
  -2.39338634e+00   1.50837387e+00  -1.58945210e+00  -1.62513512e+00
   1.63545747e-01  -1.92186746e+00  -1.21269377e+00  -7.74234387e-01
  -3.76340934e-01   9.18129607e-02   1.65239914e-01  -3.46922043e-01
  -8.31474769e-01   1.10287395e+00  -1.11492883e+00  -5.53481873e-02
  -7.76890521e-01  

In [30]:
len(D[1])

400