In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable

In [None]:
# Create leaf tensors that track gradients.
# torch.tensor(..., requires_grad=True) is used instead of the deprecated Variable wrapper.
x = torch.tensor([1.0], requires_grad=True)
w = torch.tensor([2.0], requires_grad=True)
b = torch.tensor([3.0], requires_grad=True)  # bias is 3 so that y matches the formula below

# Build the computational graph
# y = 2 * x + 3
y = w * x + b

# Compute the gradients of y with respect to every leaf tensor
y.backward()

# Show the gradients
print(x.grad)  # dy/dx = w = 2
print(w.grad)  # dy/dw = x = 1
print(b.grad)  # dy/db = 1

# 例１

\begin{align} 
y = x^2
\end{align}

\begin{align} 
\displaystyle \frac{dy}{dx} = 2x
\end{align}





In [None]:
# y = x^2 evaluated at x = 2.
# Leaf tensor created with torch.tensor; the Variable wrapper is deprecated.
x = torch.tensor([2.0], requires_grad=True)
y = x ** 2
y.backward()
print(x.grad)  # dy/dx = 2x = 4

# Check autograd against the analytic derivative 2x.
assert torch.allclose(x.grad, 2 * x.detach())

# 例 2

\begin{align} 
y = e^x
\end{align}

\begin{align} 
\displaystyle \frac{dy}{dx} = e^x
\end{align}


In [None]:
# y = e^x evaluated at x = 2.
# Leaf tensor created with torch.tensor; the Variable wrapper is deprecated.
x = torch.tensor([2.0], requires_grad=True)
y = torch.exp(x)  # e ~ 2.71828
y.backward()
print(x.grad)  # dy/dx = e^x = e^2 ~ 7.3891

# Check autograd against the analytic derivative e^x (which equals y itself).
assert torch.allclose(x.grad, y.detach())

# 例 3

\begin{align} 
y = \sin(x)
\end{align}

\begin{align} 
\displaystyle \frac{dy}{dx} = \cos(x)
\end{align}


In [None]:
# y = sin(x) evaluated at x = pi.
# Leaf tensor created with torch.tensor; the Variable wrapper is deprecated.
x = torch.tensor([np.pi], requires_grad=True)
y = torch.sin(x)
y.backward()
print(x.grad)  # dy/dx = cos(x) = cos(pi) = -1

# Check autograd against the analytic derivative cos(x).
assert torch.allclose(x.grad, torch.cos(x.detach()))

# 例 4

\begin{align} 
y = (x - 4)(x^2 + 6)
\end{align}

\begin{align} 
\displaystyle \frac{dy}{dx} = 3 x^2 - 8 x + 6
\end{align}


In [None]:
# y = (x - 4)(x^2 + 6) evaluated at x = 0.
# Leaf tensor created with torch.tensor; the Variable wrapper is deprecated.
x = torch.tensor([0.0], requires_grad=True)
y = (x - 4) * (x ** 2 + 6)
y.backward()
print(x.grad)  # dy/dx = 3x^2 - 8x + 6 = 6

# Check autograd against the analytic derivative 3x^2 - 8x + 6.
x_value = x.detach()
assert torch.allclose(x.grad, 3 * x_value ** 2 - 8 * x_value + 6)

# 例 5

\begin{align} 
y = (\sqrt{x} + 1)^3
\end{align}

\begin{align} 
\displaystyle \frac{dy}{dx} = \frac{3 (\sqrt{x} + 1)^2}{2 \sqrt{x}}
\end{align}

In [None]:
# y = (sqrt(x) + 1)^3 evaluated at x = 2.
# Leaf tensor created with torch.tensor; the Variable wrapper is deprecated.
x = torch.tensor([2.0], requires_grad=True)
y = (torch.sqrt(x) + 1) ** 3
y.backward()
print(x.grad)  # dy/dx = 3 (sqrt(x) + 1)^2 / (2 sqrt(x)) ~ 6.1820

# Check autograd against the analytic derivative.
root = torch.sqrt(x.detach())
assert torch.allclose(x.grad, 3 * (root + 1) ** 2 / (2 * root))

# 例 6

\begin{align} 
z = (x + 2 y)^2
\end{align}

\begin{align} 
\displaystyle \frac{\partial z}{\partial x} = 2 (x + 2y)
\end{align}

\begin{align} 
\displaystyle \frac{\partial z}{\partial y} = 4 (x + 2y)
\end{align}

In [None]:
# z = (x + 2y)^2 evaluated at x = 1, y = 2.
# Leaf tensors created with torch.tensor; the Variable wrapper is deprecated.
x = torch.tensor([1.0], requires_grad=True)
y = torch.tensor([2.0], requires_grad=True)
z = (x + 2 * y) ** 2
z.backward()
print(x.grad)  # dz/dx = 2 (x + 2y) = 10
print(y.grad)  # dz/dy = 4 (x + 2y) = 20

# Check autograd against the analytic partial derivatives.
inner = (x + 2 * y).detach()
assert torch.allclose(x.grad, 2 * inner)
assert torch.allclose(y.grad, 4 * inner)

# lossを微分する


In [35]:
# Seed the RNG so the random data and the layer initialisation are the same
# on every Restart & Run All.
torch.manual_seed(0)

# Batch of 5 samples, 3 input features each.
# Plain tensors are used; the Variable wrapper is deprecated.
x = torch.randn(5, 3)
# Batch of 5 targets, 2 output features each
y = torch.randn(5, 2)

# Create a Linear layer
# 3 units => 2 units
linear = nn.Linear(3, 2)

# Parameters of the Linear layer
print('w:', linear.weight)
print('b:', linear.bias)

w: Parameter containing:
tensor([[ 0.3974, -0.1892, -0.2784],
        [ 0.3327, -0.3742,  0.5401]])
b: Parameter containing:
tensor([-0.5308, -0.5098])


# MSELoss

- Creates a criterion that measures the mean squared error between n elements in the input x and target y.


\begin{align} 
\ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
l_n = \left( x_n - y_n \right)^2,
\end{align}

- where N is the batch size. If reduce is True, then:
\begin{split}\ell(x, y) = \begin{cases}
    \operatorname{mean}(L), & \text{if}\; \text{size_average} = \text{True},\\
    \operatorname{sum}(L),  & \text{if}\; \text{size_average} = \text{False}.
\end{cases}\end{split}

- The mean operation still operates over all the elements and divides by n; with size_average = False the losses are summed instead, without the division by n.

# CrossEntropyLoss
- This criterion combines nn.LogSoftmax() and nn.NLLLoss() in one single class.

- The input is expected to contain scores for each class.

- input has to be a Tensor of size either (minibatch,C) or (minibatch,C,d1,d2,...,dK) with K≥2 for the K-dimensional case (described later).
- The loss can be described as:
\begin{align}
\text{loss}(x, class) &= -\log\left(\frac{\exp(x[class])}{\sum_j \exp(x[j])}\right) \\
               &= -x[class] + \log\left(\sum_j \exp(x[j])\right)
\end{align}

or in the case of the weight argument being specified:

\begin{align} 
\text{loss}(x, class) = weight[class] \left(-x[class] + \log\left(\sum_j \exp(x[j])\right)\right)
\end{align}

In [27]:
# Loss function and optimizer: mean squared error, plain SGD over the layer's parameters
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(linear.parameters(), lr=0.01)

# Forward pass
pred = linear(x)

# loss = L
# y comes from torch.randn, so it is already a float tensor; the deprecated
# Variable wrapper and the extra .float() cast are unnecessary.
loss = criterion(pred, y)
print('loss:', loss)

loss: tensor(1.2442)


In [19]:
# Backpropagation.
# Clear stale gradients first: backward() accumulates into .grad, so re-running
# the forward and backward cells would otherwise add to the previous gradients.
optimizer.zero_grad()
loss.backward()

In [20]:
# Show the gradients of the loss with respect to the layer's weight and bias
for label, param in (('dL/dw:', linear.weight), ('dL/db:', linear.bias)):
    print(label, param.grad)

dL/dw: tensor([[ 0.1615, -0.4457, -0.1647],
        [ 0.1898, -1.3372, -0.4121]])
dL/db: tensor([-0.1185, -0.8956])


In [22]:
# Compute the updated parameters by hand from the gradients: param - 0.01 * grad.
# Nothing is modified in place; the results are only printed.
print('*** by hand')
for param in (linear.weight, linear.bias):
    print(param - 0.01 * param.grad)

*** by hand
tensor([[ 0.5757, -0.0990, -0.0727],
        [ 0.4901, -0.5506, -0.4168]])
tensor([ 0.4210, -0.3707])


In [23]:
# Gradient descent: let the optimizer apply one update step to the parameters
optimizer.step()

In [24]:
# Show the parameters after the one-step update.
# They should match the result of the hand-computed update printed earlier.
print('*** by optimizer.step()')
for param in (linear.weight, linear.bias):
    print(param)

*** by optimizer.step()
Parameter containing:
tensor([[ 0.5757, -0.0990, -0.0727],
        [ 0.4901, -0.5506, -0.4168]])
Parameter containing:
tensor([ 0.4210, -0.3707])
