# Short Tutorial

## Tensor

In [1]:
import torch

x = torch.Tensor(2, 2)
x = torch.Tensor([[1, 2], [3, 4]])

In [2]:
import numpy as np

x = [[1, 2], [3, 4]]
x = np.array(x)
x = torch.from_numpy(x)

$$x=\begin{bmatrix}
1, 2 \\
3, 4
\end{bmatrix}$$

## Autograd

In [3]:
import torch

x = torch.FloatTensor(2, 2)
y = torch.FloatTensor(2, 2)
y.requires_grad_(True)

z = (x + y) + torch.FloatTensor(2, 2)

In [4]:
import torch

x = torch.FloatTensor(2, 2)
y = torch.FloatTensor(2, 2)
y.requires_grad_(True)

with torch.no_grad():
    z = (x + y) + torch.FloatTensor(2, 2)

## Feed-forward

$$\begin{gathered}
y = xW+ b \\
\text{where }x\in\mathbb{R}^{M\times N},W\in\mathbb{R}^{N\times P}\text{ and }b\in\mathbb{R}^P. \\
\text{Thus, }y\in\mathbb{R}^{M\times P}.
\end{gathered}$$

$$\begin{aligned}
y&=f(x; \theta)\text{ where }\theta=\{W, b\}
\end{aligned}$$

In [5]:
import torch

def linear(x, W, b):
    y = torch.mm(x, W) + b

    return y

x = torch.FloatTensor(16, 10)
W = torch.FloatTensor(10, 5)
b = torch.FloatTensor(5)

y = linear(x, W, b)

## nn.Module

In [6]:
import torch
import torch.nn as nn

class MyLinear(nn.Module):

    def __init__(self, input_size, output_size):
        super().__init__()

        self.W = torch.FloatTensor(input_size, output_size)
        self.b = torch.FloatTensor(output_size)

    def forward(self, x):
        y = torch.mm(x, self.W) + self.b

        return y

In [7]:
x = torch.FloatTensor(16, 10)
linear = MyLinear(10, 5)
y = linear(x)

In [8]:
params = [p.size() for p in linear.parameters()]
print(params)

[]


참고: http://pytorch.org/docs/master/nn.html?highlight=parameter#parameters

In [9]:
class MyLinear(nn.Module):

    def __init__(self, input_size, output_size):
        super(MyLinear, self).__init__()

        self.W = nn.Parameter(torch.FloatTensor(input_size, output_size), requires_grad=True)
        self.b = nn.Parameter(torch.FloatTensor(output_size), requires_grad=True)

    def forward(self, x):
        y = torch.mm(x, self.W) + self.b

        return y

In [10]:
linear = MyLinear(10, 5)
params = [p.size() for p in linear.parameters()]
print(params)

[torch.Size([10, 5]), torch.Size([5])]


In [11]:
class MyLinear(nn.Module):

    def __init__(self, input_size, output_size):
        super(MyLinear, self).__init__()

        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x):
        y = self.linear(x)

        return y

In [12]:
linear = MyLinear(10, 5)
print(linear)

MyLinear(
  (linear): Linear(in_features=10, out_features=5, bias=True)
)


## Backward (Back-propagation)

In [13]:
objective = 100

x = torch.FloatTensor(16, 10)
linear = MyLinear(10, 5)
y = linear(x)
loss = (objective - y.sum())**2

loss.backward()

## train() and eval()

In [14]:
# Training...
linear.eval()
# Do some inference process.
linear.train()
# Restart training, again.

MyLinear(
  (linear): Linear(in_features=10, out_features=5, bias=True)
)

## Linear regression example

$$\mathcal{L}_{\text{MSE}}(\hat{y}, y)=\frac{1}{N}\sum^N_{i=1}{(\hat{y}_i - y_i)^2}$$

In [15]:
import random

import torch
import torch.nn as nn

class MyModel(nn.Module):

    def __init__(self, input_size, output_size):
        super(MyModel, self).__init__()

        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x):
        y = self.linear(x)

        return y

$$\begin{gathered}
y=f(x_1, x_2, x_3) = 3x_1 + x_2 - 2x_3 \\
\hat{y}=\tilde{f}(x_1,x_2,x_3;\theta) \\
\hat{\theta}=\underset{\theta\in\Theta}{\text{argmin }}\mathcal{L}(\hat{y},y)
\end{gathered}$$

In [16]:
def ground_truth(x):
    return 3 * x[:, 0] + x[:, 1] - 2 * x[:, 2]

In [17]:
def train(model, x, y, optim):
    # initialize gradients in all parameters in module.
    optim.zero_grad()

    # feed-forward
    y_hat = model(x)
    # get error between answer and inferenced.
    loss = ((y - y_hat)**2).sum() / x.size(0)

    # back-propagation
    loss.backward()

    # one-step of gradient descent
    optim.step()

    return loss.data

In [18]:
batch_size = 1
n_epochs = 1000
n_iter = 10000

model = MyModel(3, 1)
optim = torch.optim.SGD(model.parameters(), lr=0.0001, momentum=0.1)

print(model)

MyModel(
  (linear): Linear(in_features=3, out_features=1, bias=True)
)


In [19]:
for epoch in range(n_epochs):
    avg_loss = 0

    for i in range(n_iter):
        x = torch.rand(batch_size, 3)
        y = ground_truth(x.data)

        loss = train(model, x, y, optim)

        avg_loss += loss
    avg_loss = avg_loss / n_iter

    # simple test sample to check the network.
    x_valid = torch.FloatTensor([[.3, .2, .1]])
    y_valid = ground_truth(x_valid.data)

    model.eval()
    y_hat = model(x_valid)
    model.train()

    print(avg_loss, y_valid.data[0], y_hat.data[0, 0])

    if avg_loss < .001: # finish the training if the loss is smaller than .001.
        break

tensor(1.3006) tensor(0.9000) tensor(0.7660)
tensor(0.8309) tensor(0.9000) tensor(0.8191)
tensor(0.5856) tensor(0.9000) tensor(0.8623)
tensor(0.3986) tensor(0.9000) tensor(0.8907)
tensor(0.2776) tensor(0.9000) tensor(0.9006)
tensor(0.1915) tensor(0.9000) tensor(0.9341)
tensor(0.1384) tensor(0.9000) tensor(0.9347)
tensor(0.0972) tensor(0.9000) tensor(0.9446)
tensor(0.0682) tensor(0.9000) tensor(0.9480)
tensor(0.0470) tensor(0.9000) tensor(0.9511)
tensor(0.0328) tensor(0.9000) tensor(0.9492)
tensor(0.0241) tensor(0.9000) tensor(0.9528)
tensor(0.0167) tensor(0.9000) tensor(0.9528)
tensor(0.0121) tensor(0.9000) tensor(0.9500)
tensor(0.0085) tensor(0.9000) tensor(0.9475)
tensor(0.0061) tensor(0.9000) tensor(0.9445)
tensor(0.0043) tensor(0.9000) tensor(0.9428)
tensor(0.0032) tensor(0.9000) tensor(0.9403)
tensor(0.0023) tensor(0.9000) tensor(0.9385)
tensor(0.0017) tensor(0.9000) tensor(0.9346)
tensor(0.0013) tensor(0.9000) tensor(0.9328)
tensor(0.0009) tensor(0.9000) tensor(0.9305)


## Use GPU

In [20]:
# Note that tensor is declared in torch.cuda.
x = torch.cuda.FloatTensor(16, 10)
linear = MyLinear(10, 5)
# .cuda() let module move to GPU memory.
linear.cuda()
y = linear(x)

RuntimeError: Cannot initialize CUDA without ATen_cuda library. PyTorch splits its backend into two shared libraries: a CPU library and a CUDA library; this error has occurred because you are trying to use some CUDA functionality, but the CUDA library has not been loaded by the dynamic linker for some reason.  The CUDA library MUST be loaded, EVEN IF you don't directly use any symbols from the CUDA library! One common culprit is a lack of -Wl,--no-as-needed in your link arguments; many dynamic linkers will delete dynamic library dependencies if you don't depend on any of their symbols.  You can check if this has occurred by using ldd on your binary to see if there is a dependency on *_cuda.so library.