출처 : https://github.com/qbxlvnf11/machine-learning-basic/blob/master/Pytorch_Backpropagation.ipynb

## pytorch function module
 
- torch.nn : 파이토치, 인공 신경망 모델을 가짐

- torch.optim : 모델 최적화

- torch.nn.functional : nn 모듈의 함수화

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import warnings
warnings.filterwarnings(action='ignore')

- Input & Weights

In [2]:
# Define input
input_tensor = torch.tensor([0.2, 0.1], dtype=torch.float64)

# Define weights : w1, w2, b1, b2
w1 = nn.Embedding(2, 2, dtype=torch.float64)
w2 = nn.Embedding(2, 2, dtype=torch.float64)
b1 = nn.Embedding(1, 2, dtype=torch.float64)
b2 = nn.Embedding(1, 2, dtype=torch.float64)

# init weights : w1, w2, b1, b2
w1.weight.data = torch.tensor([[0.2, 0.1], [0.4, 0.15]], dtype=torch.float64, requires_grad=True).t()
w2.weight.data = torch.tensor([[0.65, 0.7], [0.45, 0.3]], dtype=torch.float64, requires_grad=True).t()
b1.weight.data = torch.tensor([[0.3]], dtype=torch.float64, requires_grad=True).t()
b2.weight.data = torch.tensor([[0.5]], dtype=torch.float64, requires_grad=True).t()
 

In [3]:
# print weights
print('*'*30)
print('input_tensor:', input_tensor)
print('*'*30)
print('w1.weight:', w1.weight)

print('w1.weight.grad:', w1.weight.grad)
print('b1.weight:', b1.weight)
print('b1.weight.grad:', b1.weight.grad)
print('*'*30)
print('w2.weight:', w2.weight)
print('w2.weight.grad:', w2.weight.grad)
print('b2.weight:', w2.weight)
print('b2.weight.grad:', w2.weight.grad)
print('b2.weight:', b2.weight)
print('b2.weight.grad:', b2.weight.grad)
print('*'*30)

******************************
input_tensor: tensor([0.2000, 0.1000], dtype=torch.float64)
******************************
w1.weight: Parameter containing:
tensor([[0.2000, 0.4000],
        [0.1000, 0.1500]], dtype=torch.float64, requires_grad=True)
w1.weight.grad: None
b1.weight: Parameter containing:
tensor([[0.3000]], dtype=torch.float64, requires_grad=True)
b1.weight.grad: None
******************************
w2.weight: Parameter containing:
tensor([[0.6500, 0.4500],
        [0.7000, 0.3000]], dtype=torch.float64, requires_grad=True)
w2.weight.grad: None
b2.weight: Parameter containing:
tensor([[0.6500, 0.4500],
        [0.7000, 0.3000]], dtype=torch.float64, requires_grad=True)
b2.weight.grad: None
b2.weight: Parameter containing:
tensor([[0.5000]], dtype=torch.float64, requires_grad=True)
b2.weight.grad: None
******************************


- Forward Propagation

In [4]:
# Hidden layer(MLP)
net_h1_h2 = torch.matmul(input_tensor, w1.weight) + b1.weight
out_h1_h2 = F.sigmoid(net_h1_h2)

In [5]:
# [[net_h1, net_h2]]
print('net_h1_h2:', net_h1_h2)

# [[out_h1, out_h2]]
print('out_h1_h2:', out_h1_h2)
print('out_h1_h2.grad:', out_h1_h2.grad)

net_h1_h2: tensor([[0.3500, 0.3950]], dtype=torch.float64, grad_fn=<AddBackward0>)
out_h1_h2: tensor([[0.5866, 0.5975]], dtype=torch.float64, grad_fn=<SigmoidBackward0>)
out_h1_h2.grad: None


In [6]:
# Output layer(MLP)
net_o1_o2 = torch.matmul(out_h1_h2, w2.weight) + b2.weight
out_o1_o2 = F.sigmoid(net_o1_o2)

In [7]:
# [[net_h1, net_h2]]
print('net_o1_o2:', net_o1_o2)
# [[out_h1, out_h2]]
print('out_o1_o2:', out_o1_o2)
print('out_o1_o2.grad:', out_o1_o2.grad)

net_o1_o2: tensor([[1.2995, 0.9432]], dtype=torch.float64, grad_fn=<AddBackward0>)
out_o1_o2: tensor([[0.7858, 0.7198]], dtype=torch.float64, grad_fn=<SigmoidBackward0>)
out_o1_o2.grad: None


- Loss

In [8]:
label = torch.tensor([0.99, 0.01], dtype=torch.float64, requires_grad=True)

In [9]:
# Loss function
loss = torch.sum(0.5*torch.square(label - out_o1_o2))

In [10]:
print('loss:', loss)

loss: tensor(0.2727, dtype=torch.float64, grad_fn=<SumBackward0>)


\- Backward Propagation

In [11]:
# Get gradient of each weight & bias
loss.backward()

In [12]:
# Gradients
# Save gradients in weight.grad attribute
print('w1.weight.grad:', w1.weight.grad)
print('b1.weight.grad:', b1.weight.grad)
print('w2.weight.grad:', w2.weight.grad)
print('b2.weight.grad:', b2.weight.grad)

w1.weight.grad: tensor([[0.0020, 0.0009],
        [0.0010, 0.0005]], dtype=torch.float64)
b1.weight.grad: tensor([[0.0147]], dtype=torch.float64)
w2.weight.grad: tensor([[-0.0202,  0.0840],
        [-0.0205,  0.0855]], dtype=torch.float64)
b2.weight.grad: tensor([[0.1088]], dtype=torch.float64)


\- Optimization (1 epoch)

In [13]:
# Before optimization
# Weights
print('w1.weight:', w1.weight)
print('b1.weight', b1.weight)
print('w2.weight', w2.weight)
print('b2.weight', b2.weight)

# Loss
h1 = F.sigmoid(torch.matmul(input_tensor, w1.weight) + b1.weight)
output = F.sigmoid(torch.matmul(h1, w2.weight) + b2.weight)
print('loss:', torch.sum(0.5*torch.square(label - output)))

# output
print('label:', label)
print('output:', output)

w1.weight: Parameter containing:
tensor([[0.2000, 0.4000],
        [0.1000, 0.1500]], dtype=torch.float64, requires_grad=True)
b1.weight Parameter containing:
tensor([[0.3000]], dtype=torch.float64, requires_grad=True)
w2.weight Parameter containing:
tensor([[0.6500, 0.4500],
        [0.7000, 0.3000]], dtype=torch.float64, requires_grad=True)
b2.weight Parameter containing:
tensor([[0.5000]], dtype=torch.float64, requires_grad=True)
loss: tensor(0.2727, dtype=torch.float64, grad_fn=<SumBackward0>)
label: tensor([0.9900, 0.0100], dtype=torch.float64, requires_grad=True)
output: tensor([[0.7858, 0.7198]], dtype=torch.float64, grad_fn=<SigmoidBackward0>)


In [14]:
# Learning rate
lr = 0.5 
# optimizer
optimizer = optim.SGD((w1.weight, w2.weight, b1.weight, b2.weight), lr=0.5)

In [15]:
# Optimization
optimizer.step()

In [16]:
# Optimization(1 epoch)
# Optimizing weights
print('w1.weight:', w1.weight)
print('b1.weight', b1.weight)
print('w2.weight', w2.weight)
print('b2.weight:', b2.weight)

# Decreasing loss
h1 = F.sigmoid(torch.matmul(input_tensor, w1.weight) + b1.weight)
output = F.sigmoid(torch.matmul(h1, w2.weight) + b2.weight)
print('loss:', torch.sum(0.5*torch.square(label -output)))
# More optimizing output
print('label:', label)
print('output:', output)

w1.weight: Parameter containing:
tensor([[0.1990, 0.3995],
        [0.0995, 0.1498]], dtype=torch.float64, requires_grad=True)
b1.weight Parameter containing:
tensor([[0.2926]], dtype=torch.float64, requires_grad=True)
w2.weight Parameter containing:
tensor([[0.6601, 0.4080],
        [0.7103, 0.2572]], dtype=torch.float64, requires_grad=True)
b2.weight: Parameter containing:
tensor([[0.4456]], dtype=torch.float64, requires_grad=True)
loss: tensor(0.2591, dtype=torch.float64, grad_fn=<SumBackward0>)
label: tensor([0.9900, 0.0100], dtype=torch.float64, requires_grad=True)
output: tensor([[0.7781, 0.6979]], dtype=torch.float64, grad_fn=<SigmoidBackward0>)


\- Optimization(1000 epochs)

In [17]:
# 1000 epochs
for i in range(1, 1001):

    # init gradient of optimizer
    # If this method not called, gradient is stacjed
    optimizer.zero_grad()

    # Forward pass
    h1 = F.sigmoid(torch.matmul(input_tensor, w1.weight) + b1.weight)
    output = F.sigmoid(torch.matmul(h1, w2.weight) + b2.weight)

    # Loss
    loss = torch.sum(0.5 *torch.square(label - output))

    if i == 1 or i % 100 == 0:
        # Decreasing loss
        print('loss:', loss)

    # Backward pass
    loss.backward()

    # Optimization
    optimizer.step()



loss: tensor(0.2591, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(0.0095, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(0.0034, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(0.0019, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(0.0013, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(0.0009, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(0.0007, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(0.0006, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(0.0005, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(0.0004, dtype=torch.float64, grad_fn=<SumBackward0>)
loss: tensor(0.0003, dtype=torch.float64, grad_fn=<SumBackward0>)


In [18]:
# validation of output(1000 epochs)

h1 = F.sigmoid(torch.matmul(input_tensor, w1.weight) + b1.weight)
output = F.sigmoid(torch.matmul(h1, w2.weight) + b2.weight)

# Output : close to the label
print('label:', label)
print('output:', output)

label: tensor([0.9900, 0.0100], dtype=torch.float64, requires_grad=True)
output: tensor([[0.9717, 0.0287]], dtype=torch.float64, grad_fn=<SigmoidBackward0>)
