In [1]:
import torch
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F
import numpy as np
import torch.optim as optim

### 搭建模型

In [2]:
class LinearBNAC(nn.Module):
    """Fully connected building block: a Linear layer followed by a role-specific tail.

    The layers are stored in ``self.linear`` (an ``nn.Sequential``) with the
    ``nn.Linear`` always at index 0. What follows depends on the arguments:

    * hidden block (``is_output=False``): Dropout -> BatchNorm1d -> LeakyReLU
    * output block with ``out_channels == 1``: Sigmoid
    * output block with any other ``out_channels``: Softmax over dim 1

    Args:
        in_channels: number of input features of the Linear layer.
        out_channels: number of output features of the Linear layer.
        bias: whether the Linear layer has a bias term.
        dropout: drop probability; only used by hidden blocks.
        is_output: build an output head instead of a hidden block.
    """

    def __init__(self, in_channels, out_channels, bias=True, dropout=0.3, is_output=False):
        super(LinearBNAC, self).__init__()
        # The affine layer is common to every variant and is created first.
        stages = [nn.Linear(in_channels, out_channels, bias=bias)]
        if not is_output:
            # Hidden block: regularise, normalise, then activate.
            stages += [
                nn.Dropout(dropout),
                nn.BatchNorm1d(out_channels),
                nn.LeakyReLU(inplace=True),
            ]
        elif out_channels == 1:
            # Single-unit head: squash to (0, 1).
            stages.append(nn.Sigmoid())
        else:
            # Multi-class head: probabilities along dim 1.
            stages.append(nn.Softmax(dim=1))
        self.linear = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the result."""
        return self.linear(x)

In [3]:
class Model(nn.Module):
    """Small MLP: input -> 128 -> 64 -> 32 -> ``output_classes``.

    The three hidden stages and the output head are all ``LinearBNAC`` blocks;
    the head is built with ``is_output=True``.

    Args:
        input_dimention: number of features in each input sample.
        output_classes: width of the output head (defaults to 1).
    """

    def __init__(self, input_dimention, output_classes=1):
        super(Model, self).__init__()
        # Hidden stack, progressively narrowing the representation.
        self.layer1 = LinearBNAC(input_dimention, 128)
        self.layer2 = LinearBNAC(128, 64)
        self.layer3 = LinearBNAC(64, 32)
        # Output head.
        self.output = LinearBNAC(32, output_classes, is_output=True)

    def forward(self, x):
        """Feed ``x`` through the hidden stages and the head, in order."""
        for stage in (self.layer1, self.layer2, self.layer3, self.output):
            x = stage(x)
        return x
        

### 準備輸入資料、優化器、標籤資料、模型輸出

In [4]:
# Build a 10-class classifier over 256-feature inputs. Because output_classes
# is not 1, the output head ends in Softmax, so the model returns probabilities.
model = Model(input_dimention=256,output_classes=10)
# Adam with its default hyper-parameters, managing every parameter of the model.
optimizer = optim.Adam(model.parameters())

In [5]:
# Dummy mini-batch: 4 samples x 256 features, drawn from a standard normal.
batch_size, input_features = 4, 256
dummy_input = torch.randn((batch_size, input_features))

# One class index per sample, stored as int64 (torch.long).
target = torch.tensor([9, 5, 4, 4], dtype=torch.long)

In [6]:
# Forward pass on the dummy batch. The model was never switched to eval(), so
# Dropout is active and results vary from run to run.
output = model(dummy_input)
# Each row is the Softmax output for one sample: 10 class probabilities.
print(output)

tensor([[0.0745, 0.1650, 0.0368, 0.1430, 0.0909, 0.0640, 0.0819, 0.0563, 0.1683,
         0.1192],
        [0.0590, 0.0593, 0.0590, 0.0563, 0.0916, 0.0671, 0.0470, 0.1618, 0.1404,
         0.2585],
        [0.0953, 0.0631, 0.0911, 0.0448, 0.1053, 0.1130, 0.1365, 0.0933, 0.1094,
         0.1483],
        [0.0838, 0.0953, 0.0655, 0.0877, 0.0938, 0.0915, 0.1105, 0.1290, 0.0902,
         0.1526]], grad_fn=<SoftmaxBackward>)


### 計算 CrossEntropy Loss
* 請注意哪一個 Loss最適合：我們已經使用 softmax
* 因為我們有使用 dropout，並隨機產生 dummy_input，所以各位學員得到的值會與解答不同，然而步驟原理需要相同

In [7]:
from torch.nn import NLLLoss, LogSoftmax, CrossEntropyLoss

In [8]:
# NLLLoss expects log-probabilities, but the model already returns Softmax
# probabilities, so take the log of the output before computing the loss.
criterion = NLLLoss()
# NOTE(review): torch.log gives -inf if any probability underflows to 0.
loss = criterion(torch.log(output), target)
loss

tensor(2.3618, grad_fn=<NllLossBackward>)

In [9]:
# CrossEntropyLoss applies log-softmax internally and then the NLL loss.
# Feeding it log(probabilities) still gives the NLLLoss value: each row of
# exp(log p) already sums to 1, so the internal log-softmax returns log p unchanged.
# Passing `output` directly would apply softmax a second time and change the loss.
criterion = CrossEntropyLoss()
loss = criterion(torch.log(output), target)
loss

tensor(2.3618, grad_fn=<NllLossBackward>)

### 完成back propagation並更新梯度

In [10]:
# Back-propagate from the scalar loss; this populates .grad on the model parameters.
loss.backward()

In [11]:
# Inspect the first Linear layer (index 0 of layer1's Sequential): its weights
# and the gradient stored in .grad by the backward pass.
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[-0.0478,  0.0341,  0.0029,  ...,  0.0571, -0.0068, -0.0381],
        [ 0.0192,  0.0503,  0.0244,  ..., -0.0235,  0.0092,  0.0015],
        [-0.0625,  0.0586,  0.0055,  ..., -0.0138,  0.0270,  0.0242],
        ...,
        [ 0.0623, -0.0507, -0.0317,  ..., -0.0457, -0.0431, -0.0385],
        [-0.0440, -0.0611,  0.0355,  ..., -0.0441, -0.0460,  0.0226],
        [-0.0303,  0.0565,  0.0513,  ...,  0.0221, -0.0123, -0.0329]],
       requires_grad=True)


grad : tensor([[-1.4388e-02,  4.0130e-03,  6.7798e-02,  ...,  5.8259e-03,
          1.0939e-02, -2.0396e-03],
        [-6.6714e-02, -2.9093e-01, -8.4435e-01,  ...,  2.3720e-01,
         -6.2348e-01, -3.8151e-01],
        [-1.4267e-03,  1.8643e-03,  9.2419e-03,  ...,  1.0470e-02,
         -1.8479e-02,  2.8169e-04],
        ...,
        [ 9.7395e-02,  7.4536e-02, -7.8448e-02,  ..., -3.3999e-02,
          6.8368e-03,  1.1823e-01],
        [-3.9688e-06, -2.5760e-06, -2.4402e-07,  ...,  1.8225e-06,
       

In [12]:
# Apply one Adam update using the gradients currently stored in .grad.
# This changes the weights only; the gradients themselves are left in place.
optimizer.step()

In [13]:
# Same inspection after optimizer.step(): the weights have moved, while the
# stored gradient is still the one computed by the backward pass.
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[-0.0468,  0.0331,  0.0019,  ...,  0.0561, -0.0078, -0.0371],
        [ 0.0202,  0.0513,  0.0254,  ..., -0.0245,  0.0102,  0.0025],
        [-0.0615,  0.0576,  0.0045,  ..., -0.0148,  0.0280,  0.0232],
        ...,
        [ 0.0613, -0.0517, -0.0307,  ..., -0.0447, -0.0441, -0.0395],
        [-0.0430, -0.0601,  0.0365,  ..., -0.0451, -0.0450,  0.0236],
        [-0.0313,  0.0555,  0.0523,  ...,  0.0231, -0.0133, -0.0339]],
       requires_grad=True)


grad : tensor([[-1.4388e-02,  4.0130e-03,  6.7798e-02,  ...,  5.8259e-03,
          1.0939e-02, -2.0396e-03],
        [-6.6714e-02, -2.9093e-01, -8.4435e-01,  ...,  2.3720e-01,
         -6.2348e-01, -3.8151e-01],
        [-1.4267e-03,  1.8643e-03,  9.2419e-03,  ...,  1.0470e-02,
         -1.8479e-02,  2.8169e-04],
        ...,
        [ 9.7395e-02,  7.4536e-02, -7.8448e-02,  ..., -3.3999e-02,
          6.8368e-03,  1.1823e-01],
        [-3.9688e-06, -2.5760e-06, -2.4402e-07,  ...,  1.8225e-06,
       

### 清空 gradient

In [14]:
# Clear the stored gradients so the next backward() does not accumulate onto them.
# NOTE(review): the recorded output shows an all-zero .grad tensor; newer PyTorch
# releases default to set_to_none=True, in which case .grad becomes None instead.
optimizer.zero_grad()

In [15]:
# Same inspection after zero_grad(): the weights are untouched and the
# gradient of the first Linear layer has been cleared.
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[-0.0468,  0.0331,  0.0019,  ...,  0.0561, -0.0078, -0.0371],
        [ 0.0202,  0.0513,  0.0254,  ..., -0.0245,  0.0102,  0.0025],
        [-0.0615,  0.0576,  0.0045,  ..., -0.0148,  0.0280,  0.0232],
        ...,
        [ 0.0613, -0.0517, -0.0307,  ..., -0.0447, -0.0441, -0.0395],
        [-0.0430, -0.0601,  0.0365,  ..., -0.0451, -0.0450,  0.0236],
        [-0.0313,  0.0555,  0.0523,  ...,  0.0231, -0.0133, -0.0339]],
       requires_grad=True)


grad : tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
