In [1]:
%load_ext autotime

time: 0 ns


In [2]:
import torch
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F
import numpy as np
import torch.optim as optim


time: 4.73 s


### 搭建模型

In [4]:
class LinearBNAC(nn.Module):
    def __init__(self, in_channels, out_channels, bias=True, dropout=0.3, is_output=False):
        super(LinearBNAC, self).__init__()
        if is_output and out_channels==1:
            self.linear = nn.Sequential(
                nn.Linear(in_channels, out_channels, bias=bias),
                nn.Sigmoid()
            )
        elif is_output:
            self.linear = nn.Sequential(
                nn.Linear(in_channels, out_channels, bias=bias),
                nn.Softmax(dim=1)
            )   
        else:
            self.linear = nn.Sequential(
                nn.Linear(in_channels, out_channels, bias=bias),
                nn.Dropout(dropout),
                nn.BatchNorm1d(out_channels),
                nn.LeakyReLU(inplace=True)
            )
            
    def forward(self, x):
        out=self.linear(x)
        return out

time: 0 ns


In [5]:
class Model(nn.Module):
    def __init__(self, input_dimention, output_classes=1):
        super(Model, self).__init__()
        self.layer1 = LinearBNAC(input_dimention, 128)
        self.layer2 = LinearBNAC(128, 64)
        self.layer3 = LinearBNAC(64, 32)
        self.output = LinearBNAC(32, output_classes, is_output=True)
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.output(x)
        return x 
        

time: 0 ns


### 準備輸入資料、優化器、標籤資料、模型輸出

In [6]:
model = Model(input_dimention=256,output_classes=10)
# optimizer = "使用Adam optimizer"
optimizer = optim.Adam(params=model.parameters(), lr=0.001, weight_decay=1e-3)

time: 47 ms


In [19]:
batch_size = 4
input_features = 256
dummy_input = torch.randn(batch_size, input_features,)

target = torch.empty(4, 10, dtype=torch.float).random_(10)
#target = torch.tensor([9, 5, 4, 4], dtype=torch.long)

time: 0 ns


In [9]:
output = model(dummy_input)
print(output)

tensor([[0.0612, 0.0970, 0.1042, 0.1053, 0.0973, 0.0761, 0.1358, 0.1142, 0.1030,
         0.1059],
        [0.1192, 0.0771, 0.0395, 0.1361, 0.1230, 0.1113, 0.0631, 0.1663, 0.0441,
         0.1202],
        [0.1123, 0.0838, 0.0673, 0.1095, 0.0904, 0.1455, 0.1139, 0.1140, 0.0421,
         0.1213],
        [0.0971, 0.1054, 0.0821, 0.0589, 0.0570, 0.1163, 0.1232, 0.1385, 0.0512,
         0.1702]], grad_fn=<SoftmaxBackward>)
time: 78 ms


In [10]:
output.shape

torch.Size([4, 10])

time: 16 ms


### 計算 CrossEntropy Loss
* 請注意哪一個 Loss最適合：我們已經使用 softmax
* 因為我們有使用dropout，並隨機產生dummy_input，所以各為學員得到的值會與解答不同，然而步驟原理需要相同

In [13]:
from torch.nn import CrossEntropyLoss

time: 0 ns


In [14]:
criterion = CrossEntropyLoss()

time: 0 ns


In [16]:
target

tensor([9., 7., 6., 4., 3., 1., 2., 8., 2., 3.])

time: 16 ms


In [30]:
torch.log(output).reshape(-1)

tensor([-2.7932, -2.3326, -2.2615, -2.2510, -2.3298, -2.5763, -1.9964, -2.1700,
        -2.2730, -2.2455, -2.1271, -2.5627, -3.2326, -1.9942, -2.0954, -2.1953,
        -2.7626, -1.7939, -3.1202, -2.1187, -2.1862, -2.4790, -2.6991, -2.2119,
        -2.4035, -1.9277, -2.1729, -2.1715, -3.1679, -2.1099, -2.3325, -2.2502,
        -2.4992, -2.8314, -2.8642, -2.1512, -2.0936, -1.9772, -2.9717, -1.7709],
       grad_fn=<ViewBackward>)

time: 0 ns


In [33]:
target.shape

torch.Size([4, 10])

time: 0 ns


In [35]:
output.shape

torch.Size([4, 10])

time: 0 ns


In [31]:
loss = criterion(torch.log(output), target)

RuntimeError: 1D target tensor expected, multi-target not supported

time: 16 ms


### 完成back propagation並更新梯度

In [37]:
"自行輸入"

In [38]:
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[-0.0010, -0.0383, -0.0060,  ...,  0.0358, -0.0245, -0.0277],
        [-0.0432, -0.0382,  0.0384,  ...,  0.0360,  0.0118,  0.0447],
        [-0.0276, -0.0522, -0.0146,  ...,  0.0208,  0.0569,  0.0154],
        ...,
        [ 0.0122, -0.0478,  0.0501,  ..., -0.0181, -0.0309, -0.0114],
        [ 0.0263,  0.0399,  0.0579,  ..., -0.0354, -0.0192, -0.0358],
        [-0.0495, -0.0101,  0.0543,  ...,  0.0428,  0.0069,  0.0212]],
       requires_grad=True)


grad : tensor([[ 0.0834, -0.0256, -0.0764,  ...,  0.0718,  0.0144,  0.0422],
        [ 0.1144, -0.0429, -0.0292,  ..., -0.0337,  0.0184, -0.0535],
        [-0.2403,  0.1393,  0.2140,  ..., -0.1664, -0.1559, -0.0831],
        ...,
        [-0.2777,  0.1198,  0.1241,  ..., -0.1771,  0.0594, -0.0452],
        [-0.0297,  0.1155, -0.0428,  ...,  0.0807, -0.1282,  0.0959],
        [ 0.0055, -0.0410, -0.0648,  ...,  0.0877,  0.0826,  0.0698]])


In [39]:
"自行輸入"

In [40]:
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[-0.0020, -0.0373, -0.0050,  ...,  0.0348, -0.0255, -0.0287],
        [-0.0442, -0.0372,  0.0394,  ...,  0.0370,  0.0108,  0.0457],
        [-0.0266, -0.0532, -0.0156,  ...,  0.0218,  0.0579,  0.0164],
        ...,
        [ 0.0132, -0.0488,  0.0491,  ..., -0.0171, -0.0319, -0.0104],
        [ 0.0273,  0.0389,  0.0589,  ..., -0.0364, -0.0182, -0.0368],
        [-0.0505, -0.0091,  0.0553,  ...,  0.0418,  0.0059,  0.0202]],
       requires_grad=True)


grad : tensor([[ 0.0834, -0.0256, -0.0764,  ...,  0.0718,  0.0144,  0.0422],
        [ 0.1144, -0.0429, -0.0292,  ..., -0.0337,  0.0184, -0.0535],
        [-0.2403,  0.1393,  0.2140,  ..., -0.1664, -0.1559, -0.0831],
        ...,
        [-0.2777,  0.1198,  0.1241,  ..., -0.1771,  0.0594, -0.0452],
        [-0.0297,  0.1155, -0.0428,  ...,  0.0807, -0.1282,  0.0959],
        [ 0.0055, -0.0410, -0.0648,  ...,  0.0877,  0.0826,  0.0698]])


### 清空 gradient

In [41]:
"自行輸入"

In [42]:
print('weight : {}'.format(model.layer1.linear[0].weight))
print('\n')
print('grad : {}'.format(model.layer1.linear[0].weight.grad))

weight : Parameter containing:
tensor([[-0.0020, -0.0373, -0.0050,  ...,  0.0348, -0.0255, -0.0287],
        [-0.0442, -0.0372,  0.0394,  ...,  0.0370,  0.0108,  0.0457],
        [-0.0266, -0.0532, -0.0156,  ...,  0.0218,  0.0579,  0.0164],
        ...,
        [ 0.0132, -0.0488,  0.0491,  ..., -0.0171, -0.0319, -0.0104],
        [ 0.0273,  0.0389,  0.0589,  ..., -0.0364, -0.0182, -0.0368],
        [-0.0505, -0.0091,  0.0553,  ...,  0.0418,  0.0059,  0.0202]],
       requires_grad=True)


grad : tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
