# `nn.Linear`

REF1: https://ckmarkoh.github.io/blog/2016/12/19/torch-nn-tutorial-1-nn-module/

In [10]:
import torch
import torch.nn as nn

# Fully connected layer mapping 5 input features to 10 output features.
m = nn.Linear(5, 10)
# A batch of 128 samples with 5 features each. Named `batch` rather than
# `input` so the Python builtin `input()` is not shadowed in the kernel.
batch = torch.randn(128, 5)
output = m(batch)
# Only the last dimension changes: (128, 5) -> (128, 10).
print(output.size())

torch.Size([128, 10])


In [11]:
# Learnable bias of the linear layer: one value per output feature (10 here).
m.bias

Parameter containing:
tensor([ 0.1969,  0.2371,  0.3612,  0.2130, -0.3776,  0.1876, -0.2564, -0.3319,
         0.2288, -0.3284], requires_grad=True)

In [13]:
# Learnable weight matrix, stored as (out_features, in_features) = (10, 5).
m.weight

Parameter containing:
tensor([[-0.0962,  0.2057,  0.4163, -0.3912,  0.4219],
        [-0.1839,  0.2085, -0.1107,  0.0592,  0.4059],
        [-0.3353, -0.0965, -0.2885, -0.3037,  0.1759],
        [-0.2194,  0.1496,  0.2203, -0.2606,  0.2971],
        [ 0.0797, -0.3582,  0.3527,  0.3037, -0.2373],
        [-0.3618, -0.3244, -0.0241, -0.2118, -0.1215],
        [ 0.0011,  0.2425, -0.1974, -0.2197, -0.1732],
        [-0.0120, -0.3694,  0.1556,  0.4180,  0.3679],
        [-0.3019,  0.0101,  0.3345,  0.3555,  0.0627],
        [-0.3992,  0.4079, -0.1096, -0.0224, -0.3396]], requires_grad=True)

In [15]:
# A single sample with 5 features (batch size 1).
t = torch.randn(1, 5)
# Call the module itself instead of `m.forward(t)`: `Module.__call__` runs any
# registered hooks and then dispatches to `forward`, whereas calling `forward`
# directly silently skips the hooks.
y = m(t)
y

tensor([[ 1.2996, -0.0411, -0.1183,  0.8387, -0.0985,  0.1079, -0.2075, -0.8026,
          0.5888, -0.0592]], grad_fn=<AddmmBackward0>)

# `nn.Dropout`

REF1: https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html

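`nn.Dropout(p: float = 0.5, inplace: bool = False)`:

+ `p`: the probability of an element being zeroed. During training, the surviving elements are scaled by `1 / (1 - p)`, so with `p=0.2` in the example below every kept value is divided by 0.8.
+ `inplace`: if `True`, the operation is applied directly to the input tensor (same memory locations) instead of allocating a new tensor.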

In [19]:
import torch
import torch.nn as nn

# Use a dedicated name for the dropout layer: rebinding `m` would replace the
# `nn.Linear` layer that the earlier cells (`m.bias`, `m.weight`) rely on.
dropout = nn.Dropout(p=0.2)
# Named `samples` rather than `input` so the Python builtin `input()` is not
# shadowed in the kernel.
samples = torch.randn(5, 5)
print(samples)
# A freshly built module is in training mode, so dropout is active: each
# element is zeroed with probability 0.2 and the kept ones are scaled by 1 / 0.8.
output = dropout(samples)
output

tensor([[-0.2997, -0.2020, -0.5611, -0.4798,  0.3730],
        [-0.8609,  0.7355,  0.3023, -1.2100,  0.5945],
        [ 1.2881, -0.3667, -0.7829,  0.3815, -1.5687],
        [-0.7829,  0.7235,  0.2427, -0.6216,  0.8851],
        [ 0.9593,  0.0796,  1.8839,  0.3918,  0.2607]])


tensor([[-0.3746, -0.2525, -0.0000, -0.5997,  0.0000],
        [-1.0761,  0.0000,  0.3778, -1.5125,  0.7431],
        [ 1.6101, -0.0000, -0.9786,  0.0000, -0.0000],
        [-0.9787,  0.9044,  0.3034, -0.7770,  1.1064],
        [ 1.1992,  0.0995,  0.0000,  0.4897,  0.3259]])

# `view`

In [59]:
import torch

# Fix the RNG seed so the tensor below is identical on every run.
torch.manual_seed(1)
# 2 blocks x 3 rows x 20 values drawn from a standard normal distribution.
t = torch.randn((2, 3, 20))
t

tensor([[[-1.5256, -0.7502, -0.6540, -1.6095, -0.1002, -0.6092, -0.9798,
          -1.6091, -0.7121,  0.3037, -0.7773, -0.2515, -0.2223,  1.6871,
           0.2284,  0.4676, -0.6970, -1.1608,  0.6995,  0.1991],
         [ 0.8657,  0.2444, -0.6629,  0.8073,  1.1017, -0.1759, -2.2456,
          -1.4465,  0.0612, -0.6177, -0.7981, -0.1316,  1.8793, -0.0721,
           0.1578, -0.7735,  0.1991,  0.0457,  0.1530, -0.4757],
         [-0.1110,  0.2927, -0.1578, -0.0288,  2.3571, -1.0373,  1.5748,
          -0.6298, -0.9274,  0.5451,  0.0663, -0.4370,  0.7626,  0.4415,
           1.1651,  2.0154,  0.1374,  0.9386, -0.1860, -0.6446]],

        [[ 1.5392, -0.8696, -3.3312, -0.7479, -0.0255, -1.0233, -0.5962,
          -1.0055, -0.2106, -0.0075,  1.6734,  0.0103, -0.7040, -0.1853,
          -0.9962, -0.8313, -0.4610, -0.5601,  0.3956, -0.9823],
         [-0.5065,  0.0998, -0.6540,  0.7317, -1.4344, -0.5008,  0.1716,
          -0.1600,  0.2546, -0.5020, -1.0412,  0.7323, -1.0483, -0.4709,
        

In [60]:
# Split the last dimension of 20 into (4, 5). `view` reinterprets the same
# values in the same order, giving shape (2, 3, 4, 5).
x = t.view((2, 3, 4, 5))
x

tensor([[[[-1.5256, -0.7502, -0.6540, -1.6095, -0.1002],
          [-0.6092, -0.9798, -1.6091, -0.7121,  0.3037],
          [-0.7773, -0.2515, -0.2223,  1.6871,  0.2284],
          [ 0.4676, -0.6970, -1.1608,  0.6995,  0.1991]],

         [[ 0.8657,  0.2444, -0.6629,  0.8073,  1.1017],
          [-0.1759, -2.2456, -1.4465,  0.0612, -0.6177],
          [-0.7981, -0.1316,  1.8793, -0.0721,  0.1578],
          [-0.7735,  0.1991,  0.0457,  0.1530, -0.4757]],

         [[-0.1110,  0.2927, -0.1578, -0.0288,  2.3571],
          [-1.0373,  1.5748, -0.6298, -0.9274,  0.5451],
          [ 0.0663, -0.4370,  0.7626,  0.4415,  1.1651],
          [ 2.0154,  0.1374,  0.9386, -0.1860, -0.6446]]],


        [[[ 1.5392, -0.8696, -3.3312, -0.7479, -0.0255],
          [-1.0233, -0.5962, -1.0055, -0.2106, -0.0075],
          [ 1.6734,  0.0103, -0.7040, -0.1853, -0.9962],
          [-0.8313, -0.4610, -0.5601,  0.3956, -0.9823]],

         [[-0.5065,  0.0998, -0.6540,  0.7317, -1.4344],
          [-0.5008,  

In [62]:
# REF: https://stackoverflow.com/questions/48915810/what-does-contiguous-do-in-pytorch
# transpose(1, 2) swaps dims 1 and 2, (2, 3, 4, 5) -> (2, 4, 3, 5), without moving
# any data, so the result is not contiguous in memory.
# contiguous() returns a tensor with the same values laid out contiguously: it
# copies the data only when the tensor is not already contiguous (as here);
# on an already-contiguous tensor it returns the tensor itself.
x.transpose(1, 2).contiguous()

tensor([[[[-1.5256, -0.7502, -0.6540, -1.6095, -0.1002],
          [ 0.8657,  0.2444, -0.6629,  0.8073,  1.1017],
          [-0.1110,  0.2927, -0.1578, -0.0288,  2.3571]],

         [[-0.6092, -0.9798, -1.6091, -0.7121,  0.3037],
          [-0.1759, -2.2456, -1.4465,  0.0612, -0.6177],
          [-1.0373,  1.5748, -0.6298, -0.9274,  0.5451]],

         [[-0.7773, -0.2515, -0.2223,  1.6871,  0.2284],
          [-0.7981, -0.1316,  1.8793, -0.0721,  0.1578],
          [ 0.0663, -0.4370,  0.7626,  0.4415,  1.1651]],

         [[ 0.4676, -0.6970, -1.1608,  0.6995,  0.1991],
          [-0.7735,  0.1991,  0.0457,  0.1530, -0.4757],
          [ 2.0154,  0.1374,  0.9386, -0.1860, -0.6446]]],


        [[[ 1.5392, -0.8696, -3.3312, -0.7479, -0.0255],
          [-0.5065,  0.0998, -0.6540,  0.7317, -1.4344],
          [ 1.3851, -0.8138, -0.9276,  1.1120,  0.1573]],

         [[-1.0233, -0.5962, -1.0055, -0.2106, -0.0075],
          [-0.5008,  0.1716, -0.1600,  0.2546, -0.5020],
          [ 1.2540,

In [66]:
# Swapping dims 1 and 2 twice restores the original (2, 3, 4, 5) ordering, so
# flattening the last two dims gives back a (2, 3, 20) tensor (-1 is inferred
# as 3) holding the same values as `t`.
(
    x.transpose(1, 2)
    .transpose(1, 2)
    .contiguous()
    .view(2, -1, 20)
)

tensor([[[-1.5256, -0.7502, -0.6540, -1.6095, -0.1002, -0.6092, -0.9798,
          -1.6091, -0.7121,  0.3037, -0.7773, -0.2515, -0.2223,  1.6871,
           0.2284,  0.4676, -0.6970, -1.1608,  0.6995,  0.1991],
         [ 0.8657,  0.2444, -0.6629,  0.8073,  1.1017, -0.1759, -2.2456,
          -1.4465,  0.0612, -0.6177, -0.7981, -0.1316,  1.8793, -0.0721,
           0.1578, -0.7735,  0.1991,  0.0457,  0.1530, -0.4757],
         [-0.1110,  0.2927, -0.1578, -0.0288,  2.3571, -1.0373,  1.5748,
          -0.6298, -0.9274,  0.5451,  0.0663, -0.4370,  0.7626,  0.4415,
           1.1651,  2.0154,  0.1374,  0.9386, -0.1860, -0.6446]],

        [[ 1.5392, -0.8696, -3.3312, -0.7479, -0.0255, -1.0233, -0.5962,
          -1.0055, -0.2106, -0.0075,  1.6734,  0.0103, -0.7040, -0.1853,
          -0.9962, -0.8313, -0.4610, -0.5601,  0.3956, -0.9823],
         [-0.5065,  0.0998, -0.6540,  0.7317, -1.4344, -0.5008,  0.1716,
          -0.1600,  0.2546, -0.5020, -1.0412,  0.7323, -1.0483, -0.4709,
        