In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Tensor

In [13]:
# clone() gives an independent copy; reshape() here shares storage with `v`
# (compare the storage pointers printed below).
v = torch.tensor([1, 2, 3])
vi = v.clone()
vii = v.reshape(1, 3)
vi.zero_()  # in-place write: only the clone changes, `v` keeps its values
for t in (v, vi, vii):
    print(t, t.storage().data_ptr())

a = torch.randn(2, 2, 2, 2)
print(a)
# Index lists used together must broadcast against each other:
#   a[0, [0, 1, 1, 0], 0:2, 0:2] = 0          -> works (one index list plus slices)
#   a[0, [0, 1, 1, 0], [0, 0], [0, 1]] = 0    -> error (lengths 4, 2 and 2 do not broadcast)
dim1_idx = [0, 1, 1, 0]
dim2_idx = [0, 0, 1, 1]
dim3_idx = [0, 1, 0, 1]
# Zeroes the four elements (0, d1, d2, d3) picked position-wise from the three lists.
a[0, dim1_idx, dim2_idx, dim3_idx] = 0
print(a)

tensor([1, 2, 3]) 140242842820928
tensor([0, 0, 0]) 140242843457408
tensor([[1, 2, 3]]) 140242842820928
tensor([[[[ 1.0656,  0.6203],
          [ 0.2044,  0.1177]],

         [[ 1.0796, -0.0443],
          [-1.2144,  1.4232]]],


        [[[-0.0511, -0.0465],
          [-0.1236, -1.0207]],

         [[ 0.9134, -0.3741],
          [-0.6077,  1.1833]]]])
tensor([[[[ 0.0000,  0.6203],
          [ 0.2044,  0.0000]],

         [[ 1.0796,  0.0000],
          [ 0.0000,  1.4232]]],


        [[[-0.0511, -0.0465],
          [-0.1236, -1.0207]],

         [[ 0.9134, -0.3741],
          [-0.6077,  1.1833]]]])


## Dimension

In [25]:
# Broadcasting: a (1, 3) row divided by a (3, 1) column gives a (3, 3) result.
v = torch.tensor([[1., 2., 3.]])
d_v = torch.tensor([2., 1., 2.]).unsqueeze(1)
print(torch.div(v, d_v))

# A (2, 3) matrix divided by a (2, 1) column: each row is divided by its own value.
vi = torch.tensor([[1., 2., 3.], [2., 4., 5.]])
d_vi = torch.tensor([1., 2.]).unsqueeze(1)
print(torch.div(vi, d_vi))

tensor([[0.5000, 1.0000, 1.5000],
        [1.0000, 2.0000, 3.0000],
        [0.5000, 1.0000, 1.5000]])
tensor([[1.0000, 2.0000, 3.0000],
        [1.0000, 2.0000, 2.5000]])


# Autograd

In [35]:
class X:
    """Holds a 2x2 tensor of ones that tracks gradients, plus a small computation on it."""

    def __init__(self):
        super().__init__()
        self.x = torch.ones(2, 2, requires_grad=True)

    def forward(self):
        """Return the per-row mean of ``(x + 3) * (x + 3) * 3``."""
        shifted = self.x + 3
        scaled = shifted * shifted * 3
        return scaled.mean(dim=1)

# Run forward/backward three times without clearing the gradient in between:
# every backward() call adds to x.x.grad instead of overwriting it.
x = X()
upstream = torch.ones(2)  # gradient fed into backward() for the 2-element output
for _ in range(3):
    print(x.x)
    out = x.forward()
    print(out)
    # To reset between iterations (a tensor has no zero_grad() method):
    #     if x.x.grad is not None:
    #         x.x.grad.zero_()
    out.backward(upstream)
    print(x.x.grad)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
tensor([48., 48.], grad_fn=<MeanBackward1>)
tensor([[12., 12.],
        [12., 12.]])
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
tensor([48., 48.], grad_fn=<MeanBackward1>)
tensor([[24., 24.],
        [24., 24.]])
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
tensor([48., 48.], grad_fn=<MeanBackward1>)
tensor([[36., 36.],
        [36., 36.]])


# About Layers

## Batchnorm

In [41]:
# One sample with three 3x3 channels, laid out as (N, C, H, W).
x = torch.tensor([[[[1., 2., 3.],
                    [1., 2., 3.],
                    [1., 2., 3.]],

                   [[2., 2., 2.],
                    [3., 1., 1.],
                    [5., 4., 3.]],

                   [[1., 1., 1.],
                    [5., 6., 8.],
                    [9., 0., 1.]]]])

# Same epsilon for the manual formula and for the layer, so the two stay comparable.
bn_eps = 1e-5

# Per-channel statistics: reduce over every dim except the channel dim (1);
# keepdim=True keeps the result broadcastable against `x`.
mean_c = x.mean(dim=(0, 2, 3), keepdim=True)
# Biased variance (unbiased=False). Note this is a variance, not a standard deviation.
var_c = x.var(dim=(0, 2, 3), unbiased=False, keepdim=True)
std_c = var_c  # legacy alias kept for any code that still reads `std_c`; it holds the variance

print("x:")
print(x)
print("x channel mean:")
print(mean_c)
print("x channel var:")
print(var_c)
x_norm = (x - mean_c) / torch.sqrt(var_c + bn_eps)
print(x_norm)


v = nn.BatchNorm2d(3, eps=bn_eps, affine=False)
# Uncomment to inspect what the layer registers:
# for p in v.named_parameters():
#     print(p)
# for b in v.named_buffers():
#     print(b)
y = v(x)
print("batch norm forward:")
print(y)

# The hand-written normalisation must agree with the layer's forward pass.
assert torch.allclose(x_norm, y, atol=1e-5), "manual batch norm differs from nn.BatchNorm2d"

x:
tensor([[[[1., 2., 3.],
          [1., 2., 3.],
          [1., 2., 3.]],

         [[2., 2., 2.],
          [3., 1., 1.],
          [5., 4., 3.]],

         [[1., 1., 1.],
          [5., 6., 8.],
          [9., 0., 1.]]]])
x channel mean:
tensor([[[[2.0000]],

         [[2.5556]],

         [[3.5556]]]])
x channel std:
tensor([[[[ 0.6667]],

         [[ 1.5802]],

         [[10.6914]]]])
tensor([[[[-1.2247,  0.0000,  1.2247],
          [-1.2247,  0.0000,  1.2247],
          [-1.2247,  0.0000,  1.2247]],

         [[-0.4419, -0.4419, -0.4419],
          [ 0.3536, -1.2374, -1.2374],
          [ 1.9445,  1.1490,  0.3536]],

         [[-0.7816, -0.7816, -0.7816],
          [ 0.4418,  0.7476,  1.3593],
          [ 1.6651, -1.0874, -0.7816]]]])
batch norm forward:
tensor([[[[-1.2247,  0.0000,  1.2247],
          [-1.2247,  0.0000,  1.2247],
          [-1.2247,  0.0000,  1.2247]],

         [[-0.4419, -0.4419, -0.4419],
          [ 0.3536, -1.2374, -1.2374],
          [ 1.9445,  1.1490,  0

# About nn.Module class

In [2]:
class ConvNet(nn.Module):
    """Two 3x3 convolutions followed by three fully connected layers with 10 outputs."""

    def __init__(self):
        super().__init__()
        # Creation order is kept as-is so parameters are registered in the same order.
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)
        self.fc1 = nn.Linear(16 * 6 * 6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Apply conv -> relu -> 2x2 max-pool twice, flatten, then fc1 -> fc2 -> fc3."""
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), 2)
        # Collapse everything except the leading dimension into one feature axis.
        x = x.view(-1, self.num_flat_features(x))
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first."""
        return x.size()[1:].numel()

In [3]:
# Instantiate the network and print it.
net = ConvNet()
print(net)

# Disabled snippet: number of parameter tensors and the size of each one.
# params = list(net.parameters())
# print(len(params))
# for i in range(len(params)):
#     print(params[i].size())

# Disabled snippet: the Python type of every parameter.
# for param in net.parameters():
#     print(type(param))

ConvNet(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


## nn.Module attribute

In [4]:
class Sub(nn.Module):
    """Small module wrapping a single ``Linear(3, 7)`` layer."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(3, 7)

    def forward(self, x):
        """Apply ``fc1`` to ``x`` and return the result."""
        return self.fc1(x)

class Net(nn.Module):
    """Two Linear layers (3 -> 5 -> 3) followed by a nested ``Sub`` module."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(3, 5)
        self.fc2 = nn.Linear(5, 3)
        self.fc3 = Sub()

    def forward(self, x):
        """Pass ``x`` through fc1, fc2 and fc3, in that order."""
        for stage in (self.fc1, self.fc2, self.fc3):
            x = stage(x)
        return x

net = Net()
print("===> Net architecture:")
print(net)

# buffers(): non-parameter tensors registered on the module tree.
print("===> buffers:")
for buf in net.buffers():
    print("---")
    print(type(buf), buf.size())

# children(): direct sub-modules only; the Linear inside `Sub` is not yielded on its own.
print("===> children:")
for chd in net.children():
    print(type(chd), chd)

# named_children(): same as children(), paired with the attribute name (a str).
print("===> named_children:")
for n, n_chd in net.named_children():
    print(n, f'-> ({type(n)}), ', n_chd)

# modules(): the module itself followed by its descendants.
print("===> modules:")
for modu in net.modules():
    print(modu, isinstance(modu, nn.Linear))

print("===> parameters:")
for para in net.parameters():
    print(para)

# named_parameters(recurse=True): names are dotted paths from `net`, so every bias
# name ends with ".bias" and can be zeroed by matching on that suffix.
print("===> named_parameters:")
init_flag = True
for n, para in net.named_parameters(recurse=True):
    print(n, f'-> ({type(n)})')
    if init_flag and n.endswith(".bias"):
        nn.init.constant_(para, 0.0)
    print(para)

# Zero-initialise the Linear layers found through children().
# Limitation of this approach: children() does not recurse, so the Linear nested
# inside `Sub` (net.fc3.fc1) is never visited.
print("===> initial test:")
for chd in net.children():
    is_linear = isinstance(chd, nn.Linear)
    print(is_linear)
    if not is_linear:
        continue
    # Called on the layer itself, named_parameters() yields un-prefixed names
    # ("weight", "bias"), so match without a leading dot and use the yielded
    # parameter directly.
    for para_name, para in chd.named_parameters():
        print(para_name)
        if para_name.endswith("weight"):
            nn.init.constant_(para, 0.0)
            print(para)
        elif para_name.endswith("bias"):
            nn.init.constant_(para, 0.0)

===> Net architecture:
Net(
  (fc1): Linear(in_features=3, out_features=5, bias=True)
  (fc2): Linear(in_features=5, out_features=3, bias=True)
  (fc3): Sub(
    (fc1): Linear(in_features=3, out_features=7, bias=True)
  )
)
===> buffers:
===> children:
<class 'torch.nn.modules.linear.Linear'> Linear(in_features=3, out_features=5, bias=True)
<class 'torch.nn.modules.linear.Linear'> Linear(in_features=5, out_features=3, bias=True)
<class '__main__.Sub'> Sub(
  (fc1): Linear(in_features=3, out_features=7, bias=True)
)
===> named_children:
fc1 -> (<class 'str'>),  Linear(in_features=3, out_features=5, bias=True)
fc2 -> (<class 'str'>),  Linear(in_features=5, out_features=3, bias=True)
fc3 -> (<class 'str'>),  Sub(
  (fc1): Linear(in_features=3, out_features=7, bias=True)
)
===> modules:
Net(
  (fc1): Linear(in_features=3, out_features=5, bias=True)
  (fc2): Linear(in_features=5, out_features=3, bias=True)
  (fc3): Sub(
    (fc1): Linear(in_features=3, out_features=7, bias=True)
  )
) False

## Initialization

In [25]:
class Neti(nn.Module):
    """Two chained ``Linear(2, 2)`` layers; the second is created with ``bias=False``."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(2, 2)
        self.fc2 = nn.Linear(2, 2, bias=False)

    def forward(self, x):
        """Return ``fc2(fc1(x))``."""
        return self.fc2(self.fc1(x))

def _init_weight(m):
    if isinstance(m, nn.Linear):
        nn.init.xavier_normal_(m.weight)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
    elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)


neti = Neti()

# hasattr() reports True for both layers: a Linear built with bias=False still
# has a `bias` attribute (set to None), so hasattr() cannot detect a missing bias.
print("===> hasattr:")
for chld_name, chld in neti.named_children():
    has_bias_attr = hasattr(chld, "bias")
    print(f'{chld_name} -->', has_bias_attr)

# apply() runs the initialiser on the sub-modules and on `neti` itself.
print("===> apply fn:")
neti.apply(_init_weight)
for para in neti.parameters():
    print(para)

===> hasattr:
fc1 --> True
fc2 --> True
===> apply fn:
Parameter containing:
tensor([[-0.2439,  0.4455],
        [-0.5618, -0.2808]], requires_grad=True)
Parameter containing:
tensor([0., 0.], requires_grad=True)
Parameter containing:
tensor([[-0.0714, -0.1229],
        [-0.3134, -0.4178]], requires_grad=True)


# About Loss

In [4]:
# Build the ConvNet here instead of relying on the global `net`: the
# "nn.Module attribute" cell rebinds `net` to a `Net` (3 input features), which
# cannot take the (1, 1, 32, 32) input below when the notebook is run top to bottom.
conv_net = ConvNet()

inpt = torch.randn(1, 1, 32, 32)
out = conv_net(inpt)
print(out)
# Random target with the same (1, 10) shape as the network output.
y = torch.randn(10).view(1, -1)
print(y)

# Mean squared error written out by hand ...
res = torch.mean(torch.pow(out - y, 2))
print(res)

# ... and via the built-in criterion.
criterion = nn.MSELoss()
loss = criterion(out, y)
print(loss)

# Both computations must give the same value.
assert torch.allclose(res, loss), "manual MSE differs from nn.MSELoss"

tensor([[ 0.0113,  0.0475, -0.0518,  0.0591,  0.0167,  0.0898,  0.1361, -0.1081,
          0.0605,  0.0512]], grad_fn=<AddmmBackward>)
tensor([[-1.1377, -0.5150, -1.2655,  0.6322,  0.5224, -2.1549, -0.8694,  0.9398,
          1.4885,  0.6093]])
tensor(1.3192, grad_fn=<MeanBackward0>)
tensor(1.3192, grad_fn=<MseLossBackward>)
