# PyTorch Documentation

### torch.Tensor vs. torch.tensor

In [None]:
# torch.Tensor 
# is a class.
# if input data is torch.Tensor, shares the memory space.
# if input data is list or numpy, copies the data.
# transforms input data into 32-bit floating point data type.
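# torch.Tensor is also the type of every tensor object (type(t) returns torch.Tensor),
# and as a constructor it is an alias for the default tensor type (torch.FloatTensor).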

# torch.tensor 
# is a function.
# always copies input data.
# infers input data's data type, unless specified by dtype.

In [11]:
import torch

# torch.Tensor - torch.Tensor input
orig_data = torch.Tensor([1])

new_data = torch.Tensor(orig_data)
print(orig_data, new_data)

orig_data[0] = 2
print(orig_data, new_data)

tensor([1.]) tensor([1.])
tensor([2.]) tensor([2.])


In [12]:
# torch.Tensor - list, numpy input
orig_data = [1]

new_data = torch.Tensor(orig_data)
print(orig_data, new_data)

orig_data[0] = 2
print(orig_data, new_data)

[1] tensor([1.])
[2] tensor([1.])


In [18]:
# torch.tensor - torch.Tensor input

orig_data = torch.tensor([1])
# orig_data = torch.Tensor([1]) # 처음 declare 할땐 뭐 쓰지? 노상관?
new_data = torch.tensor(orig_data)
print(orig_data, new_data)

orig_data[0] = 2
print(orig_data, new_data)

# 워닝 내용은 대충 if you want to avoid a copy, use these functions 인듯.

tensor([1.]) tensor([1.])
tensor([2.]) tensor([1.])


  new_data = torch.tensor(orig_data)


In [23]:
t = torch.Tensor([1])
type(t)

torch.Tensor

In [24]:
t = torch.Tensor([1])
type(t)

torch.Tensor

### index_select
### axis

In [50]:
# index_select

import torch

A = torch.Tensor([[1, 2],
                  [3, 4]])

# TODO : [1, 3]을 만드세요!

index = torch.tensor([0]) # must use int type for index
output = torch.index_select(A, 1, index)
print(output)

output = output.reshape(-1)
# output = output.view(-1) # tf에선 reshape인데 걍 reshape 쓰면 안됨?
print(output)

tensor([[1.],
        [3.]])
tensor([1., 3.])


In [33]:
output = A[:, 0] # 이걸로도 가능
output

tensor([1., 3.])

In [34]:
# axis test

output = torch.index_select(A, 0, index)
output

tensor([[1., 2.]])

In [44]:
# axis test - 3D

A = torch.arange(1, 9)
A = A.reshape(2, 2, 2)
A

tensor([[[1, 2],
         [3, 4]],

        [[5, 6],
         [7, 8]]])

In [52]:
output = torch.index_select(A, 0, index)
output

# check the axis orientation - axis 0 runs from element 1 to element 5 (across the 2x2 blocks)
# this is the outermost ('most complicated') axis.

tensor([[1., 2.]])

In [47]:
output = torch.index_select(A, 1, index)
output

# check the axis orientation - from [1] to [3]

tensor([[[1, 2]],

        [[5, 6]]])

In [48]:
output = torch.index_select(A, 2, index)
output

# check the axis orientation - from [1] to [2]
# this is the 'simplest' axis.

tensor([[[1],
         [3]],

        [[5],
         [7]]])

In [71]:
A = torch.arange(1, 25)
A = A.reshape(4, 3, 2)
print(A.shape)
print(A)

# axis 0: 4
# axis 1: 3
# axis 2: 2

torch.Size([4, 3, 2])
tensor([[[ 1,  2],
         [ 3,  4],
         [ 5,  6]],

        [[ 7,  8],
         [ 9, 10],
         [11, 12]],

        [[13, 14],
         [15, 16],
         [17, 18]],

        [[19, 20],
         [21, 22],
         [23, 24]]])


### gather
### view, reshape
### expand, repeat
### unsqueeze, squeeze

In [57]:
# 2D gather

A = torch.Tensor([[1, 2],
                  [3, 4]])

index = torch.tensor([[0],
                      [1]])
output = torch.gather(A, 1, index)
print(output)

output = output.reshape(-1)
print(output)

# 'from [1] to [3]' axis 의 방향으로 첫번째 라인에서 index 0, 
# 1번째 라인에서 index 1에 해당하는 값들.

tensor([[1.],
        [4.]])
tensor([1., 4.])


In [58]:
# 3D gather

A = torch.Tensor([[[1, 2],
                   [3, 4]],
                  
                  [[5, 6],
                   [7, 8]]])

index = torch.tensor([[[0],
                       [1]],
                      [[0],
                       [1]]])
output = torch.gather(A, 2, index)
print(output)
# axis가 2임. The simplest axis. 'from [1] to [2]' axis.
# 총 4개의 라인에 대해 index 0, 1, 0, 1에 해당하는 값을 gather.

output = output.reshape(2, 2)
print(output)

tensor([[[1.],
         [4.]],

        [[5.],
         [8.]]])
tensor([[1., 4.],
        [5., 8.]])


In [147]:
# 3D gather - arbitrary size

# TODO : 임의의 크기의 3D tensor에서 대각선 요소 가져와 2D로 반환하는 함수를 만드세요! 
def get_diag_element_3D(A):
    """Return the main diagonal of every H-W plane of a 3D tensor.

    Args:
        A: tensor of shape (C, H, W).

    Returns:
        Tensor of shape (C, min(H, W)); row c holds A[c, i, i] for each i.
    """
    C, H, W = A.shape
    # The channel axis (axis 0) is left untouched; the diagonal is drawn
    # on each H-W plane.
    diag_size = min(H, W)

    # Row i gathers column i: indices shaped (diag_size, 1), expanded over C.
    # view vs. reshape: reshape does not impose any contiguity constraint,
    # but also does not guarantee data sharing.
    # The index is created on A's device because gather needs the index and
    # the input on the same device.
    gather_index = (
        torch.arange(diag_size, device=A.device)
        .reshape(diag_size, 1)
        .expand(C, diag_size, 1)
    )
    print('gather_index:')
    print(gather_index)

    output = torch.gather(A, 2, gather_index)
    output = output.view(C, diag_size)

    return output

C = 1
H = 2
W = 3

A = torch.tensor([i for i in range(1, C*H*W + 1)])
A = A.reshape(C, H, W)
print(A)

A = get_diag_element_3D(A)
print(A)

tensor([[[1, 2, 3],
         [4, 5, 6]]])
gather_index:
tensor([[[0],
         [1]]])
tensor([[1, 5]])


In [148]:
# 아래 코드는 수정하실 필요가 없습니다!
A = torch.tensor([[[1]]])

if torch.all(get_diag_element_3D(A) == torch.Tensor([[1]])):
    print("🎉🎉🎉 성공!!! 🎉🎉🎉")
else:
    print("🦆 다시 도전해봐요!")

gather_index:
tensor([[[0]]])
🎉🎉🎉 성공!!! 🎉🎉🎉


In [149]:
# 아래 코드는 수정하실 필요가 없습니다!
A = torch.Tensor([[[1, 2],
                   [3, 4]],
                  [[5, 6],
                   [7, 8]]])

if torch.all(get_diag_element_3D(A) == torch.Tensor([[1, 4],
                                                     [5, 8]])):
    print("🎉🎉🎉 성공!!! 🎉🎉🎉")
else:
    print("🦆 다시 도전해봐요!")

gather_index:
tensor([[[0],
         [1]],

        [[0],
         [1]]])
🎉🎉🎉 성공!!! 🎉🎉🎉


In [150]:
# 아래 코드는 수정하실 필요가 없습니다!
A = torch.Tensor([[[1, 2, 3],
                   [4, 5, 6]]])

if torch.all(get_diag_element_3D(A) == torch.Tensor([[1, 5]])):
    print("🎉🎉🎉 성공!!! 🎉🎉🎉")
else:
    print("🦆 다시 도전해봐요!")

gather_index:
tensor([[[0],
         [1]]])
🎉🎉🎉 성공!!! 🎉🎉🎉


In [151]:
# 아래 코드는 수정하실 필요가 없습니다!
A = torch.tensor([[[ 1,  2,  3,  4,  5],
                   [ 6,  7,  8,  9, 10],
                   [11, 12, 13, 14, 15]],
          
                  [[16, 17, 18, 19, 20],
                   [21, 22, 23, 24, 25],
                   [26, 27, 28, 29, 30]]])

if torch.all(get_diag_element_3D(A) == torch.Tensor([[ 1,  7, 13],
                                                     [16, 22, 28]])):
    print("🎉🎉🎉 성공!!! 🎉🎉🎉")
else:
    print("🦆 다시 도전해봐요!")

gather_index:
tensor([[[0],
         [1],
         [2]],

        [[0],
         [1],
         [2]]])
🎉🎉🎉 성공!!! 🎉🎉🎉


In [152]:
# 아래 코드는 수정하실 필요가 없습니다!
A = torch.tensor([[[ 1,  2,  3],
                   [ 4,  5,  6],
                   [ 7,  8,  9],
                   [10, 11, 12],
                   [13, 14, 15]],
        
                  [[16, 17, 18],
                   [19, 20, 21],
                   [22, 23, 24],
                   [25, 26, 27],
                   [28, 29, 30]],
        
                  [[31, 32, 33],
                   [34, 35, 36],
                   [37, 38, 39],
                   [40, 41, 42],
                   [43, 44, 45]]])

if torch.all(get_diag_element_3D(A) == torch.Tensor([[ 1,  5,  9],
                                                     [16, 20, 24],
                                                     [31, 35, 39]])):
    print("🎉🎉🎉 성공!!! 🎉🎉🎉")
else:
    print("🦆 다시 도전해봐요!")

gather_index:
tensor([[[0],
         [1],
         [2]],

        [[0],
         [1],
         [2]],

        [[0],
         [1],
         [2]]])
🎉🎉🎉 성공!!! 🎉🎉🎉


In [153]:
A = torch.arange(1, 9)
A = A.reshape(2, 2, 2)
print(A.size())
print(A.shape)

torch.Size([2, 2, 2])
torch.Size([2, 2, 2])


In [157]:
# gather_index analysis

C = 1
H = 2
W = 3

diag_size = min(H, W) 
    
# A = torch.tensor([i for i in range(1, C*H*W + 1)])
# A = A.reshape(C, H, W)

gather_index = torch.arange(diag_size)
print(gather_index)

gather_index = torch.arange(diag_size).reshape(diag_size, 1)
print(gather_index)

gather_index = torch.arange(diag_size).reshape(diag_size, 1).expand(C, diag_size, 1)
print(gather_index)

# output = torch.gather(A, 2, gather_index)    
# output = output.view(C, diag_size)

# print(output)

tensor([0, 1])
tensor([[0],
        [1]])
tensor([[[0],
         [1]]])


In [156]:
# expand vs. repeat

t = torch.arange(1, 3)
t = t.reshape(2, 1)
print(t)

# expand
print(t.expand(2, 3)) # expand as a specified shape.
print(t.expand(-1, 4)) # -1: do not change this axis.

# repeat
print(t.repeat(2, 3)) # repeat as many times as.

tensor([[1],
        [2]])
tensor([[1, 1, 1],
        [2, 2, 2]])
tensor([[1, 1, 1, 1],
        [2, 2, 2, 2]])
tensor([[1, 1, 1],
        [2, 2, 2],
        [1, 1, 1],
        [2, 2, 2]])


In [160]:
# squeeze vs. unsqueeze

t = torch.arange(1, 4)
t = t.reshape(3, 1, 1)
print(t)
t = t.squeeze() # 크기가 1인 차원을 제거
print(t)

tensor([[[1]],

        [[2]],

        [[3]]])
tensor([1, 2, 3])


In [166]:
t = torch.arange(1, 4)
print(t.shape)
print(t)

t = t.unsqueeze(0) # 0-axis is always the 'most complicated' axis
print(t.shape)
print(t)

t = t.unsqueeze(0)
print(t.shape)
print(t)

torch.Size([3])
tensor([1, 2, 3])
torch.Size([1, 3])
tensor([[1, 2, 3]])
torch.Size([1, 1, 3])
tensor([[[1, 2, 3]]])


# nn.Module 클래스

In [167]:
# get two numbers and add them together

import torch
from torch import nn

class Add(nn.Module):
    """Module that returns the sum of its two inputs."""

    def __init__(self):
        super().__init__()

    def forward(self, x1, x2):
        # Delegate to torch.add and hand the result straight back.
        return torch.add(x1, x2)

x1 = torch.tensor([1])
x2 = torch.tensor([2])

add = Add()

output = add(x1, x2)
output

tensor([3])

### Module, nn.ModuleList, nn.ModuleDict

In [168]:
# Sequential
# variable pre-run assignment in __init__()

class Add(nn.Module):
    """Module that adds a constant, fixed at construction time, to its input."""

    def __init__(self, value):
        super().__init__()
        # The addend is stored before the module is ever run.
        self.value = value

    def forward(self, x):
        shifted = x + self.value
        return shifted

calculator = nn.Sequential(Add(3), # collection of modules
                           Add(2),
                           Add(5))

x = torch.tensor([1])

output = calculator(x)
output

tensor([11])

In [169]:
# ModuleList

class Add(nn.Module):
    """Add the constant given to the constructor to whatever is passed in."""

    def __init__(self, value):
        super().__init__()
        self.value = value  # constant addend

    def forward(self, x):
        total = x + self.value
        return total

class Calculator(nn.Module):
    """Apply three Add modules kept in an nn.ModuleList, in the order +3, +2, +5."""

    def __init__(self):
        super().__init__()

        # nn.ModuleList holds the Add modules as an indexable collection.
        self.add_list = nn.ModuleList([Add(2), Add(3), Add(5)])

    def forward(self, x):
        # Positions 1, 0, 2 select Add(3), Add(2), Add(5) respectively.
        for position in (1, 0, 2):
            x = self.add_list[position](x)

        return x
    
x = torch.tensor([1])

calculator = Calculator()
output = calculator(x)
output

tensor([11])

In [171]:
# ModuleDict

class Add(nn.Module):
    """Shift the input by a constant chosen when the module is created."""

    def __init__(self, value):
        super().__init__()
        self.value = value  # amount added in forward()

    def forward(self, x):
        result = x + self.value
        return result

class Calculator(nn.Module):
    """Apply three Add modules kept in an nn.ModuleDict, in the order +3, +2, +5."""

    def __init__(self):
        super().__init__()
        # Modules are looked up by string key instead of by position.
        self.add_dict = nn.ModuleDict({
            'add2': Add(2),
            'add3': Add(3),
            'add5': Add(5),
        })

    def forward(self, x):
        for key in ('add3', 'add2', 'add5'):
            x = self.add_dict[key](x)

        return x
    
x = torch.tensor([1])

calculator = Calculator()
output = calculator(x)
output

tensor([11])

In [172]:
import torch
from torch import nn


# Function
class Function_A(nn.Module):
    """Leaf module that only prints when it is constructed and when it runs."""

    def __init__(self):
        super().__init__()
        print("        Function A Initialized")

    def forward(self, x):
        # x is accepted but not used; nothing is returned.
        for stage in ("started", "done"):
            print("        Function A " + stage)

class Function_B(nn.Module):
    """Leaf module that only prints when it is constructed and when it runs."""

    def __init__(self):
        super().__init__()
        print("        Function B Initialized")

    def forward(self, x):
        # x is accepted but not used; nothing is returned.
        for stage in ("started", "done"):
            print("        Function B " + stage)

class Function_C(nn.Module):
    """Leaf module that only prints when it is constructed and when it runs."""

    def __init__(self):
        super().__init__()
        print("        Function C Initialized")

    def forward(self, x):
        # x is accepted but not used; nothing is returned.
        for stage in ("started", "done"):
            print("        Function C " + stage)

class Function_D(nn.Module):
    """Leaf module that only prints when it is constructed and when it runs."""

    def __init__(self):
        super().__init__()
        print("        Function D Initialized")

    def forward(self, x):
        # x is accepted but not used; nothing is returned.
        for stage in ("started", "done"):
            print("        Function D " + stage)


# Layer
class Layer_AB(nn.Module):
    """Layer that owns Function_A and Function_B and traces its own lifecycle."""

    def __init__(self):
        super().__init__()

        # Children are built first, so their messages print before this layer's.
        self.a = Function_A()
        self.b = Function_B()

        print("    Layer AB Initialized")

    def forward(self, x):
        print("    Layer AB started")
        # Run the children in order; their return values are ignored.
        for child in (self.a, self.b):
            child(x)
        print("    Layer AB done")

class Layer_CD(nn.Module):
    """Layer that owns Function_C and Function_D and traces its own lifecycle."""

    def __init__(self):
        super().__init__()

        # Children are built first, so their messages print before this layer's.
        self.c = Function_C()
        self.d = Function_D()

        print("    Layer CD Initialized")

    def forward(self, x):
        print("    Layer CD started")
        # Run the children in order; their return values are ignored.
        for child in (self.c, self.d):
            child(x)
        print("    Layer CD done")


# Model
class Model(nn.Module):
    """Top-level module that stacks Layer_AB and Layer_CD and traces the call flow."""

    def __init__(self):
        super().__init__()

        self.ab = Layer_AB()
        self.cd = Layer_CD()

        print("Model ABCD Initialized\n")

    def forward(self, x):
        print("Model ABCD started")
        # Both layers receive the same x; neither result is used.
        for layer in (self.ab, self.cd):
            layer(x)
        print("Model ABCD done\n")


x = torch.tensor([7])

model = Model()
model(x)

print("🎉🎉🎉 모든 딥러닝 모델은 이처럼 Module들이 쌓이고 쌓여서 만들어집니다! 🎉🎉🎉")
print("🎉🎉🎉 흐름을 느껴보시고 이 흐름이 이해가 되신 분은 다음으로 가시면 됩니다! 🎉🎉")

        Function A Initialized
        Function B Initialized
    Layer AB Initialized
        Function C Initialized
        Function D Initialized
    Layer CD Initialized
Model ABCD Initialized

Model ABCD started
    Layer AB started
        Function A started
        Function A done
        Function B started
        Function B done
    Layer AB done
    Layer CD started
        Function C started
        Function C done
        Function D started
        Function D done
    Layer CD done
Model ABCD done

🎉🎉🎉 모든 딥러닝 모델은 이처럼 Module들이 쌓이고 쌓여서 만들어집니다! 🎉🎉🎉
🎉🎉🎉 흐름을 느껴보시고 이 흐름이 이해가 되신 분은 다음으로 가시면 됩니다! 🎉🎉


### Parameter, Buffer, state_dict

In [173]:
# Parameter. 미리 만들어진 tensor들을 nn.Module 안에 보관.

import torch
from torch import nn
from torch.nn.parameter import Parameter

class Linear(nn.Module):
    """Affine layer computing ``b + x @ W.T`` with weight and bias initialised to ones."""

    def __init__(self, in_features, out_features):
        super().__init__()

        # Stored as Parameter objects so they belong to the module.
        weight_shape = (out_features, in_features)
        self.W = Parameter(torch.ones(weight_shape))
        self.b = Parameter(torch.ones(out_features))

    def forward(self, x):
        # addmm evaluates bias + x @ W.T in one call.
        return torch.addmm(self.b, x, self.W.T)
    
x = torch.Tensor([[1, 2],
                  [3, 4]])

linear = Linear(2, 3)
output = linear(x)
output

# Parameter를 사용해야만 output tensor에 gradient를 계산하는 grad_fn이 생성됨.

tensor([[4., 4., 4.],
        [8., 8., 8.]], grad_fn=<AddmmBackward>)

In [174]:
# state_dict()

linear.state_dict()

OrderedDict([('W',
              tensor([[1., 1.],
                      [1., 1.],
                      [1., 1.]])),
             ('b', tensor([1., 1., 1.]))])

In [5]:
# Buffer. Parameter는 아니지만 저장할 수 있는 tensor.
# 모델을 저장할 때 같이 저장됨.

class Model(nn.Module):
    """Hold the same value three ways: as a Parameter, a plain tensor and a buffer."""

    def __init__(self):
        super().__init__()

        # Parameter: appears in state_dict().
        self.parameter = Parameter(torch.Tensor([7]))
        # Plain tensor attribute: absent from the state_dict() printed for this cell.
        self.tensor = torch.Tensor([7])
        # Persistent buffer: not a Parameter, but still saved in state_dict().
        self.register_buffer(
            'buffer',
            torch.Tensor([7]),
            persistent=True,
        )
        
model = Model()

buffer = model.get_buffer('buffer')
print(buffer)
print(model.state_dict())

tensor([7.])
OrderedDict([('parameter', tensor([7.])), ('buffer', tensor([7.]))])


In [6]:
import torch
from torch import nn
from torch.nn.parameter import Parameter


# 아래 코드는 수정하실 필요가 없습니다!
# 하지만 아래 과제를 진행하기 전에 아래 코드를 보면서 최대한 이해해보세요!

# Function
class Function_A(nn.Module):
    """Double the input; ``name`` is kept as a plain attribute."""

    def __init__(self, name):
        super().__init__()
        self.name = name

    def forward(self, x):
        doubled = x * 2
        return doubled

class Function_B(nn.Module):
    """Divide the input by two parameters in turn (initially 10, then 2)."""

    def __init__(self):
        super().__init__()
        self.W1 = Parameter(torch.Tensor([10]))
        self.W2 = Parameter(torch.Tensor([2]))

    def forward(self, x):
        # Same order as two separate divisions: first W1, then W2.
        return (x / self.W1) / self.W2

class Function_C(nn.Module):
    """Multiply the input by the persistent buffer ``duck`` (initially 7)."""

    def __init__(self):
        super().__init__()
        # Registered as a buffer, so it is saved in state_dict() without being a Parameter.
        self.register_buffer('duck', torch.Tensor([7]), persistent=True)

    def forward(self, x):
        scaled = x * self.duck
        return scaled

class Function_D(nn.Module):
    """Compute ``c(x + W1) / W2`` with its own nested Function_C."""

    def __init__(self):
        super().__init__()
        self.W1 = Parameter(torch.Tensor([3]))
        self.W2 = Parameter(torch.Tensor([5]))
        self.c = Function_C()

    def forward(self, x):
        # Add W1, pass through the nested Function_C, then divide by W2.
        shifted = x + self.W1
        return self.c(shifted) / self.W2


# Layer
class Layer_AB(nn.Module):
    """Layer computing ``b(a(x) / 5)`` from Function_A and Function_B."""

    def __init__(self):
        super().__init__()

        self.a = Function_A('duck')
        self.b = Function_B()

    def forward(self, x):
        # The output of a is divided by 5 before it reaches b.
        return self.b(self.a(x) / 5)

class Layer_CD(nn.Module):
    """Layer computing ``d(c(x)) + 1`` from Function_C and Function_D."""

    def __init__(self):
        super().__init__()

        self.c = Function_C()
        self.d = Function_D()

    def forward(self, x):
        # c runs first; 1 is added after d.
        return self.d(self.c(x)) + 1


# Model
class Model(nn.Module):
    """Top-level module that feeds the input through Layer_AB and then Layer_CD."""

    def __init__(self):
        super().__init__()

        self.ab = Layer_AB()
        self.cd = Layer_CD()

    def forward(self, x):
        after_ab = self.ab(x)
        return self.cd(after_ab)

x = torch.tensor([7])

model = Model()
model(x)

tensor([6.5720], grad_fn=<AddBackward0>)

In [14]:
y = (((x*2/5/10/2*7)+3)*7/5)+1
y

tensor([6.5720])

In [7]:
model.state_dict()

OrderedDict([('ab.b.W1', tensor([10.])),
             ('ab.b.W2', tensor([2.])),
             ('cd.c.duck', tensor([7.])),
             ('cd.d.W1', tensor([3.])),
             ('cd.d.W2', tensor([5.])),
             ('cd.d.c.duck', tensor([7.]))])

### named_modules, named_children
### named_parameters, parameters
### named_buffers, buffers

In [8]:
# named_modules()

for name, module in model.named_modules():
    print(f"[ Name ] : {name}\n[ Module ]\n{module}")
    print("-" * 30)

[ Name ] : 
[ Module ]
Model(
  (ab): Layer_AB(
    (a): Function_A()
    (b): Function_B()
  )
  (cd): Layer_CD(
    (c): Function_C()
    (d): Function_D(
      (c): Function_C()
    )
  )
)
------------------------------
[ Name ] : ab
[ Module ]
Layer_AB(
  (a): Function_A()
  (b): Function_B()
)
------------------------------
[ Name ] : ab.a
[ Module ]
Function_A()
------------------------------
[ Name ] : ab.b
[ Module ]
Function_B()
------------------------------
[ Name ] : cd
[ Module ]
Layer_CD(
  (c): Function_C()
  (d): Function_D(
    (c): Function_C()
  )
)
------------------------------
[ Name ] : cd.c
[ Module ]
Function_C()
------------------------------
[ Name ] : cd.d
[ Module ]
Function_D(
  (c): Function_C()
)
------------------------------
[ Name ] : cd.d.c
[ Module ]
Function_C()
------------------------------


In [9]:
# named_children()

for name, child in model.named_children():
    print(f"[ Name ] : {name}\n[ Children ]\n{child}")
    print("-" * 30)

[ Name ] : ab
[ Children ]
Layer_AB(
  (a): Function_A()
  (b): Function_B()
)
------------------------------
[ Name ] : cd
[ Children ]
Layer_CD(
  (c): Function_C()
  (d): Function_D(
    (c): Function_C()
  )
)
------------------------------


In [10]:
# get_submodule()

submodule = model.get_submodule('ab.a')
submodule

Function_A()

In [11]:
# named_parameters()

for name, parameter in model.named_parameters():
    print(f"[ Name ] : {name}\n[ Parameter ]\n{parameter}")
    print("-" * 30)

[ Name ] : ab.b.W1
[ Parameter ]
Parameter containing:
tensor([10.], requires_grad=True)
------------------------------
[ Name ] : ab.b.W2
[ Parameter ]
Parameter containing:
tensor([2.], requires_grad=True)
------------------------------
[ Name ] : cd.d.W1
[ Parameter ]
Parameter containing:
tensor([3.], requires_grad=True)
------------------------------
[ Name ] : cd.d.W2
[ Parameter ]
Parameter containing:
tensor([5.], requires_grad=True)
------------------------------


In [15]:
# get_parameter()

parameter = model.get_parameter('ab.b.W1')
parameter

Parameter containing:
tensor([10.], requires_grad=True)

In [19]:
# named_buffers()

for name, buffer in model.named_buffers():
    print(f"[ Name ] : {name}\n[ Buffer ] : {buffer}")
    print("-" * 30)

[ Name ] : cd.c.duck
[ Buffer ] : tensor([7.])
------------------------------
[ Name ] : cd.d.c.duck
[ Buffer ] : tensor([7.])
------------------------------


In [20]:
# buffers()

for buffer in model.buffers():
    print(f"[ Buffer ] : {buffer}")
    print("-" * 30)

[ Buffer ] : tensor([7.])
------------------------------
[ Buffer ] : tensor([7.])
------------------------------


In [21]:
buffer = model.get_buffer('cd.c.duck')
buffer

tensor([7.])

In [25]:
# extra_repr()

class Function_A(nn.Module):
    """Double the input and show ``name`` in the module's printed representation."""

    def __init__(self, name):
        super().__init__()
        self.name = name

    def forward(self, x):
        doubled = x * 2
        return doubled

    def extra_repr(self):
        # This text appears inside the parentheses when the module is printed.
        return 'name={}'.format(self.name)
    
class Layer_AB(nn.Module):
    """Layer holding only Function_A, used to demonstrate extra_repr().

    Function_B is deliberately left out of this cell; forward() therefore
    must not call it either, otherwise it raises AttributeError on self.b.
    """

    def __init__(self):
        super().__init__()

        self.a = Function_A('duck')
        # self.b = Function_B()

    def forward(self, x):
        x = self.a(x) / 5
        # x = self.b(x)  # disabled together with self.b in __init__

        return x

class Model(nn.Module):
    """Reduced model for the extra_repr() demo: only the Layer_AB branch is active."""

    def __init__(self):
        super().__init__()

        self.ab = Layer_AB()
        # self.cd = Layer_CD()

    def forward(self, x):
        # The Layer_CD step stays disabled, matching __init__:
        # x = self.cd(x)
        return self.ab(x)

model = Model()
model

Model(
  (ab): Layer_AB(
    (a): Function_A(name=duck)
  )
)

In [36]:
# parameters(), buffers(), named_buffers()

module = nn.BatchNorm1d(10)

parameter_n = len(list(module.parameters()))
buffer_n = len(list(module.buffers()))

print(parameter_n)
print(buffer_n)

buffer_names = [name for name, _ in module.named_buffers()]
print(buffer_names)

2
3
['running_mean', 'running_var', 'num_batches_tracked']


In [39]:
# named_parameters()

for name, parameter in module.named_parameters():
    print(f"[ Name ] : {name}\n[ Parameter ]\n{parameter}")
    print("-" * 30)

[ Name ] : weight
[ Parameter ]
Parameter containing:
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], requires_grad=True)
------------------------------
[ Name ] : bias
[ Parameter ]
Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)
------------------------------


In [40]:
# named_buffers()

for name, buffer in module.named_buffers():
    print(f"[ Name ] : {name}\n[ Buffer ] : {buffer}")
    print("-" * 30)

[ Name ] : running_mean
[ Buffer ] : tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
------------------------------
[ Name ] : running_var
[ Buffer ] : tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
------------------------------
[ Name ] : num_batches_tracked
[ Buffer ] : 0
------------------------------


In [41]:
# state_dict()

module.state_dict()

OrderedDict([('weight', tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])),
             ('bias', tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])),
             ('running_mean',
              tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])),
             ('running_var', tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])),
             ('num_batches_tracked', tensor(0))])

### hook

In [64]:
# 패키지화 된 코드에서 다른 프로그래머가
# custom 코드를 중간에 실행시킬 수 있도록 만들어놓은 인터페이스

# 프로그램의 실행 로직을 분석하거나
# 프로그램에 추가적인 기능을 제공할 때 사용

def program_A(x):
    """Announce itself on stdout and return ``x + 3``."""
    print('program A processing!')
    result = x + 3
    return result

def program_B(x):
    """Announce itself on stdout and return ``x - 3``."""
    print('program B processing!')
    result = x - 3
    return result

class Package(object):
    """Run a fixed list of programs in sequence, calling user hooks after each one.

    Hooks are registered by appending callables to ``hooks``. Each hook
    receives the current value; returning None leaves the value unchanged,
    any other return value replaces it.
    """

    def __init__(self):
        self.programs = [program_A, program_B]

        # Hook interface exposed to users of the package.
        # Nothing is registered at this point.
        self.hooks = []

    def __call__(self, x):
        for program in self.programs:
            x = program(x)

            # Interface prepared in advance so that users of Package can
            # plug in their own custom programs.
            for hook in self.hooks:
                output = hook(x)

                # Only None means "no replacement"; a truthiness test would
                # silently drop legitimate falsy results such as 0.
                if output is not None:
                    x = output

        return x

package = Package()

input = 3
output = package(input)

program A processing!
program B processing!


In [65]:
# hook으로 로직 분석하기

# module 내부에 print문 찍어주는 효과

def hook_analysis(x):
    """Print the value currently flowing through the package; returns None."""
    message = 'hook for analysis, current value is {}'.format(x)
    print(message)

package.hooks = [] # 이게 왜 필요함???
package.hooks.append(hook_analysis)

input = 3
output = package(input)
print(f'output: {output}')

program A processing!
hook for analysis, current value is 6
program B processing!
hook for analysis, current value is 3
output: 3


In [66]:
# hook으로 기능 추가하기

def hook_multiply(x):
    """Announce itself on stdout and return ``x`` multiplied by 3."""
    print('hook for multiplying')
    tripled = x * 3
    return tripled

package.hooks = []
package.hooks.append(hook_multiply)

input = 3
output = package(input)
print(f'output: {output}')

program A processing!
hook for multiplying
program B processing!
hook for multiplying
output: 45


In [67]:
# hook에 여러 개의 기능 넣기

package.hooks = []
package.hooks.append(hook_multiply)
package.hooks.append(hook_analysis)

input = 3
output = package(input)

print(f'output: {output}')

program A processing!
hook for multiplying
hook for analysis, current value is 18
program B processing!
hook for multiplying
hook for analysis, current value is 45
output: 45


In [68]:
# pre_hook()

# 프로그램 실행 전에 넣으나 후에 넣으나 설계자 마음

def program_A(x):
    """Print a progress line, then return the input increased by 3."""
    print('program A processing!')
    increased = x + 3
    return increased

def program_B(x):
    """Print a progress line, then return the input decreased by 3."""
    print('program B processing!')
    decreased = x - 3
    return decreased

class Package(object):
    """Run a fixed list of programs, with user hooks before and after each one.

    ``pre_hooks`` run on the value just before each program, ``hooks`` just
    after. A hook returning None leaves the value unchanged; any other
    return value replaces it.
    """

    def __init__(self):
        self.programs = [program_A, program_B]

        # pre_hooks
        self.pre_hooks = []

        # hooks
        self.hooks = []

    @staticmethod
    def _run_hooks(hooks, x):
        """Feed x through each hook in order and return the resulting value."""
        for hook in hooks:
            output = hook(x)
            # Only None means "no replacement"; a truthiness test would
            # silently drop legitimate falsy results such as 0.
            if output is not None:
                x = output
        return x

    def __call__(self, x):
        for program in self.programs:
            x = self._run_hooks(self.pre_hooks, x)
            x = program(x)
            x = self._run_hooks(self.hooks, x)

        return x

### hook in PyTorch

In [None]:
# hook for Tensor
    # backward hook
        # function to use: tensor.register_hook(hook)

# hook for Module
    # forward pre hook
        # function to use: register_forward_pre_hook(hook)
    # forward hook
        # function to use: register_forward_hook(hook)
    # backward hook
        # function to use: register_backward_hook(hook)
    # full backward hook
        # function to use: register_full_backward_hook(hook)

In [70]:
# register_hook
# _backward_hooks # ???

import torch

t = torch.rand(1, requires_grad=True)

def tensor_hook(grad):
    """Placeholder tensor hook: takes the gradient and does nothing with it."""
    return None

t.register_hook(tensor_hook)

t._backward_hooks # ???

OrderedDict([(1, <function __main__.tensor_hook(grad)>)])

In [76]:
# register_forward_pre_hook()
# register_forward_hook()
# register_full_backward_hook()

from torch import nn

class Model(nn.Module):
    """Empty module used only to inspect where registered hooks are stored."""

    def __init__(self):
        # No parameters, buffers or submodules are added.
        super().__init__()

def module_hook(grad):
    """Placeholder hook that ignores its argument; registered but never invoked in this cell."""
    return None
    
model = Model()
model.register_forward_pre_hook(module_hook)
model.register_forward_hook(module_hook)
model.register_full_backward_hook(module_hook)

model.__dict__

{'training': True,
 '_parameters': OrderedDict(),
 '_buffers': OrderedDict(),
 '_non_persistent_buffers_set': set(),
 '_backward_hooks': OrderedDict([(4, <function __main__.module_hook(grad)>)]),
 '_is_full_backward_hook': True,
 '_forward_hooks': OrderedDict([(3, <function __main__.module_hook(grad)>)]),
 '_forward_pre_hooks': OrderedDict([(2,
               <function __main__.module_hook(grad)>)]),
 '_state_dict_hooks': OrderedDict(),
 '_load_state_dict_pre_hooks': OrderedDict(),
 '_modules': OrderedDict()}

In [None]:
# forward_pre_hooks
# forward_hooks
# backward_hooks # module-level register_backward_hook is deprecated in favor of register_full_backward_hook; tensors keep their own _backward_hooks via Tensor.register_hook
# full_backward_hooks
# state_dict_hooks # used internally

In [78]:
# forward_pre_hook
# forward_hook
# 으로 값 저장하기

class Add(nn.Module):
    """Module returning the sum of two inputs; used as the target for forward hooks."""

    def __init__(self):
        super().__init__()

    def forward(self, x1, x2):
        # Return the torch.add result directly.
        return torch.add(x1, x2)

add = Add()

answer = []

def pre_hook(module, input):
    """Forward pre-hook: record every positional input in the global ``answer`` list."""
    for item in input:
        answer.append(item)

def hook(module, input, output):
    """Forward hook: record the module output in the global ``answer`` list.

    The output is iterated, so each of its elements is stored separately.
    """
    for element in output:
        answer.append(element)

add.register_forward_pre_hook(pre_hook)
add.register_forward_hook(hook)

x1 = torch.rand(1)
x2 = torch.rand(1)

output = add(x1, x2)

answer

[tensor([0.8208]), tensor([0.0552]), tensor(0.8760)]

In [81]:
# hook으로 값 수정하기

class Add(nn.Module):
    """Module returning the sum of two inputs; its output is later altered by a hook."""

    def __init__(self):
        super().__init__()

    def forward(self, x1, x2):
        summed = torch.add(x1, x2)
        return summed

add = Add()

def hook(module, input, output):
    """Forward hook that replaces the module output with ``output + 5``."""
    adjusted = output + 5
    return adjusted

add.register_forward_hook(hook)

x1 = torch.rand(1)
x2 = torch.rand(1)

output = add(x1, x2)
print(x1+x2)
output

tensor([0.9335])


tensor([5.9335])

In [83]:
# list에 backprop gradient값 저장하기

import torch
from torch import nn
from torch.nn.parameter import Parameter

class Model(nn.Module):
    """Compute ``x1 * x2 * W`` with a single learnable scalar W (initially 5)."""

    def __init__(self):
        super().__init__()
        self.W = Parameter(torch.Tensor([5]))

    def forward(self, x1, x2):
        # Multiply the inputs first, then scale by W.
        product = x1 * x2
        return product * self.W

model = Model()

answer = []

# TODO : hook를 이용해서 x1.grad, x2.grad, output.grad 값을 알아내 answer에 저장하세요
def module_hook(module, grad_input, grad_output):
    """Backward hook: store the input gradients, then the output gradients, in ``answer``."""
    for grads in (grad_input, grad_output):
        answer.extend(grads)

# full backward hook
model.register_full_backward_hook(module_hook)


x1 = torch.rand(1, requires_grad=True)
x2 = torch.rand(1, requires_grad=True)

output = model(x1, x2)
output.retain_grad() 
# Tensor.retain_grad() -> None
# Enables this tensor to have their grad populating during backward()
output.backward() # backprop

answer # [x1.grad, x2.grad, output.grad]

# ???
# x랑 w를 이용해 y_hat 값을 구하고 그걸 y가 포함된 cost function에 넣어서
# cost function의 w에 대한 gradient를 구하는게 layer 아닌가?
# y랑 cost function이 없는데 어떻게 gradient를 구하는거지?

# -> 여기에 넣는 계산식은 forward prop이 아니라 cost function임.
# d(output) / d(input) 을 구하는게 목적.

# -> 기본적으로 input을 output으로 변화시키는 식이 존재하니까,
# input에 대한 output의 미분값을 구하는 것.
# 이건 label이 없어도 가능.
# 즉, forward prop과 cost function은 본질적으로 다르지 않다. 
# 끝에 cost function만 붙이면 됨.
# 첫번째 질문에 대한 답: cost function도 그냥 function이고, 
# 결국 input과 식을 줬을 때 output을 계산하고, 
# d(output) / d(input) 을 계산하는 구조라는 것.
# label과 cost function을 끝에 추가해 완성시켜 주면,
# d(output, i.e. cost) / d(input, i.e. weight) 를 알아내고,
# 거기다 learning rate를 곱해 weight를 update할 amount를 정하는 것.

[tensor([4.8898]), tensor([2.8644]), tensor([1.])]

In [84]:
# module 단위는 backward hook은 input gradient, output gradient값만 가져와서
# module 내부의 tensor의 gradient값은 알아낼 수 없음.
# module의 Parameter W의 gradient값을 알아내기 위해
# tensor 단위의 backward hook 사용해야.

import torch
from torch import nn
from torch.nn.parameter import Parameter

class Model(nn.Module):
    """Compute ``(x1 * x2) * W``; W is the parameter whose gradient the tensor hook reads."""

    def __init__(self):
        super().__init__()
        self.W = Parameter(torch.Tensor([5]))

    def forward(self, x1, x2):
        return (x1 * x2) * self.W

model = Model()

answer = []

# TODO : hook를 이용해서 W의 gradient 값을 알아내 answer에 저장하세요
def tensor_hook(grad):
    """Tensor backward hook: store each element of the gradient in the global ``answer``."""
    for element in grad:
        answer.append(element)

# model.W에 register_hook() 적용
model.W.register_hook(tensor_hook)


x1 = torch.rand(1, requires_grad=True)
x2 = torch.rand(1, requires_grad=True)

output = model(x1, x2)
output.backward()

answer # [model.W.grad]

[tensor(0.3045)]

### apply

In [98]:
import torch
from torch import nn

@torch.no_grad()  # the in-place fill below must not be recorded by autograd
def init_weights(m):
    """Fill the weight of a linear layer with 1.0; intended for ``Module.apply``.

    ``net.apply(init_weights)`` calls this once for every submodule and then
    for ``net`` itself, so ``m`` is each module in turn. Modules that are not
    linear layers are only printed.
    """
    print(m)  # show which module is being visited
    # isinstance, unlike an exact type comparison, also matches subclasses of nn.Linear.
    if isinstance(m, nn.Linear):
        m.weight.fill_(1.0)
        print(m.weight)  # show the weight after initialisation

# Two stacked 2->2 linear layers.
net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))
net.apply(init_weights) # calls init_weights on each child first, then on net itself; returns net (echoed as the cell result)

Linear(in_features=2, out_features=2, bias=True)
Parameter containing:
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
Linear(in_features=2, out_features=2, bias=True)
Parameter containing:
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
Sequential(
  (0): Linear(in_features=2, out_features=2, bias=True)
  (1): Linear(in_features=2, out_features=2, bias=True)
)


Sequential(
  (0): Linear(in_features=2, out_features=2, bias=True)
  (1): Linear(in_features=2, out_features=2, bias=True)
)

In [99]:
import torch
from torch import nn
from torch.nn.parameter import Parameter


# 아래 코드는 수정하실 필요가 없습니다!
# 하지만 아래 과제를 진행하기 전에 아래 코드를 보면서 최대한 이해해보세요!

# Function
class Function_A(nn.Module):
    """Addition block: ``forward(x)`` returns ``x + W`` for a learnable ``W``."""

    def __init__(self, name):
        super().__init__()
        # W: one-element parameter with a random initial value from torch.rand.
        initial_w = torch.rand(1)
        self.W = Parameter(initial_w)
        # Plain label string (not a parameter).
        self.name = name

    def forward(self, x):
        shifted = x + self.W
        return shifted

class Function_B(nn.Module):
    """Subtraction block: ``forward(x)`` returns ``x - W`` for a learnable ``W``."""

    def __init__(self, name):
        super().__init__()
        # W: one-element parameter with a random initial value from torch.rand.
        initial_w = torch.rand(1)
        self.W = Parameter(initial_w)
        # Plain label string (not a parameter).
        self.name = name

    def forward(self, x):
        reduced = x - self.W
        return reduced

class Function_C(nn.Module):
    """Multiplication block: ``forward(x)`` returns ``x * W`` for a learnable ``W``."""

    def __init__(self, name):
        super().__init__()
        # W: one-element parameter with a random initial value from torch.rand.
        initial_w = torch.rand(1)
        self.W = Parameter(initial_w)
        # Plain label string (not a parameter).
        self.name = name

    def forward(self, x):
        scaled = x * self.W
        return scaled

class Function_D(nn.Module):
    """Division block: ``forward(x)`` returns ``x / W`` for a learnable ``W``."""

    def __init__(self, name):
        super().__init__()
        # W: one-element parameter with a random initial value from torch.rand.
        initial_w = torch.rand(1)
        self.W = Parameter(initial_w)
        # Plain label string (not a parameter).
        self.name = name

    def forward(self, x):
        quotient = x / self.W
        return quotient


# Layer
class Layer_AB(nn.Module):
    """Layer that feeds its input through ``Function_A`` and then ``Function_B``."""

    def __init__(self):
        super().__init__()
        # Submodules are registered as "a" and "b", in that order.
        self.a = Function_A('plus')
        self.b = Function_B('substract')

    def forward(self, x):
        """Return ``b(a(x))``."""
        return self.b(self.a(x))

class Layer_CD(nn.Module):
    """Layer that feeds its input through ``Function_C`` and then ``Function_D``."""

    def __init__(self):
        super().__init__()
        # Submodules are registered as "c" and "d", in that order.
        self.c = Function_C('multiply')
        self.d = Function_D('divide')

    def forward(self, x):
        """Return ``d(c(x))``."""
        return self.d(self.c(x))


# Model
class Model(nn.Module):
    """Top-level module: applies ``Layer_AB`` and then ``Layer_CD``."""

    def __init__(self):
        super().__init__()
        # Submodules are registered as "ab" and "cd", in that order.
        self.ab = Layer_AB()
        self.cd = Layer_CD()

    def forward(self, x):
        """Return ``cd(ab(x))``."""
        return self.cd(self.ab(x))


model = Model()

In [100]:
def print_module(module):
    """Print ``module`` followed by a separator line of 30 dashes."""
    separator = "-" * 30
    print(module, separator, sep="\n")

# 🦆 apply() returns the module it was called on.
returned_module = model.apply(print_module)
# Q: what is returned_module used for?
# A: apply() returns self, so this is the same object as `model`; the return value mainly allows chaining.

Function_A()
------------------------------
Function_B()
------------------------------
Layer_AB(
  (a): Function_A()
  (b): Function_B()
)
------------------------------
Function_C()
------------------------------
Function_D()
------------------------------
Layer_CD(
  (c): Function_C()
  (d): Function_D()
)
------------------------------
Model(
  (ab): Layer_AB(
    (a): Function_A()
    (b): Function_B()
  )
  (cd): Layer_CD(
    (c): Function_C()
    (d): Function_D()
  )
)
------------------------------


In [107]:
# Fresh model with randomly initialised parameters.
model = Model()

# Parameter values before initialisation (first line of the cell output).
print(model.state_dict())

# TODO : apply를 이용해 모든 Parameter 값을 1로 만들어보세요!
def weight_initialization(module):
    """Set the ``W`` parameter of every ``Function_*`` module to 1.

    Meant to be passed to ``nn.Module.apply``. A module qualifies when the
    part of its class name before the first underscore is ``"Function"``;
    every other module is ignored. Returns None.

    The fill runs under ``torch.no_grad()`` instead of going through
    ``W.data``: the resulting values are the same, but the in-place write
    stays visible to autograd's version tracking rather than bypassing it.
    """
    class_prefix = module.__class__.__name__.split('_')[0]
    if class_prefix != "Function":
        return

    with torch.no_grad():
        module.W.fill_(1.)

# 🦆 apply() returns the module it was called on.
returned_module = model.apply(weight_initialization)


# The code below does not need to be modified.
x = torch.rand(1)

output = model(x)

# Parameter values after initialisation (second line of the cell output).
print(model.state_dict())

OrderedDict([('ab.a.W', tensor([0.1178])), ('ab.b.W', tensor([0.3057])), ('cd.c.W', tensor([0.1448])), ('cd.d.W', tensor([0.4942]))])
OrderedDict([('ab.a.W', tensor([1.])), ('ab.b.W', tensor([1.])), ('cd.c.W', tensor([1.])), ('cd.d.W', tensor([1.]))])


In [108]:
model = Model()

# TODO : apply를 이용해서 부덕이가 원하는대로 repr 출력을 수정해주세요!
from functools import partial

def function_repr(self):
    """Build the ``extra_repr`` text ``name=<self.name>`` for a module."""
    return 'name={}'.format(self.name)

def add_repr(module):
    """Give every ``Function_*`` module an ``extra_repr`` that shows its name.

    Meant to be passed to ``nn.Module.apply``. Modules whose class name does
    not start with ``Function`` (up to the first underscore) are skipped.
    """
    class_prefix = module.__class__.__name__.split('_')[0]
    if class_prefix != "Function":
        return

    # extra_repr is set on the instance, so no ``self`` is bound automatically;
    # partial pre-supplies the module as function_repr's first argument.
    module.extra_repr = partial(function_repr, module)


# 🦆 apply() returns the module it was called on.
returned_module = model.apply(add_repr)

In [109]:
model

Model(
  (ab): Layer_AB(
    (a): Function_A(name=plus)
    (b): Function_B(name=substract)
  )
  (cd): Layer_CD(
    (c): Function_C(name=multiply)
    (d): Function_D(name=divide)
  )
)

In [110]:
# repr() of the model now includes the name=... text added through extra_repr.
model_repr = repr(model)
print(model_repr)

Model(
  (ab): Layer_AB(
    (a): Function_A(name=plus)
    (b): Function_B(name=substract)
  )
  (cd): Layer_CD(
    (c): Function_C(name=multiply)
    (d): Function_D(name=divide)
  )
)


In [None]:
# 현재 4개의 Function A, B, C, D가 있어요!

# - A : x + W
# - B : x - W
# - C : x * W
# - D : x / W

# 이걸 다음처럼 linear transformation처럼 동작하도록 바꿔보래요!

# - A : x @ W + b
# - B : x @ W + b
# - C : x @ W + b
# - D : x @ W + b

# W는 이미 각 Function에 생성된 Parameter이고
# b는 새롭게 만들어야 하는 Parameter에요!

In [118]:
import torch
from torch import nn
from torch.nn.parameter import Parameter


# 아래 코드는 수정하실 필요가 없습니다!
# 실행만 시켜주시고 다음 셀로 넘어가주세요!

# Function
class Function_A(nn.Module):
    """Addition block: ``forward(x)`` returns ``x + W`` for a learnable 2x2 ``W``."""

    def __init__(self, name):
        super().__init__()
        # W: 2x2 parameter with random initial values from torch.rand.
        initial_w = torch.rand(2, 2)
        self.W = Parameter(initial_w)
        # Plain label string (not a parameter).
        self.name = name

    def forward(self, x):
        shifted = x + self.W
        return shifted

class Function_B(nn.Module):
    """Subtraction block: ``forward(x)`` returns ``x - W`` for a learnable 2x2 ``W``."""

    def __init__(self, name):
        super().__init__()
        # W: 2x2 parameter with random initial values from torch.rand.
        initial_w = torch.rand(2, 2)
        self.W = Parameter(initial_w)
        # Plain label string (not a parameter).
        self.name = name

    def forward(self, x):
        reduced = x - self.W
        return reduced

class Function_C(nn.Module):
    """Multiplication block: ``forward(x)`` returns ``x * W`` (element-wise) for a learnable 2x2 ``W``."""

    def __init__(self, name):
        super().__init__()
        # W: 2x2 parameter with random initial values from torch.rand.
        initial_w = torch.rand(2, 2)
        self.W = Parameter(initial_w)
        # Plain label string (not a parameter).
        self.name = name

    def forward(self, x):
        scaled = x * self.W
        return scaled

class Function_D(nn.Module):
    """Division block: ``forward(x)`` returns ``x / W`` (element-wise) for a learnable 2x2 ``W``."""

    def __init__(self, name):
        super().__init__()
        # W: 2x2 parameter with random initial values from torch.rand.
        initial_w = torch.rand(2, 2)
        self.W = Parameter(initial_w)
        # Plain label string (not a parameter).
        self.name = name

    def forward(self, x):
        quotient = x / self.W
        return quotient


# Layer
class Layer_AB(nn.Module):
    """Layer that feeds its input through ``Function_A`` and then ``Function_B``."""

    def __init__(self):
        super().__init__()
        # Submodules are registered as "a" and "b", in that order.
        self.a = Function_A('plus')
        self.b = Function_B('substract')

    def forward(self, x):
        """Return ``b(a(x))``."""
        return self.b(self.a(x))

class Layer_CD(nn.Module):
    """Layer that feeds its input through ``Function_C`` and then ``Function_D``."""

    def __init__(self):
        super().__init__()
        # Submodules are registered as "c" and "d", in that order.
        self.c = Function_C('multiply')
        self.d = Function_D('divide')

    def forward(self, x):
        """Return ``d(c(x))``."""
        return self.d(self.c(x))


# Model
class Model(nn.Module):
    """Top-level module: applies ``Layer_AB`` and then ``Layer_CD``."""

    def __init__(self):
        super().__init__()
        # Submodules are registered as "ab" and "cd", in that order.
        self.ab = Layer_AB()
        self.cd = Layer_CD()

    def forward(self, x):
        """Return ``cd(ab(x))``."""
        return self.cd(self.ab(x))

In [119]:
# register_parameter()
# data.fill_()
# Function_A, B, C, D의 계산 결과 되돌리기
# torch.addmm(b, output, W.T)
# module.register_forward_hook()
# returned_module = model.apply()

model = Model()


# TODO : apply를 이용해 Parameter b를 추가해보세요!
def add_bias(module):
    """Register a new two-element parameter ``b`` on every ``Function_*`` module.

    Meant to be passed to ``nn.Module.apply``. Modules whose class name does
    not start with ``Function`` (up to the first underscore) are skipped.
    """
    class_prefix = module.__class__.__name__.split('_')[0]
    if class_prefix != "Function":
        return

    # Random initial values; a later initialisation step may overwrite them.
    bias = Parameter(torch.rand(2))
    module.register_parameter('b', bias)


# TODO : apply를 이용해 추가된 b도 값을 1로 초기화해주세요!
def weight_initialization(module):
    """Set the ``W`` and ``b`` parameters of every ``Function_*`` module to 1.

    Meant to be passed to ``nn.Module.apply``. A module qualifies when the
    part of its class name before the first underscore is ``"Function"``;
    every other module is ignored. A qualifying module must already have
    both ``W`` and ``b``. Returns None.

    The fills run under ``torch.no_grad()`` instead of going through
    ``.data``: the resulting values are the same, but the in-place writes
    stay visible to autograd's version tracking rather than bypassing it.
    """
    class_prefix = module.__class__.__name__.split('_')[0]
    if class_prefix != "Function":
        return

    with torch.no_grad():
        module.W.fill_(1.)
        module.b.fill_(1.)


# TODO : apply를 이용해 모든 Function을 linear transformation으로 바꿔보세요!
#        X @ W + b
# Class names whose forward result is replaced by the linear transformation.
_LINEAR_FUNCTION_NAMES = frozenset(
    {"Function_A", "Function_B", "Function_C", "Function_D"}
)


def _linear_forward_hook(module, inputs, output):
    """Forward hook: replace the module's output with ``x @ W.T + b``.

    ``inputs`` is the tuple of positional arguments the module was called
    with and ``output`` is what its own ``forward`` produced. ``output`` is
    used only for its shape, dtype and device, so that an input which relied
    on broadcasting against ``W`` ends up with the same shape as before.
    """
    x = torch.as_tensor(inputs[0], dtype=output.dtype, device=output.device)
    x = x.expand_as(output)
    return torch.addmm(module.b, x, module.W.T)


def linear_transformation(module):
    """Make a ``Function_A`` .. ``Function_D`` module compute ``x @ W.T + b``.

    Meant to be passed to ``nn.Module.apply``. For the four known class names
    a forward hook is registered whose return value replaces the module's own
    output; any other module is left untouched. The module must already have
    the parameters ``W`` and ``b``. Returns None.

    The hook takes ``x`` from the hook's input tuple instead of undoing the
    module's arithmetic on its output (``(x + W) - W``, ``(x * W) / W``, ...).
    Undoing only recovers ``x`` approximately in floating point and produces
    NaN for ``Function_C`` wherever ``W`` is 0 (``x * 0 / 0``). Reading the
    input also lets all four classes share a single hook.

    NOTE: the product uses ``W.T``, exactly as the per-class hooks did; the
    task text says ``X @ W + b`` -- confirm which orientation is intended.
    """
    if module.__class__.__name__ not in _LINEAR_FUNCTION_NAMES:
        return

    module.register_forward_hook(_linear_forward_hook)

# Order matters: b has to exist before it can be initialised or used by the hooks.
returned_module = model.apply(add_bias)
returned_module = model.apply(weight_initialization)
returned_module = model.apply(linear_transformation)

In [120]:
model.state_dict()

OrderedDict([('ab.a.W',
              tensor([[1., 1.],
                      [1., 1.]])),
             ('ab.a.b', tensor([1., 1.])),
             ('ab.b.W',
              tensor([[1., 1.],
                      [1., 1.]])),
             ('ab.b.b', tensor([1., 1.])),
             ('cd.c.W',
              tensor([[1., 1.],
                      [1., 1.]])),
             ('cd.c.b', tensor([1., 1.])),
             ('cd.d.W',
              tensor([[1., 1.],
                      [1., 1.]])),
             ('cd.d.b', tensor([1., 1.]))])

In [131]:
# Random 2x2 input that takes part in autograd.
x = torch.rand(2, 2, requires_grad=True)

print(x)

output = model(x)
# Reduce to a scalar so backward() can be called without an explicit gradient argument.
output = output.sum()
output.backward()
print(output)

tensor([[0.7199, 0.8792],
        [0.0292, 0.5850]], requires_grad=True)
tensor(95.4112, grad_fn=<SumBackward0>)


In [133]:
# Pair every parameter name with the gradient accumulated by the backward() call above.
grads = [(name, param.grad) for name, param in model.named_parameters()]

print(grads)

[('ab.a.W', tensor([[69.7755, 71.5895],
        [69.7755, 71.5895]])), ('ab.a.b', tensor([144., 144.])), ('ab.b.W', tensor([[142.6825, 142.6825],
        [142.6825, 142.6825]])), ('ab.b.b', tensor([72., 72.])), ('cd.c.W', tensor([[178.6825, 178.6825],
        [178.6825, 178.6825]])), ('cd.c.b', tensor([36., 36.])), ('cd.d.W', tensor([[196.6825, 196.6825],
        [196.6825, 196.6825]])), ('cd.d.b', tensor([18., 18.]))]
