# CNN

In [1]:
import torch
from torch import nn

In [2]:
x=torch.randn(100,3) # 100개의 데이터, 3개의 채널
layer=nn.Linear(3,5) # 3개의 채널을 받아 5개의 채널을 출력하는 선형 레이어
print(layer(x).shape)
print(layer.weight)
print(layer.bias)

torch.Size([100, 5])
Parameter containing:
tensor([[ 0.4127, -0.4809,  0.1796],
        [ 0.2616, -0.1514, -0.0217],
        [ 0.3478,  0.0868,  0.2024],
        [-0.2230,  0.0911, -0.5429],
        [ 0.2235,  0.4159, -0.4302]], requires_grad=True)
Parameter containing:
tensor([ 0.0323, -0.2126, -0.4735,  0.0454,  0.3203], requires_grad=True)


In [3]:
x=torch.randn(2,5)
layer=nn.ReLU()
print(layer(x))

tensor([[0.0000, 0.0000, 0.9201, 0.5228, 1.7621],
        [0.0000, 0.0409, 0.0000, 0.0924, 0.0000]])


In [4]:
x = torch.randn(3,7)
drop = nn.Dropout(p=0.3) # p는 죽일 확률임! 실제 논문에서는 살릴 확률로 쓰니까 주의
print(drop(x))

tensor([[ 0.0000, -1.0716, -0.0000, -2.2039, -0.4534, -1.1902, -1.5283],
        [ 1.2103, -2.3524, -0.0000,  0.3578,  0.0000, -1.4635,  0.2788],
        [-0.0000, -0.0000, -0.0513,  0.4060, -2.3128, -1.2807, -0.0000]])


In [5]:
class sample_model(nn.Module):
    def __init__(self):
        super().__init__()
        self.drop_layer=nn.Sequential(nn.Linear(5,7),
                                    #   nn.ReLU(),
                                      nn.Dropout(p=0.3))
                                      
    def forward(self, x):
        x = self.drop_layer(x)
        return x

model=sample_model()
model.train() # train mode로 전환
x=torch.randn(3,5) # 그 담에 x=torch.randn(2,3,5)
print(model(x))

model.eval() # eval mode로 전환하면 dropout이 적용되지 않음
print(model(x)) # test mode

tensor([[ 0.2398,  0.6868, -0.0000, -0.8432,  0.2565,  0.5256, -0.3514],
        [-0.0000,  0.0000, -1.7343,  0.4125, -0.0000,  0.0000,  0.7228],
        [-0.6500, -0.8030, -1.5039, -0.8714,  1.3981, -1.5781, -1.9703]],
       grad_fn=<MulBackward0>)
tensor([[ 0.1679,  0.4808, -1.1458, -0.5903,  0.1796,  0.3679, -0.2460],
        [-0.0341,  1.3099, -1.2140,  0.2887, -0.4504,  1.5390,  0.5060],
        [-0.4550, -0.5621, -1.0527, -0.6100,  0.9787, -1.1047, -1.3792]],
       grad_fn=<AddmmBackward0>)


In [6]:
# 인채널, 아웃채널, 커널사이즈는 필수로 써줘야 함
layer = nn.Conv2d(in_channels=1, out_channels=2, kernel_size=3) # stride=1, padding=0 이 디폴트
# out_channels = 2 의 의미는 각기 다른 필터를 2개 적용한다는 의미해서 2개의 채널이 나오게 하라는 뜻
# kernel_size = 3 의 의미는 필터의 크기를 3x3으로 하라는 뜻, 필터의 크기는 정사각형이어야 함
# stride = 1 의 의미는 필터를 한 칸씩 움직이라는 뜻
# padding = 0 의 의미는 필터를 적용하기 전에 0으로 채워서 테두리를 늘리라는 뜻

layer(torch.randn(32,1,5,5)).shape
# nn.Linear(3,5) # 채채 # 근데 얘는 채 또는 개채를 원함, 개x3 => 개x5
# nn.Conv2d(3,5) # 채채 # 근데 얘는 채행열 또는 개채행열을 원함, 개x3x행x열 => 개x5x행x열

torch.Size([32, 2, 3, 3])

In [7]:
layer = nn.Conv2d(3,5,3) # 3개의 특징으로부터 5개의 특징 추출 //weight(=필터=커널)가 3x3x3 짜리 5개 존재 (5x3x3x3)
x = torch.randn(32,3,21,21)
print(layer(x).shape) # 32개의 데이터, 5개의 채널, 19x19 행렬
print(layer.weight.shape) # 5개 필터, 3개 채널, 3x3 필터, 필터 수는 무조건 입력 채널 수와 같아야 함! 쌍이 맞아야 올라타면서 학습이 가능.

torch.Size([32, 5, 19, 19])
torch.Size([5, 3, 3, 3])


In [8]:
conv1 = nn.Conv2d(1,8,6, stride=2)
x=torch.randn(32,1,28,28)
print(conv1(x).shape)
print(conv1.weight.shape) # 8개 필터, 1개 채널, 6x6 필터
# 와 잠만 이거 nn.Linear랑 거의 유사하네


conv2 = nn.Conv2d(8,16,3, padding=1)
print(conv2(conv1(x)).shape)

Maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
print(Maxpool(conv2(conv1(x))).shape)

torch.Size([32, 8, 12, 12])
torch.Size([8, 1, 6, 6])
torch.Size([32, 16, 12, 12])
torch.Size([32, 16, 6, 6])


In [9]:
maxpool=nn.MaxPool2d(2) # 2 로만 줘도 자동 kernel_size=2, stride=2
x=torch.randn(1,6,6)
print(x)
print(maxpool(x))
print(maxpool(torch.randn(32,3,6,6)).shape)

tensor([[[-2.2179,  0.0710,  0.9323, -0.4986, -2.2351,  1.9611],
         [ 2.9606, -0.3901, -0.8712,  0.2719, -0.2909, -0.0358],
         [-0.3729,  1.1462,  0.3823,  0.6832,  2.0632, -0.6253],
         [-1.5192,  0.8155, -0.4857,  0.3455, -0.8486,  0.8644],
         [ 1.3422, -1.0035,  0.5161,  0.2926,  0.1040, -2.1881],
         [-1.3049,  1.3574, -0.0702,  0.6699, -0.7768,  1.4905]]])
tensor([[[2.9606, 0.9323, 1.9611],
         [1.1462, 0.6832, 2.0632],
         [1.3574, 0.6699, 1.4905]]])
torch.Size([32, 3, 3, 3])


In [10]:
maxpool=nn.AvgPool2d(2)
x=torch.randn(1,6,6)
print(x)
print(maxpool(x))
print(maxpool(torch.randn(32,3,6,6)).shape)

tensor([[[-0.3270,  1.4575,  0.4901, -0.2343,  0.9690, -1.6075],
         [-2.0361, -0.6231,  0.7144,  1.2854,  0.5837, -0.5672],
         [-2.1159, -0.2798,  0.1781,  0.7964, -0.4924, -0.4268],
         [-2.3576,  0.8733,  0.5373, -1.4436, -0.4974, -1.3240],
         [-0.1455,  1.4436, -0.4600, -2.6458,  0.4322, -0.0878],
         [-0.0594, -0.7438,  1.0642,  1.2813,  0.7724, -0.6452]]])
tensor([[[-0.3822,  0.5639, -0.1555],
         [-0.9700,  0.0171, -0.6851],
         [ 0.1237, -0.1901,  0.1179]]])
torch.Size([32, 3, 3, 3])


In [11]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()

        self.conv1 = nn.Conv2d(1,8,6,stride=2)
        self.conv2 = nn.Conv2d(8,16,3,padding=1)
        self.Maxpool = nn.MaxPool2d(2)
        self.fc = nn.Linear(16*6*6, 10) # 왜 필요한가? => 16x6x6 을 10개의 클래스로 분류하겠다는 의미
        # 처음부터 MLP 레이어를 쌓아서 10개의 클래스로 분류하면 안되는 이유는?
            # -> 이미지는 2차원이기 때문에 1차원으로 펼쳐서 분류하면 정보가 손실됨.
            # 따라서 conv 레이어를 쌓아서 지역 별 특징을 추출한 후에 MLP 레이어를 쌓아서 분류해야 함

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.Maxpool(x)
        x = torch.flatten(x,start_dim=1)
        x = self.fc(x)
        return x

x = torch.randn(32,1,28,28)
model = CNN()
# first weight
print(model.conv1.weight.shape) # 8개 필터, 1개 채널, 6x6 필터
print(model.conv2.weight.shape) # 16개 필터, 8개 채널, 3x3 필터
print(model.fc.weight.shape) # 16x6x6, 10
print(model(x).shape)

torch.Size([8, 1, 6, 6])
torch.Size([16, 8, 3, 3])
torch.Size([10, 576])
torch.Size([32, 10])


## .parameters() vs .modules() vs .children() 그리고 isinstance의 활용

In [12]:
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Sequential(nn.Linear(2,3),
                                 nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(3,4),
                                 nn.ReLU())
        self.fc_out = nn.Sequential(nn.Linear(4,1),
                                    nn.Sigmoid())
    def forward(self,x):
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc_out(x)
        return x
        
model = MLP()
print(model(torch.randn(2,2)).shape)
print(model)

torch.Size([2, 1])
MLP(
  (fc1): Sequential(
    (0): Linear(in_features=2, out_features=3, bias=True)
    (1): ReLU()
  )
  (fc2): Sequential(
    (0): Linear(in_features=3, out_features=4, bias=True)
    (1): ReLU()
  )
  (fc_out): Sequential(
    (0): Linear(in_features=4, out_features=1, bias=True)
    (1): Sigmoid()
  )
)


In [13]:
model.parameters()

<generator object Module.parameters at 0x7f992b115190>

In [14]:
list(model.parameters())[0]
# [layer0 weight 값, layer0 bias 값, layer1 weight 값, layer1 bias 값, ...]

Parameter containing:
tensor([[ 0.6661, -0.6883],
        [ 0.6022,  0.4426],
        [-0.2296,  0.6641]], requires_grad=True)

In [15]:
# for transfer learning, 즉 기존 모델을 가져와서 새로운 데이터에 적용할 때
# 기존 모델의 파라미터를 freeze 시키고 새로운 모델을 추가로 쌓아서 학습시키는 방법

model = MLP()
params = [p for p in model.parameters() if p.requires_grad]
print(params,'\n')

for p in model.parameters(): # 전체 freeze
    p.requires_grad = False

model.fc_out = nn.Linear(4,10)

params = [p for p in model.parameters() if p.requires_grad]
print(params)

from torch import optim
optimizer = optim.Adam(params, lr=0.1)

[Parameter containing:
tensor([[ 0.5185, -0.4796],
        [-0.3639, -0.2542],
        [-0.2472,  0.3888]], requires_grad=True), Parameter containing:
tensor([ 0.2918,  0.5136, -0.3614], requires_grad=True), Parameter containing:
tensor([[-0.0701, -0.2292, -0.0593],
        [-0.5003, -0.1136, -0.2190],
        [ 0.4834, -0.0945, -0.1212],
        [-0.5641, -0.3302, -0.5684]], requires_grad=True), Parameter containing:
tensor([-0.1486, -0.3963, -0.1235, -0.2654], requires_grad=True), Parameter containing:
tensor([[0.0362, 0.3262, 0.3917, 0.1626]], requires_grad=True), Parameter containing:
tensor([-0.0322], requires_grad=True)] 

[Parameter containing:
tensor([[-0.2408,  0.2955,  0.3652,  0.4522],
        [ 0.2469, -0.1609, -0.2224,  0.0450],
        [-0.4627,  0.0132, -0.2802,  0.1838],
        [-0.2008, -0.0875,  0.1242, -0.1512],
        [ 0.0062, -0.1597,  0.4149,  0.3072],
        [-0.3715, -0.0226, -0.4107, -0.4296],
        [ 0.3671, -0.0528,  0.2371, -0.4427],
        [ 0.0472, 

In [16]:
list(model.named_parameters())
# [('layer0.weight', weight 값), ('layer0.bias', bias 값), ('layer1.weight', weight 값), ('layer1.bias', bias 값), ...]

[('fc1.0.weight', Parameter containing:
  tensor([[ 0.5185, -0.4796],
          [-0.3639, -0.2542],
          [-0.2472,  0.3888]])), ('fc1.0.bias', Parameter containing:
  tensor([ 0.2918,  0.5136, -0.3614])), ('fc2.0.weight', Parameter containing:
  tensor([[-0.0701, -0.2292, -0.0593],
          [-0.5003, -0.1136, -0.2190],
          [ 0.4834, -0.0945, -0.1212],
          [-0.5641, -0.3302, -0.5684]])), ('fc2.0.bias', Parameter containing:
  tensor([-0.1486, -0.3963, -0.1235, -0.2654])), ('fc_out.weight',
  Parameter containing:
  tensor([[-0.2408,  0.2955,  0.3652,  0.4522],
          [ 0.2469, -0.1609, -0.2224,  0.0450],
          [-0.4627,  0.0132, -0.2802,  0.1838],
          [-0.2008, -0.0875,  0.1242, -0.1512],
          [ 0.0062, -0.1597,  0.4149,  0.3072],
          [-0.3715, -0.0226, -0.4107, -0.4296],
          [ 0.3671, -0.0528,  0.2371, -0.4427],
          [ 0.0472,  0.0030, -0.4604, -0.1683],
          [ 0.2999,  0.2675,  0.2009, -0.2675],
          [ 0.0399,  0.1282,  0.

In [17]:
for name, p in model.named_parameters():
    print(name)
    print(p)

fc1.0.weight
Parameter containing:
tensor([[ 0.5185, -0.4796],
        [-0.3639, -0.2542],
        [-0.2472,  0.3888]])
fc1.0.bias
Parameter containing:
tensor([ 0.2918,  0.5136, -0.3614])
fc2.0.weight
Parameter containing:
tensor([[-0.0701, -0.2292, -0.0593],
        [-0.5003, -0.1136, -0.2190],
        [ 0.4834, -0.0945, -0.1212],
        [-0.5641, -0.3302, -0.5684]])
fc2.0.bias
Parameter containing:
tensor([-0.1486, -0.3963, -0.1235, -0.2654])
fc_out.weight
Parameter containing:
tensor([[-0.2408,  0.2955,  0.3652,  0.4522],
        [ 0.2469, -0.1609, -0.2224,  0.0450],
        [-0.4627,  0.0132, -0.2802,  0.1838],
        [-0.2008, -0.0875,  0.1242, -0.1512],
        [ 0.0062, -0.1597,  0.4149,  0.3072],
        [-0.3715, -0.0226, -0.4107, -0.4296],
        [ 0.3671, -0.0528,  0.2371, -0.4427],
        [ 0.0472,  0.0030, -0.4604, -0.1683],
        [ 0.2999,  0.2675,  0.2009, -0.2675],
        [ 0.0399,  0.1282,  0.2237,  0.4729]], requires_grad=True)
fc_out.bias
Parameter containing

In [18]:
model.modules()

<generator object Module.modules at 0x7f992b115900>

In [19]:
list(model.modules())

[MLP(
   (fc1): Sequential(
     (0): Linear(in_features=2, out_features=3, bias=True)
     (1): ReLU()
   )
   (fc2): Sequential(
     (0): Linear(in_features=3, out_features=4, bias=True)
     (1): ReLU()
   )
   (fc_out): Linear(in_features=4, out_features=10, bias=True)
 ), Sequential(
   (0): Linear(in_features=2, out_features=3, bias=True)
   (1): ReLU()
 ), Linear(in_features=2, out_features=3, bias=True), ReLU(), Sequential(
   (0): Linear(in_features=3, out_features=4, bias=True)
   (1): ReLU()
 ), Linear(in_features=3, out_features=4, bias=True), ReLU(), Linear(in_features=4, out_features=10, bias=True)]

In [20]:
print([m for m in model.modules() if isinstance(m, nn.Linear)]) # nn.Linear 레이어만 출력
print([m.weight for m in model.modules() if isinstance(m, nn.Linear)]) # nn.Linear 레이어의 weight만 출력
print([m.weight.grad for m in model.modules() if isinstance(m, nn.Linear)]) # nn.Linear 레이어의 weight의 gradient만 출력

[Linear(in_features=2, out_features=3, bias=True), Linear(in_features=3, out_features=4, bias=True), Linear(in_features=4, out_features=10, bias=True)]
[Parameter containing:
tensor([[ 0.5185, -0.4796],
        [-0.3639, -0.2542],
        [-0.2472,  0.3888]]), Parameter containing:
tensor([[-0.0701, -0.2292, -0.0593],
        [-0.5003, -0.1136, -0.2190],
        [ 0.4834, -0.0945, -0.1212],
        [-0.5641, -0.3302, -0.5684]]), Parameter containing:
tensor([[-0.2408,  0.2955,  0.3652,  0.4522],
        [ 0.2469, -0.1609, -0.2224,  0.0450],
        [-0.4627,  0.0132, -0.2802,  0.1838],
        [-0.2008, -0.0875,  0.1242, -0.1512],
        [ 0.0062, -0.1597,  0.4149,  0.3072],
        [-0.3715, -0.0226, -0.4107, -0.4296],
        [ 0.3671, -0.0528,  0.2371, -0.4427],
        [ 0.0472,  0.0030, -0.4604, -0.1683],
        [ 0.2999,  0.2675,  0.2009, -0.2675],
        [ 0.0399,  0.1282,  0.2237,  0.4729]], requires_grad=True)]
[None, None, None]


In [21]:
# 그래서, isinstance의 활용법?
# weight initialization에 활용할 수 있음
# weight initialization이 필요한 이유는, 분산이 너무 크거나 작은 경우에는 학습이 잘 되지 않기 때문
for m in model.modules():
    if isinstance(m,nn.Linear):
        nn.init.kaiming_normal_(m.weight)
        nn.init.constant_(m.weight, 1)

print([m.weight for m in model.modules() if isinstance(m, nn.Linear)])

[Parameter containing:
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]]), Parameter containing:
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]]), Parameter containing:
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], requires_grad=True)]


## model.children()

In [22]:
# 쓰는 이유? -> 모델의 구조를 직접 인덱싱해서 접근할 수 있음

model.children()

<generator object Module.children at 0x7f992b115f90>

In [23]:
list(model.children())

[Sequential(
   (0): Linear(in_features=2, out_features=3, bias=True)
   (1): ReLU()
 ), Sequential(
   (0): Linear(in_features=3, out_features=4, bias=True)
   (1): ReLU()
 ), Linear(in_features=4, out_features=10, bias=True)]

In [24]:
x=torch.randn(2,2)
list(model.children())[0](x)

tensor([[1.5470, 1.7688, 0.8938],
        [0.5280, 0.7497, 0.0000]])

In [25]:
sub_network = nn.Sequential(*list(model.children())[:2])
print(sub_network)
sub_network(x)

Sequential(
  (0): Sequential(
    (0): Linear(in_features=2, out_features=3, bias=True)
    (1): ReLU()
  )
  (1): Sequential(
    (0): Linear(in_features=3, out_features=4, bias=True)
    (1): ReLU()
  )
)


tensor([[4.0610, 3.8133, 4.0860, 3.9442],
        [1.1291, 0.8814, 1.1541, 1.0123]])

## Sequential vs ModuleList

In [26]:
fc=nn.Linear(3,3)
layer_list = [fc for _ in range(5)]
layers1 = nn.Sequential(*layer_list) # nn.Sequential(*list) -> list를 unpacking -> 변수가 여러개일 때 이런 식으로 별 표시
layers2 = nn.ModuleList(layer_list)
print(layers1)
print(layers2)

x=torch.randn(1,3)
print(layers1(x))

# print(layers2(x)) # error!
for layer in layers2:
    x = layer(x)
print(x)

Sequential(
  (0): Linear(in_features=3, out_features=3, bias=True)
  (1): Linear(in_features=3, out_features=3, bias=True)
  (2): Linear(in_features=3, out_features=3, bias=True)
  (3): Linear(in_features=3, out_features=3, bias=True)
  (4): Linear(in_features=3, out_features=3, bias=True)
)
ModuleList(
  (0): Linear(in_features=3, out_features=3, bias=True)
  (1): Linear(in_features=3, out_features=3, bias=True)
  (2): Linear(in_features=3, out_features=3, bias=True)
  (3): Linear(in_features=3, out_features=3, bias=True)
  (4): Linear(in_features=3, out_features=3, bias=True)
)
tensor([[ 0.1350, -0.2892,  0.4055]], grad_fn=<AddmmBackward0>)
tensor([[ 0.1350, -0.2892,  0.4055]], grad_fn=<AddmmBackward0>)


In [27]:
# 걍 리스트 쓰지 왜 nn.ModuleList 를 쓸까?
class testNet(nn.Module):
    def __init__(self):
        super().__init__()

        # self.Module_List = [nn.Linear(3,3), nn.Linear(3,3)] # 이렇게 하면 안된다! -> nn.ModuleList로 감싸줘야 한다!
        # 왜냐하면 그냥 리스트로 감싸면 그냥 파이썬의 리스트로 인식되기 때문에 파라미터로 인식이 안된다!
        self.Module_List = nn.ModuleList([nn.Linear(3,3), nn.Linear(3,3)])

    def forward(self,x):
        for layer in self.Module_List:
            x = layer(x)
        return x

model=testNet()
print(model(torch.randn(1,3)))

print(model) # 그냥 리스트로 하면 등록이 안돼있다!

optimizer = optim.Adam(model.parameters(), lr = 0.1) # 등록이 안돼있으면 parameter를 못 찾는다!

tensor([[-0.5324, -0.1967, -0.5057]], grad_fn=<AddmmBackward0>)
testNet(
  (Module_List): ModuleList(
    (0): Linear(in_features=3, out_features=3, bias=True)
    (1): Linear(in_features=3, out_features=3, bias=True)
  )
)


In [28]:
# 그럼 nn.Sequential 쓰고 말지 왜 굳이 nn.ModuleList?
class small_block(nn.Module):
    def __init__(self):
        super().__init__()
        self.block_x = nn.Linear(1,1)
        self.block_y = nn.Linear(1,1)

    def forward(self, x, y):
        x = self.block_x(x)
        y = self.block_y(y)
        return x, y

block = small_block()
print(block)
model = nn.Sequential(block, block)
print(model)
# model(torch.randn(1),torch.randn(1)) # error!
# nn.Sequential 이 가지고 있는 forward 함수를 call 하기 때문에 입력을 두 개 넣으면 안된다!!

model = nn.ModuleList([block,block])
x = torch.randn(1)
y = torch.randn(1)
for block in model:
    x, y = block(x,y)
print(x, y)

small_block(
  (block_x): Linear(in_features=1, out_features=1, bias=True)
  (block_y): Linear(in_features=1, out_features=1, bias=True)
)
Sequential(
  (0): small_block(
    (block_x): Linear(in_features=1, out_features=1, bias=True)
    (block_y): Linear(in_features=1, out_features=1, bias=True)
  )
  (1): small_block(
    (block_x): Linear(in_features=1, out_features=1, bias=True)
    (block_y): Linear(in_features=1, out_features=1, bias=True)
  )
)
tensor([-0.4641], grad_fn=<AddBackward0>) tensor([1.3845], grad_fn=<AddBackward0>)
