### BUILD THE NEURAL NETWORK

In [1]:
import torch

from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

### Define the class

In [2]:
class NN(nn.Module):
    def __init__(self):
        super(NN, self).__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )
    
    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        
        return logits

In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [4]:
model = NN().to(device)

print(model)

NN(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


### Model Layers

In [5]:
input_img = torch.rand(3, 28, 28) # 28*28 크기의 tensor가 3개
print(input_img)

tensor([[[0.3286, 0.7060, 0.5141,  ..., 0.2575, 0.0437, 0.1182],
         [0.4455, 0.5883, 0.6691,  ..., 0.9166, 0.4237, 0.3805],
         [0.0300, 0.5426, 0.6349,  ..., 0.1690, 0.2291, 0.8712],
         ...,
         [0.3502, 0.9800, 0.7696,  ..., 0.8111, 0.2001, 0.8832],
         [0.9996, 0.1389, 0.6896,  ..., 0.9117, 0.4271, 0.6127],
         [0.7536, 0.0060, 0.8405,  ..., 0.3219, 0.9805, 0.2973]],

        [[0.6384, 0.8730, 0.1383,  ..., 0.5182, 0.2961, 0.6892],
         [0.5660, 0.1137, 0.9843,  ..., 0.2465, 0.4727, 0.7241],
         [0.4668, 0.2504, 0.5713,  ..., 0.5956, 0.7244, 0.8439],
         ...,
         [0.5735, 0.9072, 0.0873,  ..., 0.4774, 0.5655, 0.9637],
         [0.4253, 0.1540, 0.5579,  ..., 0.3672, 0.5145, 0.2783],
         [0.0573, 0.7421, 0.8649,  ..., 0.5622, 0.2889, 0.7849]],

        [[0.3769, 0.2420, 0.8320,  ..., 0.0442, 0.5383, 0.6115],
         [0.0433, 0.4480, 0.3946,  ..., 0.1060, 0.4726, 0.7190],
         [0.3255, 0.6633, 0.7239,  ..., 0.1979, 0.1035, 0.

In [9]:
# nn.Flatten
flatten = nn.Flatten()
flat_img = flatten(input_img)

# nn.Linear
layer1 = nn.Linear(in_features=28*28, out_features=20)
hidden1 = layer1(flat_img)
print(hidden1.size())

# nn.ReLU
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = nn.ReLU()(hidden1)
print(f"After ReLU: {hidden1}") # 음수는 0으로 바뀜

# nn.Sequential
seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(20, 10)
)

input_img = torch.rand(3, 28, 28)
logits = seq_modules(input_img)

## nn.Softmax
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)

torch.Size([3, 20])
Before ReLU: tensor([[-0.2264,  0.4209, -0.0211, -0.2047, -0.2359,  0.5338,  0.0263,  0.3797,
         -0.0284, -0.4800, -0.0175,  0.0443, -0.4820, -0.4803, -0.2911,  0.4426,
          0.2676,  0.6306, -0.2964,  0.0964],
        [ 0.1199,  0.3631, -0.1075, -0.3116, -0.3721,  0.3532,  0.0680,  0.1356,
         -0.3876, -0.3830,  0.2407,  0.2580, -0.3904, -0.5308, -0.1937,  0.3743,
          0.1899,  0.6607, -0.3894,  0.0227],
        [-0.0117,  0.1331, -0.0352, -0.2742, -0.2495,  0.2602,  0.0690, -0.1476,
          0.3271, -0.5602, -0.1397,  0.2966, -0.2019, -0.3409, -0.0967,  0.2649,
          0.0663,  0.7369, -0.3518, -0.0846]], grad_fn=<AddmmBackward>)


After ReLU: tensor([[0.0000, 0.4209, 0.0000, 0.0000, 0.0000, 0.5338, 0.0263, 0.3797, 0.0000,
         0.0000, 0.0000, 0.0443, 0.0000, 0.0000, 0.0000, 0.4426, 0.2676, 0.6306,
         0.0000, 0.0964],
        [0.1199, 0.3631, 0.0000, 0.0000, 0.0000, 0.3532, 0.0680, 0.1356, 0.0000,
         0.0000, 0.2407, 0.2580, 0

### Model Parameters

In [10]:
print("Model structure: ", model, "\n\n")

for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure:  NN(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
) 


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[ 0.0282,  0.0151,  0.0234,  ..., -0.0191,  0.0288, -0.0235],
        [ 0.0252, -0.0020, -0.0276,  ...,  0.0254, -0.0088, -0.0170]],
       grad_fn=<SliceBackward>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([-0.0292, -0.0090], grad_fn=<SliceBackward>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[ 0.0284,  0.0424, -0.0325,  ..., -0.0299, -0.0117,  0.0003],
        [-0.0101, -0.0430, -0.0258,  ..., -0.0072, -0.0155, -0.0029]],
       grad_fn=<SliceBackward>) 

Layer: linear_relu_stack.2.bias | Size: torch.Size([512]) | Values : tensor([-0.0402,  0.040