# Build The Neural Network Model
신경망은 데이터에 대한 연산을 수행하는 Layer/Module로 구성.
- torch.nn namespace: 신경망을 구성하는 모든 구성요소 제공
- nn.Module: Pytorch의 모든 모듈은 nn.Module의 subclass이다.

## FashionMNIST Dataset classifier

Import the needed libraries

In [1]:
import os
import torch
from torch import nn
from torch.utils.data import dataloader
from torchvision import datasets, transforms

Select the device for machine learning

In [2]:
# Choose the compute device: use the MPS backend when PyTorch reports it as
# available, otherwise explain why it is not and stay on the CPU.
device = torch.device("cpu")  # fallback unless MPS turns out to be usable
if torch.backends.mps.is_available():
    print("MPS is available")
    device = torch.device("mps")
elif torch.backends.mps.is_built():
    # This PyTorch build supports MPS, but the machine/OS cannot run it.
    print("MPS not available because the current MacOS version is not 12.3+ "
          "and/or you do not have an MPS-enabled device on this machine.")
else:
    # This PyTorch build was compiled without MPS support.
    print("MPS not available because the current PyTorch install was not "
          "built with MPS enabled.")
print(f"Using {device} device")


MPS is available
Using mps device


## Define Class
신경망 모델을 nn.Module의 subclass로 정의하고
```__init__```에서 신경망 계층들을 초기화 한다.
nn.Module을 상속받은 클래스는 forward 메소드에 입력 데이터에 대한 연산을 구현한다.

In [3]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier for 28x28 inputs with 10 output classes.

    The input is flattened to 784 features per sample and passed through
    three linear layers (784 -> 512 -> 512 -> 10) with a ReLU after each of
    the first two.
    """

    def __init__(self):
        """Create the flatten layer and the linear/ReLU stack."""
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of inputs.

        Args:
            x: Tensor whose dimensions after the first flatten to 784
                values per sample (e.g. ``(batch, 28, 28)``).

        Returns:
            Tensor with 10 scores per sample; no softmax is applied here.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

NeuralNetwork의 instance를 생성하고 이를 device로 이동한 뒤, structure를 print해본다.

In [4]:
# Build the network, move it to the selected device, and show its layer
# structure.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


To use the model, pass it the input data.   
This executes the model's forward.   
Calling the model runs forward along with some background operations, so do not call model.forward() directly.

In [23]:
# Push one random 28x28 input through the model and report the class with
# the highest probability.
X = torch.rand(1, 28, 28, device=device)
logits = model(X)  # calling the module (not model.forward) runs the forward pass
to_probabilities = nn.Softmax(dim=1)  # normalise across the class dimension
pred_probab = to_probabilities(logits)
y_pred = pred_probab.argmax(dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([1], device='mps:0')


## Model layer

Let's break down FashionMNIST model's layer.   
To illustrate this, take a sample minibatch of 3 images of size 28x28 pixels.   


In [24]:
# Stand-in minibatch: 3 random "images" of 28x28 values drawn from [0, 1).
input_image = torch.rand((3, 28, 28))
print(input_image.shape)

torch.Size([3, 28, 28])


### nn.Flatten
Initialize the nn.Flatten layer to convert each 2D 28x28 image into a contiguous array of 784 (28x28) pixel values.   
It flattens dims 1 and 2; the minibatch dimension (dim=0) is maintained.

In [26]:
# With its default arguments nn.Flatten keeps dim 0 and merges the rest,
# so the (3, 28, 28) batch becomes (3, 784).
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


### nn.Linear
The linear layer is a module that applies a linear transformation to the input using its stored weights and biases.

In [27]:
# Project each flattened 784-value image down to 20 hidden features.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


### nn.ReLU


In [28]:
# Show the hidden activations before and after ReLU, which replaces every
# negative entry with zero.
print(f"Befor ReLU: {hidden1}\n\n")
relu = nn.ReLU()
hidden1 = relu(hidden1)
print(f"After ReLU: {hidden1}")


Befor ReLU: tensor([[ 0.3213, -1.0040, -0.3535,  0.0955, -0.4782, -0.1649, -0.4862, -0.1118,
          0.5297,  0.0968,  0.5267, -0.2614,  0.5413,  0.2333,  0.0491, -0.2700,
          0.4627, -0.1208, -0.4704,  0.4267],
        [ 0.5060, -1.3938, -0.2236, -0.0042, -0.7435, -0.3178, -0.2822, -0.3068,
          1.0113, -0.2791,  0.1871,  0.1387,  0.0648, -0.3580, -0.3638, -0.5220,
          0.4414,  0.1225, -0.4179,  0.6499],
        [ 0.5921, -0.9026, -0.1304,  0.0676, -0.6264,  0.1696, -0.5268,  0.0356,
          0.6542, -0.0705,  0.1977, -0.1296, -0.0056, -0.1225,  0.0476, -0.8263,
          0.6602,  0.2721, -0.3819,  0.1844]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.3213, 0.0000, 0.0000, 0.0955, 0.0000, 0.0000, 0.0000, 0.0000, 0.5297,
         0.0968, 0.5267, 0.0000, 0.5413, 0.2333, 0.0491, 0.0000, 0.4627, 0.0000,
         0.0000, 0.4267],
        [0.5060, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0113,
         0.0000, 0.1871, 0.1387, 0.0648, 0.0000, 0.000

### nn.Sequential
nn.Sequential is an ordered container of modules.

In [31]:
# Chain the layers built above with a final 20 -> 10 linear layer; the
# container feeds the data through them in the order given.
seq_modules = nn.Sequential(flatten, layer1, nn.ReLU(), nn.Linear(20, 10))
# Fresh random minibatch of 3 inputs, each 28x28.
input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)

### nn.Softmax
nn.Softmax 
- scales the logits to values in the range [0, 1], which can be read as the model's predicted probability for each class
- dim: the dimension along which the values must sum to 1.

In [32]:
# Turn the logits into per-class probabilities; dim=1 is the dimension
# along which the values are normalised to sum to 1.
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)

### Model Parameters
nn.Module automatically tracks all fields defined inside your model object.   
nn.Module makes all parameters accessible using your model's parameters() or named_parameters() methods.

In [35]:
# Print the module tree, then every registered parameter with its name,
# shape and current values.
print(f"Model structure: {model}\n\n")
# parameters() returns a generator, so this line prints the generator object
# itself rather than the tensors.
print(f"Model parameter: {model.parameters()}")
for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.shape} | Values: {param}")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Model parameter: <generator object Module.parameters at 0x16bd9bcf0>
Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values: Parameter containing:
tensor([[-0.0316, -0.0352, -0.0016,  ...,  0.0184,  0.0206, -0.0348],
        [-0.0281, -0.0022, -0.0227,  ..., -0.0110, -0.0199, -0.0066],
        [-0.0218, -0.0195,  0.0215,  ..., -0.0120, -0.0104,  0.0304],
        ...,
        [ 0.0110, -0.0312,  0.0091,  ..., -0.0255, -0.0060, -0.0034],
        [ 0.0024, -0.0211, -0.0157,  ..., -0.0137,  0.0350,  0.0334],
        [ 0.0140,  0.0230, -0.0286,  ..., -0.0098, -0.0149,  0.0291]],
       device='mps:0', requires_grad=True)
Layer: linear_relu_