In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda

training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

train_dataloader = DataLoader(training_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork()

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [3]:
# parameters
learning_rate = 1e-3
batch_size = 64
epochs = 5

In [4]:
loss_fn = nn.CrossEntropyLoss()

In [5]:
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

1. `optimizer.zero_grad()`를 호출하여 모델 매개변수의 변화도를 재설정. accumulation되기 때문에 중복 계산을 막기 위해 반복할 때마다 명시적으로 0으로 reset
2. `loss.backward()`를 호출하여 예측 손실(prediction loss)를 역전파. 각 매개변수에 대한 손실의 변화도를 저장
3. `optimizer.step()`을 호출하여 수집된 변화도로 매개변수를 조정

In [6]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        # 예측(prediction)과 손실(loss) 계산
        pred = model(X)
        loss = loss_fn(pred, y)

        # 역전파
        # 위의 1.2.3. 과정
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

# model.parameters()

<br/>

<br/>

model.parameter()은 모든 하위 모듈들을 탐색하고(recursive=True) 각 모듈의 _parameters에 들어있는 파라미터들을 하나씩 반환

<br/>

![](https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FuuIiN%2FbtqEp02L7IX%2F3F3fisTmytsvMzl4hsuLL1%2Fimg.png)

위의 예시에서 A.paramters()를 호출하면 [A.B.weight, A.B.bias, A.C.D.weight, A.C.D.bias]가 반환

In [7]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.300441  [    0/60000]
loss: 2.293444  [ 6400/60000]
loss: 2.290199  [12800/60000]
loss: 2.280282  [19200/60000]
loss: 2.250604  [25600/60000]
loss: 2.239673  [32000/60000]
loss: 2.236798  [38400/60000]
loss: 2.213104  [44800/60000]
loss: 2.209504  [51200/60000]
loss: 2.183428  [57600/60000]
Test Error: 
 Accuracy: 45.8%, Avg loss: 2.194227 

Epoch 2
-------------------------------
loss: 2.195865  [    0/60000]
loss: 2.197921  [ 6400/60000]
loss: 2.167277  [12800/60000]
loss: 2.181406  [19200/60000]
loss: 2.107856  [25600/60000]
loss: 2.074762  [32000/60000]
loss: 2.096737  [38400/60000]
loss: 2.033400  [44800/60000]
loss: 2.040551  [51200/60000]
loss: 1.984656  [57600/60000]
Test Error: 
 Accuracy: 47.3%, Avg loss: 2.017612 

Epoch 3
-------------------------------
loss: 2.014806  [    0/60000]
loss: 2.032789  [ 6400/60000]
loss: 1.956893  [12800/60000]
loss: 2.006266  [19200/60000]
loss: 1.864479  [25600/60000]
loss: 1.804008  [32000/600

# torch.save()

1. torch.save(model, 'model.pth')

- 이경우에는 파라미터, 옵티마이저, 에포크, 스코어 등 모든 상태를 저장한다. 나중에 이어서 학습한다던지 코드에 접근하지 못하는 사용자가 모델을 사용할 수 있도록 허락해주고 싶을 때 등의 경우에 사용

2. torch.save(model.state_dict(), 'model_state_dict.pth')

- 학습가능한 파라미터들이 담겨있는 딕셔너리이다. weight, bias와 같은 것들이다. 따라서 모델의 뼈대구조가 있을 때만 load가 가능하다


In [8]:
torch.save(model.state_dict(), 'torch_model.pth')

In [12]:
# 모델의 state_dict 출력
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())

print()

# 옵티마이저의 state_dict 출력
print("Optimizer's state_dict:")
for var_name in optimizer.state_dict():
    print(var_name, "\t", optimizer.state_dict()[var_name])

Model's state_dict:
linear_relu_stack.0.weight 	 torch.Size([512, 784])
linear_relu_stack.0.bias 	 torch.Size([512])
linear_relu_stack.2.weight 	 torch.Size([512, 512])
linear_relu_stack.2.bias 	 torch.Size([512])
linear_relu_stack.4.weight 	 torch.Size([10, 512])
linear_relu_stack.4.bias 	 torch.Size([10])

Optimizer's state_dict:
state 	 {0: {'momentum_buffer': None}, 1: {'momentum_buffer': None}, 2: {'momentum_buffer': None}, 3: {'momentum_buffer': None}, 4: {'momentum_buffer': None}, 5: {'momentum_buffer': None}}
param_groups 	 [{'lr': 0.001, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [0, 1, 2, 3, 4, 5]}]


In [None]:
model = torch.load('model.pth')