In [1]:
import argparse

import torch
import torch.nn as nn
import torch.optim as optim

from model import ImageClassifier
from trainer import Trainer
from utils import load_mnist

In [2]:
# Run everything on the CPU; later cells move the tensors and the model here with `.to(device)`.
device = torch.device('cpu')

In [5]:
from torchvision import datasets, transforms

In [6]:
def load_mnist(is_train=True, flatten=True):
    """Return MNIST images and labels as a pair of tensors.

    Args:
        is_train: passed to ``datasets.MNIST`` as ``train``; True selects the
            training split.
        flatten: when True, every image is reshaped into a single row
            (e.g. 60000 x 28 x 28 becomes 60000 x 784 for the training split).

    Returns:
        ``(x, y)`` where ``x`` holds the float images divided by 255 and ``y``
        is the label tensor taken from the dataset unchanged.
    """
    from torchvision import datasets, transforms

    to_tensor = transforms.Compose([
        transforms.ToTensor(),
    ])
    mnist = datasets.MNIST('../data', train=is_train, download=True, transform=to_tensor)

    # The stored pixel values are integers, so cast to float before scaling by 1/255.
    images = mnist.data.float() / 255.
    # The targets are already an integer tensor and are returned as they are.
    labels = mnist.targets

    if not flatten:
        return images, labels

    # view keeps the leading sample dimension and merges the remaining ones.
    return images.view(images.size(0), -1), labels

In [8]:
# Build the MNIST training split with the same arguments load_mnist uses;
# download=True fetches the files into ../data (see the log below).
dataset = datasets.MNIST(
        '../data', train=True, download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
        ]),
    )

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


100.0%


Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


100.0%


Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


100.0%


Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100.0%

Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw






In [25]:
# Cast the integer pixel values to float and scale by 1/255, matching what load_mnist does,
# so the tensors explored here are the same ones the helper would return.
x = dataset.data.float() / 255.

In [26]:
# Check the raw layout: (number of images, 28, 28).
x.shape

torch.Size([60000, 28, 28])

In [27]:
# Peek at the labels: a tensor of integer class ids.
dataset.targets

tensor([5, 0, 4,  ..., 5, 6, 8])

In [28]:
# Flatten each image into one feature vector; -1 lets view infer the 28 * 28 = 784 columns.
x = x.view(x.size(0), -1)

In [29]:
# After flattening: (60000, 784).
x.shape

torch.Size([60000, 784])

In [21]:
# NOTE(review): repeats the flatten from the earlier cell (lower execution count, so likely a
# leftover); applied to an already (N, 784) tensor it leaves the shape unchanged.
x = x.view(x.size(0), -1)

In [24]:
# Display the flattened tensor; the repr only shows the corner values.
x

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [42]:
# Labels of the training split; they are shuffled together with x further down.
y = dataset.targets

In [30]:
# One random permutation of the row indices, reused for both x and y so that every image
# stays paired with its label.
# NOTE(review): no seed is set in the visible cells, so the split differs from run to run.
indices = torch.randperm(x.size(0))

In [31]:
# Inspect the permutation.
indices

tensor([ 2981, 20321, 51672,  ..., 52457, 21072, 51286])

In [35]:
# 85% of the rows go to training; whatever is left is held out for validation.
train_cnt = int(x.size(0) * 0.85)
valid_cnt = x.size(0) - train_cnt

In [36]:
# 85% of the 60000 rows.
train_cnt

51000

In [37]:
# Reorder the rows by the shared permutation, move them to `device`, then cut the result
# into a (train, valid) pair.
# NOTE: `x` is rebound from a tensor to a 2-tuple here, so re-running this cell on its own
# output fails.
x = torch.index_select(
        x,
        dim=0,
        index=indices
    ).to(device).split([train_cnt, valid_cnt], dim=0)

In [40]:
# x is now a tuple: (train part, valid part).
len(x)

2

In [43]:
# Apply the same permutation and the same (train, valid) cut to the labels so they stay
# aligned with x.
# NOTE: `y` is rebound from a tensor to a 2-tuple here, so re-running this cell on its own
# output fails.
y = torch.index_select(
        y,
        dim=0,
        index=indices
    ).to(device).split([train_cnt, valid_cnt], dim=0)

In [44]:
# Shuffled labels as a (train, valid) tuple.
y

(tensor([3, 0, 1,  ..., 8, 7, 3]), tensor([3, 1, 4,  ..., 4, 9, 0]))

In [45]:
# Sanity check: features and labels agree on the row count within each split.
print("Train:", x[0].shape, y[0].shape)
print("Valid:", x[1].shape, y[1].shape)

Train: torch.Size([51000, 784]) torch.Size([51000])
Valid: torch.Size([9000, 784]) torch.Size([9000])


In [47]:
import torch
import torch.nn as nn

class ImageClassifier(nn.Module):
    """Fully connected classifier built from Linear -> LeakyReLU -> BatchNorm1d blocks.

    The stack ends with a Linear projection to ``output_size`` followed by
    ``LogSoftmax``, so ``forward`` returns log-probabilities (values <= 0 whose
    exponentials sum to 1 per row), which is the input ``nn.NLLLoss`` expects.

    Args:
        input_size: number of features per sample.
        output_size: number of output classes.
        hidden_sizes: widths of the hidden blocks, in order. The default
            reproduces the original fixed stack of 500, 400, 300, 200, 100, 50.
    """

    def __init__(self,
                 input_size,
                 output_size,
                 hidden_sizes=(500, 400, 300, 200, 100, 50)):
        # Initialise nn.Module before touching attributes so parameter and
        # sub-module registration is in place.
        super().__init__()

        self.input_size = input_size
        self.output_size = output_size
        self.hidden_sizes = tuple(hidden_sizes)

        # Build the hidden blocks in order; module indices (and therefore
        # state_dict keys) match the hand-written stack for the default widths.
        stack = []
        in_features = input_size
        for width in self.hidden_sizes:
            stack += [
                nn.Linear(in_features, width),
                nn.LeakyReLU(),
                nn.BatchNorm1d(width),
            ]
            in_features = width

        stack += [
            nn.Linear(in_features, output_size),  # final projection to the class scores
            nn.LogSoftmax(dim=-1),  # log-probabilities over the last dimension
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Map a batch of inputs to per-class log-probabilities."""
        # |x| = (batch_size, input_size)

        y = self.layers(x)
        # |y| = (batch_size, output_size)

        return y

In [48]:
model = ImageClassifier(28**2, 10).to(device) # 784 flattened pixels in, 10 outputs
optimizer = optim.Adam(model.parameters()) # Adam over all model parameters, default settings
crit = nn.NLLLoss() # pairs with the LogSoftmax at the end of the model

In [51]:
# Notebook-level training settings; the Trainer cells below use these names.
batch_size = 32  # rows per mini-batch
n_epochs = 1  # passes over the training split

In [49]:
from copy import deepcopy

import numpy as np

import torch
import torch.nn.functional as F
import torch.optim as optim

class Trainer():
    """Runs mini-batch training and validation epochs and restores the best weights.

    ``config`` must expose ``verbose`` (per-iteration logging is printed when it
    is >= 2). ``batch_size`` and ``n_epochs`` are read from ``config`` when it
    provides them; otherwise the notebook-level globals of the same names are
    used, which is what the original cells relied on.
    """

    def __init__(self, model, optimizer, crit):
        self.model = model
        self.optimizer = optimizer
        self.crit = crit

        super().__init__()

    @staticmethod
    def _option(config, name):
        """Return ``config.<name>``, falling back to the module-level global ``name``."""
        value = getattr(config, name, None)
        if value is None:
            try:
                value = globals()[name]
            except KeyError:
                raise AttributeError(
                    "config has no '%s' and no global '%s' is defined" % (name, name)
                ) from None
        return value

    @staticmethod
    def _shuffled_batches(x, y, batch_size):
        """Shuffle x and y with one shared permutation and split both into mini-batches."""
        indices = torch.randperm(x.size(0), device=x.device)
        # split cuts the tensor into chunks of batch_size rows; the last chunk may be smaller.
        x = torch.index_select(x, dim=0, index=indices).split(batch_size, dim=0)
        y = torch.index_select(y, dim=0, index=indices).split(batch_size, dim=0)
        return x, y

    @staticmethod
    def _targets(y_i):
        """Drop size-1 dimensions from the labels while keeping the batch dimension.

        squeeze() turns e.g. (A x B x 1 x C x 1) into (A x B x C). On a batch
        holding a single sample it would also remove the batch dimension and
        leave a 0-dim target, so that dimension is put back.
        """
        squeezed = y_i.squeeze()
        if y_i.size(0) == 1:
            squeezed = squeezed.unsqueeze(0)
        return squeezed

    def _train(self, x, y, config):
        """Run one training epoch and return the mean mini-batch loss."""
        self.model.train()

        # Shuffle before begin.
        x, y = self._shuffled_batches(x, y, self._option(config, 'batch_size'))

        total_loss = 0

        for i, (x_i, y_i) in enumerate(zip(x, y)):
            y_hat_i = self.model(x_i)   # forward pass on one mini-batch
            loss_i = self.crit(y_hat_i, self._targets(y_i))

            # Initialize the gradients of the model, back-propagate the loss,
            # then let the optimizer update the parameters.
            self.optimizer.zero_grad()
            loss_i.backward()

            self.optimizer.step()

            if config.verbose >= 2:
                print("Train Iteration(%d/%d): loss=%.4e" % (i + 1, len(x), float(loss_i)))

            # float() turns the loss into a plain number, so the running sum
            # does not keep the autograd graph alive.
            total_loss += float(loss_i)

        return total_loss / len(x)

    def _validate(self, x, y, config):
        """Run one pass over the validation data and return the mean mini-batch loss."""
        # Turn evaluation mode on.
        self.model.eval()

        # No gradients are needed for validation.
        with torch.no_grad():
            # Shuffle before begin.
            x, y = self._shuffled_batches(x, y, self._option(config, 'batch_size'))

            total_loss = 0

            for i, (x_i, y_i) in enumerate(zip(x, y)):
                y_hat_i = self.model(x_i)
                loss_i = self.crit(y_hat_i, self._targets(y_i))

                if config.verbose >= 2:
                    print("Valid Iteration(%d/%d): loss=%.4e" % (i + 1, len(x), float(loss_i)))

                total_loss += float(loss_i)

            return total_loss / len(x)

    def train(self, train_data, valid_data, config):
        """Train for the configured number of epochs, then load the best weights.

        Args:
            train_data: ``(inputs, labels)`` pair used for training.
            valid_data: ``(inputs, labels)`` pair used for validation.
            config: settings object, see the class docstring.
        """
        lowest_loss = np.inf
        best_model = None
        total_epochs = self._option(config, 'n_epochs')

        for epoch_index in range(total_epochs):
            train_loss = self._train(train_data[0], train_data[1], config)
            valid_loss = self._validate(valid_data[0], valid_data[1], config)

            # You must use deep copy to take a snapshot of current best weights;
            # the snapshot is taken whenever the validation loss is the lowest so far.
            if valid_loss <= lowest_loss:
                lowest_loss = valid_loss
                best_model = deepcopy(self.model.state_dict())

            print("Epoch(%d/%d): train_loss=%.4e  valid_loss=%.4e  lowest_loss=%.4e" % (
                epoch_index + 1,
                total_epochs,
                train_loss,
                valid_loss,
                lowest_loss,
            ))

        # Restore to best model. With zero epochs, or when every validation
        # loss was NaN, no snapshot exists and the current weights are kept.
        if best_model is not None:
            self.model.load_state_dict(best_model)

In [50]:
from types import SimpleNamespace

# `config` was never defined in this notebook, so the call below raised a NameError.
# Build it here from the notebook-level settings; verbose=1 keeps the per-epoch summary
# and skips the per-iteration lines (those are printed only when verbose >= 2).
config = SimpleNamespace(verbose=1, batch_size=batch_size, n_epochs=n_epochs)

trainer = Trainer(model, optimizer, crit)
trainer.train((x[0], y[0]), (x[1], y[1]), config)

In [52]:
# Manual walk-through of Trainer.train's starting state: no snapshot yet, best loss at infinity.
lowest_loss = np.inf
best_model = None

In [54]:
# Bundle each split as an (inputs, labels) pair, the form Trainer.train takes.
train_data = (x[0], y[0])
valid_data = (x[1], y[1])

In [55]:
# Continue the walk-through with the training split only.
# NOTE: this rebinds x and y from (train, valid) tuples to the training tensors, so cells
# that index x[0] / x[1] as splits no longer mean the same thing after this point.
x = x[0]
y = y[0]

In [56]:
# Fresh permutation over the training rows, created on the same device as x
# (the next cell draws it again).
indices = torch.randperm(x.size(0), device=x.device)

In [59]:
# Same steps as the start of Trainer._train: shuffle, then cut into mini-batches.
# NOTE: x and y are rebound from tensors to tuples of mini-batches here.
indices = torch.randperm(x.size(0), device=x.device)
x = torch.index_select(x, dim=0, index=indices).split(batch_size, dim=0) # split cuts the tensor into chunks of batch_size rows
y = torch.index_select(y, dim=0, index=indices).split(batch_size, dim=0)

In [66]:
# Number of mini-batches: 51000 rows in chunks of 32, rounded up.
len(x)

1594

In [67]:
# One mini-batch: 32 rows of 784 features.
x[0].shape

torch.Size([32, 784])