In [2]:
import torch
import torchvision.transforms as transforms # MNIST 데이터를 텐서 형태로 바꾸기 위해
import torch.nn.init
import numpy as np
import os
import random

device = None
if torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# 일정한 실험을 위해 랜덤 시드 고정
seed = 777
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed) 
random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# 현재 디바이스가 무엇인지?
print('Current Device : ' + str(device))

Current Device : mps


In [3]:
import os
import torchvision.datasets as dsets # 이 안에 MNIST 있다

if os.path.exists('MNIST'):
    mnist_train = dsets.MNIST(root=str(os.getcwd()), train=True, transform=transforms.ToTensor(), download=False)
    mnist_test = dsets.MNIST(root=str(os.getcwd()), train=False, transform=transforms.ToTensor(), download=False)
else: # MNIST 데이터 다운로드
    mnist_train = dsets.MNIST(root=str(os.getcwd()), train=True, transform=transforms.ToTensor(), download=True)
    mnist_test = dsets.MNIST(root=str(os.getcwd()), train=False, transform=transforms.ToTensor(), download=True)

print(mnist_train)

BATCH_SIZE = 128
train_loader = torch.utils.data.DataLoader(dataset=mnist_train, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
test_loader = torch.utils.data.DataLoader(dataset=mnist_test, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)

Dataset MNIST
    Number of datapoints: 60000
    Root location: /Users/leejunseo/NODE
    Split: Train
    StandardTransform
Transform: ToTensor()


In [4]:
from torchinfo import summary
import torch
import torch.nn as nn


class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        if stride != 1 or in_channels != out_channels:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )
        else:
            self.downsample = nn.Identity()

    def forward(self, x):
        identity = self.downsample(x)
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += identity
        out = self.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_classes=10):
        super().__init__()
        self.downsample1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=1, kernel_size=1, stride=2, bias=False),
            nn.BatchNorm2d(1)
        )
        self.downsample2 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=1, kernel_size=1, stride=2, bias=False),
            nn.BatchNorm2d(1)
        )
        self.in_channels = 1
        self.layer1 = self._make_layer(block, 32)
        self.layer2 = self._make_layer(block, 32, stride=2)
        self.layer3 = self._make_layer(block, 64)
        self.layer4 = self._make_layer(block, 64, stride=2)
        self.layer5 = self._make_layer(block, 128)
        self.layer6 = self._make_layer(block, 128, stride=2)
        self.fc = nn.Linear(128, num_classes)
        # self.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1, bias=False)
        # self.bn1 = nn.BatchNorm2d(64)
        # self.relu = nn.ReLU(inplace=True)
        # self.avgpool = nn.AdaptiveAvgPool2d((1,1))

        # self.layer1 = self._make_layer(block, 64, layers[0])
        # self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        # self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        # self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        # self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # self.fc = nn.Linear(512, num_classes)
        return

    def _make_layer(self, block, out_channels, blocks=1, stride=1):
        layers = []

        layers.append(block(self.in_channels, out_channels, stride))
        self.in_channels = out_channels

        for _ in range(1, blocks):
            layers.append(block(out_channels, out_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.downsample1(x)
        out = self.downsample2(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.layer6(out)
        out = torch.flatten(out, 1)
        out = self.fc(out)
        
        # out = self.avgpool(out)
        # out = self.conv1(x)
        # out = self.bn1(out)
        # out = self.relu(out)

        # out = self.layer1(out)
        # out = self.layer2(out)
        # out = self.layer3(out)
        # out = self.layer4(out)

        # out = self.avgpool(out)
        # out = torch.flatten(out, 1)
        # out = self.fc(out)

        return out

model = ResNet(ResidualBlock, num_classes=10)
summary(model, (128,1,28,28))

  action_fn=lambda data: sys.getsizeof(data.storage()),
  return super(TypedStorage, self).__sizeof__() + self.nbytes()


Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [128, 10]                 --
├─Sequential: 1-1                        [128, 1, 14, 14]          --
│    └─Conv2d: 2-1                       [128, 1, 14, 14]          1
│    └─BatchNorm2d: 2-2                  [128, 1, 14, 14]          2
├─Sequential: 1-2                        [128, 1, 7, 7]            --
│    └─Conv2d: 2-3                       [128, 1, 7, 7]            1
│    └─BatchNorm2d: 2-4                  [128, 1, 7, 7]            2
├─Sequential: 1-3                        [128, 32, 7, 7]           --
│    └─ResidualBlock: 2-5                [128, 32, 7, 7]           --
│    │    └─Sequential: 3-1              [128, 32, 7, 7]           96
│    │    └─Conv2d: 3-2                  [128, 32, 7, 7]           288
│    │    └─BatchNorm2d: 3-3             [128, 32, 7, 7]           64
│    │    └─ReLU: 3-4                    [128, 32, 7, 7]           --
│    │    └─Conv2d

In [5]:
len(train_loader)

468

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from tqdm import tqdm 
import matplotlib.pyplot as plt

# 하이퍼파라미터 설정

EPOCH = 10
LR = 1e-2
accum_loss = 0
train_loss_arr = []
test_loss_arr = []
accum_acc = 0
acc_arr = []

# 모델 초기화 및 손실 함수, 최적화 알고리즘 설정
# model = ResNet(ResidualBlock, num_classes=10).to(device)
model.to(device) # summary 함수는 모델을 cpu로 꺼내버림
loss_func = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

# 모델 학습
for epoch in range(EPOCH):
    model.train()
    with tqdm(total=len(train_loader), desc='Train') as pbar:
        for x,y in train_loader:
            optimizer.zero_grad()
            train_output = model(x.to(device))
            train_loss = loss_func(train_output, y.to(device))
            train_loss.backward()
            optimizer.step()
            pbar.update(1)
            accum_loss += train_loss.cpu().item()
    cur_train_loss = accum_loss/len(train_loader) # (아마도) loss는 미니배치 한 개에 들어있는 데이터 개수 만큼의 loss를 의미할 것임.
    train_loss_arr.append(cur_train_loss)
    accum_loss = 0
        
    model.eval()
    with torch.no_grad():
        with tqdm(total=len(test_loader), desc='Evaluation') as pbar:
            for x,y in test_loader:
                test_output = model(x.to(device))
                test_loss = loss_func(test_output, y.to(device))
                accum_loss += test_loss.cpu().item()
                max_prob, pred_idx = torch.max(test_output.data, 1)
                accum_acc += (pred_idx.cpu() == y).sum().item()
                pbar.update(1)
    cur_test_loss = accum_loss/len(test_loader)
    test_loss_arr.append(cur_test_loss)
    cur_acc = 100*accum_acc/len(test_loader.dataset)
    acc_arr.append(cur_acc)
    accum_loss = 0
    accum_acc = 0
    print("Epoch [{}/{}] Train_Loss: {:.4f}, Test_Loss: {:.4f}, Test_acc: {:.2f}".format(epoch+1, EPOCH, cur_train_loss,cur_test_loss, cur_acc))
    

Train: 100%|██████████| 468/468 [00:48<00:00,  9.67it/s]
Evaluation: 100%|██████████| 78/78 [00:01<00:00, 45.82it/s]


Epoch [1/10] Train_Loss: 0.6216, Test_Loss: 0.5225, Test_acc: 82.93


Train: 100%|██████████| 468/468 [00:46<00:00, 10.13it/s]
Evaluation: 100%|██████████| 78/78 [00:01<00:00, 49.58it/s]


Epoch [2/10] Train_Loss: 0.4556, Test_Loss: 0.4336, Test_acc: 85.48


Train: 100%|██████████| 468/468 [00:44<00:00, 10.49it/s]
Evaluation: 100%|██████████| 78/78 [00:01<00:00, 49.94it/s]


Epoch [3/10] Train_Loss: 0.4112, Test_Loss: 0.4263, Test_acc: 85.57


Train: 100%|██████████| 468/468 [00:45<00:00, 10.21it/s]
Evaluation: 100%|██████████| 78/78 [00:01<00:00, 46.17it/s]


Epoch [4/10] Train_Loss: 0.3829, Test_Loss: 0.3957, Test_acc: 86.69


Train: 100%|██████████| 468/468 [00:46<00:00, 10.06it/s]
Evaluation: 100%|██████████| 78/78 [00:01<00:00, 49.07it/s]


Epoch [5/10] Train_Loss: 0.3619, Test_Loss: 0.4185, Test_acc: 86.40


Train: 100%|██████████| 468/468 [00:45<00:00, 10.31it/s]
Evaluation: 100%|██████████| 78/78 [00:01<00:00, 48.15it/s]


Epoch [6/10] Train_Loss: 0.3470, Test_Loss: 0.3916, Test_acc: 87.15


Train: 100%|██████████| 468/468 [00:45<00:00, 10.24it/s]
Evaluation: 100%|██████████| 78/78 [00:01<00:00, 49.38it/s]


Epoch [7/10] Train_Loss: 0.3352, Test_Loss: 0.3806, Test_acc: 87.49


Train: 100%|██████████| 468/468 [00:45<00:00, 10.33it/s]
Evaluation: 100%|██████████| 78/78 [00:01<00:00, 48.98it/s]


Epoch [8/10] Train_Loss: 0.3201, Test_Loss: 0.3785, Test_acc: 88.19


Train: 100%|██████████| 468/468 [00:45<00:00, 10.39it/s]
Evaluation: 100%|██████████| 78/78 [00:01<00:00, 49.01it/s]


Epoch [9/10] Train_Loss: 0.3087, Test_Loss: 0.5343, Test_acc: 82.76


Train: 100%|██████████| 468/468 [00:44<00:00, 10.40it/s]
Evaluation: 100%|██████████| 78/78 [00:01<00:00, 48.00it/s]

Epoch [10/10] Train_Loss: 0.2999, Test_Loss: 0.4361, Test_acc: 86.04



