In [1]:
import torch

t = torch.FloatTensor([0., 1., 2., 3., 4., 5., 6.])

print(t.dim())
print(t.shape)
print(t.size())

1
torch.Size([7])
torch.Size([7])


In [2]:
import torch

t = torch.FloatTensor([[[0, 1, 2],
                        [3, 4, 5]],
                        [[6, 7, 8],
                        [9, 10, 11]]])
print(t.shape)

torch.Size([2, 2, 3])


In [None]:
# 02.04. class


class Calculator:
    # class: 여러 메서드들을 객체 형태로 독립적으로 생성할 수 있는 몰드같은 느낌
    def __init__(self): # __init__ 메서드: 파이썬에서 생성자로 자동 인식함
        self.result = 0

    def add(self, num):
        # class 내부 함수: method 라고 부름
        self.result += num # 메서드에서 선언된 num: 객체 변수
        return self.result


cal1 = Calculator() # 선언한 cal1 객체: self 변수로 자동으로 정해짐
cal2 = Calculator()

print(cal1.add(3)) # class.method() 호출 방식 사용
print(cal1.add(4))
print(cal2.add(3))
print(cal2.add(7))

In [None]:
# 03.04. nn.Module로 구현하는 선형 회귀

import torch


# 항상 동일 결과가 나오게
torch.manual_seed(1)

# 데이터
x_train = torch.FloatTensor([[73, 80, 75],
                             [93, 88, 93],
                             [89, 91, 90],
                             [96, 98, 100],
                             [73, 66, 70]])
y_train = torch.FloatTensor([[152], [185], [180], [196], [142]])

# 모델을 선언 및 초기화 / 단순 선형 회귀이므로 input_dim=1, output_dim=1
model = torch.nn.Linear(in_features=3, out_features=1, bias=True)

# optimizer 설정 / 경사 하강법 SGD를 사용하고 learning rate를 의미하는 lr
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5)

# 전체 훈련 데이터에 대해 경사 하강법을 2,000회 반복
nb_epochs = 2_000
for epoch in range(nb_epochs + 1):

    # H(x) 계산
    prediction = model(x_train)

    # cost 계산 / 파이토치 제공 평균 제곱 오차 함수
    cost = torch.nn.functional.mse_loss(prediction, y_train)

    # cost로 H(x) 개선하는 부분
    # gradient를 0으로 초기화
    optimizer.zero_grad()

    # 비용 함수를 미분하여 gradient 계산
    # backward 연산
    cost.backward()

    # W와 b를 업데이트
    optimizer.step()

    # 100번마다 로그 출력
    if epoch % 100 == 0:
        print(f'Epoch {epoch}/{nb_epochs}, Cost: {cost.item():.6f}')

# 학습 후 W 및 b 값 출력
print(list(model.parameters()))

# 임의의 입력값 [73, 80, 75] 선언
new_x = torch.FloatTensor([[73, 80, 75]])

# 임의의 입력값에 대해서 예측값 pred_y를 리턴받아서 저장 및 출력
pred_y = model(new_x)
print(f'예측값: {pred_y}')

Epoch 0/2000, Cost: 31667.597656
Epoch 100/2000, Cost: 0.225993
Epoch 200/2000, Cost: 0.223911
Epoch 300/2000, Cost: 0.221941
Epoch 400/2000, Cost: 0.220059
Epoch 500/2000, Cost: 0.218271
Epoch 600/2000, Cost: 0.216575
Epoch 700/2000, Cost: 0.214950
Epoch 800/2000, Cost: 0.213413
Epoch 900/2000, Cost: 0.211952
Epoch 1000/2000, Cost: 0.210560
Epoch 1100/2000, Cost: 0.209232
Epoch 1200/2000, Cost: 0.207967
Epoch 1300/2000, Cost: 0.206761
Epoch 1400/2000, Cost: 0.205619
Epoch 1500/2000, Cost: 0.204522
Epoch 1600/2000, Cost: 0.203484
Epoch 1700/2000, Cost: 0.202485
Epoch 1800/2000, Cost: 0.201542
Epoch 1900/2000, Cost: 0.200635
Epoch 2000/2000, Cost: 0.199769
[Parameter containing:
tensor([[0.9778, 0.4539, 0.5768]], requires_grad=True), Parameter containing:
tensor([0.2802], requires_grad=True)]
예측값: tensor([[151.2305]], grad_fn=<AddmmBackward>)


In [None]:
# 03.05 클래스로 파이토치 모델 구현하기

import torch


# 항상 동일 결과가 나오게
torch.manual_seed(1)

# 데이터
x_train = torch.FloatTensor([[73, 80, 75],
                             [93, 88, 93],
                             [89, 91, 90],
                             [96, 98, 100],
                             [73, 66, 70]])
y_train = torch.FloatTensor([[152], [185], [180], [196], [142]])

# torch.nn.Module을 상속받는 파이썬 클래스
class MultivariateLinearRegressionModel(torch.nn.Module):
    def __init__(self):
        # super() 함수는 상속하는 nn.Module 클래스의 속성을 가지고 초기화됨
        super().__init__()
        self.linear = torch.nn.Linear(3, 1)

    def forward(self, x):
        return self.linear(x)


model = MultivariateLinearRegressionModel()

optimizer = torch.optim.SGD(model.parameters(), lr=1e-5) 

# 전체 훈련 데이터에 대해 경사 하강법을 2,000회 반복
nb_epochs = 2_000
for epoch in range(nb_epochs + 1):

    # H(x) 계산
    prediction = model(x_train)

    # cost 계산 / 파이토치 제공 평균 제곱 오차 함수
    cost = torch.nn.functional.mse_loss(prediction, y_train)

    # cost로 H(x) 개선하는 부분
    # gradient를 0으로 초기화
    optimizer.zero_grad()

    # 비용 함수를 미분하여 gradient 계산
    # backward 연산
    cost.backward()
    
    # W와 b를 업데이트
    optimizer.step()

    # 100번마다 로그 출력
    if epoch % 100 == 0:
        print(f'Epoch {epoch}/{nb_epochs}, Cost: {cost.item():.6f}')

# 학습 후 W 및 b 값 출력
print(list(model.parameters()))

Epoch 0/2000, Cost: 31667.597656
Epoch 100/2000, Cost: 0.225993
Epoch 200/2000, Cost: 0.223911
Epoch 300/2000, Cost: 0.221941
Epoch 400/2000, Cost: 0.220059
Epoch 500/2000, Cost: 0.218271
Epoch 600/2000, Cost: 0.216575
Epoch 700/2000, Cost: 0.214950
Epoch 800/2000, Cost: 0.213413
Epoch 900/2000, Cost: 0.211952
Epoch 1000/2000, Cost: 0.210560
Epoch 1100/2000, Cost: 0.209232
Epoch 1200/2000, Cost: 0.207967
Epoch 1300/2000, Cost: 0.206761
Epoch 1400/2000, Cost: 0.205619
Epoch 1500/2000, Cost: 0.204522
Epoch 1600/2000, Cost: 0.203484
Epoch 1700/2000, Cost: 0.202485
Epoch 1800/2000, Cost: 0.201542
Epoch 1900/2000, Cost: 0.200635
Epoch 2000/2000, Cost: 0.199769
[Parameter containing:
tensor([[0.9778, 0.4539, 0.5768]], requires_grad=True), Parameter containing:
tensor([0.2802], requires_grad=True)]


In [None]:
# 03.06. 미니 배치와 데이터 로드 (Mini Batch and Data Load)

import torch


# epoch: 전체 훈련 데이터가 학습에 1번 사용된 주기
# iteration: 미니 배치 훈련 데이터가 학습이 1번 사용된 주기

# 데이터
x_train = torch.FloatTensor([[73, 80, 75], 
                             [93, 88, 93], 
                             [89, 91, 90], 
                             [96, 98, 100],   
                             [73, 66, 70]])  
y_train = torch.FloatTensor([[152], [185], [180], [196], [142]])

# 데이터셋 저장
dataset = torch.utils.data.TensorDataset(x_train, y_train)

# 데이터셋 호출
dataloader = torch.utils.data.DataLoader(dataset=dataset, batch_size=2, shuffle=True)

# 모델과 옵티마이저 설계
model = torch.nn.Linear(in_features=3, out_features=1, bias=True)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5)

# 학습 시작
nb_epochs = 20
for epoch in range(nb_epochs + 1):
    for batch_idx, samples in enumerate(dataloader):
        # print(batch_idx)
        # print(samples)
        x_train, y_train = samples

        # H(x) 계산
        prediction = model(x_train)

        # cost 계산
        cost = torch.nn.functional.mse_loss(prediction, y_train)

        # cost로 H(x) 계산
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        # 학습 결과 출력
        print(f'Epoch:{epoch}/{nb_epochs}, Batch:{batch_idx + 1}/{len(dataloader)}, Cost:{cost.item()}')

Epoch:0/20, Batch:1/3, Cost:40394.078125
Epoch:0/20, Batch:2/3, Cost:10625.84765625
Epoch:0/20, Batch:3/3, Cost:5451.56982421875
Epoch:1/20, Batch:1/3, Cost:937.502685546875
Epoch:1/20, Batch:2/3, Cost:265.94464111328125
Epoch:1/20, Batch:3/3, Cost:244.42251586914062
Epoch:2/20, Batch:1/3, Cost:3.5231244564056396
Epoch:2/20, Batch:2/3, Cost:55.83884811401367
Epoch:2/20, Batch:3/3, Cost:2.3711094856262207
Epoch:3/20, Batch:1/3, Cost:21.61503791809082
Epoch:3/20, Batch:2/3, Cost:1.5361597537994385
Epoch:3/20, Batch:3/3, Cost:23.620941162109375
Epoch:4/20, Batch:1/3, Cost:12.848325729370117
Epoch:4/20, Batch:2/3, Cost:17.57857894897461
Epoch:4/20, Batch:3/3, Cost:18.382184982299805
Epoch:5/20, Batch:1/3, Cost:11.40179443359375
Epoch:5/20, Batch:2/3, Cost:20.961008071899414
Epoch:5/20, Batch:3/3, Cost:28.93952751159668
Epoch:6/20, Batch:1/3, Cost:14.807928085327148
Epoch:6/20, Batch:2/3, Cost:2.5992140769958496
Epoch:6/20, Batch:3/3, Cost:33.501407623291016
Epoch:7/20, Batch:1/3, Cost:16.8

In [None]:
# 03.07. 커스텀 데이터셋 (Custom Dataset)

import torch


# 커스텀 데이터셋 클래스
class CustomDataset(torch.utils.data.Dataset):
    # 데이터셋의 전처리를 해주는 부분
    def __init__(self):
        self.x_data = [[73, 80, 75],
                       [93, 88, 93],
                       [89, 91, 90],
                       [96, 98, 100],
                       [73, 66, 70]]
        self.y_data = [[152], [185], [180], [196], [142]]

    # 데이터셋의 길이. 즉, 총 샘플의 수를 적어주는 부분
    def __len__(self):
        return len(self.x_data)

    # 데이터셋에서 특정 1개의 샘플을 가져오는 함수
    def __getitem__(self, idx):
        x = torch.FloatTensor(self.x_data[idx])
        y = torch.FloatTensor(self.y_data[idx])
        return x, y


# 데이터 셋 저장
dataset = CustomDataset()

# 데이터 셋 호출
dataloader = torch.utils.data.DataLoader(dataset=dataset, batch_size=2, shuffle=True)

# 모델과 옵티마이저 설계
model = torch.nn.Linear(in_features=3, out_features=1, bias=True)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5)

# 학습 시작
nb_epochs = 20
for epoch in range(nb_epochs + 1):
    for batch_idx, samples in enumerate(dataloader):
        # print(batch_idx)
        # print(samples)
        x_train, y_train = samples

        # H(x) 계산
        prediction = model(x_train)

        # cost 계산
        cost = torch.nn.functional.mse_loss(prediction, y_train)

        # cost로 H(x) 계산
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        # 학습 결과 출력
        print(f'Epoch:{epoch}/{nb_epochs}, Batch:{batch_idx + 1}/{len(dataloader)}, Cost:{cost.item()}')

Epoch:0/20, Batch:1/3, Cost:40548.7421875
Epoch:0/20, Batch:2/3, Cost:7291.49462890625
Epoch:0/20, Batch:3/3, Cost:1811.4722900390625
Epoch:1/20, Batch:1/3, Cost:1189.89306640625
Epoch:1/20, Batch:2/3, Cost:401.4664306640625
Epoch:1/20, Batch:3/3, Cost:98.8622055053711
Epoch:2/20, Batch:1/3, Cost:59.92449188232422
Epoch:2/20, Batch:2/3, Cost:5.559006690979004
Epoch:2/20, Batch:3/3, Cost:0.0050149112939834595
Epoch:3/20, Batch:1/3, Cost:7.23870849609375
Epoch:3/20, Batch:2/3, Cost:0.5797872543334961
Epoch:3/20, Batch:3/3, Cost:9.432348251342773
Epoch:4/20, Batch:1/3, Cost:2.596285104751587
Epoch:4/20, Batch:2/3, Cost:6.112614154815674
Epoch:4/20, Batch:3/3, Cost:0.1735772043466568
Epoch:5/20, Batch:1/3, Cost:0.5858479142189026
Epoch:5/20, Batch:2/3, Cost:3.764530658721924
Epoch:5/20, Batch:3/3, Cost:4.496255874633789
Epoch:6/20, Batch:1/3, Cost:1.7508087158203125
Epoch:6/20, Batch:2/3, Cost:2.6920955181121826
Epoch:6/20, Batch:3/3, Cost:4.092446804046631
Epoch:7/20, Batch:1/3, Cost:0.87

In [None]:
# 04.02. nn.Module로 구현하는 로지스틱 회귀

import torch


# 일정한 결과 도출
torch.manual_seed(1)

# 훈련 데이터
x_data = [[1, 2], [2, 3], [3, 1], [4, 3], [5, 3], [6, 2]]
y_data = [[0], [0], [0], [1], [1], [1]]
x_train = torch.FloatTensor(x_data)
y_train = torch.FloatTensor(y_data)

# 모듈 층 쌓기
model = torch.nn.Sequential(torch.nn.Linear(in_features=2, out_features=1, bias=True),
                            torch.nn.Sigmoid())

# optimizer 설정
optimizer = torch.optim.SGD(model.parameters(), lr=1)

# 학습 시작
nb_epochs = 1_000
for epoch in range(nb_epochs + 1):

    # H(x) 계산
    hypothesis = model(x_train)

    # cost 계산
    cost = torch.nn.functional.binary_cross_entropy(hypothesis, y_train)

    # cost로 H(x) 개선
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # 20번마다 로그 출력
    if epoch % 100 == 0:

        # 예측값이 0.5를 넘으면 True로 간주
        prediction = hypothesis >= torch.FloatTensor([0.5])

        # 실제값과 일치하는 경우만 True로 간주
        correct_prediction = prediction.float() == y_train

        # 정확도를 계산
        accuracy = correct_prediction.sum().item() / len(correct_prediction)

        # 학습 결과 출력
        print(f'Epoch:{epoch}/{nb_epochs}, Cost:{cost.item()}, Accuracy:{accuracy * 100}')

print(list(model.parameters()))

Epoch:0/1000, Cost:0.5397130846977234, Accuracy:83.33333333333334
Epoch:100/1000, Cost:0.1342717856168747, Accuracy:100.0
Epoch:200/1000, Cost:0.08048570901155472, Accuracy:100.0
Epoch:300/1000, Cost:0.05782029405236244, Accuracy:100.0
Epoch:400/1000, Cost:0.045251354575157166, Accuracy:100.0
Epoch:500/1000, Cost:0.037228479981422424, Accuracy:100.0
Epoch:600/1000, Cost:0.0316491425037384, Accuracy:100.0
Epoch:700/1000, Cost:0.027538282796740532, Accuracy:100.0
Epoch:800/1000, Cost:0.024380534887313843, Accuracy:100.0
Epoch:900/1000, Cost:0.021877193823456764, Accuracy:100.0
Epoch:1000/1000, Cost:0.019843030720949173, Accuracy:100.0
[Parameter containing:
tensor([[3.2534, 1.5181]], requires_grad=True), Parameter containing:
tensor([-14.4839], requires_grad=True)]


In [None]:
# 05.05 소프트맥스 회귀로 MNIST 데이터 분류하기

import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import matplotlib.pyplot as plt
import random


# GPU 사용
USE_CUDA = torch.cuda.is_available()
device = torch.device('cuda' if USE_CUDA else 'cpu')
print(f'다음 기기로 학습합니다: {device}')

# for reproducibility
random.seed(777)
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

# hyperparameters
training_epochs = 15
batch_size = 100

# MNIST dataset
mnist_train = dsets.MNIST(root='MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root='MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)

# dataset loader
data_loader = DataLoader(dataset=mnist_train,
                         batch_size=batch_size,
                         shuffle=True,
                         drop_last=True)

# MNIST data image of shape 28 x 28 = 784
linear = nn.Linear(784, 10, bias=True).to(device)

# 비용 함수와 옵티마이저 정의
# 내부적으로 소프트맥스 함수를 포함하고 있음
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(linear.parameters(), lr=0.1)

# 학습 시작
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = len(data_loader)

    for X, Y in data_loader:
        
        # 배치 크기가 100이므로 아래의 연산에서 X는 (100, 784)의 텐서
        X = X.view(-1, 28 * 28).to(device)
        
        # 레이블은 원-핫 인코딩이 된 상태가 아니라 0 ~ 9의 정수
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = linear(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

# 테스트 데이터를 사용하여 모델을 테스트
# torch.no_grad()를 하면 gradient 계산을 수행하지 않음
with torch.no_grad():
    X_test = mnist_test.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = linear(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

    # MNIST 테스트 데이터에서 무작위로 하나를 뽑아서 예측
    r = random.randint(0, len(mnist_test) - 1)
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = mnist_test.test_labels[r:r + 1].to(device)

    print('Label: ', Y_single_data.item())
    single_prediction = linear(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

    plt.imshow(mnist_test.test_data[r:r + 1].view(28, 28), cmap='Greys', interpolation='nearest')
    plt.show()

다음 기기로 학습합니다: cuda
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=9912422.0), HTML(value='')))


Extracting MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=28881.0), HTML(value='')))


Extracting MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=1648877.0), HTML(value='')))


Extracting MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=4542.0), HTML(value='')))


Extracting MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw



  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


Epoch: 0001 cost = 0.535150588
Epoch: 0002 cost = 0.359577715
Epoch: 0003 cost = 0.331264228
Epoch: 0004 cost = 0.316404670
Epoch: 0005 cost = 0.307106942
Epoch: 0006 cost = 0.300456554
Epoch: 0007 cost = 0.294933408
Epoch: 0008 cost = 0.290956199
Epoch: 0009 cost = 0.287074089
Epoch: 0010 cost = 0.284515619
Epoch: 0011 cost = 0.281914026
Epoch: 0012 cost = 0.279526860
Epoch: 0013 cost = 0.277636588
Epoch: 0014 cost = 0.275874794
Epoch: 0015 cost = 0.274422735
Learning finished


In [None]:
# 07.03. 깊은 CNN으로 MNIST 분류하기

# Import all libraries
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init


# Using GPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# 랜덤 시드 고정
torch.manual_seed(777)

# GPU 사용 가능일 경우 랜덤 시드 고정
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

# 학습 파라미터 설정
learning_rate = 0.001
training_epochs = 15
batch_size = 100

# 데이터 셋 정의
mnist_train = dsets.MNIST(root='MNIST_data/', # 다운로드 경로 지정
                          train=True, # True를 지정하면 훈련 데이터로 다운로드
                          transform=transforms.ToTensor(), # 텐서로 변환
                          download=True)

mnist_test = dsets.MNIST(root='MNIST_data/', # 다운로드 경로 지정
                         train=False, # False를 지정하면 테스트 데이터로 다운로드
                         transform=transforms.ToTensor(), # 텐서로 변환
                         download=True)

# 데이터 로드 정의
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

# 모델 클래스 정의
class CNN(torch.nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.keep_prob = 0.5
        # L1 ImgIn shape=(?, 28, 28, 1)
        #    Conv     -> (?, 28, 28, 32)
        #    Pool     -> (?, 14, 14, 32)
        self.layer1 = torch.nn.Sequential(torch.nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
                                          torch.nn.ReLU(),
                                          torch.nn.MaxPool2d(kernel_size=2, stride=2))

        # L2 ImgIn shape=(?, 14, 14, 32)
        #    Conv      ->(?, 14, 14, 64)
        #    Pool      ->(?, 7, 7, 64)
        self.layer2 = torch.nn.Sequential(torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
                                          torch.nn.ReLU(),
                                          torch.nn.MaxPool2d(kernel_size=2, stride=2))

        # L3 ImgIn shape=(?, 7, 7, 64)
        #    Conv      ->(?, 7, 7, 128)
        #    Pool      ->(?, 4, 4, 128)
        self.layer3 = torch.nn.Sequential(torch.nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
                                          torch.nn.ReLU(),
                                          torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=1))

        # L4 FC 4x4x128 inputs -> 625 outputs
        self.fc1 = torch.nn.Linear(4 * 4 * 128, 625, bias=True)
        torch.nn.init.xavier_uniform_(self.fc1.weight)
        self.layer4 = torch.nn.Sequential(self.fc1,
                                          torch.nn.ReLU(),
                                          torch.nn.Dropout(p=1 - self.keep_prob))

        # L5 Final FC 625 inputs -> 10 outputs
        self.fc2 = torch.nn.Linear(625, 10, bias=True)
        torch.nn.init.xavier_uniform_(self.fc2.weight)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1) # Flatten for FCL
        out = self.layer4(out)
        out = self.fc2(out)
        return out


# CNN 모델 정의
model = CNN().to(device)

# 비용 함수와 옵티마이저 정의
# 비용 함수에 소프트맥스 함수 포함
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# 총 배치 수 정의
total_batch = len(data_loader)

# 학습 시작
for epoch in range(training_epochs):
    avg_cost = 0

    # 미니 배치 단위로 호출, X: 미니 배치, Y: 레이블
    for X, Y in data_loader:
        # image is already size of (28x28), no reshape
        # label is not one-hot encoded
        X = X.to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch

    # 학습 결과 출력
    print(f'[Epoch: {epoch + 1}], Cost: {avg_cost:0.3f}')

# 테스트 진행
# 학습을 진행하지 않을 것이므로 torch.no_grad()
with torch.no_grad():
    X_test = mnist_test.test_data.view(len(mnist_test), 1, 28, 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

[Epoch: 1], Cost: 0.191
[Epoch: 2], Cost: 0.054
[Epoch: 3], Cost: 0.036
[Epoch: 4], Cost: 0.029
[Epoch: 5], Cost: 0.024
[Epoch: 6], Cost: 0.021
[Epoch: 7], Cost: 0.017
[Epoch: 8], Cost: 0.016
[Epoch: 9], Cost: 0.012
[Epoch: 10], Cost: 0.013
[Epoch: 11], Cost: 0.011
[Epoch: 12], Cost: 0.010
[Epoch: 13], Cost: 0.009
[Epoch: 14], Cost: 0.008
[Epoch: 15], Cost: 0.006
Accuracy: 0.9815999865531921


