In [2]:
import torch
import torch.nn as nn
import random
import torchvision.datasets as dsets
from torchvision import transforms

In [3]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'

In [4]:
# Seed PyTorch and Python's `random` module identically so reruns are repeatable.
torch.manual_seed(777)
random.seed(777)
# When running on the GPU, also seed the generator of every CUDA device.
if device == 'cuda':
  torch.cuda.manual_seed_all(777)

In [5]:
## root      : directory the data is downloaded to / loaded from
## train     : True selects the training split, False the test split
## transform : applied to every image when it is fetched (ToTensor -> tensor)
mnist_train = dsets.MNIST(
    root='MNIST_data/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
mnist_test = dsets.MNIST(
    root='MNIST_data/',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw



In [6]:
# Echo the test dataset so the notebook displays its summary repr.
mnist_test

Dataset MNIST
    Number of datapoints: 10000
    Root location: MNIST_data/
    Split: Test
    StandardTransform
Transform: ToTensor()

In [7]:
## drop_last=True discards the final batch when it has fewer than batch_size samples
data_loader = torch.utils.data.DataLoader(
    dataset=mnist_train,
    batch_size=128,
    shuffle=True,
    drop_last=True,
)

In [36]:
class CNN(nn.Module):
  """Three-stage convolutional classifier that emits 10 raw class scores.

  Every stage is Conv2d(3x3, stride 1, padding 1) -> ReLU -> MaxPool2d(2).
  For the intended 1x28x28 input the spatial size shrinks 28 -> 14 -> 7 -> 3
  while the channel count grows 1 -> 32 -> 64 -> 128. The flattened
  128*3*3 feature vector then passes through a 128-unit hidden layer and a
  10-unit output layer.
  """

  @staticmethod
  def _conv_stage(in_channels, out_channels):
    """Build one Conv2d -> ReLU -> MaxPool2d(2) stage."""
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(2),
    )

  def __init__(self):
    super().__init__()
    # Stages are created in layer order so that parameter initialisation
    # draws from the random generator in a fixed, reproducible sequence.
    self.layer1 = self._conv_stage(1, 32)     # 28x28 -> 14x14 after pooling
    self.layer2 = self._conv_stage(32, 64)    # 14x14 -> 7x7 after pooling
    self.layer3 = self._conv_stage(64, 128)   # 7x7  -> 3x3 after pooling

    # fc1 input size = number of filters * remaining spatial size (128 * 3 * 3).
    self.fc1 = nn.Linear(128 * 3 * 3, 128, bias=True)
    self.relu = nn.ReLU()
    self.fc2 = nn.Linear(128, 10, bias=True)

    # Xavier-uniform initialisation for both fully connected weight matrices.
    for dense in (self.fc1, self.fc2):
      nn.init.xavier_uniform_(dense.weight)

  def forward(self, x):
    """Return the (batch, 10) score tensor for the input batch ``x``."""
    features = self.layer3(self.layer2(self.layer1(x)))
    # Collapse channel and spatial dimensions into one vector per sample.
    flat = features.view(features.size(0), -1)
    return self.fc2(self.relu(self.fc1(flat)))

In [37]:
# Build the network, then move its parameters onto the selected device.
model = CNN()
model = model.to(device)

In [38]:
# Display the module tree to check the layer configuration.
model

CNN(
  (layer1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Linear(in_features=1152, out_features=128, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [39]:
# Dummy batch holding one single-channel 28x28 image, used only to sanity-check
# the network's output shape. torch.zeros gives deterministic contents, whereas
# torch.Tensor(1, 1, 28, 28) returns uninitialized memory that may hold NaN/inf.
# It also draws nothing from the random generator, so seeding is unaffected.
value = torch.zeros(1, 1, 28, 28).to(device)

In [40]:
# Sanity check: push the dummy input through the network and show the output shape.
model(value).shape

torch.Size([1, 10])

In [43]:
## CrossEntropyLoss applies softmax internally, so the model outputs raw scores
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [45]:
total_batch = len(data_loader)
num_epochs = 15

for epoch in range(num_epochs):
  # Mean per-batch loss for this epoch, kept as a plain Python float.
  avg_cost = 0.0
  for X, Y in data_loader:
    # Move the batch onto the same device as the model.
    X = X.to(device)
    Y = Y.to(device)

    ## reset gradients left over from the previous step
    optimizer.zero_grad()
    hypothesis = model(X)
    ## compare the predictions with the ground-truth labels to get the loss
    cost = criterion(hypothesis, Y)
    cost.backward()
    ## update the weights using the optimizer (learning rate set above)
    optimizer.step()

    # .item() extracts the scalar value so the running average does not carry
    # autograd history; adding the `cost` tensor itself would chain every
    # batch's graph node onto the accumulator.
    avg_cost += cost.item() / total_batch
  print('epoch', '%04d'%(epoch+1), 'cost = ','{:.9f}'.format(avg_cost))

epoch 0001 cost =  0.205241725
epoch 0002 cost =  0.046625447
epoch 0003 cost =  0.032008763
epoch 0004 cost =  0.025741601
epoch 0005 cost =  0.019730818
epoch 0006 cost =  0.016483951
epoch 0007 cost =  0.015096014
epoch 0008 cost =  0.012122239
epoch 0009 cost =  0.011444286
epoch 0010 cost =  0.008128060
epoch 0011 cost =  0.009289267
epoch 0012 cost =  0.007500156
epoch 0013 cost =  0.006400755
epoch 0014 cost =  0.004178956
epoch 0015 cost =  0.006582484
