<a href="https://colab.research.google.com/github/seunghwan1228/TF-Torch/blob/main/pytorch_mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import os
import matplotlib.pyplot as plt
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data

import torchvision
from torchvision import datasets, models, transforms


from tqdm.notebook import tqdm

import time

In [3]:
# Filesystem locations. DATA_PATH is the root under which MNIST is downloaded;
# CKPT_PATH is not referenced by any of the visible cells.
DATA_PATH = '.'
CKPT_PATH = './saved_model'

# The only preprocessing step is the PIL-image -> tensor conversion.
train_transforms = transforms.Compose([transforms.ToTensor()])

# MNIST train split (train=True) and test split (train=False, used here as the
# validation set). Both share the same transform and are downloaded on demand.
train_data = datasets.MNIST(DATA_PATH, train=True, transform=train_transforms, download=True)
valid_data = datasets.MNIST(DATA_PATH, train=False, transform=train_transforms, download=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw



  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [4]:
# Batched iterators over the two datasets: the training loader reshuffles on
# every pass, the validation loader keeps the dataset order.
train_loader = data.DataLoader(
  dataset=train_data,
  batch_size=512,
  shuffle=True,
  pin_memory=True,
)
valid_loader = data.DataLoader(
  dataset=valid_data,
  batch_size=512,
  shuffle=False,
  pin_memory=True,
)

In [5]:
# Pull a single (images, labels) batch from the training loader for inspection.
tmp_img, tmp_label = next(iter(train_loader))

In [6]:
# Show the batch shape together with the mean and std of its pixel values.
# train_loader shuffles, so the statistics can differ slightly between runs.
tmp_img.shape, tmp_img.mean(), tmp_img.std()

(torch.Size([512, 1, 28, 28]), tensor(0.1342), tensor(0.3118))

In [7]:
class MnistLinearModel(nn.Module):
  """Fully connected classifier: input layer, stack of hidden blocks, output layer.

  Args:
    num_units: width of the input projection and of every hidden block.
    num_layers: number of hidden ``Linear -> ReLU`` blocks that follow the
      input layer.
    output_classes: number of scores produced per sample.
    input_features: number of values per flattened sample. Defaults to
      ``28*28*1`` (one single-channel 28x28 image).
  """

  def __init__(self, num_units, num_layers, output_classes, input_features=28*28*1):
    super().__init__()
    self.input_layer = nn.Linear(input_features, num_units)

    # Attribute names and creation order are part of the module's state_dict
    # layout, so they are kept stable.
    self.layers = nn.ModuleList([self.linear_act(num_units, num_units) for _ in range(num_layers)])
    self.output_layer = nn.Linear(num_units, output_classes)

  def linear_act(self, c_in, c_out):
    """Return one hidden block: ``Linear(c_in, c_out)`` followed by ``ReLU``."""
    return nn.Sequential(nn.Linear(c_in, c_out),
                         nn.ReLU())

  def forward(self, x):
    """Flatten every sample and return scores of shape ``(batch, output_classes)``.

    No softmax is applied; the output of the last linear layer is returned as is.
    """
    # reshape (rather than view) also accepts non-contiguous inputs such as a
    # transposed batch; for contiguous inputs it is still a zero-copy view.
    x = x.reshape(x.size(0), -1)
    x = F.relu(self.input_layer(x))

    for layer in self.layers:
      x = layer(x)

    return self.output_layer(x)

In [8]:
# Throwaway instance (32 units, 5 hidden blocks, 10 output classes); in the
# visible cells it is only displayed, never trained.
tmp_model = MnistLinearModel(32, 5, 10)

In [9]:
# Display the module tree of the throwaway model.
tmp_model

MnistLinearModel(
  (input_layer): Linear(in_features=784, out_features=32, bias=True)
  (layers): ModuleList(
    (0): Sequential(
      (0): Linear(in_features=32, out_features=32, bias=True)
      (1): ReLU()
    )
    (1): Sequential(
      (0): Linear(in_features=32, out_features=32, bias=True)
      (1): ReLU()
    )
    (2): Sequential(
      (0): Linear(in_features=32, out_features=32, bias=True)
      (1): ReLU()
    )
    (3): Sequential(
      (0): Linear(in_features=32, out_features=32, bias=True)
      (1): ReLU()
    )
    (4): Sequential(
      (0): Linear(in_features=32, out_features=32, bias=True)
      (1): ReLU()
    )
  )
  (output_layer): Linear(in_features=32, out_features=10, bias=True)
)

In [10]:
def train(model, train_data, valid_data, loss_module, optimizer, epochs=100, device=None, log_every=10):
  """Train ``model`` and report training / validation accuracy.

  Every epoch runs one optimisation pass over ``train_data`` followed by one
  gradient-free evaluation pass over ``valid_data``. Accuracies are printed
  every ``log_every`` epochs and the total wall-clock time is printed once
  all epochs are done.

  Args:
    model: module mapping an image batch to per-class scores.
    train_data: iterable yielding ``(images, labels)`` batches for training.
    valid_data: iterable yielding ``(images, labels)`` batches for validation.
    loss_module: callable ``loss_module(pred, label)`` returning a tensor that
      supports ``backward()``.
    optimizer: optimizer whose ``zero_grad()`` / ``step()`` update ``model``.
    epochs: number of epochs to run.
    device: device the batches are moved to. ``None`` (default) uses the
      device of ``model``'s first parameter (CPU if it has none), so no global
      ``device`` variable is required.
    log_every: print the accuracies every this many epochs; ``0`` or ``None``
      disables the per-epoch prints.

  Returns:
    None.
  """
  if device is None:
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

  started = time.time()
  for epoch in tqdm(range(1, epochs+1)):
    should_log = bool(log_every) and epoch % log_every == 0

    # ---- training pass ----
    model.train()
    cnt = 0
    true_pred = 0

    for img, label in train_data:
      img, label = img.to(device), label.to(device)
      optimizer.zero_grad()
      pred = model(img)
      loss = loss_module(pred, label)
      loss.backward()
      optimizer.step()

      true_pred += (torch.argmax(pred, dim=1) == label).sum().item()
      cnt += label.shape[0]

    # nan instead of ZeroDivisionError when the loader yields no batches.
    train_acc = true_pred / cnt if cnt else float('nan')

    if should_log:
      print('train:', train_acc)

    # ---- validation pass (no gradients, no loss needed for accuracy) ----
    model.eval()
    valid_cnt = 0
    valid_true_pred = 0
    with torch.no_grad():
      for v_img, v_label in valid_data:
        v_img, v_label = v_img.to(device), v_label.to(device)
        v_pred = model(v_img)

        valid_true_pred += (v_pred.argmax(dim=1) == v_label).sum().item()
        valid_cnt += v_img.shape[0]

    valid_acc = valid_true_pred / valid_cnt if valid_cnt else float('nan')
    if should_log:
      print('valid:', valid_acc)
  print(f'Complete: {time.time() - started}')

In [11]:
# The recorded run used a K80 GPU. Fall back to the CPU when CUDA is not
# available so the cell still runs on machines without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed the global torch RNG so weight initialisation is repeatable across runs.
torch.manual_seed(0)

model = MnistLinearModel(32, 5, 10).to(device)
loss_module = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001)


train(model, train_loader, valid_loader, loss_module, optimizer)

  0%|          | 0/100 [00:00<?, ?it/s]

train: 0.9429166666666666
valid: 0.937
train: 0.9655833333333333
valid: 0.9563
train: 0.97795
valid: 0.9584
train: 0.9826166666666667
valid: 0.9595
train: 0.9861166666666666
valid: 0.9598
train: 0.9872833333333333
valid: 0.9582
train: 0.9921166666666666
valid: 0.9588
train: 0.9928666666666667
valid: 0.9613
train: 0.9953
valid: 0.9574
train: 0.9936833333333334
valid: 0.9595
Complete: 1007.0228490829468
