In [2]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Report which PyTorch build this kernel is running.
print('PyTorch version:[%s].' % (torch.__version__,))
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('device:[%s].' % (device,))

PyTorch version:[1.12.1+cu113].
device:[cuda:0].


In [7]:
from torchvision import datasets, transforms
# Training split of MNIST, stored under ./data/ (downloaded on first use).
# ToTensor is applied to every image when it is read from the dataset.
mnist_train = datasets.MNIST(
    root='./data/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
# Test split, with the same storage location and transform.
mnist_test = datasets.MNIST(
    root='./data/',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [8]:
# Show the summary repr of each split, one header per dataset.
for header, dataset in (('mnist_train:\n', mnist_train), ('mnist_test:\n', mnist_test)):
    print(header, dataset, '\n')
print('Done')

mnist_train:
 Dataset MNIST
    Number of datapoints: 60000
    Root location: ./data/
    Split: Train
    StandardTransform
Transform: ToTensor() 

mnist_test:
 Dataset MNIST
    Number of datapoints: 10000
    Root location: ./data/
    Split: Test
    StandardTransform
Transform: ToTensor() 

Done


In [9]:
# Mini-batch size shared by the training and test loaders.
BATCH_SIZE = 256
# Training batches are reshuffled on every pass over the data.
train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=1)
# Evaluation gains nothing from shuffling; a fixed order keeps test passes reproducible.
test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=BATCH_SIZE, shuffle=False, num_workers=1)

print('Done')

Done


In [None]:
class MultiLayerPerceptronClass(nn.Module):
    """Two-layer perceptron: Linear(xdim, hdim) -> ReLU -> Linear(hdim, ydim).

    Args:
        name: Label stored on the instance as ``self.name``.
        xdim: Size of each input feature vector.
        hdim: Number of hidden units.
        ydim: Number of output values per sample.
    """

    def __init__(self, name='mlp', xdim=784, hdim=256, ydim=10):
        super().__init__()
        self.name = name
        # Layer sizes: input, hidden, output.
        self.xdim, self.hdim, self.ydim = xdim, hdim, ydim
        self.lin_1 = nn.Linear(xdim, hdim)
        self.lin_2 = nn.Linear(hdim, ydim)
        self.init_param()

    def init_param(self):
        """Re-initialise both layers: Kaiming-normal weights, zero biases."""
        # lin_1 is handled before lin_2 so random draws happen in a fixed order.
        for layer in (self.lin_1, self.lin_2):
            nn.init.kaiming_normal_(layer.weight)
            nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Return the output of the second linear layer for ``x`` (no softmax applied)."""
        hidden = F.relu(self.lin_1(x))
        return self.lin_2(hidden)
# Instantiate the MLP, then move it to the selected device (CPU or GPU).
M = MultiLayerPerceptronClass(name='mlp', xdim=784, hdim=256, ydim=10)
M = M.to(device)
# Cross-entropy loss and an Adam optimiser over all model parameters.
loss = nn.CrossEntropyLoss()
optm = optim.Adam(M.parameters(), lr=1e-3)
print('Done')

In [None]:
# Sanity-check the forward pass on a small random batch (2 samples x 784 features).
x_numpy = np.random.rand(2,784)
# Cast to float32 and move the batch to the same device as the model.
x_torch = torch.from_numpy(x_numpy).float().to(device)
# Call the module itself rather than .forward() so registered hooks are not bypassed.
y_torch = M(x_torch)
# Detach from the autograd graph and copy back to host memory as a NumPy array.
y_numpy = y_torch.detach().cpu().numpy()

print('x_numpy:\n', x_numpy)
print('x_torch:\n', x_torch)
print('y_torch:\n', y_torch)
print('y_numpy:\n', y_numpy)

In [None]:
# Print each parameter's name, shape and first five values, and count all parameters.
np.set_printoptions(precision=3)
n_param = 0
for p_idx, (param_name, param) in enumerate(M.named_parameters()):
    param_numpy = param.detach().cpu().numpy()
    n_param += param_numpy.size
    # One placeholder per argument: the shape placeholder was missing, which raised
    # "TypeError: not all arguments converted during string formatting".
    print('[%d] name:[%s] shape:[%s].'%(p_idx,param_name, param_numpy.shape))
    print('      val%s'%(param_numpy.reshape(-1)[:5]))
print('Total number of parameters:[%s].'%(format(n_param,'d')))

In [None]:
def func_eval(model, data_iter, device):
    with torch.no_grad():
        model.eval()
        n_total, n_correct = 0, 0
        for batch_in, batch_out in data_iter:
            y_trgt = batch_out.to(device)
            model_pred = model(batch_in.view())