## MNIST backpropagation

In [12]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms

In [13]:
# Device on which every tensor in this notebook is placed.
device = "cpu"

In [14]:
# Seed torch's default random generator so random draws are repeatable across runs.
torch.manual_seed(seed=777)

<torch._C.Generator at 0x212bb0d1e90>

In [15]:
# Hyper-parameters
learning_rate, batch_size = 0.5, 10  # step size of each update, samples per mini-batch

In [16]:
# Load the MNIST train and test splits (download=True is passed, so missing files are fetched).
# The path is a raw string: the previous non-raw literal relied on unrecognized escape
# sequences such as "\T" and "\M", which Python warns about. The string value is unchanged.
# NOTE(review): absolute, machine-specific path — consider a location relative to the notebook.
mnist_root = r"C:\TIL\CUAI 4기\모두를 위한 딥러닝\MNIST_data"

mnist_train = dsets.MNIST(root=mnist_root, train=True,
                          transform=transforms.ToTensor(), download=True)

mnist_test = dsets.MNIST(root=mnist_root, train=False,
                         transform=transforms.ToTensor(), download=True)

In [17]:
# Mini-batch iterator over the training split: reshuffled on every pass, and the final
# incomplete batch is dropped so every batch has exactly `batch_size` samples.
data_loader = torch.utils.data.DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)

#### MLP 내부

In [18]:
# Each MNIST image consists of 784 pixels (28 * 28).
# Network shape: 784 inputs -> 30 hidden units -> 10 outputs.
# Storage is allocated uninitialized (torch.empty) directly on `device`, so the values must be
# set by an explicit initialization before use. Allocating on the target device *inside* the
# Parameter keeps these objects real leaf Parameters; calling `.to(device)` on a Parameter
# returns a plain non-leaf tensor whenever an actual device transfer takes place.
w1 = torch.nn.Parameter(torch.empty(784, 30, device=device))  # layer-1 weights
b1 = torch.nn.Parameter(torch.empty(30, device=device))       # layer-1 bias
w2 = torch.nn.Parameter(torch.empty(30, 10, device=device))   # layer-2 weights
b2 = torch.nn.Parameter(torch.empty(10, device=device))       # layer-2 bias

In [19]:
# Initialize every parameter in place with samples from a normal distribution
# (mean 0, standard deviation 1). The call order is kept so the random stream is consumed
# in the same sequence; the last expression is what the cell displays.
torch.nn.init.normal_(w1, mean=0.0, std=1.0)
torch.nn.init.normal_(b1, mean=0.0, std=1.0)
torch.nn.init.normal_(w2, mean=0.0, std=1.0)
torch.nn.init.normal_(b2, mean=0.0, std=1.0)

Parameter containing:
tensor([ 0.3078, -1.9857,  1.0512,  1.5122, -1.0199, -0.7402, -1.3111,  0.6142,
        -0.6474,  0.1758], requires_grad=True)

#### 시그모이드 함수

In [20]:
def sigmoid(x):
    """Return the element-wise logistic function 1 / (1 + exp(-x)) of tensor `x`."""
    denominator = torch.exp(-x) + 1.0
    return 1.0 / denominator

In [21]:
def sigmoid_prime(x):
    """Return the element-wise derivative of the sigmoid at `x`: s * (1 - s), with s = sigmoid(x)."""
    s = sigmoid(x)
    return s * (1 - s)

#### 모델 학습

In [22]:
# Evaluation subset: the first 1000 samples of the test split, flattened to 784-element
# float vectors. (These samples are only used to measure accuracy, not for training.)
# `data` / `targets` replace the deprecated `test_data` / `test_labels` aliases.
# NOTE(review): `mnist_test.data` is read directly, so the ToTensor() transform applied to
# the training batches is not applied here — presumably the evaluation inputs are on a
# different scale than the training inputs; confirm whether that is intended.
X_test = mnist_test.data.view(-1, 28 * 28).float().to(device)[:1000]
y_test = mnist_test.targets.to(device)[:1000]

total_steps = 10000  # number of mini-batch updates to perform
eval_every = 1000    # print accuracy on the evaluation subset every this many updates

i = 0
# All gradients below are derived by hand, so autograd tracking is switched off.
# Without this, each `w = w - lr * d_w` update on a requires-grad parameter would chain
# the autograd graph across every step and keep all intermediate tensors alive.
with torch.no_grad():
    while i < total_steps:
        for X, Y in data_loader:
            i = i + 1

            # forward pass
            X = X.view(-1, 28 * 28).to(device)  # flatten each image to 784 values
            Y = torch.zeros((batch_size, 10)).scatter_(1, Y.unsqueeze(1), 1).to(device)  # one-hot labels
            l1 = torch.add(torch.matmul(X, w1), b1)   # layer 1 pre-activation
            a1 = sigmoid(l1)
            l2 = torch.add(torch.matmul(a1, w2), b2)  # layer 2 pre-activation
            y_pred = sigmoid(l2)

            # Output error; this equals the gradient of 0.5 * (y_pred - Y)^2 w.r.t. y_pred.
            diff = y_pred - Y

            # backpropagation (chain rule), output layer first
            d_l2 = diff * sigmoid_prime(l2)
            d_b2 = d_l2
            d_w2 = torch.matmul(torch.transpose(a1, 0, 1), d_l2)

            d_a1 = torch.matmul(d_l2, torch.transpose(w2, 0, 1))
            d_l1 = d_a1 * sigmoid_prime(l1)
            d_b1 = d_l1
            d_w1 = torch.matmul(torch.transpose(X, 0, 1), d_l1)

            # Gradient-descent update. The weight gradients are summed over the batch by
            # the matmul, while the bias gradients are averaged over the batch dimension.
            w1 = w1 - learning_rate * d_w1
            b1 = b1 - learning_rate * torch.mean(d_b1, 0)
            w2 = w2 - learning_rate * d_w2
            b2 = b2 - learning_rate * torch.mean(d_b2, 0)

            if i % eval_every == 0:
                # Forward pass on the evaluation subset with the current parameters.
                a1_test = sigmoid(torch.add(torch.matmul(X_test, w1), b1))
                y_pred_test = sigmoid(torch.add(torch.matmul(a1_test, w2), b2))
                correct = (torch.argmax(y_pred_test, 1) == y_test).sum()
                print("{:.2f} %".format(correct.item() / y_test.size(0) * 100))

            if i == total_steps:
                break
        

80.60 %
85.40 %
88.50 %
89.10 %
88.80 %
88.90 %
90.10 %
90.80 %
88.70 %
90.60 %
