In [12]:
%matplotlib inline
import torch
import torch.nn as nn
import torchvision.datasets as dset
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

# import check_util.checker as checker 

print('pytorch version: {}'.format(torch.__version__))
print('GPU 사용 가능 여부: {}'.format(torch.cuda.is_available()))
device = "cuda" if torch.cuda.is_available() else "cpu"

pytorch version: 1.5.0+cpu
GPU 사용 가능 여부: False


In [13]:
batch_size = 100
num_epochs = 5
learning_rate = 0.001

In [14]:
from torch.utils.data import DataLoader
root = './data'
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=(0.5,), std=(0.5,))])
train_data = dset.FashionMNIST(root=root, train=True, transform=transform, download=True)
test_data = dset.FashionMNIST(root=root, train=False, transform=transform, download=True)
## 코드 시작 ##
train_loader = DataLoader(dataset=train_data, 
                          batch_size=32, 
                          shuffle=True, 
                          num_workers=2)

test_loader = DataLoader(dataset=test_data, 
                          batch_size=32, 
                          shuffle=True, 
                          num_workers=2)
## 코드 종료 ##

In [15]:
labels_map = {0 : 'T-Shirt', 1 : 'Trouser', 2 : 'Pullover', 3 : 'Dress', 4 : 'Coat', 5 : 'Sandal', 6 : 'Shirt',
              7 : 'Sneaker', 8 : 'Bag', 9 : 'Ankle Boot'}
columns = 5
rows = 5
# fig = plt.figure(figsize=(8,8))

# for i in range(1, columns*rows+1):
#     data_idx = np.random.randint(len(train_data))
#     img = train_data[data_idx][0][0,:,:].numpy() # numpy()를 통해 torch Tensor를 numpy array로 변환
#     label = labels_map[train_data[data_idx][1]] # item()을 통해 torch Tensor를 숫자로 변환
    
#     fig.add_subplot(rows, columns, i)
#     plt.title(label)
# #     plt.imshow(img, cmap='gray')
#     plt.axis('off')
# plt.show()

In [24]:
class DNN(nn.Module):
    def __init__(self, num_classes=10):
        super(DNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.BatchNorm1d(512),
            nn.ReLU()
        )
        self.layer2 = nn.Sequential(
            nn.Linear(512, 128),
            nn.BatchNorm1d(128),
            nn.ReLU()
        )
        self.layer3 = nn.Sequential(
            nn.Linear(128, 32),
            nn.BatchNorm1d(32),
            nn.ReLU()
        )
        self.layer4 = nn.Sequential(
            nn.Linear(32,10)
        )
    
    def forward(self, x):
        x = x.view(x.size(0), -1) # flatten
        x_out = self.layer1(x)
        x_out = self.layer2(x_out)
        x_out = self.layer3(x_out)
        x_out = self.layer4(x_out)

        return x_out

In [25]:
def weights_init(m):
    if isinstance(m, nn.Linear): # 모델의 모든 MLP 레이어에 대해서
        nn.init.xavier_normal_(m.weight) # Weight를 xavier_normal로 초기화
        print(m.weight)

In [26]:
torch.manual_seed(7777) # 일관된 weight initialization을 위한 random seed 설정
model = DNN().to(device)
model.apply(weights_init) # 모델에 weight_init 함수를 적용하여 weight를 초기화

Parameter containing:
tensor([[-0.0199,  0.0633, -0.0464,  ...,  0.0558,  0.0085,  0.1140],
        [-0.1089, -0.0388,  0.0418,  ...,  0.0677,  0.0866, -0.1158],
        [-0.0513, -0.0045, -0.0182,  ..., -0.0374,  0.0525,  0.0485],
        ...,
        [ 0.0277, -0.0726, -0.0044,  ...,  0.0335, -0.0817,  0.0323],
        [ 0.0086, -0.0067,  0.0321,  ...,  0.0402,  0.0288, -0.0947],
        [-0.0183, -0.0171, -0.0010,  ..., -0.0124,  0.0222, -0.0565]],
       requires_grad=True)
Parameter containing:
tensor([[-3.1014e-02, -4.6322e-02, -3.9021e-02,  ...,  4.3780e-02,
         -6.3242e-02, -3.1698e-02],
        [ 3.4446e-02,  5.1718e-02,  2.0039e-02,  ..., -3.5063e-05,
          4.6842e-02, -7.9096e-02],
        [-6.3938e-03, -4.2534e-02,  6.3819e-02,  ...,  1.2139e-02,
         -3.1255e-02, -3.3414e-02],
        ...,
        [ 1.6447e-02,  1.8054e-02,  5.2685e-02,  ...,  3.2749e-02,
         -8.8620e-02, -3.7797e-02],
        [ 6.9293e-02,  3.6293e-02,  1.2373e-01,  ...,  5.5416e-02,
   

DNN(
  (layer1): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (layer2): Sequential(
    (0): Linear(in_features=512, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (layer3): Sequential(
    (0): Linear(in_features=128, out_features=32, bias=True)
    (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (layer4): Sequential(
    (0): Linear(in_features=32, out_features=10, bias=True)
  )
)

In [27]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

In [28]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x17c698bcb08>

In [29]:
for epoch in range(num_epochs):
    for i, (imgs, labels) in enumerate(train_loader):
        imgs, labels = imgs.to(device), labels.to(device)
        
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        _, argmax = torch.max(outputs, 1)
        accuracy = (labels == argmax).float().mean()
        
        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%'.format(
                epoch+1, num_epochs, i+1, len(train_loader), loss.item(), accuracy.item() * 100))

Epoch [1/5], Step [100/1875], Loss: 0.8865, Accuracy: 71.88%
Epoch [1/5], Step [200/1875], Loss: 0.6996, Accuracy: 78.12%
Epoch [1/5], Step [300/1875], Loss: 0.5967, Accuracy: 84.38%
Epoch [1/5], Step [400/1875], Loss: 0.5960, Accuracy: 71.88%
Epoch [1/5], Step [500/1875], Loss: 0.8688, Accuracy: 68.75%
Epoch [1/5], Step [600/1875], Loss: 0.4206, Accuracy: 84.38%
Epoch [1/5], Step [700/1875], Loss: 0.5857, Accuracy: 81.25%
Epoch [1/5], Step [800/1875], Loss: 0.4237, Accuracy: 84.38%
Epoch [1/5], Step [900/1875], Loss: 0.2859, Accuracy: 87.50%
Epoch [1/5], Step [1000/1875], Loss: 0.2824, Accuracy: 90.62%
Epoch [1/5], Step [1100/1875], Loss: 0.4811, Accuracy: 78.12%
Epoch [1/5], Step [1200/1875], Loss: 0.3787, Accuracy: 81.25%
Epoch [1/5], Step [1300/1875], Loss: 0.2417, Accuracy: 90.62%
Epoch [1/5], Step [1400/1875], Loss: 0.2560, Accuracy: 93.75%
Epoch [1/5], Step [1500/1875], Loss: 0.6044, Accuracy: 75.00%
Epoch [1/5], Step [1600/1875], Loss: 0.4351, Accuracy: 84.38%
Epoch [1/5], Step

In [30]:
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for i, (imgs, labels) in enumerate(test_loader):
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = model(imgs)
        _, argmax = torch.max(outputs, 1) # max()를 통해 최종 출력이 가장 높은 class 선택
        total += imgs.size(0)
        correct += (labels == argmax).sum().item()
    
    print('Test accuracy for {} images: {:.2f}%'.format(total, correct / total * 100))

Test accuracy for 10000 images: 88.73%


dead kernel 발생하여 plot 하지 않음.

In [31]:
columns = 5
rows = 5
# fig = plt.figure(figsize=(8,8))

model.eval()
for i in range(1, columns*rows+1):
    data_idx = np.random.randint(len(test_data))
    input_img = test_data[data_idx][0].unsqueeze(dim=0).to(device) 
    '''
    unsqueeze()를 통해 입력 이미지의 shape을 (1, 28, 28)에서 (1, 1, 28, 28)로 변환. 
    모델에 들어가는 입력 이미지의 shape은 (batch_size, channel, width, height) 되어야 함에 주의하세요!
    '''
    output = model(input_img)
    _, argmax = torch.max(output, 1)
    pred = labels_map[argmax.item()]
    label = labels_map[test_data[data_idx][1]]
    
#     fig.add_subplot(rows, columns, i)
#     if pred == label:
#         plt.title(pred + '(O)')
#     else:
#         plt.title(pred + '(X)' + ' / ' + label)
#     plot_img = test_data[data_idx][0][0,:,:]
#     plt.imshow(plot_img, cmap='gray')
#     plt.axis('off')
# model.train()
# plt.show()