In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init as init
import torch.nn.functional as F

In [2]:
torch.manual_seed(777)
torch.cuda.manual_seed_all(777)
torch.backends.cudnn.enabled = False

In [3]:
GPU_NUM = 1
device = torch.device(f'cuda:{GPU_NUM}' if torch.cuda.is_available() else 'cpu')
torch.cuda.set_device(device)

print ('Available devices ', torch.cuda.device_count())
print ('Current cuda device ', torch.cuda.current_device())
print(torch.cuda.get_device_name(device))

print("cpu와 cuda 중 다음 기기로 학습함:", device, '\n')

Available devices  2
Current cuda device  1
GeForce RTX 2080 Ti
cpu와 cuda 중 다음 기기로 학습함: cuda:1 



In [4]:
lr = 0.0012
#epochs = 50
epochs = 10
batch_size = 60

In [5]:
mnist_train = dsets.MNIST(root='../MNIST_data/',
                         train=True,
                         transform=transforms.ToTensor(),
                         download=True)
mnist_test = dsets.MNIST(root='../MNIST_data/',
                        train=False,
                        transform=transforms.ToTensor(),
                        download=True)

In [6]:
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                         batch_size=batch_size,
                                         shuffle=True,
                                         drop_last=True)
test_loader = torch.utils.data.DataLoader(dataset=mnist_test,
                                         shuffle=False,
                                         drop_last=True)

In [7]:
class LeNet300(nn.Module):
    def __init__(self):
        super(LeNet300, self).__init__()
        
        self.fclayer = nn.Sequential(
            nn.Linear(28*28, 300, bias = True),
            nn.ReLU(),
            nn.Linear(300, 100, bias = True),
            nn.ReLU(),
            nn.Linear(100, 10, bias = True)
        )
    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.fclayer(x)
        return x

In [8]:
model = LeNet300().to(device)

In [9]:
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr = lr, weight_decay = 1.2e-3)

total_batch = len(data_loader)
for epoch in range(epochs):
    running_loss = 0
    
    for X, Y in data_loader:
        model.train()
        X = X.to(device)
        Y = Y.to(device)
        
        optimizer.zero_grad()
        outputs = model(X)
        loss = criterion(outputs, Y)
        loss.backward()
        optimizer.step()
        
        running_loss = loss / total_batch
        
    with torch.no_grad():
        model.eval()
        """
        X_2 = mnist_test.test_data.view(len(mnist_test), 1, 28, 28).float().to(device)
        Y_2 = mnist_test.test_labels.to(device)
        outputs = model(X_2)
        correct_prediction = torch.argmax(outputs, 1) == Y_2
        accuracy = correct_prediction.float().mean()
        """
    
        correct = 0
        total = 0
        for X, Y in test_loader:
            X = X.to(device)
            Y = Y.to(device)
            outputs = model(X)
            
            predicted = torch.argmax(outputs.data, 1)
            total += Y.size(0)
            correct += (predicted == Y).sum().item()
   # print('    [Epoch: {:>4}] cost = {:>.9}'.format(epoch + 1, running_loss),'Accuracy:', accuracy.item())
#    print(accuracy)
    print(correct / total)

In [10]:
def test(model, test_loader, criterion):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.data.max(1, keepdim=True)[1]  # get the index of the max log-probability
            correct += pred.eq(target.data.view_as(pred)).sum().item()
        test_loss /= len(test_loader.dataset)
        accuracy = 100. * correct / len(test_loader.dataset)
    return accuracy, test_loss


total_batch = len(data_loader)
for epoch in range(epochs):
    running_loss = 0
    
    for X, Y in data_loader:
        model.train()
        X = X.to(device)
        Y = Y.to(device)
        
        optimizer.zero_grad()
        outputs = model(X)
        loss = criterion(outputs, Y)
        loss.backward()
        optimizer.step()
        
        running_loss = loss / total_batch
    
    accuracy, loss2 = test(model, test_loader, criterion) 
    
    print(running_loss, accuracy, loss2)


tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>) 95.73 -6.34413158703912
tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>) 96.49 -6.302240842116997
tensor(8.1623e-05, device='cuda:1', grad_fn=<DivBackward0>) 97.2 -6.887959402368217
tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>) 96.67 -6.935449502489716
tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>) 97.16 -7.0472011812312525
tensor(7.8251e-05, device='cuda:1', grad_fn=<DivBackward0>) 96.81 -7.593076448619366
tensor(7.6064e-05, device='cuda:1', grad_fn=<DivBackward0>) 97.5 -7.639289631887898
tensor(9.2929e-05, device='cuda:1', grad_fn=<DivBackward0>) 97.37 -8.044865715610236
tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>) 97.33 -8.106754867497832
tensor(4.1772e-05, device='cuda:1', grad_fn=<DivBackward0>) 97.59 -8.37580814203769
