In [1]:
import datetime
import sys

In [2]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets

# load dataset
train_dataset = dsets.MNIST(root='./data',
                           train=True,
                           transform=transforms.ToTensor(),
                           download=False)
test_dataset = dsets.MNIST(root='./data',
                         train=False,
                         transform=transforms.ToTensor())

# dataset iterable
batch_size = 100
n_iters = 3000
num_epochs = int(n_iters / (len(train_dataset) / batch_size))

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                          batch_size=batch_size,
                                          shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# model
class LogisticRegressionModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(LogisticRegressionModel, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)
    
    def forward(self, x):
        return self.linear(x)

input_dim = 28*28
output_dim = 10

model = LogisticRegressionModel(input_dim, output_dim)

#GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else 'cpu')
model.to(device)

# loss
criterion = nn.CrossEntropyLoss()

#optimizer
learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# train
iter = 0
start_time = datetime.datetime.now()

for e in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        
        # GPU
        images = images.view(-1, 28*28).requires_grad_().to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        outputs = model(images)
        
        loss =criterion(outputs, labels)
        
        loss.backward()
        
        optimizer.step()
        
        iter += 1
        if iter % 500 == 0:
            correct = 0
            total = 0
            
            for images, labels in test_loader:
                images = images.view(-1, 28*28).to(device)
                outputs = model(images)
                
                _, predicted = torch.max(outputs.data, 1)
                
                total += labels.size(0)
                
                #GPU
                if torch.cuda.is_available():
                    correct += (predicted.cpu() == labels.cpu()).sum()
                else:
                    correct += (predicted == labels).sum()
                                
            accuracy = 100 * correct/total
            
            print('Iterations: {}, Loss: {}. Accuracy: {}'.format(iter, loss.data.item(), accuracy))
            
sys.stdout.write('Time '+ str(datetime.datetime.now() - start_time))

Iterations: 500, Loss: 0.3984835743904114. Accuracy: 90
Iterations: 1000, Loss: 0.43380919098854065. Accuracy: 91
Iterations: 1500, Loss: 0.25238001346588135. Accuracy: 91
Iterations: 2000, Loss: 0.41780635714530945. Accuracy: 92
Iterations: 2500, Loss: 0.23664851486682892. Accuracy: 92
Iterations: 3000, Loss: 0.14310169219970703. Accuracy: 92
Time 0:00:31.341716