In [32]:
import torch as t
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [40]:
# Hyperparameters and problem dimensions used by the cells below.
input_size = 28 * 28   # a 28x28 image flattened into one feature vector (784)
num_classes = 10       # width of the linear layer's output
learning_rate = 1e-2   # step size handed to the optimizer
num_epochs = 5         # number of full passes over the training loader
batch_size = 64        # samples per mini-batch for both data loaders

In [34]:
# Training split of MNIST, stored under ../../data.
# download=True asks torchvision to fetch the files when they are not there yet;
# the ToTensor transform is applied to every image as it is loaded.
train_dataset = torchvision.datasets.MNIST(
    root='../../data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

In [35]:
# Held-out (train=False) split of MNIST, read from the same root directory.
# No download is requested here -- presumably the files are already under
# ../../data because the training-split cell asked for the download.
test_dataset = torchvision.datasets.MNIST(
    root='../../data',
    train=False,
    transform=transforms.ToTensor(),
)

In [36]:
# Mini-batch iterators over the two dataset splits.
# Training batches are reshuffled every epoch so the optimizer sees the samples
# in a different order on each pass.
train_loader = t.utils.data.DataLoader(dataset=train_dataset,
                                       batch_size=batch_size,
                                       shuffle=True)
# Evaluation only aggregates a count over the whole split, so shuffling adds
# nothing; a fixed order keeps per-batch results reproducible between runs.
test_loader = t.utils.data.DataLoader(dataset=test_dataset,
                                      batch_size=batch_size,
                                      shuffle=False)

In [37]:
# Look at the first training sample; the cell's last expression displays the
# image tensor's shape.
img, label = train_dataset[0]
img.shape

torch.Size([1, 28, 28])

In [43]:
# Logistic-regression model: a single linear layer mapping the flattened image
# (input_size features) to one score per class (num_classes outputs).
LR = nn.Linear(in_features=input_size, out_features=num_classes)

# Loss function and optimizer.
# nn.CrossEntropyLoss() computes the softmax internally, so the model itself
# outputs raw, un-normalised scores.
criterion = nn.CrossEntropyLoss()
optimizer = t.optim.Adam(params=LR.parameters(), lr=learning_rate)

In [46]:
# Train the model: num_epochs passes over train_loader, one optimizer step per
# mini-batch, logging the current batch loss every 100 steps.
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Flatten each image into one row: (B, 1, 28, 28) -> (B, 784).
        # B is taken from the batch itself because the final batch of an epoch
        # can be smaller than batch_size. Keeping the batch dimension fixed
        # (rather than reshape(-1, 28*28)) guarantees pixels are never
        # regrouped across samples; a wrong image size then fails loudly in
        # the linear layer instead.
        images = images.reshape(images.size(0), -1)

        # Forward pass: per-class scores, then the loss against the labels.
        pred = LR(images)
        loss = criterion(pred, labels)

        # Backward pass. Gradients accumulate across backward() calls, so they
        # are cleared before computing the ones for this batch.
        optimizer.zero_grad()
        loss.backward()   # compute gradients
        optimizer.step()  # update parameters

        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))

Epoch [1/5], Step [100/938], Loss: 0.3240
Epoch [1/5], Step [200/938], Loss: 0.2016
Epoch [1/5], Step [300/938], Loss: 0.3981
Epoch [1/5], Step [400/938], Loss: 0.5265
Epoch [1/5], Step [500/938], Loss: 0.2564
Epoch [1/5], Step [600/938], Loss: 0.4470
Epoch [1/5], Step [700/938], Loss: 0.1353
Epoch [1/5], Step [800/938], Loss: 0.3691
Epoch [1/5], Step [900/938], Loss: 0.3272
Epoch [2/5], Step [100/938], Loss: 0.1925
Epoch [2/5], Step [200/938], Loss: 0.2320
Epoch [2/5], Step [300/938], Loss: 0.2487
Epoch [2/5], Step [400/938], Loss: 0.3751
Epoch [2/5], Step [500/938], Loss: 0.2439
Epoch [2/5], Step [600/938], Loss: 0.1735
Epoch [2/5], Step [700/938], Loss: 0.1891
Epoch [2/5], Step [800/938], Loss: 0.3309
Epoch [2/5], Step [900/938], Loss: 0.2251
Epoch [3/5], Step [100/938], Loss: 0.2195
Epoch [3/5], Step [200/938], Loss: 0.3936
Epoch [3/5], Step [300/938], Loss: 0.1241
Epoch [3/5], Step [400/938], Loss: 0.2852
Epoch [3/5], Step [500/938], Loss: 0.3356
Epoch [3/5], Step [600/938], Loss:

In [50]:
# Test the model: count correct top-1 predictions over the whole test loader.
# Counters are plain Python ints so the final percentage is ordinary float
# arithmetic and does not depend on how the installed PyTorch version divides
# integer tensors.
total = 0
correct = 0
# In the test phase gradients are not needed, so tracking is disabled
# (for memory efficiency).
with t.no_grad():
    for images, labels in test_loader:
        # Flatten each image into one row while keeping the batch dimension.
        images = images.reshape(images.size(0), -1)
        pred = LR(images)
        # The predicted class is the index of the largest score in each row.
        predicted = pred.argmax(dim=1)
        total += labels.size(0)
        # .item() turns the 0-dim count tensor into a Python int.
        correct += (predicted == labels).sum().item()

# Explicit two-decimal formatting keeps the report stable across versions.
print('Accuracy of the model on the 10000 test images: {:.2f} %'.format(100 * correct / total))


Accuracy of the model on the 10000 test images: 91 %


In [52]:
# Save the model: only the parameters (the state_dict) are written to
# LR_MNIST.ckpt, at a path relative to the current working directory.
t.save(obj=LR.state_dict(), f="LR_MNIST.ckpt")