In [40]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import sklearn.metrics as metrics

In [7]:
BATCH_SIZE = 32

#transforms
transform = transforms.Compose([transforms.ToTensor()])

## download and load training dataset
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset,batch_size=BATCH_SIZE,shuffle=True,num_workers=2)

## download and load testing dataset
testset = torchvision.datasets.MNIST(root='./data',train=False,download=True,transform=transform)

testloader = torch.utils.data.DataLoader(testset, batch_size = BATCH_SIZE, shuffle = False, num_workers=2)

In [19]:
class MyModel(nn.Module):
    # initialization to define all the parameters in the model to use and train
    def __init__(self):
        super(MyModel,self).__init__()
        
        #28x28x1 => 26x26x32
        self.conv1 = nn.Conv2d(in_channels=1,out_channels=32,kernel_size=3)
        self.d1 = nn.Linear(26*26*32,128)
        self.d2 = nn.Linear(128,10)
    # forward function helps to construct the computation graph from input to the output
    def forward(self,x):
        #32x1x28x28 => 32x32x26x26
        x = self.conv1(x)
        x = F.relu(x)
        
        # flatten => 32 x (32*26*26)
        x = x.flatten(start_dim = 1)
        
        # 32 x (32*26*26) => 32x128
        x = self.d1(x)
        x = F.relu(x)
        
        # logits => 32x10
        logits = self.d2(x)
        out = F.softmax(logits, dim = 1)
        return out
        

In [20]:
learning_rate = 0.001
num_epochs = 5

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = MyModel()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

# Training loop
After defining the network parameters, model, and criterion, we can start writing the code for the training loop

In [24]:
for epoch in range(num_epochs):
    training_running_loss = 0.0
    training_acc = 0.0
    
    # training step
    for i, (images, labels) in enumerate(trainloader):
        images = images.to(device)
        labels = labels.to(device)
        
        ## forward + backprop + loss
        logits = model(images)
        loss = criterion(logits,labels)
        optimizer.zero_grad()         # set gradients to zero for all the variables. If we don't do it, the PyTorch will accumulate the gradients.
        loss.backward()               # This line of code does the backpropagation and compute all the gradients.
        
        ## update model params
        optimizer.step()              # Performing one step optimization on the defined parameters of the model
        
        training_running_loss += loss.detach().item() # accumulating the loss
        training_acc += ((torch.argmax(logits,1)).flatten() == labels).type(torch.float).mean().item()
    
    print('Epoch: %d | Loss: %.4f | Train Accuracy: %.2f' \
          %(epoch, training_running_loss/i, training_acc/i))

Epoch: 0 | Loss: 1.4904 | Train Accuracy: 0.97
Epoch: 1 | Loss: 1.4815 | Train Accuracy: 0.98
Epoch: 2 | Loss: 1.4775 | Train Accuracy: 0.99
Epoch: 3 | Loss: 1.4737 | Train Accuracy: 0.99
Epoch: 4 | Loss: 1.4721 | Train Accuracy: 0.99


In [32]:
# Test
test_acc = 0.0
p = 0
for i , (images, labels) in enumerate(testloader, 0):
    images = images.to(device)
    labels = labels.to(device)
    output = model(images)
    test_acc += (torch.argmax(output, 1).flatten() == labels).type(torch.float).mean().item()
    preds = torch.argmax(output,1).flatten().cpu().numpy()
    l = labels.cpu().numpy()
    p += metrics.precision_score(preds,l,average = 'macro')
    
print('precision: %.2f'%(p/i))
print('Test Accuracy: %.2f'%(test_acc/i))

  _warn_prf(average, modifier, msg_start, len(result))


precision: 0.98
Test Accuracy: 0.98
