In [56]:
import torch
import torchvision
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
import torch.nn.functional as F  # All functions that don't have any parameters
from torch.utils.data import (
    DataLoader,
)  # Gives easier dataset managment and creates mini batches
import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
import torchvision.transforms as transforms  # Transformations we can perform on our dataset

In [57]:
# Hyperparameters
input_size = 28
hidden_size = 256
num_layers = 1
num_classes = 10
sequence_length = 28
learning_rate = 0.01
batch_size = 64
num_epochs = 5

train_dataset = datasets.MNIST(
    root="dataset/", train=True, transform=transforms.ToTensor(), download=True
)

test_dataset = datasets.MNIST(
    root="dataset/", train=False, transform=transforms.ToTensor(), download=True
)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)

In [58]:
train_dataset

Dataset MNIST
    Number of datapoints: 60000
    Root location: dataset/
    Split: Train
    StandardTransform
Transform: ToTensor()

In [59]:
images,labels=next(iter(train_loader))

In [60]:
images.shape   ## batch of 64 images, 1 channel, 28X28
print(images.squeeze(1).shape)

torch.Size([64, 28, 28])


In [61]:
class RNN(nn.Module):
    def __init__(self,input_size,hidden_size, num_layers, num_classes):
        super(RNN,self).__init__()
        self.input_size=input_size
        self.hidden_size=hidden_size
        self.num_layers=num_layers
        
        self.rnn=nn.GRU(input_size,hidden_size,num_layers,batch_first=True)   ##Instead of GRU, simple RNN can be used as below
        #self.rnn=nn.RNN(input_size,hidden_size,num_layers,batch_first=True)
        self.fc=nn.Linear(sequence_length*hidden_size, num_classes)    ## The output of RNN is suposed to be this, check 
        
    def forward(self,x):
        #h0=torch.zeros(batch_size,num_layers,hidden_size)  ## This is supposed to be the dimension of hidden, check docs
        ##We can omit the above line since pytorch implicitly can define the initial hidden state
        out, _=self.rnn(x)
        out= out.reshape(out.size(0),-1)   ## Since the output of RNN is : Batch X seq_length X hidden_size
        out=self.fc(out)
        return out
    
                
        
        

In [62]:

model = RNN(input_size, hidden_size, num_layers, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
for epoch in range(num_epochs):
    totalloss=0
    for i,(data,targets) in enumerate(train_loader):
        data=data.squeeze(1)   ##Get rid of channels dimension since our RNN does not expect it
        
        scores=model(data)
        loss = criterion(scores, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        totalloss+=loss.item()
        if(i%100==99):
            print(epoch+1, i+1, totalloss/100)   ##Print loss every 100 mini batches
            totalloss=0
            
        

        

1 100 0.5416555697470904
1 200 0.30151759488508106
1 300 0.24489514326676726
1 400 0.22135506493970752
1 500 0.2646946180798113
1 600 0.21628798396792262
1 700 0.2142199614678975
1 800 0.2212664064054843
1 900 0.2815736058715265
2 100 0.18577722580754197
2 200 0.17462258572340944
2 300 0.15955362594540928
2 400 0.21128770615527173
2 500 0.18967950030229985
2 600 0.1538682202711425
2 700 0.559040157776326
2 800 0.9157516873627901
2 900 0.6677597797103226
3 100 0.345773831109982
3 200 0.34393676704110476
3 300 0.297503183817189
3 400 0.2457331905490719
3 500 0.23034094353410184
3 600 0.24112510446459054
3 700 0.2279403104887274
3 800 0.22646140249766178
3 900 0.22951832945095701
4 100 0.16941715283588565
4 200 0.14392549726556353
4 300 0.13445125032935265
4 400 0.19185534232083457
4 500 0.1860054040304658
4 600 0.23072998011635718
4 700 0.24317138892874937
4 800 0.1994385685805173
4 900 0.21358676996354917
5 100 0.13295625841387845
5 200 0.15030163708644978
5 300 0.15871925972690407
5 40

Lets see how our model behaves on the test data

In [63]:
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        images=images.squeeze(1)
        outputs = model(images)
        correct+=outputs.argmax(dim=1).eq(labels).sum().item()
    total=len(test_dataset)
    print('Accuracy: ',correct*100/total )

Accuracy:  97.28


In [64]:
image,label=next(iter(test_loader))
image[0].shape
img=image[0]
### Lets test the output for 1 image, as you see below img[0] is a label of 3, our model should predict 3 also

In [65]:
print(label[0])

tensor(4)


In [66]:
pred=model(img.squeeze(1))
print(pred)

tensor([[-28.7556, -16.9997,  -7.2758,  -1.7043,  26.7961,  -1.8823, -18.4477,
          -5.0595,  -2.9458,  10.8903]], grad_fn=<AddmmBackward>)


In [67]:
pred.argmax(1)

tensor([4])

In [None]:
### ABove argmax show that index 4 is highest, means the label it predicted is also 4. Which is what we expected