In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torchvision

In [2]:
# Device name for the run; the cells visible here never reference it.
device = 'cpu'

# Data geometry: every sample is viewed as 28 time steps of 28 features each.
sequence_length = 28
input_size = 28
num_classes = 10

# Recurrent network size.
hidden_size = 256
num_layers = 2

# Optimisation settings.
epochs = 1
batch_size = 16
lr = 0.001

class RNN(nn.Module):
    """Stacked Elman RNN classifier over fixed-length sequences.

    The hidden states of *all* time steps are flattened and fed to a single
    linear layer, so every input must contain exactly ``sequence_length``
    steps.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the recurrent hidden state.
        num_layers: Number of stacked recurrent layers.
        num_classes: Number of output logits.
        sequence_length: Number of time steps per sample. Defaults to 28,
            the value used by the notebook's configuration cell; pass it
            explicitly when working with a different sequence length.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes,
                 sequence_length=28):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.sequence_length = sequence_length
        # batch_first=True -> tensors are laid out as (batch, time, features).
        # nn.GRU / nn.LSTM accept the same constructor arguments should a
        # gated cell be wanted instead (an LSTM additionally needs a cell
        # state c0 next to h0 in forward()).
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        # The classifier sees the concatenated hidden state of every step.
        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``.

        Args:
            x: Tensor of shape ``(batch, sequence_length, input_size)``.
        """
        # Allocate the initial hidden state next to the input so the module
        # keeps working after ``.to(device)`` / ``.double()`` / ``.half()``.
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size,
            device=x.device, dtype=x.dtype,
        )

        out, _ = self.rnn(x, h0)

        # (batch, time, hidden) -> (batch, time * hidden)
        out = out.reshape(out.shape[0], -1)
        return self.fc(out)
        

In [3]:
import gzip
import pickle

# NOTE(review): pickle.load can execute arbitrary code -- only open archives
# that come from a trusted source.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
with gzip.open('C:/Users/OWNER/Desktop/mnist.pkl.gz', 'rb') as f:
    # The archive unpickles to three (inputs, labels) pairs; the third is discarded.
    # NOTE(review): presumably the pairs are train/valid/test, in which case the
    # "test" names below hold the validation split -- confirm against the file.
    (x_train, y_train), (x_test, y_test), _ = pickle.load(f, encoding="latin-1")

# NOTE(review): this re-assigns the batch_size already set in the config cell.
batch_size = 16

# Convert each array to a torch tensor.
x_train = torch.tensor(x_train)
y_train = torch.tensor(y_train)
x_test = torch.tensor(x_test)
y_test = torch.tensor(y_test)

train_ds, test_ds = TensorDataset(x_train, y_train), TensorDataset(x_test, y_test)

# Both loaders iterate in dataset order (no shuffling is requested).
train_loader = DataLoader(train_ds, batch_size=batch_size)
test_loader = DataLoader(test_ds, batch_size=batch_size)

In [4]:
# Loss: cross-entropy computed on the raw logits returned by the model.
criterion = nn.CrossEntropyLoss()

# Classifier built from the hyperparameters defined in the config cell.
model = RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)

# Plain SGD over every model parameter.
optimizer = optim.SGD(model.parameters(), lr=lr)

In [5]:
for epoch in range(epochs):
    # Per-batch loss values of the current epoch (reset at every epoch start).
    losses = []
    for data, targets in train_loader:
        # View each batch as (batch, 28 time steps, 28 features).
        data = data.reshape(-1, 28, 28)
        preds = model(data)

        loss = criterion(preds, targets)
        # Record the scalar loss; the list was previously created but never filled.
        losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Guard against an empty loader so the mean never divides by zero.
    if losses:
        print(f"Epoch {epoch + 1}/{epochs} --> mean loss : {sum(losses) / len(losses):.4f}")

In [6]:
def get_acc(loader, model):
    """Print the classification accuracy of ``model`` over ``loader``.

    The model is switched to evaluation mode for the duration of the pass and
    is put back into whichever mode (train or eval) it was in on entry, even
    if the pass raises.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches. Inputs are
            reshaped to ``(-1, 28, 28)`` before being fed to the model.
        model: ``nn.Module`` returning per-class scores of shape
            ``(batch, num_classes)``.

    Returns:
        None. Prints ``"<correct>/<total> --> Acc : <percent>%"``; the
        percentage is ``nan`` when the loader yields no samples.
    """
    num_correct = 0
    num_pred = 0

    # Remember the caller's mode so evaluation does not silently flip it.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.reshape(-1, 28, 28)
                scores = model(x)
                _, preds = scores.max(1)
                # .item() keeps the running count a plain Python int.
                num_correct += (preds == y).sum().item()
                num_pred += len(preds)
    finally:
        model.train(was_training)

    # An empty loader has no defined accuracy; avoid dividing by zero.
    accuracy = 100 * num_correct / num_pred if num_pred else float("nan")
    print(f"{num_correct}/{num_pred} --> Acc : {accuracy:.4f}%")

In [7]:
# Report accuracy on the training loader first, then on the test loader.
for eval_loader in (train_loader, test_loader):
    get_acc(eval_loader, model)

42620/50000 --> Acc : 85.2400%
8697/10000 --> Acc : 86.9700%
