In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms

In [20]:
class RNN(nn.Module):
    def __init__(self, in_size, hidden_size, n_layers, num_classes=10):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.rnn = nn.RNN(in_size, hidden_size, n_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size*seq_len, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)
        return x

In [21]:
# sets device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ConvNet(1, 10).to(device)

In [22]:
# load data
train_data = datasets.MNIST(
    root='./datasets',
    train=True,
    transform=transforms.ToTensor(),
    download=True
)

train_loader = DataLoader(train_data, batch_size=64, shuffle=True)

test_data = datasets.MNIST(
    root='./datasets',
    train=False,
    transform=transforms.ToTensor(),
    download=True
)

test_loader = DataLoader(test_data, batch_size=64, shuffle=True)

In [23]:
for x, y in train_loader:
    print(x.shape)
    print(y.shape)
    break

torch.Size([64, 1, 28, 28])
torch.Size([64])


In [24]:
# sets hyperparameters
in_size = 28
seq_len = 28
n_layers = 2
hidden_size = 256
num_classes = 10
learning_rate = 0.001
num_epochs = 5

In [25]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [26]:
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        data = data.to(device)
        targets = targets.to(device)

        scores = model(data)
        loss = criterion(scores, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print('Batch {}| Loss {}'.format(batch_idx, loss))

Batch 0| Loss 2.286425828933716
Batch 1| Loss 2.3130524158477783
Batch 2| Loss 2.295990467071533
Batch 3| Loss 2.263593912124634
Batch 4| Loss 2.2751824855804443
Batch 5| Loss 2.3191330432891846
Batch 6| Loss 2.354827642440796
Batch 7| Loss 2.2913033962249756
Batch 8| Loss 2.2685651779174805
Batch 9| Loss 2.2454631328582764
Batch 10| Loss 2.248403310775757
Batch 11| Loss 2.2550175189971924
Batch 12| Loss 2.2539515495300293
Batch 13| Loss 2.185675859451294
Batch 14| Loss 2.2310078144073486
Batch 15| Loss 2.186575174331665
Batch 16| Loss 2.17037034034729
Batch 17| Loss 2.184222936630249
Batch 18| Loss 2.2027101516723633
Batch 19| Loss 2.140342950820923
Batch 20| Loss 2.1501286029815674
Batch 21| Loss 2.1307146549224854
Batch 22| Loss 2.106173276901245
Batch 23| Loss 2.1119422912597656
Batch 24| Loss 2.078310489654541
Batch 25| Loss 2.0283548831939697
Batch 26| Loss 2.0092225074768066
Batch 27| Loss 2.029815673828125
Batch 28| Loss 1.9810935258865356
Batch 29| Loss 1.9243189096450806
Batc

In [27]:
def check_accuracy(loader, model):
    correct = 0
    total = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device)

            scores = model(x)
            _, predictions = scores.max(1)
            correct += (predictions == y).sum()
            total += predictions.size(0)
        model.train()
        print('Accuracy: ', correct/total)

In [28]:
check_accuracy(test_loader, model)

Accuracy:  tensor(0.9860)
