In [9]:
import torch
from torch import nn 
import numpy as np
from torchvision import datasets
from torchvision import transforms as T
from torch.utils.data import DataLoader

In [4]:
SEED = 42

np.random.seed(SEED)

In [1]:
# mnist
input_size = 28
seq_len = 28

n_layers = 2
hidden_size = 256
n_classes = 10
lr = 1e-2
batch_size = 64
n_epochs = 2

In [70]:
class BiRNN(nn.Module):
    def __init__(self, input_size, hidden_size, n_layers, n_classes):
        super(BiRNN, self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_classes = n_classes
        self.n_layers = n_layers
        
        self.lstm = nn.LSTM(input_size, hidden_size, n_layers, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_size * 2, n_classes)

        # turn weight initialization ON
        self.init_weights()
        
    def forward(self, x, device='cpu'):
        h0 = torch.zeros(self.n_layers * 2, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.n_layers * 2, x.size(0), self.hidden_size).to(device)
        
        out, _ = self.lstm(x, (h0, c0))
        
        out = self.fc(out[:, -1, :])
    
        return out
    
    def init_weights(self):
        for module in self.modules():
            # Хе инициализация для сверточного слоя
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_uniform_(module.weight)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)

In [23]:
train_ds = datasets.MNIST(root='dataset/', train=True, transform=T.ToTensor(), download=True)
test_ds = datasets.MNIST(root='dataset/', train=False, transform=T.ToTensor(), download=True)

In [24]:
train_loader = DataLoader(train_ds, batch_size, shuffle=True)
test_loader = DataLoader(test_ds, batch_size, shuffle=False)

In [25]:
model = BiRNN(input_size, hidden_size, n_layers, n_classes)

In [26]:
crit = nn.CrossEntropyLoss()
optimizer = torch.optim.Adagrad(model.parameters(), lr)

In [27]:
def train(train_loader, n_epochs, crit, optimizer):
    for epoch in range(n_epochs):
        for batch_idx, (data, targets) in enumerate(train_loader):
            # Get data to cuda if possible
            data = data.to(device='cpu').squeeze(1)
            targets = targets.to(device='cpu')

            # forward
            scores = model(data)
            loss = crit(scores, targets)

            # backward
            optimizer.zero_grad()
            loss.backward()

            # gradient descent or adam step
            optimizer.step()


In [30]:
def check_accuracy(loader, model, device='cpu'):
    if loader.dataset.train:
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on test data")

    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device).squeeze(1)
            y = y.to(device=device)

            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

        print(
            f"Got {num_correct} / {num_samples} with accuracy  \
              {float(num_correct)/float(num_samples)*100:.2f}"
        )

    model.train()

In [50]:
%%time
check_accuracy(train_loader, model)

Checking accuracy on training data
Got 58616 / 60000 with accuracy                97.69
CPU times: user 4min 22s, sys: 19.8 s, total: 4min 42s
Wall time: 2min 31s


In [31]:
%%time
check_accuracy(test_loader, model)

Checking accuracy on test data
Got 9722 / 10000 with accuracy                97.22


In [67]:
t = torch.zeros((5, 4, 3, 2))

In [68]:
t.shape

torch.Size([5, 4, 3, 2])

In [69]:
t.squeeze(1).shape

torch.Size([5, 4, 3, 2])