In [17]:
import torch
from torch import nn
import pickle
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import random_split, Dataset, TensorDataset, DataLoader

In [18]:
with open('handwriting.dat', 'rb') as file:
    dataDict = pickle.load(file)

In [136]:
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
LEARNING_RATE = 0.01
REG_STRENGTH = 1e-5
EPOCHS = 5
BATCH_SIZE = 64
print(f"Using {DEVICE} device")

Using cuda device


In [5]:
class HandwritingDataset(Dataset):
    def __init__(self, x, y, z):
        self.x = x.clone().detach()
        self.y = y.clone().detach()
        self.z = z.clone().detach()
        
    def __getitem__(self, index):
        return self.x[index], self.y[index], self.z[index]
    
    def __len__(self):
        return len(self.x)

In [202]:
# x = torch.tensor(dataDict['inputs'][0:5000], dtype=torch.float)
# y = torch.tensor(dataDict['charLabels'][0:5000], dtype=torch.float)
# z = torch.tensor(dataDict['charStarts'][0:5000], dtype=torch.float)
x = torch.tensor(dataDict['inputs'], dtype=torch.float)
y = torch.tensor(dataDict['charLabels'], dtype=torch.float)
z = torch.tensor(dataDict['charStarts'], dtype=torch.float)
data_full = HandwritingDataset(x, y, z)
train_dataset, test_dataset = random_split(data_full, [0.9, 0.1])
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE, drop_last=True)
test_dataloader = DataLoader(test_dataset, shuffle=True, batch_size=BATCH_SIZE, drop_last=True)

In [104]:
class HandwritingGRU(nn.Module):
    def __init__(self, input_size, hidden_size, num_chars, num_layers):
        super(HandwritingGRU, self).__init__()
        self.gru = nn.GRU(input_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.fc_y = nn.Linear(hidden_size, num_chars)
        self.fc_z = nn.Linear(hidden_size, 1)
        self.n_layers = num_layers
        self.hidden_dim = hidden_size
        
    def forward(self, x, h):
        out, h = self.gru(x, h)
        y_logits = self.fc_y(out)
        z_logits = self.fc_z(out)
        yhat = torch.softmax(y_logits, dim=0)
        zhat = torch.tanh(z_logits)
        return yhat, zhat, h
    
    def init_hidden(self, batch_size):
        # weight = next(self.parameters()).data
        # hidden = weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(DEVICE)
        # hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim)
        hidden = torch.zeros(self.n_layers, self.hidden_dim).to(DEVICE)
        return hidden
    
model = HandwritingGRU(input_size=192, hidden_size=512, num_chars=31, num_layers=2)
model.to(DEVICE)
print(model)

HandwritingGRU(
  (gru): GRU(192, 512, num_layers=2, batch_first=True)
  (fc_y): Linear(in_features=512, out_features=31, bias=True)
  (fc_z): Linear(in_features=512, out_features=1, bias=True)
)


In [131]:
def train_loop(dataloader, model, loss_fn1, loss_fn2, optimizer):
    model.train()
    size = len(dataloader.dataset)
    losses = []
    h = model.init_hidden(BATCH_SIZE)
    # h = h.data
    for batch, (X, y, z) in enumerate(dataloader):
        # Compute prediction and loss
        X, y, z = X.to(DEVICE), y.to(DEVICE), z.to(DEVICE)
        yhat, zhat, h = model(X, h)
        reg_loss = REG_STRENGTH*torch.norm(model.gru.weight_hh_l0) ** 2
        loss = reg_loss + loss_fn1(yhat, y) + loss_fn2(zhat, z)
        losses.append(loss)
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        h = h.detach()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
    return losses

In [198]:
def test_loop(dataloader, model, loss_fn1, loss_fn2):
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    h = model.init_hidden(BATCH_SIZE)
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y, z in dataloader:
            X, y, z = X.to(DEVICE), y.to(DEVICE), z.to(DEVICE)
            yhat, zhat, h = model(X, h)
            # print(yhat.argmax(0).shape)
            # print(y)
            # print(yhat.argmax(0)==y)
            reg_loss = REG_STRENGTH*torch.norm(model.gru.weight_hh_l0) ** 2
            loss = reg_loss + loss_fn1(yhat, y) + loss_fn2(zhat, z)
            test_loss += loss.item()
            correct += (yhat.argmax(1) == y.argmax(1)).type(torch.float).sum().item()
            # print(correct)
            # print(size)
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [116]:
loss_fn1 = nn.CrossEntropyLoss()
loss_fn2 = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [203]:
for t in range(EPOCHS):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn1, loss_fn2, optimizer)
    plt.plot(losses)
    test_loop(test_dataloader, model, loss_fn1, loss_fn2)
print("Done!")

Epoch 1
-------------------------------
loss: 4.668413  [   64/7811001]
loss: 4.715262  [ 6464/7811001]
loss: 4.762125  [12864/7811001]
loss: 4.668368  [19264/7811001]
loss: 4.480865  [25664/7811001]
loss: 4.574614  [32064/7811001]
loss: 4.668363  [38464/7811001]
loss: 4.762113  [44864/7811001]
loss: 4.715237  [51264/7811001]
loss: 4.762113  [57664/7811001]
loss: 4.808987  [64064/7811001]
loss: 4.621487  [70464/7811001]
loss: 4.668362  [76864/7811001]
loss: 4.574612  [83264/7811001]
loss: 4.715237  [89664/7811001]
loss: 4.527737  [96064/7811001]
loss: 4.902737  [102464/7811001]
loss: 4.902737  [108864/7811001]
loss: 4.762112  [115264/7811001]
loss: 4.808987  [121664/7811001]
loss: 4.715237  [128064/7811001]
loss: 4.808988  [134464/7811001]
loss: 4.902737  [140864/7811001]
loss: 4.621487  [147264/7811001]
loss: 4.527738  [153664/7811001]
loss: 4.574612  [160064/7811001]
loss: 4.808987  [166464/7811001]
loss: 4.668362  [172864/7811001]
loss: 4.621487  [179264/7811001]
loss: 4.762112  [18

KeyboardInterrupt: 

In [210]:
x = torch.tensor(dataDict['inputs'][15001:20000], dtype=torch.float)
y = torch.tensor(dataDict['charLabels'][15001:20000], dtype=torch.float)
z = torch.tensor(dataDict['charStarts'][15001:20000], dtype=torch.float)
print(x.shape)

data = HandwritingDataset(x, y, z)
dataloader = DataLoader(data, shuffle=True, batch_size=1, drop_last=True)
model.eval()
with torch.no_grad():
        for X, y, z in dataloader:
            X, y, z = X.to(DEVICE), y.to(DEVICE), z.to(DEVICE)
            yhat, zhat, h = model(X, h)
            # print(yhat)
            # print(y)
            # print(yhat==y)
            # print(yhat.shape)
            # print(y.argmax(1))
            # print(yhat.argmax(1))
            # print(zhat)
            # print(z)
            print(f'yhat = {yhat}')
            print(f'y = {y}')
            print(loss_fn1(yhat, y))
            print(loss_fn2(zhat, z))

torch.Size([4999, 192])
yhat = tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]], device='cuda:0')
y = tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.]], device='cuda:0')
tensor(3.4340, device='cuda:0')
tensor(1., device='cuda:0')
yhat = tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]], device='cuda:0')
y = tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.]], device='cuda:0')
tensor(3.4340, device='cuda:0')
tensor(1., device='cuda:0')
yhat = tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]], device='cuda:0')
y = tensor([[0., 0., 0