In [1]:
from fastai.text.all import *

In [2]:
# Obtain the Human Numbers dataset through fastai's `untar_data`; `path` is the dataset root used below.
path = untar_data(URLs.HUMAN_NUMBERS)

In [3]:
# Make `Path` objects display relative to the dataset root (hence the short names in the listing below).
Path.BASE_PATH = path

In [4]:
# List the files that make up the dataset.
path.ls()

(#2) [Path('valid.txt'),Path('train.txt')]

In [5]:
# Gather every line of the training and validation files, in that order, into one list;
# the notebook makes its own train/validation split further down.
lines = L()
for fname in ('train.txt', 'valid.txt'):
    with open(path/fname) as f: lines += L(*f.readlines())
lines

(#9998) ['one \n','two \n','three \n','four \n','five \n','six \n','seven \n','eight \n','nine \n','ten \n','eleven \n','twelve \n','thirteen \n','fourteen \n','fifteen \n','sixteen \n','seventeen \n','eighteen \n','nineteen \n','twenty \n'...]

# Preprocessing

## Tokenisation

In [6]:
# Strip the surrounding whitespace/newline from each line and join the numbers
# with ' . ' so that a full stop marks the boundary between two numbers.
text = ' . '.join(line.strip() for line in lines)

In [8]:
# Peek at the first 100 characters of the joined text.
text[:100]

'one . two . three . four . five . six . seven . eight . nine . ten . eleven . twelve . thirteen . fo'

In [8]:
# Split on single spaces so that each word and each '.' separator becomes its own token.
tokens = text.split(' ')
tokens[:10]

['one', '.', 'two', '.', 'three', '.', 'four', '.', 'five', '.']

## Numericalisation

In [9]:
# The vocabulary is the list of distinct tokens; a token's position in it is used as its id below.
vocab = L(*tokens).unique()
vocab

(#30) ['one','.','two','three','four','five','six','seven','eight','nine','ten','eleven','twelve','thirteen','fourteen','fifteen','sixteen','seventeen','eighteen','nineteen'...]

In [11]:
# Look-up table from token to its index in `vocab`, then the whole token stream encoded as integers.
word2idx = {token: idx for idx, token in enumerate(vocab)}
nums = L(word2idx[token] for token in tokens)
nums

(#63095) [0,1,2,1,3,1,4,1,5,1,6,1,7,1,8,1,9,1,10,1...]

## Self-supervised dataset

In [12]:
sl = 16  # sequence length: number of tokens per training sample
# Cut the token stream into non-overlapping windows of `sl` tokens. The target of each
# window is the same window shifted one token to the right, i.e. the next token at every position.
seqs = L(
    (tensor(nums[start:start+sl]), tensor(nums[start+1:start+sl+1]))
    for start in range(0, len(nums)-sl-1, sl)
)

In [14]:
# First (input, target) pair: the target is the input shifted by one token.
seqs[0]

(tensor([0, 1, 2, 1, 3, 1, 4, 1, 5, 1, 6, 1, 7, 1, 8, 1]),
 tensor([1, 2, 1, 3, 1, 4, 1, 5, 1, 6, 1, 7, 1, 8, 1, 9]))

In [13]:
# Number of sequences per mini-batch; also used as the size of the LSTM models' stored hidden state.
BATCHSIZE = 64

In [14]:
def group_chunks(ds, bs):
    """Reorder `ds` so that unshuffled batches of size `bs` continue each other.

    With `m = len(ds) // bs`, the result lists `ds[i + m*j]` for `j = 0..bs-1`, for each
    `i = 0..m-1` in turn. Read in consecutive groups of `bs`, slot `j` of group `i + 1`
    is therefore the item that directly follows slot `j` of group `i` in `ds`.
    Items beyond `m * bs` are dropped.
    """
    n_batches = len(ds) // bs
    return L(ds[batch + n_batches*slot]
             for batch in range(n_batches)
             for slot in range(bs))

In [15]:
# The first 80% of the sequences are used for training, the remainder for validation.
cut = int(len(seqs)*0.8)
# Each split is reordered by `group_chunks`; shuffling is disabled so that this order
# is what the model actually sees, and an incomplete final batch would be dropped.
dls = DataLoaders.from_dsets(
    *(group_chunks(part, BATCHSIZE) for part in (seqs[:cut], seqs[cut:])),
    bs=BATCHSIZE, drop_last=True, shuffle=False)

In [18]:
# Number of full training batches; this is the stride `m` that `group_chunks` uses on the training split.
len(seqs[:cut]) // BATCHSIZE

49

In [19]:
# With a stride of 49, `group_chunks` places this sequence second in the first training batch, right after seqs[0].
seqs[49]

(tensor([ 2, 28, 11,  1,  2, 28, 12,  1,  2, 28, 13,  1,  2, 28, 14,  1]),
 tensor([28, 11,  1,  2, 28, 12,  1,  2, 28, 13,  1,  2, 28, 14,  1,  2]))

## Vanilla RNN

In [41]:
class RNN(Module):
    """Single-layer recurrent language model that emits a prediction at every time step.

    The hidden state lives on the instance: it is carried over from one `forward` call
    to the next (detached from the autograd graph) until `reset` is called.
    """

    def __init__(self, vocab_sz, n_hidden, sl):
        """Create the layers.

        vocab_sz: number of distinct token ids (embedding rows and output activations).
        n_hidden: width of the embedding and of the hidden state.
        sl:       number of time steps processed by each `forward` call.
        """
        self.i_h = nn.Embedding(vocab_sz, n_hidden)  # token id -> hidden-sized vector
        self.h_h = nn.Linear(n_hidden, n_hidden)     # recurrence: hidden -> hidden
        self.h_o = nn.Linear(n_hidden, vocab_sz)     # hidden -> one activation per token
        self.sl = sl
        self.reset()

    def forward(self, x):
        """Run `self.sl` steps over `x` and return the per-step outputs stacked on dim 1.

        `x` is indexed as `x[:, t]`, so it presumably has shape (batch, sl) — confirm with caller.
        """
        hidden, step_outputs = self.h, []
        for t in range(self.sl):
            # Add the current token's embedding to the running state, then apply the recurrence.
            hidden = F.relu(self.h_h(hidden + self.i_h(x[:, t])))
            step_outputs.append(self.h_o(hidden))
        # Keep the values for the next call but cut the gradient history at the batch boundary.
        self.h = hidden.detach()
        return torch.stack(step_outputs, dim=1)

    def reset(self):
        """Forget the carried-over hidden state (the scalar 0 broadcasts on the next addition)."""
        self.h = 0
        

In [35]:
def loss_func(inp, targ):
    """Cross-entropy between per-position predictions and targets.

    inp:  activations whose last dimension is the number of classes; all leading
          dimensions are flattened together.
    targ: integer class indices with one entry per flattened row of `inp`.

    The class dimension is read from `inp` itself rather than from a notebook global,
    so the function works for any vocabulary size.
    """
    return F.cross_entropy(inp.reshape(-1, inp.size(-1)), targ.reshape(-1))

In [44]:
# 64 hidden units; the number of time steps must match the length `sl` of the prepared sequences.
simple_rnn = RNN(len(vocab), 64, sl)

In [45]:
# `ModelResetter` is the fastai callback that invokes the model's `reset()` (see fastai docs),
# which is how the hidden state stored on the model gets cleared.
learn = Learner(dls, simple_rnn, loss_func = loss_func, metrics=accuracy, cbs=ModelResetter)
# NOTE(review): the saved output below lists 10 epochs (0-9) although 15 are requested here;
# re-run the cell to refresh the output.
learn.fit_one_cycle(15, 3e-3)

epoch,train_loss,valid_loss,accuracy,time
0,3.175631,2.992774,0.252523,00:01
1,2.304283,1.980272,0.470215,00:01
2,1.744542,1.825861,0.46224,00:01
3,1.45849,1.819294,0.515951,00:01
4,1.287225,1.792837,0.526042,00:01
5,1.158036,1.798218,0.576497,00:01
6,1.053031,1.829944,0.563151,00:01
7,0.957125,1.841787,0.602458,00:01
8,0.870648,1.840455,0.604329,00:01
9,0.808551,2.00999,0.61849,00:01


In [27]:
class LSTM(Module):
    """Language model built on `nn.LSTM` that keeps its state between batches.

    The two state tensors are stored on the instance, detached after every `forward`
    call and zeroed by `reset`. They are created for a fixed `batch_size`, so every
    batch passed to `forward` must have exactly that many rows.
    """

    def __init__(self, vocab_size, n_layers, n_hidden, batch_size):
        """Create the layers and the initial all-zero state.

        vocab_size: number of distinct token ids.
        n_layers:   number of stacked LSTM layers.
        n_hidden:   width of the embedding and of the LSTM state.
        batch_size: number of sequences per batch (fixes the shape of the stored state).
        """
        self.ih = nn.Embedding(vocab_size, n_hidden)
        self.rnn = nn.LSTM(n_hidden, n_hidden, n_layers, batch_first=True)
        self.ho = nn.Linear(n_hidden, vocab_size)
        # Two tensors, as expected by `nn.LSTM` for its (h_0, c_0) argument.
        self.h = [torch.zeros(n_layers, batch_size, n_hidden) for _ in range(2)]

    def forward(self, x):
        """Return the output activations for every position of `x`."""
        # The stored state is a plain list of tensors, not parameters or buffers, so
        # moving the model to another device does not move it; align it with the
        # input here (a no-op when it is already on the right device).
        state = [h_.to(x.device) for h_ in self.h]
        res, h = self.rnn(self.ih(x), state)
        # Keep the values for the next batch but cut the gradient history.
        self.h = [h_.detach() for h_ in h]
        return self.ho(res)

    def reset(self):
        """Zero the stored state in place."""
        for h in self.h: h.zero_()
        

In [28]:
# Arguments are (vocab_size, n_layers, n_hidden, batch_size): a single-layer LSTM with 64 hidden units.
lstm = LSTM(len(vocab), 1, 64, BATCHSIZE)
lstm_learn = Learner(dls, lstm, loss_func=CrossEntropyLossFlat(),
           metrics=accuracy, cbs=ModelResetter)
# NOTE(review): the saved output below lists 10 epochs (0-9) although 15 are requested here;
# re-run the cell to refresh the output.
lstm_learn.fit_one_cycle(15, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,2.981228,2.659369,0.311849,00:02
1,2.007059,1.904348,0.321696,00:01
2,1.552929,1.738283,0.416748,00:01
3,1.253639,1.701052,0.54777,00:01
4,0.953825,1.770578,0.628418,00:01
5,0.664577,1.567581,0.690918,00:01
6,0.427357,1.527407,0.751709,00:01
7,0.261472,1.688289,0.776367,00:01
8,0.159606,1.5747,0.767741,00:01
9,0.103017,1.575075,0.776855,00:01


In [39]:
class AWDLSTM(Module):
    """Stateful LSTM language model with output dropout and tied embedding/output weights.

    `forward` returns three values: the final activations, the raw LSTM output and the
    dropped-out LSTM output. The two extra outputs are presumably consumed by the
    `RNNCallback` / `RNNRegularizer` callbacks used with this model — confirm in fastai docs.
    """

    def __init__(self, vocab_sz, n_hidden, n_layers, batch_size, drop_out_p):
        """Create the layers and the initial all-zero state.

        vocab_sz:   number of distinct token ids.
        n_hidden:   width of the embedding and of the LSTM state.
        n_layers:   number of stacked LSTM layers.
        batch_size: number of sequences per batch (fixes the shape of the stored state).
        drop_out_p: dropout probability applied to the LSTM output.
        """
        self.ih = nn.Embedding(vocab_sz, n_hidden)
        self.rnn = nn.LSTM(n_hidden, n_hidden, n_layers, batch_first=True)
        self.drop = nn.Dropout(drop_out_p)
        self.ho = nn.Linear(n_hidden, vocab_sz)
        # Weight tying: the output layer shares the embedding matrix (both are vocab_sz x n_hidden).
        self.ho.weight = self.ih.weight
        # Two tensors, as expected by `nn.LSTM` for its (h_0, c_0) argument.
        self.h = [torch.zeros(n_layers, batch_size, n_hidden) for _ in range(2)]

    def forward(self, x):
        """Return `(activations, raw_lstm_output, dropped_out_lstm_output)` for `x`."""
        # The stored state is a plain list of tensors, not parameters or buffers, so
        # moving the model to another device does not move it; align it with the
        # input here (a no-op when it is already on the right device).
        state = [h_.to(x.device) for h_ in self.h]
        raw, h = self.rnn(self.ih(x), state)
        out = self.drop(raw)
        # Keep the values for the next batch but cut the gradient history.
        self.h = [h_.detach() for h_ in h]
        return self.ho(out), raw, out

    def reset(self):
        """Zero the stored state in place."""
        for h in self.h: h.zero_()

In [40]:
# Two-layer model with 64 hidden units and dropout 0.5. The model returns three outputs;
# `RNNCallback` and `RNNRegularizer` are the fastai callbacks meant to handle the extra
# ones and add the alpha/beta activation penalties (see fastai docs).
awd_learn = Learner(dls, AWDLSTM(len(vocab), 64, 2, BATCHSIZE, 0.5),
                loss_func=CrossEntropyLossFlat(), metrics=accuracy,
                cbs=[ModelResetter, RNNCallback, RNNRegularizer(alpha=2, beta=1)])

In [41]:
# Train with weight decay 0.1.
# NOTE(review): the saved output below lists 10 epochs (0-9) although 15 are requested here;
# re-run the cell to refresh the output.
awd_learn.fit_one_cycle(15, 1e-2, wd=0.1)

epoch,train_loss,valid_loss,accuracy,time
0,2.838563,2.282286,0.437174,00:01
1,1.913846,1.75516,0.536784,00:01
2,1.147236,0.954735,0.735026,00:01
3,0.593927,0.565308,0.84375,00:01
4,0.309942,0.541855,0.842855,00:01
5,0.181693,0.45656,0.856771,00:01
6,0.117939,0.420669,0.869548,00:01
7,0.087703,0.391323,0.881104,00:01
8,0.070003,0.375363,0.87736,00:01
9,0.059786,0.349775,0.893636,00:01


In [37]:
# Fresh, untrained learner built from the `AWDLSTM` class defined in this notebook
# (arguments: vocab size, hidden units, layers, batch size, dropout probability).
awd_learn = Learner(dls, AWDLSTM(len(vocab), 64, 2, BATCHSIZE, 0.5),
                loss_func=CrossEntropyLossFlat(), metrics=accuracy,
                cbs=[ModelResetter, RNNCallback, RNNRegularizer(alpha=2, beta=1)])