In [6]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [7]:
from fastai.io import *
from fastai.conv_learner import *

from fastai.column_data import *

#### Downloading collected works of Nietzsche to use as our data.

In [8]:
PATH='data/nietzsche/'

In [9]:
get_data("https://s3.amazonaws.com/text-datasets/nietzsche.txt", f'{PATH}nietzsche.txt')
text = open(f'{PATH}nietzsche.txt').read()

In [10]:
len(text)

600893

In [11]:
text[:600]

'PREFACE\n\n\nSUPPOSING that Truth is a woman--what then? Is there not ground\nfor suspecting that all philosophers, in so far as they have been\ndogmatists, have failed to understand women--that the terrible\nseriousness and clumsy importunity with which they have usually paid\ntheir addresses to Truth, have been unskilled and unseemly methods for\nwinning a woman? Certainly she has never allowed herself to be won; and\nat present every kind of dogma stands with sad and discouraged mien--IF,\nindeed, it stands at all! For there are scoffers who maintain that it\nhas fallen, that all dogma lies on the gro'

In [12]:
chars = sorted(list(set(text)))

In [13]:
vocab_size = len(chars)+1

In [14]:
vocab_size

85

In [15]:
#ADDING PADDING TO TEXTS
chars.insert(0, "\0")

In [16]:
char_indices = {c: i for i,c in enumerate(chars)}

In [17]:
list(char_indices)[:10]

['\x00', '\n', ' ', '!', '"', "'", '(', ')', ',', '-']

In [18]:
indices_char = {i: c for i, c in enumerate(chars)}

In [19]:
list(indices_char)[:10]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [20]:
# idx will be the data we use from now on - it simply converts all the characters to their index (based on the mapping above)
idx = [char_indices[c] for c in text]

In [21]:
idx[:10]

[40, 42, 29, 30, 25, 27, 29, 1, 1, 1]

In [22]:
''.join(indices_char[i] for i in idx[:84])

'PREFACE\n\n\nSUPPOSING that Truth is a woman--what then? Is there not ground\nfor suspec'

### Model with 3 character

In [23]:
cs = 3
c1_dat = [idx[i] for i in range(0, len(idx)-cs, cs)]
c2_dat = [idx[i+1] for i in range(0, len(idx)-cs, cs)]
c3_dat = [idx[i+2] for i in range(0, len(idx)-cs, cs)]
c4_dat = [idx[i+3] for i in range(0, len(idx)-cs, cs)]

inputs

In [24]:
x1 = np.stack(c1_dat)
x2 = np.stack(c2_dat)
x3 = np.stack(c3_dat)

Output

In [25]:
y = np.stack(c4_dat)

In [26]:
x1[:4],x2[:4],x3[:4]

(array([40, 30, 29,  1]), array([42, 25,  1, 43]), array([29, 27,  1, 45]))

In [27]:
y[:4]

array([30, 29,  1, 40])

In [28]:
x1.shape, y.shape

((200297,), (200297,))

### Create and train model

In [29]:
#Size for our hidden state
n_hidden = 256

In [30]:
n_fac = 42

In [31]:
class Char3Model(nn.Module):
    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.l_in = nn.Linear(n_fac, n_hidden)
        self.l_hidden = nn.Linear(n_hidden, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
    
    def forward(self, c1, c2, c3):
        in1 = F.relu(self.l_in(self.e(c1)))
        in2 = F.relu(self.l_in(self.e(c2)))
        in3 = F.relu(self.l_in(self.e(c3)))
        
        h = V(torch.zeros(in1.size()).cpu())
        h = F.tanh(self.l_hidden(h+in1))
        h = F.tanh(self.l_hidden(h+in2))
        h = F.tanh(self.l_hidden(h+in3))
        
        return F.log_softmax(self.l_out(h))

In [32]:
md = ColumnarModelData.from_arrays('.',[-1], np.stack([x1,x2,x3], axis=1), y, bs=512)

In [33]:
m = Char3Model(vocab_size, n_fac).cpu()

In [34]:
it = iter(md.trn_dl)

In [35]:
*xs, yt = next(it)
t = m(*V(xs))

In [36]:
t

Variable containing:
-4.6156 -4.4204 -4.4193  ...  -4.3643 -4.5374 -4.4032
-4.4457 -4.3505 -4.2791  ...  -4.4429 -4.3434 -4.5960
-4.5597 -4.3953 -4.6268  ...  -4.4736 -4.4592 -4.6106
          ...             ⋱             ...          
-4.1328 -4.1761 -4.2057  ...  -4.7636 -4.5354 -4.3087
-4.4356 -4.4133 -4.4649  ...  -4.5162 -4.4508 -4.2189
-4.4919 -4.4890 -4.3087  ...  -4.5102 -4.4784 -4.3328
[torch.FloatTensor of size 512x85]

In [37]:
opt = optim.Adam(m.parameters(), 1e-2)

In [38]:
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      2.150364   1.078322  



[array([1.07832])]

### Testing Model

In [39]:
def get_next(inpt):
    idxs = T(np.array([char_indices[c] for c in inpt]))
    p = m(*VV(idxs))
    i = np.argmax(to_np(p))
    return chars[i]

In [40]:
get_next('y. ')

'H'

In [41]:
get_next('res')

'e'

In [42]:
get_next(' ar')

'e'

In [43]:
get_next('and')

' '

### Doing it in RNN !!

#### Creating inputs

In [44]:
#Character size: 8
cs = 8

For each of 0 through 7, create a list of every 8th character with that starting point. These will be the 8 inputs to our model.

In [45]:
c_in_dat = [[idx[i+j] for i in range(cs)] for j in range(len(idx)-cs)]

In [46]:
c_in_dat

[[40, 42, 29, 30, 25, 27, 29, 1],
 [42, 29, 30, 25, 27, 29, 1, 1],
 [29, 30, 25, 27, 29, 1, 1, 1],
 [30, 25, 27, 29, 1, 1, 1, 43],
 [25, 27, 29, 1, 1, 1, 43, 45],
 [27, 29, 1, 1, 1, 43, 45, 40],
 [29, 1, 1, 1, 43, 45, 40, 40],
 [1, 1, 1, 43, 45, 40, 40, 39],
 [1, 1, 43, 45, 40, 40, 39, 43],
 [1, 43, 45, 40, 40, 39, 43, 33],
 [43, 45, 40, 40, 39, 43, 33, 38],
 [45, 40, 40, 39, 43, 33, 38, 31],
 [40, 40, 39, 43, 33, 38, 31, 2],
 [40, 39, 43, 33, 38, 31, 2, 73],
 [39, 43, 33, 38, 31, 2, 73, 61],
 [43, 33, 38, 31, 2, 73, 61, 54],
 [33, 38, 31, 2, 73, 61, 54, 73],
 [38, 31, 2, 73, 61, 54, 73, 2],
 [31, 2, 73, 61, 54, 73, 2, 44],
 [2, 73, 61, 54, 73, 2, 44, 71],
 [73, 61, 54, 73, 2, 44, 71, 74],
 [61, 54, 73, 2, 44, 71, 74, 73],
 [54, 73, 2, 44, 71, 74, 73, 61],
 [73, 2, 44, 71, 74, 73, 61, 2],
 [2, 44, 71, 74, 73, 61, 2, 62],
 [44, 71, 74, 73, 61, 2, 62, 72],
 [71, 74, 73, 61, 2, 62, 72, 2],
 [74, 73, 61, 2, 62, 72, 2, 54],
 [73, 61, 2, 62, 72, 2, 54, 2],
 [61, 2, 62, 72, 2, 54, 2, 76],
 [2

In [47]:
c_out_dat = [idx[j+cs] for j in range(len(idx)-cs)]

In [48]:
c_out_dat

[1,
 1,
 43,
 45,
 40,
 40,
 39,
 43,
 33,
 38,
 31,
 2,
 73,
 61,
 54,
 73,
 2,
 44,
 71,
 74,
 73,
 61,
 2,
 62,
 72,
 2,
 54,
 2,
 76,
 68,
 66,
 54,
 67,
 9,
 9,
 76,
 61,
 54,
 73,
 2,
 73,
 61,
 58,
 67,
 24,
 2,
 33,
 72,
 2,
 73,
 61,
 58,
 71,
 58,
 2,
 67,
 68,
 73,
 2,
 60,
 71,
 68,
 74,
 67,
 57,
 1,
 59,
 68,
 71,
 2,
 72,
 74,
 72,
 69,
 58,
 56,
 73,
 62,
 67,
 60,
 2,
 73,
 61,
 54,
 73,
 2,
 54,
 65,
 65,
 2,
 69,
 61,
 62,
 65,
 68,
 72,
 68,
 69,
 61,
 58,
 71,
 72,
 8,
 2,
 62,
 67,
 2,
 72,
 68,
 2,
 59,
 54,
 71,
 2,
 54,
 72,
 2,
 73,
 61,
 58,
 78,
 2,
 61,
 54,
 75,
 58,
 2,
 55,
 58,
 58,
 67,
 1,
 57,
 68,
 60,
 66,
 54,
 73,
 62,
 72,
 73,
 72,
 8,
 2,
 61,
 54,
 75,
 58,
 2,
 59,
 54,
 62,
 65,
 58,
 57,
 2,
 73,
 68,
 2,
 74,
 67,
 57,
 58,
 71,
 72,
 73,
 54,
 67,
 57,
 2,
 76,
 68,
 66,
 58,
 67,
 9,
 9,
 73,
 61,
 54,
 73,
 2,
 73,
 61,
 58,
 2,
 73,
 58,
 71,
 71,
 62,
 55,
 65,
 58,
 1,
 72,
 58,
 71,
 62,
 68,
 74,
 72,
 67,
 58,
 72,
 72,
 2,
 54,


In [49]:
xs = np.stack(c_in_dat, axis=0)

In [50]:
xs.shape

(600885, 8)

In [51]:
y = np.stack(c_out_dat)

In [52]:
xs[:cs,:cs]

array([[40, 42, 29, 30, 25, 27, 29,  1],
       [42, 29, 30, 25, 27, 29,  1,  1],
       [29, 30, 25, 27, 29,  1,  1,  1],
       [30, 25, 27, 29,  1,  1,  1, 43],
       [25, 27, 29,  1,  1,  1, 43, 45],
       [27, 29,  1,  1,  1, 43, 45, 40],
       [29,  1,  1,  1, 43, 45, 40, 40],
       [ 1,  1,  1, 43, 45, 40, 40, 39]])

In [53]:
# Next character after sequence
y[:cs]

array([ 1,  1, 43, 45, 40, 40, 39, 43])

### Model creation and training

In [54]:
val_idx = get_cv_idxs(len(idx)-cs-1)

In [55]:
md = ColumnarModelData.from_arrays('.', val_idx, xs, y, bs = 512)

In [56]:
class CharLoopModel(nn.Module):
    #This is RNN
    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.l_in = nn.Linear(n_fac, n_hidden)
        self.l_hidden = nn.Linear(n_hidden, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
        
    def forward(self, *cs):
        bs = cs[0].size(0)
        h = V(torch.zeros(bs, n_hidden).cpu())
        for c in cs:
            inp = F.relu(self.l_in(self.e(c)))
            h = F.tanh(self.l_hidden(h+inp))
        return F.log_softmax(self.l_out(h), dim=-1)

In [57]:
m = CharLoopModel(vocab_size, n_fac).cpu()
opt = optim.Adam(m.parameters(), 1e-2)

In [58]:
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      2.011673   2.025526  



[array([2.02553])]

In [59]:
set_lrs(opt, 0.001)

In [60]:
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.712808   1.719532  



[array([1.71953])]

As per suggestion now creating model with concatination

In [61]:
class CharLoopConcatModel(nn.Module):
    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.l_in = nn.Linear(n_fac+n_hidden, n_hidden)
        self.l_hidden = nn.Linear(n_hidden, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
    
    def forward(self, *cs):
        bs = cs[0].size(0)
        h = V(torch.zeros(bs, n_hidden).cpu())
        for c in cs:
            inp = torch.cat((h, self.e(c)), 1)
            inp = F.relu(self.l_in(inp))
            h = F.tanh(self.l_hidden(inp))
    
        return F.log_softmax(self.l_out(h),dim=-1)

In [62]:
m = CharLoopConcatModel(vocab_size, n_fac).cpu()
opt = optim.Adam(m.parameters(), 1e-3)

In [63]:
it = iter(md.trn_dl)
*xs,yt = next(it)
t = m(*V(xs))

In [64]:
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.878227   1.849148  



[array([1.84915])]

In [65]:
set_lrs(opt, 1e-4)

In [66]:
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.77561    1.7687    



[array([1.7687])]

#### Test Model

In [67]:
def get_next(inp):
    idxs = T(np.array([char_indices[c] for c in inp]))
    p = m(*VV(idxs))
    i = np.argmax(to_np(p))
    return chars[i]

In [68]:
get_next('for thos')

'e'

In [69]:
get_next('part of ')

't'

In [70]:
get_next('queens a')

'n'

### RNN with Pytorch

In [71]:
class CharRnn(nn.Module):
    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.RNN(n_fac, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
    
    def forward(self, *cs):
        bs = cs[0].size(0)
        h = V(torch.zeros(1, bs, n_hidden))
        inp = self.e(torch.stack(cs))
        outp, h = self.rnn(inp, h)
        return F.log_softmax(self.l_out(outp[-1]), dim =-1)

In [72]:
m = CharRnn(vocab_size, n_fac).cpu()
opt = optim.Adam(m.parameters(), 1e-3)

In [73]:
it = iter(md.trn_dl)
*xs,yt = next(it)

In [74]:
t = m.e(V(torch.stack(xs)))
t.size()

torch.Size([8, 512, 42])

In [75]:
ht = V(torch.zeros(1, 512, n_hidden))
outp, hn = m.rnn(t, ht)
outp.size(), hn.size()

(torch.Size([8, 512, 256]), torch.Size([1, 512, 256]))

In [76]:
t = m(*V(xs)); t.size()

torch.Size([512, 85])

In [77]:
fit(m, md, 4, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.864865   1.847729  
    1      1.676384   1.672071                              
    2      1.587078   1.595017                              
    3      1.537883   1.553042                              



[array([1.55304])]

In [78]:
set_lrs(opt, 1e-4)

In [79]:
fit(m, md, 2, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=2), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.469748   1.510924  
    1      1.467569   1.506408                              



[array([1.50641])]

### Test Model

In [80]:
def get_next(inp):
    idxs = T(np.array([char_indices[c] for c in inp]))
    p = m(*VV(idxs))
    i = np.argmax(to_np(p))
    return chars[i]

In [81]:
get_next('for thos')

'e'

In [82]:
def get_next_n(inp, n):
    res = inp
    for i in range(n):
        c = get_next(inp)
        res += c
        inp = inp[1:]+c
    return res

In [83]:
get_next_n('for thos', 40)

'for those such a such a such a such a such a suc'