In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

from fastai.io import *
from fastai.conv_learner import *

from fastai.column_data import *

In [2]:
PATH='D:/FAST.Ai/DL1/Data/'

In [3]:
text = open(f'{PATH}nietzsche.txt').read()
print('corpus length:', len(text))

corpus length: 600901


In [4]:
text[:400]

'PREFACE\n\n\nSUPPOSING that Truth is a woman--what then? Is there not ground\nfor suspecting that all philosophers, in so far as they have been\ndogmatists, have failed to understand women--that the terrible\nseriousness and clumsy importunity with which they have usually paid\ntheir addresses to Truth, have been unskilled and unseemly methods for\nwinning a woman? Certainly she has never allowed herself '

In [5]:
chars = sorted(list(set(text)))

In [6]:
print('total chars' ,len(chars))

total chars 85


In [7]:
vocab_size = len(chars)+1

In [8]:
chars.insert(0,'\0')

In [9]:
' '.join(chars[1:-6])

'\n   ! " \' ( ) , - . 0 1 2 3 4 5 6 7 8 9 : ; = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ ] _ a b c d e f g h i j k l m n o p q r s t u v w x y z'

In [10]:
indices_char = {i:c for i,c in enumerate(chars)}
char_indices = {c:i for i,c in enumerate(chars)}

In [11]:
idx = [char_indices[c] for c in text]

In [12]:
idx[:10]

[40, 42, 29, 30, 25, 27, 29, 1, 1, 1]

In [13]:
''.join([indices_char[i] for i in idx[:30]])

'PREFACE\n\n\nSUPPOSING that Truth'

In [14]:
# predicting 4th charachter from 1st , 2nd, 3rd

In [15]:
cs = 3
c1_dat = [idx[i] for i in range(0,len(idx)-cs,cs)]
c2_dat = [idx[i+1] for i in range(0,len(idx)-cs,cs)]
c3_dat = [idx[i+2] for i in range(0,len(idx)-cs,cs)]
c4_dat = [idx[i+3] for i in range(0,len(idx)-cs,cs)]

In [16]:
type(c1_dat[:-2])

list

In [17]:
x1 = np.stack(c1_dat[:-2])

In [18]:
x1.shape , type(x1)

((200298,), numpy.ndarray)

In [19]:
x2 = np.stack(c2_dat[:-2])
x3 = np.stack(c3_dat[:-2])
y = np.stack(c4_dat[:-2])

In [20]:
x1[:4], x2[:4], x3[:4]

(array([40, 30, 29,  1]), array([42, 25,  1, 43]), array([29, 27,  1, 45]))

In [21]:
y[:4]

array([30, 29,  1, 40])

# create and train the model

In [21]:
n_hidden = 256

In [22]:
n_fac = 42

In [23]:
class char3Model(nn.Module):
    def __init__(self,vocab_size,n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size,n_fac)
        self.l_in = nn.Linear(n_fac,n_hidden)
        self.l_hidden = nn.Linear(n_hidden,n_hidden)
        self.l_out = nn.Linear(n_hidden,vocab_size)
        
    def forward(self,c1,c2,c3):
        in1 = F.relu(self.l_in(self.e(c1)))
        in2 = F.relu(self.l_in(self.e(c2)))
        in3 = F.relu(self.l_in(self.e(c3)))
        
        h = F.tanh(self.l_hidden(in1))
        h = F.tanh(self.l_hidden(h+in2))
        h = F.tanh(self.l_hidden(h+in3))
        
        return F.log_softmax(self.l_out(h))

In [24]:
np.stack([x1,x2,x3],axis=1)

array([[40, 42, 29],
       [30, 25, 27],
       [29,  1,  1],
       ...,
       [72,  2, 68],
       [59,  2, 72],
       [62, 67, 59]])

In [25]:
np.stack([x1,x2,x3],axis=1).shape

(200298, 3)

In [26]:
md = ColumnarModelData.from_arrays(".", [-1], np.stack([x1,x2,x3],axis=1) , y,bs=512)

# i passed "np.stack([x1,x2,x3]" so md will pass that to "forward(self,c1,c2,c3)" as c1,c2,c3

In [27]:
m = char3Model(vocab_size,n_fac)

In [28]:
it = iter(md.trn_dl)
*xs,yt = next(it)
t = m(*V(xs))

In [29]:
opt = optim.Adam(m.parameters(), 1e-2)

In [30]:
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1, style=ProgressStyle(description_width='initial…

epoch      trn_loss   val_loss                                                                                         
    0      2.090732   2.337853  



[array([2.33785])]

In [89]:
set_lrs(opt,1e-2)

In [90]:
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1, style=ProgressStyle(description_width='initial…

epoch      trn_loss   val_loss                                                                                         
    0      2.039751   1.125197  



[array([1.1252])]

In [99]:
def get_next(inp):
    idxs = T(np.array([char_indices[c] for c in inp]))
    p = m(*V(idxs))
    i = np.argmax(to_np(p))
    return chars[i]

In [100]:
get_next('ppl')

'e'

In [101]:
get_next(' th')

'e'

# create inputs

In [12]:
cs = 8

In [13]:
c_in_dat = [[idx[i+j] for i in range(cs)] for j in range(len(idx)-cs)]

In [14]:
c_out_dat = [idx[j+cs] for j in range(len(idx)-cs)]

In [18]:
xs = np.stack(c_in_dat)

In [19]:
xs.shape

(600893, 8)

In [20]:
y = np.stack(c_out_dat)

In [21]:
xs[:cs,:]

array([[40, 42, 29, 30, 25, 27, 29,  1],
       [42, 29, 30, 25, 27, 29,  1,  1],
       [29, 30, 25, 27, 29,  1,  1,  1],
       [30, 25, 27, 29,  1,  1,  1, 43],
       [25, 27, 29,  1,  1,  1, 43, 45],
       [27, 29,  1,  1,  1, 43, 45, 40],
       [29,  1,  1,  1, 43, 45, 40, 40],
       [ 1,  1,  1, 43, 45, 40, 40, 39]])

In [22]:
xs[0]

array([40, 42, 29, 30, 25, 27, 29,  1])

In [23]:
y[:cs]

array([ 1,  1, 43, 45, 40, 40, 39, 43])

# create and train the model 

In [24]:
val_idx = get_cv_idxs(len(xs))

In [25]:
md = ColumnarModelData.from_arrays(".", val_idx, xs , y,bs=512)

In [34]:
it = iter(md.trn_dl)
*xs,yt = next(it)
t = m(*V(xs))

In [27]:
type(xs)

list

In [28]:
xs[0].size(0)

512

In [29]:
class CharLoopModel(nn.Module):
    def __init__(self,vocab_size,n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size,n_fac)
        self.l_in = nn.Linear(n_fac,n_hidden)
        self.l_hidden = nn.Linear(n_hidden,n_hidden)
        self.l_out = nn.Linear(n_hidden,vocab_size)
        
    def forward(self,*cs):
        bs = cs[0].size(0)
        h = V(torch.zeros(bs,n_hidden))
        for c in cs :
            inp = F.relu(self.l_in(self.e(c)))
            h = F.tanh(self.l_hidden(inp+h))
            
        return F.log_softmax(self.l_out(h),dim=-1)

In [33]:
m = CharLoopModel(vocab_size, n_fac)
opt = optim.Adam(m.parameters(), 1e-2)

In [101]:
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1, style=ProgressStyle(description_width='initial…

epoch      trn_loss   val_loss                                                                                         
    0      2.046063   2.041055  



[array([2.04105])]

In [139]:
set_lrs(opt, 0.001)

In [140]:
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1, style=ProgressStyle(description_width='initial…

epoch      trn_loss   val_loss                                                                                         
    0      1.721896   1.720413  



[array([1.72041])]

In [102]:
def get_next(inp):
    idxs = T(np.array([char_indices[c] for c in inp]))
    p = m(*VV(idxs))
    i = np.argmax(to_np(p))
    return chars[i]

In [103]:
get_next('for those o')

'f'

In [144]:
class CharLoopConcatModel(nn.Module):
    def __init__(self,vocab_size,n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size,n_fac)
        self.l_in = nn.Linear(n_fac+n_hidden,n_hidden)
        self.l_hidden = nn.Linear(n_hidden,n_hidden)
        self.l_out = nn.Linear(n_hidden,vocab_size)
    
    def forward(self,*cs):
        bs = cs[0].size(0)
        type(cs[0])
        h = V(torch.zeros(bs,n_hidden))
        for c in cs :
            inp = torch.cat((h,self.e(c)),1)
            inp = F.relu(self.l_in(inp))
            h = F.tanh()
            
        return F.log_softmax(self.l_out(h))

In [141]:
a = torch.zeros(2,2)
b = torch.ones(2,2)
torch.cat((a,b),1)


 0  0  1  1
 0  0  1  1
[torch.FloatTensor of size 2x4]

In [145]:
m = CharLoopConcatModel(vocab_size, n_fac)
opt = optim.Adam(m.parameters(), 1e-3)

In [146]:
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1, style=ProgressStyle(description_width='initial…

epoch      trn_loss   val_loss                                                                                         
    0      1.857759   1.851464  



[array([1.85146])]

# think to concatinate in an other way

In [147]:
class CharLoopConcatModel2(nn.Module):
    def __init__(self,vocab_size,n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size,n_fac)
        
        #
        self.l_in = nn.Linear(n_fac,n_hidden)
        self.l_hidden = nn.Linear(n_hidden*2,n_hidden)
        #
        
        self.l_out = nn.Linear(n_hidden,vocab_size)
        
    def forward(self,*cs):
        bs = cs[0].size(0)
        h = V(torch.zeros(bs,n_hidden))
        
        #
        for c in cs :
            inp = F.relu(self.l_in(self.e(c)))
            inp = torch.cat((h,inp),1)
            h = F.tanh(self.l_hidden(inp))
        #   
        return F.log_softmax(self.l_out(h),dim=-1)

In [148]:
m = CharLoopConcatModel2(vocab_size, n_fac)
opt = optim.Adam(m.parameters(), 1e-3)

In [149]:
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1, style=ProgressStyle(description_width='initial…

epoch      trn_loss   val_loss                                                                                         
    0      1.810731   1.799153  



[array([1.79915])]

In [150]:
def get_next(inp):
    idxs = T(np.array([char_indices[c] for c in inp]))
    p = m(*VV(idxs))
    i = np.argmax(to_np(p))
    return chars[i]

In [151]:
get_next('for thos')

'e'

In [152]:
get_next('queens a')

'n'

In [170]:
get_next('queens a')

'n'

# RNN with pytorch 

In [104]:
class CharRnn(nn.Module):
    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.RNN(n_fac, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
        
    def forward(self, *cs):
        bs = cs[0].size(0)
        h = V(torch.zeros(1, bs, n_hidden))
        inp = self.e(torch.stack(cs))
        outp,h = self.rnn(inp, h)
        
        return F.log_softmax(self.l_out(outp[-1]), dim=-1)

In [105]:
m = CharRnn(vocab_size, n_fac)
opt = optim.Adam(m.parameters(), 1e-3)

In [106]:
it = iter(md.trn_dl)
*xs,yt = next(it)

In [107]:
t = m.e(V(torch.stack(xs)))

In [108]:
t.size()

torch.Size([8, 512, 42])

In [109]:
ht = (V(torch.zeros(1,512,n_hidden)))

In [110]:
outp , hn = m.rnn(t,ht)
outp.size() , hn.size()

(torch.Size([8, 512, 256]), torch.Size([1, 512, 256]))

In [111]:
# outp[7,:] is equal to hn[0]

In [112]:
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1, style=ProgressStyle(description_width='initial…

epoch      trn_loss   val_loss                                                                                         
    0      1.870356   1.847333  



[array([1.84733])]

In [56]:
def get_next(inp):
    idxs=T(np.array([char_indices[c] for c in inp]))
    p=m(*V(idxs))
    i = np.argmax(to_np(p))
    return chars[i]

In [57]:
get_next("for thos")

'e'

In [58]:
def get_next_n(inp):
    res = inp
    for i in range(50):
        c = get_next(inp)
        res += c
        inp = inp[1:]+c
    return res

In [59]:
get_next_n("for thos")

'for those of the some and and and and and and and and and '

# multi output model

In [35]:
c_in_dat = [[idx[i+j] for i in range(cs)] for j in range(0, len(idx)-cs, cs)]

In [36]:
c_out_dat = [[idx[i+j] for i in range(1,cs+1)] for j in range(0, len(idx)-cs, cs)]

In [37]:
xs = np.stack(c_in_dat)
xs.shape

(75112, 8)

In [38]:
ys = np.stack(c_out_dat)
ys.shape

(75112, 8)

In [39]:
xs[-cs:,:]

array([[71, 58,  2, 57, 58, 67, 68, 74],
       [67, 56, 58, 57,  2, 55, 78,  2],
       [73, 61, 58,  1, 56, 61, 71, 62],
       [72, 73, 62, 54, 67,  2, 76, 68],
       [71, 65, 57,  2, 54, 72,  2, 73],
       [61, 58,  2, 62, 67, 57, 62, 56],
       [54, 73, 62, 68, 67, 72,  2, 68],
       [59,  2, 72, 62, 67, 59, 74, 65]])

In [40]:
ys[-cs:,:]

array([[58,  2, 57, 58, 67, 68, 74, 67],
       [56, 58, 57,  2, 55, 78,  2, 73],
       [61, 58,  1, 56, 61, 71, 62, 72],
       [73, 62, 54, 67,  2, 76, 68, 71],
       [65, 57,  2, 54, 72,  2, 73, 61],
       [58,  2, 62, 67, 57, 62, 56, 54],
       [73, 62, 68, 67, 72,  2, 68, 59],
       [ 2, 72, 62, 67, 59, 74, 65, 67]])

In [41]:
class CharSeqRnn(nn.Module):
    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.RNN(n_fac, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
        
    def forward(self, *cs):
        bs = cs[0].size(0)
        h = V(torch.zeros(1, bs, n_hidden))
        inp = self.e(torch.stack(cs))
        outp,h = self.rnn(inp, h)
        
        return F.log_softmax(self.l_out(outp))

In [42]:
m = CharSeqRnn(vocab_size, n_fac)
opt = optim.Adam(m.parameters(), 1e-3)

In [43]:
val_idx = get_cv_idxs(len(xs))

md = ColumnarModelData.from_arrays('.', val_idx, xs, ys, bs=512)

In [68]:
it = iter(md.trn_dl)
*xs,yt = next(it)

t = m.e(V(torch.stack(xs)))

ht = (V(torch.zeros(1,512,n_hidden)))

outp , hn = m.rnn(t,ht)
outp.size() , hn.size()

(torch.Size([8, 512, 256]), torch.Size([1, 512, 256]))

In [67]:
type(xs)

list

In [65]:
Q = torch.stack(xs)

In [66]:
Q.shape

torch.Size([8, 512])

In [69]:
t.size()

torch.Size([8, 512, 42])

In [45]:
type(outp)

torch.autograd.variable.Variable

In [46]:
vocab_size

86

In [47]:
last_outp = F.log_softmax(m.l_out(outp))

In [48]:
last_outp.size()

torch.Size([8, 512, 86])

In [49]:
yt.size()

torch.Size([512, 8])

In [50]:
yt.transpose(0,1).size()

torch.Size([8, 512])

In [51]:
yt.transpose(0,1)


    2    72     8  ...     24    67    65
   76    62     2  ...      2    56    54
   61    73    63  ...      4    73    55
       ...          ⋱          ...       
   71    58    73  ...     71    25     2
   58    65     2  ...     54    73    54
   75    78    54  ...     56     2    72
[torch.LongTensor of size 8x512]

In [52]:
def nll_loss_seq(outp,targ):
    sl,bs,nc = outp.size()
    targ = targ.transpose(0,1).contiguous().view(-1)
    return F.nll_loss(outp.view(-1,nc),targ)

In [53]:
fit(m, md, 1, opt,nll_loss_seq)

HBox(children=(IntProgress(value=0, description='Epoch', max=1, style=ProgressStyle(description_width='initial…

epoch      trn_loss   val_loss                                                                                         
    0      1.124818   0.956862  



[array([0.95686])]

## Identity init!

In [54]:
m = CharSeqRnn(vocab_size, n_fac)
opt = optim.Adam(m.parameters(), 1e-2)

In [55]:
m.rnn.weight_hh_l0.data.copy_(torch.eye(n_hidden))


    1     0     0  ...      0     0     0
    0     1     0  ...      0     0     0
    0     0     1  ...      0     0     0
       ...          ⋱          ...       
    0     0     0  ...      1     0     0
    0     0     0  ...      0     1     0
    0     0     0  ...      0     0     1
[torch.FloatTensor of size 256x256]

In [56]:
fit(m, md, 1, opt,nll_loss_seq)

HBox(children=(IntProgress(value=0, description='Epoch', max=1, style=ProgressStyle(description_width='initial…

epoch      trn_loss   val_loss                                                                                         
    0      0.797862   0.715006  



[array([0.71501])]