In [6]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import fastai

from fastai.io import *

from fastai.column_data import *
from fastai.text import *


from pathlib import Path

## Learning to generate Dickens-like text

### Setup

In [7]:
from torchtext import vocab, data

from fastai.nlp import *
from fastai.lm_rnn import *

PATH=Path('../data/dickens/')

TRN_PATH = 'trn'
VAL_PATH = 'val'
TRN = PATH / TRN_PATH
VAL = PATH / VAL_PATH

%ls {PATH}

davidcopperfield.txt  [0m[01;34mmodels[0m/  [01;34mtmp[0m/  [01;34mtrn[0m/  [01;34mval[0m/


In [8]:
TEXT = data.Field(lower=True, tokenize=list)
bs=2048; bptt=16; n_fac=42; n_hidden=256

FILES = dict(train=TRN_PATH, validation=VAL_PATH, test=VAL_PATH)
md = LanguageModelData.from_text_files(PATH, TEXT, **FILES, bs=bs, bptt=bptt, min_freq=3)

len(md.trn_dl), md.nt, len(md.trn_ds), len(md.trn_ds[0].text)

(44, 53, 1, 1494913)

### Our model

In [9]:
class CharSequence(nn.Module):
    def __init__(self, vocab_size, n_fac, bs, nl):
        super().__init__()
        self.vocab_size,self.nl = vocab_size,nl
        self.e = nn.Embedding(vocab_size, n_fac)
        self.dropout = nn.Dropout(.1)
        self.rnn = nn.GRU(n_fac, n_hidden, nl, dropout=0.0)
        self.l_out = nn.Linear(n_hidden, vocab_size)
        self.init_hidden(bs)
        
    def forward(self, cs):
        bs = cs[0].size(0)
        if self.h[0].size(1) != bs: self.init_hidden(bs)
        outp,h = self.rnn(self.dropout(self.e(cs)), self.h)
        self.h = repackage_var(h)
        return F.log_softmax(self.l_out(outp), dim=-1).view(-1, self.vocab_size)
    
    def init_hidden(self, bs):
        self.h = V(torch.zeros(self.nl, bs, n_hidden))

In [10]:
m = BasicModel(CharSequence(md.nt, n_fac, n_hidden, 1).cuda())
learner = RNN_Learner(md, m, opt_fn=optim.Adam, crit=F.nll_loss)

#minimum_learning_rate_divisor = 1200
#percent_after_triangle_cycle = 15
#max_momentum=.97
#min_momentum=.85
#learner.fit(1e-2, 1, cycle_len=72, 
#           use_clr_beta=(minimum_learning_rate_divisor, 
#                         percent_after_triangle_cycle, 
#                         max_momentum, 
#                         min_momentum),
#                         wds=1e-5)
learner.fit(2e-2, 7, cycle_mult=2, cycle_len=1, 
           wds=1e-5)


HBox(children=(IntProgress(value=0, description='Epoch', max=127), HTML(value='')))

epoch      trn_loss   val_loss   
    0      2.598663   2.305043  
    1      2.208407   1.921305  
    2      2.002703   1.863702  
    3      1.881645   1.773078  
    4      1.813082   1.715943  
    5      1.750779   1.700081  
    6      1.699288   1.673854  
    7      1.69325    1.676341  
    8      1.669341   1.651264  
    9      1.6614     1.618604  
    10     1.642774   1.623783  
    11     1.634357   1.608566  
    12     1.60647    1.586232  
    13     1.58998    1.58007   
    14     1.580077   1.583153  
    15     1.603731   1.630319  
    16     1.615242   1.612495  
    17     1.617313   1.60939   
    18     1.612678   1.606043  
    19     1.621265   1.596941  
    20     1.59347    1.578293  
    21     1.597405   1.60619   
    22     1.582763   1.573977  
    23     1.558387   1.582913  
    24     1.551436   1.553195  
    25     1.538836   1.569853  
    26     1.531411   1.510865  
    27     1.531785   1.575031  
    28     1.524847   1.539582  
    29   

[array([1.50235])]

### Test

In [11]:
def get_next(inp):
    idxs = TEXT.numericalize(inp)
    p = learner.model(VV(idxs.transpose(0,1)))
    r = torch.multinomial(p[-1].exp(), 1)
    return TEXT.vocab.itos[to_np(r)[0]]

In [12]:
get_next('for thos')
#TEXT

'e'

In [13]:
def get_next_n(inp, n):
    res = inp
    for i in range(n):
        c = get_next(inp)
        res += c
        inp = inp[1:]+c
    return res

In [14]:
print(get_next_n('fourscore and seven years ago', 400))

fourscore and seven years agony. the otherwise my arm for purious for a convenience.‘i only see it?’‘from the first singularly way, grivalicloney of her. i lived thoughtn’s pass on, which i faintly, the edid old looks in favour off.’‘when you as this?’ said traddles, standing into sigh?’‘i am called until your counter with the prina’ leconsent, trot, mr. micawber for the countenance neighbours ofthe ficse, i was not allusion 
