In [1]:
from fastai.text.all import *
from fastdownload import download_url

In [13]:
dest = 'tinyshakespeare/text.txt'
url  = 'https://raw.githubusercontent.com/karpathy/char-rnn/refs/heads/master/data/tinyshakespeare/input.txt'
download_url(url, dest)

Path('tinyshakespeare/text.txt')

In [3]:
with open('tinyshakespeare/text.txt') as fh:
    text = fh.read()

In [45]:
current_text = ''
file_count = 1
for i, para in enumerate(text.split('\n\n')):
    current_text+= para +'\n\n'
    if (i!=0) and (i%1000 == 0):
        with open(f'tinyshakespeare/text_{file_count}.txt', 'w') as fh:
            fh.write(current_text)
        file_count += 1
        current_text = ''
        
with open(f'tinyshakespeare/text_{file_count}.txt', 'w') as fh:
    fh.write(current_text)

In [2]:
ParentSplitter = FuncSplitter(lambda o: Path(o).parent.name == 'test')

In [3]:
path = Path('tinyshakespeare')
get_drama = partial(get_text_files, folders=['train', 'test'])
dls_lm = DataBlock(
    blocks=TextBlock.from_folder(path, is_lm=True),
    get_items=get_drama, splitter=ParentSplitter
).dataloaders(path, path=path, bs=128, seq_len=80)

In [11]:
type(dls_lm)

fastai.data.core.DataLoaders

In [69]:
type(dls_lm.train)

fastai.text.data.LMDataLoader

In [67]:
len(dls_lm.train)

24

In [68]:
len(dls_lm.valid)

9

In [27]:
    class AWDLSTM(Module):
        def __init__(self, vocab_sz, n_hidden, n_layers, batch_size, drop_out_p):
            store_attr()
            self.ih = nn.Embedding(vocab_sz, n_hidden)
            self.rnn = nn.LSTM(n_hidden, n_hidden, n_layers, batch_first=True)
            self.drop = nn.Dropout(drop_out_p)
            self.ho = nn.Linear(n_hidden, vocab_sz)
            self.ho.weight = self.ih.weight #weight tying
            self.h = [torch.zeros(n_layers, batch_size, n_hidden, device=dls_lm.device) for _ in range(2)]
    
        def forward(self, x):
            bs,sl = x.shape
            if bs < self.batch_size:
                self.h = (self.h[0][:, :bs, :].contiguous(), self.h[1][:, :bs, :].contiguous())
            raw, h = self.rnn(self.ih(x), self.h)
            out = self.drop(raw)
            self.h = [h_.detach() for h_ in h]
            return self.ho(out), raw, out
    
        def reset(self):
            for h in self.h: h.zero_()

In [28]:
learn_custom = LMLearner(dls_lm, AWDLSTM(len(dls_lm.vocab), 64, 2, 128, 0.4),
                    loss_func=CrossEntropyLossFlat(), metrics=accuracy)

In [7]:
learn_custom.h[0].shape

torch.Size([2, 128, 64])

In [21]:
learn_custom.h[0][0, :, :].shape

torch.Size([128, 64])

In [47]:
learn_custom.h[0].shape

torch.Size([2, 128, 64])

In [43]:
learn_custom.h[1].shape

torch.Size([128, 1])

In [29]:
learn_custom.fit_one_cycle(2, 1e-4)

epoch,train_loss,valid_loss,accuracy,time
0,8.700819,8.553678,0.002145,00:01
1,8.633881,8.506541,0.014431,00:01


In [7]:
learn_custom.show_training_loop()

Start Fit
   - before_fit     : [TrainEvalCallback, Recorder, ProgressCallback]
  Start Epoch Loop
     - before_epoch   : [Recorder, ProgressCallback]
    Start Train
       - before_train   : [TrainEvalCallback, ModelResetter, Recorder, ProgressCallback]
      Start Batch Loop
         - before_batch   : [CastToTensor]
         - after_pred     : [RNNCallback]
         - after_loss     : []
         - before_backward: []
         - before_step    : []
         - after_step     : []
         - after_cancel_batch: []
         - after_batch    : [TrainEvalCallback, Recorder, ProgressCallback]
      End Batch Loop
    End Train
     - after_cancel_train: [Recorder]
     - after_train    : [Recorder, ProgressCallback]
    Start Valid
       - before_validate: [TrainEvalCallback, ModelResetter, Recorder, ProgressCallback]
      Start Batch Loop
         - **CBs same as train batch**: []
      End Batch Loop
    End Valid
     - after_cancel_validate: [Recorder]
     - after_validate : [Rec

In [47]:
(x, y) = dls_lm.one_batch()

In [48]:
(x, y)

(LMTensorText([[   2,    7,   51,  ...,    7,  139,    7],
               [  10,  193,    8,  ...,   10,   58,  730],
               [2922,   12,   13,  ...,   11,    9,   16],
               ...,
               [  10,  810,  582,  ...,   19,    9,    8],
               [   0,   10,    9,  ..., 3417,   49,    0],
               [  45,   10, 2162,  ...,   38,   19,    9]], device='cuda:0'),
 TensorText([[   7,   51,    7,  ...,  139,    7,  515],
             [ 193,    8,  255,  ...,   58,  730,  259],
             [  12,   13,    7,  ...,    9,   16,  473],
             ...,
             [ 810,  582,   10,  ...,    9,    8,   29],
             [  10,    9,    8,  ...,   49,    0,    9],
             [  10, 2162,   10,  ...,   19,    9,    8]], device='cuda:0'))

In [50]:
dls_lm.decode_batch((x,y))[0]

("xxbos xxup king xxup edward xxup iv : \n xxmaj ay , if thou wilt say ' ay ' to my request ; \n xxmaj no if thou dost say ' no ' to my demand . \n\n xxup lady xxup grey : \n xxmaj then , no , my lord . xxmaj my suit is at an end . \n\n xxup gloucester : \n\n xxup clarence : \n\n xxup king xxup edward xxup iv : \n\n xxup lady xxup",
 "xxup king xxup edward xxup iv : \n xxmaj ay , if thou wilt say ' ay ' to my request ; \n xxmaj no if thou dost say ' no ' to my demand . \n\n xxup lady xxup grey : \n xxmaj then , no , my lord . xxmaj my suit is at an end . \n\n xxup gloucester : \n\n xxup clarence : \n\n xxup king xxup edward xxup iv : \n\n xxup lady xxup grey")

In [53]:
L(dls_lm.vocab)[[2,7,51]]

(#3) ['xxbos','xxup','king']

In [56]:
type(dls_lm.after_batch)

fasttransform.transform.Pipeline

In [60]:
type(dls_lm.after_item)

fasttransform.transform.Pipeline

In [59]:
type(dls_lm.before_batch)

fasttransform.transform.Pipeline

In [31]:
type(dls_lm.test_dl)

method

In [30]:
TEXT = "Clown:He seems to be of great authority:"
test_dl = dls_lm.test_dl([TEXT])

In [32]:
idxs = idxs_all = test_dl.items[0].to(dls_lm.device)

In [95]:
idxs.shape

torch.Size([11])

In [97]:
type(learn_custom)

fastai.text.learner.TextLearner

In [31]:
learn_custom.predict(TEXT, n_words=3)

'clown : he seems to be of great authority : general sped afeard'

In [33]:
learn_custom.model.reset()
preds,_ = learn_custom.get_preds(dl=[(idxs[None],)])

In [35]:
preds.shape

torch.Size([1, 11, 4912])

In [73]:
type(test_dl)

fastai.text.data.LMDataLoader

In [61]:
L(dls_lm.vocab)[test_dl.items[0]]

(#11) ['xxbos','clown',':','he','seems','to','be','of','great','authority',':']

In [8]:
learn = language_model_learner(
    dls_lm, AWD_LSTM, drop_mult=0.3, 
    metrics=accuracy)

  wgts = torch.load(wgts_fname, map_location = lambda storage,loc: storage)


In [11]:
len(learn.model)

2

In [18]:
help(AWD_LSTM)

Help on class AWD_LSTM in module fastai.text.models.awdlstm:

class AWD_LSTM(fastai.torch_core.Module)
 |  AWD_LSTM(vocab_sz: 'int', emb_sz: 'int', n_hid: 'int', n_layers: 'int', pad_token: 'int' = 1, hidden_p: 'float' = 0.2, input_p: 'float' = 0.6, embed_p: 'float' = 0.1, weight_p: 'float' = 0.5, bidir: 'bool' = False)
 |  
 |  AWD-LSTM inspired by https://arxiv.org/abs/1708.02182
 |  
 |  Method resolution order:
 |      AWD_LSTM
 |      fastai.torch_core.Module
 |      torch.nn.modules.module.Module
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, vocab_sz: 'int', emb_sz: 'int', n_hid: 'int', n_layers: 'int', pad_token: 'int' = 1, hidden_p: 'float' = 0.2, input_p: 'float' = 0.6, embed_p: 'float' = 0.1, weight_p: 'float' = 0.5, bidir: 'bool' = False)
 |      Initialize internal Module state, shared by both nn.Module and ScriptModule.
 |  
 |  forward(self, inp: 'Tensor', from_embeds: 'bool' = False)
 |      Define the computation performed at every call.

In [15]:
learn.model[0].encoder.emb_sz

AttributeError: 'Embedding' object has no attribute 'emb_sz'

In [9]:
learn.fit_one_cycle(3, 3e-3)

epoch,train_loss,valid_loss,accuracy,time
0,5.724758,5.370021,0.209151,00:02
1,5.231106,4.956509,0.228683,00:02
2,4.936974,4.882655,0.231967,00:02


In [10]:
learn.show_training_loop()

Start Fit
   - before_fit     : [TrainEvalCallback, Recorder, ProgressCallback]
  Start Epoch Loop
     - before_epoch   : [Recorder, ProgressCallback]
    Start Train
       - before_train   : [TrainEvalCallback, ModelResetter, Recorder, ProgressCallback]
      Start Batch Loop
         - before_batch   : [CastToTensor]
         - after_pred     : [RNNCallback]
         - after_loss     : []
         - before_backward: []
         - before_step    : []
         - after_step     : []
         - after_cancel_batch: []
         - after_batch    : [TrainEvalCallback, Recorder, ProgressCallback]
      End Batch Loop
    End Train
     - after_cancel_train: [Recorder]
     - after_train    : [Recorder, ProgressCallback]
    Start Valid
       - before_validate: [TrainEvalCallback, ModelResetter, Recorder, ProgressCallback]
      Start Batch Loop
         - **CBs same as train batch**: []
      End Batch Loop
    End Valid
     - after_cancel_validate: [Recorder]
     - after_validate : [Rec

In [10]:
learn.unfreeze()
learn.fit_one_cycle(10, 3e-3)

epoch,train_loss,valid_loss,accuracy,time
0,4.195487,4.129751,0.288162,00:02
1,3.906633,4.019586,0.291211,00:02
2,3.664222,3.871012,0.316818,00:02
3,3.430566,3.848905,0.324999,00:02
4,3.197395,3.985066,0.321521,00:02
5,2.968852,4.029352,0.318111,00:02
6,2.710333,4.126971,0.319483,00:03
7,2.464854,4.261774,0.312334,00:02
8,2.264975,4.318427,0.31452,00:02
9,2.113755,4.339167,0.312641,00:02


In [31]:
learn.model

SequentialRNN(
  (0): AWD_LSTM(
    (encoder): Embedding(4912, 400, padding_idx=1)
    (encoder_dp): EmbeddingDropout(
      (emb): Embedding(4912, 400, padding_idx=1)
    )
    (rnns): ModuleList(
      (0): WeightDropout(
        (module): LSTM(400, 1152, batch_first=True)
      )
      (1): WeightDropout(
        (module): LSTM(1152, 1152, batch_first=True)
      )
      (2): WeightDropout(
        (module): LSTM(1152, 400, batch_first=True)
      )
    )
    (input_dp): RNNDropout()
    (hidden_dps): ModuleList(
      (0-2): 3 x RNNDropout()
    )
  )
  (1): LinearDecoder(
    (decoder): Linear(in_features=400, out_features=4912, bias=True)
    (output_dp): RNNDropout()
  )
)

In [17]:
TEXT = "Clown:He seems to be of great authority:"
N_WORDS = 30
N_SENTENCES = 1
preds = [learn.predict(TEXT, N_WORDS) for _ in range(N_SENTENCES)]

In [18]:
preds

['clown : he seems to be of great authority : \n Take on the officer your very sword , on \n Lie your sword , and so thrive you in his defence . \n And you soldiers']