# Knock Knock Joke Generator
## Team Compose

### Import Section

In [1]:
%reload_ext autoreload
%autoreload
%matplotlib inline

  (fname, cnt))
  (fname, cnt))


In [2]:
from fastai.learner import *
import torchtext
from torchtext import vocab, data
from torchtext.datasets import language_modeling

from fastai.rnn_reg import *
from fastai.rnn_train import *
from fastai.nlp import *
from fastai.lm_rnn import *

import dill as pickle
import spacy

###  Data setup

In [3]:
# Setting up the training and validation paths
TEST_PATH = 'test/'
TRAIN_PATH = 'train/'

In [4]:
# Number of test examples
! ls -l {TEST_PATH} | wc -l

106


In [5]:
# Number of training examples
!ls -l {TRAIN_PATH} | wc -l

460


In [6]:
# Creating lists of training and test examples
train_files = !ls {TRAIN_PATH}
test_files = !ls {TEST_PATH}
train_files[:5]

['train_100.txt',
 'train_101.txt',
 'train_102.txt',
 'train_104.txt',
 'train_105.txt']

In [7]:
# Checking the first training examples together
train_file = !cat {TRAIN_PATH}{train_files[0]}
train_file

["knock, knock. who's there! arfur! arfur who? arfur got!"]

### Creating the model

In [8]:
# Setting up the torchtext field.

#required so that the tokenizer works
spacy.load('en') 
TEXT = data.Field(lower=True, tokenize=list)

In [9]:
# bs is the number of batches that the data gets split into
# bptt is the number of words from each batch
bs = 16
bptt = 20

In [10]:
FILES = dict(train=TRAIN_PATH, validation=TEST_PATH, test=TEST_PATH)
FILES

{'test': 'test/', 'train': 'train/', 'validation': 'test/'}

In [11]:
model_data = LanguageModelData.from_text_files("", TEXT, **FILES, bs=bs, bptt=bptt, min_freq=1)

In [12]:
# The number of batches
len(model_data.trn_dl)

106

In [13]:
# The number of vocab tokens
model_data.nt

45

In [14]:
# Subset of the words that have been tokenized
TEXT.vocab.itos[:10]

['<unk>', '<pad>', ' ', 'o', 'e', 'k', 'n', 'h', 'c', 'a']

In [16]:
TEXT.vocab.stoi['t']

11

In [17]:
embedding_matrix_vector_size = 500

In [18]:
hidden_activations_per_layer = 500

In [19]:
number_of_layers = 2

In [20]:
optimization_function = partial(optim.Adam, betas=(0.7, 0.99))

### Creating the learner

In [21]:
learner = model_data.get_model(optimization_function, 
                               embedding_matrix_vector_size,
                               hidden_activations_per_layer,
                               number_of_layers,
                               dropouti=0.05, 
                               dropout=0.05, 
                               wdrop=0.1, 
                               dropoute=0.02, 
                               dropouth=0.05)

In [22]:
learner.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)

In [23]:
# This clips the learning rate
learner.clip=0.2

In [24]:
learner.fit(3e-4, 4, wds=1e-6, cycle_len=1, cycle_mult=2)

epoch      trn_loss   val_loss                              
    0      2.820423   2.662502  
    1      2.104295   1.877877                              
    2      1.782074   1.751191                              
    3      1.624566   1.567082                              
    4      1.495123   1.4979                                
    5      1.425235   1.461224                              
    6      1.404931   1.433978                              
    7      1.391117   1.431324                              
    8      1.340597   1.385715                              
    9      1.29383    1.347896                              
    10     1.264139   1.322773                              
    11     1.229856   1.28737                               
    12     1.189815   1.266234                              
    13     1.198576   1.269404                              
    14     1.187142   1.279893                              



[1.2798933]

In [25]:
learner.fit(3e-3, 1, wds=1e-6, cycle_len=10)

epoch      trn_loss   val_loss                              
    0      1.326785   1.422977  
    1      1.2476     1.270615                              
    2      1.079133   1.213807                              
    3      0.960059   1.187449                               
    4      0.808017   1.185401                               
    5      0.675018   1.195614                               
    6      0.585171   1.211999                               
    7      0.477929   1.238316                               
    8      0.41318    1.260869                               
    9      0.455005   1.259753                               



[1.2597526]

In [26]:
model = learner.model

In [42]:
ss = "m"

In [43]:
s = [TEXT.tokenize(ss)]
t = TEXT.numericalize(s)
' '.join(s[0])

'm'

In [44]:
model[0].bs = 1
model.eval()
model.reset()
res, *_ = model(t)
model[0].bs = bs

In [45]:
next_words = torch.topk(res[-1], 10)[1]
[TEXT.vocab.itos[o] for o in to_np(next_words)]

['e', 'a', 'i', 'y', 'o', ' ', '!', 'u', 's', 'b']

In [49]:
print(ss,"\n")
for i in range(300):
    n=res[-1].topk(2)[1]
    n = n[1] if n.data[0]==0 else n[0]
    print(TEXT.vocab.itos[n.data[0]], end=' ')
    res,*_ = model(n[0].unsqueeze(0))
print('...')

Variable containing:
 26
[torch.cuda.LongTensor of size 1x1 (GPU 0)]
 

k n o c k .   w h o ' s   t h e r e !   b e n   h u r !   b e n   h u r   w h o ?   b e n   h u r   y o u   a   p o o l i n e !   <eos> k n o c k ,   k n o c k .   w h o ' s   t h e r e !   b e n   h u r !   b e n   h u r   w h o ?   b e n   h u r   y o u   a   p o o l i n e !   <eos> k n o c k ,   k n o c k .   w h o ' s   t h e r e !   b e n   h u r !   b e n   h u r   w h o ?   b e n   h u r   y o u   a   p o o l i n e !   <eos> k n o c k ,   k n o c k .   w h o ' s   t h e r e !   b e n   h u r !   b e n   h u r   w h o ?   b e n   h u r   y o u   a   p o o l i n e !   <eos> k n o c k ,   k n o c k .   w ...
