# Knock Knock Joke Generator
## Team Compose

### Import Section

In [1]:
%reload_ext autoreload
%autoreload
%matplotlib inline

  (fname, cnt))
  (fname, cnt))


In [2]:
from fastai.learner import *
import torchtext
from torchtext import vocab, data
from torchtext.datasets import language_modeling

from fastai.rnn_reg import *
from fastai.rnn_train import *
from fastai.nlp import *
from fastai.lm_rnn import *

import dill as pickle
import spacy

###  Data setup

In [3]:
# Setting up the training and validation paths
TEST_PATH = 'test/'
TRAIN_PATH = 'train/'

In [20]:
# Number of test examples
! ls -l {TEST_PATH} | wc -l

106


In [21]:
# Number of training examples
!ls -l {TRAIN_PATH} | wc -l

460


In [22]:
# Creating lists of training and test examples
train_files = !ls {TRAIN_PATH}
test_files = !ls {TEST_PATH}
train_files[:5]

['train_100.txt',
 'train_101.txt',
 'train_102.txt',
 'train_104.txt',
 'train_105.txt']

In [23]:
# Checking the first training examples together
train_file = !cat {TRAIN_PATH}{train_files[0]}
train_file

["knock, knock. who's there! arfur! arfur who? arfur got!"]

### Creating the model

In [5]:
# Setting up the torchtext field.

#required so that the tokenizer works
spacy.load('en') 
TEXT = pickle.load(open('TEXT-adam5.pkl', 'rb'))

In [6]:
# bs is the number of batches that the data gets split into
# bptt is the number of words from each batch
bs = 16
bptt = 20

In [7]:
FILES = dict(train=TRAIN_PATH, validation=TEST_PATH, test=TEST_PATH)
FILES

{'train': 'train/', 'validation': 'test/', 'test': 'test/'}

In [8]:
model_data = LanguageModelData.from_text_files("", TEXT, **FILES, bs=bs, bptt=bptt, min_freq=1)

In [9]:
# The number of batches
len(model_data.trn_dl)

28

In [10]:
# The number of vocab tokens
model_data.nt

37392

In [11]:
# Subset of the words that have been tokenized
TEXT.vocab.itos[:10]

['<unk>', '<pad>', 'the', ',', '.', 'and', 'a', 'of', 'to', 'is']

In [12]:
TEXT.vocab.stoi['the']

2

In [26]:
embedding_matrix_vector_size = 200

In [27]:
hidden_activations_per_layer = 500

In [28]:
number_of_layers = 3

In [29]:
optimization_function = partial(optim.Adam, betas=(0.7, 0.99))

### Creating the learner

In [30]:
learner = model_data.get_model(optimization_function, 
                               embedding_matrix_vector_size,
                               hidden_activations_per_layer,
                               number_of_layers,
                               dropouti=0.05, 
                               dropout=0.05, 
                               wdrop=0.1, 
                               dropoute=0.02, 
                               dropouth=0.05)

In [31]:
learner.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)

In [32]:
# This clips the learning rate
learner.clip=0.2

In [33]:
learner.load_encoder('adam5_enc')

In [34]:
learner.fit(3e-4, 4, wds=1e-6, cycle_len=1, cycle_mult=2)

HBox(children=(IntProgress(value=0, description='Epoch', max=15), HTML(value='')))

epoch      trn_loss   val_loss                            
    0      4.70394    4.037547  
    1      3.880033   3.257333                            
    2      3.479815   3.221597                            
    3      3.169128   2.856732                            
    4      2.910245   2.821475                            
    5      2.678835   2.710077                            
    6      2.522056   2.735424                            
    7      2.44901    2.669336                            
    8      2.327343   2.729415                            
    9      2.210193   2.636024                            
    10     2.10802    2.669223                            
    11     2.036789   2.656643                            
    12     1.97295    2.63354                             
    13     1.927896   2.618145                            
    14     1.914096   2.664956                            



[2.6649556109779757]

In [35]:
learner.fit(3e-3, 1, wds=1e-6, cycle_len=10)

HBox(children=(IntProgress(value=0, description='Epoch', max=10), HTML(value='')))

epoch      trn_loss   val_loss                            
    0      1.946285   2.68589   
    1      1.827705   2.752432                            
    2      1.575973   2.798588                            
    3      1.386097   2.807913                            
    4      1.177162   2.875699                            
    5      1.025995   2.856046                            
    6      0.898376   3.002249                             
    7      0.890061   2.906619                             
    8      0.78586    2.914161                             
    9      0.75588    2.99573                              



[2.995729708843094]

In [36]:
model = learner.model

In [63]:
ss = "knock, knock who's there"

In [64]:
s = [TEXT.tokenize(ss)]
t = TEXT.numericalize(s)
' '.join(s[0])

"knock , knock who 's there"

In [65]:
model[0].bs = 1
model.eval()
model.reset()
res, *_ = model(t)
model[0].bs = bs

In [66]:
next_words = torch.topk(res[-1], 10)[1]
[TEXT.vocab.itos[o] for o in to_np(next_words)]

['?', '!', '.', 'there', 'if', 'who', ',', '-', '...', 'again']

In [67]:
print(ss,"\n")
for i in range(100):
    n=res[-1].topk(10)[1]
    n = n[random.randint(0,9)] if n.data[0]==0 else n[0]
    print(TEXT.vocab.itos[n.data[0]], end=' ')
    res,*_ = model(n[0].unsqueeze(0))
print('...')

knock, knock who's there 

? <eos> knock , knock . who 's there ! argo ! argo who ? argo down the shops ! <eos> knock , knock . who 's there ! teresa ! teresa who ? teresa are green ! <eos> knock , knock . who 's there ! argo ! argo who ? argo down the shops ! <unk> ! <eos> knock , knock . who 's there ! carrie ! carrie who ? carrie on camping ! <eos> knock , knock . who 's there ! <unk> ! - who ? hey hey who ? hey ho , hey ho ...
