In [1]:
# Put these at the top of every notebook, to get automatic reloading and inline plotting
# (Re)load IPython's autoreload extension.
%reload_ext autoreload
# Mode 2: re-import all modules before each cell runs, so edits to .py files are picked up.
%autoreload 2
# Render matplotlib figures inside the notebook output.
%matplotlib inline

# Funnynet - GRU

## A neural network that makes jokes

Special thanks to taivop for providing the [dataset](https://github.com/taivop/joke-dataset).

This notebook is heavily inspired by [fastai NLP work](https://github.com/fastai/fastai/blob/master/courses/dl2/imdb.ipynb).

## Setup

In [2]:
import pdb
import json
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import math, random

# These libraries require some setup; try the `pip install git+https://github.com/<user>/<repo>` trick
from fastai.io import *
from fastai.conv_learner import *

from fastai.column_data import *

from torchtext import vocab, data

from fastai.nlp import *
from fastai.lm_rnn import *

The data must already be partitioned into `data/trn/trn.txt` and `data/val/val.txt` for the training and validation sets, respectively.
If it has not been, uncomment and run the following cell.

In [3]:
#import preprocessing
#preprocessing.divide_data_into_files()

Variables that carry over from previous experiments:

In [4]:
# --- Hyperparameters: each one is defined exactly once -----------------------
embeddings_sz = 42        # width of each character-embedding vector
n_fac = embeddings_sz     # alias used by the model cells for the embedding width
n_hidden = 256            # width of the GRU hidden state
bs = 64                   # batch size handed to the data loader
bptt = 8                  # sequence length handed to the data loader (as `bptt`)

# --- Data locations (TRN_PATH / VAL_PATH are relative to PATH) ---------------
PATH = 'data/'
TRN_PATH = 'trn/'
VAL_PATH = 'val/'
TRN = f'{PATH}{TRN_PATH}'
VAL = f'{PATH}{VAL_PATH}'

# Character-level field: `tokenize=list` splits a string into single characters,
# and `lower=True` lower-cases the text first.
TEXT = data.Field(lower=True, tokenize=list)
print("TEXT: "+str(TEXT))

# The validation folder is deliberately reused as the test set.
FILES = dict(train=TRN_PATH, validation=VAL_PATH, test=VAL_PATH)
# NOTE(review): min_freq=3 presumably drops characters seen fewer than 3 times
# from the vocabulary - confirm against the torchtext version in use.
md = LanguageModelData.from_text_files(PATH, TEXT, **FILES, bs=bs, bptt=bptt, min_freq=3)

# Sanity check: (len of training loader, vocabulary size passed to the model,
# number of training documents, number of tokens in the first document).
len(md.trn_dl), md.nt, len(md.trn_ds), len(md.trn_ds[0].text)

TEXT: <torchtext.data.field.Field object at 0x7f7255584be0>


(80960, 506, 1, 41452489)

## GRU

In [5]:
class CharSeqStatefulGRU(nn.Module):
    """Stateful character-level GRU language model.

    Characters are embedded, run through a single-layer GRU, and projected back
    onto the vocabulary. The hidden state is kept on the instance (`self.h`)
    between calls to `forward`, so consecutive batches continue the same
    sequences.

    Args:
        vocab_size: number of distinct tokens; also the width of the output.
        n_fac: width of each embedding vector.
        bs: batch size used to allocate the initial hidden state.
        hidden_sz: width of the GRU hidden state. When omitted, the notebook
            level `n_hidden` is read once, at construction time.
    """

    def __init__(self, vocab_size, n_fac, bs, hidden_sz=None):
        super().__init__()
        self.vocab_size = vocab_size
        # Capture the hidden width once. Reading the global on every
        # `init_hidden` call would let a later reassignment of `n_hidden`
        # produce a hidden state that no longer matches the GRU weights.
        self.n_hidden = n_hidden if hidden_sz is None else hidden_sz
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.GRU(n_fac, self.n_hidden)
        self.l_out = nn.Linear(self.n_hidden, vocab_size)
        self.init_hidden(bs)

    def forward(self, cs):
        """Return log-probabilities over the vocabulary, one row per input token.

        `cs` is indexed as (sequence position, batch item): the batch size is
        taken from `cs[0].size(0)`. The result is flattened to
        (-1, vocab_size), which is the layout `F.nll_loss` accepts.
        """
        bs = cs[0].size(0)
        # A batch of a different size (e.g. the last, shorter one) cannot reuse
        # the stored state, so start again from zeros.
        if self.h.size(1) != bs: self.init_hidden(bs)
        outp, h = self.rnn(self.e(cs), self.h)
        # NOTE(review): repackage_var is a fastai helper; presumably it detaches
        # the state from the graph so gradients stop at the batch boundary.
        self.h = repackage_var(h)
        return F.log_softmax(self.l_out(outp), dim=-1).view(-1, self.vocab_size)

    def init_hidden(self, bs):
        """Reset the hidden state to zeros with shape (1, bs, hidden width)."""
        self.h = V(torch.zeros(1, bs, self.n_hidden))

In [6]:
# From the pytorch source code - for reference

def GRUCell(input, hidden, w_ih, w_hh, b_ih, b_hh):
    """Compute one GRU step and return the next hidden state.

    `w_ih`/`b_ih` project the input and `w_hh`/`b_hh` project the previous
    hidden state. Each projection is split into three equal parts along
    dim 1: reset gate, input (update) gate, and candidate state.
    """
    # NOTE(review): F.sigmoid / F.tanh are kept as in the upstream reference;
    # newer PyTorch releases deprecate them in favour of torch.sigmoid / torch.tanh.
    input_proj = F.linear(input, w_ih, b_ih)
    hidden_proj = F.linear(hidden, w_hh, b_hh)
    in_reset, in_update, in_cand = input_proj.chunk(3, 1)
    hid_reset, hid_update, hid_cand = hidden_proj.chunk(3, 1)

    reset = F.sigmoid(in_reset + hid_reset)
    update = F.sigmoid(in_update + hid_update)
    # The reset gate scales only the hidden-state contribution to the candidate.
    candidate = F.tanh(in_cand + reset * hid_cand)
    # Algebraically the same as (1 - update) * candidate + update * hidden.
    return candidate + update * (hidden - candidate)

In [7]:
# Build the model with the same batch size the data loader uses (`bs`), so the
# initial hidden state already has the right shape. With a mismatched literal
# the first forward pass would discard that state and allocate a new one.
m = CharSeqStatefulGRU(md.nt, n_fac, bs).cuda()

# Adam over all model parameters, starting at a learning rate of 1e-3.
opt = optim.Adam(m.parameters(), 1e-3)

In [8]:
# First pass: 6 epochs with the optimizer created above (Adam, lr=1e-3).
# The model returns log-probabilities (log_softmax), so negative
# log-likelihood is the matching loss.
fit(m, md, 6, opt, F.nll_loss)

epoch      trn_loss   val_loss                                   
    0      1.379423   1.381629  
    1      1.370106   1.369721                                   
    2      1.366583   1.367863                                   
    3      1.378308   1.3734                                     
    4      1.383073   1.386877                                   
    5      1.408547   1.409568                                   



[array([1.40957])]

In [9]:
# Drop the learning rate from 1e-3 to 1e-4: in the previous run both training
# and validation loss started climbing again after epoch 2, which suggests
# the original rate had become too large.
set_lrs(opt, 1e-4)

In [10]:
# Second pass: 3 more epochs with the same model and optimizer objects,
# now at the reduced learning rate.
fit(m, md, 3, opt, F.nll_loss)

epoch      trn_loss   val_loss                                   
    0      1.33916    1.325978  
    1      1.331367   1.315031                                   
    2      1.321926   1.308757                                   



[array([1.30876])]