In [1]:
#imports
from fastai.text import *

# Language Model Fine-tuning on IMDB Dataset

In [2]:
# Fetch the IMDB dataset location and derive the base file names used for the
# fine-tuned language-model weights and vocabulary.
path = untar_data(URLs.IMDB)
name = 'imdb_gen'
lm_fns = [f'{name}_{suffix}' for suffix in ('wts', 'vocab')]
path.ls()

[WindowsPath('C:/Users/Oren/.fastai/data/imdb/imdb.vocab'),
 WindowsPath('C:/Users/Oren/.fastai/data/imdb/imdb_databunch'),
 WindowsPath('C:/Users/Oren/.fastai/data/imdb/imdb_gen_databunch'),
 WindowsPath('C:/Users/Oren/.fastai/data/imdb/imdb_pos_databunch'),
 WindowsPath('C:/Users/Oren/.fastai/data/imdb/models'),
 WindowsPath('C:/Users/Oren/.fastai/data/imdb/README'),
 WindowsPath('C:/Users/Oren/.fastai/data/imdb/test'),
 WindowsPath('C:/Users/Oren/.fastai/data/imdb/tmp_clas'),
 WindowsPath('C:/Users/Oren/.fastai/data/imdb/tmp_lm'),
 WindowsPath('C:/Users/Oren/.fastai/data/imdb/train'),
 WindowsPath('C:/Users/Oren/.fastai/data/imdb/unsup')]

In [3]:
#prepare the databunch
bs=64  # batch size, used both when building and when reloading the databunch
### comment below after you run it for the first time
# data = (TextList.from_folder(path)
#            #Inputs: all the text files in path
#             .filter_by_folder(include=['train', 'test', 'unsup']) 
#            #We may have other temp folders that contain text files so we only keep what's in train, test and unsup
#             .split_by_rand_pct(0.1, seed=42)
#            #We randomly split and keep 10% (10,000 reviews) for validation
#             .label_for_lm()           
#            #We want to do a language model so we label accordingly
#             .databunch(bs=bs))
# data.save(f'{name}_databunch')
# len(data.vocab.itos),len(data.train_ds)

In [4]:
### uncomment below after you run above for the first time
# Reload the databunch previously saved as f'{name}_databunch' under `path`
# (see the one-time preparation cell) and display a sample batch.
data = load_data(path, f'{name}_databunch', bs=bs)
data.show_batch()

idx,text
0,"later , by which time i did not care . xxmaj the character we should really care about is a very cocky , overconfident xxmaj ashton xxmaj kutcher . xxmaj the problem is he comes off as kid who thinks he 's better than anyone else around him and shows no signs of a cluttered closet . xxmaj his only obstacle appears to be winning over xxmaj costner . xxmaj"
1,"sidebar , xxmaj swayze 's character who is no xxmaj monk ( ! ) has sired a xxmaj russian beauty xxmaj elena ( played by the gorgeous xxmaj marta xxmaj xxunk ) on his previous missions to the former xxmaj commie state . xxmaj xxunk xxmaj swayze does a passable job in setting out to defeat the evil xxmaj russians . xxmaj but young unknown actress xxmaj marta xxmaj xxunk"
2,"proper sentences . \n \n xxmaj in short i absolutely hated everything about this movie and not in "" so bad its good "" kinda way ... \n \n xxmaj it was unadulterated drek . \n \n xxmaj gavin xxbos xxup ok , let me start off by saying this is n't a horrible movie by any means . xxmaj it 's just not good . i"
3,"3 . xxup if your not happy all the time , you are a bad person . xxmaj no one seems to show any other emotion but happiness , no matter which situation they are in . xxmaj if the child 's parents get mad or sad for some reason , the child may think of xxmaj mommy or xxmaj daddy differently . xxmaj not a good message at all"
4,"bad performances from all the huge stars . xxmaj the jokes ai n't funny , the lines are absurd and sometimes , they does n't make sense at all . xxmaj in fact , i recently read that on the stage , xxmaj ben xxmaj affleck has asked xxmaj bay whether it would be easier if they teach astronauts to drill , than drillers to becomes astronauts and xxmaj bay"


In [5]:
### comment below after you run it for the first time
# learn = language_model_learner(data, AWD_LSTM, pretrained=True, drop_mult=0.30)

In [6]:
### comment below after you run it for the first time
# learn.lr_find()
# learn.recorder.plot(skip_end=12)

In [7]:
### comment below after you run it for the first time
# lr = 1e-3
# lr *= bs/48  # Scale learning rate by batch size

In [8]:
### comment below after you run it for the first time
# learn.fit_one_cycle(1, lr*10, moms=(0.8,0.7))

In [9]:
### comment below after you run it for the first time
# learn.unfreeze()
# learn.fit_one_cycle(4, lr, moms=(0.8,0.7))

In [10]:
### comment below after you run it for the first time
# mdl_path = path/'models'
# mdl_path.mkdir(exist_ok=True)
# learn.save(mdl_path/lm_fns[0], with_opt=False)
# learn.data.vocab.save(mdl_path/(lm_fns[1] + '.pkl'))

In [11]:
### uncomment below after you run it for the first time
# Build the language-model learner from the fine-tuned weights/vocab files
# named in lm_fns (base names without extension).
# NOTE(review): drop_mult=0.0 presumably turns dropout off because this learner
# is only used for text generation below - confirm.
learn = language_model_learner(data, AWD_LSTM, pretrained_fnames=lm_fns, drop_mult=0.0)

# Text Generation Methods & Comparison

In [12]:
# Text-generation setup: one review is generated per prompt in TOKENS.
TOKENS = [
    "xxbos",
    "the",
    "this",
    "when",
    "i really",
    "you can",
    "if",
    "i was",
    "what",
]
N_SENT = len(TOKENS)  # number of prompts, i.e. number of generated reviews
N_WORDS = 100         # number of words generated for each review
temp = 0.8            # temperature passed to every generation method below

In [13]:
def predict(learn, text, n_words=1, temp=1., top1=False, min_p=None, sep=' ', decoder=decode_spec_tokens):
    '''
    Generate `n_words` tokens after the prompt `text`, one token at a time.

    Based on fastai implementation.
    For every word, gets the network activations, sets unknown token to 0,
    only considers tokens above a certain value, then either returns the token
    with the highest activation or samples from the distribution of activations.

    Args:
        learn:   language-model learner used for prediction.
        text:    prompt string the generation starts from.
        n_words: number of tokens to generate.
        temp:    temperature; activations are raised to the power `1 / temp`.
        top1:    if True pick the arg-max token (greedy decoding), otherwise
                 sample with `torch.multinomial`.
        min_p:   optional threshold; activations below it are set to 0.
        sep:     separator used to join the decoded tokens.
        decoder: callable applied to the list of generated tokens before joining.

    Returns:
        The prompt wrapped in square brackets followed by the generated text.
    '''
    import warnings

    learn.model.reset()
    xb,yb = learn.data.one_item(text)
    unk_idx = learn.data.vocab.stoi[UNK]  # loop-invariant, looked up once
    new_idx = []
    for _ in range(n_words):
        res = learn.pred_batch(batch=(xb,yb))[0][-1]
        res[unk_idx] = 0.
        if min_p is not None:
            # Only apply the threshold when at least one token survives it:
            # otherwise every weight would be 0 and torch.multinomial would
            # fail (and arg-max would silently return index 0).
            if (res >= min_p).any(): res[res < min_p] = 0.
            else: warnings.warn(f"There is no item with probability >= {min_p}, try a lower value.")
        res.pow_(1 / temp)
        if top1: idx = torch.argmax(res).item() # greedy decoding
        else: idx = torch.multinomial(res, 1).item()
        new_idx.append(idx)
        # Feed only the newly generated token back in as the next input.
        xb = xb.new_tensor([idx])[None]
    return '[' + text + ']' + sep + sep.join(decoder(learn.data.vocab.textify(new_idx, sep=None)))
    
def beam_search(learn, text, n_words=1, top_k=10, beam_sz=1000, temp=1., sep=' ', decoder=decode_spec_tokens):
    '''
    Generate `n_words` tokens after the prompt `text` using beam search.

    Based on fastai implementation.
    At every step each beam is extended with its `top_k` most likely next
    tokens (the unknown token is excluded) and the `beam_sz` candidates with the
    lowest cumulative negative log-probability are kept. The returned
    sequence is sampled from the final beams with weight
    `exp(-score / temp)`.

    Args:
        learn:   language-model learner used for prediction.
        text:    prompt string the generation starts from.
        n_words: number of tokens to generate.
        top_k:   number of continuations considered per beam at each step.
        beam_sz: maximum number of beams kept after each step.
        temp:    temperature used when sampling the final beam.
        sep:     separator used to join the decoded tokens.
        decoder: callable applied to the list of generated tokens before joining.

    Returns:
        The prompt wrapped in square brackets followed by the generated text.
    '''
    learn.model.reset()
    learn.model.eval()
    xb, yb = learn.data.one_item(text)
    nodes = xb.clone()
    # Length of the numericalized prompt. Used to strip the prompt from the
    # chosen beam; a fixed offset is only right for prompts of that exact
    # length and repeats part of longer prompts (e.g. "i really").
    prompt_len = nodes.size(1)
    scores = xb.new_zeros(1).float()
    unk_idx = learn.data.vocab.stoi[UNK]
    with torch.no_grad():
        for _ in progress_bar(range(n_words), leave=False):
            out = F.log_softmax(learn.model(xb)[0][:,-1], dim=-1)
            out[:,unk_idx] = -float('Inf')
            values, indices = out.topk(top_k, dim=-1)
            # scores accumulate negative log-probabilities: lower is better.
            scores = (-values + scores[:,None]).view(-1)
            # For each flattened candidate, the index of the beam it extends;
            # created on the same device as `nodes` so `sort_idx` can index it.
            indices_idx = torch.arange(0, nodes.size(0), device=nodes.device)[:,None].expand(
                nodes.size(0), top_k).contiguous().view(-1)
            sort_idx = scores.argsort()[:beam_sz]
            scores = scores[sort_idx]
            nodes = torch.cat([nodes[:,None].expand(nodes.size(0),top_k,nodes.size(1)),
                            indices[:,:,None].expand(nodes.size(0),top_k,1),], dim=2)
            nodes = nodes.view(-1, nodes.size(2))[sort_idx]
            # Keep the hidden state of the parent beam of every surviving candidate.
            learn.model[0].select_hidden(indices_idx[sort_idx])
            xb = nodes[:,-1][:,None]
    # Sample the final beam with probability proportional to exp(-score / temp).
    # softmax yields that distribution without the overflow/underflow risk of
    # exponentiating the raw scores directly.
    beam_probs = F.softmax(-scores / temp, dim=0)
    node_idx = torch.multinomial(beam_probs, 1).item()
    #node_idx = torch.argmin(scores) # returns top candidate instead of multinomial
    return '[' + text + ']' + sep + sep.join(decoder(
        learn.data.vocab.textify([i.item() for i in nodes[node_idx][prompt_len:]], sep=None)))
    
def predict_topk(learn, text, n_words=1, k=5, temp=1., min_p=None, sep=' ', decoder=decode_spec_tokens):
    '''
    Top-k sampling: return `text` and the `n_words` that come after.

    Based on paper.
    At every step only the `k` most likely tokens are kept and the next token
    is sampled among them with `torch.multinomial`.

    Args:
        learn:   language-model learner used for prediction.
        text:    prompt string the generation starts from.
        n_words: number of tokens to generate.
        k:       number of candidate tokens kept at each step.
        temp:    temperature; probabilities are raised to the power `1 / temp`.
        min_p:   optional threshold; probabilities below it are set to 0.
        sep:     separator used to join the decoded tokens.
        decoder: callable applied to the list of generated tokens before joining.

    Returns:
        The prompt wrapped in square brackets followed by the generated text.
    '''
    import warnings

    learn.model.reset()
    xb,yb = learn.data.one_item(text)
    unk_idx = learn.data.vocab.stoi[UNK]  # loop-invariant, looked up once
    new_idx = []
    for _ in range(n_words):
        probs = learn.pred_batch(batch=(xb,yb))[0][-1]
        probs[unk_idx] = 0.
        if min_p is not None:
            # Only threshold when at least one token survives, otherwise all
            # weights would be 0 and torch.multinomial would fail.
            if (probs >= min_p).any(): probs[probs < min_p] = 0.
            else: warnings.warn(f"There is no item with probability >= {min_p}, try a lower value.")
        # pred_batch output is already a probability distribution (`predict`
        # samples from it directly), so a second softmax would flatten it to
        # near-uniform. Temperature is applied as p ** (1 / temp), which is
        # proportional to softmax(logits / temp).
        probs = probs.pow(1 / temp)
        vals,idxs = probs.topk(min(k, probs.size(-1)), dim=-1)
        # torch.multinomial renormalizes `vals`; .item() keeps plain ints in new_idx.
        idx = idxs[torch.multinomial(vals, 1).item()].item()
        new_idx.append(idx)
        xb = xb.new_tensor([idx])[None]
    return '[' + text + ']' + sep + sep.join(decoder(learn.data.vocab.textify(new_idx, sep=None)))
    
def predict_nucleus(learn, text, n_words=1, p=0.5, temp=1., min_p=None, sep=' ', decoder=decode_spec_tokens):
    '''
    Nucleus (top-p) sampling: return `text` and the `n_words` that come after.

    Based on paper.
    At every step the tokens are sorted by decreasing probability, the
    smallest prefix whose cumulative probability exceeds `p` is kept, and the
    next token is sampled from that prefix with `torch.multinomial`.

    Args:
        learn:   language-model learner used for prediction.
        text:    prompt string the generation starts from.
        n_words: number of tokens to generate.
        p:       cumulative-probability threshold defining the nucleus.
        temp:    temperature dividing the activations before the softmax.
        min_p:   optional threshold; activations below it are set to 0.
        sep:     separator used to join the decoded tokens.
        decoder: callable applied to the list of generated tokens before joining.

    Returns:
        The prompt wrapped in square brackets followed by the generated text.
    '''
    learn.model.reset()
    xb,yb = learn.data.one_item(text)
    unk_idx = learn.data.vocab.stoi[UNK]  # loop-invariant, looked up once
    new_idx = []
    for _ in range(n_words):
        outp = learn.pred_batch(batch=(xb,yb))[0][-1]
        outp[unk_idx] = 0.
        if min_p is not None: outp[outp < min_p] = 0.
        # NOTE(review): `predict` samples from pred_batch output directly, so
        # `outp` looks like it is already a probability distribution. The
        # softmax below then flattens it to near-uniform, which would explain
        # why the notebook calls this function with a tiny p (1e-4). Left
        # unchanged because fixing it changes the meaning of `p` for existing
        # callers - confirm and fix together with the call site.
        probs = F.softmax(outp / temp, dim=-1)
        cumsum_prob = probs.sort(descending=True)[0].cumsum(0)
        over = (cumsum_prob > p).nonzero().view(-1)
        # If no prefix exceeds p (p >= 1 or rounding in the cumulative sum),
        # fall back to the whole vocabulary instead of raising IndexError.
        k = over[0].item() + 1 if over.numel() > 0 else probs.size(0)
        vals,idxs = probs.topk(k, dim=-1)
        # .item() keeps plain Python ints in new_idx rather than 0-dim tensors.
        idx = idxs[torch.multinomial(vals, 1).item()].item()
        new_idx.append(idx)
        xb = xb.new_tensor([idx])[None]
    return '[' + text + ']' + sep + sep.join(decoder(learn.data.vocab.textify(new_idx, sep=None)))

In [14]:
#greedy top-1 prediction: always take the most likely next token
print("\n\n".join(
    f"{n}. {predict(learn, prompt, n_words=N_WORDS, temp=temp, top1=True)}"
    for n, prompt in enumerate(TOKENS[:N_SENT], start=1)))

1. [xxbos] This is a very good movie . It is a very good movie . It is a very good movie . It is a very good movie . It is a very good movie . It is a very good movie . It is a movie that is not for everyone . It is a movie that is not for everyone . It is a movie that is not for everyone . It is a movie that is not for everyone . It is a movie that is not

2. [the] first time i saw this movie i was in the theater and i was so excited to see it . i was so excited to see it . i was so excited to see it . i was so excited to see it . i was so excited to see it . i was so excited to see it . i was so excited to see it . i was so excited to see it . i was so excited to see it . i was so excited to see it . i was so excited to see it .

3. [this] movie is a total waste of time . i have seen a lot of movies , but this one is so bad that it is funny . The acting is terrible , the plot is ridiculous , and the plot is so stupid . The only thing that really makes this movie worth watching is the

In [15]:
#multinomial prediction: sample the next token from the temperature-scaled distribution
print("\n\n".join(
    f"{n}. {predict(learn, prompt, n_words=N_WORDS, temp=temp, top1=False)}"
    for n, prompt in enumerate(TOKENS[:N_SENT], start=1)))

1. [xxbos] This film is a total waste of time . The acting is appalling ... a game on the Game People is a laugh - out - loud comedy . The acting is n't bad , but the story is simply awful . Where did this movie get made ? No , just the same , LIFELESS LIFE is the worst film i have ever seen . The only good thing about the movie is that it is actually filmed in Mexico , even though it is a plot to

2. [the] most classic BRITISH television series of our time . This is truly one of the best British television series we 've ever seen . i mean , that SHOWS World Wide Range and shows such strong women who come to Britain to live . And , once again , they are the only TV productions i know out there and the last TV series i 've seen . The sets are flawless , the costumes and scenery are wonderful and the story line is well written from the

3. [this] is a nice little film about the " Big Dark " of the Cold War . The film is far from that true . It should have been filmed with an set on some 

In [16]:
#beam-search prediction: 6 continuations per beam, 20 beams kept per step
print("\n\n".join(
    f"{n}. {beam_search(learn, prompt, n_words=N_WORDS, top_k=6, beam_sz=20, temp=temp)}"
    for n, prompt in enumerate(TOKENS[:N_SENT], start=1)))

1. [xxbos] This is one of the worst movies i have ever seen . The acting is terrible , the plot is non - existent , and the acting is terrible . The only good thing i can say about this movie is that it is so bad it 's good . If you want a good laugh , watch this movie . Do n't waste your time . Do n't waste your time . Do n't waste your time . Do n't waste your time or money on this . xxbos This

2. [the] first time i saw this movie , i thought it was the worst movie i have ever seen in my life . The first time i saw it , i thought it was the worst movie i have ever seen in my life . The first time i saw it , i thought it was the worst movie i have ever seen in my life . The first time i saw this movie , i thought it was the worst movie i have ever seen . It was so bad , i had to watch it

3. [this] is one of the worst movies i have ever seen . the acting is terrible , the plot is ridiculous , and the acting is terrible . the only reason i gave it a 2 instead of a 1 instead of a 1 is 

In [17]:
#top-k prediction: sample among the 5 most likely next tokens
print("\n\n".join(
    f"{n}. {predict_topk(learn, prompt, n_words=N_WORDS, k=5, temp=temp)}"
    for n, prompt in enumerate(TOKENS[:N_SENT], start=1)))

1. [xxbos] i was lucky that i found a video store that bought it and got me into buying it because the movie was a complete bore . There was nothing good to say , the movie seemed like a bad one , but that is what the movies were supposed for , so i did not like the story , or at some point did the plot line in this movie , but the story was so good i did n't know how it got to go , it 's a very good story , and i thought it was

2. [the] film is very slow and slow and it is a bit of another mess . There is a scene where John ( a guy with his own hair ) gets a shot of his neck , he gets out . ( i have n't seen the DVD for over 5 years , and the video looks very nice but that is n't enough . 
 
  The acting by some is very good and it 's very hard not Michael Keaton to do the right part . 
 
  SPOILER ALERT : the first

3. [this] was the first time that a person would have said something about the film and i 'm glad that the people responsible for it are still there , because i do n't w

In [18]:
#top-p (nucleus) prediction
# NOTE(review): p=1e-4 is far below a usual nucleus threshold; it appears to be
# tuned to how predict_nucleus computes its probabilities - confirm before changing.
print("\n\n".join(
    f"{n}. {predict_nucleus(learn, prompt, n_words=N_WORDS, p=1e-4, temp=temp)}"
    for n, prompt in enumerate(TOKENS[:N_SENT], start=1)))

1. [xxbos] a Christmas Eve has the potential of a cult movie but the story is very thin in the beginning . 
 
  In addition to that there is a good amount of gore and some nudity , and there was n't much gore in the movie and it does seem very tame in comparison . i have no complaints that i do enjoy watching John Travolta 's acting as the bad guys , i have no problem saying it was n't a total failure to watch him act like a bad ass , he was a

2. [the] film starts off slow but the story moves quickly , the plot has some nice ideas , it keeps your interest . i was expecting to find something new , and was very excited . it is n't very original , just a bunch that is just a bit boring . i do n't like the film for its story and characters . i would recommend the book , the story , etc to anyone who enjoys reading Lovecraft , especially the books . i would n't be surprised that they would make a better movie , but the

3. [this] show was the most awful show that i have watched since " Ame