In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from fastai.learner import *

import torchtext
from torchtext import vocab, data
from torchtext.datasets import language_modeling

from fastai.rnn_reg import *
from fastai.rnn_train import *
from fastai.nlp import *
from fastai.lm_rnn import *

import dill as pickle

In [3]:
# Root folder of the IMDB dataset; everything below is relative to it.
PATH = 'data/aclImdb'

# Sub-folders (relative to PATH) that hold the pooled training / validation reviews.
TRN_PATH = 'train/all'
VAL_PATH = 'test/all'

# Make sure every working directory exists; exist_ok keeps the cell re-runnable.
for sub_dir in (TRN_PATH, VAL_PATH, 'models', 'tmp'):
    os.makedirs(f'{PATH}/{sub_dir}', exist_ok=True)

# Full paths used by the shell commands in the following cells.
TRN = f'{PATH}/{TRN_PATH}'
VAL = f'{PATH}/{VAL_PATH}'

# !!cp -r {PATH}/train/pos/* {TRN}/
# !!cp -r {PATH}/train/neg/* {TRN}/
# !!cp -r {PATH}/train/unsup/* {TRN}/ # have to run this line in terminal for it to work!

# !!cp -r {PATH}/test/pos/* {VAL}/
# !!cp -r {PATH}/test/neg/* {VAL}/

%ls {PATH}

[0m[01;31maclImdb_v1.tar.gz[0m  imdbEr.txt  imdb.vocab  [01;34mmodels[0m/  README  [01;34mtest[0m/  [01;34mtmp[0m/  [01;34mtrain[0m/


In [4]:
# each review is stored as an individual text file;
# `!ls` captures the directory listing as a list of file names
trn_files = !ls {TRN}

print(f'Total files in /train/all: {len(trn_files)}')
trn_files[:10]

Total files in /train/all: 25001


['0_3.txt',
 '0_9.txt',
 '10000_4.txt',
 '10000_8.txt',
 '10001_10.txt',
 '10001_4.txt',
 '10002_1.txt',
 '10002_7.txt',
 '10003_1.txt',
 '10003_8.txt']

In [5]:
# an example review: `!cat` captures the file's output as a list of lines,
# so index 0 is the first line of the review file
review = !cat {TRN}/{trn_files[6]}
review[0]

'Sorry everyone,,, I know this is supposed to be an "art" film,, but wow, they should have handed out guns at the screening so people could blow their brains out and not watch. Although the scene design and photographic direction was excellent, this story is too painful to watch. The absence of a sound track was brutal. The loooonnnnng shots were too long. How long can you watch two people just sitting there and talking? Especially when the dialogue is two people complaining. I really had a hard time just getting through this film. The performances were excellent, but how much of that dark, sombre, uninspired, stuff can you take? The only thing i liked was Maureen Stapleton and her red dress and dancing scene. Otherwise this was a ripoff of Bergman. And i\'m no fan f his either. I think anyone who says they enjoyed 1 1/2 hours of this is,, well, lying.'

In [6]:
# how many words in the dataset (train): concatenate every .txt review and
# count whitespace-separated words with `wc -w`
!find {TRN} -name '*.txt' | xargs cat | wc -w

17486581


In [7]:
# how many words in the dataset (val): concatenate every .txt review and
# count whitespace-separated words with `wc -w`
!find {VAL} -name '*.txt' | xargs cat | wc -w

5686719


In [8]:
# tokenize = split each sentence into a list of tokens (words and punctuation);
# the tokens are re-joined with spaces here so the token boundaries are visible
' '.join(spacy_tok(review[0]))

'Sorry everyone , , , I know this is supposed to be an " art " film , , but wow , they should have handed out guns at the screening so people could blow their brains out and not watch . Although the scene design and photographic direction was excellent , this story is too painful to watch . The absence of a sound track was brutal . The loooonnnnng shots were too long . How long can you watch two people just sitting there and talking ? Especially when the dialogue is two people complaining . I really had a hard time just getting through this film . The performances were excellent , but how much of that dark , sombre , uninspired , stuff can you take ? The only thing i liked was Maureen Stapleton and her red dress and dancing scene . Otherwise this was a ripoff of Bergman . And i \'m no fan f his either . I think anyone who says they enjoyed 1 1/2 hours of this is , , well , lying .'

In [9]:
# create a torchtext field = describes how to preprocess a piece of text
# (here: lowercase everything and tokenize with spacy_tok)
TEXT = data.Field(lower=True, tokenize=spacy_tok)

In [10]:
# hyperparameters for the language-modeling ModelData object built in the next cell
bs = 8 #64
bptt = 70

In [11]:
# sub-directories (relative to PATH) for each split; the validation folder is also passed as the test set
FILES = dict(train=TRN_PATH, validation=VAL_PATH, test=VAL_PATH)

# min_freq = 10 says, "treat any word that appears fewer than 10 times as the word <unk>"
md = LanguageModelData.from_text_files(PATH, TEXT, **FILES, bs=bs, bptt=bptt, min_freq=10)

> [0;32m/home/wgilliam/development/_training/ml/fastai-course/fastai-projects/part1v2/dl1/fastai/nlp.py[0m(310)[0;36mfrom_text_files[0;34m()[0m
[0;32m    308 [0;31m                                    path, text_field=field, train=train, validation=validation, test=test)
[0m[0;32m    309 [0;31m        [0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m--> 310 [0;31m        [0;32mreturn[0m [0mcls[0m[0;34m([0m[0mpath[0m[0;34m,[0m [0mfield[0m[0;34m,[0m [0mtrn_ds[0m[0;34m,[0m [0mval_ds[0m[0;34m,[0m [0mtest_ds[0m[0;34m,[0m [0mbs[0m[0;34m,[0m [0mbptt[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m    311 [0;31m[0;34m[0m[0m
[0m[0;32m    312 [0;31m[0;34m[0m[0m
[0m
ipdb> c


In [12]:
# after building the ModelData object, TEXT.vocab is set.  because this will be needed again, save it
# (the context manager guarantees the file is flushed and closed once the dump finishes)
with open(f'{PATH}/models/TEXT.pkl', 'wb') as text_file:
    pickle.dump(TEXT, text_file)

In [13]:
# the tuple below shows, in order:
#   number of batches
#   number of unique tokens in vocab
#   number of items in training set (as LanguageModel is concerned, there is only one thing, the whole corpus)
#   number of words (tokens) in that single training item
len(md.trn_dl), md.nt, len(md.trn_ds), len(md.trn_ds[0].text)

(12257, 20270, 1, 6864658)

In [14]:
# int to string mapping: a token's position in `itos` is its integer id
TEXT.vocab.itos[:12]

['<unk>', '<pad>', 'the', ',', '.', 'and', 'a', 'of', 'to', 'is', 'it', 'in']

In [15]:
# string to int mapping (the reverse lookup of `itos`)
TEXT.vocab.stoi['the']

2

In [16]:
# in a LanguageModelData object there is only one item in each dataset: all the words joined together
# (shown here: the first 12 tokens of that single training item)
md.trn_ds[0].text[:12]

['one',
 'of',
 'my',
 'favorite',
 'movies',
 ',',
 'with',
 'a',
 'very',
 'nostalgic',
 'ending',
 '.']

In [17]:
# torchtext will handle turning these words into integer ids
# (the token slice is wrapped in a list — presumably because numericalize expects a batch of sequences)
TEXT.numericalize([md.trn_ds[0].text[:12]])

Variable containing:
   37
    7
   72
  519
  114
    3
   22
    6
   66
 4451
  288
    4
[torch.cuda.LongTensor of size 12x1 (GPU 0)]

In [18]:
# pull a single mini-batch from the training data loader and show the tensor sizes
batch = next(iter(md.trn_dl))
print(batch[0].size())   # in the recorded run: (81, 8), i.e. second dim == bs
print(batch[1].size())   # in the recorded run: 648 == 81 * 8

batch

torch.Size([81, 8])
torch.Size([648])


(Variable containing:
     37    690     74     28  17528   3332    547    585
      7      4     10     32     21     17   1284      4
     72     38     16    439   9002      0     23     18
    519     13   5140   2935      3    230      5     12
    114     19     14      8    232     66    109    252
      3     44     34     79     20  13537     10    112
     22    439    857      6     58     29    233   5751
      6    404  12157  11570     28     59      5      8
     66     23     96      7     32     22   2728   1739
   4451      4     36   1789    277    104      2      4
    288     53    511   1512     41    125    183     12
      4     30      8      4  11081      2   2435   3447
      2     36    350     20     16   1049  12952    163
     23  10727   1690     12  13552     17      5    478
      9     11      0    315  15688    555   2980    134
     54     43      4     41   2720      5     20      2
      2   1362     10      2      7      2     13     23
   3049  

In [19]:
emb_sz = 200       # size of each embedding vector
nh = 500           # number of hidden activations per layer
nl = 3             # number of layers

In [20]:
# for NLP, configure Adam to use less momentum than the default of 0.9
# (the first beta is set to 0.7 here)
opt_fn = partial(optim.Adam, betas=(0.7, 0.99))

In [21]:
# build the language-model learner from the hyperparameters above, with a separate
# dropout rate for each dropout* / wdrop keyword
learner = md.get_model(opt_fn, emb_sz, nh, nl,
                      dropouti=0.24, dropout=0.025, wdrop=0.05, dropoute=0.01, dropouth=0.025)

# extra regularization function applied during training
learner.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)
# presumably the gradient-clipping threshold — TODO confirm against the learner implementation
learner.clip = 0.3

--Return--
None
> [0;32m/home/wgilliam/development/_training/ml/fastai-course/fastai-projects/part1v2/dl1/fastai/lm_rnn.py[0m(72)[0;36m__init__[0;34m()[0m
[0;32m     70 [0;31m        [0mself[0m[0;34m.[0m[0mdropouti[0m [0;34m=[0m [0mLockedDropout[0m[0;34m([0m[0mdropouti[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m     71 [0;31m        [0mself[0m[0;34m.[0m[0mdropouths[0m [0;34m=[0m [0mnn[0m[0;34m.[0m[0mModuleList[0m[0;34m([0m[0;34m[[0m[0mLockedDropout[0m[0;34m([0m[0mdropouth[0m[0;34m)[0m [0;32mfor[0m [0ml[0m [0;32min[0m [0mrange[0m[0;34m([0m[0mnlayers[0m[0;34m)[0m[0;34m][0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m---> 72 [0;31m        [0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m     73 [0;31m[0;34m[0m[0m
[0m[0;32m     74 [0;31m    [0;32mdef[0m [0mforward[0m[0;34m([0m[0mself[0m[0;34m,[0m [0minput[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0m
[0m
ipdb> c
--Return--
None
> [0;32m/home/wgilli

In [None]:
# run the learning-rate finder (not executed in the saved notebook: no execution count)
lrf = learner.lr_find() # took about 20 mins on AWS

In [None]:
# plot the learner's schedule — presumably the loss-vs-learning-rate curve from lr_find above
learner.sched.plot()

In [22]:
# 2 cycles with cycle_len=1 and cycle_mult=2 -> 3 epochs in total (see the log below); run time was not recorded
learner.fit(3e-3, 2, wds=1e-6, cycle_len=1, cycle_mult=2)

epoch      trn_loss   val_loss                                  
    0      5.173734   5.053396  
    1      4.950676   4.841609                                  
    2      4.8607     4.77143                                   



[4.7714305]

In [23]:
# save the encoder; the sentiment section later loads it by this name
learner.save_encoder('imdb_adam1_enc')
# (uncomment to restore the saved encoder instead)
# learner.load_encoder('imdb_adam1_enc')

In [None]:
# longer training run: 2 cycles with cycle_len=10; cycle_save_name is the
# checkpoint name later passed to load_cycle
# (earlier variant kept for reference)
# learner.fit(3e-3, 4, wds=1e-6, cycle_len=10, cycle_save_name='imdb_adam2_4_10')
learner.fit(3e-3, 2, wds=1e-6, cycle_len=10, cycle_save_name='imdb_adam2_c2_cl10')

In [None]:
# save the encoder after the second training stage
learner.save_encoder('imdb_adam2_enc')
# (uncomment to restore the saved encoder instead)
# learner.load_encoder('imdb_adam2_enc')

In [None]:
# one more cycle at a 10x lower learning rate (3e-4 instead of 3e-3)
# (earlier variant kept for reference)
# learner.fit(3e-3, 1, wds=1e-6, cycle_len=20, cycle_save_name='imdb_adam2_1_20')
learner.fit(3e-4, 1, wds=1e-6, cycle_len=10, cycle_save_name='imdb_adam3_c1_cl10')

In [None]:
# save the encoder after the third training stage
learner.save_encoder('imdb_adam3_enc')

In [None]:
# reload the weights saved under 'imdb_adam2_c2_cl10' for cycle index 1
learner.load_cycle('imdb_adam2_c2_cl10', 1)

In [None]:
# metric perplexity (how language model accuracy is generally measured) = exp() of the loss function
# NOTE(review): 4.21699 is hard-coded — presumably a validation loss from a run whose output was not kept
np.exp(4.21699)

## Test

In [24]:
# create a short bit of text to "prime" the predictions, then use torchtext to numericalize it
# so we can feed it into our language model
m = learner.model
# alternative prompt (disabled; only one prompt can be active at a time):
# ss = """. So, it wasn't quite what I was expecting, but I really liked it anways! The best"""
ss = """. I couldn't believe this movie was so scary, but I loved it. The best part"""
s = [spacy_tok(ss)]        # a list holding the single tokenized prompt
t = TEXT.numericalize(s)   # token ids for the prompt
' '.join(s[0])

". I could n't believe this movie was so scary , but I loved it . The best part"

In [41]:
# run the priming text through the model as a single sequence
m[0].bs = 1          # set batch size = 1
try:
    m.eval()         # turn-off dropout
    m.reset()        # reset hidden state
    res, *_ = m(t)   # get predictions from model
finally:
    # always put the batch size back, even if the forward pass fails; otherwise the
    # encoder would silently keep bs = 1 for any later cell that uses it
    m[0].bs = bs

In [42]:
# NOTE: a notebook cell only echoes its final expression, so the size() call below is
# evaluated but not displayed; the recorded output (19) comes from len(res)
res[-1].size()   # the prediction based on the full sentence; the last prediction
len(res)         # the number of words (tokens) in "t"

19

In [43]:
# top 10 predictions for next word
# (topk returns (values, indices); [1] keeps the indices, i.e. the vocab ids)
nexts = torch.topk(res[-1], 10)[1]           # return the 10 indexes of the top 10 predictions
[TEXT.vocab.itos[o] for o in to_np(nexts)]   # map the ids back to their tokens

['of', 'is', 'was', ',', '.', ':', 'in', 'about', 'for', 'i']

In [44]:
# try to generate more text: repeatedly pick the most likely next token and feed it back in
print(ss, "\n")

for i in range(50):
    n = res[-1].topk(2)[1]                 # ids of the two highest-scoring next tokens
    n = n[1] if n.data[0] == 0 else n[0]   # id 0 is '<unk>' in this vocab: fall back to the runner-up
    print(TEXT.vocab.itos[n.data[0]], end=' ')
    res, *_ = m(n[0].unsqueeze(0))         # feed the chosen token back into the model
    
print('...')

. I couldn't believe this movie was so scary, but I loved it. The best part 

of the film , and the film is a bit too long . 

 the film is a bit of a good movie , but it is a very good movie . <eos> i have seen this movie on dvd and i was expecting a lot of fun . i ...


## Sentiment

In [45]:
# same values as in the language-model section above
bs = 8 #64
bptt = 70

emb_sz = 200       # size of each embedding vector
nh = 500           # number of hidden activations per layer
nl = 3             # number of layers

# for NLP, configure Adam to use less momentum than the default of 0.9
opt_fn = partial(optim.Adam, betas=(0.7, 0.99))

In [46]:
# use the same vocab built from the language model so as to ensure words map to same Ids
# NOTE: unpickling can execute arbitrary code — only load a TEXT.pkl that this notebook wrote
with open(f'{PATH}/models/TEXT.pkl', 'rb') as text_file:
    TEXT = pickle.load(text_file)

In [47]:
# label field: declared non-sequential (one label per review rather than a token sequence)
IMDB_LABEL = data.Field(sequential=False)
# torchtext's IMDB dataset, using the shared TEXT field for the review text;
# note the third argument is the literal 'data/' rather than something derived from PATH
splits = torchtext.datasets.IMDB.splits(TEXT, IMDB_LABEL, 'data/')

In [48]:
# first example of the first split
# NOTE(review): this rebinds `t`, which held the numericalized prompt in the Test section;
# re-running the `m(t)` cell after this one would pass this example to the model instead
t = splits[0].examples[0]

In [49]:
# the example's label together with its first 10 tokens
t.label, ' '.join(t.text[:10])

('pos', 'one of my favorite movies , with a very nostalgic')

In [50]:
# fastai can create a ModelData object directly from torchtext splits
# (bs is the batch size set at the top of this section)
md2 = TextData.from_splits(PATH, splits, bs)

> [0;32m/home/wgilliam/development/_training/ml/fastai-course/fastai-projects/part1v2/dl1/fastai/nlp.py[0m(355)[0;36mfrom_splits[0;34m()[0m
[0;32m    353 [0;31m                 else len(getattr(splits[0][0], label_name)))
[0m[0;32m    354 [0;31m        [0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m--> 355 [0;31m        [0;32mreturn[0m [0mobj[0m[0;34m[0m[0m
[0m[0;32m    356 [0;31m[0;34m[0m[0m
[0m[0;32m    357 [0;31m    [0;32mdef[0m [0mto_model[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mm[0m[0;34m,[0m [0mopt_fn[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0m
[0m
ipdb> obj.bs
8
ipdb> objs.pad_idx
*** NameError: name 'objs' is not defined
ipdb> obj.pad_idx
1
ipdb> obj.nt
20270
ipdb> obj.c
3
ipdb> c


In [51]:
# build the sentiment classifier with the same emb_sz / nh / nl as the language model
# NOTE(review): 1500 is a magic number — presumably a maximum sequence length; TODO confirm
m3 = md2.get_model(opt_fn, 1500, bptt, emb_sz=emb_sz, n_hid=nh, n_layers=nl,
                      dropout=0.1, dropouti=0.4, wdrop=0.5, dropoute=0.05, dropouth=0.3)

m3.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)
# start from the encoder saved after the first language-model fit
# (plain string: the name has no placeholders, and it matches the save_encoder call)
m3.load_encoder('imdb_adam1_enc')

--Return--
None
> [0;32m/home/wgilliam/development/_training/ml/fastai-course/fastai-projects/part1v2/dl1/fastai/lm_rnn.py[0m(72)[0;36m__init__[0;34m()[0m
[0;32m     70 [0;31m        [0mself[0m[0;34m.[0m[0mdropouti[0m [0;34m=[0m [0mLockedDropout[0m[0;34m([0m[0mdropouti[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m     71 [0;31m        [0mself[0m[0;34m.[0m[0mdropouths[0m [0;34m=[0m [0mnn[0m[0;34m.[0m[0mModuleList[0m[0;34m([0m[0;34m[[0m[0mLockedDropout[0m[0;34m([0m[0mdropouth[0m[0;34m)[0m [0;32mfor[0m [0ml[0m [0;32min[0m [0mrange[0m[0;34m([0m[0mnlayers[0m[0;34m)[0m[0;34m][0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m---> 72 [0;31m        [0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m     73 [0;31m[0;34m[0m[0m
[0m[0;32m     74 [0;31m    [0;32mdef[0m [0mforward[0m[0;34m([0m[0mself[0m[0;34m,[0m [0minput[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0m
[0m
ipdb> c
--Return--
None
> [0;32m/home/wgilli

In [52]:
# contains two modules: (0) the MultiBatchRNN encoder and (1) the PoolingLinearClassifier head
m3 # contains two models, the first is an instance of RNN_Encoder

SequentialRNN(
  (0): MultiBatchRNN(
    (encoder): Embedding(20270, 200, padding_idx=1)
    (encoder_with_dropout): EmbeddingDropout(
      (embed): Embedding(20270, 200, padding_idx=1)
    )
    (rnns): ModuleList(
      (0): WeightDrop(
        (module): LSTM(200, 500, dropout=0.3)
      )
      (1): WeightDrop(
        (module): LSTM(500, 500, dropout=0.3)
      )
      (2): WeightDrop(
        (module): LSTM(500, 200, dropout=0.3)
      )
    )
    (dropouti): LockedDropout(
    )
    (dropouths): ModuleList(
      (0): LockedDropout(
      )
      (1): LockedDropout(
      )
      (2): LockedDropout(
      )
    )
  )
  (1): PoolingLinearClassifier(
    (layers): ModuleList(
      (0): LinearBlock(
        (lin): Linear(in_features=600, out_features=3)
        (drop): Dropout(p=0.1)
        (bn): BatchNorm1d(600, eps=1e-05, momentum=0.1, affine=True)
      )
    )
  )
)

In [53]:
# presumably the gradient-clipping threshold (much larger than the 0.3 used for the language model)
m3.clip = 25.
# presumably one learning rate per layer group, smallest first — TODO confirm
lrs = np.array([1e-4, 1e-3, 1e-2])

In [54]:
# NOTE: the recorded run of this cell ended in a CUDA out-of-memory error (see the output below),
# and the cells after it have no recorded outputs
m3.freeze_to(-1) # freeze everything except last layer
m3.fit(lrs/2, 1, metrics=[accuracy])  # train at half the base learning rates, reporting accuracy

  0%|          | 0/3124 [00:00<?, ?it/s]> [0;32m/home/wgilliam/development/_training/ml/fastai-course/fastai-projects/part1v2/dl1/fastai/lm_rnn.py[0m(135)[0;36mforward[0;34m()[0m
[0;32m    133 [0;31m                [0moutputs[0m[0;34m.[0m[0mappend[0m[0;34m([0m[0mo[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m    134 [0;31m        [0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m--> 135 [0;31m        [0;32mreturn[0m [0mself[0m[0;34m.[0m[0mconcat[0m[0;34m([0m[0mraw_outputs[0m[0;34m)[0m[0;34m,[0m [0mself[0m[0;34m.[0m[0mconcat[0m[0;34m([0m[0moutputs[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m    136 [0;31m[0;34m[0m[0m
[0m[0;32m    137 [0;31m[0;32mclass[0m [0mLinearDecoder[0m[0;34m([0m[0mnn[0m[0;34m.[0m[0mModule[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0m
[0m
ipdb> input.size()
torch.Size([189, 8])
ipdb> self.bptt
70
ipdb> 70*3
210
ipdb> c
> [0;32m/home/wgilliam/development/_training/ml/fastai-course/fastai-project

ipdb> input.size()
*** AttributeError: 'tuple' object has no attribute 'size'
ipdb> len(input)
2
ipdb> input[0]
[Variable containing:
( 0 ,.,.) = 
  3.0482e-01  1.4634e-02  8.2850e-08  ...  -2.9924e-03 -2.5956e-02 -1.6865e-02
  6.2726e-01 -2.3886e-02  4.5073e-05  ...  -1.3863e-03 -6.5185e-03 -2.8018e-02
  1.6438e-03 -4.1667e-01  5.9774e-02  ...  -1.5805e-01 -1.0211e-02 -3.7333e-02
                 ...                   ⋱                   ...                
  7.2423e-01  1.3097e-01 -2.1679e-06  ...  -2.1566e-04 -2.4592e-02 -3.8522e-02
 -1.4999e-01 -8.2773e-07  2.9512e-06  ...  -1.4369e-05 -4.7184e-02 -5.1053e-02
  7.4440e-01  2.6489e-01 -9.4652e-04  ...  -2.3397e-04  1.7172e-02 -3.2194e-02

( 1 ,.,.) = 
  8.5194e-04  4.0759e-02  3.5672e-02  ...  -7.4968e-02 -1.8334e-03 -3.2396e-02
  1.8823e-01  6.3061e-05  2.3715e-02  ...  -1.6540e-05 -2.6546e-03 -3.2292e-02
  4.0109e-03 -9.7999e-03  1.8703e-02  ...  -9.9771e-07  7.9307e-03 -4.2351e-02
                 ...                   ⋱         

ipdb> input[0].size(), input[1].size()
*** AttributeError: 'list' object has no attribute 'size'
ipdb> input[0]
[Variable containing:
( 0 ,.,.) = 
  3.0482e-01  1.4634e-02  8.2850e-08  ...  -2.9924e-03 -2.5956e-02 -1.6865e-02
  6.2726e-01 -2.3886e-02  4.5073e-05  ...  -1.3863e-03 -6.5185e-03 -2.8018e-02
  1.6438e-03 -4.1667e-01  5.9774e-02  ...  -1.5805e-01 -1.0211e-02 -3.7333e-02
                 ...                   ⋱                   ...                
  7.2423e-01  1.3097e-01 -2.1679e-06  ...  -2.1566e-04 -2.4592e-02 -3.8522e-02
 -1.4999e-01 -8.2773e-07  2.9512e-06  ...  -1.4369e-05 -4.7184e-02 -5.1053e-02
  7.4440e-01  2.6489e-01 -9.4652e-04  ...  -2.3397e-04  1.7172e-02 -3.2194e-02

( 1 ,.,.) = 
  8.5194e-04  4.0759e-02  3.5672e-02  ...  -7.4968e-02 -1.8334e-03 -3.2396e-02
  1.8823e-01  6.3061e-05  2.3715e-02  ...  -1.6540e-05 -2.6546e-03 -3.2292e-02
  4.0109e-03 -9.7999e-03  1.8703e-02  ...  -9.9771e-07  7.9307e-03 -4.2351e-02
                 ...                   ⋱         

ipdb> outputs[-1]
Variable containing:
( 0 ,.,.) = 
  1.3501e-01 -1.9047e-02  1.6263e-01  ...  -7.3689e-02 -1.1781e-03  5.7154e-03
 -2.8141e-02 -3.8059e-02 -2.1128e-01  ...   2.4266e-02 -8.0584e-04  3.7372e-03
  3.5554e-02  1.7791e-01 -1.8791e-02  ...  -1.8143e-01  2.4491e-02  5.6532e-02
                 ...                   ⋱                   ...                
 -1.4464e-01  1.6767e-02  2.3936e-01  ...  -3.5129e-02 -2.6848e-02  1.2881e-02
  5.9751e-03  3.5120e-02 -7.0760e-03  ...  -2.4877e-02 -5.9188e-02  4.2381e-02
 -1.0946e-01  1.7107e-02  3.2354e-01  ...  -3.8421e-03 -8.4416e-03  3.1667e-02

( 1 ,.,.) = 
  1.6591e-01  4.5821e-04 -1.4173e-02  ...  -8.2288e-02 -5.8149e-02  3.5861e-02
 -2.3416e-02  1.4861e-01 -1.6804e-01  ...  -8.3639e-02  6.1359e-03  1.4606e-02
  1.7186e-01  6.6252e-02 -3.0466e-01  ...  -1.3552e-01 -5.0332e-03  6.1522e-03
                 ...                   ⋱                   ...                
 -6.9175e-02  2.7616e-01 -2.2785e-02  ...  -1.5282e-01  6.3021e-0

ipdb> c
> [0;32m/home/wgilliam/development/_training/ml/fastai-course/fastai-projects/part1v2/dl1/fastai/lm_rnn.py[0m(189)[0;36mforward[0;34m()[0m
[0;32m    187 [0;31m            [0mx[0m [0;34m=[0m [0mF[0m[0;34m.[0m[0mrelu[0m[0;34m([0m[0ml_x[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m    188 [0;31m        [0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m--> 189 [0;31m        [0;32mreturn[0m [0ml_x[0m[0;34m,[0m [0mraw_outputs[0m[0;34m,[0m [0moutputs[0m[0;34m[0m[0m
[0m[0;32m    190 [0;31m[0;34m[0m[0m
[0m[0;32m    191 [0;31m[0;34m[0m[0m
[0m
ipdb> outputs
[Variable containing:
( 0 ,.,.) = 
  0.0000  0.1154 -0.0000  ...  -0.0002 -0.0351 -0.0380
  0.0000  0.6630  0.0000  ...  -0.0000 -0.0132 -0.0810
  0.0592 -0.0044 -0.0000  ...  -0.0240 -0.0167 -0.0000
           ...             ⋱             ...          
  1.0679  0.1805 -0.0000  ...  -0.0000 -0.0054 -0.0000
  0.0000  0.2372  0.0000  ...  -0.0000 -0.0188 -0.0000
  0.0000 

ipdb> c
  0%|          | 3/3124 [10:21<179:34:27, 207.13s/it, loss=1.13]> [0;32m/home/wgilliam/development/_training/ml/fastai-course/fastai-projects/part1v2/dl1/fastai/lm_rnn.py[0m(135)[0;36mforward[0;34m()[0m
[0;32m    133 [0;31m                [0moutputs[0m[0;34m.[0m[0mappend[0m[0;34m([0m[0mo[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m    134 [0;31m        [0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m--> 135 [0;31m        [0;32mreturn[0m [0mself[0m[0;34m.[0m[0mconcat[0m[0;34m([0m[0mraw_outputs[0m[0;34m)[0m[0;34m,[0m [0mself[0m[0;34m.[0m[0mconcat[0m[0;34m([0m[0moutputs[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m    136 [0;31m[0;34m[0m[0m
[0m[0;32m    137 [0;31m[0;32mclass[0m [0mLinearDecoder[0m[0;34m([0m[0mnn[0m[0;34m.[0m[0mModule[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0m
[0m
ipdb> outputs
[[Variable containing:
( 0 ,.,.) = 
  0.0000  1.0279  1.0879  ...  -0.0000 -0.0000  0.0068
  1.0755  0.1046 -0.00

ipdb> c
> [0;32m/home/wgilliam/development/_training/ml/fastai-course/fastai-projects/part1v2/dl1/fastai/lm_rnn.py[0m(189)[0;36mforward[0;34m()[0m
[0;32m    187 [0;31m            [0mx[0m [0;34m=[0m [0mF[0m[0;34m.[0m[0mrelu[0m[0;34m([0m[0ml_x[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m    188 [0;31m        [0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m--> 189 [0;31m        [0;32mreturn[0m [0ml_x[0m[0;34m,[0m [0mraw_outputs[0m[0;34m,[0m [0moutputs[0m[0;34m[0m[0m
[0m[0;32m    190 [0;31m[0;34m[0m[0m
[0m[0;32m    191 [0;31m[0;34m[0m[0m
[0m
ipdb> outputs
[Variable containing:
( 0 ,.,.) = 
  0.0000  1.0279  1.0879  ...  -0.0000 -0.0000  0.0068
  1.0755  0.1046 -0.0000  ...  -0.0004 -0.0045 -0.0518
  0.0000 -0.1226 -0.0000  ...  -0.2664  0.0000 -0.0004
           ...             ⋱             ...          
  0.3793 -0.0160 -0.0000  ...  -0.0423  0.0143 -0.0488
  0.4796 -0.0000  0.0003  ...  -0.0000 -0.0349 -0.0404
  0.0000 

ipdb> c
  0%|          | 4/3124 [10:48<140:33:01, 162.17s/it, loss=1.11]> [0;32m/home/wgilliam/development/_training/ml/fastai-course/fastai-projects/part1v2/dl1/fastai/lm_rnn.py[0m(135)[0;36mforward[0;34m()[0m
[0;32m    133 [0;31m                [0moutputs[0m[0;34m.[0m[0mappend[0m[0;34m([0m[0mo[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m    134 [0;31m        [0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m--> 135 [0;31m        [0;32mreturn[0m [0mself[0m[0;34m.[0m[0mconcat[0m[0;34m([0m[0mraw_outputs[0m[0;34m)[0m[0;34m,[0m [0mself[0m[0;34m.[0m[0mconcat[0m[0;34m([0m[0moutputs[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m    136 [0;31m[0;34m[0m[0m
[0m[0;32m    137 [0;31m[0;32mclass[0m [0mLinearDecoder[0m[0;34m([0m[0mnn[0m[0;34m.[0m[0mModule[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0m
[0m
ipdb> c
> [0;32m/home/wgilliam/development/_training/ml/fastai-course/fastai-projects/part1v2/dl1/fastai/lm_rnn.py[0m(189)[0

ipdb> outputs[-1]
[Variable containing:
( 0 ,.,.) = 
  0.1249  0.0000  0.2455  ...  -0.0927 -0.0593  0.1775
  0.0143  0.0001  0.0000  ...  -0.0277 -0.0340  0.0000
  0.0120 -0.0011  0.2021  ...  -0.0169 -0.0000  0.0656
           ...             ⋱             ...          
  0.0003 -0.0000  0.0917  ...  -0.0000  0.0207 -0.0000
  0.8671  0.0000 -0.4425  ...  -0.0000 -0.0000  0.0081
  0.0002  0.0000  0.9481  ...  -0.0001 -0.0532 -0.0184

( 1 ,.,.) = 
  0.0887  0.0000  0.2375  ...  -0.1685 -0.0348  0.1430
  0.0182 -0.0020  0.0000  ...  -0.0344 -0.0296  0.0000
  0.0131 -0.0018  0.2130  ...  -0.0454 -0.0000  0.0941
           ...             ⋱             ...          
  0.0051  0.0000  0.0000  ...  -0.0000  0.0030 -0.0000
  0.0005  0.0000  0.1727  ...  -0.0000 -0.0000 -0.0820
  0.0058  0.0000 -0.0454  ...  -0.0000 -0.0018 -0.0587

( 2 ,.,.) = 
  0.0642  0.0000  0.1595  ...  -0.1557 -0.0248  0.1196
  0.0178 -0.0019  0.0000  ...  -0.0421 -0.0234  0.0000
  0.0124 -0.0027  0.1748  ...  -0.0395 

ipdb> outputs[-1]
[Variable containing:
( 0 ,.,.) = 
  0.1249  0.0000  0.2455  ...  -0.0927 -0.0593  0.1775
  0.0143  0.0001  0.0000  ...  -0.0277 -0.0340  0.0000
  0.0120 -0.0011  0.2021  ...  -0.0169 -0.0000  0.0656
           ...             ⋱             ...          
  0.0003 -0.0000  0.0917  ...  -0.0000  0.0207 -0.0000
  0.8671  0.0000 -0.4425  ...  -0.0000 -0.0000  0.0081
  0.0002  0.0000  0.9481  ...  -0.0001 -0.0532 -0.0184

( 1 ,.,.) = 
  0.0887  0.0000  0.2375  ...  -0.1685 -0.0348  0.1430
  0.0182 -0.0020  0.0000  ...  -0.0344 -0.0296  0.0000
  0.0131 -0.0018  0.2130  ...  -0.0454 -0.0000  0.0941
           ...             ⋱             ...          
  0.0051  0.0000  0.0000  ...  -0.0000  0.0030 -0.0000
  0.0005  0.0000  0.1727  ...  -0.0000 -0.0000 -0.0820
  0.0058  0.0000 -0.0454  ...  -0.0000 -0.0018 -0.0587

( 2 ,.,.) = 
  0.0642  0.0000  0.1595  ...  -0.1557 -0.0248  0.1196
  0.0178 -0.0019  0.0000  ...  -0.0421 -0.0234  0.0000
  0.0124 -0.0027  0.1748  ...  -0.0395 

ipdb> c
> [0;32m/home/wgilliam/development/_training/ml/fastai-course/fastai-projects/part1v2/dl1/fastai/lm_rnn.py[0m(189)[0;36mforward[0;34m()[0m
[0;32m    187 [0;31m            [0mx[0m [0;34m=[0m [0mF[0m[0;34m.[0m[0mrelu[0m[0;34m([0m[0ml_x[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m    188 [0;31m        [0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m--> 189 [0;31m        [0;32mreturn[0m [0ml_x[0m[0;34m,[0m [0mraw_outputs[0m[0;34m,[0m [0moutputs[0m[0;34m[0m[0m
[0m[0;32m    190 [0;31m[0;34m[0m[0m
[0m[0;32m    191 [0;31m[0;34m[0m[0m
[0m
ipdb> outputs[-1]
Variable containing:
( 0  ,.,.) = 
 -5.1802e-03  1.3690e-02  4.2789e-02  ...  -5.7836e-02 -6.7825e-02  1.4687e-02
 -5.7780e-02  6.6521e-03  3.7108e-01  ...  -2.3029e-02 -2.8528e-02  6.2480e-02
 -3.3851e-02  3.3785e-02 -1.4701e-01  ...   3.7199e-02 -5.2373e-03  1.9855e-02
                 ...                   ⋱                   ...                
  1.3672e-02 -3.727

RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch_1512387374934/work/torch/lib/THC/generic/THCStorage.cu:58

In [None]:
# unfreeze the model and train for one cycle at the full learning rates
m3.unfreeze()
m3.fit(lrs, 1, metrics=[accuracy], cycle_len=1)

In [None]:
# 7 cycles with cycle_len=2; cycle_save_name is the checkpoint name later passed to load_cycle
m3.fit(lrs, 7, metrics=[accuracy], cycle_len=2, cycle_save_name='imdb_sent1_c7_cl2')

In [None]:
# reload the weights saved under 'imdb_sent1_c7_cl2' for cycle index 6 (the 7th of the 7 cycles, if indices start at 0)
m3.load_cycle('imdb_sent1_c7_cl2', 6)

In [None]:
# predictions together with their targets — presumably computed on the validation set; TODO confirm
preds, y = m3.predict_with_targs()

In [None]:
# first ten predictions next to their targets
preds[:10], y[:10]

In [None]:
# exponentiate the first prediction — presumably preds are log-probabilities, so this yields class probabilities; TODO confirm
np.exp(preds[0])