# Train Haiku Model

In [1]:
import sys

from fastai.text import *
import fastai
print(fastai.__version__)

sys.path.append('..')
from custom_callbacks import *

1.0.51


## Setting

In [38]:
path = Path('../data')

epochs_head = 1
lr_head = slice(1e-2)

epochs_unfreeze = 10
lr_unfreeze = slice(3e-3)

epochs_finetune = 40
lr_finetune = slice(1e-5)

fn = 'haikus2.csv'

# Prepare Data

In [3]:
# Download data (someone already scraped a dataset on github)
!wget https://raw.githubusercontent.com/bfaure/hAIku/master/data.tsv

--2019-04-13 12:08:35--  https://raw.githubusercontent.com/bfaure/hAIku/master/data.tsv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 26734459 (25M) [text/plain]
Saving to: ‘data.tsv’


2019-04-13 12:08:35 (272 MB/s) - ‘data.tsv’ saved [26734459/26734459]



In [4]:
haiku_list = open('data.tsv', 'r').readlines()
print(len(haiku_list))
haiku_list[:3]

351711


["Can't you see how much<br> better you make the world just<br>by being in it?\n",
 "I'm fine. I'm listening<br> to music and eating lunch.<br>You're still an asshole.\n",
 'Your cat has no more<br> metaphysical value<br>than a deer or cow.\n']

In [5]:
# Put into dataframe
df = pd.DataFrame({'text':haiku_list})
# df['text'] = 'style: haiku\ndescription: none\npoem:\n' + df['text']
df['text'] = df['text'].apply(lambda x: x.replace('<br> ', '\n').replace('<br>', '\n'))

In [6]:
print(df.text.iloc[0])

Can't you see how much
better you make the world just
by being in it?



In [8]:
df.text.apply(lambda x: len(x.split())).mean(), df.text.apply(lambda x: len(x.split())).max()

(12.942085973995695, 18)

In [10]:
df.to_csv(path/'haikus.csv', index=True)

In [11]:
!rm -f data.tsv  # Remove old file

In [29]:
len(df)

351711

In [30]:
df2 = df.sample(10000)

In [32]:
df2.to_csv(path/'haikus2.csv', index=True)

## Build Basic Language Model

In [49]:
data_lm = TextLMDataBunch.from_csv(path, 'haikus2.csv', bptt=32, 
                                   bs = 256,
                                   max_vocab=10000, valid_pct=0.02,
                                   include_eos=True, include_bos=True)

TypeError: intercept_args() got an unexpected keyword argument 'random_seed'

In [34]:
len(data_lm.train_ds), len(data_lm.valid_ds)

(9799, 201)

In [35]:
data_lm.show_batch(2)

idx,text
0,into wings some how . \n xxeos xxbos xxmaj xxunk \n is good but it comes with it 's \n own limitations . \n xxeos xxbos xxmaj until no one is \n
1,"\n off to them and they ca n't be \n taught any lessons . \n xxeos xxbos i am not ok . \n i do n't have a girlfriend , man ."


In [36]:
callback_fns=[SampleWriter, SaveModelCallback, CSVLogger]

In [37]:
learn = language_model_learner(data_lm, AWD_LSTM, drop_mult=0.5, pretrained=True,
                               callback_fns=callback_fns)

## Start Fitting

In [21]:
print(learn.predict('xxbos', n_words=35))

xxbos = Move To Tell It = 
  Become Better than anything that i Get Along is a volume of emotionally themed books which bought food and race


In [40]:
learn.fit_one_cycle(epochs_head, lr_head)

epoch,train_loss,valid_loss,accuracy,time
0,5.359911,4.385373,0.255371,00:06


Better model found at epoch 0 with val_loss value: 4.385372638702393.


In [41]:
learn.unfreeze()

In [43]:
learn.fit_one_cycle(epochs_unfreeze, lr_finetune)

epoch,train_loss,valid_loss,accuracy,time
0,4.692933,4.394634,0.256592,00:09
1,4.685663,4.369449,0.255493,00:09
2,4.677875,4.356232,0.256958,00:09
3,4.698989,4.383489,0.259399,00:09
4,4.680072,4.358307,0.259766,00:09
5,4.68508,4.343387,0.258789,00:09
6,4.664062,4.34886,0.261108,00:09


Better model found at epoch 0 with val_loss value: 4.394633769989014.
Better model found at epoch 1 with val_loss value: 4.369449138641357.
Better model found at epoch 2 with val_loss value: 4.356232166290283.
Better model found at epoch 5 with val_loss value: 4.343387126922607.


KeyboardInterrupt: 

In [None]:
learn.save('awd_first_phase')

In [46]:
learn.callback_fns.append(ReduceLROnPlateauCallback)

In [47]:
learn.fit(epochs_finetune, lr_finetune)

epoch,train_loss,valid_loss,accuracy,time
0,4.650712,4.337271,0.256958,00:09
1,4.648029,4.317322,0.260864,00:09


Better model found at epoch 0 with val_loss value: 4.337271213531494.
Better model found at epoch 1 with val_loss value: 4.31732177734375.


KeyboardInterrupt: 