In [1]:
#hide
!pip install -Uqq fastbook
import fastbook
fastbook.setup_book()

In [2]:
#hide
from fastbook import *
from IPython.display import display,HTML
from fastai.text.all import *

In [3]:
# Folder holding the text corpus; it is passed below both as the data source
# and as the `path=` argument of the DataLoaders.
path = Path("texts/")

In [None]:
# Collect the text files under `path` with fastai's get_text_files.
# NOTE(review): `files` is not referenced by any later cell shown here.
files = get_text_files(path)

In [4]:
# Language-model DataLoaders: every text file under `path` is an input,
# with a random 10% of the files held out for validation.
dls_lm = DataBlock(
    get_items=get_text_files,
    blocks=TextBlock.from_folder(path, is_lm=True),
    splitter=RandomSplitter(valid_pct=0.1),
).dataloaders(source=path, path=path, bs=128, seq_len=80)

In [None]:
# Sanity check: display at most two samples from the language-model batches.
dls_lm.show_batch(max_n=2)

### Fine-Tuning the Language Model

In [5]:
# AWD-LSTM language-model learner reporting accuracy and perplexity,
# then switched to mixed-precision training.
learn = language_model_learner(
    dls_lm,
    AWD_LSTM,
    metrics=[accuracy, Perplexity()],
    drop_mult=0.3,
)
learn = learn.to_fp16()

In [None]:
# First pass: a single one-cycle epoch at a peak learning rate of 2e-2.
learn.fit_one_cycle(n_epoch=1, lr_max=2e-2)

### Saving and Loading Models

In [None]:
# Checkpoint the learner after the first epoch under the name '1epoch'.
learn.save(file='1epoch')

In [6]:
# Restore the '1epoch' checkpoint written by the previous cell.
learn = learn.load(file='1epoch')

In [None]:
# Unfreeze the model, train five more epochs at a lower peak rate,
# and store the encoder as '5-finetuned'.
learn.unfreeze()
learn.fit_one_cycle(n_epoch=5, lr_max=2e-3)
learn.save_encoder(file='5-finetuned')

In [None]:
# Three further epochs on top of the previous five; the resulting encoder
# is stored separately as '8-finetuned'.
learn.fit_one_cycle(n_epoch=3, lr_max=2e-3)
learn.save_encoder(file='8-finetuned')

### Text Generation

In [7]:
# Put the encoder saved as '5-finetuned' back into the language model.
learn = learn.load_encoder(file='5-finetuned')

In [8]:
# Prompt and sampling settings for the generated continuations.
TEXT = "The United States and the Kingdom of Thailand"
N_WORDS = 50
N_SENTENCES = 2

# Sample N_SENTENCES independent continuations of TEXT, N_WORDS words each.
preds = [
    learn.predict(TEXT, n_words=N_WORDS, temperature=0.75)
    for _ in range(N_SENTENCES)
]

# One continuation per line.
print(*preds, sep="\n")

The United States and the Kingdom of Thailand the Government of the Republic of Korea ( hereinafter referred to as “ korea ” ) and the Review and Review Committee established in accordance with Article mfn iii xa0restrictionsxa0restrictionsxa0restrictions ((( thethethe Free Trade Commission ) ,
The United States and the Kingdom of Thailand the Parties , the two countries to concern in the light of the economic situation of the physical and the removal of any difficulties in the process of economic development . To this end , Article ASSIGNED of GATT 1994 and its japan notes are


### Creating the Classifier DataLoaders

In [None]:
# Classifier DataLoaders: reuse the language model's vocab, label each file
# by its parent folder, and use the 'test' folder as the validation set.
dls_clas = DataBlock(
    blocks=(
        TextBlock.from_folder(path, vocab=dls_lm.vocab),
        CategoryBlock,
    ),
    get_items=partial(get_text_files, folders=['train', 'test']),
    get_y=parent_label,
    splitter=GrandparentSplitter(valid_name='test'),
).dataloaders(source=path, path=path, bs=128, seq_len=72)

In [None]:
# Sanity check: display at most three samples from the classifier batches.
dls_clas.show_batch(max_n=3)

In [None]:
# Numericalized form of the first (up to) ten training documents, taken from
# the classifier DataLoaders built above. Each dataset item is an (x, y) pair,
# so [0] keeps only the token-id tensor. Built from `dls_clas` because no cell
# in this notebook defines a separate tokenized sample or numericalizer.
nums_samp = L(
    dls_clas.train_ds[i][0]
    for i in range(min(10, len(dls_clas.train_ds)))
)

In [None]:
# Apply len to every element of nums_samp to compare their sizes.
nums_samp.map(len)

In [None]:
# AWD-LSTM text classifier tracking accuracy, switched to mixed precision.
learn = text_classifier_learner(
    dls_clas,
    AWD_LSTM,
    metrics=accuracy,
    drop_mult=0.5,
)
learn = learn.to_fp16()

In [None]:
# Load the fine-tuned language-model encoder into the classifier.
# The language-model cells save encoders named '5-finetuned' and
# '8-finetuned'; no encoder called 'finetuned' is ever written, so that name
# cannot be loaded on a fresh run. '8-finetuned' is the last one saved.
# NOTE(review): switch to '5-finetuned' if the shorter run is preferred.
learn = learn.load_encoder('8-finetuned')

### Fine-Tuning the Classifier

In [None]:
# First classifier pass: one one-cycle epoch at a peak rate of 2e-2.
learn.fit_one_cycle(n_epoch=1, lr_max=2e-2)

In [None]:
# Freeze up to the last two parameter groups, then train one epoch with
# learning rates spread across the slice (upper bound 1e-2).
learn.freeze_to(n=-2)
learn.fit_one_cycle(n_epoch=1, lr_max=slice(1e-2/(2.6**4), 1e-2))

In [None]:
# Freeze up to the last three parameter groups, then train one epoch with
# learning rates spread across the slice (upper bound 5e-3).
learn.freeze_to(n=-3)
learn.fit_one_cycle(n_epoch=1, lr_max=slice(5e-3/(2.6**4), 5e-3))

In [None]:
# Unfreeze the whole model and train two final epochs with learning rates
# spread across the slice (upper bound 1e-3).
learn.unfreeze()
learn.fit_one_cycle(n_epoch=2, lr_max=slice(1e-3/(2.6**4), 1e-3))