# Preparation

An example as baseline: [ULMFit](https://nbviewer.jupyter.org/github/fastai/fastai/blob/master/examples/ULMFit.ipynb) tutorial.

> Fine-tuning a forward and backward language model to get to 95.4% accuracy on the IMDB movie reviews dataset. This tutorial is done with fastai v1.0.53.

> The example was run on a Titan RTX (24 GB of RAM) so you will probably need to adjust the batch size accordingly. If you divide it by 2, don't forget to divide the learning rate by 2 as well in the following cells. You can also reduce the bptt a little to save a bit of memory.

In [None]:
# Ensure GPU spec; T4 is for colab and one can change it for another env.
gpu_list = !nvidia-smi -L
if gpu_list[0].startswith('NVIDIA-SMI has failed'):
  print('Runtime type should be GPU.')
elif not gpu_list[0].startswith('GPU 0: Tesla T4'):
  display(gpu_list)
  print('Please reset all runtimes. We need a Tesla T4 to reproduce the experiments!')
else:
  display(gpu_list)

## Dependency

### Install

In [None]:
# Ensure no surprises from conflicting packages.
!pip check

In [None]:
!pip install -qU fastai==1.0.55 jupyter-console==5.2.0 coveralls coverage datascience albumentations
!pip check

### Import

In [None]:
from pathlib import Path
import random

import numpy as np
import torch
from google.colab import drive

from fastai import basic_train, basic_data, core
from fastai import *
from fastai.text import *
from fastprogress import fastprogress

### Init

In [None]:
# Not set earlier because pip may require a restart.
SESSN_START_T, = !date +%Y%m%dT%H%M

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# Use one constant seed for every random number generator in play.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
if torch.cuda.is_available():
  torch.cuda.manual_seed_all(SEED)
# Ask cuDNN for deterministic behaviour and turn off its benchmark mode.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True  # About 15% slower but...

In [None]:
# A special treatment for colab to decrease network traffic.
# Sets fastprogress.NO_BAR, asks fastprogress for its console variants of
# master_bar/progress_bar, then rebinds those two names on each object below.
fastprogress.NO_BAR = True
master_bar, progress_bar = fastprogress.force_console_behavior()
basic_train.master_bar, basic_train.progress_bar = master_bar, progress_bar
basic_data.master_bar, basic_data.progress_bar = master_bar, progress_bar
# NOTE(review): `dataclass` and `text` are not imported explicitly in this
# notebook; presumably they come from the star imports -- confirm they are the
# objects that are meant to be patched.
dataclass.master_bar, dataclass.progress_bar = master_bar, progress_bar
text.master_bar, text.progress_bar = master_bar, progress_bar
text.data.master_bar, text.data.progress_bar = master_bar, progress_bar
core.master_bar, core.progress_bar = master_bar, progress_bar

In [None]:
# Mount Google Drive at GD_DIR_S; force_remount=True is passed so an existing
# mount is redone.
GD_DIR_S = '/content/gdrive/'
drive.mount(GD_DIR_S, force_remount=True)

In [None]:
# Persistent directories under the mounted drive: declare the paths first...
BASE_DIR_P = Path(GD_DIR_S) / 'My Drive/imdb/'
DATA_DIR_P = BASE_DIR_P / 'data/'
MDLS_DIR_P = BASE_DIR_P / 'models/'
LOGS_DIR_P = BASE_DIR_P / 'logs/'
# ...then create them (a no-op for any that already exist).
BASE_DIR_P.mkdir(parents=True, exist_ok=True)
DATA_DIR_P.mkdir(parents=True, exist_ok=True)
MDLS_DIR_P.mkdir(parents=True, exist_ok=True)
LOGS_DIR_P.mkdir(parents=True, exist_ok=True)

# Directories on the colab disk for fastai data and models, each linked from
# /content.
FASTAI_DATA_DIR_P = Path('/root/.fastai/data/')
FASTAI_DATA_DIR_P.mkdir(parents=True, exist_ok=True)
# symlink_to raises FileExistsError when the link path already exists, so only
# create the link when it is missing; this keeps the cell re-runnable.
# is_symlink() also catches a dangling link, which exists() would report as absent.
if not (Path('/content/data').is_symlink() or Path('/content/data').exists()):
  Path('/content/data').symlink_to(FASTAI_DATA_DIR_P)
FASTAI_MDLS_DIR_P = Path('/root/.torch/models/')
FASTAI_MDLS_DIR_P.mkdir(parents=True, exist_ok=True)
if not (Path('/content/models').is_symlink() or Path('/content/models').exists()):
  Path('/content/models').symlink_to(FASTAI_MDLS_DIR_P)

In [None]:
!set -x; rm -rf /content/sample_data/

# Train

## Assign Shared Hyperparams & Args

In [None]:
# Shared hyperparameters.
bs, bptt = 64, 80  # bptt is from the example, but fastai defaults to 70.
wd = 0.1  # The example uses fastai default 1e-2.
moms = (0.8, 0.7)

In [None]:
# Passed as num_workers to the databunches below. 0 keeps data loading in the
# main process, chosen because the training set will be shuffled.
n_dbnch_wrkrs = 0

In [None]:
# Names used for the forward-direction artifacts.
FW_ENC_NAME = 'fw_enc'
FW_MDL_NAME = 'imdb_fw'
# The databunch pickle: bare file name plus its full path under DATA_DIR_P.
FW_DBNCH_FILE_S = 'fw_dbnch.pkl'
FW_DBNCH_P = DATA_DIR_P.joinpath(FW_DBNCH_FILE_S)

In [None]:
# Base name (no extension) of the forward LM training log, inside LOGS_DIR_P.
# fastai's CSVLogger appends '.csv' to the name it is given, so adding the
# extension here would produce '..._history-fw.csv.csv'.
fw_log_file_p = LOGS_DIR_P / f'{SESSN_START_T}_history-fw'

## Process Data Once

In [None]:
# Untar into colab disk so no latency to GDrive.
colab_dir_p = untar_data(URLs.IMDB, dest=FASTAI_DATA_DIR_P)
colab_dir_p.ls()

In [None]:
# Build the language-model databunch from the untarred folder.
lm_dbnch = (TextList.from_folder(colab_dir_p)
            #Inputs: all the text files in path
            .filter_by_folder(include=['train', 'test', 'unsup'])
            #We may have other temp folders that contain text files so we only keep what's in train, test and unsup
            .split_by_rand_pct(
                0.1,
                seed=SEED  # Set the seed again since in theory one can call np.random before this.
            )
            #We randomly split and keep 10% (10,000 reviews) for validation
            .label_for_lm()
            #We want to do a language model so we label accordingly
            .databunch(bs=bs, bptt=bptt, num_workers=n_dbnch_wrkrs))

In [None]:
lm_dbnch.show_batch()

## Use Persistent Path

In [None]:
# Save and load the databunch using a non-volatile path (e.g.: GDrive).
lm_dbnch.save(FW_DBNCH_P)
fw_lm_dbnch = load_data(DATA_DIR_P, FW_DBNCH_FILE_S, bs=bs, bptt=bptt, num_workers=n_dbnch_wrkrs)

In [None]:
# The batch should look the same if the above efforts keep the reproducibility.
fw_lm_dbnch.show_batch()

In [None]:
# Backward counterparts are not implemented yet.

# bw_lm_dbnch = load_data(DATA_DIR_P, FW_DBNCH_FILE_S, bs=bs, bptt=bptt, num_workers=n_dbnch_wrkrs, backwards=True)
# bw_lm_dbnch.show_batch()

## Fit LM

In [None]:
def init_learner(dbnch, drop_mult):
  """Create an AWD_LSTM language-model learner and convert it with `to_fp16`.

  Args:
    dbnch: databunch handed to `language_model_learner`.
    drop_mult: forwarded as the `drop_mult` keyword.

  Returns:
    The result of `to_fp16(clip=0.1)` on the new learner.
  """
  lm_learner = language_model_learner(dbnch, AWD_LSTM, drop_mult=drop_mult)
  return lm_learner.to_fp16(clip=0.1)  # 2x faster

In [None]:
def fit_lm_1st_cycle(learner, lr, moms, wd, csv_logger):
  """Fit for one epoch with `fit_one_cycle`, then save as 'fit_head'.

  Args:
    learner: object exposing `fit_one_cycle`, `save` and `path`.
    lr: learning rate passed to `fit_one_cycle`.
    moms: forwarded as the `moms` keyword.
    wd: forwarded as the `wd` keyword.
    csv_logger: callback passed as the only entry of `callbacks`.

  Returns:
    The same `learner`, after fitting and saving.
  """
  fit_kwargs = dict(moms=moms, wd=wd, callbacks=[csv_logger])
  learner.fit_one_cycle(1, lr, **fit_kwargs)
  learner.save('fit_head')
  # List the learner's path so the contents are visible in the notebook.
  display(learner.path.ls())
  return learner

In [None]:
def fit_lm_rest_cycles(learner, lr, moms, wd, csv_logger, n_cycles=10):
  """Unfreeze `learner` and fit it with `fit_one_cycle`.

  Args:
    learner: object exposing `unfreeze` and `fit_one_cycle`.
    lr: learning rate passed to `fit_one_cycle`.
    moms: forwarded as the `moms` keyword.
    wd: forwarded as the `wd` keyword.
    csv_logger: callback passed as the only entry of `callbacks`.
    n_cycles: first positional argument of `fit_one_cycle` (default 10).

  Returns:
    The same `learner`, after fitting.
  """
  learner.unfreeze()
  fit_kwargs = dict(moms=moms, wd=wd, callbacks=[csv_logger])
  learner.fit_one_cycle(n_cycles, lr, **fit_kwargs)
  return learner

### Assign LM-specific Hyperparams

In [None]:
# The example uses 1.0, probably because no pretrained models for it yet?
drop_mult = 0.3

# Scale the example's lr of 2e-2 by the ratio of our bs to the orig bs=256.
orig_bs = 256
lm_lr = (bs / orig_bs) * 2e-2

### Fit Forward LM

In [None]:
fw_lm_learn = init_learner(fw_lm_dbnch, drop_mult)

In [None]:
# Not sure why partial didn't work, so initialize the logger here.
fw_lm_csv_logger = callbacks.CSVLogger(fw_lm_learn, fw_log_file_p, append=True)

In [None]:
fw_lm_learn = fit_lm_1st_cycle(fw_lm_learn, lm_lr, moms, wd, fw_lm_csv_logger)

In [None]:
fw_lm_learn = fit_lm_rest_cycles(fw_lm_learn, lm_lr/10, moms, wd, fw_lm_csv_logger, 1)

epoch     train_loss  valid_loss  accuracy  time    


In [None]:
# Redundant to fw_lm_dbnch.save()?
fw_lm_learn.save('tuned_fw_lm')

In [None]:
fw_lm_learn.save_encoder(FW_ENC_NAME)

In [None]:
# NOTE(review): this saves the databunch (not the learner) under the model
# name FW_MDL_NAME -- confirm whether fw_lm_learn.save(FW_MDL_NAME) was meant.
fw_lm_dbnch.save(FW_MDL_NAME)

In [None]:
fw_lm_learn.csv_logger.read_logged_file()

In [None]:
fw_lm_learn.path.ls()

### Fit Forward Classifier

In [None]:
# Build the classifier databunch from the same folder, passing the vocab of
# fw_lm_dbnch.
fw_cf_dbnch = (TextList.from_folder(colab_dir_p, vocab=fw_lm_dbnch.vocab)
               #grab all the text files in path
               .split_by_folder(valid='test')
               #split by train and valid folder, with 'test' as valid (that only keeps 'train' and 'test' so no need to filter)
               .label_from_folder(classes=['neg', 'pos'])
               #label them all with their folders
               .databunch(bs=bs, num_workers=n_dbnch_wrkrs))

In [None]:
# Create the forward classifier learner.
# NOTE(review): the encoder saved earlier under FW_ENC_NAME is never loaded
# into fw_cf_learn (the load_encoder call below is commented out and names a
# different learner/file) -- confirm whether the fine-tuned encoder should be
# loaded here, and from which path.
fw_cf_learn = text_classifier_learner(fw_cf_dbnch, AWD_LSTM, drop_mult=0.5)
#learn.load_encoder('fine_tuned_enc')

In [None]:
# Not sure why this was 2e-2 for the classifier; the example uses 1e-1.
cf_lr = 1e-1

In [None]:
fw_cf_learn.fit_one_cycle(1, cf_lr, moms=moms)

In [None]:
# Second classifier stage: freeze_to(-2), halve cf_lr, fit one epoch with an
# lr slice from cf_lr/(2.6**4) to cf_lr.
# cf_lr is updated in place, so re-running this cell halves it again.
fw_cf_learn.freeze_to(-2)
cf_lr /= 2
fw_cf_learn.fit_one_cycle(1, slice(cf_lr/(2.6**4),cf_lr), moms=moms)

In [None]:
# Third classifier stage: freeze_to(-3), halve cf_lr again, fit one epoch with
# an lr slice from cf_lr/(2.6**4) to cf_lr.
# cf_lr is updated in place, so re-running this cell halves it again.
fw_cf_learn.freeze_to(-3)
cf_lr /= 2
fw_cf_learn.fit_one_cycle(1, slice(cf_lr/(2.6**4),cf_lr), moms=moms)

In [None]:
# Final classifier stage: unfreeze, divide cf_lr by 5, fit two epochs with an
# lr slice from cf_lr/(2.6**4) to cf_lr.
# cf_lr is updated in place, so re-running this cell divides it by 5 again.
fw_cf_learn.unfreeze()
cf_lr /= 5
fw_cf_learn.fit_one_cycle(2, slice(cf_lr/(2.6**4),cf_lr), moms=moms)

In [None]:
fw_cf_learn.save('fwd_clas')

# Incomplete?

In [None]:
# Not sure what was the purpose of these.

# NOTE(review): `path` and `learn` are not defined anywhere in this notebook,
# so this cell looks like a leftover from another notebook and will not run as
# is -- confirm before keeping it.
data_clas = TextDataBunch.load(path/'tmp_clas/','.')

# Keep a handle on the original np.load so it can be restored.
np_load_old = np.load

# Replace np.load globally with a wrapper that always passes allow_pickle=True.
# NOTE(review): allow_pickle=True unpickles arbitrary objects; only load files
# from a trusted source. Passing allow_pickle explicitly to the wrapper would
# raise a TypeError (duplicate keyword).
np.load = lambda *a,**k: np_load_old(*a, allow_pickle=True, **k)
# Uncomment to restore the original np.load.
#np.load = np_load_old

learn.load("../../models/clas");