In [None]:
# Auto-reload edited modules before each cell executes, and render matplotlib figures inline.
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import pandas as pd
import pickle

import matplotlib.cm as cm

from fastai import *
from fastai.text import *
from fastai.callbacks import *

from pathlib import Path

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import confusion_matrix, classification_report, hamming_loss, zero_one_loss, accuracy_score
from sklearn.model_selection import train_test_split

In [None]:
# Seed every RNG the notebook relies on so splits and training runs are repeatable.
seed = 42

# python RNG
import random
random.seed(seed)

# pytorch RNGs
import torch
torch.manual_seed(seed)
# Ask cuDNN for deterministic kernels and switch off its auto-tuner, which may
# otherwise pick different algorithms from one run to the next.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed)

# numpy RNG
import numpy as np
np.random.seed(seed)

In [None]:
# Run fastai's show_install utility so the environment is recorded with the results.
!python -m fastai.utils.show_install

In [None]:
# Batch size used for the language-model DataBunches below.
bs=32

## Loading Data

In [None]:
# Project root; every data/model path below is built relative to it.
path = Path(".")

In [None]:
df = pd.read_csv(path/"data/clean/train.csv")

In [None]:
df

In [None]:
# Rows with is_valid == False: the pool that is split into train/validation further down.
df_train = df[df["is_valid"] == False]; df_train

In [None]:
# Rows with is_valid == True: held out and used as the test set in the evaluation sections.
df_test = df[df["is_valid"] == True]; df_test

In [None]:
# Corpus used to build the language-model DataBunches (`data_lm`).
df_lm = pd.read_csv(path/'data/clean/unsup/unsup.csv'); df_lm

In [None]:
# Plain Python lists of texts and labels, as input for train_test_split.
X, y = df_train["text"].to_list(), df_train["label"].to_list()

In [None]:
# Stratified 80/20 train/validation split. Uses the notebook-wide `seed` rather than a
# second hard-coded 42, so changing the seed in one place changes the split too.
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=seed, stratify=y)

In [None]:
# NOTE(review): `df_train` is rebound here, discarding the frame filtered from `df` earlier.
# Re-running the `X, y = ...` cell after this point would read this new frame instead.
df_train = pd.DataFrame()
df_valid = pd.DataFrame()

In [None]:
# Column order matters: label first, text second, is_valid (added below) third.
# Later cells address the label and split columns by position (cols=0, col=2).
df_train["label"], df_train["text"] = y_train, X_train

In [None]:
df_valid["label"], df_valid["text"] = y_valid, X_valid

In [None]:
df_train

In [None]:
df_valid

In [None]:
# Class distribution of both splits (the split above was stratified on the label).
df_train.label.value_counts(), df_valid.label.value_counts()

In [None]:
# Flag column consumed by split_from_df when the classifier DataBunch is built.
df_train["is_valid"] = False
df_valid["is_valid"] = True

In [None]:
df_train_val = pd.concat([df_train, df_valid])

In [None]:
df_train_val.to_csv(path/"data/clean/train_val.csv", index=False)

In [None]:
df_train_val

In [None]:
# Directory handed to SPProcessor.load(...) below; list the contents of its tmp/ folder.
tknzer = path/'models'
(tknzer/'tmp').ls()

In [None]:
!pip install sentencepiece

In [None]:
# Forward language-model DataBunch built from `df_lm`, using the SentencePiece processor
# loaded from `tknzer`; 20% of the rows are held out for validation.
data_lm = (TextList.from_df(df_lm,cols='text', processor=SPProcessor.load(tknzer))
                           .split_by_rand_pct(0.2, seed=seed)
                           .label_for_lm()
                           .databunch(bs=bs))

In [None]:
data_lm.show_batch()

In [None]:
# Cached so later sections can reload it with load_data instead of re-processing the corpus.
data_lm.save('data/data_lm_export.pkl')

In [None]:
# Language-model architecture overrides on top of fastai's AWD-LSTM defaults.
config = awd_lstm_lm_config.copy()
config['qrnn'] = True
config['n_hid'] = 1550 #default 1152
config['n_layers'] = 4 #default 3

In [None]:
# Metric objects shared by later cells (`f1` is passed to every classifier learner).
perplexity = Perplexity()
f1 = FBeta(beta=1, average="weighted")

In [None]:
# File stems passed as `pretrained_fnames` for the forward and backward language models.
lm_fns3 = ['pt_wt_sp15_multifit', 'pt_wt_vocab_sp15_multifit']
lm_fns3_bwd = ['pt_wt_sp15_multifit_bwd', 'pt_wt_vocab_sp15_multifit_bwd']

## Fine-tune forward LM

In [None]:
# Forward LM learner initialised from the pretrained files named in `lm_fns3`.
learn_lm = language_model_learner(data_lm, AWD_LSTM, path=path, config=config, pretrained_fnames=lm_fns3, drop_mult=1., 
                                  metrics=[error_rate, accuracy, perplexity])

In [None]:
# Snapshot of the encoder before any fine-tuning; the "No Fine-Tune LM" ablations load it.
learn_lm.save_encoder("no_fine_tune_enc")

In [None]:
learn_lm.predict("O Governo", n_words=20)

In [None]:
# Stage 1: train with the body frozen.
learn_lm.freeze()
learn_lm.lr_find()

In [None]:
learn_lm.recorder.plot()

In [None]:
lr = 1e-1

In [None]:
learn_lm.fit_one_cycle(2, lr, wd=0.1, moms=(0.8,0.7))

In [None]:
# Checkpoint after stage 1; the same names are written again after stage 2 below.
learn_lm.save('fine_tune_lm')
learn_lm.save_encoder('fine_tune_enc')

In [None]:
# Stage 2: unfreeze everything and continue at a lower learning rate.
learn_lm.unfreeze()
learn_lm.lr_find()

In [None]:
learn_lm.recorder.plot()

In [None]:
lr = 1e-2

In [None]:
learn_lm.fit_one_cycle(10, lr, wd=0.1, moms=(0.8,0.7), callbacks=[ShowGraph(learn_lm)])

In [None]:
learn_lm.recorder.plot_lr()

In [None]:
learn_lm.predict("O Governo", n_words=30)

In [None]:
# Final forward LM and encoder; "fine_tune_enc" is what the forward classifiers load.
learn_lm.save('fine_tune_lm')
learn_lm.save_encoder('fine_tune_enc')

## Fine-tune backward LM

In [None]:
# Same corpus and split as the forward LM, but batches are built with backwards=True.
# NOTE: this rebinds `data_lm`, replacing the forward DataBunch in memory.
data_lm = (TextList.from_df(df_lm,cols='text', processor=SPProcessor.load(tknzer))
                           .split_by_rand_pct(0.2, seed=seed)
                           .label_for_lm()
                           .databunch(bs=bs, backwards=True))

In [None]:
data_lm.show_batch()

In [None]:
data_lm.save('./data/data_lm_back.pkl')

In [None]:
# Backward LM learner initialised from the pretrained files named in `lm_fns3_bwd`.
learn_lm = language_model_learner(data_lm, AWD_LSTM, path=path, config=config, pretrained_fnames=lm_fns3_bwd, drop_mult=1., 
                                  metrics=[error_rate, accuracy, perplexity])

In [None]:
# Snapshot of the backward encoder before fine-tuning; used by the "No Fine-Tune LM" ablations.
learn_lm.save_encoder("no_fine_tune_enc_bwd")

In [None]:
# NOTE(review): unlike the forward section there is no explicit learn_lm.freeze() here;
# presumably the learner is already frozen after loading pretrained weights - confirm.
learn_lm.lr_find()

In [None]:
learn_lm.recorder.plot()

In [None]:
lr = 1e-1

In [None]:
learn_lm.fit_one_cycle(2, lr, wd=0.1, moms=(0.8,0.7))

In [None]:
# Checkpoint after stage 1; the same names are written again after stage 2 below.
learn_lm.save('fine_tune_lm_bwd')
learn_lm.save_encoder('fine_tune_enc_bwd')

In [None]:
# Stage 2: unfreeze everything and continue at a lower learning rate.
learn_lm.unfreeze()
learn_lm.lr_find()

In [None]:
learn_lm.recorder.plot()

In [None]:
lr=1e-2

In [None]:
learn_lm.fit_one_cycle(10, lr, wd=0.1, moms=(0.8,0.7), callbacks=[ShowGraph(learn_lm)])

In [None]:
# Final backward LM and encoder; "fine_tune_enc_bwd" is what the backward classifiers load.
learn_lm.save('fine_tune_lm_bwd')
learn_lm.save_encoder('fine_tune_enc_bwd')

## Train forward classifier

In [None]:
# Batch size for classifier training (the language models above used 32).
bs=8

In [None]:
# Forward LM DataBunch, reloaded only so its vocabulary size can be compared below.
data_lm = load_data("data/", "data_lm_export.pkl", bs=bs)

In [None]:
# Classifier DataBunch from the train/validation frame: column 2 (is_valid) drives the
# split and column 0 (label) is the target.
data_clas = (TextList.from_df(df_train_val, path, cols='text',
                              processor=SPProcessor.load(tknzer))
                         .split_from_df(col=2)
                         .label_from_df(cols=0)
                         .databunch(bs=bs))

In [None]:
data_clas = load_data(path/"data", "data_clas_export.pkl")

In [None]:
len(data_clas.vocab.itos), len(data_lm.vocab.itos)

In [None]:
data_clas.save(path/'data/data_clas_export.pkl')

In [None]:
data_clas.show_batch()

In [None]:
# Classifier architecture overrides; same qrnn/n_hid/n_layers values as the LM config,
# which the encoder loaded below was trained with.
config = awd_lstm_clas_config.copy()
config['qrnn'] = True
config['n_hid'] = 1550 #default 1152
config['n_layers'] = 4 #default 3

In [None]:
# pretrained=False: the encoder weights come from the fine-tuned forward LM instead.
# learn_c.path is pointed at the project root so load_encoder/save resolve under `path`.
learn_c = text_classifier_learner(data_clas, AWD_LSTM, config=config, pretrained=False, drop_mult=0.5, 
                                  metrics=[accuracy,f1])
learn_c.path = path
learn_c.load_encoder("fine_tune_enc");

In [None]:
# Stage 1 of gradual unfreezing: train with the body frozen.
learn_c.freeze()
learn_c.lr_find()

In [None]:
learn_c.recorder.plot()

In [None]:
lr=2e-2

In [None]:
learn_c.fit_one_cycle(10, lr, wd=0.6, moms=(0.8,0.7))

In [None]:
# The same checkpoint name is overwritten after every stage.
learn_c.save("clf_fwd")

In [None]:
# Stage 2: unfreeze the last two layer groups; slice(...) spreads the learning rate
# across layer groups, with 2.6**4 as the ratio between the lowest and highest rate.
learn_c.freeze_to(-2)
learn_c.fit_one_cycle(10, slice(lr/(2.6**4),lr), wd=0.6, moms=(0.8,0.7))

In [None]:
learn_c.save("clf_fwd")

In [None]:
# Stage 3: last three layer groups, at half the learning rate.
learn_c.freeze_to(-3)
learn_c.fit_one_cycle(10, slice(lr/2/(2.6**4),lr/2), wd=0.6, moms=(0.8,0.7))

In [None]:
learn_c.save("clf_fwd")

In [None]:
# Stage 4: whole model, at a tenth of the learning rate.
learn_c.unfreeze()
learn_c.fit_one_cycle(10, slice(lr/10/(2.6**4),lr/10), wd=0.6, moms=(0.8,0.7))

In [None]:
learn_c.save("clf_fwd")

## Evaluate Forward CLF

In [None]:
# Reload the cached DataBunches so this section can be run without rebuilding them.
data_clas = load_data(path/"data", "data_clas_export.pkl")

In [None]:
data_lm = load_data(path/"data", "data_lm_export.pkl")

In [None]:
# Test DataBunch built from the held-out rows of train.csv (no validation split).
# - The label column is addressed by name: `df_test` keeps the CSV's own column order,
#   so position 0 is not guaranteed to be the label.
# - classes=data_clas.classes pins the label->index mapping to the one the classifier was
#   trained with, instead of re-deriving it from whichever labels occur in the test rows.
data_test = (TextList.from_df(df_test, path, cols='text',
                              processor=SPProcessor.load(tknzer))
                         .split_none()
                         .label_from_df(cols='label', classes=data_clas.classes)
                         .databunch(bs=bs))

In [None]:
data_test.show_batch()

In [None]:
# Sanity check: both DataBunches should report the same number of classes.
data_test.c, data_clas.c

In [None]:
# Sanity check: both DataBunches should report the same vocabulary size.
len(data_test.vocab.itos), len(data_clas.vocab.itos)

In [None]:
# Cached for the ensemble and ablation sections, which reload it with load_data.
data_test.save(path/"data/test_data.pkl")

In [None]:
learn_c.path = path
learn_c.load(path/"clf_fwd");

In [None]:
# Swap the test set in as the learner's validation loader so get_preds() scores it.
learn_c.data.valid_dl = data_test.fix_dl

In [None]:
# NOTE: `y` is rebound here to the test targets (earlier it held the training labels).
preds,y,losses = learn_c.get_preds(with_loss=True)
predictions = np.argmax(preds, axis = 1)

interp = ClassificationInterpretation(learn_c, preds, y, losses)
interp.plot_confusion_matrix()

In [None]:
predictions[:15], y[:15], predictions.shape, y.shape

In [None]:
print(classification_report(y, predictions, target_names=learn_c.data.classes, digits=4))
print(accuracy_score(y, predictions))

In [None]:
learn_c.show_results()

In [None]:
txt_ci = TextClassificationInterpretation.from_learner(learn_c)

In [None]:
txt_ci.show_top_losses(5)

## Train backwards classifier

In [None]:
bs=8

In [None]:
# Backward LM DataBunch, reloaded only so its vocabulary size can be compared below.
data_lm = load_data("data/", "data_lm_back.pkl", bs=bs, backwards=True)

In [None]:
data_lm.show_batch()

In [None]:
# Same frame and split as the forward classifier, but batches are built with backwards=True.
data_clas = (TextList.from_df(df_train_val, path, cols='text',
                              processor=SPProcessor.load(tknzer))
                         .split_from_df(col=2)
                         .label_from_df(cols=0)
                         .databunch(bs=bs, backwards=True))

In [None]:
len(data_clas.vocab.itos), len(data_lm.vocab.itos)

In [None]:
data_clas.save(path/'data/data_clas_bwd.pkl')

In [None]:
data_clas.show_batch()

In [None]:
# Classifier architecture overrides (same values as the forward classifier).
config = awd_lstm_clas_config.copy()
config['qrnn'] = True
config['n_hid'] = 1550 #default 1152
config['n_layers'] = 4 #default 3

In [None]:
# pretrained=False: the encoder weights come from the fine-tuned backward LM instead.
learn_c = text_classifier_learner(data_clas, AWD_LSTM, config=config, pretrained=False, drop_mult=0.5, 
                                  metrics=[accuracy,f1])
learn_c.load_encoder("fine_tune_enc_bwd");

In [None]:
# Stage 1 of gradual unfreezing: train with the body frozen.
learn_c.freeze()
learn_c.lr_find()

In [None]:
learn_c.recorder.plot()

In [None]:
lr=3e-2

In [None]:
learn_c.fit_one_cycle(10, lr, wd=0.6, moms=(0.8,0.7))

In [None]:
# The same checkpoint name is overwritten after every stage.
learn_c.save("clf_bwd")

In [None]:
# Stage 2: last two layer groups, learning rate spread across groups via slice(...).
learn_c.freeze_to(-2)
learn_c.fit_one_cycle(10, slice(lr/(2.6**4),lr), wd=0.6, moms=(0.8,0.7))

In [None]:
learn_c.save("clf_bwd")

In [None]:
# Stage 3: last three layer groups, at half the learning rate.
learn_c.freeze_to(-3)
learn_c.fit_one_cycle(10, slice(lr/2/(2.6**4),lr/2), wd=0.6, moms=(0.8,0.7))

In [None]:
learn_c.save("clf_bwd")

In [None]:
# Stage 4: whole model, at a tenth of the learning rate.
learn_c.unfreeze()
learn_c.fit_one_cycle(10, slice(lr/10/(2.6**4),lr/10), wd=0.6, moms=(0.8,0.7))

In [None]:
learn_c.save("clf_bwd")

## Evaluate bwd clf

In [None]:
# Reload the cached backward classifier DataBunch for the class/vocabulary checks below.
data_clas = load_data(path/"data", "data_clas_bwd.pkl", backwards=True)

In [None]:
# Backward test DataBunch built from the held-out rows of train.csv (no validation split).
# - The label column is addressed by name: `df_test` keeps the CSV's own column order,
#   so position 0 is not guaranteed to be the label.
# - classes=data_clas.classes pins the label->index mapping to the one the classifier was
#   trained with, instead of re-deriving it from whichever labels occur in the test rows.
data_test = (TextList.from_df(df_test, path, cols='text',
                              processor=SPProcessor.load(tknzer))
                         .split_none()
                         .label_from_df(cols='label', classes=data_clas.classes)
                         .databunch(bs=bs, backwards=True))

In [None]:
data_test.show_batch()

In [None]:
# Sanity check: both DataBunches should report the same number of classes.
data_test.c, data_clas.c

In [None]:
# Sanity check: both DataBunches should report the same vocabulary size.
len(data_test.vocab.itos), len(data_clas.vocab.itos)

In [None]:
# Cached for the ensemble and ablation sections, which reload it with load_data.
data_test.save(path/"data/test_data_bwd.pkl")

In [None]:
learn_c.load(path/"clf_bwd");

In [None]:
# Swap the test set in as the learner's validation loader so get_preds() scores it.
learn_c.data.valid_dl = data_test.fix_dl

In [None]:
preds,y,losses = learn_c.get_preds(with_loss=True)
predictions = np.argmax(preds, axis = 1)

interp = ClassificationInterpretation(learn_c, preds, y, losses)
interp.plot_confusion_matrix()

In [None]:
predictions[:15], y[:15], predictions.shape, y.shape

In [None]:
print(classification_report(y, predictions, target_names=learn_c.data.classes, digits=4))
print(accuracy_score(y, predictions))

In [None]:
learn_c.show_results()

In [None]:
txt_ci = TextClassificationInterpretation.from_learner(learn_c)

In [None]:
txt_ci.show_top_losses(5)

## Bwd + Fwd

In [None]:
bs = 8

In [None]:
# Architecture overrides; must match the values the saved classifiers were trained with.
config = awd_lstm_clas_config.copy()
config['qrnn'] = True
config['n_hid'] = 1550 #default 1152
config['n_layers'] = 4 #default 3

In [None]:
data_clas = load_data(path/"data", "data_clas_export.pkl", bs=bs, num_workers=1)
learn_c = text_classifier_learner(data_clas, AWD_LSTM, config=config, drop_mult=0.5, metrics=[accuracy,f1])
learn_c.path = path
learn_c.load("clf_fwd");

In [None]:
data_test = load_data(path/"data", "test_data.pkl", bs=bs)

In [None]:
learn_c.data.valid_dl = data_test.fix_dl

In [None]:
preds,targs = learn_c.get_preds(ordered=True)
accuracy(preds,targs)

In [None]:
data_clas_bwd = load_data(path/"data", "data_clas_bwd.pkl", bs=bs, num_workers=1, backwards=True)
learn_c_bwd = text_classifier_learner(data_clas_bwd, AWD_LSTM, config=config, drop_mult=0.5, metrics=[accuracy,f1]).to_fp16()
learn_c_bwd.path = path
learn_c_bwd.load("clf_bwd");

In [None]:
data_test_bwd = load_data(path/"data", "test_data_bwd.pkl", bs=bs, backwards=True)

In [None]:
learn_c_bwd.data.valid_dl = data_test_bwd.fix_dl

In [None]:
preds_bwd,targs_bwd = learn_c_bwd.get_preds(ordered=True)
accuracy(preds_bwd,targs_bwd)

In [None]:
preds_avg = (preds+preds_bwd)/2
accuracy(preds_avg, targs)

In [None]:
predictions = np.argmax(preds_avg, axis = 1)

In [None]:
print(classification_report(targs, predictions, target_names=learn_c.data.classes, digits=4))
print(accuracy_score(targs, predictions))

## Here we go again - with class weights

In [None]:
bs = 8

In [None]:
data_clas = load_data(path/"data", "data_clas_export.pkl", bs=bs)

In [None]:
n_samples = len(data_clas.train_ds.x); n_samples

In [None]:
n_classes = data_clas.c; n_classes

In [None]:
# Encoded training labels (integer class indices), used for the per-class counts below.
y = data_clas.train_ds.y.items; y

In [None]:
# "Balanced" weighting: n_samples / (n_classes * count_per_class).
class_weights = n_samples / (n_classes * np.bincount(y)); class_weights

In [None]:
# Alternative weighting: 1 - relative class frequency. This rebinds `class_weights`, so it
# (not the "balanced" formula in the previous cell) is what feeds the weighted loss below.
# The cell now displays the variable it assigns; it used to reference an undefined name.
class_weights = 1 - np.bincount(y)/n_samples; class_weights

In [None]:
# Classifier architecture overrides (same values as the unweighted classifier).
config = awd_lstm_clas_config.copy()
config['qrnn'] = True
config['n_hid'] = 1550 #default 1152
config['n_layers'] = 4 #default 3

In [None]:
# Fresh forward classifier on top of the fine-tuned forward encoder.
learn_c = text_classifier_learner(data_clas, AWD_LSTM, config=config, pretrained=False, drop_mult=0.5, 
                                  metrics=[accuracy,f1])
learn_c.path = path
learn_c.load_encoder("fine_tune_enc");

In [None]:
# Show the default loss before it is replaced with the class-weighted one.
learn_c.loss_func

In [None]:
# Put the per-class weights on the DataBunch's device rather than assuming CUDA is present
# (the seeding cell already guards on torch.cuda.is_available()).
loss_weights = torch.FloatTensor(class_weights).to(learn_c.data.device)
# CrossEntropyFlat already returns a flattened cross-entropy loss, so it is used directly
# instead of being wrapped in a second FlattenedLoss.
learn_c.loss_func = CrossEntropyFlat(weight=loss_weights)

In [None]:
# Stage 1 of gradual unfreezing: train with the body frozen.
learn_c.freeze()
learn_c.lr_find()

In [None]:
learn_c.recorder.plot()

In [None]:
lr=2e-2

In [None]:
learn_c.fit_one_cycle(10, lr, wd=0.6, moms=(0.8,0.7))

In [None]:
# The same checkpoint name is overwritten after every stage.
learn_c.save("clf_fwd_weighted")

In [None]:
# Stage 2: last two layer groups, learning rate spread across groups via slice(...).
learn_c.freeze_to(-2)
learn_c.fit_one_cycle(10, slice(lr/(2.6**4),lr), wd=0.6, moms=(0.8,0.7))

In [None]:
learn_c.save("clf_fwd_weighted")

In [None]:
# Stage 3: last three layer groups, at half the learning rate.
learn_c.freeze_to(-3)
learn_c.fit_one_cycle(10, slice(lr/2/(2.6**4),lr/2), wd=0.6, moms=(0.8,0.7))

In [None]:
learn_c.save("clf_fwd_weighted")

In [None]:
# Stage 4: whole model, at a tenth of the learning rate.
learn_c.unfreeze()
learn_c.fit_one_cycle(10, slice(lr/10/(2.6**4),lr/10), wd=0.6, moms=(0.8,0.7))

In [None]:
learn_c.save("clf_fwd_weighted")

In [None]:
data_clas = load_data(path/"data", "data_clas_export.pkl")

In [None]:
data_lm = load_data(path/"data", "data_lm_export.pkl")

In [None]:
# Test DataBunch cached by the "Evaluate Forward CLF" section.
data_test = load_data(path/"data", "test_data.pkl")

In [None]:
len(data_test.vocab.itos), len(data_clas.vocab.itos)

In [None]:
learn_c.load(path/"clf_fwd_weighted");

In [None]:
# Swap the test set in as the learner's validation loader so get_preds() scores it.
learn_c.data.valid_dl = data_test.fix_dl

In [None]:
# The weights are moved to CPU for evaluation, matching the CPU tensors fastai computes the
# per-item losses on in get_preds(with_loss=True). CrossEntropyFlat is used directly
# rather than being wrapped in a second FlattenedLoss.
# NOTE: after this cell the learner's loss holds CPU weights; reset it before training again on GPU.
learn_c.loss_func = CrossEntropyFlat(weight=loss_weights.cpu())
preds,y,losses = learn_c.get_preds(with_loss=True)
predictions = np.argmax(preds, axis = 1)

interp = ClassificationInterpretation(learn_c, preds, y, losses)
interp.plot_confusion_matrix()

In [None]:
# Spot-check the first predictions against the targets, plus both shapes.
predictions[:15], y[:15], predictions.shape, y.shape

In [None]:
print(classification_report(y, predictions, target_names=learn_c.data.classes, digits=4))
print(accuracy_score(y, predictions))

## Ablation Study

### Pre-Trained LM + Fine-tune LM (No gradual_unfreeze)

In [None]:
bs = 8

In [None]:
data_clas = load_data(path/"data", "data_clas_export.pkl", bs=bs)

In [None]:
len(data_clas.vocab.itos)

In [None]:
config = awd_lstm_clas_config.copy()
config['qrnn'] = True
config['n_hid'] = 1550 #default 1152
config['n_layers'] = 4 #default 3

In [None]:
# Forward classifier on the fine-tuned forward encoder.
learn_c = text_classifier_learner(data_clas, AWD_LSTM, config=config, pretrained=False, drop_mult=0.5, 
                                  metrics=[accuracy,f1])
learn_c.path = path
learn_c.load_encoder("fine_tune_enc");

In [None]:
# Ablation: the whole model is unfrozen from the start (no staged unfreezing).
learn_c.unfreeze()
learn_c.lr_find()

In [None]:
learn_c.recorder.plot()

In [None]:
lr=1e-2

In [None]:
# Single 40-epoch run; SaveModelCallback checkpoints the model under the given name
# (fastai default: whenever the monitored validation loss improves - confirm).
learn_c.fit_one_cycle(40, slice(lr/(2.6**4),lr), wd=0.6, moms=(0.8,0.7),
                      callbacks=[SaveModelCallback(learn_c, name="clf_fwd_no_gradual_unfreeze")])

In [None]:
data_clas = load_data(path/"data", "data_clas_export.pkl")

In [None]:
data_lm = load_data(path/"data", "data_lm_export.pkl")

In [None]:
data_test = load_data(path/"data", "test_data.pkl")

In [None]:
len(data_test.vocab.itos), len(data_clas.vocab.itos)

In [None]:
learn_c.load(path/"clf_fwd_no_gradual_unfreeze");

In [None]:
# Swap the test set in as the learner's validation loader so get_preds() scores it.
learn_c.data.valid_dl = data_test.fix_dl

In [None]:
preds,y,losses = learn_c.get_preds(with_loss=True)
predictions = np.argmax(preds, axis = 1)

interp = ClassificationInterpretation(learn_c, preds, y, losses)
interp.plot_confusion_matrix()

In [None]:
predictions[:15], y[:15], predictions.shape, y.shape

In [None]:
print(classification_report(y, predictions, target_names=learn_c.data.classes, digits=4))
print(accuracy_score(y, predictions))

In [None]:
# Same ablation for the backward classifier.
data_clas = load_data(path/"data", "data_clas_bwd.pkl", bs=bs, backwards=True)

In [None]:
config = awd_lstm_clas_config.copy()
config['qrnn'] = True
config['n_hid'] = 1550 #default 1152
config['n_layers'] = 4 #default 3

In [None]:
learn_c = text_classifier_learner(data_clas, AWD_LSTM, config=config, pretrained=False, drop_mult=0.5, 
                                  metrics=[accuracy,f1])
learn_c.path = path
learn_c.load_encoder("fine_tune_enc_bwd");

In [None]:
learn_c.unfreeze()
learn_c.lr_find()

In [None]:
learn_c.recorder.plot()

In [None]:
lr=1e-2

In [None]:
learn_c.fit_one_cycle(40, slice(lr/(2.6**4),lr), wd=0.6, moms=(0.8,0.7),
                      callbacks=[SaveModelCallback(learn_c, name="clf_bwd_no_gradual_unfreeze")])

In [None]:
data_clas = load_data(path/"data", "data_clas_bwd.pkl", bs=bs, backwards=True)

In [None]:
data_test = load_data(path/"data", "test_data_bwd.pkl", bs=bs, backwards=True)

In [None]:
len(data_test.vocab.itos), len(data_clas.vocab.itos)

In [None]:
learn_c.load(path/"clf_bwd_no_gradual_unfreeze");

In [None]:
learn_c.data.valid_dl = data_test.fix_dl

In [None]:
preds,y,losses = learn_c.get_preds(with_loss=True)
predictions = np.argmax(preds, axis = 1)

interp = ClassificationInterpretation(learn_c, preds, y, losses)
interp.plot_confusion_matrix()

In [None]:
predictions[:15], y[:15], predictions.shape, y.shape

In [None]:
print(classification_report(y, predictions, target_names=learn_c.data.classes, digits=4))
print(accuracy_score(y, predictions))

In [None]:
data_clas = load_data(path/"data", "data_clas_export.pkl", bs=bs, num_workers=1)
learn_c = text_classifier_learner(data_clas, AWD_LSTM, config=config, drop_mult=0.5, metrics=[accuracy,f1])
learn_c.path = path
learn_c.load("clf_fwd_no_gradual_unfreeze");

In [None]:
data_test = load_data(path/"data", "test_data.pkl", bs=bs)

In [None]:
learn_c.data.valid_dl = data_test.fix_dl

In [None]:
preds,targs = learn_c.get_preds(ordered=True)
accuracy(preds,targs)

In [None]:
data_clas_bwd = load_data(path/"data", "data_clas_bwd.pkl", bs=bs, num_workers=1, backwards=True)
learn_c_bwd = text_classifier_learner(data_clas_bwd, AWD_LSTM, config=config, drop_mult=0.5, metrics=[accuracy,f1]).to_fp16()
learn_c_bwd.path = path
learn_c_bwd.load("clf_bwd_no_gradual_unfreeze");

In [None]:
data_test_bwd = load_data(path/"data", "test_data_bwd.pkl", bs=bs, backwards=True)

In [None]:
learn_c_bwd.data.valid_dl = data_test_bwd.fix_dl

In [None]:
preds_bwd,targs_bwd = learn_c_bwd.get_preds(ordered=True)
accuracy(preds_bwd,targs_bwd)

In [None]:
preds_avg = (preds+preds_bwd)/2
accuracy(preds_avg, targs)

In [None]:
predictions = np.argmax(preds_avg, axis = 1)

In [None]:
print(classification_report(targs, predictions, target_names=learn_c.data.classes, digits=4))
print(accuracy_score(targs, predictions))

### Pre-Trained LM + Fine-tune LM  + Top Only

In [None]:
bs = 8

In [None]:
data_clas = load_data(path/"data", "data_clas_export.pkl", bs=bs)

In [None]:
len(data_clas.vocab.itos)

In [None]:
config = awd_lstm_clas_config.copy()
config['qrnn'] = True
config['n_hid'] = 1550 #default 1152
config['n_layers'] = 4 #default 3

In [None]:
# Forward classifier on the fine-tuned forward encoder.
learn_c = text_classifier_learner(data_clas, AWD_LSTM, config=config, pretrained=False, drop_mult=0.5, 
                                  metrics=[accuracy,f1])
learn_c.path = path
learn_c.load_encoder("fine_tune_enc");

In [None]:
# Ablation: the body stays frozen for the whole run, so only the top of the model is trained.
learn_c.freeze()
learn_c.lr_find()

In [None]:
learn_c.recorder.plot()

In [None]:
lr=2e-2

In [None]:
# Single 40-epoch run; SaveModelCallback checkpoints the model under the given name
# (fastai default: whenever the monitored validation loss improves - confirm).
learn_c.fit_one_cycle(40, slice(lr/(2.6**4),lr), wd=0.6, moms=(0.8,0.7),
                      callbacks=[SaveModelCallback(learn_c, name="clf_fwd_top_only")])

In [None]:
data_clas = load_data(path/"data", "data_clas_export.pkl")

In [None]:
data_lm = load_data(path/"data", "data_lm_export.pkl")

In [None]:
data_test = load_data(path/"data", "test_data.pkl")

In [None]:
len(data_test.vocab.itos), len(data_clas.vocab.itos)

In [None]:
learn_c.load(path/"clf_fwd_top_only");

In [None]:
# Swap the test set in as the learner's validation loader so get_preds() scores it.
learn_c.data.valid_dl = data_test.fix_dl

In [None]:
preds,y,losses = learn_c.get_preds(with_loss=True)
predictions = np.argmax(preds, axis = 1)

interp = ClassificationInterpretation(learn_c, preds, y, losses)
interp.plot_confusion_matrix()

In [None]:
predictions[:15], y[:15], predictions.shape, y.shape

In [None]:
print(classification_report(y, predictions, target_names=learn_c.data.classes, digits=4))
print(accuracy_score(y, predictions))

In [None]:
# Same ablation for the backward classifier.
data_clas = load_data(path/"data", "data_clas_bwd.pkl", bs=bs, backwards=True)

In [None]:
config = awd_lstm_clas_config.copy()
config['qrnn'] = True
config['n_hid'] = 1550 #default 1152
config['n_layers'] = 4 #default 3

In [None]:
learn_c = text_classifier_learner(data_clas, AWD_LSTM, config=config, pretrained=False, drop_mult=0.5, 
                                  metrics=[accuracy,f1])
learn_c.path = path
learn_c.load_encoder("fine_tune_enc_bwd");

In [None]:
learn_c.freeze()
learn_c.lr_find()

In [None]:
learn_c.recorder.plot()

In [None]:
lr=2e-2

In [None]:
learn_c.fit_one_cycle(40, slice(lr/(2.6**4),lr), wd=0.6, moms=(0.8,0.7),
                      callbacks=[SaveModelCallback(learn_c, name="clf_bwd_top_only")])

In [None]:
data_clas = load_data(path/"data", "data_clas_bwd.pkl", bs=bs, backwards=True)

In [None]:
data_test = load_data(path/"data", "test_data_bwd.pkl", bs=bs, backwards=True)

In [None]:
len(data_test.vocab.itos), len(data_clas.vocab.itos)

In [None]:
learn_c.load(path/"clf_bwd_top_only");

In [None]:
learn_c.data.valid_dl = data_test.fix_dl

In [None]:
preds,y,losses = learn_c.get_preds(with_loss=True)
predictions = np.argmax(preds, axis = 1)

interp = ClassificationInterpretation(learn_c, preds, y, losses)
interp.plot_confusion_matrix()

In [None]:
predictions[:15], y[:15], predictions.shape, y.shape

In [None]:
print(classification_report(y, predictions, target_names=learn_c.data.classes, digits=4))
print(accuracy_score(y, predictions))

In [None]:
data_clas = load_data(path/"data", "data_clas_export.pkl", bs=bs, num_workers=1)
learn_c = text_classifier_learner(data_clas, AWD_LSTM, config=config, drop_mult=0.5, metrics=[accuracy,f1])
learn_c.path = path
learn_c.load("clf_fwd_top_only");

In [None]:
data_test = load_data(path/"data", "test_data.pkl", bs=bs)

In [None]:
learn_c.data.valid_dl = data_test.fix_dl

In [None]:
preds,targs = learn_c.get_preds(ordered=True)
accuracy(preds,targs)

In [None]:
data_clas_bwd = load_data(path/"data", "data_clas_bwd.pkl", bs=bs, num_workers=1, backwards=True)
learn_c_bwd = text_classifier_learner(data_clas_bwd, AWD_LSTM, config=config, drop_mult=0.5, metrics=[accuracy,f1]).to_fp16()
learn_c_bwd.path = path
learn_c_bwd.load("clf_bwd_top_only");

In [None]:
data_test_bwd = load_data(path/"data", "test_data_bwd.pkl", bs=bs, backwards=True)

In [None]:
learn_c_bwd.data.valid_dl = data_test_bwd.fix_dl

In [None]:
preds_bwd,targs_bwd = learn_c_bwd.get_preds(ordered=True)
accuracy(preds_bwd,targs_bwd)

In [None]:
preds_avg = (preds+preds_bwd)/2
accuracy(preds_avg, targs)

In [None]:
predictions = np.argmax(preds_avg, axis = 1)

In [None]:
print(classification_report(targs, predictions, target_names=learn_c.data.classes, digits=4))
print(accuracy_score(targs, predictions))

### Pre-Trained LM + No Fine-Tune LM + Gradual Unfreezing

In [None]:
bs = 8

In [None]:
data_clas = load_data(path/"data", "data_clas_export.pkl", bs=bs)

In [None]:
len(data_clas.vocab.itos)

In [None]:
config = awd_lstm_clas_config.copy()
config['qrnn'] = True
config['n_hid'] = 1550 #default 1152
config['n_layers'] = 4 #default 3

In [None]:
# Ablation: load the encoder saved *before* LM fine-tuning ("no_fine_tune_enc").
learn_c = text_classifier_learner(data_clas, AWD_LSTM, config=config, pretrained=False, drop_mult=0.5, 
                                  metrics=[accuracy,f1])
learn_c.path = path
learn_c.load_encoder("no_fine_tune_enc");

In [None]:
# Stage 1 of gradual unfreezing: train with the body frozen.
learn_c.freeze()
learn_c.lr_find()

In [None]:
learn_c.recorder.plot()

In [None]:
lr=1e-2

In [None]:
learn_c.fit_one_cycle(10, lr, wd=0.6, moms=(0.8,0.7))

In [None]:
# The same checkpoint name is overwritten after every stage.
learn_c.save("clf_no_lm_tune")

In [None]:
# Stage 2: last two layer groups, learning rate spread across groups via slice(...).
learn_c.freeze_to(-2)
learn_c.fit_one_cycle(10, slice(lr/(2.6**4),lr), wd=0.6, moms=(0.8,0.7))

In [None]:
learn_c.save("clf_no_lm_tune")

In [None]:
# Stage 3: last three layer groups, at half the learning rate.
learn_c.freeze_to(-3)
learn_c.fit_one_cycle(10, slice(lr/2/(2.6**4),lr/2), wd=0.6, moms=(0.8,0.7))

In [None]:
learn_c.save("clf_no_lm_tune")

In [None]:
# Stage 4: whole model, at a tenth of the learning rate.
learn_c.unfreeze()
learn_c.fit_one_cycle(10, slice(lr/10/(2.6**4),lr/10), wd=0.6, moms=(0.8,0.7))

In [None]:
learn_c.save("clf_no_lm_tune")

In [None]:
data_clas = load_data(path/"data", "data_clas_export.pkl")

In [None]:
data_lm = load_data(path/"data", "data_lm_export.pkl")

In [None]:
data_test = load_data(path/"data", "test_data.pkl")

In [None]:
len(data_test.vocab.itos), len(data_clas.vocab.itos)

In [None]:
learn_c.load(path/"clf_no_lm_tune");

In [None]:
# Swap the test set in as the learner's validation loader so get_preds() scores it.
learn_c.data.valid_dl = data_test.fix_dl

In [None]:
preds,y,losses = learn_c.get_preds(with_loss=True)
predictions = np.argmax(preds, axis = 1)

interp = ClassificationInterpretation(learn_c, preds, y, losses)
interp.plot_confusion_matrix()

In [None]:
predictions[:15], y[:15], predictions.shape, y.shape

In [None]:
print(classification_report(y, predictions, target_names=learn_c.data.classes, digits=4))
print(accuracy_score(y, predictions))

In [None]:
# Same ablation for the backward classifier.
data_clas = load_data(path/"data", "data_clas_bwd.pkl", bs=bs, backwards=True)

In [None]:
config = awd_lstm_clas_config.copy()
config['qrnn'] = True
config['n_hid'] = 1550 #default 1152
config['n_layers'] = 4 #default 3

In [None]:
learn_c = text_classifier_learner(data_clas, AWD_LSTM, config=config, pretrained=False, drop_mult=0.5, 
                                  metrics=[accuracy,f1])
learn_c.path = path
learn_c.load_encoder("no_fine_tune_enc_bwd");

In [None]:
learn_c.freeze()
learn_c.lr_find()

In [None]:
learn_c.recorder.plot()

In [None]:
lr=2e-2

In [None]:
learn_c.fit_one_cycle(10, lr, wd=0.6, moms=(0.8,0.7))

In [None]:
learn_c.save("clf_no_lm_tune_bwd")

In [None]:
learn_c.freeze_to(-2)
learn_c.fit_one_cycle(10, slice(lr/(2.6**4),lr), wd=0.6, moms=(0.8,0.7))

In [None]:
learn_c.save("clf_no_lm_tune_bwd")

In [None]:
learn_c.freeze_to(-3)
learn_c.fit_one_cycle(10, slice(lr/2/(2.6**4),lr/2), wd=0.6, moms=(0.8,0.7))

In [None]:
learn_c.save("clf_no_lm_tune_bwd")

In [None]:
learn_c.unfreeze()
learn_c.fit_one_cycle(10, slice(lr/10/(2.6**4),lr/10), wd=0.6, moms=(0.8,0.7))

In [None]:
learn_c.save("clf_no_lm_tune_bwd")

In [None]:
data_clas = load_data(path/"data", "data_clas_bwd.pkl", bs=bs, backwards=True)

In [None]:
data_test = load_data(path/"data", "test_data_bwd.pkl", bs=bs, backwards=True)

In [None]:
len(data_test.vocab.itos), len(data_clas.vocab.itos)

In [None]:
learn_c.load(path/"clf_no_lm_tune_bwd");

In [None]:
learn_c.data.valid_dl = data_test.fix_dl

In [None]:
preds,y,losses = learn_c.get_preds(with_loss=True)
predictions = np.argmax(preds, axis = 1)

interp = ClassificationInterpretation(learn_c, preds, y, losses)
interp.plot_confusion_matrix()

In [None]:
predictions[:15], y[:15], predictions.shape, y.shape

In [None]:
print(classification_report(y, predictions, target_names=learn_c.data.classes, digits=4))
print(accuracy_score(y, predictions))

In [None]:
data_clas = load_data(path/"data", "data_clas_export.pkl", bs=bs, num_workers=1)
learn_c = text_classifier_learner(data_clas, AWD_LSTM, config=config, drop_mult=0.5, metrics=[accuracy,f1])
learn_c.path = path
learn_c.load("clf_no_lm_tune");

In [None]:
data_test = load_data(path/"data", "test_data.pkl", bs=bs)

In [None]:
learn_c.data.valid_dl = data_test.fix_dl

In [None]:
preds,targs = learn_c.get_preds(ordered=True)
accuracy(preds,targs)

In [None]:
data_clas_bwd = load_data(path/"data", "data_clas_bwd.pkl", bs=bs, num_workers=1, backwards=True)
learn_c_bwd = text_classifier_learner(data_clas_bwd, AWD_LSTM, config=config, drop_mult=0.5, metrics=[accuracy,f1]).to_fp16()
learn_c_bwd.path = path
learn_c_bwd.load("clf_no_lm_tune_bwd");

In [None]:
data_test_bwd = load_data(path/"data", "test_data_bwd.pkl", bs=bs, backwards=True)

In [None]:
learn_c_bwd.data.valid_dl = data_test_bwd.fix_dl

In [None]:
preds_bwd,targs_bwd = learn_c_bwd.get_preds(ordered=True)
accuracy(preds_bwd,targs_bwd)

In [None]:
preds_avg = (preds+preds_bwd)/2
accuracy(preds_avg, targs)

In [None]:
predictions = np.argmax(preds_avg, axis = 1)

In [None]:
print(classification_report(targs, predictions, target_names=learn_c.data.classes, digits=4))
print(accuracy_score(targs, predictions))

### Pre-Trained LM + No Fine-Tune LM + No Gradual Unfreezing

In [None]:
bs = 8

In [None]:
data_clas = load_data(path/"data", "data_clas_export.pkl", bs=bs)

In [None]:
len(data_clas.vocab.itos)

In [None]:
config = awd_lstm_clas_config.copy()
config['qrnn'] = True
config['n_hid'] = 1550 #default 1152
config['n_layers'] = 4 #default 3

In [None]:
# Ablation: load the encoder saved *before* LM fine-tuning ("no_fine_tune_enc").
learn_c = text_classifier_learner(data_clas, AWD_LSTM, config=config, pretrained=False, drop_mult=0.5, 
                                  metrics=[accuracy,f1])
learn_c.path = path
learn_c.load_encoder("no_fine_tune_enc");

In [None]:
# NOTE(review): this section is titled "No Gradual Unfreezing", but the model is frozen here
# and never unfrozen, as in the "Top Only" ablation; the earlier "No gradual_unfreeze"
# ablation calls unfreeze() at this point. Confirm which setup was intended.
learn_c.freeze()
learn_c.lr_find()

In [None]:
learn_c.recorder.plot()

In [None]:
lr=2e-2

In [None]:
# Single 40-epoch run; SaveModelCallback checkpoints the model under the given name
# (fastai default: whenever the monitored validation loss improves - confirm).
learn_c.fit_one_cycle(40, slice(lr/(2.6**4),lr), wd=0.6, moms=(0.8,0.7),
                      callbacks=[SaveModelCallback(learn_c, name="clf_no_lm_tune_no_gradual_unfreeze")])

In [None]:
data_clas = load_data(path/"data", "data_clas_export.pkl")

In [None]:
data_lm = load_data(path/"data", "data_lm_export.pkl")

In [None]:
data_test = load_data(path/"data", "test_data.pkl")

In [None]:
len(data_test.vocab.itos), len(data_clas.vocab.itos)

In [None]:
learn_c.load(path/"clf_no_lm_tune_no_gradual_unfreeze");

In [None]:
# Swap the test set in as the learner's validation loader so get_preds() scores it.
learn_c.data.valid_dl = data_test.fix_dl

In [None]:
preds,y,losses = learn_c.get_preds(with_loss=True)
predictions = np.argmax(preds, axis = 1)

interp = ClassificationInterpretation(learn_c, preds, y, losses)
interp.plot_confusion_matrix()

In [None]:
predictions[:15], y[:15], predictions.shape, y.shape

In [None]:
print(classification_report(y, predictions, target_names=learn_c.data.classes, digits=4))
print(accuracy_score(y, predictions))

In [None]:
# Same ablation for the backward classifier.
data_clas = load_data(path/"data", "data_clas_bwd.pkl", bs=bs, backwards=True)

In [None]:
config = awd_lstm_clas_config.copy()
config['qrnn'] = True
config['n_hid'] = 1550 #default 1152
config['n_layers'] = 4 #default 3

In [None]:
learn_c = text_classifier_learner(data_clas, AWD_LSTM, config=config, pretrained=False, drop_mult=0.5, 
                                  metrics=[accuracy,f1])
learn_c.path = path
learn_c.load_encoder("no_fine_tune_enc_bwd");

In [None]:
# NOTE(review): same freeze()/heading mismatch as the forward run above - confirm.
learn_c.freeze()
learn_c.lr_find()

In [None]:
learn_c.recorder.plot()

In [None]:
lr=2e-2

In [None]:
learn_c.fit_one_cycle(40, slice(lr/(2.6**4),lr), wd=0.6, moms=(0.8,0.7),
                      callbacks=[SaveModelCallback(learn_c, name="clf_no_lm_tune_no_gradual_unfreeze_bwd")])

In [None]:
data_clas = load_data(path/"data", "data_clas_bwd.pkl", bs=bs, backwards=True)

In [None]:
data_test = load_data(path/"data", "test_data_bwd.pkl", bs=bs, backwards=True)

In [None]:
len(data_test.vocab.itos), len(data_clas.vocab.itos)

In [None]:
learn_c.load(path/"clf_no_lm_tune_no_gradual_unfreeze_bwd");

In [None]:
learn_c.data.valid_dl = data_test.fix_dl

In [None]:
preds,y,losses = learn_c.get_preds(with_loss=True)
predictions = np.argmax(preds, axis = 1)

interp = ClassificationInterpretation(learn_c, preds, y, losses)
interp.plot_confusion_matrix()

In [None]:
predictions[:15], y[:15], predictions.shape, y.shape

In [None]:
print(classification_report(y, predictions, target_names=learn_c.data.classes, digits=4))
print(accuracy_score(y, predictions))

In [None]:
data_clas = load_data(path/"data", "data_clas_export.pkl", bs=bs, num_workers=1)
learn_c = text_classifier_learner(data_clas, AWD_LSTM, config=config, drop_mult=0.5, metrics=[accuracy,f1])
learn_c.path = path
learn_c.load("clf_no_lm_tune_no_gradual_unfreeze");

In [None]:
data_test = load_data(path/"data", "test_data.pkl", bs=bs)

In [None]:
learn_c.data.valid_dl = data_test.fix_dl

In [None]:
preds,targs = learn_c.get_preds(ordered=True)
accuracy(preds,targs)

In [None]:
data_clas_bwd = load_data(path/"data", "data_clas_bwd.pkl", bs=bs, num_workers=1, backwards=True)
learn_c_bwd = text_classifier_learner(data_clas_bwd, AWD_LSTM, config=config, drop_mult=0.5, metrics=[accuracy,f1]).to_fp16()
learn_c_bwd.path = path
learn_c_bwd.load("clf_no_lm_tune_no_gradual_unfreeze_bwd");

In [None]:
data_test_bwd = load_data(path/"data", "test_data_bwd.pkl", bs=bs, backwards=True)

In [None]:
learn_c_bwd.data.valid_dl = data_test_bwd.fix_dl

In [None]:
preds_bwd,targs_bwd = learn_c_bwd.get_preds(ordered=True)
accuracy(preds_bwd,targs_bwd)

In [None]:
preds_avg = (preds+preds_bwd)/2
accuracy(preds_avg, targs)

In [None]:
predictions = np.argmax(preds_avg, axis = 1)

In [None]:
print(classification_report(targs, predictions, target_names=learn_c.data.classes, digits=4))
print(accuracy_score(targs, predictions))