Training an LSTM model with the ULMFiT approach on the original text and then on the reversed text. This is a sort of student model that is trained on the predictions of another, similar model; those predictions serve as the pseudo labels.

Inspiration source [here](https://www.kaggle.com/gurharkhalsa/backwards-forwards-ulmfit-ensemble).

In [None]:
#!pip install --no-index --find-links="../input/commonlit-pkgs-offline-download" spacy==3.1.1
!pip install spacy==3.1.1

In [None]:
#!pip install --no-index --find-links="../input/commonlit-pkgs-offline-download" fastai==2.4.1
!yes Y|conda install -c fastai fastai=2.4.1

## Import libraries

In [None]:
from typing import *
from pandas.core.frame import DataFrame
from fastai.text.all import *

In [None]:
def drop_cols(df: DataFrame, cols: List) -> DataFrame:
    """Return a new dataframe equal to ``df`` minus the columns named in ``cols``."""
    trimmed = df.drop(columns=cols)
    return trimmed

def concat_dfs(dfs: List) -> DataFrame:
    """Stack the dataframes in ``dfs`` row-wise; each frame keeps its own index labels."""
    stacked = pd.concat(objs=dfs)
    return stacked

In [None]:
# Dataset root that the input files below are resolved against.
path = Path('../input')
# Working directory; later assigned to the language-model learner's path.
op_path = Path('/kaggle/working')
test_path = path/'commonlitreadabilityprize/test.csv'
# Submission file whose `target` column is read below as pseudo labels for the test rows.
inf_label_path = path/'commonlit-inf-ensmble-fwd-bkwd-student/submission.csv'
# Pseudo-labelled data; presumably augmented excerpts, going by the name - confirm against its source.
cbt_aug_pseudo_label = path/'commonlit-pseudo-labeler/pseudo_labels.csv'

In [None]:
# Load the test excerpts, the earlier submission (pseudo-label source) and the
# pseudo-labelled data.
df_test = pd.read_csv(test_path)
df_subm = pd.read_csv(inf_label_path)
df_cbt_aug_pseudo_label = pd.read_csv(cbt_aug_pseudo_label)

In [None]:
# Show the first row to check which columns the pseudo-label file provides.
df_cbt_aug_pseudo_label.head(1)

In [None]:
# Show the first row of the test set.
df_test.head(1)

In [None]:
# Drop the identifier and licensing columns, which the rest of this section does not use,
# then show the first remaining row.
df_test = drop_cols(df_test, ['id', 'url_legal',
                             'license'])
df_test.head(1)

In [None]:
# Predicted targets from the submission file, as a plain list.
# NOTE(review): these are attached to df_test by position, which assumes the submission
# and test files list their rows in the same order - confirm.
inf_targets = df_subm.target.to_list()

In [None]:
# Attach the predictions to the test rows as pseudo labels (positional assignment).
df_test['target'] = inf_targets
df_test.head(1)

In [None]:
# Append the pseudo-labelled test rows to the pseudo-labelled data. concat_dfs calls
# pd.concat without resetting the index, so index labels may repeat in the result.
df_cbt_aug_pseudo_label = concat_dfs([df_cbt_aug_pseudo_label,df_test])
df_cbt_aug_pseudo_label.head(1)

## Create the forward model 

## The dataloader

In [None]:
# Language-model dataloaders over the 'excerpt' column in forward (non-reversed) order,
# holding out 10% of the rows for validation.
dls_lm_forward = TextDataLoaders.from_df(df_cbt_aug_pseudo_label, text_col='excerpt',
                                      is_lm=True, valid_pct=0.1,
                                      bs=128, seq_len=72, backwards=False)

In [None]:
# Display samples from a language-model batch as a sanity check.
dls_lm_forward.show_batch(min_n=3)

## The forward Language model

In [None]:
# AWD_LSTM language-model learner on the forward dataloaders, tracking accuracy and
# perplexity; `.to_fp16()` switches it to mixed-precision training.
# NOTE(review): drop_mult=2.0 and wd=0.5 are set explicitly - presumably for strong
# regularisation; confirm these values are intended.
learn_forward = language_model_learner(dls_lm_forward,
                               AWD_LSTM,
                               drop_mult=2.0,
                               metrics=[accuracy, Perplexity()], 
                               path=path,
                              wd=0.5).to_fp16()

In [None]:
# Re-point the learner at op_path; it was constructed with path=path (the input directory).
learn_forward.path = op_path

In [None]:
# One epoch of one-cycle training at a maximum learning rate of 2e-2, before unfreezing.
learn_forward.fit_one_cycle(1, 2e-2)

In [None]:
def show_me_lrs(learn):
    """Run the learning-rate finder on ``learn`` and report its suggestions.

    Calls ``learn.lr_find`` with the ``minimum``, ``steep``, ``valley`` and
    ``slide`` suggestion functions, prints the four suggested learning rates
    and returns them.

    Args:
        learn: A learner exposing ``lr_find(suggest_funcs=...)``.

    Returns:
        A ``Suggestions`` namedtuple with the fields ``min``, ``steep``,
        ``valley`` and ``slide``, in that order.
    """
    suggestions = namedtuple('Suggestions', ["min", "steep", "valley", "slide"])
    lr_min, lr_steep, lr_valley, lr_slide = learn.lr_find(
        suggest_funcs=(minimum, steep, valley, slide))
    suggested_lrs = suggestions(lr_min, lr_steep, lr_valley, lr_slide)

    # Adjacent f-strings are used instead of backslash continuations inside a
    # single literal: a continuation keeps the next source line's indentation
    # in the string, which left trailing spaces at the end of printed lines.
    print(f"Minimum/10:\t{lr_min:.2e}\n"
          f"Steepest point:\t{lr_steep:.2e}\n"
          f"Longest valley:\t{lr_valley:.2e}\n"
          f"Slide interval:\t{lr_slide:.2e}")

    return suggested_lrs

In [None]:
# Run the LR finder on the language model and keep its suggestions for the next stage.
suggested_lrs = show_me_lrs(learn_forward)

In [None]:
# Unfreeze the model and train three more epochs at the "slide" suggestion.
learn_forward.unfreeze()
learn_forward.fit_one_cycle(3, suggested_lrs.slide)

In [None]:
# Save the fine-tuned encoder relative to the current directory; the regressor loads it
# later under the same name.
learn_forward.path = Path(".")
learn_forward.save_encoder('./forward_final_encoder')

## Forward Text Regressor

In [None]:
# Regression DataBlock: text input built from 'excerpt' with the language model's vocab,
# a RegressionBlock target read from the 'target' column, and a TrainTestSplitter that
# holds out 20% of the rows.
# NOTE(review): get_x reads 'text' rather than 'excerpt' - presumably the column that
# TextBlock.from_df writes the tokenized text to; confirm against the fastai docs.
data = DataBlock(
       blocks=(TextBlock.from_df('excerpt', 
                                 vocab=dls_lm_forward.vocab, seq_len=72,
                                 backwards=False), RegressionBlock),
       get_x=ColReader('text'),get_y=ColReader('target'),
       splitter=TrainTestSplitter(test_size=0.2))

In [None]:
#data.summary(df_aug)

## The forward dataloader

In [None]:
# Dataloaders for the regressor, built from the combined dataframe with batch size 8.
dls_reg_forward = data.dataloaders(df_cbt_aug_pseudo_label, bs=8)

In [None]:
# Display a sample batch (text and target) as a sanity check.
dls_reg_forward.show_batch()

## The regression model

In [None]:
# Regressor built with text_classifier_learner on AWD_LSTM, optimised with QHAdam and
# evaluated by RMSE; `.to_fp16()` switches it to mixed-precision training.
learn_reg_forward = text_classifier_learner(dls_reg_forward, AWD_LSTM, drop_mult=2.0,
                                opt_func=QHAdam,
                                metrics=rmse,
                                   wd=1.0).to_fp16()

In [None]:
# Load the encoder saved after language-model fine-tuning, using the same relative
# path and name as the earlier save_encoder call.
learn_reg_forward.path = Path(".")
learn_reg_forward = learn_reg_forward.load_encoder('./forward_final_encoder')

## Training forward regression model

In [None]:
# Stage 1: one epoch at a maximum learning rate of 1e-2, before the gradual unfreezing below.
learn_reg_forward.fit_one_cycle(1, 1e-2)

In [None]:
# Refresh the LR suggestions for the regressor before the next stage.
suggested_lrs = show_me_lrs(learn_reg_forward)

In [None]:
# Stage 2: freeze_to(-2) leaves the last two parameter groups trainable; train one epoch
# at the "slide" suggestion.
learn_reg_forward.freeze_to(-2)
learn_reg_forward.fit_one_cycle(1, suggested_lrs.slide)

In [None]:
# Re-run the LR finder now that more of the model is trainable.
suggested_lrs = show_me_lrs(learn_reg_forward)

In [None]:
# Stage 3: freeze_to(-3) leaves the last three parameter groups trainable; train one epoch
# at the "slide" suggestion.
learn_reg_forward.freeze_to(-3)
learn_reg_forward.fit_one_cycle(1, suggested_lrs.slide)

In [None]:
# Re-run the LR finder before the final, fully unfrozen stage.
suggested_lrs = show_me_lrs(learn_reg_forward)

In [None]:
# Final stage: unfreeze the whole model and train seven epochs at the "slide" suggestion.
learn_reg_forward.unfreeze()
learn_reg_forward.fit_one_cycle(7, suggested_lrs.slide)

In [None]:
# Export the trained learner under the name "forward_final_model"; the learner's path
# was set to the current directory earlier.
learn_reg_forward.export("./forward_final_model")