## import

In [1]:
from shutil import copyfile
from typing import List, TypeVar, Dict, Any
from pandas.core.frame import DataFrame
from fastai.basics import *
from fastai.callback.all import *
from fastai.text.all import *

## Helper functions

In [2]:
def file_copy(file_paths: List, dest: Path):
    """Copy every file in ``file_paths`` into the directory ``dest``.

    ``dest`` (and any missing parent directories) is created first. Each file
    keeps its own name; a file of the same name already in ``dest`` is
    overwritten.

    Args:
        file_paths: source files, given as ``Path`` objects or path strings.
        dest: destination directory, given as a ``Path`` or path string.

    Raises:
        FileNotFoundError: if a source file does not exist. No file is
            created in ``dest`` for that source.
    """
    dest = Path(dest)
    dest.mkdir(parents=True, exist_ok=True)
    for path in file_paths:
        src = Path(path)
        # copyfile creates/truncates the target itself; touching the target
        # first would leave an empty file behind when the source is missing.
        copyfile(src, dest / src.name)
        
def drop_cols(df: DataFrame, cols: List) -> DataFrame:
    """Return a new dataframe without the columns named in ``cols``.

    ``df`` itself is left unchanged.
    """
    trimmed = df.drop(columns=cols)
    return trimmed

def concat_dfs(dfs: List) -> DataFrame:
    """Stack the dataframes in ``dfs`` row-wise into a single dataframe.

    Each frame keeps its own index labels in the result.
    """
    stacked = pd.concat(dfs, axis=0)
    return stacked

def get_lm_learner(df: DataFrame, path: Path,
                   valid_pct: float = 0.3, bs: int = 64,
                   seq_len: int = 72, drop_mult: float = 1.0,
                   opt_func=None, metrics=None,
                   wd: float = 0.1):
    """Build the language-model dataloaders and an AWD_LSTM learner on them.

    Args:
        df: dataframe holding the text to train the language model on.
        path: passed to both the dataloaders and the learner as their path.
        valid_pct: validation fraction passed to ``TextDataLoaders.from_df``.
        bs: batch size.
        seq_len: sequence length passed to the dataloaders.
        drop_mult: dropout multiplier passed to ``language_model_learner``.
        opt_func: optimizer factory; when ``None`` it is not forwarded, so
            the learner keeps its own default optimizer.
        metrics: metrics passed to the learner.
        wd: weight decay passed to the learner.

    Returns:
        Tuple ``(dls_lm, learn)``; ``to_fp16()`` has been applied to ``learn``.
    """
    dls_lm = TextDataLoaders.from_df(df, path, is_lm=True,
                                     valid_pct=valid_pct, bs=bs,
                                     seq_len=seq_len)

    learner_kwargs = dict(drop_mult=drop_mult, metrics=metrics,
                          path=path, wd=wd)
    # Forwarding opt_func=None would replace the learner's default optimizer
    # with None, which fails as soon as the optimizer has to be created.
    if opt_func is not None:
        learner_kwargs['opt_func'] = opt_func

    learn = language_model_learner(dls_lm, AWD_LSTM,
                                   **learner_kwargs).to_fp16()

    return dls_lm, learn
    
def get_text_regressor(df: DataFrame, text_col: str, target_col:str, cols2Drop: List,
                       bs: int = 8, drop_mult: float = 1.0,
                       cbs=None, seq_len: int=80, opt_func=RAdam, wd=None,
                       vocab=None):
    """Pre-process the data, build the dataloaders and the text regressor learner.

    Args:
        df: raw dataframe containing the text and target columns.
        text_col: name of the column holding the raw text.
        target_col: name of the numeric column to regress on.
        cols2Drop: columns removed from ``df`` before building the dataloaders.
        bs: batch size.
        drop_mult: dropout multiplier passed to ``text_classifier_learner``.
        cbs: callbacks passed to the learner.
        seq_len: sequence length passed to ``TextBlock.from_df``.
        opt_func: optimizer factory passed to the learner.
        wd: weight decay for the learner; ``None`` keeps the previous fixed
            value of 0.5.
        vocab: vocabulary for the text block; ``None`` falls back to the
            module-level ``dls_lm.vocab`` created by the language-model step.

    Returns:
        Tuple ``(df_train_for_regressor, dls, learn)`` where ``learn`` has the
        encoder saved as ``./final_encoder`` loaded into it.
    """
    df_train_for_regressor = df.drop(cols2Drop, axis=1)

    # Reuse the language model's vocabulary so the regressor tokenizes with
    # the same vocab the saved encoder was trained on.
    if vocab is None:
        vocab = dls_lm.vocab

    text_block = TextBlock.from_df(text_col, vocab=vocab, seq_len=seq_len)
    data = DataBlock(
        blocks=(text_block, RegressionBlock),
        # NOTE(review): 'text' is presumably the column fastai's dataframe
        # tokenizer writes its output to (not `text_col`) - confirm in fastai docs.
        get_x=ColReader('text'), get_y=ColReader(target_col),
        # NOTE(review): looks like a 30% train / 10% valid random subset, so
        # the remaining rows would be unused - confirm this is intended.
        splitter=RandomSubsetSplitter(0.3, 0.1, seed=2))

    dls = data.dataloaders(df_train_for_regressor, bs=bs)
    learn = text_classifier_learner(dls, AWD_LSTM, drop_mult=drop_mult,
                                    cbs=cbs,
                                    opt_func=opt_func,
                                    metrics=rmse,
                                    # honour the caller's wd; it used to be ignored
                                    wd=0.5 if wd is None else wd)

    # The encoder was saved with learn.path set to ".", so load from there too.
    learn.path = Path(".")
    learn = learn.load_encoder('./final_encoder')

    return df_train_for_regressor, dls, learn

## Get path

In [3]:
# Root directory of the input datasets, relative to the notebook's working directory.
path = Path('../input')
# Output root: staged CSVs and language-model checkpoints are written under here.
op_path = Path('/kaggle/working')

## Move data to pwd

In [4]:
# Stage the competition test set and the CBT-infused training set under op_path.
# The folder name 'common-lit-datset' (sic) is reused by the read_csv calls
# below, so the spelling must stay in sync with them.
file_copy([Path('../input/commonlitreadabilityprize/test.csv'),
           Path('../input/commonlit-infusing-data-with-same-domain-data/vanilla_comlit_train_infused_with_cbt.csv')],
           op_path/'common-lit-datset')

## Initial cleaning of data for lm

In [5]:
# Load the staged training CSV and discard its 'Unnamed: 0' column
# (presumably an index that was written out when the CSV was saved).
df_train = drop_cols(
    pd.read_csv(op_path/'common-lit-datset/vanilla_comlit_train_infused_with_cbt.csv'),
    'Unnamed: 0',
)
df_train.head(2)

Unnamed: 0,excerpt
0,"When the young people returned to the ballroom, it presented a decidedly changed appearance. Instead of an interior scene, it was a winter landscape.\nThe floor was covered with snow-white canvas, not laid on smoothly, but rumpled over bumps and hillocks, like a real snow field. The numerous palms and evergreens that had decorated the room, were powdered with flour and strewn with tufts of cotton, like snow. Also diamond dust had been lightly sprinkled on them, and glittering crystal icicles hung from the branches.\nAt each end of the room, on the wall, hung a beautiful bear-skin rug.\nThe..."
1,"All through dinner time, Mrs. Fayre was somewhat silent, her eyes resting on Dolly with a wistful, uncertain expression. She wanted to give the child the pleasure she craved, but she had hard work to bring herself to the point of overcoming her own objections.\nAt last, however, when the meal was nearly over, she smiled at her little daughter, and said, ""All right, Dolly, you may go.""\n""Oh, mother!"" Dolly cried, overwhelmed with sudden delight. ""Really?\nOh, I am so glad! Are you sure you're willing?""\n""I've persuaded myself to be willing, against my will,"" returned Mrs. Fayre, whimsically..."


In [6]:
# Load the staged competition test set and preview its first two rows.
df_test=pd.read_csv(op_path/'common-lit-datset/test.csv')
df_test.head(2)

Unnamed: 0,id,url_legal,license,excerpt
0,c0f722661,,,"My hope lay in Jack's promise that he would keep a bright light burning in the upper story to guide me on my course. On a clear night this light was visible from the village, but somehow or other I failed to take into account the state of the weather. The air was full of eddying flakes, which would render the headlight of a locomotive invisible a hundred yards distant. Strange that this important fact never occurred to me until I was fully a fourth of a mile from the village. Then, after looking in vain for the beacon light, the danger of my situation struck me, and I halted.\n""I am certai..."
1,f0953f0a5,,,"Dotty continued to go to Mrs. Gray's every night with the milk. Sometimes Katie went with her, and then they always paused a while under the acorn-tree and played ""King and Queen."" Dotty said she wished they could ever remember to bring their nipperkins, for in that case the milk would taste a great deal more like nectar. The ""nipperkins"" were a pair of handled cups which the children supposed to be silver, and which they always used at table.\nDotty knew she was doing wrong every time she played ""King and Queen."" She knew the milk was not hers, but Mrs. Gray's; still she said to herself, ..."


In [7]:
# (rows, columns) of the training and test frames.
df_train.shape, df_test.shape

((128603, 1), (7, 4))

## Re-arranging and pre-processing the data for the language model
* dropping all columns except `excerpt`
* concatenating both dataframes (optional)

In [8]:
# df_train already holds only the excerpt column, so it is aliased (not copied).
df_train_excerpts = df_train
# Reduce the test frame to its excerpt column, working on a copy of df_test.
df_test_excerpts = drop_cols(df_test.copy(), ['id', 'url_legal', 'license'])

In [9]:
# Preview the first training excerpt.
df_train_excerpts.head(1)

Unnamed: 0,excerpt
0,"When the young people returned to the ballroom, it presented a decidedly changed appearance. Instead of an interior scene, it was a winter landscape.\nThe floor was covered with snow-white canvas, not laid on smoothly, but rumpled over bumps and hillocks, like a real snow field. The numerous palms and evergreens that had decorated the room, were powdered with flour and strewn with tufts of cotton, like snow. Also diamond dust had been lightly sprinkled on them, and glittering crystal icicles hung from the branches.\nAt each end of the room, on the wall, hung a beautiful bear-skin rug.\nThe..."


In [10]:
# Preview the first test excerpt; only the excerpt column should remain.
df_test_excerpts.head(1)

Unnamed: 0,excerpt
0,"My hope lay in Jack's promise that he would keep a bright light burning in the upper story to guide me on my course. On a clear night this light was visible from the village, but somehow or other I failed to take into account the state of the weather. The air was full of eddying flakes, which would render the headlight of a locomotive invisible a hundred yards distant. Strange that this important fact never occurred to me until I was fully a fourth of a mile from the village. Then, after looking in vain for the beacon light, the danger of my situation struck me, and I halted.\n""I am certai..."


In [11]:
# Combine train and test excerpts into one frame.
# NOTE(review): in the cells shown, `df` is only inspected (head/shape); the
# language model further down is built from `df_train_excerpts_sample` instead.
df = concat_dfs([df_train_excerpts, df_test_excerpts])
df.head(1)

Unnamed: 0,excerpt
0,"When the young people returned to the ballroom, it presented a decidedly changed appearance. Instead of an interior scene, it was a winter landscape.\nThe floor was covered with snow-white canvas, not laid on smoothly, but rumpled over bumps and hillocks, like a real snow field. The numerous palms and evergreens that had decorated the room, were powdered with flour and strewn with tufts of cotton, like snow. Also diamond dust had been lightly sprinkled on them, and glittering crystal icicles hung from the branches.\nAt each end of the room, on the wall, hung a beautiful bear-skin rug.\nThe..."


In [12]:
# Row count should equal train rows + test rows.
df.shape

(128610, 1)

## Taking a sample (the first 20,000 rows) of the CBT-infused training data

In [13]:
# Keep the first 20,000 rows in file order (a head slice, not a random draw).
df_train_excerpts_sample = df_train_excerpts.iloc[:20000]
df_train_excerpts_sample.shape

(20000, 1)

## Language model

In [14]:
def get_lm_learner(df: DataFrame, path: Path,
                   valid_pct: float = 0.3, bs: int = 64,
                   seq_len: int = 72, drop_mult: float = 1.0,
                   opt_func=None, metrics=None,
                   wd: float = 0.1):
    """Build the language-model dataloaders and an AWD_LSTM learner on them.

    Args:
        df: dataframe holding the text to train the language model on.
        path: passed to both the dataloaders and the learner as their path.
        valid_pct: validation fraction passed to ``TextDataLoaders.from_df``.
        bs: batch size.
        seq_len: sequence length passed to the dataloaders.
        drop_mult: dropout multiplier passed to ``language_model_learner``.
        opt_func: optimizer factory; when ``None`` it is not forwarded, so
            the learner keeps its own default optimizer.
        metrics: metrics passed to the learner.
        wd: weight decay passed to the learner.

    Returns:
        Tuple ``(dls_lm, learn)``; ``to_fp16()`` has been applied to ``learn``.
    """
    dls_lm = TextDataLoaders.from_df(df, path, is_lm=True,
                                     valid_pct=valid_pct, bs=bs,
                                     seq_len=seq_len)

    learner_kwargs = dict(drop_mult=drop_mult, metrics=metrics,
                          path=path, wd=wd)
    # Forwarding opt_func=None would replace the learner's default optimizer
    # with None, which fails as soon as the optimizer has to be created.
    if opt_func is not None:
        learner_kwargs['opt_func'] = opt_func

    learn = language_model_learner(dls_lm, AWD_LSTM,
                                   **learner_kwargs).to_fp16()

    return dls_lm, learn
    

In [15]:
# Build the LM dataloaders and learner on the 20,000-row sample; models are
# saved under op_path because it is passed as the learner's path.
dls_lm, learn = get_lm_learner(
    df_train_excerpts_sample,
    op_path,
    bs=128,
    seq_len=80,
    opt_func=RAdam,
    metrics=[accuracy, Perplexity()],
)

  return array(a, dtype, copy=False, order=order)


In [16]:
# Inspect a batch: the `text_` column is `text` shifted one token ahead (the LM target).
dls_lm.show_batch()

Unnamed: 0,text,text_
0,"xxbos xxmaj but pleased though the boy was , he determined not to tell anyone about it , particularly not his mother , who never could keep anything from her neighbours .still , in spite of his resolutions , he could not hide altogether that something had happened , and when he went in to breakfast his mother asked him what was the matter . ` xxmaj oh , mother , i had such a nice dream last night ,","xxmaj but pleased though the boy was , he determined not to tell anyone about it , particularly not his mother , who never could keep anything from her neighbours .still , in spite of his resolutions , he could not hide altogether that something had happened , and when he went in to breakfast his mother asked him what was the matter . ` xxmaj oh , mother , i had such a nice dream last night , '"
1,"the angry victim who was sitting below .sometimes , however , the inhabitants of the forest were so foolish as to provoke him , and then they got the worst of it .this was what happened to the barber , whom the monkey visited one morning , saying that he wished to be shaved .the barber bowed politely to his customer , and begging him to be seated , tied a large cloth round his neck , and rubbed his","angry victim who was sitting below .sometimes , however , the inhabitants of the forest were so foolish as to provoke him , and then they got the worst of it .this was what happened to the barber , whom the monkey visited one morning , saying that he wished to be shaved .the barber bowed politely to his customer , and begging him to be seated , tied a large cloth round his neck , and rubbed his chin"
2,", and ordered a bed to be made ready and the best doctors in all the country to be sent for .when they heard of the queen 's summons they flocked from all parts , but none could cure him .after everyone had tried and failed a servant entered and informed the queen that a strange old man had just knocked at the palace gate and declared that he was able to heal the dying youth .now this was a","and ordered a bed to be made ready and the best doctors in all the country to be sent for .when they heard of the queen 's summons they flocked from all parts , but none could cure him .after everyone had tried and failed a servant entered and informed the queen that a strange old man had just knocked at the palace gate and declared that he was able to heal the dying youth .now this was a holy"
3,".morgiana , meanwhile , put on a head - dress like a dancing - girl 's , and clasped a girdle round her waist , from which hung a dagger with a silver hilt , and said to xxmaj abdallah : ` ` xxmaj take your tabor , and let us go and divert our master and his guest . ' ' abdallah took his tabor and played before xxmaj morgiana until they came to the door , where xxmaj",", meanwhile , put on a head - dress like a dancing - girl 's , and clasped a girdle round her waist , from which hung a dagger with a silver hilt , and said to xxmaj abdallah : ` ` xxmaj take your tabor , and let us go and divert our master and his guest . ' ' abdallah took his tabor and played before xxmaj morgiana until they came to the door , where xxmaj abdallah"
4,"to each other : ` xxmaj if we can only get hold of that basket it will make our fortune ! ' so they began by inviting xxmaj father xxmaj grumbler to sit down to the table , and brought out the best wine in the cellar , hoping it might loosen his tongue .but xxmaj father xxmaj grumbler was wiser than they gave him credit for , and though they tried in all manner of ways to find out","each other : ` xxmaj if we can only get hold of that basket it will make our fortune ! ' so they began by inviting xxmaj father xxmaj grumbler to sit down to the table , and brought out the best wine in the cellar , hoping it might loosen his tongue .but xxmaj father xxmaj grumbler was wiser than they gave him credit for , and though they tried in all manner of ways to find out who"
5,". xxmaj there were lakes and xxunk where for centuries there have been fields of corn . xxmaj on the oldest sites of our towns were groups of huts made of clay and xxunk , and dominated , perhaps , by the large xxunk house of the xxunk prince . xxmaj in the xxunk , natural islands , or artificial xxunk made of piles ( xxunk ) , afforded standing - ground and protection to villages , if indeed these","xxmaj there were lakes and xxunk where for centuries there have been fields of corn . xxmaj on the oldest sites of our towns were groups of huts made of clay and xxunk , and dominated , perhaps , by the large xxunk house of the xxunk prince . xxmaj in the xxunk , natural islands , or artificial xxunk made of piles ( xxunk ) , afforded standing - ground and protection to villages , if indeed these lake"
6,"xxmaj paperarello , you have won my daughter . ' and he thought to himself : ` xxmaj this fellow must really be a magician . ' but when the princess heard what was in store for her she wept bitterly , and declared that never , never would she marry that dirty xxmaj paperarello ! however , the king paid no heed to her tears and prayers , and before many days were over the wedding was celebrated with","paperarello , you have won my daughter . ' and he thought to himself : ` xxmaj this fellow must really be a magician . ' but when the princess heard what was in store for her she wept bitterly , and declared that never , never would she marry that dirty xxmaj paperarello ! however , the king paid no heed to her tears and prayers , and before many days were over the wedding was celebrated with great"
7,", and before long he had begged the honour of her hand for his master .she declared at first that she was too unworthy to accept the position offered her , and that the minister would soon repent his choice ; but this only made him the more eager , and in the end he gained her consent , and prevailed on her to return with him at once to his own country .the minister then conducted the mother and","and before long he had begged the honour of her hand for his master .she declared at first that she was too unworthy to accept the position offered her , and that the minister would soon repent his choice ; but this only made him the more eager , and in the end he gained her consent , and prevailed on her to return with him at once to his own country .the minister then conducted the mother and daughter"
8,"thing call to me .my name is xxmaj litill . ' the young man walked on till he reached the open space in the wood , where he stopped for dinner .in a moment all the birds in the world seemed flying round his head , and he crumbled some of his bread for them and watched them as they darted down to pick it up .when they had cleared off every crumb the largest bird with the gayest plumage","call to me .my name is xxmaj litill . ' the young man walked on till he reached the open space in the wood , where he stopped for dinner .in a moment all the birds in the world seemed flying round his head , and he crumbled some of his bread for them and watched them as they darted down to pick it up .when they had cleared off every crumb the largest bird with the gayest plumage said"


In [17]:
# First pass: one epoch at lr 1e-2, before unfreeze() is called in a later cell.
# NOTE(review): presumably only the non-pretrained layers train here - confirm
# against fastai's language_model_learner defaults.
learn.fit_one_cycle(1, 1e-2)

epoch,train_loss,valid_loss,accuracy,perplexity,time
0,3.539593,3.136593,0.353829,23.025282,05:43


In [18]:
# Checkpoint after the first epoch; in the recorded run this was written to
# /kaggle/working/models/1epoch.pth.
learn.save('./1epoch')

Path('/kaggle/working/models/1epoch.pth')

In [19]:
# Restore the 1-epoch checkpoint so this cell can be re-run from a known state,
# then unfreeze the model and train two more epochs at a lower lr (1e-3).
learn.load('./1epoch')
learn.unfreeze()
learn.fit_one_cycle(2, 1e-3)

epoch,train_loss,valid_loss,accuracy,perplexity,time
0,2.797444,2.289397,0.492841,9.868982,06:18
1,2.479941,1.972135,0.558973,7.186005,06:19


In [20]:
# Point the learner at the current directory before saving the encoder:
# get_text_regressor later sets the same learn.path and loads './final_encoder'.
learn.path = Path(".")
learn.save_encoder('./final_encoder')

## The text regressor

In [21]:
# Load the synonym-augmented training data and build the regressor on it.
# Note: `learn` is rebound here from the language-model learner to the regressor.
df_for_regressor = pd.read_csv(path/'common-lit-datset-with-synonym-replacement/aug_df.csv')
df_for_regressor, dls, learn = get_text_regressor(
    df_for_regressor,
    'excerpt',
    'target',
    ['Unnamed: 0', 'id', 'url_legal', 'license', 'standard_error'],
    opt_func=RAdam,
)

  return array(a, dtype, copy=False, order=order)


In [22]:
# Stage 1 of gradual unfreezing: one epoch at lr 2e-2 before any freeze_to/unfreeze call.
learn.fit_one_cycle(1, 2e-2)

epoch,train_loss,valid_loss,_rmse,time
0,0.655972,0.636928,0.798078,00:22


In [23]:
# Stage 2: freeze_to(-2) - per fastai, leaves the last two parameter groups trainable.
# The slice gives a learning-rate range from 1e-2/2.6**4 up to 1e-2
# (presumably spread across the parameter groups - confirm in fastai docs).
learn.freeze_to(-2)
learn.fit_one_cycle(1, slice(1e-2/(2.6**4),1e-2))

epoch,train_loss,valid_loss,_rmse,time
0,0.639375,0.475875,0.689837,00:26


In [24]:
# Stage 3: freeze_to(-3) - per fastai, leaves the last three parameter groups
# trainable; same learning-rate range as the previous stage.
learn.freeze_to(-3)
learn.fit_one_cycle(1, slice(1e-2/(2.6**4),1e-2))

epoch,train_loss,valid_loss,_rmse,time
0,0.584705,0.449428,0.670394,00:37


In [25]:
# Final stage: unfreeze everything and train for 10 epochs.
# NOTE(review): in the recorded run the best valid_loss (0.275) was at epoch 8
# and the last epoch was slightly worse (0.289); the weights after the last
# epoch are the ones that get exported below.
learn.unfreeze()
learn.fit_one_cycle(10, slice(1e-2/(2.6**4),1e-2))

epoch,train_loss,valid_loss,_rmse,time
0,0.571718,0.5105,0.714493,00:48
1,0.576624,0.697369,0.835086,00:47
2,0.576116,0.683531,0.826759,00:48
3,0.476609,0.384437,0.62003,00:48
4,0.465219,0.568555,0.754026,00:48
5,0.41694,0.417191,0.645903,00:47
6,0.417749,0.288966,0.537556,00:48
7,0.351979,0.299771,0.547513,00:47
8,0.318437,0.275085,0.524486,00:47
9,0.310659,0.288986,0.537574,00:47


## saving final model

In [26]:
# Export the trained regressor. learn.path was set to "." inside
# get_text_regressor, so the file path is relative to the working directory.
learn.export("./final_model")