This is a simple notebook that shows the amazing capabilities of the fastai library for NLP. If you want to know more, check out the course:
https://youtu.be/XfoYk_Z5AkI

NLP is covered mostly in:
* https://youtu.be/MpZxV6DVsmM
* https://youtu.be/qqt3aMPB81c

In [None]:
from fastai.text import *
import pandas as pd
import seaborn as sns

In [None]:
!mkdir spamham
!cp /kaggle/input/spam-text-message-classification/* /kaggle/working/spamham

In [None]:
# Load the copied CSV into a DataFrame and preview the first rows.
path = Path('/kaggle/working/spamham')
csv_file = path/'SPAM text message 20170820 - Data.csv'
df = pd.read_csv(csv_file)
df.head()

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Bar chart of how many messages fall into each category.
# The column is passed through explicit `x=`/`data=` keywords: handing the
# Series over positionally is deprecated in seaborn 0.11 and, from 0.12 on,
# the first positional argument is interpreted as `data` rather than `x`.
# The trailing `;` hides the Axes repr in the cell output.
sns.countplot(x='Category', data=df);

In [None]:
# Data for language-model fine-tuning. Only column index 1 of the CSV is read
# as text (presumably the message body; column 0 is used as the label when the
# classifier data is built later). `label_for_lm()` needs no label column.
data_lm = (TextList
    .from_csv(path, 'SPAM text message 20170820 - Data.csv', cols=1)
    # Hold out 10% for validation. The seed pins the split so that
    # Restart & Run All yields the same train/validation sets every time,
    # in line with the seeded split used for the classifier data.
    .split_by_rand_pct(0.1, seed=42)
    .label_for_lm()
    .databunch(bs=64)
)

In [None]:
# Create the language-model learner with the AWD_LSTM architecture, then run
# the learning-rate finder and plot its results to help choose a rate.
learn_lm = language_model_learner(data=data_lm, arch=AWD_LSTM)
learn_lm.lr_find()
learn_lm.recorder.plot()

In [None]:
# First language-model training pass (run before `unfreeze()` is called):
# 7 one-cycle epochs with a maximum learning rate given by slice(0.05).
learn_lm.fit_one_cycle(cyc_len=7, max_lr=slice(0.05))

In [None]:
# Unfreeze the language model, then re-run the learning-rate finder and plot
# its results to pick rates for the next training pass.
learn_lm.unfreeze()
learn_lm.lr_find()
learn_lm.recorder.plot()

In [None]:
# Second language-model training pass, after unfreezing: 15 one-cycle epochs
# with learning rates taken from the range 1e-3 .. 0.01.
learn_lm.fit_one_cycle(cyc_len=15, max_lr=slice(1e-3, 0.01))

In [None]:
# Quick sanity check of the fine-tuned language model: ask it to continue
# the prompt 'Hi' with 15 more words.
learn_lm.predict(text='Hi', n_words=15)

In [None]:
# Save the language model's encoder under the name 'enc'; the classifier
# learner loads it back by that name further down.
learn_lm.save_encoder(name='enc')

In [None]:
# Classifier data, built step by step. Text comes from column index 1 and
# labels from column index 0; the language model's vocab is reused for the
# text items.
cls_items = TextList.from_csv(
    path,
    'SPAM text message 20170820 - Data.csv',
    cols=1,
    vocab=data_lm.vocab,
)
# Seeded random split: 33% of the rows go to validation.
cls_split = cls_items.split_by_rand_pct(0.33, seed=42)
cls_labelled = cls_split.label_from_df(0)
datacls = cls_labelled.databunch(bs=64)

In [None]:
# Build the AWD_LSTM text classifier, load the encoder saved earlier as 'enc',
# then run the learning-rate finder and plot its results.
learn = text_classifier_learner(data=datacls, arch=AWD_LSTM)
learn.load_encoder(name='enc')
learn.lr_find()
learn.recorder.plot()

In [None]:
# First classifier training pass (run before `unfreeze()` is called):
# 8 one-cycle epochs with a maximum learning rate given by slice(1e-3/2).
learn.fit_one_cycle(cyc_len=8, max_lr=slice(1e-3/2))

In [None]:
# Unfreeze the classifier, then re-run the learning-rate finder and plot its
# results to pick rates for the final training pass.
learn.unfreeze()
learn.lr_find()
learn.recorder.plot()

In [None]:
# Final classifier training pass on the unfrozen model, using discriminative
# learning rates. fastai spreads the rates geometrically from `slice.start`
# (first layer group) to `slice.stop` (last layer group), so the smaller value
# must come first: the earliest layers get 1e-4 and the last group gets 1e-2.
# The bounds were previously reversed, which gave the largest rate to the
# earliest layers.
learn.fit_one_cycle(10, slice(1e-3/10, 1e-2))

In [None]:
# Interpretation helper built from the trained classifier; the cells below use
# it for the confusion matrix, attention view and top losses.
interp = TextClassificationInterpretation.from_learner(learn=learn)

In [None]:
# Plot the classifier's confusion matrix.
interp.plot_confusion_matrix()

In [None]:
# Derive two rates from the confusion matrix: the diagonal entry of a row
# divided by that row's total.
# NOTE(review): assumes rows are the actual classes and that index 1 is the
# positive (spam) class — confirm against the class ordering of the data.
cmx = interp.confusion_matrix()
sensitivity = cmx[1,1]/cmx[1].sum()
specificity = cmx[0,0]/cmx[0].sum()
print(f'Sensitivity: {sensitivity}')
print(f'Specificity: {specificity}')

In [None]:
# Show the intrinsic-attention view for a hand-written example sentence.
interp.show_intrinsic_attention(text="Would you like to buy this amazing product?")

In [None]:
# Show the 3 examples with the highest loss.
interp.show_top_losses(k=3)