In [1]:
from fastai.text.all import *
import polars as pl

In [2]:
pl.Config.set_fmt_str_lengths(200);

# Data

In [3]:
labels_dict = {0:'sadness', 1:'joy', 2:'love', 3:'anger', 4:'fear', 5:'surprise'}
df = (
    pl.concat([
        pl.scan_csv('emotions-dataset/training.csv').with_columns(is_valid=False),
        pl.scan_csv('emotions-dataset/validation.csv').with_columns(is_valid=True),
    ])
    .with_columns(pl.col.label.replace_strict(labels_dict).alias('label_name'))
    .collect()
)
print(df.shape)
df.head()

(18000, 4)


text,label,is_valid,label_name
str,i64,bool,str
"""i didnt feel humiliated""",0,False,"""sadness"""
"""i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake""",0,False,"""sadness"""
"""im grabbing a minute to post i feel greedy wrong""",3,False,"""anger"""
"""i am ever feeling nostalgic about the fireplace i will know that it is still on the property""",2,False,"""love"""
"""i am feeling grouchy""",3,False,"""anger"""


In [4]:
dls = TextDataLoaders.from_df(df.to_pandas(), text_col='text', label_col='label_name', valid_col='is_valid')
dls.show_batch(max_n=3)

Unnamed: 0,text,category
0,xxbos i feel in my bones like nobody cares if i m here nobody cares if i m gone here i am again saying i m feeling so lonely people either say its ok to be alone or just go home it xxunk me and i do nt know why it does nt mean i do nt try i try and try but people just treat me like i m a xxunk,sadness
1,xxbos i guess which meant or so i assume no photos no words or no other way to convey what it really feels unless you feels it yourself or xxunk bi t xxunk th m i bi t th ng ng i b xxunk i xxunk it to a bit more gloomy context unless you are hurt yourself you will never have sympathy for the hurt ones,sadness
2,xxbos i lost my special mind but don t worry i m still xxunk i just wanted you to feel what i felt while reading this book i don t know how many times it was said that sam was special but i can guarantee you it was many more times than what i used in that xxunk did i tell you she was special,joy


In [5]:
L(dls.numericalize.vocab)

(#5600) ['xxunk', 'xxpad', 'xxbos', 'xxeos', 'xxfld', 'xxrep', 'xxwrep', 'xxup', 'xxmaj', 'i', 'feel', 'and', 'to', 'the', 'a', 'that', 'feeling', 'of', 'my', 'in', 'it', 'm', 'like', 'so', 'for', 'was', 'me', 'have', 'but', 'is', 'am', 'this', 'with', 'not', 'about', 'be', 'nt', 'you', 'as', 'on', 'do', 'just', 'when', 'at', 'or', 'all', 'because', 'more', 'can', 'really', 'up', 't', 'are', 'by', 'know', 'very', 'been', 'if', 'what', 's', 'out', 'myself', 'time', 'how', 'little', 'had', 'get', 've', 'he', 'they', 'will', 'now', 'from', 'being', 'would', 'people', 'want', 'them', 'her', 'some', 'still', 'did', 'think', 'him', 'one', 'there', 'who', 'an', 'even', 'life', 'we', 'its', 'make', 'bit', 'could', 'something', 'much', 'love', 'things', 'going', 'she', 'way', 'than', 'no', 'too', 'day', 'has', 'back', 'don', 'pretty', 'good', 'need', 'go', 'into', 'which', 'his', 'these', 'always', 'should', 'their', 'your', 'see', 'right', 'only', 'say', 'also', 'feelings', 'over', 'other', 't

In [6]:
dls.categorize.vocab

['anger', 'fear', 'joy', 'love', 'sadness', 'surprise']

In [7]:
df[0]

text,label,is_valid,label_name
str,i64,bool,str
"""i didnt feel humiliated""",0,False,"""sadness"""


In [197]:
dls.train_ds[0]

(TensorText([  2,   9,  81,  36,  10, 654]), TensorCategory(4))

In [198]:
f'Label: {dls.categorize.vocab[dls.train_ds[0][1]]}'

'Label: sadness'

In [199]:
[dls.numericalize.vocab[o] for o in dls.train_ds[0][0]]

['xxbos', 'i', 'did', 'nt', 'feel', 'humiliated']

In [205]:
dls.tfms[0].decode(dls.train_ds[0][0])

'xxbos i did nt feel humiliated'

## Model

In [11]:
vocab_size = len(dls.numericalize.vocab)
vocab_size

5600

In [12]:
n_labels = len(dls.categorize.vocab)
n_labels

6

In [13]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.embs = nn.Embedding(vocab_size, 100, padding_idx=1)
        self.lstm = nn.LSTM(100, 20, batch_first=True)
        self.fc = nn.Sequential(
            nn.Linear(20, 8),
            nn.ReLU(),
            nn.Linear(8, n_labels)
        )
    
    def forward(self, x, verbose=False):
        embs = self.embs(x)
        if verbose: print(f'embs: {embs.shape}')
        h,_ = self.lstm(embs)
        if verbose: print(f'h: {h.shape}')
        h = h[:,-1]
        if verbose: print(f'h[:,-1]: {h.shape}')
        out = self.fc(h)
        if verbose: print(f'out: {out.shape}')
        return out
    
model = Model().cuda()

In [14]:
model

Model(
  (embs): Embedding(5600, 100, padding_idx=1)
  (lstm): LSTM(100, 20, batch_first=True)
  (fc): Sequential(
    (0): Linear(in_features=20, out_features=8, bias=True)
    (1): ReLU()
    (2): Linear(in_features=8, out_features=6, bias=True)
  )
)

In [15]:
for i,(xb,yb) in enumerate(dls.train):
    print(xb.shape)
    if i==2: break

torch.Size([64, 72])
torch.Size([64, 17])
torch.Size([64, 12])


In [16]:
with torch.no_grad(): res = model(xb, verbose=True)

embs: torch.Size([64, 12, 100])
h: torch.Size([64, 12, 20])
h[:,-1]: torch.Size([64, 20])
out: torch.Size([64, 6])


In [106]:
learn = Learner(dls, model, metrics=accuracy)

## Train

In [107]:
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.022596,0.295323,0.916,00:01
1,0.060867,0.314961,0.913,00:01
2,0.079532,0.285426,0.9115,00:01
3,0.062011,0.276422,0.9225,00:01
4,0.054004,0.297907,0.923,00:01
5,0.032872,0.28596,0.9215,00:01
6,0.023305,0.287847,0.923,00:01
7,0.012118,0.28216,0.9285,00:01
8,0.010806,0.291572,0.925,00:01
9,0.007981,0.295205,0.926,00:01


In [156]:
inp,_,lbls,preds = learn.get_preds(with_input=True, with_decoded=True)

In [209]:
df_res = (
    pl.DataFrame({'inp':map(dls.tfms[0].decode, inp), 'label':lbls, 'pred':preds})
    .with_columns(pl.col('label','pred').map_elements(lambda i: dls.categorize.vocab[i], return_dtype=str))
)
df_res.head()

inp,label,pred
str,str,str
"""xxbos i m feeling quite sad and sorry for myself but ill snap out of it soon xxpad""","""sadness""","""sadness"""
"""xxbos i feel like i am still looking at a blank canvas blank pieces of paper xxpad""","""sadness""","""sadness"""
"""xxbos i feel like a faithful xxunk xxpad""","""love""","""love"""
"""xxbos i am just feeling cranky and blue xxpad""","""anger""","""anger"""
"""xxbos i can have for a treat or if i am feeling festive xxpad""","""joy""","""joy"""


In [212]:
(
    df_res
    .group_by('label')
    .agg(
        pl.len(),
        (pl.col.label==pl.col.pred).mean().alias('accuracy'),
    )
    .sort('label')
)

label,len,accuracy
str,u32,f64
"""anger""",275,0.945455
"""fear""",212,0.853774
"""joy""",704,0.948864
"""love""",178,0.865169
"""sadness""",550,0.96
"""surprise""",81,0.753086


In [213]:
interp = Interpretation.from_learner(learn)

In [214]:
interp.plot_top_losses(5)

Unnamed: 0,input,target,predicted,probability,loss
0,xxbos i never knew i could be so weak i could nt even fight what i was feeling i knew i hated to feel that way yet i just let the emotions run free i acted w xxrep 3 a y xxunk like a child deprived of xxunk,anger,sadness,0.999996781349182,17.352039337158203
1,xxbos i had applied for a job and they had assured me that the exams would take place a few months later a week later i went to obtain some more information and they told me that the exams had already taken place,anger,joy,0.9999806880950928,16.076684951782227
2,xxbos i feel so amazing about taking this trip as i think ill finally be able to relax and feel comfortable at home and somehow just xxunk back into it,surprise,joy,0.9999969005584716,14.491023063659668
3,xxbos when we were able to afford a new sofa a xxunk one which cost xxunk i had waited for it for years,joy,fear,0.8881910443305969,13.732008934020996
4,xxbos i passed an exam that i was absolutely certain that i had failed,joy,anger,0.8398582339286804,13.58497142791748
