# Multi-Class Text Classification for Emotions using BERT

## Voraussetzungen

* CUDA-fähige Grafikkarte mit 8 GB RAM - dieser Wert (ca. 7.3 GB, geprüft mit Linux-Befehl `nvidia-smi`) wird beim Training als VRAM-Auslastung erreicht mit der Batch size 8 für das Modell `bert-base-cased`
* Damit bekommt man in dem Schritt etwa 3,4 it/seconds. Mit batch_size 10, was noch in den Speicher passt, würden es weniger werden.

* Ausführungszeit: auf 20.000 Datensätzen => ca. 1:40 h:mm, also 1 h und 40 min
* auf gesamter Trainingsdatenmenge von 58.000 Datensätzen dann etwa 4,5 Stunden.

In [1]:
# Startzeitpunkt dieses Jupyter-Notebooks
! date

Mon Aug  7 23:35:49 CEST 2023


In [2]:
# Abhängigkeiten installieren
# ! pip install datasets huggingface_hub ipywidgets evaluate 'transformers[torch]' torch xformers plotnine

In [3]:
import numpy as np
import pandas as pd

# We need the sys package to load modules from another directory:
import sys
sys.path.append('../')
from preprocessing.preprocessors import *

import random # für random.sample
import evaluate # für Evaluierung beim Training des Classifiers

from datasets import Dataset # um damit Transformer-kompatible Datasets zu erzeugen, die vorher in Panda DataFrames gespeichert sind 
from sklearn.metrics import classification_report # ganz am Ende wird damit ein Bericht erzeugt, der die Klassifikation bewertet
from transformers import AutoTokenizer # damit wird der Tokenizer zu einem Huggingface-Modell gebildet
from transformers import DataCollatorWithPadding # damit werden gleich lange Input-Sequenzen erzeugt, die dann ins Transformer-Modell gehen, es wird also trunkiert und gepadded
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer  # für das Transfer Learning des angepassten Modells
from transformers import pipeline # Inferenz-Pipeline zusammenbauen
from tqdm import tqdm # Fortschritts-Monitoring

# Grafiken erzeugen ähnlich zu ggplot in R
# from plotnine import ggplot, aes, geom_tile, coord_flip, theme, geom_line, labs, element_text
# from plotnine import scale_x_discrete, geom_vline


In [4]:
# Beispieltexte komplett ansehen können
pd.options.display.max_colwidth = None # default value is 50, max would be "None"
pd.set_option('display.max_rows', 50) # default value is 10, max would be "None"

#### Get the data

In [5]:
df = pd.read_csv("../data/GoEmotions.csv")
df_clean = clean_df(df)
# r, c = df_clean.shape
# print(f"The data has {r} row and {c} columns")
df_clean

Unnamed: 0,text,id,author,subreddit,rater_id,admiration,amusement,anger,annoyance,approval,...,love,nervousness,optimism,pride,realization,relief,remorse,sadness,surprise,neutral
0,That game hurt.,eew5j0j,Brdd9,nrl,1,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,"You do right, if you don't care then fuck 'em!",ed2mah1,Labalool,confessions,37,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,Man I love reddit.,eeibobj,MrsRobertshaw,facepalm,18,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,"[NAME] was nowhere near them, he was by the Falcon.",eda6yn6,American_Fascist713,starwarsspeculation,2,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
5,"Right? Considering it’s such an important document, I should know the damned thing backwards and forwards... thanks again for the help!",eespn2i,ImperialBoss,TrueReddit,61,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
211219,"Well, I'm glad you're out of all that now. How awful. The way they act, they make you think healthy boundaries are you being hostile.",ed89acy,pompompompi,raisedbynarcissists,2,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
211220,Everyone likes [NAME].,ee6pagw,Senshado,heroesofthestorm,16,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
211221,Well when you’ve imported about a gazillion of them I or your country it’s gets serious.,ef28nod,5inchloser,nottheonion,15,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
211222,That looks amazing,ee8hse1,springt1me,shittyfoodporn,70,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
clustered_df = create_clustered_df(df_clean)
# r, c = clustered_df.shape
# print(f"The data has {r} row and {c} columns")
clustered_df

Unnamed: 0,text,id,author,subreddit,rater_id,level0,level1,level2,level3
0,That game hurt.,eew5j0j,Brdd9,nrl,1,sadness,dis_sad,dis_sad_gri,rem_emb_dis_sad_gri
2,"You do right, if you don't care then fuck 'em!",ed2mah1,Labalool,confessions,37,neutral,neutral,neutral,neutral
3,Man I love reddit.,eeibobj,MrsRobertshaw,facepalm,18,love,love,exc_joy_lov,amu_exc_joy_lov
4,"[NAME] was nowhere near them, he was by the Falcon.",eda6yn6,American_Fascist713,starwarsspeculation,2,neutral,neutral,neutral,neutral
5,"Right? Considering it’s such an important document, I should know the damned thing backwards and forwards... thanks again for the help!",eespn2i,ImperialBoss,TrueReddit,61,gratitude,gra_rel,pri_adm_gra_rel,pri_adm_gra_rel_app_rea
...,...,...,...,...,...,...,...,...,...
211219,"Well, I'm glad you're out of all that now. How awful. The way they act, they make you think healthy boundaries are you being hostile.",ed89acy,pompompompi,raisedbynarcissists,2,joy,exc_joy,exc_joy_lov,amu_exc_joy_lov
211220,Everyone likes [NAME].,ee6pagw,Senshado,heroesofthestorm,16,love,love,exc_joy_lov,amu_exc_joy_lov
211221,Well when you’ve imported about a gazillion of them I or your country it’s gets serious.,ef28nod,5inchloser,nottheonion,15,caring,caring,des_opt_car,des_opt_car
211222,That looks amazing,ee8hse1,springt1me,shittyfoodporn,70,admiration,pri_adm,pri_adm_gra_rel,pri_adm_gra_rel_app_rea


In [7]:
plutchik_df = create_plutchik_df(df_clean)
# r, c = plutchik_df.shape
# print(f"The data has {r} row and {c} columns")
plutchik_df

Unnamed: 0,text,id,author,subreddit,rater_id,level0,plutchik
0,That game hurt.,eew5j0j,Brdd9,nrl,1,sadness,betrübt
2,"You do right, if you don't care then fuck 'em!",ed2mah1,Labalool,confessions,37,neutral,neutral
3,Man I love reddit.,eeibobj,MrsRobertshaw,facepalm,18,love,verliebt
4,"[NAME] was nowhere near them, he was by the Falcon.",eda6yn6,American_Fascist713,starwarsspeculation,2,neutral,neutral
5,"Right? Considering it’s such an important document, I should know the damned thing backwards and forwards... thanks again for the help!",eespn2i,ImperialBoss,TrueReddit,61,gratitude,ehrfürchtig
...,...,...,...,...,...,...,...
211219,"Well, I'm glad you're out of all that now. How awful. The way they act, they make you think healthy boundaries are you being hostile.",ed89acy,pompompompi,raisedbynarcissists,2,joy,begeistert
211220,Everyone likes [NAME].,ee6pagw,Senshado,heroesofthestorm,16,love,verliebt
211221,Well when you’ve imported about a gazillion of them I or your country it’s gets serious.,ef28nod,5inchloser,nottheonion,15,caring,bewundernd
211222,That looks amazing,ee8hse1,springt1me,shittyfoodporn,70,admiration,bewundernd


### BERT for level 0 -> 27 emotions
following: https://huggingface.co/docs/transformers/tasks/sequence_classification

In [8]:
clustered_df.level0.unique()

array(['sadness', 'neutral', 'love', 'gratitude', 'disapproval',
       'amusement', 'disappointment', 'realization', 'admiration',
       'annoyance', 'confusion', 'optimism', 'excitement', 'caring',
       'remorse', 'joy', 'approval', 'embarrassment', 'surprise',
       'curiosity', 'anger', 'grief', 'disgust', 'pride', 'desire',
       'relief', 'fear', 'nervousness'], dtype=object)

In [9]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased") # differentiation between upper and lower case

In [10]:
id2label = {0: 'sadness', 1: 'neutral', 2: 'love', 3: 'gratitude', 4: 'disapproval',
       5: 'amusement', 6: 'disappointment', 7: 'realization', 8: 'admiration', 9:
       'annoyance', 10: 'confusion', 11: 'optimism', 12: 'excitement', 13: 'caring',
       14: 'remorse', 15: 'joy', 16: 'approval', 17: 'embarrassment', 18: 'surprise',
       19: 'curiosity', 20: 'anger', 21: 'grief', 22: 'disgust', 23: 'pride', 24: 'desire',
       25: 'relief', 26: 'fear', 27: 'nervousness'}

label2id = {v: k for k, v in id2label.items()}

In [11]:
# create a sample for local tests

# dataset = clustered_df.sample(n=1000, replace=False, random_state=123)
dataset = clustered_df.sample(n=20000, replace=False, random_state=123)

dataset["label"] = dataset["level0"].map(label2id.get) # to add column label map individual entries of emotions to ID
dataset

Unnamed: 0,text,id,author,subreddit,rater_id,level0,level1,level2,level3,label
58047,"I actually heard [NAME] inviting [NAME] and [NAME] to be on her show, after the interview with [NAME] ex wife.",ed0xjb6,unidrogon,90dayfianceuncensored,70,neutral,neutral,neutral,neutral,1
175996,It really do,ee6rr87,Slats04,forhonor,4,neutral,neutral,neutral,neutral,1
121014,Wanted something more on Imperius Angiris Wrath ulti at lvl 20... :'( Nice buff to Valorus Pursuit though!,ees9uev,Remus88Romulus,heroesofthestorm,42,sadness,dis_sad,dis_sad_gri,rem_emb_dis_sad_gri,0
171647,I hope xe had a nice talk with the premier,ef6g4jt,thrown_41232,ontario,15,optimism,des_opt,des_opt_car,des_opt_car,11
3222,Seriously!? Wouldn't a guy do the same thing if a girl didn't even attempt to get him off?,eecrhn4,hangry_potato,AskWomen,2,curiosity,cur_con,sur_cur_con,sur_cur_con,19
...,...,...,...,...,...,...,...,...,...,...
158031,That’s very admirable and courageous of her. I appreciate the many women that are doing that here on Reddit as well.,eez329z,nliwtbat,childfree,67,admiration,pri_adm,pri_adm_gra_rel,pri_adm_gra_rel_app_rea,8
79736,"4 lives essentially, its insane",ef1u8dh,trkh,self,70,embarrassment,embarrassment,embarrassment,embarrassment,17
71777,I regret that I understand that reference :(,ee18oid,slinkslowdown,tifu,72,remorse,rem_emb,rem_emb,rem_emb_dis_sad_gri,14
2842,I thought you were just going to wait her out. You actually engineered her loss of space to someone else. WELL DONE!!!!,ef7jkmz,Fluffydress,pettyrevenge,41,admiration,pri_adm,pri_adm_gra_rel,pri_adm_gra_rel_app_rea,8


In [12]:
# split the training data
from datasets import Dataset

training_data = dataset.groupby("level0").sample(frac=0.8, random_state=25) # stratified sampling
testing_data = dataset.drop(training_data.index)

training_data = Dataset.from_pandas(training_data) # create transformers compatible dataset from dataframe
testing_data = Dataset.from_pandas(testing_data)

def tokenize_function(examples): # replace representation of data, convert column text to tensor-based representation
    return tokenizer(examples["text"], padding="max_length", truncation=True)

tokenized_training_data = training_data.map(tokenize_function, batched=True) # convert text to tensor form
tokenized_testing_data = testing_data.map(tokenize_function, batched=True)

Map:   0%|          | 0/16002 [00:00<?, ? examples/s]

Map:   0%|          | 0/3998 [00:00<?, ? examples/s]

In [13]:
tokenized_training_data

Dataset({
    features: ['text', 'id', 'author', 'subreddit', 'rater_id', 'level0', 'level1', 'level2', 'level3', 'label', '__index_level_0__', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 16002
})

In [14]:
annoyance_example = random.sample(list(dataset.id[dataset.level0 == "annoyance"]), k=1) # example for annoyance
dataset.query('id==@annoyance_example')

Unnamed: 0,text,id,author,subreddit,rater_id,level0,level1,level2,level3,label
133015,The fact that the passenger gives a thumbs up drives me crazy,eej9uky,dantheman7480,SweatyPalms,11,annoyance,ang_ann,dis_ang_ann,dis_ang_ann_dis,9


In [15]:
desire_example = random.sample(list(dataset.id[dataset.level0 == "desire"]), k=1) # example for desire
dataset.query('id==@desire_example')

Unnamed: 0,text,id,author,subreddit,rater_id,level0,level1,level2,level3,label
22852,I wish everything the I was going through was a hallucination,edyxv3p,1000asses,mentalhealth,81,desire,des_opt,des_opt_car,des_opt_car,24


In [16]:
# check if data set is balanced
classCounts = dataset.level0.value_counts() 
print(classCounts)
# -> not balanced

level0
neutral           6482
approval          1311
admiration        1278
annoyance          956
disapproval        859
gratitude          843
amusement          695
curiosity          689
anger              604
confusion          589
love               582
joy                539
disappointment     516
optimism           501
realization        492
caring             454
sadness            452
surprise           391
excitement         371
disgust            332
desire             247
fear               208
remorse            178
embarrassment      152
relief              95
pride               82
nervousness         70
grief               32
Name: count, dtype: int64


In [17]:
numberOfDocuments = len(dataset)
numberOfDocuments

20000

In [18]:
# from huggingface_hub import notebook_login
# notebook_login()

# Classifier

In [19]:
data_collator = DataCollatorWithPadding(tokenizer=tokenizer) # Padding -> map all tensors to the same size
data_collator

DataCollatorWithPadding(tokenizer=BertTokenizerFast(name_or_path='bert-base-cased', vocab_size=28996, model_max_length=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True), padding=True, max_length=None, pad_to_multiple_of=None, return_tensors='pt')

In [20]:
accuracy = evaluate.load("accuracy") # define evaluation method -> quality

In [21]:
def compute_metrics(eval_pred): # function calculation metric
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)

In [22]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-cased", num_labels=28, id2label=id2label, label2id=label2id
)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initi

In [23]:
# training

training_args = TrainingArguments(
    output_dir="../models/model_level0",
    learning_rate=2e-5,  # standard
    per_device_train_batch_size=12, # size in which chunks are entered into the network, on how many data parallel weights are trained
    per_device_eval_batch_size=12,
    num_train_epochs=10,
    weight_decay=0.01,
    evaluation_strategy="epoch", # save model per epoch
    save_strategy="epoch",
    load_best_model_at_end=True,
    push_to_hub=False #,
    #label_names=["level0"],
)

# IMPORTANT: Set: Model, dataset, ... , define learning process, metrics, ...

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_training_data,
    eval_dataset=tokenized_testing_data,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics  
)

#checkpointing
#use cuda
trainer.train()

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.151,2.010411,0.438969
2,1.828,1.983874,0.438469
3,1.554,2.073496,0.42096
4,1.2399,2.232963,0.409455
5,1.008,2.437846,0.384692
6,0.8247,2.650943,0.381441
7,0.6649,2.823755,0.376188
8,0.5605,2.986976,0.37919
9,0.4852,3.110445,0.38019
10,0.3928,3.175715,0.374937


TrainOutput(global_step=13340, training_loss=1.0693066907250721, metrics={'train_runtime': 6260.2805, 'train_samples_per_second': 25.561, 'train_steps_per_second': 2.131, 'total_flos': 4.211285975801856e+16, 'train_loss': 1.0693066907250721, 'epoch': 10.0})

In [24]:
trainer.save_model("../models/model_level0_sample")

In [25]:
classifier = pipeline("text-classification", model="../models/model_level0_sample",device=0) # method pipeline -> string for text classificaton, folder, device (graphics card)
results = [classifier(text,truncation=True) for text in tqdm(dataset.text.to_list())] # list comprehension over all texts, tokenization in model, truncation -> padding too long texts

100%|██████████| 20000/20000 [03:54<00:00, 85.36it/s]


In [26]:
results = [tmp[0] for tmp in results]

In [27]:
results # list of dictionaries with labels, score -> decision and how high activation function for decision was
pd.DataFrame(results).to_pickle("../results/results_level0_sample.pkl")  # convert as dataframe, pick, safe

### Evaluation

In [28]:
df_results = pd.DataFrame.from_dict(results)
df_id =  pd.DataFrame(dataset["id"])
df_id = df_id.reset_index()
df_results["id"] = df_id["id"]
df_results

Unnamed: 0,label,score,id
0,neutral,0.556368,ed0xjb6
1,neutral,0.554485,ee6rr87
2,neutral,0.480834,ees9uev
3,optimism,0.547510,ef6g4jt
4,neutral,0.278006,eecrhn4
...,...,...,...
19995,admiration,0.652712,eez329z
19996,disgust,0.193177,ef1u8dh
19997,remorse,0.379275,ee18oid
19998,neutral,0.424216,ef7jkmz


In [29]:
data_classifies = pd.merge(dataset, df_results, on='id', how='left')

# merge classified data with original training data
# combine data with training data, concatenate results results and training data
# compare -> calculate f1

In [30]:
data_classifies.to_pickle("../results/data_classified_level0_sample.pkl")  
# data_classifies = pd.read_pickle("../results/data_classified_level0_1epoch.pkl")

In [31]:
data_classifies # contain gold standard and ergbnis cluster -> calculate F1, Precision, Recall
# label -> assigned by classifier (?????)
# level0 -> original label

Unnamed: 0,text,id,author,subreddit,rater_id,level0,level1,level2,level3,label_x,label_y,score
0,"I actually heard [NAME] inviting [NAME] and [NAME] to be on her show, after the interview with [NAME] ex wife.",ed0xjb6,unidrogon,90dayfianceuncensored,70,neutral,neutral,neutral,neutral,1,neutral,0.556368
1,It really do,ee6rr87,Slats04,forhonor,4,neutral,neutral,neutral,neutral,1,neutral,0.554485
2,Wanted something more on Imperius Angiris Wrath ulti at lvl 20... :'( Nice buff to Valorus Pursuit though!,ees9uev,Remus88Romulus,heroesofthestorm,42,sadness,dis_sad,dis_sad_gri,rem_emb_dis_sad_gri,0,neutral,0.480834
3,I hope xe had a nice talk with the premier,ef6g4jt,thrown_41232,ontario,15,optimism,des_opt,des_opt_car,des_opt_car,11,optimism,0.547510
4,Seriously!? Wouldn't a guy do the same thing if a girl didn't even attempt to get him off?,eecrhn4,hangry_potato,AskWomen,2,curiosity,cur_con,sur_cur_con,sur_cur_con,19,neutral,0.278006
...,...,...,...,...,...,...,...,...,...,...,...,...
25597,"4 lives essentially, its insane",ef1u8dh,trkh,self,70,embarrassment,embarrassment,embarrassment,embarrassment,17,disgust,0.193177
25598,I regret that I understand that reference :(,ee18oid,slinkslowdown,tifu,72,remorse,rem_emb,rem_emb,rem_emb_dis_sad_gri,14,remorse,0.379275
25599,I thought you were just going to wait her out. You actually engineered her loss of space to someone else. WELL DONE!!!!,ef7jkmz,Fluffydress,pettyrevenge,41,admiration,pri_adm,pri_adm_gra_rel,pri_adm_gra_rel_app_rea,8,neutral,0.424216
25600,And riding one makes you look like a man trying to be tough during his mid life crisis.,eerdryr,Blacky05,forwardsfromgrandma,20,neutral,neutral,neutral,neutral,1,neutral,0.461723


In [32]:
test_data = data_classifies.query(f'id in {tokenized_testing_data["id"]}')
# tokenized_testing_data: Daten der Testdaten

In [34]:
target_names = ['sadness', 'neutral', 'love', 'gratitude', 'disapproval',
       'amusement', 'disappointment', 'realization', 'admiration',
       'annoyance', 'confusion', 'optimism', 'excitement', 'caring',
       'remorse', 'joy', 'approval', 'embarrassment', 'surprise',
       'curiosity', 'anger', 'grief', 'disgust', 'pride', 'desire',
       'relief', 'fear', 'nervousness']
print(classification_report(test_data.level0, test_data.label_y, target_names=target_names))
# level0 -> gold standard , label -> prediction

                precision    recall  f1-score   support

       sadness       0.58      0.59      0.59       426
       neutral       0.46      0.71      0.56       240
          love       0.42      0.36      0.39       224
     gratitude       0.20      0.10      0.13       347
   disapproval       0.34      0.16      0.21       491
     amusement       0.44      0.20      0.28       178
disappointment       0.58      0.13      0.21       204
   realization       0.76      0.06      0.11       211
    admiration       0.38      0.13      0.19        70
     annoyance       0.57      0.07      0.12       192
     confusion       0.23      0.09      0.13       302
      optimism       0.28      0.18      0.22       115
    excitement       0.58      0.15      0.23        48
        caring       0.25      0.03      0.05       134
       remorse       0.66      0.43      0.52        77
           joy       0.86      0.72      0.79       261
      approval       0.00      0.00      0.00  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [35]:
pd.DataFrame.from_dict(results)

Unnamed: 0,label,score
0,neutral,0.556368
1,neutral,0.554485
2,neutral,0.480834
3,optimism,0.547510
4,neutral,0.278006
...,...,...
19995,admiration,0.652712
19996,disgust,0.193177
19997,remorse,0.379275
19998,neutral,0.424216


In [36]:
#Final Classification/Viz
#final = pd.concat([dataset, pd.DataFrame.from_dict(results)],axis=1) # attach classified label to data
final = data_classifies.copy()
final['label_y'].value_counts()/final['label_y'].value_counts().sum() # ratio

label_y
neutral           0.528552
admiration        0.064448
amusement         0.053511
gratitude         0.036169
approval          0.035935
love              0.035818
anger             0.032341
annoyance         0.031404
joy               0.027732
surprise          0.023397
disapproval       0.022186
disgust           0.017381
sadness           0.016366
optimism          0.015272
remorse           0.014413
caring            0.014140
confusion         0.008554
fear              0.006796
desire            0.005859
excitement        0.003359
curiosity         0.003242
disappointment    0.002344
embarrassment     0.000781
Name: count, dtype: float64

In [None]:
! date