In [1]:
import torch
import json_repair
import pandas as pd

from tqdm import tqdm
from datasets import Dataset
from datasets import load_dataset
from itertools import chain

from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel, is_bfloat16_supported

from sklearn.metrics import classification_report
from sklearn.preprocessing import MultiLabelBinarizer

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
from huggingface_hub import login

hf_key = "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"
login(hf_key)

In [3]:
max_seq_length = 2048
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    #model_name="unsloth/Qwen2.5-7B-Instruct-bnb-4bit",
    max_seq_length=max_seq_length,
    load_in_4bit=True,
    dtype=None,
)

==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.47.0.
   \\   /|    GPU: NVIDIA H100 NVL. Max memory: 93.003 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 9.0. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [4]:
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"], 
    use_rslora=True,
    use_gradient_checkpointing=True
)

Unsloth 2024.12.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


## data

In [5]:
dd_dataset = load_dataset("li2017dailydialog/daily_dialog", trust_remote_code=True)

In [6]:
dd_dataset

DatasetDict({
    train: Dataset({
        features: ['dialog', 'act', 'emotion'],
        num_rows: 11118
    })
    validation: Dataset({
        features: ['dialog', 'act', 'emotion'],
        num_rows: 1000
    })
    test: Dataset({
        features: ['dialog', 'act', 'emotion'],
        num_rows: 1000
    })
})

In [7]:
dd_dataset['train'][0] # type: ignore

{'dialog': ['Say , Jim , how about going for a few beers after dinner ? ',
  ' You know that is tempting but is really not good for our fitness . ',
  ' What do you mean ? It will help us to relax . ',
  " Do you really think so ? I don't . It will just make us fat and act silly . Remember last time ? ",
  " I guess you are right.But what shall we do ? I don't feel like sitting at home . ",
  ' I suggest a walk over to the gym where we can play singsong and meet some of our friends . ',
  " That's a good idea . I hear Mary and Sally often go there to play pingpong.Perhaps we can make a foursome with them . ",
  ' Sounds great to me ! If they are willing , we could ask them to go dancing with us.That is excellent exercise and fun , too . ',
  " Good.Let ' s go now . ",
  ' All right . '],
 'act': [3, 4, 2, 2, 2, 3, 4, 1, 3, 4],
 'emotion': [0, 0, 0, 0, 0, 0, 4, 4, 4, 4]}

In [8]:
dd_utterances_train_l = []
dd_emotions_train_l = []

for i in range(len(dd_dataset['train'])): 
    
    dd_utterances_train_l.append(dd_dataset['train'][i]['dialog'])
    dd_emotions_train_l.append(dd_dataset['train'][i]['emotion'])
    
utterances_train_l = list(chain.from_iterable(dd_utterances_train_l))
emotions_train_l = list(chain.from_iterable(dd_emotions_train_l))

In [9]:
dd_utterances_test_l = []
dd_emotions_test_l = []

for i in range(len(dd_dataset['test'])): 
    
    dd_utterances_test_l.append(dd_dataset['test'][i]['dialog'])
    dd_emotions_test_l.append(dd_dataset['test'][i]['emotion'])
    
utterances_test_l = list(chain.from_iterable(dd_utterances_test_l))
emotions_test_l = list(chain.from_iterable(dd_emotions_test_l))

In [10]:
len(utterances_test_l), len(emotions_test_l)

(7740, 7740)

In [11]:
dd_utterances_val_l = []
dd_emotions_val_l = []

for i in range(len(dd_dataset['validation'])): 
    
    dd_utterances_val_l.append(dd_dataset['validation'][i]['dialog'])
    dd_emotions_val_l.append(dd_dataset['validation'][i]['emotion'])
    
utterances_val_l = list(chain.from_iterable(dd_utterances_val_l))
emotions_val_l = list(chain.from_iterable(dd_emotions_val_l))

In [12]:
df_train = pd.DataFrame({
    'utterance': utterances_train_l,
    'emotion': emotions_train_l
})

In [13]:
df_test = pd.DataFrame({
    'utterance': utterances_test_l,
    'emotion': emotions_test_l
})

In [14]:
df_val = pd.DataFrame({
    'utterance': utterances_val_l,
    'emotion': emotions_val_l
})

## DD dataset messages

In [15]:
emotion_dict = {
    0: "neutral",
    1: "anger",
    2: "disgust",
    3: "fear",
    4: "happiness",
    5: "sadness",
    6: "surprise"
}

In [16]:
def map_emotions(emotion_tag):
    
    return {"emotion": [emotion_dict.get(emotion_tag)]}

In [17]:
def build_instruction():
    
    emotion_classes = emotions = ["neutral", "anger", "disgust", "fear", "happiness", "sadness", "surprise"]
    
    #formatted_classes = ", ".join([f'"{emotion}"' for emotion in emotion_classes])
    
    instruction = f"""

### Instruction:
You are an advanced english language expert specializing in emotion recognition of sentences. Analyze the following sentences and identify the emotions expressed.

INPUT:
- A single sentence from an English dialogue.

OUTPUT:
- JSON with single key "emotions"
- Value: a list with EXACTLY ONE emotion class.
- ONLY use these emotions: {emotion_classes}
- NO OTHER EMOTION CLASSES ARE ALLOWED

RULES:
1. Each utterance must have ONLY ONE least one emotion from the list above
2. Multiple emotions per utterance are NOT allowed
3. No explanations, only JSON output

Example format:
{{"emotion": ['anger]}}

"""
    
    return instruction

In [18]:
instruction = build_instruction()

### train and eval dataset

In [19]:
sys_msg_l = []
human_msg_l = []
assistant_msg_l = []

for _, row in df_train.iterrows():
        
        #prompt = instruction.replace("<comic_title>", row['comics_title']).replace("<speaker_id>", row['speaker_id']).replace("<utterance>", row['utterance'])
        
        sys_msg = {'from': 'sys', 'value': instruction}
        usr_msg = {'from': 'human', 'value': f"""Now classify the emotion of this sentence: {row.utterance}"""}
        
        #emotion = map_emotions(row.emotion)

        assistant_msg = {'from': 'gpt', 'value': f"""{map_emotions(row.emotion)}"""}

        sys_msg_l.append(sys_msg)
        human_msg_l.append(usr_msg)
        assistant_msg_l.append(assistant_msg)
        

In [20]:
dd_dataset_train = []

for i in range(len(human_msg_l)):    

    dd_dataset_train.append([sys_msg_l[i], human_msg_l[i], assistant_msg_l[i]])

In [21]:
dd_dataset_train[0]

[{'from': 'sys',
  'value': '\n\n### Instruction:\nYou are an advanced english language expert specializing in emotion recognition of sentences. Analyze the following sentences and identify the emotions expressed.\n\nINPUT:\n- A single sentence from an English dialogue.\n\nOUTPUT:\n- JSON with single key "emotions"\n- Value: a list with EXACTLY ONE emotion class.\n- ONLY use these emotions: [\'neutral\', \'anger\', \'disgust\', \'fear\', \'happiness\', \'sadness\', \'surprise\']\n- NO OTHER EMOTION CLASSES ARE ALLOWED\n\nRULES:\n1. Each utterance must have ONLY ONE least one emotion from the list above\n2. Multiple emotions per utterance are NOT allowed\n3. No explanations, only JSON output\n\nExample format:\n{"emotion": [\'anger]}\n\n'},
 {'from': 'human',
  'value': 'Now classify the emotion of this sentence: Say , Jim , how about going for a few beers after dinner ? '},
 {'from': 'gpt', 'value': "{'emotion': ['neutral']}"}]

In [22]:
dd_dataset_train = Dataset.from_dict({
    'conversations': dd_dataset_train
})

In [23]:
tokenizer = get_chat_template(
    tokenizer,
    mapping={"role": "from", "content": "value", "user": "human", "assistant": "gpt", "system": "sys"},
)

def apply_template_comics(examples):
    messages = examples["conversations"]
    text = [tokenizer.apply_chat_template(message, tokenize=False, add_generation_prompt=False) for message in messages]
    return {"text": text}

Unsloth: Will map <|im_end|> to EOS = <|eot_id|>.


In [24]:
dd_dataset_train = dd_dataset_train.map(apply_template_comics, batched=True)

Map:   0%|          | 0/87170 [00:00<?, ? examples/s]

In [25]:
### EVAL

In [26]:
sys_msg_l = []
human_msg_l = []
assistant_msg_l = []

for _, row in df_val.iterrows():
        
        #prompt = instruction.replace("<comic_title>", row['comics_title']).replace("<speaker_id>", row['speaker_id']).replace("<utterance>", row['utterance'])
        
        sys_msg = {'from': 'sys', 'value': instruction}
        usr_msg = {'from': 'human', 'value': f"""Now classify the emotion of this sentence: {row.utterance}"""}

        assistant_msg = {'from': 'gpt', 'value': f"""{map_emotions(row.emotion)}"""}

        sys_msg_l.append(sys_msg)
        human_msg_l.append(usr_msg)
        assistant_msg_l.append(assistant_msg)
        

In [27]:
dd_dataset_val = []

for i in range(len(human_msg_l)):    

    dd_dataset_val.append([sys_msg_l[i], human_msg_l[i], assistant_msg_l[i]])

In [28]:
dd_dataset_val = Dataset.from_dict({
    'conversations': dd_dataset_val
})

In [29]:
tokenizer = get_chat_template(
    tokenizer,
    mapping={"role": "from", "content": "value", "user": "human", "assistant": "gpt", "system": "sys"},
)

def apply_template_comics(examples):
    messages = examples["conversations"]
    text = [tokenizer.apply_chat_template(message, tokenize=False, add_generation_prompt=False) for message in messages]
    return {"text": text}

Unsloth: Will map <|im_end|> to EOS = <|im_end|>.


In [30]:
dd_dataset_val = dd_dataset_val.map(apply_template_comics, batched=True)

Map:   0%|          | 0/8069 [00:00<?, ? examples/s]

In [31]:
print(dd_dataset_train[0]['text'])

<|im_start|>system


### Instruction:
You are an advanced english language expert specializing in emotion recognition of sentences. Analyze the following sentences and identify the emotions expressed.

INPUT:
- A single sentence from an English dialogue.

OUTPUT:
- JSON with single key "emotions"
- Value: a list with EXACTLY ONE emotion class.
- ONLY use these emotions: ['neutral', 'anger', 'disgust', 'fear', 'happiness', 'sadness', 'surprise']
- NO OTHER EMOTION CLASSES ARE ALLOWED

RULES:
1. Each utterance must have ONLY ONE least one emotion from the list above
2. Multiple emotions per utterance are NOT allowed
3. No explanations, only JSON output

Example format:
{"emotion": ['anger]}

<|im_end|>
<|im_start|>user
Now classify the emotion of this sentence: Say , Jim , how about going for a few beers after dinner ? <|im_end|>
<|im_start|>assistant
{'emotion': ['neutral']}<|im_end|>



In [32]:
print(dd_dataset_val[0]['text'])

<|im_start|>system


### Instruction:
You are an advanced english language expert specializing in emotion recognition of sentences. Analyze the following sentences and identify the emotions expressed.

INPUT:
- A single sentence from an English dialogue.

OUTPUT:
- JSON with single key "emotions"
- Value: a list with EXACTLY ONE emotion class.
- ONLY use these emotions: ['neutral', 'anger', 'disgust', 'fear', 'happiness', 'sadness', 'surprise']
- NO OTHER EMOTION CLASSES ARE ALLOWED

RULES:
1. Each utterance must have ONLY ONE least one emotion from the list above
2. Multiple emotions per utterance are NOT allowed
3. No explanations, only JSON output

Example format:
{"emotion": ['anger]}

<|im_end|>
<|im_start|>user
Now classify the emotion of this sentence: Good morning , sir . Is there a bank near here ? <|im_end|>
<|im_start|>assistant
{'emotion': ['neutral']}<|im_end|>



In [33]:
dd_dataset_val

Dataset({
    features: ['conversations', 'text'],
    num_rows: 8069
})

## TRAINING 

In [34]:
FastLanguageModel.for_training(model)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 4096, padding_idx=128004)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lor

In [35]:
OUTPUT_DIR = "/Utilisateurs/umushtaq/emotion_analysis_comics/output_dirs_dd_ft"

In [36]:
args=TrainingArguments(
    
        learning_rate=3e-4,
        lr_scheduler_type="cosine",
        per_device_train_batch_size=256,
        gradient_accumulation_steps=2,
        num_train_epochs=3,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=50,
        optim="adamw_8bit",
        weight_decay=0.01,
        warmup_steps=10,
        
        eval_strategy="steps",  # Run evaluation during training (can also use "epoch")
        eval_steps=50,  # Perform evaluation every 50 steps
        save_strategy="steps",  # Save the model every few steps
        save_steps=50,  # Save every 200 steps
        load_best_model_at_end=True,
    
        output_dir=OUTPUT_DIR,
        seed=0,
        
    )

In [37]:
trainer=SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dd_dataset_train,  # Replace with your train dataset
    eval_dataset=dd_dataset_val, 
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=args,
)

Map (num_proc=2):   0%|          | 0/87170 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/8069 [00:00<?, ? examples/s]

In [38]:
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 87,170 | Num Epochs = 3
O^O/ \_/ \    Batch size per device = 256 | Gradient Accumulation steps = 2
\        /    Total batch size = 512 | Total steps = 510
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss,Validation Loss
50,0.9619,0.187421
100,0.3802,0.184905
150,0.3739,0.182792
200,0.3668,0.181979
250,0.361,0.180454
300,0.3584,0.179548
350,0.3522,0.180655
400,0.3344,0.179627
450,0.3361,0.179439
500,0.3344,0.179727


TrainOutput(global_step=510, training_loss=0.41442000258202644, metrics={'train_runtime': 11798.5815, 'train_samples_per_second': 22.165, 'train_steps_per_second': 0.043, 'total_flos': 3.351766689953415e+18, 'train_loss': 0.41442000258202644, 'epoch': 2.9853372434017595})

In [39]:
model = FastLanguageModel.for_inference(model)

In [103]:
#df_test = df_s[df_s.split == "TEST"].reset_index(drop=True)

In [40]:
sys_msg_l = []
human_msg_l = []
assistant_msg_l = []

for _, row in df_test.iterrows():
        
        #prompt = instruction.replace("<comic_title>", row['comics_title']).replace("<speaker_id>", row['speaker_id']).replace("<utterance>", row['utterance'])
        
        sys_msg = {'from': 'system', 'value': instruction}
        usr_msg = {'from': 'user', 'value': f"""Now classify the emotion of this sentence: {row.utterance}"""}
        
        #emotion = map_emotions(row.emotion)

        assistant_msg = {'from': 'assistant', 'value': ""}

        sys_msg_l.append(sys_msg)
        human_msg_l.append(usr_msg)
        assistant_msg_l.append(assistant_msg)
        

In [41]:
dd_test_messages = []

for i in range(len(human_msg_l)):    

    dd_test_messages.append([sys_msg_l[i], human_msg_l[i], assistant_msg_l[i]])

In [42]:
len(dd_test_messages)

7740

In [43]:
# test_messages = test_messages[:100]

In [47]:
raw_outputs = []

for message in tqdm(dd_test_messages):
    
    inputs = tokenizer.apply_chat_template(
    message,
    tokenize=True,
    add_generation_prompt=True,
    return_dict=True,
    return_tensors="pt",
).to("cuda")
    #print(inputs)
    #break
    
    output = model.generate(**inputs, max_new_tokens=128)[0]
    
    input_length = inputs.input_ids.shape[1]
    generated_tokens = output[input_length:]
    
    decoded_output = tokenizer.decode(generated_tokens, skip_special_tokens=True)  
    #decoded_output = tokenizer.decode(output, skip_special_tokens=True)
    raw_outputs.append(decoded_output)
    #break

100%|██████████| 7740/7740 [43:50<00:00,  2.94it/s]


In [48]:
#print(tokenizer.decode(inputs[0]))

In [121]:
len(raw_outputs)

7740

In [122]:
raw_outputs[0]

"{'emotion': ['neutral']}"

In [123]:
grounds = df_test.emotion.tolist()

In [124]:
len(grounds)

7740

In [125]:
#predictions = [e.split('\n\n')[0] for e in raw_outputs]
#predictions = [json.loads(e) for e in raw_outputs]

In [126]:
#len(predictions)

In [127]:
#predictions

In [128]:
predictions = [json_repair.loads(e) for e in raw_outputs]

In [129]:
predictions

[{'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['happiness']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['happiness']},
 {'emotion': ['happiness']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral']},
 {'emotion': ['neutral

In [130]:
preds_l = []
bad_idx = []

for i, pred in enumerate(predictions):
    try:
        
        preds_l.append(pred['emotion'][0])
    except:
        print(i)
        bad_idx.append(i)

In [131]:
grounds = [item for i, item in enumerate(grounds) if i not in bad_idx]

In [132]:
len(grounds), len(preds_l)

(7740, 7740)

In [133]:
grounds

[0,
 6,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 3,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 4,
 0,
 0,
 4,
 4,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 4,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 4,
 0,
 4,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 5,
 0,
 5,
 0,
 5,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 4,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 4,
 0,
 0,
 0,
 0,
 0,
 4,
 0,
 0,
 0,
 4,
 4,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 6,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 4,
 0,
 0,
 0,
 0,
 4,
 0,
 0,
 0,
 0,
 0,
 0,
 4,
 4,
 0,
 0,
 4,
 0,
 0,
 5,
 0,
 0,
 6,
 0,
 0,
 0,
 4,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 4,
 4,
 4,
 4,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 4,


In [134]:
def map_emotions(emotion_tag):
    
    return emotion_dict.get(emotion_tag)

In [135]:
grounds = [map_emotions(elem) for elem in grounds]

In [136]:
grounds

['neutral',
 'surprise',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'fear',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'happiness',
 'neutral',
 'neutral',
 'happiness',
 'happiness',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'happiness',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral'

In [137]:
preds_l

['neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'happiness',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'happiness',
 'happiness',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'happiness',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutra

In [138]:
exclude_class = 'neutral'

In [139]:
filtered_indices = [i for i, label in enumerate(grounds) if label != exclude_class]
y_true_filtered = [grounds[i] for i in filtered_indices]
y_pred_filtered = [preds_l[i] for i in filtered_indices]

In [140]:
len(y_true_filtered), len(y_pred_filtered)

(1419, 1419)

In [141]:
y_true_filtered

['surprise',
 'fear',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'sadness',
 'sadness',
 'sadness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'surprise',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'sadness',
 'surprise',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'sadness',
 'surprise',
 'sadness',
 'sadness',
 'surprise',
 'happiness',
 'anger',
 'anger',
 'anger',
 'happiness',
 'sadness',
 'surprise',
 'happiness',
 'happiness',
 'sadness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'sadness',
 'surprise',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'anger',
 'happiness',
 'happiness',
 'surprise',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',

In [142]:
y_pred_filtered

['neutral',
 'neutral',
 'happiness',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'happiness',
 'neutral',
 'sadness',
 'neutral',
 'happiness',
 'happiness',
 'happiness',
 'neutral',
 'neutral',
 'neutral',
 'happiness',
 'neutral',
 'neutral',
 'happiness',
 'happiness',
 'neutral',
 'neutral',
 'happiness',
 'neutral',
 'happiness',
 'happiness',
 'neutral',
 'happiness',
 'neutral',
 'neutral',
 'happiness',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'surprise',
 'happiness',
 'anger',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'surprise',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'happiness',
 'neutral',
 'neutral',
 'happiness',
 'neutral',
 'neutral',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'neutral',
 'happiness',
 'neutral',
 'neutral',
 'neutral',
 'surprise',
 'neutral',
 'happiness',
 'neutral',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'happiness',
 'neutral',
 'neutral',
 'anger

In [152]:
print(classification_report(y_true_filtered, y_pred_filtered, zero_division=0))

              precision    recall  f1-score   support

       anger       0.78      0.21      0.33       118
     disgust       1.00      0.04      0.08        47
        fear       1.00      0.12      0.21        17
   happiness       0.98      0.47      0.63      1019
     neutral       0.00      0.00      0.00         0
     sadness       1.00      0.09      0.16       102
    surprise       0.76      0.45      0.57       116

    accuracy                           0.40      1419
   macro avg       0.79      0.20      0.28      1419
weighted avg       0.94      0.40      0.55      1419



In [144]:
from sklearn.metrics import classification_report, f1_score

In [155]:
micro_f1 = f1_score(y_true_filtered, y_pred_filtered, average="micro", zero_division=0)

In [156]:
micro_f1

0.4002818886539817

In [147]:
mlb = MultiLabelBinarizer()

In [148]:
y_true_mhot = mlb.fit_transform(grounds)
y_pred_mhot = mlb.transform(preds_l)

In [149]:
y_pred_mhot.shape

(7740, 14)

In [150]:
y_pred_mhot.shape

(7740, 14)

In [151]:
mlb.classes_

array(['a', 'd', 'e', 'f', 'g', 'h', 'i', 'l', 'n', 'p', 'r', 's', 't',
       'u'], dtype=object)

In [127]:
print(classification_report(y_true_mhot, y_pred_mhot, target_names=mlb.classes_, digits=3))

              precision    recall  f1-score   support

       Anger      0.545     0.574     0.559       399
     Disgust      0.250     0.213     0.230        47
        Fear      0.405     0.545     0.465       235
         Joy      0.593     0.430     0.499       244
     Neutral      0.321     0.286     0.303        63
     Sadness      0.517     0.519     0.518       314
    Surprise      0.650     0.480     0.553       279

   micro avg      0.514     0.498     0.506      1581
   macro avg      0.469     0.435     0.447      1581
weighted avg      0.527     0.498     0.507      1581
 samples avg      0.528     0.511     0.496      1581



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
