In [1]:
import json
import torch
import json_repair

import pandas as pd

from tqdm import tqdm
from datasets import Dataset
from datasets import load_dataset

from trl import SFTTrainer
from datasets import load_dataset
from transformers import TrainingArguments, TextStreamer
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel, is_bfloat16_supported

from sklearn.metrics import classification_report
from sklearn.preprocessing import MultiLabelBinarizer

Unsloth currently does not support multi GPU setups - but we are working on it!
Multiple CUDA devices detected but we require a single device.
We will override CUDA_VISIBLE_DEVICES to first device: 0.


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
max_seq_length = 2048
model, tokenizer = FastLanguageModel.from_pretrained(
    #model_name="unsloth/Llama-3.3-70B-Instruct-bnb-4bit",
    model_name="unsloth/Qwen2.5-32B-Instruct-bnb-4bit",
    max_seq_length=max_seq_length,
    load_in_4bit=True,
    dtype=None,
)

==((====))==  Unsloth 2024.12.4: Fast Qwen2 patching. Transformers:4.47.0.
   \\   /|    GPU: NVIDIA H100 NVL. Max memory: 93.003 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 9.0. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [3]:
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"], 
    use_rslora=True,
    use_gradient_checkpointing="unsloth"
)

Unsloth 2024.12.4 patched 64 layers with 64 QKV layers, 64 O layers and 64 MLP layers.


## data

In [4]:
df = pd.read_csv("/Utilisateurs/umushtaq/emotion_analysis_comics/dataset_files/comics_dataset.csv")

In [5]:
def get_emotions(row):

    utterance_emotions = row.emotion
    utterance_emotions_l = []
    emotion_class_labels = ["Anger", "Disgust", "Fear", "Sadness", "Surprise", "Joy"]

    if utterance_emotions == 'Neutral':
        
        utterance_emotions_l.append(utterance_emotions)
    
    else:
        utterance_emotions = utterance_emotions.split("-")
       
        #emotion_annotation_l = []

        for idx, emotion_annotation in enumerate(utterance_emotions):

            if '0' not in emotion_annotation:
         
                #emotion_annotation_l.append(emotion_class_labels[idx])
                #utterance_emotions_l.append(emotion_annotation[:-1])
                utterance_emotions_l.append(emotion_class_labels[idx])
            
        #title_emotions_l.append(emotion_annotation_l)
                

    return utterance_emotions_l

In [6]:
df['emotion_u'] = df.apply(lambda row: get_emotions(row), axis=1)

In [7]:
def build_instruction():
    emotion_classes = ["Anger", "Disgust", "Fear", "Sadness", "Surprise", "Joy", "Neutral"]
    formatted_classes = ", ".join([f'"{emotion}"' for emotion in emotion_classes])
    
    instruction = f"""### Emotion Analysis Expert Role

You are an advanced emotion analysis expert specializing in comic book dialogue interpretation. Your task is to analyze utterances and identify their emotional content.

INPUT:
- You will receive a single utterance from a comic book
- The utterance may express one or multiple emotions

TASK:
1. Carefully analyze the emotional context and tone of the utterance
2. Identify applicable emotions from the following classes:
   {formatted_classes}

OUTPUT REQUIREMENTS:
- Format: JSON object with a single key "list_emotion_classes"
- Value: Array of one or more emotion classes as strings

IMPORTANT NOTES:
- Do not include any explanations in the output, only the JSON object

"""
    return instruction

In [8]:
df

Unnamed: 0,file_name,page_nr,panel_nr,balloon_nr,utterance,raw_annotation,raw_emotion,raw_speaker_id,emotion,speaker_id,split,emotion_u
0,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,1,2,1,DID YOU HAVE TO ELECTROCUTE HER SO HARD?,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:ID-1,AN0-DI0-FE3-SA0-SU5-JO0,ID-1,TRAIN,"[Fear, Surprise]"
1,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,1,2,2,IT'S NOT LIKE I HAVE DIFFERENT SETTINGS.,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:ID-2,AN0-DI0-FE0-SA0-SU5-JO0,ID-2,TRAIN,[Surprise]
2,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,1,2,3,YOU'RE ELECTROCUTIONER. IT'S YOUR WHOLE THING....,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:ID-1,AN0-DI0-FE2-SA0-SU0-JO0,ID-1,TRAIN,[Fear]
3,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,1,3,1,"OH, HEY. I THINK SHE'S AWAKE.",2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:ID-2,AN0-DI0-FE0-SA0-SU4-JO0,ID-2,TRAIN,[Surprise]
4,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,1,4,1,"WELCOME BACK, MADAM MAYOR. BLOCKBUSTER IS PRET...",2024-08-27 - aselermekova20\nFeeling:AN3-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN3-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:ID-1,AN3-DI0-FE0-SA0-SU0-JO0,ID-1,TRAIN,[Anger]
...,...,...,...,...,...,...,...,...,...,...,...,...
7124,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,21,3,2,SHE WOULDN'T DO THAT TO US. WE TALKED FOR A LO...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:Eugene,AN0-DI0-FE1-SA3-SU0-JO0,Eugene,TRAIN,"[Fear, Sadness]"
7125,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,21,3,3,… I KNOW HER.,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:Eugene,AN0-DI0-FE1-SA3-SU0-JO0,Eugene,TRAIN,"[Fear, Sadness]"
7126,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,21,4,1,"UH, GUYS…",2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:JUANITA...,AN0-DI0-FE3-SA0-SU4-JO0,JUANITA SANCHEZ,TRAIN,"[Fear, Surprise]"
7127,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,22,1,1,PUT YOUR WEAPONS DOWN AND PUT YOUR HANDS IN TH...,2024-09-06 - SyimykRasulov\nFeeling:AN4-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN4-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:ID- 2,AN4-DI0-FE0-SA0-SU0-JO0,ID- 2,TRAIN,[Anger]


In [9]:
df_train = df[df.split == "TRAIN"].reset_index(drop=True)

In [10]:
df_train.shape

(5803, 12)

### Comics dataset

In [11]:
human_msg_l = []
assistant_msg_l = []

for _, row in df_train.iterrows():
        
        human_msg = {'from': 'human', 'value': build_instruction() + "Now classify this utterance: " + row['utterance']}
        
        #obj = {"list_emotion_classes": row['emotion_u']}
        obj = row['emotion_u']
        #assistant_msg = {'from': 'gpt', 'value': obj}
        assistant_msg = {'from': 'gpt', 'value': f'{{"list_emotion_classes": {obj}}}'}

        
        human_msg_l.append(human_msg)
        assistant_msg_l.append(assistant_msg)
        

In [12]:
comics_dataset = []

for i in range(len(human_msg_l)):
    
    #obj = {"list_emotion_classes": ["Anger", "Fear"]}

    comics_dataset.append([human_msg_l[i], assistant_msg_l[i]])

In [13]:
len(comics_dataset)

5803

In [14]:
def fix_comics_dataset(comics_dataset):
    fixed_comics_dataset = []
    for conversation in comics_dataset:
        fixed_conversation = []
        for message in conversation:
            if isinstance(message['value'], list):  # If the 'value' is a list of emotions
                message['value'] = ', '.join(message['value'])  # Join the list into a string
            fixed_conversation.append(message)
        fixed_comics_dataset.append(fixed_conversation)
    return fixed_comics_dataset

In [15]:
fixed_comics_dataset = fix_comics_dataset(comics_dataset)

In [16]:
dataset = Dataset.from_dict({
    'conversations': fixed_comics_dataset
})

In [17]:
dataset

Dataset({
    features: ['conversations'],
    num_rows: 5803
})

In [18]:
dataset[0]['conversations']

[{'from': 'human',
  'value': '### Emotion Analysis Expert Role\n\nYou are an advanced emotion analysis expert specializing in comic book dialogue interpretation. Your task is to analyze utterances and identify their emotional content.\n\nINPUT:\n- You will receive a single utterance from a comic book\n- The utterance may express one or multiple emotions\n\nTASK:\n1. Carefully analyze the emotional context and tone of the utterance\n2. Identify applicable emotions from the following classes:\n   "Anger", "Disgust", "Fear", "Sadness", "Surprise", "Joy", "Neutral"\n\nOUTPUT REQUIREMENTS:\n- Format: JSON object with a single key "list_emotion_classes"\n- Value: Array of one or more emotion classes as strings\n\nIMPORTANT NOTES:\n- Do not include any explanations in the output, only the JSON object\n\nNow classify this utterance: DID YOU HAVE TO ELECTROCUTE HER SO HARD?'},
 {'from': 'gpt',
  'value': '{"list_emotion_classes": [\'Fear\', \'Surprise\']}'}]

In [19]:
tokenizer = get_chat_template(
    tokenizer,
    mapping={"role": "from", "content": "value", "user": "human", "assistant": "gpt"},
    chat_template="chatml",
)

def apply_template_comics(examples):
    messages = examples["conversations"]
    #messages = examples['input'] + examples['output']
    text = [tokenizer.apply_chat_template(message, tokenize=False, add_generation_prompt=False) for message in messages]
    return {"text": text}

Unsloth: Will map <|im_end|> to EOS = <|im_end|>.


In [20]:
comics_dataset = dataset.map(apply_template_comics, batched=True)

Map:   0%|          | 0/5803 [00:00<?, ? examples/s]

In [21]:
comics_dataset

Dataset({
    features: ['conversations', 'text'],
    num_rows: 5803
})

In [22]:
def split_dataset(dataset, train_ratio=0.8):
    train_test = dataset.train_test_split(test_size=1 - train_ratio)
    return train_test

dataset_split = split_dataset(comics_dataset)

In [23]:
train_dataset = dataset_split['train']
eval_dataset = dataset_split['test']

In [24]:
train_dataset

Dataset({
    features: ['conversations', 'text'],
    num_rows: 4642
})

In [25]:
print(train_dataset[1360]['text'])

<|im_start|>user
### Emotion Analysis Expert Role

You are an advanced emotion analysis expert specializing in comic book dialogue interpretation. Your task is to analyze utterances and identify their emotional content.

INPUT:
- You will receive a single utterance from a comic book
- The utterance may express one or multiple emotions

TASK:
1. Carefully analyze the emotional context and tone of the utterance
2. Identify applicable emotions from the following classes:
   "Anger", "Disgust", "Fear", "Sadness", "Surprise", "Joy", "Neutral"

OUTPUT REQUIREMENTS:
- Format: JSON object with a single key "list_emotion_classes"
- Value: Array of one or more emotion classes as strings

IMPORTANT NOTES:
- Do not include any explanations in the output, only the JSON object

Now classify this utterance: NATHAN, YOU KNOW AS WELL AS I DO THAT CONRAD PAULSON IS WORTH TAKING DOWN.<|im_end|>
<|im_start|>assistant
{"list_emotion_classes": ['Anger']}<|im_end|>



In [26]:
args=TrainingArguments(
        learning_rate=3e-4,
        lr_scheduler_type="cosine",
        per_device_train_batch_size=16,
        gradient_accumulation_steps=2,
        num_train_epochs=3,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=100,
        optim="adamw_8bit",
        weight_decay=0.01,
        warmup_steps=10,
        
        eval_strategy="steps",  # Run evaluation during training (can also use "epoch")
        eval_steps=50,  # Perform evaluation every 50 steps
        save_strategy="steps",  # Save the model every few steps
        save_steps=50,  # Save every 200 steps
        load_best_model_at_end=True,
    
        output_dir="ft_uns_comics_qwen2.5",
        seed=0,
    )

In [27]:
trainer=SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,  # Replace with your train dataset
    eval_dataset=eval_dataset, 
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=args,
)

Map (num_proc=2):   0%|          | 0/4642 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/1161 [00:00<?, ? examples/s]

In [28]:
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 4,642 | Num Epochs = 3
O^O/ \_/ \    Batch size per device = 16 | Gradient Accumulation steps = 2
\        /    Total batch size = 32 | Total steps = 435
 "-____-"     Number of trainable parameters = 134,217,728


Step,Training Loss,Validation Loss
50,No log,0.226696
100,0.733800,0.220214
150,0.733800,0.220964
200,0.402300,0.223569
250,0.402300,0.222638
300,0.354700,0.237229
350,0.354700,0.243921
400,0.256100,0.242972


TrainOutput(global_step=435, training_loss=0.42161094840915725, metrics={'train_runtime': 3374.1932, 'train_samples_per_second': 4.127, 'train_steps_per_second': 0.129, 'total_flos': 6.313861212991488e+17, 'train_loss': 0.42161094840915725, 'epoch': 2.9828178694158076})

In [29]:
model = FastLanguageModel.for_inference(model)

In [30]:
df_test = df[df.split == "TEST"].reset_index(drop=True)

In [31]:
human_msg_l = []
assistant_msg_l = []

for _, row in df_test.iterrows():
        
        human_msg = {'role': 'user', 'content': build_instruction() + "Now classify this utterance: " + row['utterance']}
        
        #obj = {"list_emotion_classes": row['emotion_u']}
        obj = row['emotion_u']
        assistant_msg = {'role': 'assistant', 'content': ""}
        
        human_msg_l.append(human_msg)
        assistant_msg_l.append(assistant_msg)
        
        

In [32]:
test_messages = []

for i in range(len(human_msg_l)):
    
    #obj = {"list_emotion_classes": ["Anger", "Fear"]}

    test_messages.append([human_msg_l[i], assistant_msg_l[i]])

In [33]:
len(test_messages)

1326

In [34]:
# test_messages = test_messages[:100]

In [35]:
raw_outputs = []

for message in tqdm(test_messages):
    
    inputs = tokenizer.apply_chat_template(
    message,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
).to("cuda")
    #print(inputs)
    #break
    
    output = model.generate(input_ids=inputs, max_new_tokens=128)[0]
    
    input_length = inputs.shape[1]
    generated_tokens = output[input_length:]
    
    decoded_output = tokenizer.decode(generated_tokens, skip_special_tokens=True)  
    #decoded_output = tokenizer.decode(output, skip_special_tokens=True)
    raw_outputs.append(decoded_output)
    #break

  0%|          | 0/1326 [00:00<?, ?it/s]The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
100%|██████████| 1326/1326 [24:06<00:00,  1.09s/it]


In [36]:
#print(tokenizer.decode(inputs[0]))

In [37]:
len(raw_outputs)

1326

In [38]:
raw_outputs

['{"list_emotion_classes": [\'Fear\', \'Surprise\']}',
 '{"list_emotion_classes": [\'Joy\']}',
 '{"list_emotion_classes": [\'Anger\']}',
 '{"list_emotion_classes": [\'Anger\', \'Sadness\']}',
 '{"list_emotion_classes": [\'Fear\', \'Surprise\']}',
 '{"list_emotion_classes": [\'Anger\', \'Fear\']}',
 '{"list_emotion_classes": [\'Anger\', \'Sadness\']}',
 '{"list_emotion_classes": [\'Fear\', \'Surprise\']}',
 '{"list_emotion_classes": [\'Fear\']}',
 '{"list_emotion_classes": [\'Sadness\']}',
 '{"list_emotion_classes": [\'Sadness\']}',
 '{"list_emotion_classes": [\'Fear\', \'Surprise\']}',
 '{"list_emotion_classes": [\'Sadness\']}',
 '{"list_emotion_classes": [\'Fear\', \'Sadness\']}',
 '{"list_emotion_classes": [\'Fear\', \'Sadness\']}',
 '{"list_emotion_classes": [\'Neutral\']}',
 '{"list_emotion_classes": [\'Surprise\']}',
 '{"list_emotion_classes": [\'Joy\']}',
 '{"list_emotion_classes": [\'Sadness\']}',
 '{"list_emotion_classes": [\'Neutral\']}',
 '{"list_emotion_classes": [\'Fear\', 

In [39]:
grounds = df_test.emotion_u.tolist()

In [40]:
len(grounds)

1326

In [41]:
predictions = [e.split('\n\n')[0] for e in raw_outputs]
#predictions = [json.loads(e) for e in raw_outputs]

In [42]:
len(predictions)

1326

In [43]:
predictions

['{"list_emotion_classes": [\'Fear\', \'Surprise\']}',
 '{"list_emotion_classes": [\'Joy\']}',
 '{"list_emotion_classes": [\'Anger\']}',
 '{"list_emotion_classes": [\'Anger\', \'Sadness\']}',
 '{"list_emotion_classes": [\'Fear\', \'Surprise\']}',
 '{"list_emotion_classes": [\'Anger\', \'Fear\']}',
 '{"list_emotion_classes": [\'Anger\', \'Sadness\']}',
 '{"list_emotion_classes": [\'Fear\', \'Surprise\']}',
 '{"list_emotion_classes": [\'Fear\']}',
 '{"list_emotion_classes": [\'Sadness\']}',
 '{"list_emotion_classes": [\'Sadness\']}',
 '{"list_emotion_classes": [\'Fear\', \'Surprise\']}',
 '{"list_emotion_classes": [\'Sadness\']}',
 '{"list_emotion_classes": [\'Fear\', \'Sadness\']}',
 '{"list_emotion_classes": [\'Fear\', \'Sadness\']}',
 '{"list_emotion_classes": [\'Neutral\']}',
 '{"list_emotion_classes": [\'Surprise\']}',
 '{"list_emotion_classes": [\'Joy\']}',
 '{"list_emotion_classes": [\'Sadness\']}',
 '{"list_emotion_classes": [\'Neutral\']}',
 '{"list_emotion_classes": [\'Fear\', 

In [44]:
predictions = [json_repair.loads(e) for e in predictions]

In [45]:
predictions

[{'list_emotion_classes': ['Fear', 'Surprise']},
 {'list_emotion_classes': ['Joy']},
 {'list_emotion_classes': ['Anger']},
 {'list_emotion_classes': ['Anger', 'Sadness']},
 {'list_emotion_classes': ['Fear', 'Surprise']},
 {'list_emotion_classes': ['Anger', 'Fear']},
 {'list_emotion_classes': ['Anger', 'Sadness']},
 {'list_emotion_classes': ['Fear', 'Surprise']},
 {'list_emotion_classes': ['Fear']},
 {'list_emotion_classes': ['Sadness']},
 {'list_emotion_classes': ['Sadness']},
 {'list_emotion_classes': ['Fear', 'Surprise']},
 {'list_emotion_classes': ['Sadness']},
 {'list_emotion_classes': ['Fear', 'Sadness']},
 {'list_emotion_classes': ['Fear', 'Sadness']},
 {'list_emotion_classes': ['Neutral']},
 {'list_emotion_classes': ['Surprise']},
 {'list_emotion_classes': ['Joy']},
 {'list_emotion_classes': ['Sadness']},
 {'list_emotion_classes': ['Neutral']},
 {'list_emotion_classes': ['Fear', 'Surprise']},
 {'list_emotion_classes': ['Joy']},
 {'list_emotion_classes': ['Anger', 'Disgust']},
 {

In [46]:
preds_l = []
bad_idx = []

for i, pred in enumerate(predictions):
    try:
        
        preds_l.append(pred['list_emotion_classes'])
    except:
        print(i)
        bad_idx.append(i)

In [47]:
grounds = [item for i, item in enumerate(grounds) if i not in bad_idx]

In [48]:
len(grounds), len(preds_l)

(1326, 1326)

In [49]:
preds_l

[['Fear', 'Surprise'],
 ['Joy'],
 ['Anger'],
 ['Anger', 'Sadness'],
 ['Fear', 'Surprise'],
 ['Anger', 'Fear'],
 ['Anger', 'Sadness'],
 ['Fear', 'Surprise'],
 ['Fear'],
 ['Sadness'],
 ['Sadness'],
 ['Fear', 'Surprise'],
 ['Sadness'],
 ['Fear', 'Sadness'],
 ['Fear', 'Sadness'],
 ['Neutral'],
 ['Surprise'],
 ['Joy'],
 ['Sadness'],
 ['Neutral'],
 ['Fear', 'Surprise'],
 ['Joy'],
 ['Anger', 'Disgust'],
 ['Fear', 'Surprise'],
 ['Neutral'],
 ['Joy'],
 ['Anger'],
 ['Sadness'],
 ['Sadness'],
 ['Anger', 'Sadness'],
 ['Sadness'],
 ['Sadness', 'Surprise'],
 ['Joy'],
 ['Sadness'],
 ['Anger', 'Fear'],
 ['Joy'],
 ['Joy'],
 ['Joy'],
 ['Anger', 'Sadness'],
 ['Joy'],
 ['Neutral'],
 ['Anger', 'Fear'],
 ['Joy'],
 ['Fear', 'Surprise'],
 ['Fear', 'Surprise'],
 ['Fear', 'Surprise'],
 ['Fear'],
 ['Fear'],
 ['Surprise'],
 ['Sadness'],
 ['Fear'],
 ['Anger', 'Fear'],
 ['Surprise'],
 ['Fear', 'Surprise'],
 ['Anger'],
 ['Fear', 'Surprise'],
 ['Neutral'],
 ['Anger', 'Fear'],
 ['Neutral'],
 ['Joy'],
 ['Joy'],
 ['Neut

In [50]:
mlb = MultiLabelBinarizer()

In [51]:
y_true_mhot = mlb.fit_transform(grounds)
y_pred_mhot = mlb.transform(preds_l)

In [52]:
y_pred_mhot.shape

(1326, 7)

In [53]:
y_pred_mhot.shape

(1326, 7)

In [54]:
print(classification_report(y_true_mhot, y_pred_mhot, target_names=mlb.classes_, digits=3))

              precision    recall  f1-score   support

       Anger      0.561     0.529     0.544       454
     Disgust      0.185     0.100     0.130        50
        Fear      0.389     0.515     0.443       299
         Joy      0.509     0.461     0.484       297
     Neutral      0.443     0.248     0.318       109
     Sadness      0.392     0.523     0.448       344
    Surprise      0.724     0.369     0.489       355

   micro avg      0.480     0.458     0.469      1908
   macro avg      0.458     0.392     0.408      1908
weighted avg      0.509     0.458     0.468      1908
 samples avg      0.491     0.467     0.457      1908

