In [1]:
## Utterance level comics, FT, unilabel

In [2]:
import torch
import json_repair
import pandas as pd

from tqdm import tqdm
from datasets import Dataset

from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel, is_bfloat16_supported

from sklearn.metrics import classification_report
from sklearn.preprocessing import MultiLabelBinarizer

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [3]:
max_seq_length = 2048
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    #model_name="unsloth/Qwen2.5-7B-Instruct-bnb-4bit",
    max_seq_length=max_seq_length,
    load_in_4bit=True,
    dtype=None,
)

==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.47.0.
   \\   /|    GPU: NVIDIA H100 NVL. Max memory: 93.003 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 9.0. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [4]:
model = FastLanguageModel.get_peft_model(
    model,
    r=32,
    lora_alpha=32,
    lora_dropout=0,
    target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"], 
    use_rslora=True,
    use_gradient_checkpointing=True
)

Unsloth 2024.12.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


### data

In [5]:
df = pd.read_csv("/Utilisateurs/umushtaq/emotion_analysis_comics/dataset_files/comics_dataset.csv")

In [6]:
df

Unnamed: 0,file_name,page_nr,panel_nr,balloon_nr,utterance,raw_annotation,raw_emotion,raw_speaker_id,emotion,speaker_id,split
0,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,1,2,1,DID YOU HAVE TO ELECTROCUTE HER SO HARD?,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:ID-1,AN0-DI0-FE3-SA0-SU5-JO0,ID-1,TRAIN
1,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,1,2,2,IT'S NOT LIKE I HAVE DIFFERENT SETTINGS.,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:ID-2,AN0-DI0-FE0-SA0-SU5-JO0,ID-2,TRAIN
2,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,1,2,3,YOU'RE ELECTROCUTIONER. IT'S YOUR WHOLE THING....,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:ID-1,AN0-DI0-FE2-SA0-SU0-JO0,ID-1,TRAIN
3,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,1,3,1,"OH, HEY. I THINK SHE'S AWAKE.",2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:ID-2,AN0-DI0-FE0-SA0-SU4-JO0,ID-2,TRAIN
4,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,1,4,1,"WELCOME BACK, MADAM MAYOR. BLOCKBUSTER IS PRET...",2024-08-27 - aselermekova20\nFeeling:AN3-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN3-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:ID-1,AN3-DI0-FE0-SA0-SU0-JO0,ID-1,TRAIN
...,...,...,...,...,...,...,...,...,...,...,...
7124,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,21,3,2,SHE WOULDN'T DO THAT TO US. WE TALKED FOR A LO...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:Eugene,AN0-DI0-FE1-SA3-SU0-JO0,Eugene,TRAIN
7125,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,21,3,3,… I KNOW HER.,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:Eugene,AN0-DI0-FE1-SA3-SU0-JO0,Eugene,TRAIN
7126,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,21,4,1,"UH, GUYS…",2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:JUANITA...,AN0-DI0-FE3-SA0-SU4-JO0,JUANITA SANCHEZ,TRAIN
7127,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,22,1,1,PUT YOUR WEAPONS DOWN AND PUT YOUR HANDS IN TH...,2024-09-06 - SyimykRasulov\nFeeling:AN4-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN4-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:ID- 2,AN4-DI0-FE0-SA0-SU0-JO0,ID- 2,TRAIN


In [7]:
import pandas as pd

def map_emotions(row):
    
    emotion_str = row.emotion
    emotions = ["Anger", "Disgust", "Fear", "Sadness", "Surprise", "Joy"]
    
    try:
        parts = emotion_str.split('-')
        # Extract valid emotion and value pairs
        emotion_values = []
        for i, part in enumerate(parts):
            if len(part) > 2 and part[2:].isdigit():  # Ensure valid integer after the abbreviation
                value = int(part[2:])
                if value > 0:
                    emotion_values.append((i, value))
        
        if not emotion_values:
            return None  # No non-zero values

        # Find the emotion with the largest value, choose the first if tied
        max_emotion_index = max(emotion_values, key=lambda x: (x[1], -x[0]))[0]
        return emotions[max_emotion_index]

    except Exception as e:
        print(f"Error processing '{emotion_str}': {e}")
        return None

In [8]:
df['emotion_label'] = df.apply(lambda row: map_emotions(row), axis=1) # type: ignore

In [9]:
df

Unnamed: 0,file_name,page_nr,panel_nr,balloon_nr,utterance,raw_annotation,raw_emotion,raw_speaker_id,emotion,speaker_id,split,emotion_label
0,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,1,2,1,DID YOU HAVE TO ELECTROCUTE HER SO HARD?,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:ID-1,AN0-DI0-FE3-SA0-SU5-JO0,ID-1,TRAIN,Surprise
1,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,1,2,2,IT'S NOT LIKE I HAVE DIFFERENT SETTINGS.,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:ID-2,AN0-DI0-FE0-SA0-SU5-JO0,ID-2,TRAIN,Surprise
2,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,1,2,3,YOU'RE ELECTROCUTIONER. IT'S YOUR WHOLE THING....,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:ID-1,AN0-DI0-FE2-SA0-SU0-JO0,ID-1,TRAIN,Fear
3,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,1,3,1,"OH, HEY. I THINK SHE'S AWAKE.",2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:ID-2,AN0-DI0-FE0-SA0-SU4-JO0,ID-2,TRAIN,Surprise
4,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,1,4,1,"WELCOME BACK, MADAM MAYOR. BLOCKBUSTER IS PRET...",2024-08-27 - aselermekova20\nFeeling:AN3-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN3-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:ID-1,AN3-DI0-FE0-SA0-SU0-JO0,ID-1,TRAIN,Anger
...,...,...,...,...,...,...,...,...,...,...,...,...
7124,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,21,3,2,SHE WOULDN'T DO THAT TO US. WE TALKED FOR A LO...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:Eugene,AN0-DI0-FE1-SA3-SU0-JO0,Eugene,TRAIN,Sadness
7125,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,21,3,3,… I KNOW HER.,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:Eugene,AN0-DI0-FE1-SA3-SU0-JO0,Eugene,TRAIN,Sadness
7126,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,21,4,1,"UH, GUYS…",2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:JUANITA...,AN0-DI0-FE3-SA0-SU4-JO0,JUANITA SANCHEZ,TRAIN,Surprise
7127,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,22,1,1,PUT YOUR WEAPONS DOWN AND PUT YOUR HANDS IN TH...,2024-09-06 - SyimykRasulov\nFeeling:AN4-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN4-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:ID- 2,AN4-DI0-FE0-SA0-SU0-JO0,ID- 2,TRAIN,Anger


In [10]:
df_train = df[df.split == "TRAIN"].reset_index(drop=True)

In [11]:
df.emotion_label.value_counts()

emotion_label
Anger       1972
Joy         1352
Sadness     1169
Fear        1105
Surprise    1001
Disgust       92
Name: count, dtype: int64

In [12]:
### Prepare Prompts

In [13]:
def build_instruction():
    
    emotion_classes = ["Anger", "Disgust", "Fear", "Sadness", "Surprise", "Joy", "Neutral"]
    formatted_classes = ", ".join([f'"{emotion}"' for emotion in emotion_classes])
    
    instruction = f"""### Emotion Analysis Expert Role

You are an advanced emotion analysis expert specializing in comics dialogue emotion interpretation.

INPUT:
- Single utterance to analyze

TASK:
Identify ONLY ONE applicable emotions from: {formatted_classes}

RULES:
1. Use ONLY the listed labels
2. Output must be a JSON with key "emotion"
4. Multiple emotions are NOT allowed
5. No explanations, only JSON output

"""
    return instruction

In [14]:
instruction = build_instruction()

In [15]:
sys_msg_l = []
user_msg_l = []
assistant_msg_l = []

for _, row in df_train.iterrows():
        
        sys_msg = {'role': 'system', 'content': instruction}
        user_msg = {'role': 'user', 'content': ""}
        assistant_msg = {'role': 'assistant', 'content': f'{{"emotion": {row["emotion_label"]}}}'}


        sys_msg_l.append(sys_msg)
        user_msg_l.append(user_msg)
        assistant_msg_l.append(assistant_msg)
        

In [16]:
comics_dataset = []

for i in range(len(sys_msg_l)):
    
    comics_dataset.append([sys_msg_l[i], user_msg_l[i], assistant_msg_l[i]])

In [17]:
def fix_comics_dataset(comics_dataset):
    fixed_comics_dataset = []
    for conversation in comics_dataset:
        fixed_conversation = []
        for message in conversation:
            if isinstance(message['content'], list):  # If the 'value' is a list of emotions
                message['content'] = ', '.join(message['content'])  # Join the list into a string
            fixed_conversation.append(message)
        fixed_comics_dataset.append(fixed_conversation)
    return fixed_comics_dataset

In [18]:
fixed_comics_dataset = fix_comics_dataset(comics_dataset)

In [19]:
dataset = Dataset.from_dict({
    'conversations': fixed_comics_dataset
})

In [20]:
dataset[0]['conversations']

[{'content': '### Emotion Analysis Expert Role\n\nYou are an advanced emotion analysis expert specializing in comics dialogue emotion interpretation.\n\nINPUT:\n- Single utterance to analyze\n\nTASK:\nIdentify ONLY ONE applicable emotions from: "Anger", "Disgust", "Fear", "Sadness", "Surprise", "Joy", "Neutral"\n\nRULES:\n1. Use ONLY the listed labels\n2. Output must be a JSON with key "emotion"\n4. Multiple emotions are NOT allowed\n5. No explanations, only JSON output\n\n',
  'role': 'system'},
 {'content': '', 'role': 'user'},
 {'content': '{"emotion": Surprise}', 'role': 'assistant'}]

In [21]:
tokenizer = get_chat_template(
    tokenizer,
    mapping={"role": "from", "content": "value", "user": "human", "assistant": "gpt"},
    chat_template="chatml",
)

def apply_template_comics(examples):
    messages = examples["conversations"]
    #messages = examples['input'] + examples['output']
    text = [tokenizer.apply_chat_template(message, tokenize=False, add_generation_prompt=False) for message in messages]
    return {"text": text}

Unsloth: Will map <|im_end|> to EOS = <|eot_id|>.


In [22]:
comics_dataset = dataset.map(apply_template_comics, batched=True)

Map:   0%|          | 0/5803 [00:00<?, ? examples/s]

In [23]:
def split_dataset(dataset, train_ratio=0.8):
    train_test = dataset.train_test_split(test_size=1 - train_ratio)
    return train_test

dataset_split = split_dataset(comics_dataset)

In [24]:
train_dataset = dataset_split['train']
eval_dataset = dataset_split['test']

In [25]:
print(eval_dataset[360]['text'])

<|im_start|>system
### Emotion Analysis Expert Role

You are an advanced emotion analysis expert specializing in comics dialogue emotion interpretation.

INPUT:
- Single utterance to analyze

TASK:
Identify ONLY ONE applicable emotions from: "Anger", "Disgust", "Fear", "Sadness", "Surprise", "Joy", "Neutral"

RULES:
1. Use ONLY the listed labels
2. Output must be a JSON with key "emotion"
4. Multiple emotions are NOT allowed
5. No explanations, only JSON output

<|im_end|>
<|im_start|>user
<|im_end|>
<|im_start|>assistant
{"emotion": Surprise}<|im_end|>



In [26]:
### Training

In [27]:
OUTPUT_DIR = "/Utilisateurs/umushtaq/emotion_analysis_comics/unilabel_comics_model"

In [28]:
args=TrainingArguments(
    
        learning_rate=3e-4,
        lr_scheduler_type="cosine",
        per_device_train_batch_size=16,
        gradient_accumulation_steps=2,
        num_train_epochs=5,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=50,
        optim="adamw_8bit",
        weight_decay=0.01,
        warmup_steps=10,
        
        eval_strategy="steps",  # Run evaluation during training (can also use "epoch")
        eval_steps=50,  # Perform evaluation every 50 steps
        save_strategy="steps",  # Save the model every few steps
        save_steps=50,  # Save every 200 steps
        load_best_model_at_end=True,
    
        output_dir=OUTPUT_DIR,
        seed=0,
    )

In [29]:
trainer=SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,  # Replace with your train dataset
    eval_dataset=eval_dataset, 
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=args,
)

Map (num_proc=2):   0%|          | 0/4642 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/1161 [00:00<?, ? examples/s]

In [30]:
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 4,642 | Num Epochs = 5
O^O/ \_/ \    Batch size per device = 16 | Gradient Accumulation steps = 2
\        /    Total batch size = 32 | Total steps = 725
 "-____-"     Number of trainable parameters = 83,886,080


Step,Training Loss,Validation Loss
50,0.7413,0.053911
100,0.1088,0.01458
150,0.029,0.014096
200,0.0292,0.014369
250,0.0281,0.013972
300,0.0276,0.014335
350,0.0286,0.013947
400,0.0279,0.013529
450,0.0273,0.013755
500,0.0273,0.013591


TrainOutput(global_step=725, training_loss=0.08261861110555714, metrics={'train_runtime': 928.4078, 'train_samples_per_second': 25.0, 'train_steps_per_second': 0.781, 'total_flos': 1.4942847983527526e+17, 'train_loss': 0.08261861110555714, 'epoch': 4.969072164948454})

In [31]:
### inference

In [32]:
model = FastLanguageModel.for_inference(model)

In [33]:
df_test = df[df.split == "TEST"].reset_index(drop=True)

In [34]:
sys_msg_l = []
user_msg_l = []
assistant_msg_l = []

for _, row in df_test.iterrows():
        
        sys_msg = {'role': 'system', 'content': instruction}
        user_msg = {'role': 'user', 'content': ""}
        assistant_msg = {'role': 'assistant', 'content': ""}


        sys_msg_l.append(sys_msg)
        user_msg_l.append(user_msg)
        assistant_msg_l.append(assistant_msg)
        

In [35]:
test_messages = []

for i in range(len(sys_msg_l)):
    
    test_messages.append([sys_msg_l[i], user_msg_l[i], assistant_msg_l[i]])

In [36]:
raw_outputs = []

for message in tqdm(test_messages):
    
    inputs = tokenizer.apply_chat_template(
    message,
    tokenize=True,
    add_generation_prompt=True,
    return_dict=True,
    return_tensors="pt",
).to("cuda")
    #print(inputs)
    #break
    
    output = model.generate(**inputs, max_new_tokens=128)[0]
    
    input_length = inputs.input_ids.shape[1]
    generated_tokens = output[input_length:]
    
    decoded_output = tokenizer.decode(generated_tokens, skip_special_tokens=True)  
    #decoded_output = tokenizer.decode(output, skip_special_tokens=True)
    raw_outputs.append(decoded_output)
    #break

100%|██████████| 1326/1326 [07:23<00:00,  2.99it/s]


In [37]:
### post processing

In [38]:
raw_outputs

['{"emotion": Anger}',
 '{"emotion": Surprise}',
 '{"emotion": Joy}',
 '{"emotion": Joy}',
 '{"emotion": Surprise}',
 '{"emotion": Joy}',
 '{"emotion": Joy}',
 '{"emotion": Anger}',
 '{"emotion": Joy}',
 '{"emotion": Sadness}',
 '{"emotion": Anger}',
 '{"emotion": Sadness}',
 '{"emotion": Surprise}',
 '{"emotion": Surprise}',
 '{"emotion": Joy}',
 '{"emotion": Joy}',
 '{"emotion": Sadness}',
 '{"emotion": Joy}',
 '{"emotion": Fear}',
 '{"emotion": Anger}',
 '{"emotion": Sadness}',
 '{"emotion": Joy}',
 '{"emotion": Sadness}',
 '{"emotion": Anger}',
 '{"emotion": Joy}',
 '{"emotion": Joy}',
 '{"emotion": Fear}',
 '{"emotion": Fear}',
 '{"emotion": Anger}',
 '{"emotion": Anger}',
 '{"emotion": Anger}',
 '{"emotion": Joy}',
 '{"emotion": Sadness}',
 '{"emotion": Joy}',
 '{"emotion": Sadness}',
 '{"emotion": Surprise}',
 '{"emotion": Fear}',
 '{"emotion": Sadness}',
 '{"emotion": Sadness}',
 '{"emotion": Fear}',
 '{"emotion": Sadness}',
 '{"emotion": Fear}',
 '{"emotion": Anger}',
 '{"emot

In [39]:
predictions = [json_repair.loads(e) for e in raw_outputs]

In [40]:
predictions

[{'emotion': 'Anger'},
 {'emotion': 'Surprise'},
 {'emotion': 'Joy'},
 {'emotion': 'Joy'},
 {'emotion': 'Surprise'},
 {'emotion': 'Joy'},
 {'emotion': 'Joy'},
 {'emotion': 'Anger'},
 {'emotion': 'Joy'},
 {'emotion': 'Sadness'},
 {'emotion': 'Anger'},
 {'emotion': 'Sadness'},
 {'emotion': 'Surprise'},
 {'emotion': 'Surprise'},
 {'emotion': 'Joy'},
 {'emotion': 'Joy'},
 {'emotion': 'Sadness'},
 {'emotion': 'Joy'},
 {'emotion': 'Fear'},
 {'emotion': 'Anger'},
 {'emotion': 'Sadness'},
 {'emotion': 'Joy'},
 {'emotion': 'Sadness'},
 {'emotion': 'Anger'},
 {'emotion': 'Joy'},
 {'emotion': 'Joy'},
 {'emotion': 'Fear'},
 {'emotion': 'Fear'},
 {'emotion': 'Anger'},
 {'emotion': 'Anger'},
 {'emotion': 'Anger'},
 {'emotion': 'Joy'},
 {'emotion': 'Sadness'},
 {'emotion': 'Joy'},
 {'emotion': 'Sadness'},
 {'emotion': 'Surprise'},
 {'emotion': 'Fear'},
 {'emotion': 'Sadness'},
 {'emotion': 'Sadness'},
 {'emotion': 'Fear'},
 {'emotion': 'Sadness'},
 {'emotion': 'Fear'},
 {'emotion': 'Anger'},
 {'emoti

In [41]:
predictions = [elem['emotion'] for elem in predictions]

In [42]:
df_test

Unnamed: 0,file_name,page_nr,panel_nr,balloon_nr,utterance,raw_annotation,raw_emotion,raw_speaker_id,emotion,speaker_id,split,emotion_label
0,QC copy - 1643 - 23 American Vampire vol. 4 - ...,1,1,1,"TIME TO FACE OUR FEARS, PEOPLE…",2024-08-25 - SyimykRasulov\nFeeling:Neutral\n\...,2024-08-25 - SyimykRasulov\nFeeling:Neutral,\n2024-08-25 - SyimykRasulov\nSpokenBy:unknown,Neutral,unknown,TEST,
1,QC copy - 1643 - 23 American Vampire vol. 4 - ...,1,1,3,IT'S TIME TO LIMBO!,2024-08-25 - SyimykRasulov\nFeeling:AN0-DI0-FE...,2024-08-25 - SyimykRasulov\nFeeling:AN0-DI0-FE...,\n2024-08-25 - SyimykRasulov\nSpokenBy:ID- 1,AN0-DI0-FE0-SA0-SU0-JO3,ID- 1,TEST,Joy
2,QC copy - 1643 - 23 American Vampire vol. 4 - ...,1,2,1,"HOW LOW, CAN YOU GO?! HOW LOW, CAN YOU GO?!",2024-08-25 - SyimykRasulov\nFeeling:AN0-DI0-FE...,2024-08-25 - SyimykRasulov\nFeeling:AN0-DI0-FE...,\n2024-08-25 - SyimykRasulov\nSpokenBy:ID- 2,AN0-DI0-FE0-SA0-SU2-JO3,ID- 2,TEST,Joy
3,QC copy - 1643 - 23 American Vampire vol. 4 - ...,1,2,2,HOW LOW--,2024-08-25 - SyimykRasulov\nFeeling:AN0-DI0-FE...,2024-08-25 - SyimykRasulov\nFeeling:AN0-DI0-FE...,\n2024-08-25 - SyimykRasulov\nSpokenBy:ID- 2,AN0-DI0-FE0-SA0-SU2-JO3,ID- 2,TEST,Joy
4,QC copy - 1643 - 23 American Vampire vol. 4 - ...,1,2,3,WHAT IN…,2024-08-25 - SyimykRasulov\nFeeling:AN0-DI0-FE...,2024-08-25 - SyimykRasulov\nFeeling:AN0-DI0-FE...,\n2024-08-25 - SyimykRasulov\nSpokenBy:ID- 3,AN0-DI0-FE3-SA0-SU4-JO0,ID- 3,TEST,Surprise
...,...,...,...,...,...,...,...,...,...,...,...,...
1321,QC copy - 1514 - 15 DC contra Vampiros 11.xlsx,22,2,3,DAMIAN! YOU'RE ALIVE!,2024-08-04 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-04 - aselermekova20\nFeeling:AN0-DI0-F...,\n2024-09-11 - aidaraliev12345\nSpokenBy:Batgirl,AN0-DI0-FE0-SA0-SU5-JO0,Batgirl,TEST,Surprise
1322,QC copy - 1514 - 15 DC contra Vampiros 11.xlsx,22,2,4,NO. HE'S NOT. HE'S A VAMPIRE.,2024-08-04 - aselermekova20\nFeeling:Neutral\n...,2024-08-04 - aselermekova20\nFeeling:Neutral,\n2024-09-11 - aidaraliev12345\nSpokenBy:Frank...,Neutral,Frankenstein,TEST,
1323,QC copy - 1514 - 15 DC contra Vampiros 11.xlsx,22,3,1,WHOA. WHOSE BIG BRAIN DID THEY PUT IN THAT HEA...,2024-08-04 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-04 - aselermekova20\nFeeling:AN0-DI0-F...,\n2024-09-11 - aidaraliev12345\nSpokenBy:Damian,AN0-DI0-FE0-SA0-SU5-JO3,Damian,TEST,Surprise
1324,QC copy - 1514 - 15 DC contra Vampiros 11.xlsx,22,3,2,SPARE PARTS IS RIGHT. I AM A VAMPIRE.,2024-08-04 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-04 - aselermekova20\nFeeling:AN0-DI0-F...,\n2024-09-11 - aidaraliev12345\nSpokenBy:Damian,AN0-DI0-FE0-SA0-SU2-JO3,Damian,TEST,Joy


In [43]:
grounds = df_test.emotion_label

In [44]:
grounds

0           None
1            Joy
2            Joy
3            Joy
4       Surprise
          ...   
1321    Surprise
1322        None
1323    Surprise
1324         Joy
1325         Joy
Name: emotion_label, Length: 1326, dtype: object

In [45]:
none_indices = [i for i, value in enumerate(grounds) if value is None]

In [46]:
grounds_l = [grounds[i] for i in range(len(grounds)) if i not in none_indices]
predictions_l = [predictions[i] for i in range(len(predictions)) if i not in none_indices]

In [47]:
len(grounds_l), len(predictions_l)

(1217, 1217)

In [48]:
grounds_l

['Joy',
 'Joy',
 'Joy',
 'Surprise',
 'Anger',
 'Anger',
 'Fear',
 'Fear',
 'Joy',
 'Sadness',
 'Fear',
 'Sadness',
 'Sadness',
 'Sadness',
 'Surprise',
 'Anger',
 'Surprise',
 'Fear',
 'Disgust',
 'Surprise',
 'Surprise',
 'Sadness',
 'Fear',
 'Fear',
 'Surprise',
 'Surprise',
 'Sadness',
 'Sadness',
 'Sadness',
 'Sadness',
 'Sadness',
 'Sadness',
 'Joy',
 'Anger',
 'Surprise',
 'Anger',
 'Sadness',
 'Surprise',
 'Surprise',
 'Surprise',
 'Surprise',
 'Fear',
 'Surprise',
 'Surprise',
 'Fear',
 'Surprise',
 'Surprise',
 'Surprise',
 'Surprise',
 'Fear',
 'Surprise',
 'Surprise',
 'Sadness',
 'Sadness',
 'Surprise',
 'Fear',
 'Fear',
 'Sadness',
 'Joy',
 'Disgust',
 'Disgust',
 'Joy',
 'Joy',
 'Sadness',
 'Fear',
 'Surprise',
 'Surprise',
 'Joy',
 'Joy',
 'Sadness',
 'Joy',
 'Sadness',
 'Fear',
 'Sadness',
 'Sadness',
 'Sadness',
 'Anger',
 'Fear',
 'Fear',
 'Fear',
 'Fear',
 'Fear',
 'Fear',
 'Fear',
 'Sadness',
 'Sadness',
 'Anger',
 'Anger',
 'Anger',
 'Sadness',
 'Anger',
 'Sadness

In [49]:
print(classification_report(grounds_l, predictions_l, digits=3))

              precision    recall  f1-score   support

       Anger      0.326     0.389     0.355       365
     Disgust      0.000     0.000     0.000        18
        Fear      0.135     0.143     0.139       182
         Joy      0.236     0.255     0.245       235
     Sadness      0.188     0.180     0.184       217
    Surprise      0.163     0.105     0.128       200

    accuracy                          0.237      1217
   macro avg      0.175     0.179     0.175      1217
weighted avg      0.224     0.237     0.228      1217



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
