In [7]:
import torch
import json_repair
import pandas as pd

from tqdm import tqdm
from datasets import Dataset

from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel, is_bfloat16_supported

from sklearn.metrics import classification_report
from sklearn.preprocessing import MultiLabelBinarizer

In [9]:
# Maximum sequence length handed to the loader below.
max_seq_length = 2048

# Load the 4-bit Llama 3.2 3B Instruct checkpoint together with its tokenizer.
# Alternative checkpoints kept for reference:
#   unsloth/mistral-7b-instruct-v0.3-bnb-4bit
#   unsloth/Qwen2.5-7B-Instruct-bnb-4bit
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=None,  # presumably lets Unsloth choose the dtype -- confirm against its docs
    load_in_4bit=True,
)

==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.47.0.
   \\   /|    GPU: NVIDIA H100 NVL. Max memory: 93.003 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 9.0. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [10]:
# Attach LoRA adapters to the listed projection modules of the loaded model.
# NOTE(review): this cell rebinds `model` from itself, so re-running it without
# reloading the base model would wrap an already-wrapped model.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "up_proj",
        "down_proj",
        "o_proj",
        "gate_proj",
    ],
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    use_rslora=True,
    use_gradient_checkpointing=True,
)

Unsloth 2024.12.4 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


### Data

In [12]:
# Load the annotated comics dataset.
# NOTE(review): absolute, machine-specific path -- the notebook only runs where
# this exact location exists.
df = pd.read_csv(
    filepath_or_buffer="/Utilisateurs/umushtaq/emotion_analysis_comics/dataset_files/comics_dataset.csv",
)

In [13]:
# Display the raw table exactly as loaded from the CSV.
df

Unnamed: 0,file_name,page_nr,panel_nr,balloon_nr,utterance,raw_annotation,raw_emotion,raw_speaker_id,emotion,speaker_id,split
0,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,1,2,1,DID YOU HAVE TO ELECTROCUTE HER SO HARD?,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:ID-1,AN0-DI0-FE3-SA0-SU5-JO0,ID-1,TRAIN
1,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,1,2,2,IT'S NOT LIKE I HAVE DIFFERENT SETTINGS.,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:ID-2,AN0-DI0-FE0-SA0-SU5-JO0,ID-2,TRAIN
2,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,1,2,3,YOU'RE ELECTROCUTIONER. IT'S YOUR WHOLE THING....,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:ID-1,AN0-DI0-FE2-SA0-SU0-JO0,ID-1,TRAIN
3,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,1,3,1,"OH, HEY. I THINK SHE'S AWAKE.",2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:ID-2,AN0-DI0-FE0-SA0-SU4-JO0,ID-2,TRAIN
4,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,1,4,1,"WELCOME BACK, MADAM MAYOR. BLOCKBUSTER IS PRET...",2024-08-27 - aselermekova20\nFeeling:AN3-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN3-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:ID-1,AN3-DI0-FE0-SA0-SU0-JO0,ID-1,TRAIN
...,...,...,...,...,...,...,...,...,...,...,...
7124,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,21,3,2,SHE WOULDN'T DO THAT TO US. WE TALKED FOR A LO...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:Eugene,AN0-DI0-FE1-SA3-SU0-JO0,Eugene,TRAIN
7125,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,21,3,3,… I KNOW HER.,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:Eugene,AN0-DI0-FE1-SA3-SU0-JO0,Eugene,TRAIN
7126,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,21,4,1,"UH, GUYS…",2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:JUANITA...,AN0-DI0-FE3-SA0-SU4-JO0,JUANITA SANCHEZ,TRAIN
7127,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,22,1,1,PUT YOUR WEAPONS DOWN AND PUT YOUR HANDS IN TH...,2024-09-06 - SyimykRasulov\nFeeling:AN4-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN4-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:ID- 2,AN4-DI0-FE0-SA0-SU0-JO0,ID- 2,TRAIN


In [14]:
# List every distinct speaker_id value present in the raw table.
df.speaker_id.unique()

array(['ID-1', 'ID-2', 'NIGHTWING', 'MELINDA', 'BLOCKBUSTER', 'AUDRE',
       'MACLEAN', 'ID-3', 'ID-3 ', 'MAGGIE', 'COMMISSIONER', 'BATWOMAN',
       'ROBIN', 'ID-4', 'ID-5', 'STARGIRL', 'MARONI', 'FLASH', 'ID-6',
       'ID-7', 'ID-8', 'ID-9', 'ELLIOT', 'Eleanor', 'Momma',
       'no annotation', 'Natasha', 'John', 'BRIGHT REVENANT', 'HAWKGIRL',
       'CHRISTOPHER', 'ICE', 'JESUS', 'unknown', 'ID- 1', 'ID- 2',
       'ID- 3', 'ID- 4', 'ID- 5', 'ID- 6', 'Ms.Jones', 'Felicia Book',
       'ID- 7', 'ID- 8', 'ID- 9', 'ID- 10', 'ID- 11', 'ID- 12', 'Skinner',
       'Cal', 'Homo Abominus', 'Agent Bixby', 'Lucia', 'Trapp', 'Kill',
       'ID- 14', 'OLIVIA', 'MAGISTER PAVUS', 'AARON', 'CALIX ', 'CALIX',
       'FRANCESCA', 'FLORIAN', 'VAEA', 'AGOSTO', 'ELF', 'AUTUMN', 'Carl',
       'Rick', 'Maggie', 'Eugene', 'STEPHANIE', 'Dwight', 'Negan', 'Lobo',
       'ID-2 ', 'Director', 'ID-1 ', 'Flash', 'Black Mask', 'Jay',
       'Linda', 'Jai', 'Max', 'Irey', 'Ace', 'Jesse', 'Mr.Allen',
       'Mr

In [15]:
# Keep only rows whose speaker_id does not contain the substring 'ID-'
# (this is a substring match, so any value containing 'ID-' anywhere is removed).
# With na=False, rows whose speaker_id is missing count as non-matching and are kept.
filtered_df = (
    df
    .loc[~df['speaker_id'].str.contains('ID-', na=False)]
    .reset_index(drop=True)
)

In [16]:
# Display the table after the 'ID-' speaker filter.
filtered_df

Unnamed: 0,file_name,page_nr,panel_nr,balloon_nr,utterance,raw_annotation,raw_emotion,raw_speaker_id,emotion,speaker_id,split
0,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,3,3,1,CAN YOU WALK?,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:NIGHTWING,AN0-DI0-FE5-SA0-SU0-JO0,NIGHTWING,TRAIN
1,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,3,3,2,HOW DID YOU FIND ME?,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:MELINDA,AN0-DI0-FE0-SA0-SU5-JO0,MELINDA,TRAIN
2,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,3,3,3,ORACLE HAD THE LAST PING OFF YOUR PHONE. AND A...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:NIGHTWING,AN0-DI0-FE0-SA0-SU0-JO5,NIGHTWING,TRAIN
3,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,3,4,1,AUDRE'S HERE?,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:MELINDA,AN0-DI0-FE3-SA0-SU5-JO0,MELINDA,TRAIN
4,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,3,4,2,IN A CAR OUTSIDE.,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:NIGHTWING,AN0-DI0-FE0-SA0-SU0-JO3,NIGHTWING,TRAIN
...,...,...,...,...,...,...,...,...,...,...,...
5787,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,21,3,1,MAYBE THIS STEPHANIE PERSON WAS JUST MESSING W...,2024-09-06 - SyimykRasulov\nFeeling:AN1-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN1-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:Siddiq,AN1-DI0-FE3-SA0-SU0-JO0,Siddiq,TRAIN
5788,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,21,3,2,SHE WOULDN'T DO THAT TO US. WE TALKED FOR A LO...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:Eugene,AN0-DI0-FE1-SA3-SU0-JO0,Eugene,TRAIN
5789,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,21,3,3,… I KNOW HER.,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:Eugene,AN0-DI0-FE1-SA3-SU0-JO0,Eugene,TRAIN
5790,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,21,4,1,"UH, GUYS…",2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:JUANITA...,AN0-DI0-FE3-SA0-SU4-JO0,JUANITA SANCHEZ,TRAIN


In [17]:
# List the distinct speaker_id values that remain in filtered_df.
filtered_df.speaker_id.unique()

array(['NIGHTWING', 'MELINDA', 'BLOCKBUSTER', 'AUDRE', 'MACLEAN',
       'MAGGIE', 'COMMISSIONER', 'BATWOMAN', 'ROBIN', 'STARGIRL',
       'MARONI', 'FLASH', 'ELLIOT', 'Eleanor', 'Momma', 'no annotation',
       'Natasha', 'John', 'BRIGHT REVENANT', 'HAWKGIRL', 'CHRISTOPHER',
       'ICE', 'JESUS', 'unknown', 'Ms.Jones', 'Felicia Book', 'Skinner',
       'Cal', 'Homo Abominus', 'Agent Bixby', 'Lucia', 'Trapp', 'Kill',
       'OLIVIA', 'MAGISTER PAVUS', 'AARON', 'CALIX ', 'CALIX',
       'FRANCESCA', 'FLORIAN', 'VAEA', 'AGOSTO', 'ELF', 'AUTUMN', 'Carl',
       'Rick', 'Maggie', 'Eugene', 'STEPHANIE', 'Dwight', 'Negan', 'Lobo',
       'Director', 'Flash', 'Black Mask', 'Jay', 'Linda', 'Jai', 'Max',
       'Irey', 'Ace', 'Jesse', 'Mr.Allen', 'Mrs.Allen', 'Kid Flash',
       'Flash 2', 'Bystander', 'Freezing enemy', 'Travis', 'Krunch',
       'Lady Cop', 'Non-Fat', 'Good Looks', 'Bananas', 'Mr.Rider',
       'Starman', 'Metamorpho', 'Manhunter', 'The Commodore', 'Psimon',
       'Superboy'

In [18]:
# Remove rows whose speaker_id contains the substring 'no annotation';
# rows with a missing speaker_id are kept (na=False).
filtered_df = (
    filtered_df
    .loc[~filtered_df['speaker_id'].str.contains('no annotation', na=False)]
    .reset_index(drop=True)
)

In [19]:
# (rows, columns) of filtered_df at this point in the notebook.
filtered_df.shape

(5727, 11)

In [20]:
# Remove rows whose speaker_id contains the substring 'unknown_speaker';
# rows with a missing speaker_id are kept (na=False).
# NOTE(review): a later cell filters on the substring 'unknown', which also
# matches every 'unknown_speaker' value, so this step appears redundant.
filtered_df = (
    filtered_df
    .loc[~filtered_df['speaker_id'].str.contains('unknown_speaker', na=False)]
    .reset_index(drop=True)
)

In [21]:
# (rows, columns) of filtered_df at this point in the notebook.
filtered_df.shape

(5724, 11)

In [22]:
# Remove rows whose speaker_id contains the substring 'unknown' (case-sensitive);
# rows with a missing speaker_id are kept (na=False).
filtered_df = (
    filtered_df
    .loc[~filtered_df['speaker_id'].str.contains('unknown', na=False)]
    .reset_index(drop=True)
)

In [23]:
# (rows, columns) of filtered_df at this point in the notebook.
filtered_df.shape

(5697, 11)

In [24]:
# List the distinct speaker_id values left after all speaker filters.
filtered_df.speaker_id.unique()

array(['NIGHTWING', 'MELINDA', 'BLOCKBUSTER', 'AUDRE', 'MACLEAN',
       'MAGGIE', 'COMMISSIONER', 'BATWOMAN', 'ROBIN', 'STARGIRL',
       'MARONI', 'FLASH', 'ELLIOT', 'Eleanor', 'Momma', 'Natasha', 'John',
       'BRIGHT REVENANT', 'HAWKGIRL', 'CHRISTOPHER', 'ICE', 'JESUS',
       'Ms.Jones', 'Felicia Book', 'Skinner', 'Cal', 'Homo Abominus',
       'Agent Bixby', 'Lucia', 'Trapp', 'Kill', 'OLIVIA',
       'MAGISTER PAVUS', 'AARON', 'CALIX ', 'CALIX', 'FRANCESCA',
       'FLORIAN', 'VAEA', 'AGOSTO', 'ELF', 'AUTUMN', 'Carl', 'Rick',
       'Maggie', 'Eugene', 'STEPHANIE', 'Dwight', 'Negan', 'Lobo',
       'Director', 'Flash', 'Black Mask', 'Jay', 'Linda', 'Jai', 'Max',
       'Irey', 'Ace', 'Jesse', 'Mr.Allen', 'Mrs.Allen', 'Kid Flash',
       'Flash 2', 'Bystander', 'Freezing enemy', 'Travis', 'Krunch',
       'Lady Cop', 'Non-Fat', 'Good Looks', 'Bananas', 'Mr.Rider',
       'Starman', 'Metamorpho', 'Manhunter', 'The Commodore', 'Psimon',
       'Superboy', 'Supergirl', 'Cyborg', 'Ro

In [25]:
# Spot-check: show the rows whose speaker_id is exactly 'violet ring'.
filtered_df[filtered_df.speaker_id == 'violet ring']

Unnamed: 0,file_name,page_nr,panel_nr,balloon_nr,utterance,raw_annotation,raw_emotion,raw_speaker_id,emotion,speaker_id,split
1667,QC copy - 1508 - 48 Pequen_os Titanes 25.xlsx,14,4,1,ACTIVATE!,2024-07-25 - SyimykRasulov\nFeeling:Neutral\n\...,2024-07-25 - SyimykRasulov\nFeeling:Neutral,\n2024-07-25 - SyimykRasulov\nSpokenBy:violet ...,Neutral,violet ring,TRAIN


In [26]:
# Spot-check: list the distinct speakers annotated in this one file.
filtered_df[filtered_df.file_name == 'QC copy - 1508 - 48 Pequen_os Titanes 25.xlsx'].speaker_id.unique()

array(['Psimon', 'Superboy', 'Supergirl', 'Cyborg', 'Kid Flash', 'Robin',
       'Cassie', 'Shelly', 'Match', 'Mr.John', 'Speedy', 'Stargirl',
       'Starfire', 'Batgirl', 'Jokergirl', 'Terra', 'violet ring',
       'green ring', 'red ring', 'blue ring', 'sky blue ring',
       'blue gnom', 'blue serious gnom', 'Green Lantern'], dtype=object)

In [27]:
# Display filtered_df after the speaker filters.
filtered_df

Unnamed: 0,file_name,page_nr,panel_nr,balloon_nr,utterance,raw_annotation,raw_emotion,raw_speaker_id,emotion,speaker_id,split
0,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,3,3,1,CAN YOU WALK?,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:NIGHTWING,AN0-DI0-FE5-SA0-SU0-JO0,NIGHTWING,TRAIN
1,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,3,3,2,HOW DID YOU FIND ME?,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:MELINDA,AN0-DI0-FE0-SA0-SU5-JO0,MELINDA,TRAIN
2,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,3,3,3,ORACLE HAD THE LAST PING OFF YOUR PHONE. AND A...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:NIGHTWING,AN0-DI0-FE0-SA0-SU0-JO5,NIGHTWING,TRAIN
3,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,3,4,1,AUDRE'S HERE?,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:MELINDA,AN0-DI0-FE3-SA0-SU5-JO0,MELINDA,TRAIN
4,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,3,4,2,IN A CAR OUTSIDE.,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-08-27 - aselermekova20\nFeeling:AN0-DI0-F...,2024-09-05 - aidaraliev12345\nSpokenBy:NIGHTWING,AN0-DI0-FE0-SA0-SU0-JO3,NIGHTWING,TRAIN
...,...,...,...,...,...,...,...,...,...,...,...
5692,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,21,3,1,MAYBE THIS STEPHANIE PERSON WAS JUST MESSING W...,2024-09-06 - SyimykRasulov\nFeeling:AN1-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN1-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:Siddiq,AN1-DI0-FE3-SA0-SU0-JO0,Siddiq,TRAIN
5693,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,21,3,2,SHE WOULDN'T DO THAT TO US. WE TALKED FOR A LO...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:Eugene,AN0-DI0-FE1-SA3-SU0-JO0,Eugene,TRAIN
5694,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,21,3,3,… I KNOW HER.,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:Eugene,AN0-DI0-FE1-SA3-SU0-JO0,Eugene,TRAIN
5695,QC copy - 1737 - 34 The Walking Dead vol 15 - ...,21,4,1,"UH, GUYS…",2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,2024-09-06 - SyimykRasulov\nFeeling:AN0-DI0-FE...,\n2024-09-06 - SyimykRasulov\nSpokenBy:JUANITA...,AN0-DI0-FE3-SA0-SU4-JO0,JUANITA SANCHEZ,TRAIN


In [28]:
# Number of distinct file_name values remaining in filtered_df.
len(filtered_df.file_name.unique())

35

In [29]:
def get_unique_emotion(row):
    """Turn a row's intensity-coded emotion annotation into a list of labels.

    ``row.emotion`` is expected to be either the literal string ``'Neutral'``
    or a dash-separated sequence of ``<code><intensity>`` tokens such as
    ``'AN0-DI0-FE3-SA0-SU5-JO0'``. An emotion is reported when its intensity
    is a number greater than zero.

    Each token is resolved through its two-letter code rather than its
    position, and the intensity is parsed as an integer. This avoids the
    pitfalls of a positional lookup combined with a ``'0' in token`` test:
    an intensity such as ``10`` being read as zero, unexpected tokens being
    mapped to whichever label shares their position, and an ``IndexError``
    when more than six tokens are present.

    Parameters
    ----------
    row : object
        Anything exposing an ``emotion`` attribute (e.g. a DataFrame row).

    Returns
    -------
    list of str
        ``['neutral']`` for a ``'Neutral'`` annotation; otherwise the labels
        with non-zero intensity, in the order they appear in the annotation
        and without duplicates. An empty list is returned when every
        intensity is zero, when no token is recognised, or when the
        annotation is not a string (e.g. a missing value).
    """
    code_to_label = {
        "AN": "anger",
        "DI": "disgust",
        "FE": "fear",
        "SA": "sadness",
        "SU": "surprise",
        "JO": "joy",
    }

    utterance_emotions = row.emotion

    # Missing annotations reach us as NaN/None; there is nothing to decode.
    if not isinstance(utterance_emotions, str):
        return []

    utterance_emotions = utterance_emotions.strip()
    if utterance_emotions == 'Neutral':
        return ['neutral']

    utterance_emotions_l = []
    for emotion_annotation in utterance_emotions.split("-"):
        emotion_annotation = emotion_annotation.strip()
        code = emotion_annotation[:2].upper()
        intensity = emotion_annotation[2:].strip()

        label = code_to_label.get(code)
        # Skip anything that is not a well-formed <code><intensity> token.
        if label is None or not intensity.isdecimal():
            continue

        if int(intensity) > 0 and label not in utterance_emotions_l:
            utterance_emotions_l.append(label)

    return utterance_emotions_l

In [36]:
# Add an 'emotion_c' column holding, for each row, the list of labels
# produced by get_unique_emotion.
filtered_df['emotion_c'] = filtered_df.apply(get_unique_emotion, axis=1)

In [31]:
# Collapse the table to one row per (file_name, split) pair, gathering the
# utterances, speakers and emotion-label lists of each group into Python lists.
grouped_df = (
    filtered_df
    .groupby(['file_name', 'split'])
    .agg(dict.fromkeys(['utterance', 'speaker_id', 'emotion_c'], list))
    .reset_index()
)

In [32]:
# Display the per-file aggregated table.
grouped_df

Unnamed: 0,file_name,split,utterance,speaker_id,emotion_c
0,QC copy - 1499 - 58 ECC Co_mics 50 _The Jurass...,TRAIN,[THIS VILE THING ATTACKED THE SMALL BEASTS OF ...,"[AQUANYX, AQUANYX, AQUANYX, AQUANYX, AQUANYX, ...","[[anger], [anger], [fear, sadness], [anger], [..."
1,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,TRAIN,"[CAN YOU WALK?, HOW DID YOU FIND ME?, ORACLE H...","[NIGHTWING, MELINDA, NIGHTWING, MELINDA, NIGHT...","[[fear], [surprise], [joy], [fear, surprise], ..."
2,QC copy - 1501 - 09 Mundos sin Liga de la Just...,TRAIN,"["" Tell me another story, Momma. "", "" What kin...","[Eleanor, Momma, Eleanor, Eleanor, Momma, Elea...","[[joy], [surprise], [joy], [joy], [sadness, su..."
3,QC copy - 1502 - 09 Mundos sin Liga de la Just...,TRAIN,"[MOTHERFRAGGER., LOOK, FRIEND, YOU BASTICHES A...","[Lobo, Lobo, Lobo, Lobo, Lobo, Lobo, Lobo, Lob...","[[anger], [anger], [anger], [anger, surprise],..."
4,QC copy - 1503 - 10 Crisis Oscura Flash - FLS ...,TRAIN,"[I'M DONE LETTING YOU TORTURE ME!, YOU'RE NEVE...","[Flash, Flash, Flash, Black Mask, Flash, Black...","[[anger], [anger, sadness], [surprise], [neutr..."
5,QC copy - 1507 - 22 Calle Peligro 1.xlsx,TEST,"[HEY., JUST A COKE., THANKS., IT'S ALL RIGHT.,...","[Travis, Travis, Travis, Travis, Krunch, Krunc...","[[joy], [joy], [joy], [joy], [anger, disgust],..."
6,QC copy - 1508 - 48 Pequen_os Titanes 25.xlsx,TRAIN,"[ SO, LET ME GET THIS STRAIGHT… THERE'S A SUPE...","[Psimon, Psimon, Superboy, Supergirl, Superboy...","[[surprise], [surprise], [joy], [joy], [surpri..."
7,QC copy - 1513 - 21 Blanco Humano 9.xlsx,TRAIN,"[I'M ALREADY TOO LATE., $ % # @., I'M ALSO TOO...","[CHRISTOPHER, CHRISTOPHER, CHRISTOPHER, ICE, C...","[[fear, sadness], [anger], [fear], [fear, surp..."
8,QC copy - 1514 - 15 DC contra Vampiros 11.xlsx,TEST,[YOU LIKE THAT? IT'S MY OWN INVENTION. THREE P...,"[Green Arrow, Green Arrow, Green Arrow, Green ...","[[anger, joy], [sadness, joy], [sadness, joy],..."
9,QC copy - 1517 - 37 John Carpenter Historias p...,TRAIN,[FOUND IT ON THE WAY HERE. THERE WAS A BURNED ...,"[Jones's partner, Jones's partner, John Wesley...","[[anger], [anger, joy], [anger], [anger], [ang..."


In [33]:
def get_stats(row):
    """Return the lengths of the row's utterance, speaker_id and emotion_c values.

    The result is a three-element list, in that order.
    """
    fields = ("utterance", "speaker_id", "emotion_c")
    return [len(getattr(row, field)) for field in fields]

In [34]:
# Record, per group, the three list lengths returned by get_stats.
grouped_df['lens'] = grouped_df.apply(get_stats, axis=1)

In [35]:
# Display the aggregated table including the new 'lens' column.
grouped_df

Unnamed: 0,file_name,split,utterance,speaker_id,emotion_c,lens
0,QC copy - 1499 - 58 ECC Co_mics 50 _The Jurass...,TRAIN,[THIS VILE THING ATTACKED THE SMALL BEASTS OF ...,"[AQUANYX, AQUANYX, AQUANYX, AQUANYX, AQUANYX, ...","[[anger], [anger], [fear, sadness], [anger], [...","[124, 124, 124]"
1,QC copy - 1500 - 04 Nightwing 19 _Nightwing 95...,TRAIN,"[CAN YOU WALK?, HOW DID YOU FIND ME?, ORACLE H...","[NIGHTWING, MELINDA, NIGHTWING, MELINDA, NIGHT...","[[fear], [surprise], [joy], [fear, surprise], ...","[103, 103, 103]"
2,QC copy - 1501 - 09 Mundos sin Liga de la Just...,TRAIN,"["" Tell me another story, Momma. "", "" What kin...","[Eleanor, Momma, Eleanor, Eleanor, Momma, Elea...","[[joy], [surprise], [joy], [joy], [sadness, su...","[167, 167, 167]"
3,QC copy - 1502 - 09 Mundos sin Liga de la Just...,TRAIN,"[MOTHERFRAGGER., LOOK, FRIEND, YOU BASTICHES A...","[Lobo, Lobo, Lobo, Lobo, Lobo, Lobo, Lobo, Lob...","[[anger], [anger], [anger], [anger, surprise],...","[46, 46, 46]"
4,QC copy - 1503 - 10 Crisis Oscura Flash - FLS ...,TRAIN,"[I'M DONE LETTING YOU TORTURE ME!, YOU'RE NEVE...","[Flash, Flash, Flash, Black Mask, Flash, Black...","[[anger], [anger, sadness], [surprise], [neutr...","[130, 130, 130]"
5,QC copy - 1507 - 22 Calle Peligro 1.xlsx,TEST,"[HEY., JUST A COKE., THANKS., IT'S ALL RIGHT.,...","[Travis, Travis, Travis, Travis, Krunch, Krunc...","[[joy], [joy], [joy], [joy], [anger, disgust],...","[259, 259, 259]"
6,QC copy - 1508 - 48 Pequen_os Titanes 25.xlsx,TRAIN,"[ SO, LET ME GET THIS STRAIGHT… THERE'S A SUPE...","[Psimon, Psimon, Superboy, Supergirl, Superboy...","[[surprise], [surprise], [joy], [joy], [surpri...","[142, 142, 142]"
7,QC copy - 1513 - 21 Blanco Humano 9.xlsx,TRAIN,"[I'M ALREADY TOO LATE., $ % # @., I'M ALSO TOO...","[CHRISTOPHER, CHRISTOPHER, CHRISTOPHER, ICE, C...","[[fear, sadness], [anger], [fear], [fear, surp...","[229, 229, 229]"
8,QC copy - 1514 - 15 DC contra Vampiros 11.xlsx,TEST,[YOU LIKE THAT? IT'S MY OWN INVENTION. THREE P...,"[Green Arrow, Green Arrow, Green Arrow, Green ...","[[anger, joy], [sadness, joy], [sadness, joy],...","[120, 120, 120]"
9,QC copy - 1517 - 37 John Carpenter Historias p...,TRAIN,[FOUND IT ON THE WAY HERE. THERE WAS A BURNED ...,"[Jones's partner, Jones's partner, John Wesley...","[[anger], [anger, joy], [anger], [anger], [ang...","[690, 690, 690]"


In [None]:
def build_prompt(row):
    
    utterances = row.utterance
    speakers = row.speaker_id
    emotions = row.emotions_c
    
    pg_utterances = 