In [1]:
import torch
import json_repair
import pandas as pd

from tqdm import tqdm
from datasets import Dataset

from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel, is_bfloat16_supported

from sklearn.metrics import classification_report
from sklearn.preprocessing import MultiLabelBinarizer

Unsloth currently does not support multi GPU setups - but we are working on it!
Multiple CUDA devices detected but we require a single device.
We will override CUDA_VISIBLE_DEVICES to first device: 0.


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [None]:
max_seq_length = 2048
model, tokenizer = FastLanguageModel.from_pretrained(
    #model_name="unsloth/Llama-3.3-70B-Instruct-bnb-4bit",
    model_name="unsloth/Qwen2.5-7B-Instruct-bnb-4bit",
    max_seq_length=max_seq_length,
    load_in_4bit=True,
    dtype=None,
)

==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.47.0.
   \\   /|    GPU: NVIDIA H100 NVL. Max memory: 93.003 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 9.0. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [3]:
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"], 
    use_rslora=True,
    use_gradient_checkpointing="unsloth"
)

Unsloth 2024.12.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


## data

In [4]:
df = pd.read_csv("/Utilisateurs/umushtaq/emotion_analysis_comics/dataset_files/comics_dataset.csv")

In [5]:
def get_emotions(row):

    utterance_emotions = row.emotion
    utterance_emotions_l = []
    emotion_class_labels = ["Anger", "Disgust", "Fear", "Sadness", "Surprise", "Joy"]

    if utterance_emotions == 'Neutral':
        
        utterance_emotions_l.append(utterance_emotions)
    
    else:
        utterance_emotions = utterance_emotions.split("-")
       
        #emotion_annotation_l = []

        for idx, emotion_annotation in enumerate(utterance_emotions):

            if '0' not in emotion_annotation:
         
                #emotion_annotation_l.append(emotion_class_labels[idx])
                #utterance_emotions_l.append(emotion_annotation[:-1])
                utterance_emotions_l.append(emotion_class_labels[idx])
            
        #title_emotions_l.append(emotion_annotation_l)
                

    return utterance_emotions_l

In [6]:
df['emotion_u'] = df.apply(lambda row: get_emotions(row), axis=1)

In [7]:
df_s = df[~df['speaker_id'].str.startswith('ID-')].reset_index(drop=True)

In [8]:
file_names_l = df_s.file_name.unique().tolist()

In [9]:
comics_titles = [
    
    "Nightwing",
    "Worlds Without a Justice League - Green Lantern",
    "Human Target",
    "American Vampire",
    "Dragon Age",
    "Dragon Age",
    "The Walking Dead",
    "Worlds Without a Justice League - Green Lantern",
    "Dark Crisis: The Flash",
    "Danger Street",
    "Tiny Titans",
    "The Amazing Adventures of the Ninja Turtles",
    "Sonic The Hedgehog",
    "Love Everlasting",
    "Fantasmas",
    "Fantasmas",
    "Fantasmas",
    "Fantasmas",
    "Fantasmas",
    "Fantasmas",
    "Fantasmas",
    "Fantasmas",
    "Fantasmas",
    "Fantasmas",
    "American Vampire",
    "American Vampire",
    "Dragon Age",
    "Stillwater",
    "Jurassic League",
    "John Carpenter's Tales for a Halloweenight",
    "DC vs. Vampires",
    "Thief Of Thieves",
    "The Walking Dead",
    "The Walking Dead",
    "The Walking Dead",

]


In [10]:
df_s['comics_title'] = df_s['file_name'].apply(lambda x: comics_titles[file_names_l.index(x)] if x in file_names_l else None)

In [11]:
wrong_speaker_id = ['no annotation', 'unknown', ' ID-12', 'unknown_speaker']

In [12]:
df_s = df_s[~df_s['speaker_id'].isin(wrong_speaker_id)].reset_index(drop=True)

In [13]:
df_s.columns

Index(['file_name', 'page_nr', 'panel_nr', 'balloon_nr', 'utterance',
       'raw_annotation', 'raw_emotion', 'raw_speaker_id', 'emotion',
       'speaker_id', 'split', 'emotion_u', 'comics_title'],
      dtype='object')

In [14]:
df_s_train = df_s[df_s.split == "TRAIN"].reset_index(drop=True)

In [15]:
len(df_s_train)

4596

### Comics dataset

In [16]:
# def build_instruction():
#     emotion_classes = ["Anger", "Disgust", "Fear", "Sadness", "Surprise", "Joy", "Neutral"]
#     formatted_classes = ", ".join([f'"{emotion}"' for emotion in emotion_classes])
    
#     instruction = f"""### Emotion Analysis Expert Role

# You are an advanced emotion analysis expert specializing in comic book dialogue interpretation. Your task is to analyze utterances and identify their emotional content.

# INPUT:
# - You will receive a single utterance from a comic book
# - The utterance may express one or multiple emotions

# TASK:
# 1. Carefully analyze the emotional context and tone of the utterance
# 2. Identify applicable emotions from the following classes:
#    {formatted_classes}

# OUTPUT REQUIREMENTS:
# - Format: JSON object with a single key "list_emotion_classes"
# - Value: Array of one or more emotion classes as strings

# IMPORTANT NOTES:
# - Do not include any explanations in the output, only the JSON object

# """
#     return instruction

In [17]:
def build_instruction():
    emotion_classes = ["Anger", "Disgust", "Fear", "Sadness", "Surprise", "Joy", "Neutral"]
    formatted_classes = ", ".join([f'"{emotion}"' for emotion in emotion_classes])
    
    instruction = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
You are an advanced emotion analysis expert specializing in comic book dialogue interpretation. Analyze the following comic book utterance and identify the emotions expressed.

Given the following information:
- Comic Title: <comic_title>
- Speaker ID: <speaker_id>
- Utterance: <utterance>

Your task is to:
1. Carefully analyze the emotional context and tone of the utterance
2. Consider the comic's context and speaker's identity
3. Identify applicable emotions from these classes:
   {formatted_classes}

### Response:
Provide your response in the following JSON format:
{{
    "list_emotion_classes": ["emotion1", ... ]
}}

### Note:
- Return only the JSON object with a single key "list_emotion_classes" and value is an array of one or more emotion classes as strings
- Include one or more emotions from the provided classes
- Do not include any explanations in the output, only the JSON object

"""
    
    return instruction

In [18]:
instruction = build_instruction()

In [19]:
human_msg_l = []
assistant_msg_l = []

for _, row in df_s_train.iterrows():
        
        prompt = instruction.replace("<comic_title>", row['comics_title']).replace("<speaker_id>", row['speaker_id']).replace("<utterance>", row['utterance'])
        
        human_msg = {'from': 'human', 'value': prompt}

        #obj = {"list_emotion_classes": row['emotion_u']}
        obj = row['emotion_u']
        #assistant_msg = {'from': 'gpt', 'value': obj}
        assistant_msg = {'from': 'gpt', 'value': f'{{"list_emotion_classes": {obj}}}'}


        human_msg_l.append(human_msg)
        assistant_msg_l.append(assistant_msg)
        

In [20]:
comics_dataset = []

for i in range(len(human_msg_l)):
    
    #obj = {"list_emotion_classes": ["Anger", "Fear"]}

    comics_dataset.append([human_msg_l[i], assistant_msg_l[i]])

In [21]:
len(comics_dataset)

4596

In [22]:
def fix_comics_dataset(comics_dataset):
    fixed_comics_dataset = []
    for conversation in comics_dataset:
        fixed_conversation = []
        for message in conversation:
            if isinstance(message['value'], list):  # If the 'value' is a list of emotions
                message['value'] = ', '.join(message['value'])  # Join the list into a string
            fixed_conversation.append(message)
        fixed_comics_dataset.append(fixed_conversation)
    return fixed_comics_dataset

In [23]:
fixed_comics_dataset = fix_comics_dataset(comics_dataset)

In [24]:
dataset = Dataset.from_dict({
    'conversations': fixed_comics_dataset
})

In [25]:
dataset

Dataset({
    features: ['conversations'],
    num_rows: 4596
})

In [26]:
dataset[0]['conversations']

[{'from': 'human',
  'value': 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nYou are an advanced emotion analysis expert specializing in comic book dialogue interpretation. Analyze the following comic book utterance and identify the emotions expressed.\n\nGiven the following information:\n- Comic Title: Nightwing\n- Speaker ID: NIGHTWING\n- Utterance: CAN YOU WALK?\n\nYour task is to:\n1. Carefully analyze the emotional context and tone of the utterance\n2. Consider the comic\'s context and speaker\'s identity\n3. Identify applicable emotions from these classes:\n   "Anger", "Disgust", "Fear", "Sadness", "Surprise", "Joy", "Neutral"\n\n### Response:\nProvide your response in the following JSON format:\n{\n    "list_emotion_classes": ["emotion1", ... ]\n}\n\n### Note:\n- Return only the JSON object with a single key "list_emotion_classes" and value is an array of one or more emotion classes as strings\n- In

In [27]:
tokenizer = get_chat_template(
    tokenizer,
    mapping={"role": "from", "content": "value", "user": "human", "assistant": "gpt"},
    #chat_template="chatml",
)

def apply_template_comics(examples):
    messages = examples["conversations"]
    #messages = examples['input'] + examples['output']
    text = [tokenizer.apply_chat_template(message, tokenize=False, add_generation_prompt=False) for message in messages]
    return {"text": text}

Unsloth: Will map <|im_end|> to EOS = <|eot_id|>.


In [28]:
comics_dataset = dataset.map(apply_template_comics, batched=True)

Map:   0%|          | 0/4596 [00:00<?, ? examples/s]

In [29]:
comics_dataset

Dataset({
    features: ['conversations', 'text'],
    num_rows: 4596
})

In [30]:
def split_dataset(dataset, train_ratio=0.8):
    train_test = dataset.train_test_split(test_size=1 - train_ratio)
    return train_test

dataset_split = split_dataset(comics_dataset)

In [31]:
train_dataset = dataset_split['train']
eval_dataset = dataset_split['test']

In [32]:
train_dataset

Dataset({
    features: ['conversations', 'text'],
    num_rows: 3676
})

In [33]:
print(train_dataset[1360]['text'])

<|im_start|>user
Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
You are an advanced emotion analysis expert specializing in comic book dialogue interpretation. Analyze the following comic book utterance and identify the emotions expressed.

Given the following information:
- Comic Title: American Vampire
- Speaker ID: Skinner
- Utterance: I GET IT, BUT STILL, WHAT IF I HAVE ANOTHER ONSET OF WHATEVER THE HELL THAT WAS?

Your task is to:
1. Carefully analyze the emotional context and tone of the utterance
2. Consider the comic's context and speaker's identity
3. Identify applicable emotions from these classes:
   "Anger", "Disgust", "Fear", "Sadness", "Surprise", "Joy", "Neutral"

### Response:
Provide your response in the following JSON format:
{
    "list_emotion_classes": ["emotion1", ... ]
}

### Note:
- Return only the JSON object with a single key "list_emotion_classes" and value is an array of one or more

In [35]:
args=TrainingArguments(
        learning_rate=3e-4,
        lr_scheduler_type="cosine",
        per_device_train_batch_size=16,
        gradient_accumulation_steps=2,
        num_train_epochs=4,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=100,
        optim="adamw_8bit",
        weight_decay=0.01,
        warmup_steps=10,
        
        eval_strategy="steps",  # Run evaluation during training (can also use "epoch")
        eval_steps=50,  # Perform evaluation every 50 steps
        save_strategy="steps",  # Save the model every few steps
        save_steps=50,  # Save every 200 steps
        load_best_model_at_end=True,
    
        output_dir="ft_uns_comics_qwen2.5",
        seed=0,
    )

In [36]:
trainer=SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,  # Replace with your train dataset
    eval_dataset=eval_dataset, 
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=args,
)

Map (num_proc=2):   0%|          | 0/3676 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/920 [00:00<?, ? examples/s]

In [37]:
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 3,676 | Num Epochs = 4
O^O/ \_/ \    Batch size per device = 16 | Gradient Accumulation steps = 2
\        /    Total batch size = 32 | Total steps = 460
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss,Validation Loss
50,No log,0.181405
100,0.657900,0.174865
150,0.657900,0.175175
200,0.300700,0.172095
250,0.300700,0.188942
300,0.231200,0.186601
350,0.231200,0.198766
400,0.161600,0.219725
450,0.161600,0.221694


TrainOutput(global_step=460, training_loss=0.31036405667014744, metrics={'train_runtime': 1473.4765, 'train_samples_per_second': 9.979, 'train_steps_per_second': 0.312, 'total_flos': 2.0309110050019738e+17, 'train_loss': 0.31036405667014744, 'epoch': 4.0})

In [38]:
model = FastLanguageModel.for_inference(model)

In [39]:
df_test = df_s[df_s.split == "TEST"].reset_index(drop=True)

In [40]:
human_msg_l = []
assistant_msg_l = []

for _, row in df_test.iterrows():
        
        prompt = instruction.replace("<comic_title>", row['comics_title']).replace("<speaker_id>", row['speaker_id']).replace("<utterance>", row['utterance'])
        
        human_msg = {'role': 'user', 'content': prompt}
        
        #obj = {"list_emotion_classes": row['emotion_u']}
        obj = row['emotion_u']
        assistant_msg = {'role': 'assistant', 'content': ""}
        
        human_msg_l.append(human_msg)
        assistant_msg_l.append(assistant_msg)
        
        

In [41]:
test_messages = []

for i in range(len(human_msg_l)):
    
    #obj = {"list_emotion_classes": ["Anger", "Fear"]}

    test_messages.append([human_msg_l[i], assistant_msg_l[i]])

In [42]:
len(test_messages)

1101

In [43]:
# test_messages = test_messages[:100]

In [44]:
raw_outputs = []

for message in tqdm(test_messages):
    
    inputs = tokenizer.apply_chat_template(
    message,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
).to("cuda")
    #print(inputs)
    #break
    
    output = model.generate(input_ids=inputs, max_new_tokens=128)[0]
    
    input_length = inputs.shape[1]
    generated_tokens = output[input_length:]
    
    decoded_output = tokenizer.decode(generated_tokens, skip_special_tokens=True)  
    #decoded_output = tokenizer.decode(output, skip_special_tokens=True)
    raw_outputs.append(decoded_output)
    #break

  0%|          | 0/1101 [00:00<?, ?it/s]The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
100%|██████████| 1101/1101 [10:02<00:00,  1.83it/s]


In [45]:
#print(tokenizer.decode(inputs[0]))

In [46]:
len(raw_outputs)

1101

In [47]:
raw_outputs

['{"list_emotion_classes": [\'Fear\', \'Sadness\']}',
 '{"list_emotion_classes": [\'Fear\', \'Surprise\']}',
 '{"list_emotion_classes": [\'Sadness\']}',
 '{"list_emotion_classes": [\'Anger\', \'Surprise\']}',
 '{"list_emotion_classes": [\'Neutral\']}',
 '{"list_emotion_classes": [\'Neutral\']}',
 '{"list_emotion_classes": [\'Surprise\']}',
 '{"list_emotion_classes": [\'Neutral\']}',
 '{"list_emotion_classes": [\'Neutral\']}',
 '{"list_emotion_classes": [\'Neutral\']}',
 '{"list_emotion_classes": [\'Neutral\']}',
 '{"list_emotion_classes": [\'Neutral\']}',
 '{"list_emotion_classes": [\'Neutral\']}',
 '{"list_emotion_classes": [\'Fear\', \'Surprise\']}',
 '{"list_emotion_classes": [\'Sadness\']}',
 '{"list_emotion_classes": [\'Sadness\']}',
 '{"list_emotion_classes": [\'Fear\', \'Sadness\']}',
 '{"list_emotion_classes": [\'Anger\', \'Fear\', \'Surprise\']}',
 '{"list_emotion_classes": [\'Sadness\', \'Joy\']}',
 '{"list_emotion_classes": [\'Fear\', \'Surprise\']}',
 '{"list_emotion_classe

In [48]:
grounds = df_test.emotion_u.tolist()

In [49]:
len(grounds)

1101

In [50]:
predictions = [e.split('\n\n')[0] for e in raw_outputs]
#predictions = [json.loads(e) for e in raw_outputs]

In [51]:
len(predictions)

1101

In [52]:
predictions

['{"list_emotion_classes": [\'Fear\', \'Sadness\']}',
 '{"list_emotion_classes": [\'Fear\', \'Surprise\']}',
 '{"list_emotion_classes": [\'Sadness\']}',
 '{"list_emotion_classes": [\'Anger\', \'Surprise\']}',
 '{"list_emotion_classes": [\'Neutral\']}',
 '{"list_emotion_classes": [\'Neutral\']}',
 '{"list_emotion_classes": [\'Surprise\']}',
 '{"list_emotion_classes": [\'Neutral\']}',
 '{"list_emotion_classes": [\'Neutral\']}',
 '{"list_emotion_classes": [\'Neutral\']}',
 '{"list_emotion_classes": [\'Neutral\']}',
 '{"list_emotion_classes": [\'Neutral\']}',
 '{"list_emotion_classes": [\'Neutral\']}',
 '{"list_emotion_classes": [\'Fear\', \'Surprise\']}',
 '{"list_emotion_classes": [\'Sadness\']}',
 '{"list_emotion_classes": [\'Sadness\']}',
 '{"list_emotion_classes": [\'Fear\', \'Sadness\']}',
 '{"list_emotion_classes": [\'Anger\', \'Fear\', \'Surprise\']}',
 '{"list_emotion_classes": [\'Sadness\', \'Joy\']}',
 '{"list_emotion_classes": [\'Fear\', \'Surprise\']}',
 '{"list_emotion_classe

In [53]:
predictions = [json_repair.loads(e) for e in predictions]

In [54]:
predictions

[{'list_emotion_classes': ['Fear', 'Sadness']},
 {'list_emotion_classes': ['Fear', 'Surprise']},
 {'list_emotion_classes': ['Sadness']},
 {'list_emotion_classes': ['Anger', 'Surprise']},
 {'list_emotion_classes': ['Neutral']},
 {'list_emotion_classes': ['Neutral']},
 {'list_emotion_classes': ['Surprise']},
 {'list_emotion_classes': ['Neutral']},
 {'list_emotion_classes': ['Neutral']},
 {'list_emotion_classes': ['Neutral']},
 {'list_emotion_classes': ['Neutral']},
 {'list_emotion_classes': ['Neutral']},
 {'list_emotion_classes': ['Neutral']},
 {'list_emotion_classes': ['Fear', 'Surprise']},
 {'list_emotion_classes': ['Sadness']},
 {'list_emotion_classes': ['Sadness']},
 {'list_emotion_classes': ['Fear', 'Sadness']},
 {'list_emotion_classes': ['Anger', 'Fear', 'Surprise']},
 {'list_emotion_classes': ['Sadness', 'Joy']},
 {'list_emotion_classes': ['Fear', 'Surprise']},
 {'list_emotion_classes': ['Surprise', 'Joy']},
 {'list_emotion_classes': ['Sadness']},
 {'list_emotion_classes': ['Fear'

In [55]:
preds_l = []
bad_idx = []

for i, pred in enumerate(predictions):
    try:
        
        preds_l.append(pred['list_emotion_classes'])
    except:
        print(i)
        bad_idx.append(i)

In [56]:
grounds = [item for i, item in enumerate(grounds) if i not in bad_idx]

In [57]:
len(grounds), len(preds_l)

(1101, 1101)

In [58]:
preds_l

[['Fear', 'Sadness'],
 ['Fear', 'Surprise'],
 ['Sadness'],
 ['Anger', 'Surprise'],
 ['Neutral'],
 ['Neutral'],
 ['Surprise'],
 ['Neutral'],
 ['Neutral'],
 ['Neutral'],
 ['Neutral'],
 ['Neutral'],
 ['Neutral'],
 ['Fear', 'Surprise'],
 ['Sadness'],
 ['Sadness'],
 ['Fear', 'Sadness'],
 ['Anger', 'Fear', 'Surprise'],
 ['Sadness', 'Joy'],
 ['Fear', 'Surprise'],
 ['Surprise', 'Joy'],
 ['Sadness'],
 ['Fear', 'Surprise'],
 ['Neutral'],
 ['Surprise', 'Joy'],
 ['Anger'],
 ['Anger', 'Surprise'],
 ['Anger', 'Disgust'],
 ['Fear', 'Surprise'],
 ['Neutral'],
 ['Anger', 'Fear', 'Surprise'],
 ['Neutral'],
 ['Neutral'],
 ['Anger', 'Fear'],
 ['Neutral'],
 ['Neutral'],
 ['Neutral'],
 ['Fear', 'Sadness'],
 ['Anger', 'Fear'],
 ['Neutral'],
 ['Fear'],
 ['Neutral'],
 ['Fear', 'Sadness'],
 ['Neutral'],
 ['Anger', 'Disgust', 'Sadness'],
 ['Anger', 'Disgust', 'Surprise'],
 ['Anger'],
 ['Fear', 'Sadness'],
 ['Neutral'],
 ['Fear', 'Surprise'],
 ['Anger', 'Fear', 'Surprise'],
 ['Neutral'],
 ['Neutral'],
 ['Sadness'

In [59]:
mlb = MultiLabelBinarizer()

In [60]:
y_true_mhot = mlb.fit_transform(grounds)
y_pred_mhot = mlb.transform(preds_l)

In [61]:
y_pred_mhot.shape

(1101, 7)

In [62]:
y_pred_mhot.shape

(1101, 7)

In [63]:
print(classification_report(y_true_mhot, y_pred_mhot, target_names=mlb.classes_, digits=3))

              precision    recall  f1-score   support

       Anger      0.534     0.476     0.503       399
     Disgust      0.123     0.319     0.178        47
        Fear      0.401     0.540     0.460       235
         Joy      0.698     0.303     0.423       244
     Neutral      0.207     0.444     0.283        63
     Sadness      0.522     0.522     0.522       314
    Surprise      0.544     0.599     0.570       279

   micro avg      0.462     0.484     0.473      1581
   macro avg      0.433     0.458     0.420      1581
weighted avg      0.514     0.484     0.482      1581
 samples avg      0.482     0.489     0.463      1581

