In [1]:
import os
import re
import json
import torch

import pickle

import numpy as np
import pandas as pd


from tqdm.notebook import tqdm
from pathlib import Path
from sklearn.metrics import classification_report
from transformers import AutoModelForCausalLM, AutoTokenizer

In [2]:
model_id = "microsoft/Phi-3-mini-4k-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [3]:
tokenizer.pad_token = tokenizer.eos_token

In [4]:
def make_prompt(utterance):

    conversation = [
        {"role": "system", "content": "### Task description: You are an expert sentiment analysis assistant that takes an utterance from a comic book and must classify the utterance into appropriate emotion class(s): anger, surprise, fear, disgust, sadness, joy, neutral. You must absolutely not generate any text or explanation other than the following JSON format: {\"utterance_emotion\": \"<predicted emotion classes for the utterance (str)>}\"\n\n"},
        {"role":"user", "content": f"# Utterance:\n{utterance}\n\n# Result:\n"}
    ]

    return conversation

### Read data files

In [5]:
df = pd.read_csv("/Utilisateurs/umushtaq/emotion_analysis_comics/zeroshot/datasets/comics_data_processed.csv")
df = df.drop(columns=[df.columns[0], df.columns[1]])

In [6]:
emotion_map = {
    'AN': 'anger',
    'DI': 'disgust',
    'FE': 'fear',
    'SA': 'sadness',
    'SU': 'surprise',
    'JO': 'joy'
}
labels = ["anger", "surprise", "fear", "disgust", "sadness", "joy", "neutral"]

In [7]:
def extract_emotions(row):

    emotion_str = row.emotion

    if emotion_str == 'Neutral':
        return ['neutral']

    emotions = emotion_str.split('-')
    tags = []

    for emotion in emotions:
        abbrev = emotion[:2]  # Get the abbreviation
        value_part = emotion[2:]  # Get the value part
        
        if abbrev in emotion_map and value_part.isdigit():
            value = int(value_part)
            if value > 0:
                tags.append(emotion_map[abbrev].lower())
        else:
            print(f"Warning: Skipping invalid emotion entry: '{emotion}'")
    return tags  

In [8]:
df['emotions_list'] = df.apply(lambda row: extract_emotions(row), axis=1)

In [9]:
texts = df.utterance.tolist()
texts = [make_prompt(text) for text in texts]

In [10]:
inputs = tokenizer.apply_chat_template(
            texts,
            #tools=tools,
            # pad_token = tokenizer.eos_token,
            padding=True,
            truncation=True,
            add_generation_prompt=True,
            return_dict=True,
            return_tensors="pt",
)

In [11]:
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [12]:
def batch_tensor(tensor, batch_size):
    return [tensor[i:i+batch_size] for i in range(0, tensor.size(0), batch_size)]

In [13]:
BATCH_SIZE = 128

In [14]:
input_ids_batches = batch_tensor(inputs['input_ids'], BATCH_SIZE)
attention_mask_batches = batch_tensor(inputs['attention_mask'], BATCH_SIZE)

In [15]:
generated_outputs = []

In [16]:
for i, (input_ids_batch, attention_mask_batch) in tqdm(enumerate(zip(input_ids_batches, attention_mask_batches))):
    
    print(f"Processing batch {i + 1}")
    
    # Move tensors to model device
    inputs = {
        'input_ids': input_ids_batch.to(model.device),
        'attention_mask': attention_mask_batch.to(model.device)
    }
    
    # Generate output using model.generate
    generated = model.generate(**inputs, max_new_tokens=32)
    
    # Store the generated output
    generated_outputs.append(generated)

0it [00:00, ?it/s]

Processing batch 1


You are not running the flash-attention implementation, expect numerical differences.


Processing batch 2
Processing batch 3
Processing batch 4
Processing batch 5
Processing batch 6
Processing batch 7
Processing batch 8
Processing batch 9
Processing batch 10
Processing batch 11
Processing batch 12
Processing batch 13
Processing batch 14
Processing batch 15
Processing batch 16
Processing batch 17
Processing batch 18
Processing batch 19
Processing batch 20
Processing batch 21
Processing batch 22
Processing batch 23
Processing batch 24
Processing batch 25
Processing batch 26
Processing batch 27
Processing batch 28
Processing batch 29
Processing batch 30
Processing batch 31
Processing batch 32
Processing batch 33
Processing batch 34
Processing batch 35
Processing batch 36
Processing batch 37
Processing batch 38
Processing batch 39
Processing batch 40
Processing batch 41
Processing batch 42


In [17]:
decoded_outputs = []

for batch in generated_outputs:

    for prediction in batch:

        decoded_outputs.append(tokenizer.decode(prediction, skip_special_tokens=True))

In [18]:
len(decoded_outputs)

5282

In [19]:
decoded_outputs

['### Task description: You are an expert sentiment analysis assistant that takes an utterance from a comic book and must classify the utterance into appropriate emotion class(s): anger, surprise, fear, disgust, sadness, joy, neutral. You must absolutely not generate any text or explanation other than the following JSON format: {"utterance_emotion": "<predicted emotion classes for the utterance (str)>}"\n\n # Utterance:\nDID YOU HAVE TO ELECTROCUTE HER SO HARD?\n\n# Result:\n {"utterance_emotion": "anger"}',
 '### Task description: You are an expert sentiment analysis assistant that takes an utterance from a comic book and must classify the utterance into appropriate emotion class(s): anger, surprise, fear, disgust, sadness, joy, neutral. You must absolutely not generate any text or explanation other than the following JSON format: {"utterance_emotion": "<predicted emotion classes for the utterance (str)>}"\n\n # Utterance:\nIT\'S NOT LIKE I HAVE DIFFERENT SETTINGS.\n\n# Result:\n {"ut

In [20]:
x = []

for decoded_ouput in decoded_outputs:
    x.append(decoded_ouput.split("Result:\n")[1])

In [27]:
z

['anger',
 'anger',
 'anger',
 'surprise',
 'anger',
 'surprise',
 'anger',
 'surprise',
 'neutral',
 'surprise',
 'neutral',
 'surprise',
 'neutral',
 'anger',
 'joy',
 'anger',
 'neutral',
 'sadness',
 'neutral',
 'anger',
 'anger',
 'joy',
 'neutral',
 'neutral',
 'neutral',
 'joy',
 'neutral',
 'neutral',
 'anger',
 'anger',
 'neutral',
 'surprise',
 'neutral',
 'surprise',
 'fear',
 'anger',
 'surprise',
 'surprise',
 'anger',
 'anger',
 'joy',
 'sadness',
 'surprise',
 'anger',
 'neutral',
 'surprise',
 'surprise',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'surprise',
 'anger',
 'anger',
 'neutral',
 'joy',
 'joy',
 'anger',
 'anger',
 'surprise',
 'neutral',
 'anger',
 'anger',
 'neutral',
 'anger',
 'joy',
 'joy',
 'trust',
 'joy',
 'anger',
 'sadness',
 'neutral',
 'anger',
 'sadness',
 'surprise',
 'anger',
 'joy',
 'joy',
 'joy',
 'joy',
 'fear',
 'surprise',
 'neutral',
 'anger',
 'anger',
 'joy',
 'anger',
 'neutral',
 'anger',
 'joy',
 'surprise',
 'anger',
 'anger

In [21]:
z = []

for y in x:

    match = re.search(r'(\{.*?\})', y)

    if match:

        json_str = match.group(1)  # Extract the JSON object part
        try:
            # Parse the JSON string
            parsed_json = json.loads(json_str)
            
            # Extract the 'utterance_emotion' value
            utterance_emotion = parsed_json.get('utterance_emotion')
            z.append(utterance_emotion)
            #print("Extracted utterance_emotion:", utterance_emotion)
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON: {e}")

In [22]:
preds_l = [[emotion] for emotion in z]

In [23]:
grounds = df.emotions_list.tolist()

In [24]:
all_labels = ["anger", "surprise", "fear", "disgust", "sadness", "joy", "neutral"]

def labels_to_binary_matrix(label_list, all_labels):
    binary_matrix = np.zeros((len(label_list), len(all_labels)))
    for i, labels in enumerate(label_list):
        for label in labels:
            if label in all_labels:
                binary_matrix[i][all_labels.index(label)] = 1
    return binary_matrix

def opposite(component_type):

    if component_type == "anger":
        return "surprise"
    elif component_type == "disgust":
        return "joy"
    elif component_type == "fear":
        return "sadness"
    elif component_type == "sadness":
        return "anger"
    elif component_type == "surprise":
        return "disgust"
    elif component_type == "joy":
        return "fear"
    elif component_type == "Neutral":
        return "sadness"
    

def harmonize_preds(grounds, preds):

    l1, l2 = len(preds), len(grounds)
    if l1 < l2:
        diff = l2 - l1
        preds = preds + [opposite(x) for x in grounds[l1:]]
    else:
        preds = preds[:l2]
        
    return preds 

def post_process_zs(grounds, preds):

    for i,(x,y) in enumerate(zip(grounds, preds)):
        
        if len(x) != len(y):
            
            preds[i] = harmonize_preds(x, y)

    true_matrix = labels_to_binary_matrix(grounds, all_labels)
    predicted_matrix = labels_to_binary_matrix(preds, all_labels)

    return true_matrix, predicted_matrix

In [25]:
grounds_matrix, preds_matrix = post_process_zs(grounds, preds_l)

In [26]:
classification_report(grounds_matrix, preds_matrix, target_names=all_labels, digits=3)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


'              precision    recall  f1-score   support\n\n       anger      0.508     0.581     0.542      1791\n    surprise      0.598     0.302     0.401      1590\n        fear      0.243     0.104     0.146      1373\n     disgust      0.061     0.190     0.092       311\n     sadness      0.385     0.198     0.261      1238\n         joy      0.409     0.496     0.448      1104\n     neutral      0.138     0.417     0.207       343\n\n   micro avg      0.358     0.343     0.350      7750\n   macro avg      0.334     0.327     0.300      7750\nweighted avg      0.411     0.343     0.352      7750\n samples avg      0.367     0.355     0.359      7750\n'

In [28]:
results_file = Path("/Utilisateurs/umushtaq/emotion_analysis_comics/zeroshot/results/zs_Phi-3-mini-4k-instruct") / "results.pickle"
results_file.parent.mkdir(parents=True, exist_ok=True)

with results_file.open('wb') as fh:
    results_d = {"ground_truths": grounds,
                 "predictions": preds_l    
        
    }
    pickle.dump(results_d, fh)

In [29]:
classification_file = Path("/Utilisateurs/umushtaq/emotion_analysis_comics/zeroshot/results/zs_Phi-3-mini-4k-instruct") / "classification_report.pickle"

with classification_file.open('wb') as fh:
    
    pickle.dump(classification_report(grounds_matrix, preds_matrix, target_names=all_labels, output_dict=True), fh)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
