# Zero Shot Classification on Comics dataset

In [31]:
import os
import json
import torch
import pickle

import pandas as pd

from tqdm.notebook import tqdm
from pathlib import Path
from sklearn.metrics import classification_report
from transformers import AutoModelForCausalLM, AutoTokenizer

In [32]:
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"

In [33]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [34]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

Phi3ForCausalLM(
  (model): Phi3Model(
    (embed_tokens): Embedding(32064, 3072, padding_idx=32000)
    (embed_dropout): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0-31): 32 x Phi3DecoderLayer(
        (self_attn): Phi3Attention(
          (o_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (qkv_proj): Linear(in_features=3072, out_features=9216, bias=False)
          (rotary_emb): Phi3RotaryEmbedding()
        )
        (mlp): Phi3MLP(
          (gate_up_proj): Linear(in_features=3072, out_features=16384, bias=False)
          (down_proj): Linear(in_features=8192, out_features=3072, bias=False)
          (activation_fn): SiLU()
        )
        (input_layernorm): Phi3RMSNorm((3072,), eps=1e-05)
        (resid_attn_dropout): Dropout(p=0.0, inplace=False)
        (resid_mlp_dropout): Dropout(p=0.0, inplace=False)
        (post_attention_layernorm): Phi3RMSNorm((3072,), eps=1e-05)
      )
    )
    (norm): Phi3RMSNorm((3072,), eps=1e-05)
  )
 

In [35]:
# def generate_prompt(text, labels):
#     prompt = f"Classify the following text into one of these categories. Restrict your answer to only one token. Do not generate any tokens other than the label: {', '.join(labels)}.\n\nText: {text}\n\nCategory:"
#     return prompt


In [36]:
def generate_prompt(text, labels):
    prompt = f"You are an expert in emotion analysis. You are given a text from a comics book. ### Here is the text: {text}\n\nYou must classify the emotion of this text into one of the following emotions classes: {labels}. You must return only a single token to fill in the emotion label of the text. Answer:"
    return prompt


In [37]:
# def generate_prompt(text, labels):
#     prompt = f"""You are an expert in emotion analysis. You are given a text from a comics book.\n\n### Here is the text: {text}\n\nYou must classify the emotion of this text into one or more of the following emotion labels: {labels}. Do not generate any explanation after the answer. ###Answer:###"""
#     return prompt

In [38]:
# def generate_prompt(text, labels):
#     prompt = (
#         f"You are an expert in emotion analysis. Below is a passage from a comic book.\n\n"
#         f"### Text:\n{text}\n\n"
#         f"Classify the emotion(s) of the text using one of the following labels: anger, surprise, fear, disgust, sadness, joy, neutral. Provide only the labels.\n\n"
#         f"### Answer:"
#     )
#     return prompt


In [39]:
text_to_classify = "The stock market saw a sharp rise today."
labels = ["anger", "surprise", "fear", "disgust", "sadness", "joy", "neutral"]

In [40]:
generate_prompt(text_to_classify, labels)

"You are an expert in emotion analysis. You are given a text from a comics book. ### Here is the text: The stock market saw a sharp rise today.\n\nYou must classify the emotion of this text into one of the following emotions classes: ['anger', 'surprise', 'fear', 'disgust', 'sadness', 'joy', 'neutral']. You must return only a single token to fill in the emotion label of the text. Answer:"

In [41]:
def classify_text(text, labels):
    # Create the prompt
    prompt = generate_prompt(text, labels)
    
    # Tokenize and encode the prompt
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Generate output
    outputs = model.generate(**inputs, max_new_tokens=1, eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.eos_token_id, no_repeat_ngram_size=2)
    
    # Decode the generated output
    classification = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    # Extract the predicted category (after the prompt)
    #predicted_category = classification.split("Answer:")[-1].strip()
    return classification
    #return predicted_category

In [42]:
# text_to_classify = "The stock market saw a sharp rise today."
# labels = ["anger", "surprise", "fear", "disgust", "sadness", "joy", "neutral"]

In [43]:
#predicted_category = classify_text(text_to_classify, labels)

In [44]:
#print(f"Predicted category: {predicted_category}")

### Read the data sets

In [45]:
df = pd.read_csv("/Utilisateurs/umushtaq/emotion_analysis_comics/zeroshot/datasets/comics_data_processed.csv")

In [46]:
df = df.drop(columns=[df.columns[0], df.columns[1]])

In [47]:
emotion_map = {
    'AN': 'anger',
    'DI': 'disgust',
    'FE': 'fear',
    'SA': 'sadness',
    'SU': 'surprise',
    'JO': 'joy'
}

In [48]:
def extract_emotions(row):

    emotion_str = row.emotion

    if emotion_str == 'Neutral':
        return ['neutral']

    emotions = emotion_str.split('-')
    tags = []

    for emotion in emotions:
        abbrev = emotion[:2]  # Get the abbreviation
        value_part = emotion[2:]  # Get the value part
        
        if abbrev in emotion_map and value_part.isdigit():
            value = int(value_part)
            if value > 0:
                tags.append(emotion_map[abbrev].lower())
        else:
            print(f"Warning: Skipping invalid emotion entry: '{emotion}'")
    return tags  

In [49]:
df['emotions_list'] = df.apply(lambda row: extract_emotions(row), axis=1)

In [50]:
df = df[df['emotions_list'].apply(lambda x: len(x) == 1)].reset_index()

In [51]:
texts = df.utterance.tolist()

In [52]:
labels = ["anger", "surprise", "fear", "disgust", "sadness", "joy", "neutral"]

In [53]:
predictions = []

for i in tqdm(range(len(texts))):
    predicted_category = classify_text(texts[i], labels)
    predictions.append(predicted_category)

  0%|          | 0/3111 [00:00<?, ?it/s]

You are not running the flash-attention implementation, expect numerical differences.


In [54]:
preds = []

for prediction in predictions:
    preds.append(prediction.split(" ")[-1])

In [55]:
grounds = [x for [x] in df.emotions_list.tolist()]

In [56]:
for idx, (ground, pred) in enumerate(zip(grounds, preds)):

    if pred not in labels:
        del grounds[idx]
        del preds[idx]

In [57]:
print(classification_report(grounds, preds))

              precision    recall  f1-score   support

           '       0.00      0.00      0.00         0
    Answer:
       0.00      0.00      0.00         0
         The       0.00      0.00      0.00         0
       anger       0.63      0.27      0.38       601
         dis       0.00      0.00      0.00         0
     disgust       0.00      0.00      0.00        14
        fear       0.25      0.18      0.21       245
         joy       0.43      0.71      0.53       612
     neutral       0.22      0.33      0.26       271
         sad       0.00      0.00      0.00         0
     sadness       0.00      0.00      0.00       314
    surprise       0.39      0.36      0.38       338

    accuracy                           0.36      2395
   macro avg       0.16      0.15      0.15      2395
weighted avg       0.37      0.36      0.34      2395



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
