In [1]:
import os
import ast
import sys
import json
import torch
import pickle
import subprocess

sys.path.append('../')

import pandas as pd

from pathlib import Path
from tqdm import tqdm
from llamafactory.chat import ChatModel
from llamafactory.extras.misc import torch_gc
from sklearn.metrics import classification_report
#from utils.post_processing import post_process


In [2]:
try:    
    assert torch.cuda.is_available() is True
    
except AssertionError:
    
    print("Please set up a GPU before using LLaMA Factory...")

In [3]:
CURRENT_DIR = Path.cwd()
#FT_DIR = CURRENT_DIR / "emotion_analysis_comics" / "finetuning"
FT_DIR = CURRENT_DIR / "emotion_analysis_comics" / "comics_FT"
#DATASET_DIR = CURRENT_DIR / "emotion_analysis_comics" / "finetuning" / "datasets"
DATASET_DIR = CURRENT_DIR / "emotion_analysis_comics" / "comics_FT" / "datasets"

ERC_DIR = FT_DIR.parent
LLAMA_FACTORY_DIR = ERC_DIR / "LLaMA-Factory"

BASE_MODEL = "unsloth/llama-3-8b-Instruct-bnb-4bit"
LOGGING_DIR = FT_DIR / "training_logs"
OUTPUT_DIR = FT_DIR / "saved_models" / f"""comics35_pg_uni_{BASE_MODEL.split("/")[1]}"""

In [4]:
DATASET_DIR = CURRENT_DIR / "emotion_analysis_comics" / "comics_FT" / "datasets"

In [5]:
train_dataset_name = f"""comics35_utterance_pg_unilabel_train.json"""
test_dataset_name = f"""comics35_utterance_pg_unilabel_test.json"""

train_dataset_file = DATASET_DIR / train_dataset_name
test_dataset_file = DATASET_DIR / test_dataset_name

In [6]:
train_dataset_file

PosixPath('/Utilisateurs/umushtaq/emotion_analysis_comics/comics_FT/datasets/comics35_utterance_pg_unilabel_train.json')

In [7]:
if not os.path.exists(os.path.join(FT_DIR, "model_args")):
    os.mkdir(os.path.join(FT_DIR, "model_args"))

train_file = FT_DIR / "model_args" / f"""{train_dataset_name.split(".")[0].split("train")[0]}{BASE_MODEL.split("/")[1]}.json"""

In [8]:
dataset_info_line =  {
  "file_name": f"{train_dataset_file}",
  "columns": {
    "prompt": "instruction",
    "query": "input",
    "response": "output"
  }
}

In [9]:
with open(os.path.join(LLAMA_FACTORY_DIR, "data/dataset_info.json"), "r") as jsonFile:
    data = json.load(jsonFile)

data["comics"] = dataset_info_line

with open(os.path.join(LLAMA_FACTORY_DIR, "data/dataset_info.json"), "w") as jsonFile:
    json.dump(data, jsonFile)

In [10]:
NB_EPOCHS = 20

In [11]:
args = dict(
    
  stage="sft",                           # do supervised fine-tuning
  do_train=True,

  model_name_or_path=BASE_MODEL,         # use bnb-4bit-quantized Llama-3-8B-Instruct model
  num_train_epochs=NB_EPOCHS,            # the epochs of training
  output_dir=str(OUTPUT_DIR),                 # the path to save LoRA adapters
  overwrite_output_dir=True,             # overrides existing output contents

  dataset="comics",                      # dataset name
  template="llama3",                     # use llama3 prompt template
  #train_on_prompt=True,
  val_size=0.1,
  max_samples=10000,                       # use 500 examples in each dataset

  finetuning_type="lora",                # use LoRA adapters to save memory
  lora_target="all",                     # attach LoRA adapters to all linear layers
  per_device_train_batch_size=8,         # the batch size
  gradient_accumulation_steps=4,         # the gradient accumulation steps
  lr_scheduler_type="linear",            # use cosine learning rate scheduler
  loraplus_lr_ratio=32.0,                # use LoRA+ algorithm with lambda=16.0
  #temperature=0.5,
  
  warmup_ratio=0.1,                      # use warmup scheduler    
  learning_rate=5e-5,                    # the learning rate
  max_grad_norm=1.0,                     # clip gradient norm to 1.0
  
  fp16=True,                             # use float16 mixed precision training
  quantization_bit=4,                    # use 4-bit QLoRA  
  #use_liger_kernel=True,
  #quantization_device_map="auto",
  
  #load_best_model_at_end=True,
  #metric_for_best_model="eval_loss",
  #save_strategy="epoch",
  #eval_strategy="epoch",
  logging_steps=10,                      # log every 10 steps
  save_steps=5000,                       # save checkpoint every 1000 steps    
  logging_dir=str(LOGGING_DIR),
  
  #use_unsloth=True,
  report_to="tensorboard"                       # discards wandb

)

In [12]:
json.dump(args, open(train_file, "w", encoding="utf-8"), indent=2)

In [13]:
p = subprocess.Popen(["llamafactory-cli", "train", train_file], cwd=LLAMA_FACTORY_DIR)

In [14]:
p.wait()

02/25/2025 10:39:59 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.float16


[INFO|configuration_utils.py:672] 2025-02-25 10:40:00,007 >> loading configuration file config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--llama-3-8b-Instruct-bnb-4bit/snapshots/fd5a4dc328319c1cfe9489eccfb9c6406bdfd469/config.json
[INFO|configuration_utils.py:739] 2025-02-25 10:40:00,008 >> Model config LlamaConfig {
  "_name_or_path": "unsloth/llama-3-8b-Instruct-bnb-4bit",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128009,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pad_token_id": 128255,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bn

02/25/2025 10:40:01 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|>
02/25/2025 10:40:01 - INFO - llamafactory.data.loader - Loading dataset /Utilisateurs/umushtaq/emotion_analysis_comics/comics_FT/datasets/comics35_utterance_pg_unilabel_train.json...


Generating train split: 718 examples [00:00, 7867.74 examples/s]
Converting format of dataset: 100%|██████████| 718/718 [00:00<00:00, 7491.17 examples/s]
Running tokenizer on dataset: 100%|██████████| 718/718 [00:01<00:00, 449.29 examples/s]


training example:
input_ids:
[128000, 128006, 882, 128007, 271, 14711, 5867, 6082, 18825, 33257, 15766, 271, 2675, 527, 459, 11084, 20356, 6492, 6335, 58394, 304, 20303, 2363, 21976, 23692, 13, 4718, 3465, 374, 311, 24564, 22256, 3095, 323, 10765, 872, 14604, 2262, 382, 30521, 512, 12, 1472, 690, 5371, 264, 1160, 315, 22256, 3095, 505, 264, 2199, 304, 264, 20303, 2363, 198, 12, 9062, 22256, 685, 1253, 3237, 27785, 25002, 21958, 271, 66913, 512, 16, 13, 10852, 3725, 24564, 279, 14604, 2317, 323, 16630, 315, 1855, 22256, 685, 304, 279, 2199, 198, 17, 13, 65647, 8581, 20356, 505, 279, 2768, 6989, 512, 256, 330, 4091, 498, 330, 4338, 70, 592, 498, 330, 69, 686, 498, 330, 83214, 2136, 498, 330, 20370, 9868, 498, 330, 4215, 498, 330, 60668, 702, 18, 13, 1789, 1855, 22256, 685, 304, 264, 20303, 2199, 11, 10765, 279, 20356, 3118, 323, 471, 459, 1358, 315, 20356, 18893, 304, 2015, 382, 93016, 50, 512, 16, 13, 5560, 27785, 279, 9382, 10212, 3485, 198, 17, 13, 9442, 28832, 7354, 264, 832, 8614, 1

[INFO|configuration_utils.py:672] 2025-02-25 10:40:03,935 >> loading configuration file config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--llama-3-8b-Instruct-bnb-4bit/snapshots/fd5a4dc328319c1cfe9489eccfb9c6406bdfd469/config.json
[INFO|configuration_utils.py:739] 2025-02-25 10:40:03,937 >> Model config LlamaConfig {
  "_name_or_path": "unsloth/llama-3-8b-Instruct-bnb-4bit",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128009,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pad_token_id": 128255,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bn

02/25/2025 10:40:07 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.
02/25/2025 10:40:07 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
02/25/2025 10:40:07 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
02/25/2025 10:40:07 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
02/25/2025 10:40:07 - INFO - llamafactory.model.model_utils.misc - Found linear modules: o_proj,q_proj,k_proj,up_proj,gate_proj,v_proj,down_proj
02/25/2025 10:40:08 - INFO - llamafactory.model.loader - trainable params: 20,971,520 || all params: 8,051,232,768 || trainable%: 0.2605


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
[INFO|trainer.py:667] 2025-02-25 10:40:08,405 >> Using auto half precision backend


02/25/2025 10:40:08 - INFO - llamafactory.train.trainer_utils - Using LoRA+ optimizer with loraplus lr ratio 32.00.


[INFO|trainer.py:2243] 2025-02-25 10:40:09,764 >> ***** Running training *****
[INFO|trainer.py:2244] 2025-02-25 10:40:09,764 >>   Num examples = 646
[INFO|trainer.py:2245] 2025-02-25 10:40:09,764 >>   Num Epochs = 20
[INFO|trainer.py:2246] 2025-02-25 10:40:09,764 >>   Instantaneous batch size per device = 8
[INFO|trainer.py:2249] 2025-02-25 10:40:09,764 >>   Total train batch size (w. parallel, distributed & accumulation) = 32
[INFO|trainer.py:2250] 2025-02-25 10:40:09,764 >>   Gradient Accumulation steps = 4
[INFO|trainer.py:2251] 2025-02-25 10:40:09,764 >>   Total optimization steps = 400
[INFO|trainer.py:2252] 2025-02-25 10:40:09,769 >>   Number of trainable parameters = 20,971,520
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
100%|██████████| 400/400 [51:14<00:00,  7.68s/it][INFO|trainer.py:3705] 2025-02-25 11:31:24,727 >> Saving model checkpoint to /Utilisateurs/umushtaq/emotion_analysis_comics/co

{'loss': 0.8569, 'grad_norm': 0.7068783640861511, 'learning_rate': 1.25e-05, 'epoch': 0.49}
{'loss': 0.3623, 'grad_norm': 0.4566987156867981, 'learning_rate': 2.5e-05, 'epoch': 0.99}
{'loss': 0.2757, 'grad_norm': 0.5632724761962891, 'learning_rate': 3.7500000000000003e-05, 'epoch': 1.48}
{'loss': 0.2792, 'grad_norm': 1.0834013223648071, 'learning_rate': 5e-05, 'epoch': 1.98}
{'loss': 0.2342, 'grad_norm': 0.8257381916046143, 'learning_rate': 4.8611111111111115e-05, 'epoch': 2.47}
{'loss': 0.2094, 'grad_norm': 0.6704150438308716, 'learning_rate': 4.722222222222222e-05, 'epoch': 2.96}
{'loss': 0.1519, 'grad_norm': 0.7432125210762024, 'learning_rate': 4.5833333333333334e-05, 'epoch': 3.46}
{'loss': 0.1238, 'grad_norm': 0.7792866826057434, 'learning_rate': 4.4444444444444447e-05, 'epoch': 3.95}
{'loss': 0.0729, 'grad_norm': 1.692604422569275, 'learning_rate': 4.305555555555556e-05, 'epoch': 4.44}
{'loss': 0.0529, 'grad_norm': 0.754977822303772, 'learning_rate': 4.166666666666667e-05, 'epoch

0

In [15]:
args = dict(
  model_name_or_path=BASE_MODEL, # use bnb-4bit-quantized Llama-3-8B-Instruct model
  adapter_name_or_path=str(OUTPUT_DIR),            # load the saved LoRA adapters
  template="llama3",                     # same to the one in training
  finetuning_type="lora",                  # same to the one in training
  quantization_bit=4,                    # load 4-bit quantized model
)

In [16]:
model = ChatModel(args)

[INFO|configuration_utils.py:672] 2025-02-25 11:31:38,424 >> loading configuration file config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--llama-3-8b-Instruct-bnb-4bit/snapshots/fd5a4dc328319c1cfe9489eccfb9c6406bdfd469/config.json
[INFO|configuration_utils.py:739] 2025-02-25 11:31:38,426 >> Model config LlamaConfig {
  "_name_or_path": "unsloth/llama-3-8b-Instruct-bnb-4bit",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128009,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pad_token_id": 128255,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bn

02/25/2025 11:31:42 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|>


[INFO|configuration_utils.py:672] 2025-02-25 11:31:42,810 >> loading configuration file config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--llama-3-8b-Instruct-bnb-4bit/snapshots/fd5a4dc328319c1cfe9489eccfb9c6406bdfd469/config.json
[INFO|configuration_utils.py:739] 2025-02-25 11:31:42,811 >> Model config LlamaConfig {
  "_name_or_path": "unsloth/llama-3-8b-Instruct-bnb-4bit",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128009,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pad_token_id": 128255,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bn

02/25/2025 11:31:42 - INFO - llamafactory.model.model_utils.quantization - Loading ?-bit BITSANDBYTES-quantized model.
02/25/2025 11:31:42 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.


[INFO|modeling_utils.py:3726] 2025-02-25 11:31:48,020 >> loading weights file model.safetensors from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--llama-3-8b-Instruct-bnb-4bit/snapshots/fd5a4dc328319c1cfe9489eccfb9c6406bdfd469/model.safetensors
[INFO|modeling_utils.py:1622] 2025-02-25 11:31:48,072 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16.
[INFO|configuration_utils.py:1099] 2025-02-25 11:31:48,078 >> Generate config GenerationConfig {
  "bos_token_id": 128000,
  "eos_token_id": 128009,
  "pad_token_id": 128255
}

[INFO|quantizer_bnb_4bit.py:122] 2025-02-25 11:31:48,225 >> target_dtype {target_dtype} is replaced by `CustomDtype.INT4` for 4-bit BnB quantization
[INFO|modeling_utils.py:4568] 2025-02-25 11:32:01,098 >> All model checkpoint weights were used when initializing LlamaForCausalLM.

[INFO|modeling_utils.py:4576] 2025-02-25 11:32:01,099 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at u

02/25/2025 11:32:01 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
02/25/2025 11:32:01 - INFO - llamafactory.model.adapter - Loaded adapter(s): /Utilisateurs/umushtaq/emotion_analysis_comics/comics_FT/saved_models/comics35_pg_uni_llama-3-8b-Instruct-bnb-4bit
02/25/2025 11:32:02 - INFO - llamafactory.model.loader - all params: 8,051,232,768




In [17]:
model.engine

<llamafactory.chat.hf_engine.HuggingfaceEngine at 0x7fe97f1a4a90>

In [18]:
model.engine_type

'huggingface'

In [19]:
with open(test_dataset_file, "r+") as fh:
    test_dataset = json.load(fh)

test_prompts = []
test_grounds = []

for sample in test_dataset:
    test_prompts.append(sample["instruction"] + sample["input"])
    #test_prompts.append(sample["input"])
    test_grounds.append(sample["output"])

In [20]:
test_predictions = []

In [21]:
messages = []

for prompt in tqdm(test_prompts, desc="Running inferences ..."):
    #print(type(prompt))
    #messages.append({"role": "user", "content": prompt})
    message = [{"role": "user", "content": prompt}]
    test_predictions.append(model.chat(message))
    #test_predictions.append(model.chat(prompt))

Running inferences ...:   0%|          | 0/156 [00:00<?, ?it/s]

Running inferences ...: 100%|██████████| 156/156 [07:10<00:00,  2.76s/it]


In [22]:
# batch_size = 1  # Adjust batch size according to memory and model limitations

# # Initialize list to hold predictions
# test_predictions = []

# # Iterate over test_prompts in batches
# for i in tqdm(range(0, len(test_prompts), batch_size), desc="Running inferences ..."):
#     # Create a batch of messages
#     batch_prompts = test_prompts[i:i + batch_size]
    
#     # Prepare the messages for the batch
#     batch_messages = [[{"role": "user", "content": prompt}] for prompt in batch_prompts]
    
#     # Perform inference on the batch and store predictions
#     for message in batch_messages:
#         prediction = model.chat(message)
#         test_predictions.append(prediction)  # Collect batch predictions

In [23]:
len(test_predictions)

156

In [49]:
test_predictions

[[Response(response_text='{"emotions": ["Surprise", "Surprise", "Neutral", "Neutral", "Neutral", "Joy", "Surprise", "Sadness", "Neutral", "Neutral", "Neutral"]}', response_length=42, prompt_length=701, finish_reason='stop')],
 [Response(response_text='{"emotions": ["Sadness", "Sadness", "Anger", "Anger", "Sadness", "Sadness", "Sadness"]}', response_length=33, prompt_length=993, finish_reason='stop')],
 [Response(response_text='{"emotions": ["Anger", "Anger", "Anger", "Surprise", "Anger", "Anger", "Anger", "Anger", "Anger", "Joy", "Anger", "Anger", "Anger", "Anger", "Anger", "Anger"]}', response_length=68, prompt_length=1143, finish_reason='stop')],
 [Response(response_text='{"emotions": ["Anger", "Anger", "Anger", "Sadness", "Sadness", "Anger", "Anger", "Anger", "Sadness", "Anger", "Anger", "Anger", "Sadness", "Anger", "Anger", "Anger", "Anger", "Joy", "Anger", "Anger"]}', response_length=84, prompt_length=1099, finish_reason='stop')],
 [Response(response_text='{"emotions": ["Neutral",

In [50]:
processed_preds = []
bad_idx = []

for i, raw_pred in enumerate(test_predictions):
    try:
        x = json.loads(raw_pred[0].response_text)["emotions"]
        processed_preds.append(x)
    except:
        print(i)
        bad_idx.append(i)

In [51]:
# bad_idx.sort(reverse=True)

# # Remove elements from 'grounds' at the specified indices
# for idx in bad_idx:
    
#     #del processed_grounds[idx]
#     del processed_preds[idx]

In [52]:
len(processed_preds)

156

In [53]:
processed_grounds = []

for ground in test_grounds:
    x = json.loads(ground)["emotions"]
    processed_grounds.append(x)

In [54]:
len(processed_grounds)

156

In [55]:
bad_idx

[]

In [56]:
processed_grounds = [item for i, item in enumerate(processed_grounds) if i not in bad_idx]

In [57]:
processed_grounds

[['Joy',
  'Joy',
  'Surprise',
  'Joy',
  'Joy',
  'Joy',
  'Surprise',
  'Joy',
  'Joy',
  'Neutral',
  'Neutral'],
 ['Neutral', 'Neutral', 'Anger', 'Anger', 'Neutral', 'Sadness', 'Sadness'],
 ['Anger',
  'Anger',
  'Anger',
  'Surprise',
  'Surprise',
  'Joy',
  'Surprise',
  'Joy',
  'Joy',
  'Joy',
  'Anger',
  'Anger',
  'Joy',
  'Sadness',
  'Sadness',
  'Surprise'],
 ['Anger',
  'Anger',
  'Anger',
  'Fear',
  'Surprise',
  'Sadness',
  'Sadness',
  'Fear',
  'Sadness',
  'Sadness',
  'Joy',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Joy',
  'Joy',
  'Surprise',
  'Joy',
  'Anger',
  'Anger'],
 ['Neutral', 'Joy', 'Joy'],
 ['Neutral',
  'Fear',
  'Neutral',
  'Fear',
  'Joy',
  'Sadness',
  'Fear',
  'Fear',
  'Joy',
  'Neutral',
  'Fear',
  'Joy',
  'Neutral',
  'Joy',
  'Joy',
  'Neutral',
  'Joy',
  'Anger',
  'Anger',
  'Surprise'],
 ['Anger',
  'Anger',
  'Sadness',
  'Surprise',
  'Anger',
  'Neutral',
  'Joy',
  'Joy',
  'Joy',
  'Surprise',
  'Surprise',
  'Surprise'

In [58]:
processed_preds

[['Surprise',
  'Surprise',
  'Neutral',
  'Neutral',
  'Neutral',
  'Joy',
  'Surprise',
  'Sadness',
  'Neutral',
  'Neutral',
  'Neutral'],
 ['Sadness', 'Sadness', 'Anger', 'Anger', 'Sadness', 'Sadness', 'Sadness'],
 ['Anger',
  'Anger',
  'Anger',
  'Surprise',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Joy',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Anger'],
 ['Anger',
  'Anger',
  'Anger',
  'Sadness',
  'Sadness',
  'Anger',
  'Anger',
  'Anger',
  'Sadness',
  'Anger',
  'Anger',
  'Anger',
  'Sadness',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Joy',
  'Anger',
  'Anger'],
 ['Neutral', 'Joy', 'Joy'],
 ['Sadness',
  'Fear',
  'Sadness',
  'Fear',
  'Joy',
  'Sadness',
  'Fear',
  'Fear',
  'Joy',
  'Sadness',
  'Fear',
  'Joy',
  'Sadness',
  'Joy',
  'Joy',
  'Sadness',
  'Joy',
  'Sadness',
  'Sadness',
  'Surprise'],
 ['Surprise',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Disgust',
  'Joy',
 

In [59]:
import ast

In [60]:
#processed_grounds = [ast.literal_eval(elem) for elem in processed_grounds]

In [61]:
#processed_preds = [ast.literal_eval(elem) for elem in processed_preds]

In [62]:
bad_idx = []

for idx, (i,j) in enumerate(zip(processed_grounds, processed_preds)):
    if len(i) != len(j):
        print(idx, len(i), len(j))
        bad_idx.append(idx)

3 21 20
14 23 13
103 13 12


In [63]:
processed_grounds = [item for i, item in enumerate(processed_grounds) if i not in bad_idx]
processed_preds = [item for i, item in enumerate(processed_preds) if i not in bad_idx]

In [64]:
len(processed_grounds), len(processed_preds)

(153, 153)

In [66]:
processed_preds

[['Surprise',
  'Surprise',
  'Neutral',
  'Neutral',
  'Neutral',
  'Joy',
  'Surprise',
  'Sadness',
  'Neutral',
  'Neutral',
  'Neutral'],
 ['Sadness', 'Sadness', 'Anger', 'Anger', 'Sadness', 'Sadness', 'Sadness'],
 ['Anger',
  'Anger',
  'Anger',
  'Surprise',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Joy',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Anger'],
 ['Neutral', 'Joy', 'Joy'],
 ['Sadness',
  'Fear',
  'Sadness',
  'Fear',
  'Joy',
  'Sadness',
  'Fear',
  'Fear',
  'Joy',
  'Sadness',
  'Fear',
  'Joy',
  'Sadness',
  'Joy',
  'Joy',
  'Sadness',
  'Joy',
  'Sadness',
  'Sadness',
  'Surprise'],
 ['Surprise',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Disgust',
  'Joy',
  'Anger',
  'Anger',
  'Anger',
  'Anger'],
 ['Neutral',
  'Fear',
  'Anger',
  'Neutral',
  'Fear',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Anger',
  'Surprise',
  'Anger'],
 

In [72]:
grounds = [item for sublist in processed_grounds for item in sublist]
predictions = [item for sublist in processed_preds for item in sublist]

In [73]:
len(grounds), len(predictions)

(1269, 1269)

In [74]:
grounds

['Joy',
 'Joy',
 'Surprise',
 'Joy',
 'Joy',
 'Joy',
 'Surprise',
 'Joy',
 'Joy',
 'Neutral',
 'Neutral',
 'Neutral',
 'Neutral',
 'Anger',
 'Anger',
 'Neutral',
 'Sadness',
 'Sadness',
 'Anger',
 'Anger',
 'Anger',
 'Surprise',
 'Surprise',
 'Joy',
 'Surprise',
 'Joy',
 'Joy',
 'Joy',
 'Anger',
 'Anger',
 'Joy',
 'Sadness',
 'Sadness',
 'Surprise',
 'Neutral',
 'Joy',
 'Joy',
 'Neutral',
 'Fear',
 'Neutral',
 'Fear',
 'Joy',
 'Sadness',
 'Fear',
 'Fear',
 'Joy',
 'Neutral',
 'Fear',
 'Joy',
 'Neutral',
 'Joy',
 'Joy',
 'Neutral',
 'Joy',
 'Anger',
 'Anger',
 'Surprise',
 'Anger',
 'Anger',
 'Sadness',
 'Surprise',
 'Anger',
 'Neutral',
 'Joy',
 'Joy',
 'Joy',
 'Surprise',
 'Surprise',
 'Surprise',
 'Joy',
 'Joy',
 'Joy',
 'Neutral',
 'Fear',
 'Anger',
 'Neutral',
 'Fear',
 'Anger',
 'Surprise',
 'Anger',
 'Anger',
 'Anger',
 'Surprise',
 'Anger',
 'Surprise',
 'Anger',
 'Surprise',
 'Neutral',
 'Joy',
 'Joy',
 'Joy',
 'Joy',
 'Surprise',
 'Surprise',
 'Sadness',
 'Sadness',
 'Surprise

In [75]:
predictions

['Surprise',
 'Surprise',
 'Neutral',
 'Neutral',
 'Neutral',
 'Joy',
 'Surprise',
 'Sadness',
 'Neutral',
 'Neutral',
 'Neutral',
 'Sadness',
 'Sadness',
 'Anger',
 'Anger',
 'Sadness',
 'Sadness',
 'Sadness',
 'Anger',
 'Anger',
 'Anger',
 'Surprise',
 'Anger',
 'Anger',
 'Anger',
 'Anger',
 'Anger',
 'Joy',
 'Anger',
 'Anger',
 'Anger',
 'Anger',
 'Anger',
 'Anger',
 'Neutral',
 'Joy',
 'Joy',
 'Sadness',
 'Fear',
 'Sadness',
 'Fear',
 'Joy',
 'Sadness',
 'Fear',
 'Fear',
 'Joy',
 'Sadness',
 'Fear',
 'Joy',
 'Sadness',
 'Joy',
 'Joy',
 'Sadness',
 'Joy',
 'Sadness',
 'Sadness',
 'Surprise',
 'Surprise',
 'Anger',
 'Anger',
 'Anger',
 'Anger',
 'Anger',
 'Anger',
 'Anger',
 'Anger',
 'Disgust',
 'Joy',
 'Anger',
 'Anger',
 'Anger',
 'Anger',
 'Neutral',
 'Fear',
 'Anger',
 'Neutral',
 'Fear',
 'Anger',
 'Anger',
 'Anger',
 'Anger',
 'Anger',
 'Anger',
 'Anger',
 'Anger',
 'Anger',
 'Surprise',
 'Anger',
 'Sadness',
 'Surprise',
 'Joy',
 'Surprise',
 'Surprise',
 'Surprise',
 'Sadnes

In [76]:
print(classification_report(grounds, predictions, digits=3))

              precision    recall  f1-score   support

       Anger      0.503     0.514     0.509       348
     Disgust      0.500     0.167     0.250        18
        Fear      0.488     0.446     0.466       177
 Frustration      0.000     0.000     0.000         0
        Hope      0.000     0.000     0.000         0
         Joy      0.505     0.413     0.455       230
     Neutral      0.500     0.272     0.352       103
     Sadness      0.430     0.568     0.489       199
  Stupendous      0.000     0.000     0.000         0
    Surprise      0.476     0.567     0.518       194

    accuracy                          0.478      1269
   macro avg      0.340     0.295     0.304      1269
weighted avg      0.485     0.478     0.475      1269



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [42]:
from sklearn.preprocessing import MultiLabelBinarizer
mlb = MultiLabelBinarizer()

In [67]:
y_true_mhot = mlb.fit_transform(processed_grounds)
y_pred_mhot = mlb.transform(processed_preds)



In [68]:
y_true_mhot.shape, y_pred_mhot.shape

((153, 7), (153, 7))

In [78]:
y_true_mhot[0]

array([0, 0, 0, 1, 1, 0, 1])

In [79]:
processed_grounds

[['Joy',
  'Joy',
  'Surprise',
  'Joy',
  'Joy',
  'Joy',
  'Surprise',
  'Joy',
  'Joy',
  'Neutral',
  'Neutral'],
 ['Neutral', 'Neutral', 'Anger', 'Anger', 'Neutral', 'Sadness', 'Sadness'],
 ['Anger',
  'Anger',
  'Anger',
  'Surprise',
  'Surprise',
  'Joy',
  'Surprise',
  'Joy',
  'Joy',
  'Joy',
  'Anger',
  'Anger',
  'Joy',
  'Sadness',
  'Sadness',
  'Surprise'],
 ['Neutral', 'Joy', 'Joy'],
 ['Neutral',
  'Fear',
  'Neutral',
  'Fear',
  'Joy',
  'Sadness',
  'Fear',
  'Fear',
  'Joy',
  'Neutral',
  'Fear',
  'Joy',
  'Neutral',
  'Joy',
  'Joy',
  'Neutral',
  'Joy',
  'Anger',
  'Anger',
  'Surprise'],
 ['Anger',
  'Anger',
  'Sadness',
  'Surprise',
  'Anger',
  'Neutral',
  'Joy',
  'Joy',
  'Joy',
  'Surprise',
  'Surprise',
  'Surprise',
  'Joy',
  'Joy',
  'Joy'],
 ['Neutral',
  'Fear',
  'Anger',
  'Neutral',
  'Fear',
  'Anger',
  'Surprise',
  'Anger',
  'Anger',
  'Anger',
  'Surprise',
  'Anger',
  'Surprise',
  'Anger',
  'Surprise',
  'Neutral'],
 ['Joy',
  'J

In [69]:
class_labels = mlb.classes_

In [70]:
class_labels

array(['Anger', 'Disgust', 'Fear', 'Joy', 'Neutral', 'Sadness',
       'Surprise'], dtype=object)

In [71]:
print(classification_report(y_true_mhot, y_pred_mhot, target_names=class_labels, digits=3))

              precision    recall  f1-score   support

       Anger      0.885     0.817     0.850       104
     Disgust      0.600     0.200     0.300        15
        Fear      0.716     0.615     0.662        78
         Joy      0.742     0.622     0.676        74
     Neutral      0.696     0.372     0.485        43
     Sadness      0.606     0.623     0.614        69
    Surprise      0.758     0.862     0.806        87

   micro avg      0.747     0.672     0.708       470
   macro avg      0.715     0.587     0.628       470
weighted avg      0.744     0.672     0.698       470
 samples avg      0.755     0.689     0.685       470



In [48]:
# with open({Path(FT_DIR)} / "classification_report.pickle", 'wb') as fh:
    
#      pickle.dump(classification_report(y_true_mhot, y_pred_mhot, target_names=class_labels, digits=3, output_dict=True), fh)