In [1]:
import os
import ast
import sys
import json
import torch
import pickle
import subprocess

sys.path.append('../')

import pandas as pd

from pathlib import Path
from tqdm import tqdm
from llamafactory.chat import ChatModel
from llamafactory.extras.misc import torch_gc
from sklearn.metrics import classification_report
#from utils.post_processing import post_process


In [2]:
try:    
    assert torch.cuda.is_available() is True
    
except AssertionError:
    
    print("Please set up a GPU before using LLaMA Factory...")

In [3]:
CURRENT_DIR = Path.cwd()
FT_DIR = CURRENT_DIR / "emotion_analysis_comics" / "finetuning"
DATASET_DIR = CURRENT_DIR / "emotion_analysis_comics" / "finetuning" / "datasets"

ERC_DIR = FT_DIR.parent
LLAMA_FACTORY_DIR = ERC_DIR / "LLaMA-Factory"

BASE_MODEL = "unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit"
LOGGING_DIR = FT_DIR / "training_logs"
OUTPUT_DIR = FT_DIR / "saved_models" / f"""comics35_pg_nb_{BASE_MODEL.split("/")[1]}"""

In [4]:
DATASET_DIR = CURRENT_DIR / "emotion_analysis_comics" / "finetuning" / "datasets"

In [5]:
train_dataset_name = f"""comics35_utterance_pg_train.json"""
test_dataset_name = f"""comics35_utterance_pg_test.json"""

train_dataset_file = DATASET_DIR / train_dataset_name
test_dataset_file = DATASET_DIR / test_dataset_name

In [6]:
train_dataset_file

PosixPath('/Utilisateurs/umushtaq/emotion_analysis_comics/finetuning/datasets/comics35_utterance_pg_train.json')

In [7]:

if not os.path.exists(os.path.join(FT_DIR, "model_args")):
    os.mkdir(os.path.join(FT_DIR, "model_args"))

train_file = FT_DIR / "model_args" / f"""{train_dataset_name.split(".")[0].split("train")[0]}{BASE_MODEL.split("/")[1]}.json"""

In [8]:
dataset_info_line =  {
  "file_name": f"{train_dataset_file}",
  "columns": {
    "prompt": "instruction",
    "query": "input",
    "response": "output"
  }
}

In [9]:
with open(os.path.join(LLAMA_FACTORY_DIR, "data/dataset_info.json"), "r") as jsonFile:
    data = json.load(jsonFile)

data["comics"] = dataset_info_line

with open(os.path.join(LLAMA_FACTORY_DIR, "data/dataset_info.json"), "w") as jsonFile:
    json.dump(data, jsonFile)

In [None]:
NB_EPOCHS = 8

In [11]:
args = dict(
    
  stage="sft",                           # do supervised fine-tuning
  do_train=True,

  model_name_or_path=BASE_MODEL,         # use bnb-4bit-quantized Llama-3-8B-Instruct model
  num_train_epochs=NB_EPOCHS,            # the epochs of training
  output_dir=str(OUTPUT_DIR),                 # the path to save LoRA adapters
  overwrite_output_dir=True,             # overrides existing output contents

  dataset="comics",                      # dataset name
  template="llama3",                     # use llama3 prompt template
  #train_on_prompt=True,
  val_size=0.1,
  max_samples=10000,                       # use 500 examples in each dataset

  finetuning_type="lora",                # use LoRA adapters to save memory
  lora_target="all",                     # attach LoRA adapters to all linear layers
  per_device_train_batch_size=2,         # the batch size
  gradient_accumulation_steps=4,         # the gradient accumulation steps
  lr_scheduler_type="cosine",            # use cosine learning rate scheduler
  loraplus_lr_ratio=16.0,                # use LoRA+ algorithm with lambda=16.0
  #temperature=0.5,
  
  warmup_ratio=0.1,                      # use warmup scheduler    
  learning_rate=5e-5,                    # the learning rate
  max_grad_norm=1.0,                     # clip gradient norm to 1.0
  
  fp16=True,                             # use float16 mixed precision training
  quantization_bit=4,                    # use 4-bit QLoRA  
  #use_liger_kernel=True,
  #quantization_device_map="auto",
  
  logging_steps=10,                      # log every 10 steps
  save_steps=5000,                       # save checkpoint every 1000 steps    
  logging_dir=str(LOGGING_DIR),
  
  # use_unsloth=True,
  report_to="tensorboard"                       # discards wandb

)

In [12]:
json.dump(args, open(train_file, "w", encoding="utf-8"), indent=2)

In [13]:
p = subprocess.Popen(["llamafactory-cli", "train", train_file], cwd=LLAMA_FACTORY_DIR)

In [14]:
p.wait()

11/06/2024 12:53:00 - INFO - llamafactory.cli - Initializing distributed tasks at: 127.0.0.1:26629


W1106 12:53:01.905000 139778173302080 torch/distributed/run.py:779] 
W1106 12:53:01.905000 139778173302080 torch/distributed/run.py:779] *****************************************
W1106 12:53:01.905000 139778173302080 torch/distributed/run.py:779] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. 
W1106 12:53:01.905000 139778173302080 torch/distributed/run.py:779] *****************************************


11/06/2024 12:53:18 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, compute dtype: torch.float16


[INFO|configuration_utils.py:672] 2024-11-06 12:53:18,981 >> loading configuration file config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--Meta-Llama-3.1-70B-Instruct-bnb-4bit/snapshots/3e0db69a642d4c235a9f28f8ebac012efa0d8113/config.json
[INFO|configuration_utils.py:739] 2024-11-06 12:53:18,982 >> Model config LlamaConfig {
  "_name_or_path": "unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 8192,
  "initializer_range": 0.02,
  "intermediate_size": 28672,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 64,
  "num_hidden_layers": 80,
  "num_key_value_heads": 8,
  "pad_token_id": 128004,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load

11/06/2024 12:53:19 - INFO - llamafactory.hparams.parser - Process rank: 3, device: cuda:3, n_gpu: 1, distributed training: True, compute dtype: torch.float16
11/06/2024 12:53:19 - INFO - llamafactory.hparams.parser - Process rank: 2, device: cuda:2, n_gpu: 1, distributed training: True, compute dtype: torch.float16
11/06/2024 12:53:19 - INFO - llamafactory.hparams.parser - Process rank: 1, device: cuda:1, n_gpu: 1, distributed training: True, compute dtype: torch.float16
11/06/2024 12:53:19 - INFO - llamafactory.hparams.parser - Process rank: 4, device: cuda:4, n_gpu: 1, distributed training: True, compute dtype: torch.float16


[INFO|tokenization_utils_base.py:2478] 2024-11-06 12:53:20,018 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
[INFO|configuration_utils.py:672] 2024-11-06 12:53:20,507 >> loading configuration file config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--Meta-Llama-3.1-70B-Instruct-bnb-4bit/snapshots/3e0db69a642d4c235a9f28f8ebac012efa0d8113/config.json
[INFO|configuration_utils.py:739] 2024-11-06 12:53:20,508 >> Model config LlamaConfig {
  "_name_or_path": "unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 8192,
  "initializer_range": 0.02,
  "intermediate_size": 28672,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_ty

11/06/2024 12:53:21 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|>
11/06/2024 12:53:21 - INFO - llamafactory.data.loader - Loading dataset /Utilisateurs/umushtaq/emotion_analysis_comics/finetuning/datasets/comics35_utterance_pg_train.json...
11/06/2024 12:53:23 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|>
11/06/2024 12:53:23 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|>
11/06/2024 12:53:23 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|>
11/06/2024 12:53:24 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|>
11/06/2024 12:53:26 - INFO - llamafactory.data.loader - Loading dataset /Utilisateurs/umushtaq/emotion_analysis_comics/finetuning/datasets/comics35_utterance_pg_train.json...
11/06/2024 12:53:26 - INFO - llamafactory.data.loader - Loading dataset /Utilisateurs/umushtaq/emotion_analysis_comics/finetuning/datasets/comics35_utterance_pg_train.json...
11/06/2024 12:53:26 - INFO - llamaf

[INFO|configuration_utils.py:672] 2024-11-06 12:53:26,853 >> loading configuration file config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--Meta-Llama-3.1-70B-Instruct-bnb-4bit/snapshots/3e0db69a642d4c235a9f28f8ebac012efa0d8113/config.json
[INFO|configuration_utils.py:739] 2024-11-06 12:53:26,854 >> Model config LlamaConfig {
  "_name_or_path": "unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 8192,
  "initializer_range": 0.02,
  "intermediate_size": 28672,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 64,
  "num_hidden_layers": 80,
  "num_key_value_heads": 8,
  "pad_token_id": 128004,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load

11/06/2024 12:53:26 - INFO - llamafactory.model.model_utils.quantization - Loading ?-bit BITSANDBYTES-quantized model.


[INFO|modeling_utils.py:3726] 2024-11-06 12:53:27,507 >> loading weights file model.safetensors from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--Meta-Llama-3.1-70B-Instruct-bnb-4bit/snapshots/3e0db69a642d4c235a9f28f8ebac012efa0d8113/model.safetensors.index.json
[INFO|modeling_utils.py:1622] 2024-11-06 12:53:27,513 >> Instantiating LlamaForCausalLM model under default dtype torch.float16.
[INFO|configuration_utils.py:1099] 2024-11-06 12:53:27,514 >> Generate config GenerationConfig {
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "pad_token_id": 128004
}

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


11/06/2024 12:53:27 - INFO - llamafactory.model.model_utils.quantization - Loading ?-bit BITSANDBYTES-quantized model.
11/06/2024 12:53:27 - INFO - llamafactory.model.model_utils.quantization - Loading ?-bit BITSANDBYTES-quantized model.
11/06/2024 12:53:27 - INFO - llamafactory.model.model_utils.quantization - Loading ?-bit BITSANDBYTES-quantized model.
11/06/2024 12:53:27 - INFO - llamafactory.model.model_utils.quantization - Loading ?-bit BITSANDBYTES-quantized model.


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Loading checkpoint shards: 100%|██████████| 6/6 [00:40<00:00,  6.74s/it]
Loading checkpoint shards: 100%|██████████| 6/6 [00:40<00:00,  6.80s/it]
Loading checkpoint shards:  83%|████████▎ | 5/6 [00:42<00:08,  8.29s/it]

11/06/2024 12:54:15 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.
11/06/2024 12:54:15 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
11/06/2024 12:54:15 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
11/06/2024 12:54:15 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
11/06/2024 12:54:15 - INFO - llamafactory.model.model_utils.misc - Found linear modules: down_proj,q_proj,gate_proj,k_proj,up_proj,v_proj,o_proj
11/06/2024 12:54:15 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.
11/06/2024 12:54:15 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
11/06/2024 12:54:15 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
11/06/2024 12:54:15 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
11/06/2024 12:54:15 - INFO - ll

Loading checkpoint shards: 100%|██████████| 6/6 [00:49<00:00,  8.28s/it]
[INFO|modeling_utils.py:4568] 2024-11-06 12:54:21,292 >> All model checkpoint weights were used when initializing LlamaForCausalLM.

[INFO|modeling_utils.py:4576] 2024-11-06 12:54:21,292 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit.
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
Loading checkpoint shards: 100%|██████████| 6/6 [00:49<00:00,  8.21s/it]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


11/06/2024 12:54:21 - INFO - llamafactory.model.loader - trainable params: 103,546,880 || all params: 70,657,253,376 || trainable%: 0.1465
11/06/2024 12:54:22 - INFO - llamafactory.model.loader - trainable params: 103,546,880 || all params: 70,657,253,376 || trainable%: 0.1465


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
[INFO|configuration_utils.py:1054] 2024-11-06 12:54:22,498 >> loading configuration file generation_config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--Meta-Llama-3.1-70B-Instruct-bnb-4bit/snapshots/3e0db69a642d4c235a9f28f8ebac012efa0d8113/generation_config.json
[INFO|configuration_utils.py:1099] 2024-11-06 12:54:22,499 >> Generate config GenerationConfig {
  "bos_token_id": 128000,
  "do_sample": true,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "max_length": 131072,
  "pad_token_id": 128004,
  "temperature": 0.6,
  "top_p": 0.9
}



11/06/2024 12:54:23 - INFO - llamafactory.train.trainer_utils - Using LoRA+ optimizer with loraplus lr ratio 16.00.
11/06/2024 12:54:23 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.
11/06/2024 12:54:23 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
11/06/2024 12:54:23 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
11/06/2024 12:54:23 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
11/06/2024 12:54:23 - INFO - llamafactory.model.model_utils.misc - Found linear modules: k_proj,q_proj,gate_proj,down_proj,up_proj,v_proj,o_proj
11/06/2024 12:54:23 - INFO - llamafactory.train.trainer_utils - Using LoRA+ optimizer with loraplus lr ratio 16.00.


Loading checkpoint shards: 100%|██████████| 6/6 [00:51<00:00,  8.65s/it]


11/06/2024 12:54:24 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.
11/06/2024 12:54:24 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
11/06/2024 12:54:24 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
11/06/2024 12:54:24 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
11/06/2024 12:54:24 - INFO - llamafactory.model.model_utils.misc - Found linear modules: q_proj,up_proj,o_proj,v_proj,k_proj,down_proj,gate_proj
11/06/2024 12:54:26 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.
11/06/2024 12:54:26 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
11/06/2024 12:54:26 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
11/06/2024 12:54:26 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
11/06/2024 12:54:26 - INFO - ll

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


11/06/2024 12:54:31 - INFO - llamafactory.model.loader - trainable params: 103,546,880 || all params: 70,657,253,376 || trainable%: 0.1465


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
[INFO|trainer.py:667] 2024-11-06 12:54:32,190 >> Using auto half precision backend


11/06/2024 12:54:33 - INFO - llamafactory.train.trainer_utils - Using LoRA+ optimizer with loraplus lr ratio 16.00.
11/06/2024 12:54:33 - INFO - llamafactory.model.loader - trainable params: 103,546,880 || all params: 70,657,253,376 || trainable%: 0.1465
11/06/2024 12:54:33 - INFO - llamafactory.train.trainer_utils - Using LoRA+ optimizer with loraplus lr ratio 16.00.


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


11/06/2024 12:54:35 - INFO - llamafactory.train.trainer_utils - Using LoRA+ optimizer with loraplus lr ratio 16.00.


[INFO|trainer.py:2243] 2024-11-06 12:54:40,812 >> ***** Running training *****
[INFO|trainer.py:2244] 2024-11-06 12:54:40,820 >>   Num examples = 646
[INFO|trainer.py:2245] 2024-11-06 12:54:40,820 >>   Num Epochs = 5
[INFO|trainer.py:2246] 2024-11-06 12:54:40,820 >>   Instantaneous batch size per device = 2
[INFO|trainer.py:2249] 2024-11-06 12:54:40,820 >>   Total train batch size (w. parallel, distributed & accumulation) = 40
[INFO|trainer.py:2250] 2024-11-06 12:54:40,820 >>   Gradient Accumulation steps = 4
[INFO|trainer.py:2251] 2024-11-06 12:54:40,820 >>   Total optimization steps = 80
[INFO|trainer.py:2252] 2024-11-06 12:54:40,847 >>   Number of trainable parameters = 103,546,880
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  with torch.enable_grad(), device_autocast_ct

{'loss': 0.6048, 'grad_norm': 0.3567822277545929, 'learning_rate': 4.375e-05, 'epoch': 0.62}


 25%|██▌       | 20/80 [12:16<37:10, 37.18s/it]

{'loss': 0.3389, 'grad_norm': nan, 'learning_rate': 4.914814565722671e-05, 'epoch': 1.23}


 38%|███▊      | 30/80 [18:21<29:50, 35.81s/it]

{'loss': 0.3856, 'grad_norm': 0.8060951828956604, 'learning_rate': 4.8096988312782174e-05, 'epoch': 1.85}


 50%|█████     | 40/80 [24:25<24:30, 36.76s/it]

{'loss': 0.2911, 'grad_norm': 0.3956297039985657, 'learning_rate': 4.188975519039151e-05, 'epoch': 2.46}


 62%|██████▎   | 50/80 [30:35<19:12, 38.41s/it]

{'loss': 0.2672, 'grad_norm': 0.29665979743003845, 'learning_rate': 3.251764498760683e-05, 'epoch': 3.08}


 75%|███████▌  | 60/80 [36:36<12:02, 36.14s/it]

{'loss': 0.2422, 'grad_norm': 0.3221134841442108, 'learning_rate': 2.173684519449872e-05, 'epoch': 3.69}


 88%|████████▊ | 70/80 [42:44<06:07, 36.72s/it]

{'loss': 0.2304, 'grad_norm': 0.26553136110305786, 'learning_rate': 1.1567509791329401e-05, 'epoch': 4.31}


100%|██████████| 80/80 [48:43<00:00, 35.04s/it][INFO|trainer.py:3705] 2024-11-06 13:43:24,551 >> Saving model checkpoint to /Utilisateurs/umushtaq/emotion_analysis_comics/finetuning/saved_models/comics35_pg_nb_Meta-Llama-3.1-70B-Instruct-bnb-4bit/checkpoint-80


{'loss': 0.2107, 'grad_norm': 0.4217824935913086, 'learning_rate': 3.9152138546778625e-06, 'epoch': 4.92}


[INFO|configuration_utils.py:672] 2024-11-06 13:43:25,295 >> loading configuration file config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--Meta-Llama-3.1-70B-Instruct-bnb-4bit/snapshots/3e0db69a642d4c235a9f28f8ebac012efa0d8113/config.json
[INFO|configuration_utils.py:739] 2024-11-06 13:43:25,295 >> Model config LlamaConfig {
  "_name_or_path": "unsloth/Meta-Llama-3.1-70B-Instruct",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 8192,
  "initializer_range": 0.02,
  "intermediate_size": 28672,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 64,
  "num_hidden_layers": 80,
  "num_key_value_heads": 8,
  "pad_token_id": 128004,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load_in_4bit"

{'train_runtime': 2946.1052, 'train_samples_per_second': 1.096, 'train_steps_per_second': 0.027, 'train_loss': 0.3213694006204605, 'epoch': 4.92}


[INFO|configuration_utils.py:672] 2024-11-06 13:43:48,689 >> loading configuration file config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--Meta-Llama-3.1-70B-Instruct-bnb-4bit/snapshots/3e0db69a642d4c235a9f28f8ebac012efa0d8113/config.json
[INFO|configuration_utils.py:739] 2024-11-06 13:43:48,689 >> Model config LlamaConfig {
  "_name_or_path": "unsloth/Meta-Llama-3.1-70B-Instruct",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 8192,
  "initializer_range": 0.02,
  "intermediate_size": 28672,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 64,
  "num_hidden_layers": 80,
  "num_key_value_heads": 8,
  "pad_token_id": 128004,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load_in_4bit"

***** train metrics *****
  epoch                    =      4.9231
  total_flos               = 675036568GF
  train_loss               =      0.3214
  train_runtime            =  0:49:06.10
  train_samples_per_second =       1.096
  train_steps_per_second   =       0.027


[INFO|modelcard.py:449] 2024-11-06 13:43:55,373 >> Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}


0

In [15]:
args = dict(
  model_name_or_path=BASE_MODEL, # use bnb-4bit-quantized Llama-3-8B-Instruct model
  adapter_name_or_path=str(OUTPUT_DIR),            # load the saved LoRA adapters
  template="llama3",                     # same to the one in training
  finetuning_type="lora",                  # same to the one in training
  quantization_bit=4,                    # load 4-bit quantized model
)

In [16]:
model = ChatModel(args)

[INFO|configuration_utils.py:672] 2024-11-06 13:43:57,599 >> loading configuration file config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--Meta-Llama-3.1-70B-Instruct-bnb-4bit/snapshots/3e0db69a642d4c235a9f28f8ebac012efa0d8113/config.json
[INFO|configuration_utils.py:739] 2024-11-06 13:43:57,602 >> Model config LlamaConfig {
  "_name_or_path": "unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 8192,
  "initializer_range": 0.02,
  "intermediate_size": 28672,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 64,
  "num_hidden_layers": 80,
  "num_key_value_heads": 8,
  "pad_token_id": 128004,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load

11/06/2024 13:43:58 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|>


[INFO|configuration_utils.py:672] 2024-11-06 13:43:59,085 >> loading configuration file config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--Meta-Llama-3.1-70B-Instruct-bnb-4bit/snapshots/3e0db69a642d4c235a9f28f8ebac012efa0d8113/config.json
[INFO|configuration_utils.py:739] 2024-11-06 13:43:59,086 >> Model config LlamaConfig {
  "_name_or_path": "unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 8192,
  "initializer_range": 0.02,
  "intermediate_size": 28672,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 64,
  "num_hidden_layers": 80,
  "num_key_value_heads": 8,
  "pad_token_id": 128004,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load

11/06/2024 13:43:59 - INFO - llamafactory.model.model_utils.quantization - Loading ?-bit BITSANDBYTES-quantized model.
11/06/2024 13:43:59 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.


[INFO|modeling_utils.py:3726] 2024-11-06 13:43:59,410 >> loading weights file model.safetensors from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--Meta-Llama-3.1-70B-Instruct-bnb-4bit/snapshots/3e0db69a642d4c235a9f28f8ebac012efa0d8113/model.safetensors.index.json
[INFO|modeling_utils.py:1622] 2024-11-06 13:43:59,416 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16.
[INFO|configuration_utils.py:1099] 2024-11-06 13:43:59,417 >> Generate config GenerationConfig {
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "pad_token_id": 128004
}

[INFO|quantizer_bnb_4bit.py:122] 2024-11-06 13:43:59,707 >> target_dtype {target_dtype} is replaced by `CustomDtype.INT4` for 4-bit BnB quantization


Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

[INFO|modeling_utils.py:4568] 2024-11-06 13:44:08,769 >> All model checkpoint weights were used when initializing LlamaForCausalLM.

[INFO|modeling_utils.py:4576] 2024-11-06 13:44:08,770 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit.
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
[INFO|configuration_utils.py:1054] 2024-11-06 13:44:08,896 >> loading configuration file generation_config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--Meta-Llama-3.1-70B-Instruct-bnb-4bit/snapshots/3e0db69a642d4c235a9f28f8ebac012efa0d8113/generation_config.json
[INFO|configuration_utils.py:1099] 2024-11-06 13:44:08,897 >> Generate config GenerationConfig {
  "bos_token_id": 128000,
  "do_sample": true,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "max_length": 131

11/06/2024 13:44:09 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
11/06/2024 13:44:10 - INFO - llamafactory.model.adapter - Loaded adapter(s): /Utilisateurs/umushtaq/emotion_analysis_comics/finetuning/saved_models/comics35_pg_nb_Meta-Llama-3.1-70B-Instruct-bnb-4bit
11/06/2024 13:44:11 - INFO - llamafactory.model.loader - all params: 70,657,253,376


In [17]:
model.engine

<llamafactory.chat.hf_engine.HuggingfaceEngine at 0x7fe150fa6d90>

In [18]:
model.engine_type

'huggingface'

In [19]:
with open(test_dataset_file, "r+") as fh:
    test_dataset = json.load(fh)

test_prompts = []
test_grounds = []

for sample in test_dataset:
    test_prompts.append(sample["instruction"] + sample["input"])
    test_grounds.append(sample["output"])

In [20]:
print(test_prompts[0])

### Emotion Analysis Expert Role

You are an advanced emotion analysis expert specializing in comic book dialogue interpretation. Your task is to analyze utterances and identify their emotional content.

INPUT:
- You will receive a list of utterances from a page in a comic book
- The utterance may express one or multiple emotions

TASK:
1. Carefully analyze the emotional context and tone of each utterance in the page
2. Identify applicable emotions from the following classes:
   "anger", "disgust", "fear", "sadness", "surprise", "joy", "neutral"
3. For each utterance in a comic page, identify all emotions present and return an array of emotion arrays in order.

RULES:
1. Use ONLY the labels listed above
2. Output must be a JSON with single key "page_utterance_emotions"
3. Value must be an array where:
   - Each element is an array of emotions for one utterance
   - Order matches the input utterances order
   - Multiple emotions are allowed per utterance
4. No explanations, only JSON ou

In [21]:
# test_prompts = test_prompts[:5]

In [22]:
test_predictions = []

In [23]:
messages = []

for prompt in tqdm(test_prompts, desc="Running inferences ..."):
    #print(type(prompt))
    #messages.append({"role": "user", "content": prompt})
    message = [{"role": "user", "content": prompt}]
    test_predictions.append(model.chat(message))

Running inferences ...:   0%|          | 0/156 [00:00<?, ?it/s]



Running inferences ...: 100%|██████████| 156/156 [26:44<00:00, 10.29s/it]


In [24]:
len(test_predictions)

156

In [25]:
test_predictions

[[Response(response_text='{"page_utterance_emotions": [["surprise"], ["joy"], ["surprise", "joy"], ["joy"], ["joy"], ["joy"], ["surprise"], ["joy"], ["joy"], ["neutral"], ["neutral"]]}', response_length=49, prompt_length=381, finish_reason='stop')],
 [Response(response_text='{"page_utterance_emotions": [["joy"], ["joy"], ["anger"], ["anger"], ["joy"], ["anger"], ["anger"]]}', response_length=31, prompt_length=525, finish_reason='stop')],
 [Response(response_text='{"page_utterance_emotions": [["anger"], ["anger"], ["anger"], ["anger", "surprise"], ["surprise"], ["neutral"], ["surprise"], ["anger"], ["joy"], ["joy"], ["anger"], ["anger"], ["anger"], ["anger"], ["anger"], ["surprise"]]}', response_length=65, prompt_length=620, finish_reason='stop')],
 [Response(response_text='{"page_utterance_emotions": [["anger"], ["anger"], ["anger"], ["fear", "sadness"], ["fear", "sadness"], ["neutral"], ["neutral"], ["anger"], ["fear", "sadness"], ["anger", "fear"], ["anger"], ["anger"], ["anger"], ["

In [26]:
test_predictions[0][0].response_text

'{"page_utterance_emotions": [["surprise"], ["joy"], ["surprise", "joy"], ["joy"], ["joy"], ["joy"], ["surprise"], ["joy"], ["joy"], ["neutral"], ["neutral"]]}'

In [27]:
processed_preds = []

for raw_pred in test_predictions:
    x = json.loads(raw_pred[0].response_text)["page_utterance_emotions"]
    processed_preds.append(x)

In [28]:
len(processed_preds)

156

In [29]:
processed_preds

[[['surprise'],
  ['joy'],
  ['surprise', 'joy'],
  ['joy'],
  ['joy'],
  ['joy'],
  ['surprise'],
  ['joy'],
  ['joy'],
  ['neutral'],
  ['neutral']],
 [['joy'], ['joy'], ['anger'], ['anger'], ['joy'], ['anger'], ['anger']],
 [['anger'],
  ['anger'],
  ['anger'],
  ['anger', 'surprise'],
  ['surprise'],
  ['neutral'],
  ['surprise'],
  ['anger'],
  ['joy'],
  ['joy'],
  ['anger'],
  ['anger'],
  ['anger'],
  ['anger'],
  ['anger'],
  ['surprise']],
 [['anger'],
  ['anger'],
  ['anger'],
  ['fear', 'sadness'],
  ['fear', 'sadness'],
  ['neutral'],
  ['neutral'],
  ['anger'],
  ['fear', 'sadness'],
  ['anger', 'fear'],
  ['anger'],
  ['anger'],
  ['anger'],
  ['anger', 'disgust'],
  ['anger', 'disgust'],
  ['anger', 'disgust'],
  ['anger', 'disgust'],
  ['anger', 'disgust'],
  ['joy'],
  ['anger'],
  ['anger']],
 [['neutral'], ['joy'], ['joy']],
 [['joy'],
  ['fear', 'surprise'],
  ['sadness'],
  ['fear', 'sadness'],
  ['joy'],
  ['sadness'],
  ['fear', 'sadness'],
  ['fear', 'sadness']

In [30]:
test_grounds

['{"page_utterance_emotions": [["surprise", "joy"], ["joy"], ["surprise", "joy"], ["joy"], ["joy"], ["joy"], ["surprise"], ["joy"], ["joy"], ["neutral"], ["neutral"]]}',
 '{"page_utterance_emotions": [["neutral"], ["neutral"], ["anger", "disgust"], ["anger", "disgust"], ["neutral"], ["sadness"], ["sadness"]]}',
 '{"page_utterance_emotions": [["anger", "sadness"], ["anger", "sadness"], ["anger", "sadness"], ["fear", "surprise"], ["surprise"], ["joy"], ["anger", "surprise"], ["joy"], ["joy"], ["joy"], ["anger"], ["anger"], ["surprise", "joy"], ["fear", "sadness"], ["fear", "sadness"], ["fear", "surprise"]]}',
 '{"page_utterance_emotions": [["anger", "disgust"], ["anger", "disgust"], ["anger", "disgust"], ["fear", "sadness"], ["fear", "sadness", "surprise"], ["sadness"], ["sadness"], ["fear", "sadness"], ["sadness", "surprise"], ["sadness", "surprise"], ["joy"], ["anger"], ["anger"], ["anger"], ["anger", "disgust"], ["joy"], ["joy"], ["surprise", "joy"], ["surprise", "joy"], ["anger", "su

In [31]:
processed_grounds = []

for ground in test_grounds:
    x = json.loads(ground)["page_utterance_emotions"]
    processed_grounds.append(x)

In [32]:
len(processed_grounds)

156

In [33]:
processed_grounds

[[['surprise', 'joy'],
  ['joy'],
  ['surprise', 'joy'],
  ['joy'],
  ['joy'],
  ['joy'],
  ['surprise'],
  ['joy'],
  ['joy'],
  ['neutral'],
  ['neutral']],
 [['neutral'],
  ['neutral'],
  ['anger', 'disgust'],
  ['anger', 'disgust'],
  ['neutral'],
  ['sadness'],
  ['sadness']],
 [['anger', 'sadness'],
  ['anger', 'sadness'],
  ['anger', 'sadness'],
  ['fear', 'surprise'],
  ['surprise'],
  ['joy'],
  ['anger', 'surprise'],
  ['joy'],
  ['joy'],
  ['joy'],
  ['anger'],
  ['anger'],
  ['surprise', 'joy'],
  ['fear', 'sadness'],
  ['fear', 'sadness'],
  ['fear', 'surprise']],
 [['anger', 'disgust'],
  ['anger', 'disgust'],
  ['anger', 'disgust'],
  ['fear', 'sadness'],
  ['fear', 'sadness', 'surprise'],
  ['sadness'],
  ['sadness'],
  ['fear', 'sadness'],
  ['sadness', 'surprise'],
  ['sadness', 'surprise'],
  ['joy'],
  ['anger'],
  ['anger'],
  ['anger'],
  ['anger', 'disgust'],
  ['joy'],
  ['joy'],
  ['surprise', 'joy'],
  ['surprise', 'joy'],
  ['anger', 'surprise'],
  ['anger', 

In [34]:
bad_idx = []

for idx, (i,j) in enumerate(zip(processed_grounds, processed_preds)):
    if len(i) != len(j):
        print(idx, len(i), len(j))
        bad_idx.append(idx)

14 23 22


In [35]:
bad_idx.sort(reverse=True)

# Remove elements from 'grounds' at the specified indices
for idx in bad_idx:
    
    del processed_grounds[idx]
    del processed_preds[idx]

In [36]:
len(processed_grounds), len(processed_preds)

(155, 155)

In [37]:
grounds = [item for sublist in processed_grounds for item in sublist]
predictions = [item for sublist in processed_preds for item in sublist]

In [38]:
len(grounds), len(predictions)

(1303, 1303)

In [39]:
from sklearn.preprocessing import MultiLabelBinarizer
mlb = MultiLabelBinarizer()

In [40]:
y_true_mhot = mlb.fit_transform(grounds)
y_pred_mhot = mlb.transform(predictions)

In [41]:
y_true_mhot.shape, y_pred_mhot.shape

((1303, 7), (1303, 7))

In [42]:
class_labels = mlb.classes_

In [43]:
class_labels

array(['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness',
       'surprise'], dtype=object)

In [44]:
print(classification_report(y_true_mhot, y_pred_mhot, target_names=class_labels, digits=3))

              precision    recall  f1-score   support

       anger      0.576     0.606     0.591       437
     disgust      0.224     0.341     0.270        44
        fear      0.534     0.522     0.528       299
         joy      0.638     0.507     0.565       296
     neutral      0.463     0.411     0.436       107
     sadness      0.578     0.536     0.556       332
    surprise      0.672     0.577     0.621       355

   micro avg      0.575     0.542     0.558      1870
   macro avg      0.527     0.500     0.510      1870
weighted avg      0.583     0.542     0.560      1870
 samples avg      0.594     0.565     0.555      1870

