# Fine-tune Llama-3 on the comics dataset for emotion recognition (ER)

## Libraries

In [1]:
# Run this cell only once to install LLaMA-Factory

# %cd ..
# %rm -rf LLaMA-Factory
# !git clone https://github.com/hiyouga/LLaMA-Factory.git
# %cd LLaMA-Factory
# %ls
# !pip install -e .[torch,bitsandbytes]

In [2]:
# !pip uninstall -y pydantic
# !pip install pydantic==1.10.9 # 

# !pip uninstall -y gradio
# !pip install gradio==3.48.0

# !pip uninstall -y bitsandbytes
# !pip install --upgrade bitsandbytes

# !pip install tqdm
# !pip install ipywidgets
# !pip install scikit-learn

# Restart kernel afterwards.

In [3]:
import os
import ast
import sys
import json
import torch
import pickle
import subprocess

sys.path.append('../')

import pandas as pd

from tqdm.notebook import tqdm
from llamafactory.chat import ChatModel
from llamafactory.extras.misc import torch_gc
from sklearn.metrics import classification_report
# from utils.post_processing import post_process_acc

In [4]:
# Warn early when no CUDA device is visible.
# A plain conditional is used instead of `assert` so the check is not
# stripped when Python runs with optimizations enabled (`python -O`).
if not torch.cuda.is_available():
    print("Please set up a GPU before using LLaMA Factory...")

## Parameters

In [5]:
# Project root: the parent of the directory the notebook is run from.
ROOT_DIR = os.path.dirname(os.path.abspath(os.getcwd()))

In [6]:
# Sibling folder "am_work", located next to the project root.
AM_DIR = os.path.abspath(os.path.join(ROOT_DIR, os.pardir, "am_work"))

In [7]:
# Folder that the train/test dataset file names are joined onto.
DATASET_DIR = os.path.join(ROOT_DIR, "datasets")

In [8]:
# LLaMA-Factory checkout: used as the working directory of the training process
# and to locate data/dataset_info.json.
# NOTE(review): tied to a specific directory layout (am_work/coling_2025) — adjust per machine.
LLAMA_FACTORY_DIR = os.path.join(AM_DIR, "coling_2025/LLaMA-Factory")

In [9]:
LLAMA_FACTORY_DIR

'/Utilisateurs/umushtaq/am_work/coling_2025/LLaMA-Factory'

In [10]:
# Model id passed as `model_name_or_path` for both training and inference;
# the part after "/" is also used to name output folders and files.
BASE_MODEL = "unsloth/llama-3-8b-Instruct-bnb-4bit"

In [11]:
# Short task tag embedded in the output directory and results file names.
TASK = "er"

In [12]:
# TAGS = 1
# TAGS = "wtags" if TAGS == 1 else "wotags"

In [13]:
# CONTEXT = "essay" # essay or paragraph

In [14]:
# Adapter output folder: finetuned_models/comics_<task>_<model id without the org prefix>.
model_tag = BASE_MODEL.split("/")[1]
OUTPUT_DIR = os.path.join(ROOT_DIR, "finetuned_models", f"comics_{TASK}_{model_tag}")

In [15]:
OUTPUT_DIR

'/Utilisateurs/umushtaq/er_work/finetuned_models/comics_er_llama-3-8b-Instruct-bnb-4bit'

## Load Dataset

In [16]:
# Dataset file names, resolved relative to DATASET_DIR.
train_dataset_name = "comics_utterance_train.json"
test_dataset_name = "comics_utterance_test.json"

# Full paths to the train and test splits.
train_dataset_file = os.path.join(DATASET_DIR, train_dataset_name)
test_dataset_file = os.path.join(DATASET_DIR, test_dataset_name)

In [17]:
train_dataset_file, test_dataset_file

('/Utilisateurs/umushtaq/er_work/datasets/comics_train.json',
 '/Utilisateurs/umushtaq/er_work/datasets/comics_test.json')

## Fine-tune Model

In [18]:
# Folder for the generated training-argument JSON files.
# makedirs(exist_ok=True) is safe to re-run and avoids the separate
# "check, then create" step of the exists()/mkdir() pair.
os.makedirs(os.path.join(ROOT_DIR, "ft_arg_files"), exist_ok=True)

In [19]:
# Path of the training-argument file:
# <dataset name up to "train"><model id without the org prefix>.json
dataset_prefix = train_dataset_name.split(".")[0].split("train")[0]
model_tag = BASE_MODEL.split("/")[1]
train_file = os.path.join(ROOT_DIR, "ft_arg_files", f"{dataset_prefix}{model_tag}.json")

In [20]:
# Registry entry for the training file: where it lives and which JSON fields
# play the prompt / query / response roles.
dataset_info_line = {
    "file_name": train_dataset_file,
    "columns": dict(prompt="instruction", query="input", response="output"),
}

In [21]:
dataset_info_line

{'file_name': '/Utilisateurs/umushtaq/er_work/datasets/comics_train.json',
 'columns': {'prompt': 'instruction', 'query': 'input', 'response': 'output'}}

In [22]:
# Register the training set in LLaMA-Factory's data/dataset_info.json under the
# key "comics_er" (the name passed as `dataset` in the training arguments).
dataset_info_path = os.path.join(LLAMA_FACTORY_DIR, "data/dataset_info.json")

# Explicit UTF-8 so reading/writing does not depend on the machine's locale.
with open(dataset_info_path, "r", encoding="utf-8") as jsonFile:
    data = json.load(jsonFile)

data["comics_er"] = dataset_info_line

# indent / ensure_ascii=False keep the shared registry file human-readable
# instead of collapsing it onto one escaped line.
with open(dataset_info_path, "w", encoding="utf-8") as jsonFile:
    json.dump(data, jsonFile, indent=2, ensure_ascii=False)

### Training Args

In [23]:
# Passed as `num_train_epochs`; also embedded in the results pickle file name.
NB_EPOCHS = 50

In [24]:
# Training arguments; written to a JSON file and handed to `llamafactory-cli train`.
args = {
    "stage": "sft",                       # supervised fine-tuning
    "do_train": True,
    "model_name_or_path": BASE_MODEL,     # base model id
    "dataset": "comics_er",               # key registered in dataset_info.json
    "template": "llama3",                 # llama3 prompt template
    "finetuning_type": "lora",            # LoRA adapters to save memory
    "lora_target": "all",                 # attach LoRA adapters to all linear layers
    "output_dir": OUTPUT_DIR,             # where the LoRA adapters are saved
    "overwrite_output_dir": True,         # overwrite existing output contents
    "per_device_train_batch_size": 2,     # batch size per device
    "gradient_accumulation_steps": 4,     # gradient accumulation steps
    "lr_scheduler_type": "cosine",        # cosine learning-rate schedule
    "logging_steps": 10,                  # log every 10 steps
    "warmup_ratio": 0.1,                  # warmup fraction
    "save_steps": 3000,                   # save a checkpoint every 3000 steps
    "learning_rate": 5e-5,                # learning rate
    "num_train_epochs": NB_EPOCHS,        # number of training epochs
    "max_samples": 2000,                  # use at most 2000 examples per dataset
    "max_grad_norm": 1.0,                 # clip gradient norm to 1.0
    "quantization_bit": 4,                # 4-bit QLoRA
    "loraplus_lr_ratio": 16.0,            # LoRA+ with lambda=16.0
    "fp16": True,                         # float16 mixed-precision training
    "report_to": "none",                  # no external experiment tracker (e.g. wandb)
}

In [25]:
# Write the training arguments to disk. The context manager guarantees the
# file is flushed and closed before the training subprocess reads it.
with open(train_file, "w", encoding="utf-8") as fh:
    json.dump(args, fh, indent=2)

In [26]:
train_file

'/Utilisateurs/umushtaq/er_work/ft_arg_files/comics_llama-3-8b-Instruct-bnb-4bit.json'

In [27]:
# Start training through the LLaMA-Factory CLI, with the LLaMA-Factory
# checkout as the working directory.
train_cmd = ["llamafactory-cli", "train", train_file]
p = subprocess.Popen(train_cmd, cwd=LLAMA_FACTORY_DIR)

In [28]:
# Block until the training process exits; the displayed value is its return code.
p.wait()

09/17/2024 13:54:16 - INFO - llamafactory.cli - Initializing distributed tasks at: 127.0.0.1:29516


W0917 13:54:17.702000 140510096708928 torch/distributed/run.py:757] 
W0917 13:54:17.702000 140510096708928 torch/distributed/run.py:757] *****************************************
W0917 13:54:17.702000 140510096708928 torch/distributed/run.py:757] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. 
W0917 13:54:17.702000 140510096708928 torch/distributed/run.py:757] *****************************************


09/17/2024 13:54:26 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, compute dtype: torch.float16
09/17/2024 13:54:26 - INFO - llamafactory.hparams.parser - Process rank: 1, device: cuda:1, n_gpu: 1, distributed training: True, compute dtype: torch.float16


[INFO|tokenization_utils_base.py:2269] 2024-09-17 13:54:26,960 >> loading file tokenizer.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--llama-3-8b-Instruct-bnb-4bit/snapshots/f296897830363557c84cc4a942c2cd1f91818ae4/tokenizer.json
[INFO|tokenization_utils_base.py:2269] 2024-09-17 13:54:26,960 >> loading file added_tokens.json from cache at None
[INFO|tokenization_utils_base.py:2269] 2024-09-17 13:54:26,961 >> loading file special_tokens_map.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--llama-3-8b-Instruct-bnb-4bit/snapshots/f296897830363557c84cc4a942c2cd1f91818ae4/special_tokens_map.json
[INFO|tokenization_utils_base.py:2269] 2024-09-17 13:54:26,961 >> loading file tokenizer_config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--llama-3-8b-Instruct-bnb-4bit/snapshots/f296897830363557c84cc4a942c2cd1f91818ae4/tokenizer_config.json
[INFO|tokenization_utils_base.py:2513] 2024-09-17 13:5

09/17/2024 13:54:27 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|>
09/17/2024 13:54:27 - INFO - llamafactory.data.loader - Loading dataset /Utilisateurs/umushtaq/er_work/datasets/comics_train.json...
09/17/2024 13:54:27 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|>
09/17/2024 13:54:28 - INFO - llamafactory.data.loader - Loading dataset /Utilisateurs/umushtaq/er_work/datasets/comics_train.json...
training example:
input_ids:
[128000, 128006, 882, 128007, 271, 14711, 1472, 527, 459, 6335, 304, 5867, 6082, 18825, 13, 1472, 527, 2728, 279, 36815, 315, 264, 20303, 2363, 902, 5727, 49926, 3752, 22256, 3095, 44910, 555, 366, 1406, 1500, 1406, 29, 9681, 13, 4718, 3465, 374, 311, 49229, 1855, 22256, 685, 304, 279, 20303, 36815, 439, 389, 279, 2768, 20356, 6989, 25, 330, 10976, 261, 1, 320, 1111, 705, 330, 4944, 70, 592, 1, 320, 18091, 705, 330, 91471, 1, 320, 11673, 705, 330, 60765, 2136, 1, 320, 7934, 705, 330, 23912, 9868, 1, 320, 60882, 8, 477, 330,

[INFO|configuration_utils.py:733] 2024-09-17 13:54:28,879 >> loading configuration file config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--llama-3-8b-Instruct-bnb-4bit/snapshots/f296897830363557c84cc4a942c2cd1f91818ae4/config.json
[INFO|configuration_utils.py:800] 2024-09-17 13:54:28,880 >> Model config LlamaConfig {
  "_name_or_path": "unsloth/llama-3-8b-Instruct-bnb-4bit",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128009,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pad_token_id": 128255,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bnb_4bit_compute_dtyp

09/17/2024 13:54:28 - INFO - llamafactory.model.model_utils.quantization - Loading ?-bit BITSANDBYTES-quantized model.
09/17/2024 13:54:28 - INFO - llamafactory.model.model_utils.quantization - Loading ?-bit BITSANDBYTES-quantized model.


[INFO|modeling_utils.py:4507] 2024-09-17 13:54:34,275 >> All model checkpoint weights were used when initializing LlamaForCausalLM.

[INFO|modeling_utils.py:4515] 2024-09-17 13:54:34,276 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at unsloth/llama-3-8b-Instruct-bnb-4bit.
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
[INFO|configuration_utils.py:993] 2024-09-17 13:54:34,407 >> loading configuration file generation_config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--llama-3-8b-Instruct-bnb-4bit/snapshots/f296897830363557c84cc4a942c2cd1f91818ae4/generation_config.json
[INFO|configuration_utils.py:1038] 2024-09-17 13:54:34,407 >> Generate config GenerationConfig {
  "bos_token_id": 128000,
  "do_sample": true,
  "eos_token_id": [
    128001,
    128009
  ],
  "max_length": 8192,
  "pad_token_id": 128255,


09/17/2024 13:54:34 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.
09/17/2024 13:54:34 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
09/17/2024 13:54:34 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
09/17/2024 13:54:34 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
09/17/2024 13:54:34 - INFO - llamafactory.model.model_utils.misc - Found linear modules: q_proj,v_proj,gate_proj,o_proj,down_proj,up_proj,k_proj
09/17/2024 13:54:34 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.
09/17/2024 13:54:34 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
09/17/2024 13:54:34 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
09/17/2024 13:54:34 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
09/17/2024 13:54:34 - INFO - ll

[INFO|trainer.py:648] 2024-09-17 13:54:35,271 >> Using auto half precision backend


09/17/2024 13:54:35 - INFO - llamafactory.train.trainer_utils - Using LoRA+ optimizer with loraplus lr ratio 16.00.
09/17/2024 13:54:35 - INFO - llamafactory.train.trainer_utils - Using LoRA+ optimizer with loraplus lr ratio 16.00.


[INFO|trainer.py:2134] 2024-09-17 13:54:35,802 >> ***** Running training *****
[INFO|trainer.py:2135] 2024-09-17 13:54:35,802 >>   Num examples = 22
[INFO|trainer.py:2136] 2024-09-17 13:54:35,802 >>   Num Epochs = 50
[INFO|trainer.py:2137] 2024-09-17 13:54:35,802 >>   Instantaneous batch size per device = 2
[INFO|trainer.py:2140] 2024-09-17 13:54:35,802 >>   Total train batch size (w. parallel, distributed & accumulation) = 16
[INFO|trainer.py:2141] 2024-09-17 13:54:35,802 >>   Gradient Accumulation steps = 4
[INFO|trainer.py:2142] 2024-09-17 13:54:35,802 >>   Total optimization steps = 50
[INFO|trainer.py:2143] 2024-09-17 13:54:35,806 >>   Number of trainable parameters = 20,971,520
 20%|██        | 10/50 [01:09<04:36,  6.91s/it]

{'loss': 0.693, 'grad_norm': 0.5575774908065796, 'learning_rate': 4.849231551964771e-05, 'epoch': 6.67}


 40%|████      | 20/50 [02:18<03:28,  6.96s/it]

{'loss': 0.3992, 'grad_norm': 1.3071566820144653, 'learning_rate': 3.7500000000000003e-05, 'epoch': 13.33}


 60%|██████    | 30/50 [03:28<02:19,  6.96s/it]

{'loss': 0.1079, 'grad_norm': 0.6554983258247375, 'learning_rate': 2.0658795558326743e-05, 'epoch': 20.0}


 80%|████████  | 40/50 [04:38<01:09,  6.96s/it]

{'loss': 0.01, 'grad_norm': 0.40512004494667053, 'learning_rate': 7.016504991533726e-06, 'epoch': 26.67}


100%|██████████| 50/50 [05:47<00:00,  6.95s/it][INFO|trainer.py:3503] 2024-09-17 14:00:23,643 >> Saving model checkpoint to /Utilisateurs/umushtaq/er_work/finetuned_models/comics_er_llama-3-8b-Instruct-bnb-4bit/checkpoint-50


{'loss': 0.0014, 'grad_norm': 0.023578638210892677, 'learning_rate': 6.089874350439506e-08, 'epoch': 33.33}


[INFO|configuration_utils.py:733] 2024-09-17 14:00:24,008 >> loading configuration file config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--llama-3-8b-Instruct-bnb-4bit/snapshots/f296897830363557c84cc4a942c2cd1f91818ae4/config.json
[INFO|configuration_utils.py:800] 2024-09-17 14:00:24,009 >> Model config LlamaConfig {
  "_name_or_path": "unsloth/llama-3-8b-Instruct",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128009,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pad_token_id": 128255,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bnb_4bit_compute_dtype": "bflo

{'train_runtime': 351.5204, 'train_samples_per_second': 3.129, 'train_steps_per_second': 0.142, 'train_loss': 0.24229629039764405, 'epoch': 33.33}


[INFO|configuration_utils.py:733] 2024-09-17 14:00:27,603 >> loading configuration file config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--llama-3-8b-Instruct-bnb-4bit/snapshots/f296897830363557c84cc4a942c2cd1f91818ae4/config.json
[INFO|configuration_utils.py:800] 2024-09-17 14:00:27,604 >> Model config LlamaConfig {
  "_name_or_path": "unsloth/llama-3-8b-Instruct",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128009,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pad_token_id": 128255,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bnb_4bit_compute_dtype": "bflo

***** train metrics *****
  epoch                    =    33.3333
  total_flos               = 34450820GF
  train_loss               =     0.2423
  train_runtime            = 0:05:51.52
  train_samples_per_second =      3.129
  train_steps_per_second   =      0.142


[INFO|modelcard.py:449] 2024-09-17 14:00:28,950 >> Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}


0

## Inference on the fine-tuned model

In [29]:
OUTPUT_DIR

'/Utilisateurs/umushtaq/er_work/finetuned_models/comics_er_llama-3-8b-Instruct-bnb-4bit'

In [30]:
os.listdir(OUTPUT_DIR)

['checkpoint-1',
 'README.md',
 'adapter_model.safetensors',
 'adapter_config.json',
 'tokenizer_config.json',
 'special_tokens_map.json',
 'tokenizer.json',
 'training_args.bin',
 'train_results.json',
 'all_results.json',
 'trainer_state.json',
 'comics_er_results_0.2.pickle',
 'checkpoint-5',
 'comics_er_results_5.pickle',
 'checkpoint-20',
 'comics_er_results_20.pickle',
 'trainer_log.jsonl',
 'checkpoint-50']

In [31]:
# Inference arguments: base model plus the LoRA adapter saved in OUTPUT_DIR.
# NOTE: this rebinds `args`, replacing the training arguments defined earlier.
args = {
    "model_name_or_path": BASE_MODEL,     # same base model as in training
    "adapter_name_or_path": OUTPUT_DIR,   # load the saved LoRA adapters
    "template": "llama3",                 # same template as in training
    "finetuning_type": "lora",            # same fine-tuning type as in training
    "quantization_bit": 4,                # load the model 4-bit quantized
}


In [32]:
# Build the chat model from the inference arguments (base model + LoRA adapter).
model = ChatModel(args)

[INFO|tokenization_utils_base.py:2269] 2024-09-17 14:00:33,952 >> loading file tokenizer.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--llama-3-8b-Instruct-bnb-4bit/snapshots/f296897830363557c84cc4a942c2cd1f91818ae4/tokenizer.json
[INFO|tokenization_utils_base.py:2269] 2024-09-17 14:00:33,954 >> loading file added_tokens.json from cache at None
[INFO|tokenization_utils_base.py:2269] 2024-09-17 14:00:33,956 >> loading file special_tokens_map.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--llama-3-8b-Instruct-bnb-4bit/snapshots/f296897830363557c84cc4a942c2cd1f91818ae4/special_tokens_map.json
[INFO|tokenization_utils_base.py:2269] 2024-09-17 14:00:33,958 >> loading file tokenizer_config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--llama-3-8b-Instruct-bnb-4bit/snapshots/f296897830363557c84cc4a942c2cd1f91818ae4/tokenizer_config.json
[INFO|tokenization_utils_base.py:2513] 2024-09-17 14:0

09/17/2024 14:00:34 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|>


[INFO|configuration_utils.py:733] 2024-09-17 14:00:34,316 >> loading configuration file config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--llama-3-8b-Instruct-bnb-4bit/snapshots/f296897830363557c84cc4a942c2cd1f91818ae4/config.json
[INFO|configuration_utils.py:800] 2024-09-17 14:00:34,318 >> Model config LlamaConfig {
  "_name_or_path": "unsloth/llama-3-8b-Instruct-bnb-4bit",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128009,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pad_token_id": 128255,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bnb_4bit_compute_dtyp

09/17/2024 14:00:34 - INFO - llamafactory.model.model_utils.quantization - Loading ?-bit BITSANDBYTES-quantized model.
09/17/2024 14:00:34 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.


[INFO|modeling_utils.py:3678] 2024-09-17 14:00:34,420 >> loading weights file model.safetensors from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--llama-3-8b-Instruct-bnb-4bit/snapshots/f296897830363557c84cc4a942c2cd1f91818ae4/model.safetensors
[INFO|modeling_utils.py:1606] 2024-09-17 14:00:34,602 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16.
[INFO|configuration_utils.py:1038] 2024-09-17 14:00:34,607 >> Generate config GenerationConfig {
  "bos_token_id": 128000,
  "eos_token_id": 128009,
  "pad_token_id": 128255
}

[INFO|quantizer_bnb_4bit.py:106] 2024-09-17 14:00:34,869 >> target_dtype {target_dtype} is replaced by `CustomDtype.INT4` for 4-bit BnB quantization
[INFO|modeling_utils.py:4507] 2024-09-17 14:00:37,103 >> All model checkpoint weights were used when initializing LlamaForCausalLM.

[INFO|modeling_utils.py:4515] 2024-09-17 14:00:37,105 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at u

09/17/2024 14:00:37 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
09/17/2024 14:00:37 - INFO - llamafactory.model.adapter - Loaded adapter(s): /Utilisateurs/umushtaq/er_work/finetuned_models/comics_er_llama-3-8b-Instruct-bnb-4bit
09/17/2024 14:00:37 - INFO - llamafactory.model.loader - all params: 8,051,232,768




In [33]:
# The file is only read, so open it read-only: "r+" would additionally
# require write permission on the dataset file.
with open(test_dataset_file, "r", encoding="utf-8") as fh:
    test_dataset = json.load(fh)

In [34]:
# One prompt string per test sample, with the reference outputs kept in the same order.
# NOTE(review): the prompt is "\nUser:" + instruction + input with no separator —
# confirm this matches the prompt format the model saw during training.
test_prompts = ["\nUser:" + sample["instruction"] + sample["input"] for sample in test_dataset]
test_grounds = [sample["output"] for sample in test_dataset]

In [35]:
test_predictions = []

for prompt in tqdm(test_prompts):
    # Every prompt is sent as a fresh single-turn conversation.
    messages = [{"role": "user", "content": prompt}]

    # Concatenate the streamed chunks into the full reply.
    response = "".join(model.stream_chat(messages))
    test_predictions.append({"role": "assistant", "content": response})

    # Helper from llamafactory.extras.misc, called after every generation.
    torch_gc()

  0%|          | 0/10 [00:00<?, ?it/s]

In [36]:
# Persist the references and the raw model replies side by side; the file name
# carries the task tag and the epoch count.
results_path = os.path.join(OUTPUT_DIR, f"comics_{TASK}_results_{NB_EPOCHS}.pickle")
results_d = {"ground_truths": test_grounds, "predictions": test_predictions}

with open(results_path, "wb") as fh:
    pickle.dump(results_d, fh)

In [37]:
os.listdir(OUTPUT_DIR)

['checkpoint-1',
 'README.md',
 'adapter_model.safetensors',
 'adapter_config.json',
 'tokenizer_config.json',
 'special_tokens_map.json',
 'tokenizer.json',
 'training_args.bin',
 'train_results.json',
 'all_results.json',
 'trainer_state.json',
 'comics_er_results_0.2.pickle',
 'checkpoint-5',
 'comics_er_results_5.pickle',
 'checkpoint-20',
 'comics_er_results_20.pickle',
 'trainer_log.jsonl',
 'checkpoint-50',
 'comics_er_results_50.pickle']

## Post-processing

In [38]:
# Reload the results pickle written after inference.
results_file = os.path.join(OUTPUT_DIR, f"comics_{TASK}_results_{NB_EPOCHS}.pickle")
with open(results_file, "rb") as fh:
    results = pickle.load(fh)

In [39]:
# Parse every reference JSON string and keep its "list_emotion_classes" entry.
grounds = [json.loads(raw)["list_emotion_classes"] for raw in results["ground_truths"]]

# Predictions are kept as raw assistant messages here; a later cell unpacks them.
preds = results["predictions"]

In [40]:
len(grounds), len(preds)

(10, 10)

In [41]:
len(grounds[0])

132

In [42]:
# Extract the generated text of every assistant message. Reading from `results`
# (instead of reassigning from `preds` itself) keeps this cell safe to re-run.
preds = [message["content"] for message in results["predictions"]]

In [43]:
len(preds)

10

In [44]:
preds

['{"list_emotion_classes": [["AN", "SA"], ["AN", "SA"], ["AN"], ["AN", "FE"], ["AN"], ["FE", "SU"], ["AN"], ["FE", "SU"], ["AN", "SU"], ["AN", "FE"], ["AN"], ["AN"], ["AN"], ["AN"], ["AN"], ["AN"], ["AN"], ["AN"], ["AN"], ["AN"], ["AN"], ["SU"], ["SU"], ["SU"], ["AN", "SU"], ["AN", "SU"], ["AN"], ["AN"], ["AN"], ["AN"], ["FE"], ["FE", "SA"], ["FE", "SA"], ["AN", "FE"], ["AN", "FE"], ["AN"], ["AN"], ["FE"], ["FE"], ["SU"], ["AN", "SU"], ["AN", "SU"], ["AN"], ["AN"], ["FE", "SU"], ["FE", "SU"], ["AN", "FE", "SU"], ["AN", "FE"], ["SA"], ["AN", "SA"], ["AN", "FE", "SA"], ["AN"], ["AN", "FE", "SU"], ["FE", "SA"], ["FE", "SU"], ["AN", "FE", "SU"], ["AN", "SA"], ["AN", "FE", "SA"], ["AN"], ["AN"], ["SU"], ["AN", "SU"], ["AN", "SU"], ["SU"], ["SU"], ["AN", "SU"], ["AN", "SU"], ["AN"], ["FE"], ["FE"], ["SU"], ["AN", "FE"], ["AN", "SU"], ["AN", "FE", "SU"], ["SA"], ["FE", "SA"], ["AN", "SA"], ["AN", "FE", "SA"], ["JO"], ["AN", "JO"], ["FE", "SA", "JO"], ["SU"], ["AN", "SU"], ["FE", "SU", "JO"], 

In [91]:
# preds = [json.loads(x)["emotion_class"] for x in preds]

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [38]:
# NOTE(review): `post_process_acc` is not in scope on a fresh kernel — its import
# (`from utils.post_processing import post_process_acc`) is commented out in the imports cell.
# TODO confirm the helper handles the emotion-class results produced by this notebook.
task_grounds, task_preds = post_process_acc(results)

In [42]:
# sanity check: 
len(task_preds) == len(task_grounds)

True

## Results

In [43]:
print(classification_report(task_grounds, task_preds, digits=3))

              precision    recall  f1-score   support

       Claim      0.231     0.032     0.056       283
  MajorClaim      0.724     0.136     0.230       154
     Premise      0.661     0.997     0.795       724

    accuracy                          0.648      1161
   macro avg      0.538     0.388     0.360      1161
weighted avg      0.564     0.648     0.540      1161



In [44]:
# Compute the report before opening the file, so a failure in
# classification_report does not leave an empty, truncated pickle behind.
report_dict = classification_report(task_grounds, task_preds, output_dict=True)

# Path built with os.path.join, consistent with the other artefact paths in this notebook.
report_path = os.path.join(OUTPUT_DIR, "classification_report.pickle")
with open(report_path, "wb") as fh:
    pickle.dump(report_dict, fh)