In [1]:
import os
import ast
import sys
import json
import torch
import pickle
import subprocess

sys.path.append('../')

import pandas as pd

from pathlib import Path
from tqdm import tqdm
from llamafactory.chat import ChatModel
from llamafactory.extras.misc import torch_gc
from sklearn.metrics import classification_report
#from utils.post_processing import post_process


In [2]:
try:    
    assert torch.cuda.is_available() is True
    
except AssertionError:
    
    print("Please set up a GPU before using LLaMA Factory...")

In [3]:
CURRENT_DIR = Path.cwd()
FT_DIR = CURRENT_DIR / "emotion_analysis_comics" / "finetuning"
DATASET_DIR = CURRENT_DIR / "emotion_analysis_comics" / "finetuning" / "datasets"

ERC_DIR = FT_DIR.parent
LLAMA_FACTORY_DIR = ERC_DIR / "LLaMA-Factory"

BASE_MODEL = "unsloth/Meta-Llama-3.1-70B-bnb-4bit"
LOGGING_DIR = FT_DIR / "training_logs"
OUTPUT_DIR = FT_DIR / "saved_models" / f"""comics35_pg_{BASE_MODEL.split("/")[1]}"""

In [4]:
DATASET_DIR = CURRENT_DIR / "emotion_analysis_comics" / "finetuning" / "datasets"

In [5]:
train_dataset_name = f"""comics35_utterance_pg_train.json"""
test_dataset_name = f"""comics35_utterance_pg_test.json"""

train_dataset_file = DATASET_DIR / train_dataset_name
test_dataset_file = DATASET_DIR / test_dataset_name

In [6]:
train_dataset_file

PosixPath('/Utilisateurs/umushtaq/emotion_analysis_comics/finetuning/datasets/comics35_utterance_pg_train.json')

In [7]:

if not os.path.exists(os.path.join(FT_DIR, "model_args")):
    os.mkdir(os.path.join(FT_DIR, "model_args"))

train_file = FT_DIR / "model_args" / f"""{train_dataset_name.split(".")[0].split("train")[0]}{BASE_MODEL.split("/")[1]}.json"""

In [8]:
dataset_info_line =  {
  "file_name": f"{train_dataset_file}",
  "columns": {
    "prompt": "instruction",
    "query": "input",
    "response": "output"
  }
}

In [9]:
with open(os.path.join(LLAMA_FACTORY_DIR, "data/dataset_info.json"), "r") as jsonFile:
    data = json.load(jsonFile)

data["comics"] = dataset_info_line

with open(os.path.join(LLAMA_FACTORY_DIR, "data/dataset_info.json"), "w") as jsonFile:
    json.dump(data, jsonFile)

In [10]:
NB_EPOCHS = 0.1

In [11]:
args = dict(
    
  stage="sft",                           # do supervised fine-tuning
  do_train=True,

  model_name_or_path=BASE_MODEL,         # use bnb-4bit-quantized Llama-3-8B-Instruct model
  num_train_epochs=NB_EPOCHS,            # the epochs of training
  output_dir=str(OUTPUT_DIR),                 # the path to save LoRA adapters
  overwrite_output_dir=True,             # overrides existing output contents

  dataset="comics",                      # dataset name
  template="llama3",                     # use llama3 prompt template
  #train_on_prompt=True,
  val_size=0.1,
  max_samples=10000,                       # use 500 examples in each dataset

  finetuning_type="lora",                # use LoRA adapters to save memory
  lora_target="all",                     # attach LoRA adapters to all linear layers
  per_device_train_batch_size=2,         # the batch size
  gradient_accumulation_steps=4,         # the gradient accumulation steps
  lr_scheduler_type="cosine",            # use cosine learning rate scheduler
  loraplus_lr_ratio=16.0,                # use LoRA+ algorithm with lambda=16.0
  #temperature=0.5,
  
  warmup_ratio=0.1,                      # use warmup scheduler    
  learning_rate=5e-5,                    # the learning rate
  max_grad_norm=1.0,                     # clip gradient norm to 1.0
  
  fp16=True,                             # use float16 mixed precision training
  quantization_bit=4,                    # use 4-bit QLoRA  
  #use_liger_kernel=True,
  #quantization_device_map="auto",
  
  logging_steps=10,                      # log every 10 steps
  save_steps=5000,                       # save checkpoint every 1000 steps    
  logging_dir=str(LOGGING_DIR),
  
  # use_unsloth=True,
  report_to="tensorboard"                       # discards wandb

)

In [12]:
json.dump(args, open(train_file, "w", encoding="utf-8"), indent=2)

In [13]:
p = subprocess.Popen(["llamafactory-cli", "train", train_file], cwd=LLAMA_FACTORY_DIR)

In [14]:
p.wait()

10/31/2024 10:56:31 - INFO - llamafactory.cli - Initializing distributed tasks at: 127.0.0.1:26860


W1031 10:56:32.611000 140462288172352 torch/distributed/run.py:779] 
W1031 10:56:32.611000 140462288172352 torch/distributed/run.py:779] *****************************************
W1031 10:56:32.611000 140462288172352 torch/distributed/run.py:779] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. 
W1031 10:56:32.611000 140462288172352 torch/distributed/run.py:779] *****************************************


10/31/2024 10:56:42 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, compute dtype: torch.float16
10/31/2024 10:56:42 - INFO - llamafactory.hparams.parser - Process rank: 2, device: cuda:2, n_gpu: 1, distributed training: True, compute dtype: torch.float16
10/31/2024 10:56:42 - INFO - llamafactory.hparams.parser - Process rank: 1, device: cuda:1, n_gpu: 1, distributed training: True, compute dtype: torch.float16


[INFO|configuration_utils.py:733] 2024-10-31 10:56:42,930 >> loading configuration file config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--Meta-Llama-3.1-70B-bnb-4bit/snapshots/7edacd248efbb480633dca1bb4fe8b290072b6be/config.json
[INFO|configuration_utils.py:800] 2024-10-31 10:56:42,931 >> Model config LlamaConfig {
  "_name_or_path": "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "hidden_act": "silu",
  "hidden_size": 8192,
  "initializer_range": 0.02,
  "intermediate_size": 28672,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 64,
  "num_hidden_layers": 80,
  "num_key_value_heads": 8,
  "pad_token_id": 128004,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bnb_4bit_compute_dtyp

10/31/2024 10:56:44 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|>
10/31/2024 10:56:44 - INFO - llamafactory.data.loader - Loading dataset /Utilisateurs/umushtaq/emotion_analysis_comics/finetuning/datasets/comics35_utterance_pg_train.json...
10/31/2024 10:56:45 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|>
10/31/2024 10:56:45 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|>
10/31/2024 10:56:46 - INFO - llamafactory.data.loader - Loading dataset /Utilisateurs/umushtaq/emotion_analysis_comics/finetuning/datasets/comics35_utterance_pg_train.json...
10/31/2024 10:56:46 - INFO - llamafactory.data.loader - Loading dataset /Utilisateurs/umushtaq/emotion_analysis_comics/finetuning/datasets/comics35_utterance_pg_train.json...
training example:
input_ids:
[128000, 128006, 882, 128007, 271, 14711, 5867, 6082, 18825, 33257, 15766, 271, 2675, 527, 459, 11084, 20356, 6492, 6335, 58394, 304, 20303, 2363, 21976, 23692, 13, 4718, 3465, 374, 

[INFO|configuration_utils.py:733] 2024-10-31 10:56:46,726 >> loading configuration file config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--Meta-Llama-3.1-70B-bnb-4bit/snapshots/7edacd248efbb480633dca1bb4fe8b290072b6be/config.json
[INFO|configuration_utils.py:800] 2024-10-31 10:56:46,726 >> Model config LlamaConfig {
  "_name_or_path": "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "hidden_act": "silu",
  "hidden_size": 8192,
  "initializer_range": 0.02,
  "intermediate_size": 28672,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 64,
  "num_hidden_layers": 80,
  "num_key_value_heads": 8,
  "pad_token_id": 128004,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bnb_4bit_compute_dtyp

10/31/2024 10:56:46 - INFO - llamafactory.model.model_utils.quantization - Loading ?-bit BITSANDBYTES-quantized model.
10/31/2024 10:56:46 - INFO - llamafactory.model.model_utils.quantization - Loading ?-bit BITSANDBYTES-quantized model.
10/31/2024 10:56:46 - INFO - llamafactory.model.model_utils.quantization - Loading ?-bit BITSANDBYTES-quantized model.


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Loading checkpoint shards: 100%|██████████| 6/6 [00:25<00:00,  4.24s/it]


10/31/2024 10:57:16 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.
10/31/2024 10:57:16 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
10/31/2024 10:57:16 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
10/31/2024 10:57:16 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
10/31/2024 10:57:16 - INFO - llamafactory.model.model_utils.misc - Found linear modules: v_proj,q_proj,k_proj,down_proj,gate_proj,o_proj,up_proj


Loading checkpoint shards: 100%|██████████| 6/6 [00:29<00:00,  4.96s/it]
[INFO|modeling_utils.py:4507] 2024-10-31 10:57:19,408 >> All model checkpoint weights were used when initializing LlamaForCausalLM.

[INFO|modeling_utils.py:4515] 2024-10-31 10:57:19,408 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at unsloth/Meta-Llama-3.1-70B-bnb-4bit.
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
[INFO|configuration_utils.py:993] 2024-10-31 10:57:19,538 >> loading configuration file generation_config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--Meta-Llama-3.1-70B-bnb-4bit/snapshots/7edacd248efbb480633dca1bb4fe8b290072b6be/generation_config.json
[INFO|configuration_utils.py:1038] 2024-10-31 10:57:19,538 >> Generate config GenerationConfig {
  "bos_token_id": 128000,
  "do_sample": true,
  "eos_token_id": 128001,

10/31/2024 10:57:20 - INFO - llamafactory.model.loader - trainable params: 103,546,880 || all params: 70,657,253,376 || trainable%: 0.1465
10/31/2024 10:57:20 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.
10/31/2024 10:57:20 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
10/31/2024 10:57:20 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
10/31/2024 10:57:20 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
10/31/2024 10:57:20 - INFO - llamafactory.model.model_utils.misc - Found linear modules: k_proj,v_proj,gate_proj,down_proj,up_proj,o_proj,q_proj


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


10/31/2024 10:57:21 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.
10/31/2024 10:57:21 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
10/31/2024 10:57:21 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
10/31/2024 10:57:21 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
10/31/2024 10:57:21 - INFO - llamafactory.model.model_utils.misc - Found linear modules: gate_proj,k_proj,v_proj,q_proj,o_proj,down_proj,up_proj
10/31/2024 10:57:21 - INFO - llamafactory.train.trainer_utils - Using LoRA+ optimizer with loraplus lr ratio 16.00.
10/31/2024 10:57:24 - INFO - llamafactory.model.loader - trainable params: 103,546,880 || all params: 70,657,253,376 || trainable%: 0.1465


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
[INFO|trainer.py:648] 2024-10-31 10:57:24,907 >> Using auto half precision backend


10/31/2024 10:57:25 - INFO - llamafactory.model.loader - trainable params: 103,546,880 || all params: 70,657,253,376 || trainable%: 0.1465
10/31/2024 10:57:25 - INFO - llamafactory.train.trainer_utils - Using LoRA+ optimizer with loraplus lr ratio 16.00.


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


10/31/2024 10:57:26 - INFO - llamafactory.train.trainer_utils - Using LoRA+ optimizer with loraplus lr ratio 16.00.


[INFO|trainer.py:2134] 2024-10-31 10:57:31,117 >> ***** Running training *****
[INFO|trainer.py:2135] 2024-10-31 10:57:31,118 >>   Num examples = 646
[INFO|trainer.py:2136] 2024-10-31 10:57:31,118 >>   Num Epochs = 1
[INFO|trainer.py:2137] 2024-10-31 10:57:31,118 >>   Instantaneous batch size per device = 2
[INFO|trainer.py:2140] 2024-10-31 10:57:31,118 >>   Total train batch size (w. parallel, distributed & accumulation) = 24
[INFO|trainer.py:2141] 2024-10-31 10:57:31,118 >>   Gradient Accumulation steps = 4
[INFO|trainer.py:2142] 2024-10-31 10:57:31,118 >>   Total optimization steps = 3
[INFO|trainer.py:2143] 2024-10-31 10:57:31,148 >>   Number of trainable parameters = 103,546,880
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  with torch.enable_grad(), device_autocast_ctx

{'train_runtime': 127.2421, 'train_samples_per_second': 0.508, 'train_steps_per_second': 0.024, 'train_loss': 1.1624692281087239, 'epoch': 0.11}


[INFO|configuration_utils.py:733] 2024-10-31 10:59:38,843 >> loading configuration file config.json from cache at /Utilisateurs/umushtaq/.cache/huggingface/hub/models--unsloth--Meta-Llama-3.1-70B-bnb-4bit/snapshots/7edacd248efbb480633dca1bb4fe8b290072b6be/config.json
[INFO|configuration_utils.py:800] 2024-10-31 10:59:38,844 >> Model config LlamaConfig {
  "_name_or_path": "unsloth/Meta-Llama-3.1-70B",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "hidden_act": "silu",
  "hidden_size": 8192,
  "initializer_range": 0.02,
  "intermediate_size": 28672,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 64,
  "num_hidden_layers": 80,
  "num_key_value_heads": 8,
  "pad_token_id": 128004,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bnb_4bit_compute_dtype": "bflo

***** train metrics *****
  epoch                    =     0.1111
  total_flos               = 15384030GF
  train_loss               =     1.1625
  train_runtime            = 0:02:07.24
  train_samples_per_second =      0.508
  train_steps_per_second   =      0.024


0

In [15]:
from transformers import LlamaForCausalLM, LlamaTokenizer

model = LlamaForCausalLM.from_pretrained(BASE_MODEL)


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

In [16]:
from transformers import PreTrainedTokenizerFast

tokenizer = PreTrainedTokenizerFast.from_pretrained(OUTPUT_DIR, padding_side="left")

In [17]:
tokenizer.pad_token = tokenizer.eos_token
terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]

In [18]:
from peft import LoraModel, LoraConfig

In [19]:
lora_config = LoraConfig.from_pretrained(str(OUTPUT_DIR))

In [20]:
lora_config

LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path='unsloth/Meta-Llama-3.1-70B-bnb-4bit', revision=None, task_type='CAUSAL_LM', inference_mode=True, r=8, target_modules={'down_proj', 'v_proj', 'q_proj', 'o_proj', 'up_proj', 'gate_proj', 'k_proj'}, lora_alpha=16, lora_dropout=0.0, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={}, use_dora=False, layer_replication=None, runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=False))

In [21]:

lora_model = LoraModel(model, lora_config, "/Utilisateurs/umushtaq/emotion_analysis_comics/finetuning/saved_models/comics35_pg_Meta-Llama-31-70B-bnb-4bit")


In [22]:
device_count = torch.cuda.device_count()
if device_count > 1:
    lora_model = torch.nn.DataParallel(lora_model, device_ids=list(range(device_count)))
lora_model.to("cuda")

DataParallel(
  (module): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 8192, padding_idx=128004)
        (layers): ModuleList(
          (0-79): 80 x LlamaDecoderLayer(
            (self_attn): LlamaSdpaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=8192, out_features=8192, bias=False)
                (lora_dropout): ModuleDict(
                  (/Utilisateurs/umushtaq/emotion_analysis_comics/finetuning/saved_models/comics35_pg_Meta-Llama-31-70B-bnb-4bit): Identity()
                )
                (lora_A): ModuleDict(
                  (/Utilisateurs/umushtaq/emotion_analysis_comics/finetuning/saved_models/comics35_pg_Meta-Llama-31-70B-bnb-4bit): Linear(in_features=8192, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (/Utilisateurs/umushtaq/emotion_analysis_comics/finetuning/saved_models/comics35_pg_Meta-

In [23]:
with open(test_dataset_file, "r+") as fh:
    test_dataset = json.load(fh)

test_prompts = []
test_grounds = []

for sample in test_dataset:
    #print(sample)
    #test_prompts.append(sample["instruction"] + sample["input"])
    #test_prompts.append("user:" + sample["instruction"] + sample["input"])
    test_prompts.append([{"role": "user", "content": sample["instruction"] + sample["input"]}])
    test_grounds.append(sample["output"])

In [24]:
len(test_prompts)

156

In [25]:
test_prompts = test_prompts[:1]

In [26]:
test_prompts

[[{'role': 'user',
   'content': '### Emotion Analysis Expert Role\n\nYou are an advanced emotion analysis expert specializing in comic book dialogue interpretation. Your task is to analyze utterances and identify their emotional content.\n\nINPUT:\n- You will receive a list of utterances from a page in a comic book\n- The utterance may express one or multiple emotions\n\nTASK:\n1. Carefully analyze the emotional context and tone of each utterance in the page\n2. Identify applicable emotions from the following classes:\n   "anger", "disgust", "fear", "sadness", "surprise", "joy", "neutral"\n3. For each utterance in a comic page, identify all emotions present and return an array of emotion arrays in order.\n\nRULES:\n1. Use ONLY the labels listed above\n2. Output must be a JSON with single key "page_utterance_emotions"\n3. Value must be an array where:\n   - Each element is an array of emotions for one utterance\n   - Order matches the input utterances order\n   - Multiple emotions are 

In [27]:
inputs = tokenizer.apply_chat_template(
            test_prompts,
            padding=True,
            truncation=True,
            add_generation_prompt=True,
            return_dict=True,
            return_tensors="pt",
)

In [28]:
inputs

{'input_ids': tensor([[128000, 128006,    882, 128007,    271,  14711,   5867,   6082,  18825,
          33257,  15766,    271,   2675,    527,    459,  11084,  20356,   6492,
           6335,  58394,    304,  20303,   2363,  21976,  23692,     13,   4718,
           3465,    374,    311,  24564,  22256,   3095,    323,  10765,    872,
          14604,   2262,    382,  30521,    512,     12,   1472,    690,   5371,
            264,   1160,    315,  22256,   3095,    505,    264,   2199,    304,
            264,  20303,   2363,    198,     12,    578,  22256,    685,   1253,
           3237,    832,    477,   5361,  21958,    271,  66913,    512,     16,
             13,  10852,   3725,  24564,    279,  14604,   2317,    323,  16630,
            315,   1855,  22256,    685,    304,    279,   2199,    198,     17,
             13,  65647,   8581,  21958,    505,    279,   2768,   6989,    512,
            256,    330,   4091,    498,    330,   4338,     70,    592,    498,
            33

In [29]:
inputs.input_ids[0].shape

torch.Size([381])

In [30]:
tokenizer.decode(inputs.input_ids[0])

'<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n### Emotion Analysis Expert Role\n\nYou are an advanced emotion analysis expert specializing in comic book dialogue interpretation. Your task is to analyze utterances and identify their emotional content.\n\nINPUT:\n- You will receive a list of utterances from a page in a comic book\n- The utterance may express one or multiple emotions\n\nTASK:\n1. Carefully analyze the emotional context and tone of each utterance in the page\n2. Identify applicable emotions from the following classes:\n   "anger", "disgust", "fear", "sadness", "surprise", "joy", "neutral"\n3. For each utterance in a comic page, identify all emotions present and return an array of emotion arrays in order.\n\nRULES:\n1. Use ONLY the labels listed above\n2. Output must be a JSON with single key "page_utterance_emotions"\n3. Value must be an array where:\n   - Each element is an array of emotions for one utterance\n   - Order matches the input utterances order\

In [31]:
# generated = generation_model.generate(**inputs, max_new_tokens=32, pad_token_id=inference_tokenizer.eos_token_id, do_sample=True,
#      temperature=0.1,
#      top_p=0.9,)

In [47]:
responses = []

def generate_text(prompt, max_new_tokens=512, num_return_sequences=1, do_sample=True, top_k=50, top_p=0.95, num_beams=1):
    input_ids = inputs.input_ids.to("cuda")
    output_ids = lora_model.module.generate(input_ids, max_new_tokens=32, pad_token_id=tokenizer.eos_token_id, do_sample=True, eos_token_id=terminators,
     temperature=0.1,
     top_p=0.9,)
    #responses.append(tokenizer.decode(output_ids[0], skip_special_tokens=True))
    responses.append(output_ids)

In [48]:
#prompt = "The quick brown fox"
#generated_text = generate_text(prompt)

In [49]:
for prompt in test_prompts:
    
    generate_text(prompt)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


In [50]:
len(responses)

1

In [51]:
test_prompts

[[{'role': 'user',
   'content': '### Emotion Analysis Expert Role\n\nYou are an advanced emotion analysis expert specializing in comic book dialogue interpretation. Your task is to analyze utterances and identify their emotional content.\n\nINPUT:\n- You will receive a list of utterances from a page in a comic book\n- The utterance may express one or multiple emotions\n\nTASK:\n1. Carefully analyze the emotional context and tone of each utterance in the page\n2. Identify applicable emotions from the following classes:\n   "anger", "disgust", "fear", "sadness", "surprise", "joy", "neutral"\n3. For each utterance in a comic page, identify all emotions present and return an array of emotion arrays in order.\n\nRULES:\n1. Use ONLY the labels listed above\n2. Output must be a JSON with single key "page_utterance_emotions"\n3. Value must be an array where:\n   - Each element is an array of emotions for one utterance\n   - Order matches the input utterances order\n   - Multiple emotions are 

In [52]:
responses

[tensor([[128000, 128006,    882, 128007,    271,  14711,   5867,   6082,  18825,
           33257,  15766,    271,   2675,    527,    459,  11084,  20356,   6492,
            6335,  58394,    304,  20303,   2363,  21976,  23692,     13,   4718,
            3465,    374,    311,  24564,  22256,   3095,    323,  10765,    872,
           14604,   2262,    382,  30521,    512,     12,   1472,    690,   5371,
             264,   1160,    315,  22256,   3095,    505,    264,   2199,    304,
             264,  20303,   2363,    198,     12,    578,  22256,    685,   1253,
            3237,    832,    477,   5361,  21958,    271,  66913,    512,     16,
              13,  10852,   3725,  24564,    279,  14604,   2317,    323,  16630,
             315,   1855,  22256,    685,    304,    279,   2199,    198,     17,
              13,  65647,   8581,  21958,    505,    279,   2768,   6989,    512,
             256,    330,   4091,    498,    330,   4338,     70,    592,    498,
             330

In [None]:
tokenizer.decode(responses[0][0], skip_special_tokens=True)

'user\n\n### Emotion Analysis Expert Role\n\nYou are an advanced emotion analysis expert specializing in comic book dialogue interpretation. Your task is to analyze utterances and identify their emotional content.\n\nINPUT:\n- You will receive a list of utterances from a page in a comic book\n- The utterance may express one or multiple emotions\n\nTASK:\n1. Carefully analyze the emotional context and tone of each utterance in the page\n2. Identify applicable emotions from the following classes:\n   "anger", "disgust", "fear", "sadness", "surprise", "joy", "neutral"\n3. For each utterance in a comic page, identify all emotions present and return an array of emotion arrays in order.\n\nRULES:\n1. Use ONLY the labels listed above\n2. Output must be a JSON with single key "page_utterance_emotions"\n3. Value must be an array where:\n   - Each element is an array of emotions for one utterance\n   - Order matches the input utterances order\n   - Multiple emotions are allowed per utterance\n4.

: 

In [None]:
# generated = generation_model.generate(**inputs, max_new_tokens=32, pad_token_id=inference_tokenizer.eos_token_id, do_sample=True,
#      temperature=0.1,
#      top_p=0.9,)