In [None]:
import gc

import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments, DataCollatorForLanguageModeling
from trl import SFTTrainer

In [None]:
# Hugging Face Hub id of the base checkpoint; reused below for both the tokenizer and the model load.
model_name = "meta-llama/Llama-3.2-3B-Instruct"

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Use a dedicated padding token instead of aliasing EOS.
# DataCollatorForLanguageModeling (used for training in this notebook) sets the label of
# every position whose id equals pad_token_id to -100. With pad == eos the end-of-turn
# token would therefore be excluded from the loss, and fine-tuning would give the model
# no signal for when to stop generating.
# Llama 3.1/3.2 tokenizers reserve "<|finetune_right_pad_id|>" for padding; the vocab
# check keeps this cell working for checkpoints that do not ship that token.
dedicated_pad_token = "<|finetune_right_pad_id|>"
if dedicated_pad_token in tokenizer.get_vocab():
    tokenizer.pad_token = dedicated_pad_token
elif tokenizer.pad_token is None:
    # No reserved pad token and none configured: fall back to the previous behaviour.
    tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Quantization settings for loading the base model: 4-bit NF4 weights with
# double quantization, and bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype="bfloat16",
)

In [None]:
# Load the base checkpoint with the quantization config defined above.
# device_map="auto" leaves weight placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    torch_dtype="auto",
    device_map="auto",
)

In [None]:
# Local JSON files. Note that the split exposed as "val" is read from test.json.
dataset = load_dataset(
    "json",
    data_files={
        "train": "train_data/ign/train.json",
        "val": "train_data/ign/test.json",
    },
)

In [None]:
# Pick one validation row to use as a qualitative check of the model's output.
ind = 7
sample = dataset['val'][ind]
name, revs, verdict = sample['name'], sample['users_revs'], sample['verdict']

# Put every user review on a single line so the prompt holds one review per line.
flat_revs = [r.replace('\n', ' ') for r in revs]

# Chat prompt without an assistant turn: the model is expected to write the review.
messages = [
    {
        "role": "system",
        "content": "Below are reviews from users. Analyze them and write a short, professional review of the game.\n",
    },
    {
        "role": "user",
        "content": f"Game name: {name}\n" + '\n'.join(flat_revs),
    },
]

In [None]:
# Show the fully rendered prompt text (not token ids), including the generation prompt.
print(
    tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=False,
    )
)

In [None]:
# Tokenize the chat prompt. return_dict=True also returns the attention mask, so
# generate() does not have to infer it from the pad token.
model_inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
).to(model.device)  # follow the placement chosen by device_map="auto" instead of hard-coding "cuda"

# Kept under its original name: the decoding cell strips the prompt via input_ids.size(1).
input_ids = model_inputs["input_ids"]

outputs = model.generate(
    **model_inputs,
    max_new_tokens=512,
    pad_token_id=tokenizer.pad_token_id,  # explicit, so generate() does not fall back to a guessed pad id
)

In [None]:
# outputs[0] holds prompt + continuation; drop the prompt tokens before decoding
# so that only the newly generated text is printed.
print(
    tokenizer.decode(
        outputs[0][input_ids.shape[1]:],
        skip_special_tokens=True,
    )
)

In [None]:
# Reference verdict of the selected validation row, for comparison with the model's generation.
print(verdict)

In [None]:
def process_fun(example):
    """Render a batch of raw rows as chat-formatted training texts.

    Written for ``dataset.map(..., batched=True)``: every value in ``example``
    is a list with one entry per row.

    Args:
        example: Batch with the columns ``name`` (game title), ``users_revs``
            (per row, an iterable of user review strings) and ``verdict``
            (the text used as the assistant turn).

    Returns:
        A dict ``{"text": [...]}`` holding one rendered chat string per row,
        produced by the notebook-level ``tokenizer``'s chat template.
    """
    system_prompt = "Below are reviews from users. Analyze them and write a short, professional review of the game.\n"

    texts = []
    for name, u_revs, pro_rev in zip(example['name'], example['users_revs'], example['verdict']):
        # Flatten newlines inside each review, then put one review per line.
        joined_revs = "\n".join(rev.replace("\n", " ") for rev in u_revs)
        messages = [
            {"role": "system", "content": system_prompt},
            # No space before "\n": the inference prompt in this notebook is built as
            # "Game name: {name}\n", and the training prompt must match it exactly.
            {"role": "user", "content": f"Game name: {name}\n" + joined_revs},
            {"role": "assistant", "content": pro_rev},
        ]
        texts.append(tokenizer.apply_chat_template(messages, tokenize=False))
    return {"text": texts}

In [None]:
# Add the chat-formatted "text" column to every split, then drop the listed raw columns.
training_data = (
    dataset
    .map(process_fun, batched=True)
    .remove_columns(['url', 'name', 'verdict', 'steam_id', 'users_revs'])
)

In [None]:
# mlm=False selects causal language-modelling collation rather than masked-token prediction.
data_collator = DataCollatorForLanguageModeling(mlm=False, tokenizer=tokenizer)

In [None]:
# LoRA adapter settings: rank 8, alpha 16, no dropout, no bias training.
# Adapters are attached to the attention projections and the MLP projections.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    target_modules=[
        # attention projections
        "q_proj", "k_proj", "v_proj", "o_proj",
        # MLP projections
        "gate_proj", "up_proj", "down_proj",
    ],
)

In [None]:
# The base model was loaded in 4-bit, so run PEFT's k-bit preparation step before
# attaching the adapters. Per the PEFT documentation this freezes the base weights,
# upcasts the remaining half-precision parameters to fp32 for stability, and by
# default enables gradient checkpointing and input gradients.
model = prepare_model_for_kbit_training(model)

# Wrap the prepared model so that only the LoRA adapter weights are trainable.
model = get_peft_model(model, lora_config)

In [None]:
training_arguments = TrainingArguments(
    output_dir="llama-3.2-3B-ign_rev_sft",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=2,  # two micro-batches of 1 per optimizer step
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    num_train_epochs=15,
    warmup_steps=5,
    fp16=False,
    bf16=True,
    logging_strategy="steps",
    logging_steps=100,
    # One checkpoint per epoch. The former save_steps=1 was removed: save_steps is only
    # consulted when save_strategy="steps", so it had no effect and was misleading.
    save_strategy="epoch",
    report_to="none",
)

# `model` has already been wrapped with the LoRA adapters via get_peft_model, so
# peft_config is deliberately not passed again: handing the trainer both a PeftModel
# and a peft_config is redundant, and depending on the TRL version it is warned
# about or rejected.
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=training_data['train'],
    data_collator=data_collator,
)

In [None]:
# Disable the KV cache on the model config for the training run.
model.config.use_cache = False

# Collect unreferenced Python objects and release cached CUDA memory before training starts.
gc.collect()
torch.cuda.empty_cache()

trainer.train()

In [None]:
# Training leaves the model in train mode; switch to eval mode for inference.
# This also matters for caching: gradient checkpointing (if enabled) only conflicts
# with the KV cache while the model is in training mode.
model.eval()

# Tokenize the same chat prompt as before training. return_dict=True also returns the
# attention mask, so generate() does not have to infer it from the pad token.
model_inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
).to(model.device)  # follow the model's actual device instead of hard-coding "cuda"

# Kept under its original name so prompt-stripping code (input_ids.size(1)) keeps working.
input_ids = model_inputs["input_ids"]

outputs = model.generate(
    **model_inputs,
    max_new_tokens=512,
    pad_token_id=tokenizer.pad_token_id,
    use_cache=True,  # model.config.use_cache was set to False for training; re-enable the KV cache here
)