# Finetuning Llama-3.2-1B for finance tasks
A basic deliverable fine-tuning script, based on [ShawhinT's fine-tuning example](https://github.com/ShawhinT/YouTube-Blog/tree/main/LLMs/fine-tuning).

In [1]:
import torch
import evaluate
import numpy as np
from peft import get_peft_model, LoraConfig
from datasets import load_dataset
from transformers import Trainer, TrainingArguments, AutoTokenizer, DataCollatorForLanguageModeling
from transformers.models.llama.modeling_llama import LlamaForCausalLM
from pathlib import Path


# Identifier of the pretrained checkpoint handed to both from_pretrained calls.
model_id = "meta-llama/Llama-3.2-1B"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Fall back to the end-of-sequence token when the tokenizer defines no
# padding token, so that batches can still be padded.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load the causal-LM weights, requesting float16 storage.
base_model = LlamaForCausalLM.from_pretrained(
    model_id,
    torch_dtype="float16",
)

In [2]:
# Load the train split and drop the metadata columns in one chained
# expression; only the remaining text columns are used for tokenization.
dataset = load_dataset(
    "PaulAdversarial/all_news_finance_sm_1h2023",
    split="train",
).remove_columns(["_id", "main_domain", "created_at"])
dataset

Dataset({
    features: ['title', 'description'],
    num_rows: 5062
})

In [3]:
def tokenize_function(examples):
    """Tokenize the ``title``/``description`` fields of ``examples``.

    The two fields are passed to the module-level ``tokenizer`` as its first
    and second positional arguments, and the encoding is truncated to at most
    512 tokens.

    Args:
        examples: Mapping providing ``"title"`` and ``"description"`` entries.

    Returns:
        Whatever the tokenizer call returns for that input.
    """
    title = examples["title"]
    description = examples["description"]
    # The majority of the dataset is shorter than 512 tokens, so this limit
    # rarely truncates anything.
    encoded = tokenizer(title, description, truncation=True, max_length=512)
    return encoded

In [4]:
# LoRA adapter settings for causal language modelling: rank-4 adapters on the
# q_proj and v_proj modules, alpha 32, dropout 0.01.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=4,
    lora_alpha=32,
    lora_dropout=0.01,
)

In [5]:
# Wrap the base model with the adapters described by peft_config.
peft_model = get_peft_model(model=base_model, peft_config=peft_config)
# Print how many parameters are trainable after wrapping.
peft_model.print_trainable_parameters()

trainable params: 425,984 || all params: 1,236,240,384 || trainable%: 0.0345


In [6]:
# Training hyperparameters, consumed when TrainingArguments is built.
num_epochs = 10  # passed as num_train_epochs
batch_size = 4  # passed as per_device_train_batch_size
lr = 1e-3  # passed as learning_rate

In [7]:
# Metric objects are loaded once and reused on every compute_metrics call.
accuracy_metric = evaluate.load("accuracy")
f1_metric = evaluate.load("f1")


def compute_metrics(eval_pred: "tuple[np.ndarray, np.ndarray]") -> dict[str, float]:
    """Compute accuracy and weighted F1 from model logits and reference labels.

    Supports two logit layouts:

    * ``(batch, num_classes)`` with labels ``(batch,)``: the class with the
      highest score is compared to the label (unchanged from the previous
      behaviour).
    * ``(batch, seq_len, vocab)`` with labels ``(batch, seq_len)``: the
      causal-LM case. The logits at position ``i`` score the token at position
      ``i + 1``, so predictions and labels are shifted by one before being
      compared, and label positions equal to ``-100`` (the ignore index used
      for padded positions) are excluded.

    Args:
        eval_pred: Object that unpacks into ``(logits, labels)``.

    Returns:
        Dict with ``"accuracy"`` and ``"f1"`` (weighted average) values.
    """
    logits, labels = eval_pred
    # Some models return extra outputs next to the logits; logits come first.
    if isinstance(logits, tuple):
        logits = logits[0]
    labels = np.asarray(labels)

    # Scores live on the LAST axis in both layouts. The previous ``axis=1``
    # reduced over sequence positions for 3-D causal-LM logits.
    predictions = np.argmax(np.asarray(logits), axis=-1)

    if predictions.ndim > 1:
        # Align next-token predictions with the tokens they predict.
        predictions = predictions[:, :-1].reshape(-1)
        labels = labels[:, 1:].reshape(-1)
        # Drop ignored (padding) positions so they do not count as errors.
        keep = labels != -100
        predictions = predictions[keep]
        labels = labels[keep]

    accuracy_val = accuracy_metric.compute(
        predictions=predictions, references=labels
    )['accuracy']
    f1_val = f1_metric.compute(
        predictions=predictions, references=labels, average="weighted"
    )['f1']

    return {
        "accuracy": accuracy_val,
        "f1": f1_val,
    }

In [8]:
# Training configuration. Checkpoints are written under out_dir.
out_dir = Path(r"D:\models\basic-Llama-3_2-LoRA")
training_args = TrainingArguments(
    # Where to write outputs, and when to save.
    output_dir=str(out_dir),
    save_strategy="epoch",
    # Run training only; evaluation is switched off.
    do_train=True,
    eval_strategy="no",
    # Schedule and batch sizing.
    num_train_epochs=num_epochs,
    per_device_train_batch_size=batch_size,
    # Optimizer settings.
    learning_rate=lr,
    warmup_steps=500,
    weight_decay=0.01,
)

In [None]:
# Apply tokenize_function to the dataset to produce the training set.
tok_train_dataset = dataset.map(function=tokenize_function)

Map:   0%|          | 0/5062 [00:00<?, ? examples/s]

In [18]:
# Collator that turns tokenized rows into language-modelling batches.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,  # For causal language modeling
)

# Create the trainer object.
# The tokenizer is passed as `processing_class`: the older `tokenizer=`
# keyword of Trainer is deprecated and emits a FutureWarning when used.
# NOTE: compute_metrics only runs during evaluation. With eval_strategy="no"
# and no eval_dataset supplied here, it is not invoked by this training run.
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=tok_train_dataset,
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# Train the model; as the last expression of the cell, the returned
# TrainOutput is displayed by the notebook.
trainer.train()

  trainer = Trainer(


  0%|          | 0/12660 [00:00<?, ?it/s]

{'loss': 3.0204, 'grad_norm': 4.897493839263916, 'learning_rate': 0.001, 'epoch': 0.39}
{'loss': 3.051, 'grad_norm': 6.844336986541748, 'learning_rate': 0.0009588815789473685, 'epoch': 0.79}
{'loss': 2.997, 'grad_norm': 4.985720157623291, 'learning_rate': 0.0009177631578947368, 'epoch': 1.18}
{'loss': 2.9422, 'grad_norm': 5.779033184051514, 'learning_rate': 0.0008766447368421053, 'epoch': 1.58}
{'loss': 2.9269, 'grad_norm': 5.083930015563965, 'learning_rate': 0.0008355263157894737, 'epoch': 1.97}
{'loss': 2.8285, 'grad_norm': 4.1062774658203125, 'learning_rate': 0.0007944078947368421, 'epoch': 2.37}
{'loss': 2.8523, 'grad_norm': 5.630640029907227, 'learning_rate': 0.0007532894736842105, 'epoch': 2.76}
{'loss': 2.7999, 'grad_norm': 5.521939277648926, 'learning_rate': 0.000712171052631579, 'epoch': 3.16}
{'loss': 2.7677, 'grad_norm': 5.261638164520264, 'learning_rate': 0.0006710526315789473, 'epoch': 3.55}
{'loss': 2.7697, 'grad_norm': 4.421476364135742, 'learning_rate': 0.00062993421052

TrainOutput(global_step=12660, training_loss=2.6143193731564103, metrics={'train_runtime': 2543.9201, 'train_samples_per_second': 19.898, 'train_steps_per_second': 4.977, 'total_flos': 2.62356261285888e+16, 'train_loss': 2.6143193731564103, 'epoch': 10.0})