<a href="https://colab.research.google.com/github/mightyoctopus/amazon-pricer-model-open-source-fine-tuned-models/blob/main/w7_d3_sft_trainer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Training with SFT Trainer

In [None]:
# pip installs

!pip install -q --upgrade torch==2.5.1+cu124 torchvision==0.20.1+cu124 torchaudio==2.5.1+cu124 --index-url https://download.pytorch.org/whl/cu124
!pip install -q --upgrade requests==2.32.3 bitsandbytes==0.46.0 transformers==4.48.3 accelerate==1.3.0 datasets==3.2.0 peft==0.14.0 trl==0.14.0 matplotlib wandb

In [2]:
import os
import re
import math

from tqdm import tqdm
from google.colab import userdata, drive
from huggingface_hub import login

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, set_seed, BitsAndBytesConfig
from datasets import load_dataset, Dataset, DatasetDict
import wandb
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig

from datetime import datetime
import matplotlib.pyplot as plt

In [None]:
from transformers.utils.import_utils import BASE_FILE_REQUIREMENTS

set_seed(42)


### Constants

BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
PROJECT_NAME = "product-pricer"
HF_USER = "MightyOctopus"

### Data

# https://huggingface.co/datasets/MightyOctopus/amazon-pricer-dataset-v2-0
DATASET_NAME = f"{HF_USER}/amazon-pricer-dataset-v2-0"
MAX_SEQUENCE_LENGTH = 200

### Run name for saving the model in the hub:
RUN_NAME = f"{datetime.now():%Y-%m-%d_%H.%M.%S}"
PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}"
HUB_MODEL_NAME = f"{HF_USER}/{PROJECT_RUN_NAME}"

### Saves to the Local Device
G_DRIVE_BASE_DIR = "content/drive/MyDrive/amazon-pricer"
CHECKPOINTS_PATH = f"/{G_DRIVE_BASE_DIR}/{PROJECT_RUN_NAME}/checkpoints"
FINAL_MODEL_PATH = f"/{G_DRIVE_BASE_DIR}/{PROJECT_RUN_NAME}/final-model"

### Checkpoint: Load the latest checkpoint from G-Drive to continue fine tuning from where it left off last time
LATEST_CHECKPOINT = f"/{G_DRIVE_BASE_DIR}/product-pricer-2025-12-09_14.02.37/checkpoints/checkpoint-2100"


### Hyperparameters for QLoRA
LORA_R = 32
LORA_ALPHA = 64
TARGET_MODULES = ["q_proj", "k_proj", "v_proj", "o_proj"]
LORA_DROPOUT = 0.0
QUANT_4_BIT = True


### Hyperparameters for Training
EPOCHS = 1 # more than 1 might be overkill
BATCH_SIZE = 16 ### Using A100, faster and more stablized updates, less noise which is great for numeric(price) regression learning.
GRADIENT_ACCUMULATION_STEPS = 2 # Better generalization and stable learning
LEARNING_RATE = 1e-5
LR_SCHEDULER_TYPE = "cosine"
WARMUP_RATIO = 0.05
OPTIMIZER = "paged_adamw_32bit"


### Admin Config:
STEPS = 50 #How often training logs appears in the notebook or W&B
SAVE_STEPS = 1000
LOG_TO_WANDB = True


%matplotlib inline

In [None]:
hf_token = userdata.get("HF_TOKEN")
login(hf_token, add_to_git_credential=True)

In [None]:
### Log in to Google Drive (To save checkpoints)
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
### Log in to Weights & Biases
wandb_api_key = userdata.get("WANDB")
os.environ["WANDB"] = wandb_api_key
wandb.login()

### Configure Weights & Biases dto record against the project
os.environ["WANDB_PROJECT"] = PROJECT_NAME
os.environ["WANDB_LOG_MODEL"] = "checkpoint" if LOG_TO_WANDB else "end"
os.environ["WANDB_WATCH"] = "gradients"

[34m[1mwandb[0m: Currently logged in as: [33mmh-hong[0m ([33mmh-hong-personal[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
print(os.environ)



In [None]:
# if wishing to reduce the training dataset to 20,000 points instead, then uncomment this line:
# train = train.select(range(20000))

dataset = load_dataset(DATASET_NAME)

splits = dataset["train"].train_test_split(test_size=0.2)
train_ds = splits["train"] # train dataset
val_ds = splits["test"] # valication dataset for 20% (of the whole train data)
test_ds = dataset["test"] # test dataset after fiine tuning

In [None]:
print(splits)

DatasetDict({
    train: Dataset({
        features: ['text', 'price'],
        num_rows: 120000
    })
    test: Dataset({
        features: ['text', 'price'],
        num_rows: 30000
    })
})


In [None]:
print(test_ds)

Dataset({
    features: ['text', 'price'],
    num_rows: 19000
})


In [None]:
if LOG_TO_WANDB:
    wandb.init(project=PROJECT_NAME, name=RUN_NAME)

## Load the Tokenizer and Model

In [None]:
### Pick the right quantization config

if QUANT_4_BIT:
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16
    )
else:
    quant_config = BitsAndBytesConfig(
        load_in_8bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16
    )

In [None]:
from transformers.utils import quantization_config
### Load the Tokenizer and the Model

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=quant_config,
    device_map="auto"
)
base_model.generation_config.pad_token_id = tokenizer.pad_token_id

print(f"Memory Footprint: {base_model.get_memory_footprint() / 1e9:.2f} GB")

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Memory Footprint: 5.59 GB


## Data Collator

To ensure during Training that it is not trying to train the model to predict the description of products; only their prices -- specifically, after this line: "Price is $" in the train prompt

This is a super simple helper class from Hugging Face to take care of it:

In [None]:
from trl import DataCollatorForCompletionOnlyLM

response_template = "Price is $"
collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)

## LoRA Config & SFT Config

In [None]:
lora_parameters = LoraConfig(
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    r=LORA_R,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=TARGET_MODULES
)

train_parameters = SFTConfig(
    ### Checkpoint storage
    output_dir=CHECKPOINTS_PATH,
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=1,
    eval_strategy="steps",
    eval_steps=1000, ### perform validation every 1000 steps
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    optim=OPTIMIZER,
    save_steps=SAVE_STEPS,
    save_total_limit=3,
    logging_steps=STEPS,
    learning_rate=LEARNING_RATE,
    weight_decay=0.0,
    fp16=False,
    bf16=True,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=WARMUP_RATIO,
    group_by_length=True,
    lr_scheduler_type=LR_SCHEDULER_TYPE,
    report_to="wandb" if LOG_TO_WANDB else None,
    run_name=RUN_NAME,
    max_seq_length=MAX_SEQUENCE_LENGTH,
    dataset_text_field="text",
    save_strategy="steps",
    hub_strategy="end", ### Push manually after training done
    push_to_hub=False, ### Push manually after training done
    hub_model_id=HUB_MODEL_NAME,
    hub_private_repo=False
)

fine_tuning = SFTTrainer(
    model=base_model,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    peft_config=lora_parameters,
    args=train_parameters,
    data_collator=collator
)

Map:   0%|          | 0/30000 [00:00<?, ? examples/s]

## Fine Tune

In [None]:
fine_tuning.train(resume_from_checkpoint=LATEST_CHECKPOINT) ### set the param True to continue training from the last checkpoint

fine_tuning.model.save_pretrained(FINAL_MODEL_PATH)
tokenizer.save_pretrained(FINAL_MODEL_PATH)

### Push the fine tuned model to HF hub
# fine_tuning.model.push_to_hub(HUB_MODEL_NAME)
# print(f"Saved to the hub: {PROJECT_RUN_NAME}")

  torch.load(os.path.join(checkpoint, OPTIMIZER_NAME), map_location=map_location)
	eval_steps: 1000 (from args) != 300 (from trainer_state.json)
	save_steps: 1000 (from args) != 300 (from trainer_state.json)
  checkpoint_rng_state = torch.load(rng_file)


Step,Training Loss,Validation Loss
1800,1.6898,1.694607
2100,1.685,1.689236


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
[34m[1mwandb[0m: Adding directory to artifact (/content/drive/MyDrive/amazon-pricer/product-pricer-2025-12-09_14.02.37/checkpoints/checkpoint-1800)... Done. 0.7s
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
[34m[1mwandb[0m: Adding directory to artifact (/content/drive/MyDrive/amazon-pricer/product-pricer-2025-12-09_14.02.37/checkpoints/checkpoint-2100)... Done. 0.8s


In [None]:
if LOG_TO_WANDB:
    wandb.finish()