Environment & installs

In [None]:
# Install required packages if needed (run only once per environment).
# Use %pip rather than !pip so the packages land in the running kernel's environment.
# %pip install -U transformers accelerate peft datasets bitsandbytes tensorboard pynvml huggingface_hub

Imports & reproducibility

In [None]:
import os
import torch
from huggingface_hub import login
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
    set_seed,
)
from peft import PeftModel  # imported for potential resume/load checks

# Local modules
from src.config import ModelConfig, LoRAConfigLite, DataConfig, TrainConfig
from src.lora_setup import apply_lora
from src.data import load_and_prepare_dataset
from src.gpu_utils import print_cuda_info

# Allow TF32 for both cuBLAS matmuls and cuDNN kernels (faster on Ampere+ GPUs,
# at slightly reduced float32 precision).
torch.backends.cuda.matmul.allow_tf32 = torch.backends.cudnn.allow_tf32 = True

Login (optional) and config

In [None]:
# Hugging Face authentication is only needed for gated checkpoints or for
# pushing results to the Hub. Never paste a token into the notebook: prompt
# for it (or read it from an environment variable) instead.
# from getpass import getpass
# login(token=getpass("Enter your HF token: "))

# Model and adapter settings use the project defaults.
model_cfg, lora_cfg = ModelConfig(), LoRAConfigLite()

# Dataset settings; train_path is a placeholder and must be edited before running.
data_cfg = DataConfig(
    seed=27,
    test_size=0.1,
    max_length=1024,
    num_proc=None,  # set e.g., 4 on Linux; None recommended on Windows/Jupyter
    train_path="<PATH-TO>/finetune_data.jsonl",  # <-- replace with your dataset path
)

# Where checkpoints and logs are written.
train_cfg = TrainConfig(
    logging_dir="outputs/logs",
    output_dir="outputs/llama3_finetuned",
)

# Create both output locations up front; exist_ok keeps re-runs harmless.
os.makedirs(train_cfg.output_dir, exist_ok=True)
os.makedirs(train_cfg.logging_dir, exist_ok=True)

# Seed the RNGs from the training config, then report the visible CUDA setup.
set_seed(train_cfg.seed)
print_cuda_info()

Load tokenizer

In [None]:
# Build the tokenizer for the configured checkpoint, requesting the fast implementation.
tokenizer = AutoTokenizer.from_pretrained(
    model_cfg.model_id,
    use_fast=True,
    trust_remote_code=model_cfg.trust_remote_code,
)

# Padding side comes from the model config.
tokenizer.padding_side = model_cfg.padding_side

# Some checkpoints ship without a pad token; reuse EOS in that case.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("Tokenizer loaded. pad_token:", tokenizer.pad_token)

Load base model in 4-bit

In [None]:
# Resolve the bitsandbytes compute dtype from the config value.
# The config may hold a torch.dtype or one of the usual string spellings.
# A plain `== "float16"` check would silently turn every other value
# (e.g. "float32", "fp16", or a torch.float16 object) into bfloat16.
_COMPUTE_DTYPES = {
    "float16": torch.float16,
    "fp16": torch.float16,
    "half": torch.float16,
    "bfloat16": torch.bfloat16,
    "bf16": torch.bfloat16,
    "float32": torch.float32,
    "fp32": torch.float32,
}
_requested_dtype = model_cfg.bnb_4bit_compute_dtype
if isinstance(_requested_dtype, torch.dtype):
    compute_dtype = _requested_dtype
else:
    _dtype_key = str(_requested_dtype).strip().lower().replace("torch.", "")
    if _dtype_key not in _COMPUTE_DTYPES:
        # Keep the historical bfloat16 fallback, but make it visible.
        print(
            f"Warning: unrecognized bnb_4bit_compute_dtype {_requested_dtype!r}; "
            "falling back to bfloat16."
        )
    compute_dtype = _COMPUTE_DTYPES.get(_dtype_key, torch.bfloat16)

# Quantization settings are taken verbatim from the model config.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=model_cfg.load_in_4bit,
    bnb_4bit_use_double_quant=model_cfg.bnb_4bit_use_double_quant,
    bnb_4bit_quant_type=model_cfg.bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
)

# Load the quantized base model; device placement follows model_cfg.device_map.
model = AutoModelForCausalLM.from_pretrained(
    model_cfg.model_id,
    quantization_config=bnb_config,
    device_map=model_cfg.device_map,
    use_safetensors=model_cfg.use_safetensors,
    low_cpu_mem_usage=model_cfg.low_cpu_mem_usage,
    trust_remote_code=model_cfg.trust_remote_code,
)
print("Model loaded.")

Apply LoRA

In [None]:
# Wrap the base model with LoRA adapters.
# `model` is rebound in place, so guard against re-running this cell: wrapping
# an already-wrapped PEFT model a second time would stack adapters.
if isinstance(model, PeftModel):
    print("LoRA adapters already attached; skipping re-wrap.")
else:
    model = apply_lora(model, lora_cfg)
model.print_trainable_parameters()

Load & tokenize dataset

In [None]:
# Expected input: a JSONL file, one record per line, with the fields
# instruction, input (optional) and output, e.g.
# {"instruction": "Translate to English", "input": "안녕하세요", "output": "Hello."}

# Load and tokenize the data, then pick out the two splits used below.
dataset_dict = load_and_prepare_dataset(data_cfg, tokenizer)
train_dataset, eval_dataset = dataset_dict["train"], dataset_dict["test"]

print("Train samples:", len(train_dataset), "Eval samples:", len(eval_dataset))

Data collator

In [None]:
# Causal-LM collator (mlm=False): labels are a copy of input_ids.
# pad_to_multiple_of=8 rounds each batch's padded length up to a multiple of 8,
# which helps Tensor Core utilisation.
# NOTE(review): this collator sets labels to -100 wherever input_ids equals
# pad_token_id. If the pad token was aliased to EOS, EOS positions are excluded
# from the loss as well — confirm that is acceptable for this dataset.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
    pad_to_multiple_of=8,
)

TrainingArguments

In [None]:
# `evaluation_strategy` was renamed to `eval_strategy` in newer transformers
# releases and the old keyword was later removed. Pick whichever name the
# installed version declares so this cell runs on both old and new versions.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in TrainingArguments.__dataclass_fields__
    else "evaluation_strategy"
)

# All tunables come from train_cfg; evaluation and checkpointing both run on a
# step schedule.
# NOTE(review): when load_best_model_at_end is enabled, transformers requires
# save_steps to be a round multiple of eval_steps — confirm train_cfg satisfies this.
training_args = TrainingArguments(
    output_dir=train_cfg.output_dir,
    per_device_train_batch_size=train_cfg.per_device_train_batch_size,
    gradient_accumulation_steps=train_cfg.gradient_accumulation_steps,
    num_train_epochs=train_cfg.num_train_epochs,
    logging_steps=train_cfg.logging_steps,
    save_steps=train_cfg.save_steps,
    eval_steps=train_cfg.eval_steps,
    save_total_limit=train_cfg.save_total_limit,
    save_strategy="steps",
    fp16=train_cfg.fp16,
    learning_rate=train_cfg.learning_rate,
    lr_scheduler_type=train_cfg.lr_scheduler_type,
    warmup_ratio=train_cfg.warmup_ratio,
    weight_decay=train_cfg.weight_decay,
    optim=train_cfg.optim,
    report_to=train_cfg.report_to,
    logging_dir=train_cfg.logging_dir,
    load_best_model_at_end=train_cfg.load_best_model_at_end,
    metric_for_best_model=train_cfg.metric_for_best_model,
    greater_is_better=train_cfg.greater_is_better,
    seed=train_cfg.seed,
    **{_eval_strategy_key: "steps"},
)

Train

In [None]:
import inspect

from transformers import EarlyStoppingCallback

# Newer transformers releases take the tokenizer via `processing_class` and
# deprecate the `tokenizer` keyword. Choose the name the installed Trainer
# accepts so this cell works on both old and new versions.
_tokenizer_key = (
    "processing_class"
    if "processing_class" in inspect.signature(Trainer.__init__).parameters
    else "tokenizer"
)

# NOTE(review): EarlyStoppingCallback requires load_best_model_at_end=True and a
# metric_for_best_model in the training arguments; both come from train_cfg —
# confirm they are set, otherwise Trainer raises at the start of training.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=5)],
    **{_tokenizer_key: tokenizer},
)

# Run fine-tuning and show the summary object returned by the Trainer.
train_result = trainer.train()
print(train_result)

Save final artifacts

In [None]:
# Persist the trained weights and the tokenizer side by side in the output directory.
final_dir = train_cfg.output_dir
trainer.save_model(final_dir)  # for a PEFT-wrapped model this writes the adapter weights
tokenizer.save_pretrained(final_dir)
print("Saved to:", final_dir)

# Optional: publish to the Hugging Face Hub (requires a prior login).
# trainer.push_to_hub()  # uncomment if you want to push

Quick inference smoke test

In [None]:
# Smoke test: stream a short sampled completion to check the fine-tuned model generates text.
from transformers import TextStreamer

prompt = (
    "### Instruction:\nExplain the difference between moisture content and specific gravity in wood.\n\n"
    "### Response:\n"
)

# Sampling settings for the test generation; EOS doubles as the pad token id.
generation_kwargs = dict(
    max_new_tokens=128,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    pad_token_id=tokenizer.eos_token_id,
)

model.eval()
streamer = TextStreamer(tokenizer)

# Tokenize on the fly and move the tensors to the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.inference_mode():
    # Output is printed by the streamer as it is produced; the return value is unused.
    _ = model.generate(**inputs, streamer=streamer, **generation_kwargs)

TensorBoard

In [None]:
# %load_ext tensorboard
# %tensorboard --logdir outputs/logs --port 6006