# SFT

In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/llama-3.2/transformers/3b-instruct/1/model.safetensors.index.json
/kaggle/input/llama-3.2/transformers/3b-instruct/1/config.json
/kaggle/input/llama-3.2/transformers/3b-instruct/1/model-00001-of-00002.safetensors
/kaggle/input/llama-3.2/transformers/3b-instruct/1/model-00002-of-00002.safetensors
/kaggle/input/llama-3.2/transformers/3b-instruct/1/README.md
/kaggle/input/llama-3.2/transformers/3b-instruct/1/USE_POLICY.md
/kaggle/input/llama-3.2/transformers/3b-instruct/1/tokenizer.json
/kaggle/input/llama-3.2/transformers/3b-instruct/1/tokenizer_config.json
/kaggle/input/llama-3.2/transformers/3b-instruct/1/LICENSE.txt
/kaggle/input/llama-3.2/transformers/3b-instruct/1/special_tokens_map.json
/kaggle/input/llama-3.2/transformers/3b-instruct/1/.gitattributes
/kaggle/input/llama-3.2/transformers/3b-instruct/1/generation_config.json


In [3]:
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

hf_token = user_secrets.get_secret("HUGGINGFACE_TOKEN")
login(token = hf_token)

In [4]:
%%capture
%pip install -U transformers 
%pip install -U datasets 
%pip install -U accelerate 
%pip install -U peft 
%pip install -U trl 
%pip install -U bitsandbytes 
%pip install -U wandb

In [5]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format

In [None]:
wb_token = user_secrets.get_secret("wandb")

wandb.login(key=wb_token)
run = wandb.init(
    project='Fine-tune Llama 3.2 on Medieval arms and armor', 
    job_type="training", 
    anonymous="allow"
)

In [7]:
base_model = "/kaggle/input/llama-3.2/transformers/3b-instruct/1"
new_model = "llama-3.2-3b-medieval-arms-and-armor"
dataset_name = "madks/medieval-qa-dataset"

In [8]:
# Set torch dtype and attention implementation
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install -qqq flash-attn
    torch_dtype = torch.bfloat16
    attn_implementation = "flash_attention_2"
else:
    torch_dtype = torch.float16
    attn_implementation = "eager"

In [None]:
# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)
# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

In [13]:
dataset

Dataset({
    features: ['id', 'context', 'question', 'answers'],
    num_rows: 193
})

In [None]:
#Importing the dataset
dataset = load_dataset(dataset_name, split="train")
samples_count = len(dataset)
dataset = dataset.shuffle(seed=65).select(range(samples_count)) # Only use 1000 samples for quick demo
instruction = """You are a helpful assistant and historian, trained in medieval arms and armor. 
    Be polite to customers and answer all questions.
    """
def format_chat_template(row):
    # Create a prompt that includes the context and question
    user_content = f"Context: {row['context']}\nQuestion: {row['question']}"
    
    row_json = [
        {"role": "system", "content": instruction},
        {"role": "user", "content": user_content},
        {"role": "assistant", "content": row["answers"]}
    ]
    
    row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
    return row

dataset = dataset.map(
    format_chat_template,
    num_proc= 4,
)

# Create train/test splits
# If the dataset comes as a single 'train' split or just a DatasetDict without splits
if 'train' in dataset and not 'test' in dataset:
    # Split the existing train set
    splits = dataset['train'].train_test_split(test_size=0.1, seed=42)
    dataset = splits  # This now contains 'train' and 'test' splits
else:
    # If it's just a Dataset object, split it directly
    splits = dataset.train_test_split(test_size=0.1, seed=42)
    dataset = splits  # This now contains 'train' and 'test' splits

In [16]:
import bitsandbytes as bnb

def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16 bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

modules = find_all_linear_names(model)

In [17]:
tokenizer.chat_template = None

In [18]:
# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules
)
model, tokenizer = setup_chat_format(model, tokenizer)
model = get_peft_model(model, peft_config)

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


In [19]:
#Hyperparamter
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    eval_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)

In [None]:
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    peft_config=peft_config,
    # max_seq_length=None,  # Remove this parameter entirely
    # dataset_text_field="text",
    tokenizer=tokenizer,  # This is still used but deprecated in 0.15.2
    args=training_arguments,
    # packing=False,
)

In [21]:
trainer.train()



Step,Training Loss,Validation Loss
18,1.0915,0.960722
36,0.6905,0.867706
54,1.2094,0.818825
72,0.4512,0.711158


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


TrainOutput(global_step=86, training_loss=1.0130048803118772, metrics={'train_runtime': 75.2783, 'train_samples_per_second': 2.285, 'train_steps_per_second': 1.142, 'total_flos': 277164354945024.0, 'train_loss': 1.0130048803118772})

In [None]:
wandb.finish()


In [23]:
messages = [{"role": "system", "content": instruction},
    {"role": "user", "content": "Describe the parts of armor."}]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

outputs = model.generate(**inputs, max_new_tokens=150, num_return_sequences=1)

text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(text.split("assistant")[1])

 and historian, trained in medieval arms and armor. 
    Be polite to customers and answer all questions.
    
user
Describe the parts of armor.



In [24]:
# Save the fine-tuned model
trainer.model.save_pretrained(new_model)
trainer.model.push_to_hub(new_model, use_temp_dir=False)



adapter_model.safetensors:   0%|          | 0.00/1.67G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/madks/llama-3.2-3b-medieval-arms-and-armor/commit/7dd6828de1ea4b5381df849d67f418c520b7322a', commit_message='Upload model', commit_description='', oid='7dd6828de1ea4b5381df849d67f418c520b7322a', pr_url=None, repo_url=RepoUrl('https://huggingface.co/madks/llama-3.2-3b-medieval-arms-and-armor', endpoint='https://huggingface.co', repo_type='model', repo_id='madks/llama-3.2-3b-medieval-arms-and-armor'), pr_revision=None, pr_num=None)

In [25]:
for dirname, _, filenames in os.walk('/kaggle/'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/lib/kaggle/gcp.py
/kaggle/input/llama-3.2/transformers/3b-instruct/1/model.safetensors.index.json
/kaggle/input/llama-3.2/transformers/3b-instruct/1/config.json
/kaggle/input/llama-3.2/transformers/3b-instruct/1/model-00001-of-00002.safetensors
/kaggle/input/llama-3.2/transformers/3b-instruct/1/model-00002-of-00002.safetensors
/kaggle/input/llama-3.2/transformers/3b-instruct/1/README.md
/kaggle/input/llama-3.2/transformers/3b-instruct/1/USE_POLICY.md
/kaggle/input/llama-3.2/transformers/3b-instruct/1/tokenizer.json
/kaggle/input/llama-3.2/transformers/3b-instruct/1/tokenizer_config.json
/kaggle/input/llama-3.2/transformers/3b-instruct/1/LICENSE.txt
/kaggle/input/llama-3.2/transformers/3b-instruct/1/special_tokens_map.json
/kaggle/input/llama-3.2/transformers/3b-instruct/1/.gitattributes
/kaggle/input/llama-3.2/transformers/3b-instruct/1/generation_config.json
/kaggle/working/llama-3.2-3b-medieval-arms-and-armor/adapter_config.json
/kaggle/working/llama-3.2-3b-medieval-arms-and-

# Merge finetune

In [26]:
%%capture
%pip install -U bitsandbytes
%pip install transformers==4.44.2
%pip install -U accelerate
%pip install -U peft
%pip install -U trl

In [27]:
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

hf_token = user_secrets.get_secret("HUGGINGFACE_TOKEN")
login(token = hf_token)

In [28]:
# Model
base_model_url = "/kaggle/input/llama-3.2/transformers/3b-instruct/1"
# new_model_url = "/kaggle/input/fine-tune-llama-3-2-on-customer-support/llama-3.2-3b-it-Ecommerce-ChatBot/"
# new_model_url = "/kaggle/working/fine-tune-llama-3-2-on-customer-support/llama-3.2-3b-it-Ecommerce-ChatBot/"
# new_model_url = "/kaggle/working/llama-3.2-3b-it-Ecommerce-ChatBot/checkpoint-450/"
new_model_url = "/kaggle/working/llama-3.2-3b-medieval-arms-and-armor/checkpoint-86/"

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
from peft import PeftModel
import torch
from trl import setup_chat_format
# Reload tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(base_model_url)

base_model_reload= AutoModelForCausalLM.from_pretrained(
    base_model_url,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map="auto",
)

In [30]:
tokenizer.chat_template = None

In [31]:
# Merge adapter with base model
base_model_reload, tokenizer = setup_chat_format(base_model_reload, tokenizer)
model = PeftModel.from_pretrained(base_model_reload, new_model_url)

model = model.merge_and_unload()

In [32]:
# instruction = """You are a top-rated customer service agent named John. 
#     Be polite to customers and answer all their questions.
#     """
instruction = """You are a helpful assistant and historian, trained in medieval arms and armor. 
    Be polite to customers and answer all questions.
    """

messages = [{"role": "system", "content": instruction},
    {"role": "user", "content": "Describe the parts of armor."}]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

outputs = model.generate(**inputs, max_new_tokens=150, num_return_sequences=1)

text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(text.split("assistant")[1])

 and historian, trained in medieval arms and armor. 
    Be polite to customers and answer all questions.
    
user
Describe the parts of armor.



In [33]:
# new_model = "llama-3.2-3b-it-Ecommerce-ChatBot"
new_model = "llama-3.2-3b-medieval-arms-and-armor"

model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)

('llama-3.2-3b-medieval-arms-and-armor/tokenizer_config.json',
 'llama-3.2-3b-medieval-arms-and-armor/special_tokens_map.json',
 'llama-3.2-3b-medieval-arms-and-armor/tokenizer.json')

In [34]:
model.push_to_hub(new_model, use_temp_dir=False)
tokenizer.push_to_hub(new_model, use_temp_dir=False)

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/madks/llama-3.2-3b-medieval-arms-and-armor/commit/8d8df70451685f8239363e7c67151d8f06c363ff', commit_message='Upload tokenizer', commit_description='', oid='8d8df70451685f8239363e7c67151d8f06c363ff', pr_url=None, repo_url=RepoUrl('https://huggingface.co/madks/llama-3.2-3b-medieval-arms-and-armor', endpoint='https://huggingface.co', repo_type='model', repo_id='madks/llama-3.2-3b-medieval-arms-and-armor'), pr_revision=None, pr_num=None)