In [None]:
# Imports libraries and lists all files in the /kaggle/input directory

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# Walk the Kaggle input mount and print the full path of every file under it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for fname in files:
        print(os.path.join(root, fname))

In [None]:
# Remove any existing torch / torchvision / torchaudio so the pinned builds below replace them.
%pip uninstall -y torch torchvision torchaudio

In [None]:
# Install the pinned PyTorch 2.4.1 stack from the cu121 wheel index.
%pip install torch==2.4.1+cu121 torchvision==0.19.1+cu121 torchaudio==2.4.1 --index-url https://download.pytorch.org/whl/cu121

In [None]:
# Pinned model-loading stack: transformers, accelerate, bitsandbytes.
%pip install "transformers==4.45.1" "accelerate==0.34.2" "bitsandbytes==0.44.0"

In [None]:
# Pinned data / fine-tuning stack: datasets, peft, trl (peft is re-pinned by a later cell).
%pip install "datasets==3.0.1" "peft==0.13.0" "trl==0.11.1"

In [None]:
# Pinned experiment-tracking and Arrow / dataframe dependencies.
%pip install "wandb==0.18.2" "pyarrow==17.0.0" "pandas==2.2.3"

In [None]:
# NOTE(review): huggingface_hub is the only unpinned install here — consider pinning it like the others.
%pip install "huggingface_hub"

In [None]:
%pip uninstall -y peft

In [None]:
%pip install "peft==0.17.1"

In [None]:
%pip uninstall -y peft
%pip install "peft==0.15.0"

In [None]:
# Check required packages in Kaggle environment (no flash-attn)
import importlib

required_packages = [
    "transformers",
    "datasets",
    "accelerate",
    "peft",
    "trl",
    "bitsandbytes",
    "wandb",
    "huggingface_hub",
    "kaggle_secrets",
    "torch",
    "os"  # standard library, always present
]


def check_packages(names, importer=importlib.import_module):
    """Try to import each package and print one status line per package.

    Args:
        names: Iterable of importable module names.
        importer: Callable that imports a module by name. Defaults to
            ``importlib.import_module``; overridable so the check can be
            exercised without the real packages.

    Returns:
        A ``(missing, broken)`` tuple of lists. ``missing`` holds the names
        whose import raised ``ModuleNotFoundError``; ``broken`` holds the names
        whose import raised any other exception (installed but unusable).
    """
    missing, broken = [], []
    for pkg in names:
        try:
            importer(pkg)
        except ModuleNotFoundError:
            print(f"X {pkg} is MISSING")
            missing.append(pkg)
        except Exception as exc:
            # An installed package can still fail while importing; report it
            # and keep going instead of aborting the whole check.
            print(f"X {pkg} is installed but failed to import — {exc}")
            broken.append(pkg)
        else:
            print(f"_/ {pkg} is installed")
    return missing, broken


missing_packages, broken_packages = check_packages(required_packages)

if missing_packages:
    print("\nSome packages are missing. Install with:")
    print("!pip install " + " ".join(missing_packages))

if broken_packages:
    print("\nSome packages are installed but failed to import: " + ", ".join(broken_packages))


In [None]:
# Check required functions/methods exist in installed packages (no flash-attn)
import importlib
import inspect

# (target, attribute) pairs. This is a list rather than a dict because several
# targets are checked for more than one attribute, and repeating a key in a
# dict literal silently keeps only the last entry.
checks = [
    ("transformers.AutoTokenizer", "from_pretrained"),
    ("transformers.AutoModelForCausalLM", "from_pretrained"),
    ("transformers.BitsAndBytesConfig", "__init__"),
    ("transformers.HfArgumentParser", "__init__"),
    ("transformers.TrainingArguments", "__init__"),
    ("transformers.pipeline", "__call__"),
    ("transformers.logging", "set_verbosity"),
    ("transformers.MllamaForConditionalGeneration", "from_pretrained"),
    ("transformers.AutoProcessor", "from_pretrained"),
    ("transformers.PreTrainedTokenizer", "apply_chat_template"),
    ("transformers.PreTrainedTokenizer", "decode"),
    ("transformers.PreTrainedModel", "generate"),
    ("transformers.PreTrainedModel", "save_pretrained"),
    ("transformers.PreTrainedModel", "push_to_hub"),
    ("trl.SFTTrainer", "__init__"),
    ("trl", "setup_chat_format"),
    ("datasets", "load_dataset"),
    ("datasets.Dataset", "map"),
    ("datasets.Dataset", "shuffle"),
    ("datasets.Dataset", "select"),
    ("peft.LoraConfig", "__init__"),
    ("peft", "get_peft_model"),
    ("peft.PeftModel", "from_pretrained"),
    ("peft.PeftModel", "merge_and_unload"),
    ("peft", "prepare_model_for_kbit_training"),
    ("huggingface_hub", "login"),
    ("kaggle_secrets.UserSecretsClient", "get_secret"),
    ("wandb", "login"),
    ("wandb", "init"),
    ("wandb", "finish"),
    ("torch.cuda", "get_device_capability"),
    ("bitsandbytes.nn", "Linear4bit"),
]


def verify_attribute(target, func):
    """Report whether ``target`` exposes the attribute ``func``.

    Args:
        target: Either a bare module name (``"peft"``) or a dotted path whose
            last component is an attribute of the module named by the rest
            (``"peft.PeftModel"``).
        func: Attribute name looked up on the resolved object when it is a
            class or a module.

    Returns:
        ``True`` when the target resolves and, for classes/modules, has the
        attribute; ``False`` when the attribute is absent or the target cannot
        be imported or resolved. One status line is printed in every case.
    """
    try:
        module_name, _, attr_name = target.rpartition(".")
        if module_name:
            obj = getattr(importlib.import_module(module_name), attr_name)
        else:
            # Module-only target: there is no attribute part to split off.
            obj = importlib.import_module(target)

        if inspect.isclass(obj) or inspect.ismodule(obj):
            found = hasattr(obj, func)
            if found:
                print(f"_/ {target}.{func} exists")
            else:
                print(f"X {target}.{func} NOT found")
            return found
        if inspect.isfunction(obj):
            print(f"_/ Function {target} is present")
            return True
        print(f"ℹ️ {target} is present but type not checked")
        return True
    except Exception as e:
        print(f"X Could not check {target}.{func} — {e}")
        return False


for target, func in checks:
    verify_attribute(target, func)


In [None]:
# Robust method/function presence checker (handles module-only targets)
import importlib
import inspect

# (target, attribute) pairs. A list is used instead of a dict so that targets
# checked for several attributes keep every entry; repeating a key in a dict
# literal silently keeps only the last one.
checks = [
    ("transformers.AutoTokenizer", "from_pretrained"),
    ("transformers.AutoModelForCausalLM", "from_pretrained"),
    ("transformers.BitsAndBytesConfig", "__init__"),
    ("transformers.HfArgumentParser", "__init__"),
    ("transformers.TrainingArguments", "__init__"),
    ("transformers.pipeline", "__call__"),  # we'll treat this as "callable"
    ("transformers.logging", "set_verbosity"),
    ("transformers.MllamaForConditionalGeneration", "from_pretrained"),
    ("transformers.AutoProcessor", "from_pretrained"),
    ("transformers.PreTrainedTokenizer", "apply_chat_template"),
    ("transformers.PreTrainedTokenizer", "decode"),
    ("transformers.PreTrainedModel", "generate"),
    ("transformers.PreTrainedModel", "save_pretrained"),
    ("transformers.PreTrainedModel", "push_to_hub"),
    ("trl", "setup_chat_format"),
    ("trl.SFTTrainer", "__init__"),
    ("datasets", "load_dataset"),
    ("datasets.Dataset", "map"),
    ("datasets.Dataset", "shuffle"),
    ("datasets.Dataset", "select"),
    ("peft.LoraConfig", "__init__"),
    ("peft", "get_peft_model"),
    ("peft.PeftModel", "from_pretrained"),
    ("peft.PeftModel", "merge_and_unload"),
    ("peft", "prepare_model_for_kbit_training"),
    ("huggingface_hub", "login"),
    ("kaggle_secrets.UserSecretsClient", "get_secret"),
    ("wandb", "login"),
    ("wandb", "init"),
    ("wandb", "finish"),
    ("torch.cuda", "get_device_capability"),
    ("bitsandbytes.nn", "Linear4bit"),
]


def check_target(target, func):
    """Print and return whether ``target`` provides ``func``.

    Args:
        target: A bare module name, or a dotted path whose last component is
            an attribute of the module named by the preceding components.
        func: Attribute name to look for. The special value ``"__call__"``
            means "verify the resolved object is callable".

    Returns:
        ``True`` if the check passed, ``False`` if the attribute is missing,
        the object is not callable, or the target could not be resolved.
    """
    try:
        if "." in target:
            module_name, name = target.rsplit(".", 1)
            obj = getattr(importlib.import_module(module_name), name)
        else:
            # module-only target: import the module and use it directly
            obj = importlib.import_module(target)

        # Special case: if they asked for "__call__", verify it's callable
        if func == "__call__":
            is_callable = callable(obj)
            if is_callable:
                print(f"_/ {target} is callable")
            else:
                print(f"X {target} is not callable")
            return is_callable

        # A function targeted directly only needs to exist.
        if inspect.isfunction(obj) and not (inspect.isclass(obj) or inspect.ismodule(obj)):
            print(f"_/ Function {target} is present")
            return True

        # Classes, modules and any other object type: plain attribute lookup.
        found = hasattr(obj, func)
        if found:
            print(f"_/ {target}.{func} exists")
        else:
            print(f"X {target}.{func} NOT found")
        return found
    except Exception as e:
        print(f"X Could not check {target}.{func} — {e}")
        return False


for target, func in checks:
    check_target(target, func)


In [None]:
# Special handling for peft.PeftModel.merge_and_unload:
# look on the PeftModel class first, then fall back to the LoRA model class.
try:
    import peft
    from peft.peft_model import PeftModel

    if not hasattr(PeftModel, "merge_and_unload"):
        # Not found on the wrapper class; check the LoRA-specific model.
        try:
            from peft.tuners.lora import LoraModel
        except ImportError:
            print("X Could not import LoraModel to check for merge_and_unload")
        else:
            if hasattr(LoraModel, "merge_and_unload"):
                print("_/ peft.tuners.lora.LoraModel.merge_and_unload exists")
            else:
                print("X merge_and_unload not found on PeftModel or LoraModel")
    else:
        print("_/ peft.PeftModel.merge_and_unload exists")
except Exception as err:
    print(f"X Could not check merge_and_unload — {err}")


In [None]:
from kaggle_secrets import UserSecretsClient
from huggingface_hub import login, whoami

# Read the Hugging Face token stored under the "HUGGINGFACE_TOKEN" Kaggle secret.
hf_token = UserSecretsClient().get_secret("HUGGINGFACE_TOKEN")

# Authenticate, then query the account to confirm the token works.
login(token=hf_token)
info = whoami()
display_name = info.get("name") or info.get("username") or "Unknown"
print("_/ Hugging Face connected as:", display_name)


In [None]:
# Connectivity check for Weights & Biases: log in, open a throwaway run, close it.
from kaggle_secrets import UserSecretsClient
import wandb

# Read the W&B API key stored under the "wandb" Kaggle secret.
wb_token = UserSecretsClient().get_secret("wandb")

# Log in, then start and immediately finish a test run in the
# "connectivity-check" project to prove the key is accepted.
wandb.login(key=wb_token)
run = wandb.init(project="connectivity-check", job_type="test", anonymous="allow")
print("_/ W&B run started:", run.name, "in project:", run.project)
run.finish()
print("_/ W&B run finished.")


In [None]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format

In [None]:
# Authenticate with the Hugging Face Hub using the token kept in Kaggle secrets.
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
# Kept as a notebook-level name: the W&B cell reads its key through the same client.
user_secrets = UserSecretsClient()

hf_token = user_secrets.get_secret("HUGGINGFACE_TOKEN")
login(token = hf_token)

In [None]:
# Read the W&B API key from the "wandb" Kaggle secret (needs `user_secrets` from the login cell).
wb_token = user_secrets.get_secret("wandb")

# Log in and open the run that will receive the training metrics
# (TrainingArguments below sets report_to="wandb").
wandb.login(key=wb_token)
run = wandb.init(
    project='Fine-tune Llama 3.2 on Customer Support Dataset', 
    job_type="training", 
    anonymous="allow"
)

In [None]:
# Local path of the base model files under the Kaggle input mount.
base_model = "/kaggle/input/llama-3.2/transformers/3b-instruct/1"
# Name used for the training output directory and for the Hub push of the fine-tuned model.
new_model = "llama-3.2-3b-it-Ecommerce-ChatBot"
# Dataset identifier passed to load_dataset.
dataset_name = "bitext/Bitext-customer-support-llm-chatbot-training-dataset"

In [None]:
# Set torch dtype and attention implementation
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install -qqq flash-attn
    torch_dtype = torch.bfloat16
    attn_implementation = "flash_attention_2"
else:
    torch_dtype = torch.float16
    attn_implementation = "eager"

In [None]:
# QLoRA config: 4-bit NF4 weights with double quantization; compute uses
# the dtype chosen in the previous cell.
quantization_settings = dict(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)
bnb_config = BitsAndBytesConfig(**quantization_settings)

# Load the base model with the quantization settings above.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    attn_implementation=attn_implementation,
    device_map="auto",
    quantization_config=bnb_config,
)

# Load the tokenizer from the same base model path.
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

In [None]:
# Load the training split, shuffle it with a fixed seed, then keep a small subset.
dataset = load_dataset(dataset_name, split="train")
dataset = dataset.shuffle(seed=65)
dataset = dataset.select(range(1000))  # Only use 1000 samples for quick demo

# System prompt used for every training conversation and inference request.
instruction = """You are a top-rated customer service agent named John. 
    Be polite to customers and answer all their questions.
    """
def format_chat_template(row):
    """Render one dataset row as a chat transcript stored under ``row["text"]``.

    The conversation is the notebook-level ``instruction`` as the system turn,
    ``row["instruction"]`` as the user turn and ``row["response"]`` as the
    assistant turn, rendered to a string by the notebook-level ``tokenizer``.

    Args:
        row: Mapping with at least the ``"instruction"`` and ``"response"`` keys.

    Returns:
        The same ``row`` object, with the ``"text"`` key added or overwritten.
    """
    turns = (
        ("system", instruction),
        ("user", row["instruction"]),
        ("assistant", row["response"]),
    )
    conversation = [{"role": role, "content": content} for role, content in turns]
    row["text"] = tokenizer.apply_chat_template(conversation, tokenize=False)
    return row

# Apply format_chat_template to every row, which adds the "text" field to each;
# num_proc=4 asks datasets to spread the work over 4 processes.
# NOTE(review): this rebinds `dataset`, so re-running the cell maps the already-mapped data again.
dataset = dataset.map(
    format_chat_template,
    num_proc= 4,
)


In [None]:
# Preview one formatted example. Index the row first so that only this row is
# read, instead of materialising the whole "text" column to pick one element.
dataset[3]["text"]

In [None]:
import bitsandbytes as bnb

def find_all_linear_names(model):
    """Collect the leaf names of every ``bnb.nn.Linear4bit`` module in ``model``.

    Args:
        model: Object exposing ``named_modules()`` that yields
            ``(qualified_name, module)`` pairs.

    Returns:
        A list of unique leaf names (the last dot-separated component of each
        qualified name), excluding ``'lm_head'``. The list is built from a set,
        so its order is not defined.
    """
    linear_4bit = bnb.nn.Linear4bit
    leaf_names = {
        qualified_name.split('.')[-1]
        for qualified_name, submodule in model.named_modules()
        if isinstance(submodule, linear_4bit)
    }
    leaf_names.discard('lm_head')  # needed for 16 bit
    return list(leaf_names)

# Leaf names of the model's 4-bit linear layers; used as LoRA target_modules below.
modules = find_all_linear_names(model)

In [None]:
# LoRA config: rank 16, alpha 32, dropout 0.05, no bias terms, causal-LM task,
# applied to the module names collected by find_all_linear_names(model).
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules
)
# NOTE(review): the dataset "text" column was rendered with
# tokenizer.apply_chat_template before this call. If setup_chat_format replaces
# the tokenizer's chat template, the training text and later inference prompts
# would use different formats — confirm against the TRL documentation.
model, tokenizer = setup_chat_format(model, tokenizer)
# NOTE(review): `model` is rebound here, so re-running this cell applies
# setup_chat_format and get_peft_model to the already-wrapped model.
model = get_peft_model(model, peft_config)

In [None]:
# Hyperparameters
# Checkpoints/outputs go to the `new_model` directory; metrics are reported to W&B.
training_arguments = TrainingArguments(
    output_dir=new_model,
    # Batch size 1 per device with 2 accumulation steps.
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    eval_strategy="steps",
    # NOTE(review): a float below 1 is presumably read as a fraction of the
    # total training steps — confirm in the TrainingArguments documentation.
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    # Both mixed-precision flags are explicitly disabled.
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)

In [None]:
# Setting sft parameters
from datasets import Dataset, DatasetDict

# Only Dataset / DatasetDict inputs are supported; anything else is rejected.
if not isinstance(dataset, (DatasetDict, Dataset)):
    raise TypeError(type(dataset))

if isinstance(dataset, DatasetDict):
    # Pre-split data: use its train split and whichever of validation/test exists.
    train_ds = dataset["train"]
    eval_ds = dataset.get("validation") or dataset.get("test")
else:
    # Single table: hold out 10% for evaluation with a fixed seed.
    split = dataset.train_test_split(test_size=0.1, seed=42)
    train_ds = split["train"]
    eval_ds = split["test"]

# Trainer over the "text" column, without packing, sequences capped at 512.
sft_kwargs = dict(
    model=model,
    train_dataset=train_ds,
    eval_dataset=eval_ds,
    peft_config=peft_config,
    max_seq_length=512,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)
trainer = SFTTrainer(**sft_kwargs)


In [None]:
# Run fine-tuning with the SFTTrainer built in the previous cell.
trainer.train()

In [None]:
# End the active W&B run now that training is done.
wandb.finish()

In [None]:
# Quick qualitative check of the fine-tuned model on one customer request.
messages = [{"role": "system", "content": instruction},
    {"role": "user", "content": "I bought the same item twice, cancel order {{Order Number}}"}]

# Render the conversation and append the generation prompt for the assistant turn.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Put the inputs on the model's own device instead of assuming "cuda".
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=150, num_return_sequences=1)

# The generated sequence starts with the prompt tokens. Decode only what comes
# after them: splitting the full text on the word "assistant" cuts off any
# reply that contains that word and fails if the word is absent.
prompt_length = inputs["input_ids"].shape[-1]
text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

print(text)

In [None]:
# Save the fine-tuned model
# Write the trainer's model to the local `new_model` directory, then push it to
# the Hub under the same name. Only the model is saved here, not the tokenizer.
trainer.model.save_pretrained(new_model)
trainer.model.push_to_hub(new_model, use_temp_dir=False)

In [None]:
# Authenticate with the Hugging Face Hub again before the reload / merge section,
# so this part of the notebook can be run on its own.
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

# Token is read from the "HUGGINGFACE_TOKEN" Kaggle secret.
hf_token = user_secrets.get_secret("HUGGINGFACE_TOKEN")
login(token = hf_token)

In [None]:
# Model
# Path of the base model files under the Kaggle input mount.
base_model_url = "/kaggle/input/llama-3.2/transformers/3b-instruct/1"
# Directory under /kaggle/working passed to PeftModel.from_pretrained as the adapter location.
new_model_url = "/kaggle/working/llama-3.2-3b-it-Ecommerce-ChatBot/"

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
from peft import PeftModel
import torch
from trl import setup_chat_format
# Reload tokenizer and model from the base path, in float16 and without
# quantization this time.
reload_kwargs = {
    "low_cpu_mem_usage": True,
    "return_dict": True,
    "torch_dtype": torch.float16,
    "device_map": "auto",
}
tokenizer = AutoTokenizer.from_pretrained(base_model_url)
base_model_reload = AutoModelForCausalLM.from_pretrained(base_model_url, **reload_kwargs)

In [None]:
# Merge adapter with base model
# Apply the same chat-format setup that was applied to the model and tokenizer
# before training, then attach the saved adapter.
base_model_reload, tokenizer = setup_chat_format(base_model_reload, tokenizer)
model = PeftModel.from_pretrained(base_model_reload, new_model_url)

# Merge the LoRA weights into the base model and drop the PEFT wrapper.
# Called on the instance: `LoraModel` is never imported in this section, so
# `LoraModel.merge_and_unload(model)` only worked if an earlier diagnostic
# cell had happened to import it.
model = model.merge_and_unload()

In [None]:
# System prompt, repeated here so this section can run without the training cells.
instruction = """You are a top-rated customer service agent named John. 
    Be polite to customers and answer all their questions.
    """

messages = [{"role": "system", "content": instruction},
    {"role": "user", "content": "I have to see what payment payment modalities are accepted"}]

# Render the conversation and append the generation prompt for the assistant turn.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Put the inputs on the model's own device instead of assuming "cuda".
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=150, num_return_sequences=1)

# The generated sequence starts with the prompt tokens. Decode only what comes
# after them: splitting the full text on the word "assistant" cuts off any
# reply that contains that word and fails if the word is absent.
prompt_length = inputs["input_ids"].shape[-1]
text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

print(text)

In [None]:
# Directory / repository name for the saved model.
new_model = "llama-3.2-3b-it-Ecommerce-ChatBot"

# Save the current `model` and the tokenizer side by side in the local `new_model` directory.
model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)

In [None]:
# Push both the model and the tokenizer to the Hub under the same name.
model.push_to_hub(new_model, use_temp_dir=False)
tokenizer.push_to_hub(new_model, use_temp_dir=False)