<a href="https://colab.research.google.com/github/omkarwazulkar/GoogleColab/blob/main/H4NoRobots_FineTune_Llama3_1B_LoRA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Requirements**

In [None]:
# Install the fine-tuning stack into the kernel's own environment (%pip rather
# than !pip). `peft` is listed explicitly because the training and inference
# cells import it, so it must be upgraded together with transformers and trl.
%pip install -U trl bitsandbytes transformers accelerate datasets peft

## **Dataset**

In [None]:
# Log in to the Hugging Face Hub: later cells download meta-llama/Llama-3.2-1B
# and push the adapter and merged model to the Hub.
!hf auth login

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer

In [None]:
# Default system prompt, inserted by create_conversation when a sample does not
# already start with a system message.
system_message = (
    "You are Llama, an AI assistant created by Omkar to be helpful and honest. "
    "Your knowledge spans a wide range of topics, allowing you to engage in "
    "substantive conversations and provide analysis on complex subjects."
)

In [None]:
# Load HuggingFaceH4/no_robots; the cells below use its "train" and "test" splits.
dataset = load_dataset(path="HuggingFaceH4/no_robots")

In [None]:
# Every train-split column except "messages"; these are dropped when the
# dataset is mapped through create_conversation.
columns_to_remove = [
    column
    for column in dataset["train"].column_names
    if column != "messages"
]

In [None]:
def create_conversation(sample):
    """Return the sample's messages, guaranteed to start with a system turn.

    Args:
        sample: Mapping with a "messages" list of {"role", "content"} dicts.

    Returns:
        {"messages": [...]} — the original list when its first message already
        has role "system", otherwise a new list with the module-level
        ``system_message`` prepended as a system turn.
    """
    turns = sample["messages"]
    if turns[0]["role"] != "system":
        # List concatenation builds a new list; the input sample is not mutated.
        turns = [{"role": "system", "content": system_message}] + turns
    return {"messages": turns}

In [None]:
# Run create_conversation over every example and drop all columns except
# "messages" (the names collected in columns_to_remove).
dataset = dataset.map(create_conversation, remove_columns=columns_to_remove)

In [None]:
def _has_even_turn_count(example):
    """True when the messages after the first one number an even count."""
    return len(example["messages"][1:]) % 2 == 0


# Apply the same filter to both splits.
for split_name in ("train", "test"):
    dataset[split_name] = dataset[split_name].filter(_has_even_turn_count)

In [None]:
# Short aliases for the two filtered splits.
train_dataset, test_dataset = dataset["train"], dataset["test"]

In [None]:
# Jinja chat template. The system message is emitted verbatim; user and
# assistant turns are prefixed with "\n\nHuman: " / "\n\nAssistant: " and each
# is terminated with the tokenizer's EOS token.
LLAMA_3_CHAT_TEMPLATE = (
    "{% for message in messages %}"
    "{% if message['role'] == 'system' %}"
    "{{ message['content'] }}"
    "{% elif message['role'] == 'user' %}"
    "{{ '\\n\\nHuman: ' + message['content'] + eos_token }}"
    "{% elif message['role'] == 'assistant' %}"
    "{{ '\\n\\nAssistant: ' + message['content'] + eos_token }}"
    "{% endif %}"
    "{% endfor %}"
    # Honor add_generation_prompt=True: open an assistant turn with the same
    # prefix the assistant branch uses. Without this section the flag is
    # silently ignored and inference prompts end right after the user turn.
    # Rendering with add_generation_prompt=False is unchanged.
    "{% if add_generation_prompt %}"
    "{{ '\\n\\nAssistant: ' }}"
    "{% endif %}"
)

In [None]:
# Load the base checkpoint's fast tokenizer, install the custom chat template,
# and reuse the EOS token as the padding token.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B", use_fast=True)
tokenizer.chat_template = LLAMA_3_CHAT_TEMPLATE
tokenizer.pad_token = tokenizer.eos_token

In [None]:
def template_dataset(example):
    """Render one example's messages into a single string.

    Args:
        example: Mapping with a "messages" conversation list.

    Returns:
        {"text": <rendered string>}, produced by the module-level tokenizer's
        chat template with tokenize=False and no generation prompt.
    """
    rendered = tokenizer.apply_chat_template(
        example["messages"], tokenize=False, add_generation_prompt=False
    )
    return {"text": rendered}

In [None]:
# Replace the "messages" column of both splits with the rendered "text" column.
train_dataset, test_dataset = (
    split.map(template_dataset, remove_columns=["messages"])
    for split in (train_dataset, test_dataset)
)

In [None]:
# Print the rendered text of training example 1.
print(train_dataset[1]["text"])

## **Model Training**

In [None]:
import torch
from peft import LoraConfig
from trl import SFTTrainer
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments

# Hub id of the base checkpoint that the next cell loads for fine-tuning.
model_id = "meta-llama/Llama-3.2-1B"

In [None]:
# Load the base checkpoint in float16 with use_cache disabled, then turn on
# gradient checkpointing on the model.
model = AutoModelForCausalLM.from_pretrained(model_id, use_cache=False, dtype=torch.float16)
model.gradient_checkpointing_enable()

In [None]:
# LoRA adapter settings for causal-LM fine-tuning: rank 16, alpha 8,
# dropout 0.05, bias "none", targeting "all-linear" modules.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules="all-linear",
    r=16,
    lora_alpha=8,
    lora_dropout=0.05,
    bias="none",
)


In [None]:
# Hyperparameters for a single-epoch LoRA fine-tune. With a per-device batch
# size of 2 and 4 accumulation steps, each optimizer step covers 8 examples
# per device. A checkpoint is written at the end of every epoch.
training_args = TrainingArguments(
    output_dir="/content/llama3.2-1b-lora",
    num_train_epochs=1,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    # The plain "constant" schedule has no warmup phase, so warmup_ratio below
    # would be silently ignored; "constant_with_warmup" actually applies it.
    lr_scheduler_type="constant_with_warmup",
    logging_steps=10,
    save_strategy="epoch",
    max_grad_norm=0.3,
    warmup_ratio=0.03,

    # 🔒 T4 safe: fp16 mixed precision, with bf16 and tf32 disabled
    fp16=True,
    bf16=False,
    tf32=False,

    gradient_checkpointing=True,
    optim="adamw_torch",
    report_to="tensorboard",
)

In [None]:
# Wire the base model, LoRA config, tokenizer and both splits into SFTTrainer.
trainer = SFTTrainer(
    model=model,
    peft_config=peft_config,
    processing_class=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    args=training_args,
)

# Report how many parameters are trainable on the wrapped model.
trainer.model.print_trainable_parameters()

In [None]:
# Run fine-tuning; the call's return value is shown as the cell output.
trainer.train()

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers.trainer_utils import get_last_checkpoint
from peft import PeftModel

model_id = "meta-llama/Llama-3.2-1B"
output_dir = "/content/llama3.2-1b-lora"

# Resolve the newest checkpoint under the training output directory instead of
# hard-coding a step number: the step count changes with dataset size, batch
# size and gradient accumulation.
lora_path = get_last_checkpoint(output_dir)
if lora_path is None:
    raise FileNotFoundError(
        f"No checkpoint found in {output_dir}; run the training cell first."
    )

# Load the tokenizer from the checkpoint directory; EOS doubles as the pad token.
tokenizer = AutoTokenizer.from_pretrained(lora_path, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Reload the base weights in float16 with device_map="auto", attach the LoRA
# adapter stored at lora_path, and switch the wrapped model to eval mode.
base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", dtype=torch.float16)
model = PeftModel.from_pretrained(base_model, lora_path)

model.eval()

In [None]:
# Two-turn test conversation: a system prompt followed by one user question.
messages = [
    dict(
        role="system",
        content="You are Llama, an AI assistant created by Omkar to be helpful and honest.",
    ),
    dict(
        role="user",
        content="Explain gradient checkpointing in simple terms.",
    ),
]

In [None]:
# Render the conversation to a plain string (tokenize=False), requesting a
# generation prompt from the chat template.
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)

In [None]:
# Display the rendered prompt string.
prompt

In [None]:
# Tokenize the prompt into PyTorch tensors and move them to the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

In [None]:
# Sampling settings passed to generate(); EOS and pad ids come from the tokenizer.
sampling_options = dict(
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id,
)

# Generate without tracking gradients.
with torch.no_grad():
    outputs = model.generate(**inputs, **sampling_options)

In [None]:
# Skip the prompt tokens so only the newly generated continuation is decoded.
prompt_token_count = inputs["input_ids"].shape[-1]
response = tokenizer.decode(outputs[0][prompt_token_count:], skip_special_tokens=True)

In [None]:
# Print the decoded completion.
print(response)

## **Push LoRA to HF**

In [None]:
# Hub repository that receives the LoRA adapter model and the tokenizer.
hf_repo = "omkarwazulkar/Llama-3.2-1B-LoRA-HuggingFaceH4_50"

In [None]:
# Upload the PEFT-wrapped model, then the tokenizer, to the same Hub repo.
model.push_to_hub(hf_repo)
tokenizer.push_to_hub(hf_repo)

## **Inference LoRA Adapter**

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

In [None]:
# Base checkpoint id and the Hub repo the LoRA adapter is loaded from.
model_id, lora_path = (
    "meta-llama/Llama-3.2-1B",
    "omkarwazulkar/Llama-3.2-1B-LoRA-HuggingFaceH4_50",
)

# The tokenizer is loaded from the adapter repo; EOS doubles as the pad token.
tokenizer = AutoTokenizer.from_pretrained(lora_path, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Load the base weights in float16 with device_map="auto", attach the adapter
# from lora_path, and put the wrapped model into eval mode.
base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", dtype=torch.float16)
model = PeftModel.from_pretrained(base_model, lora_path)

model.eval()

In [None]:
# Two-turn test conversation: a system prompt followed by one user question.
messages = [
    dict(
        role="system",
        content="You are Llama, an AI assistant created by Omkar to be helpful and honest.",
    ),
    dict(
        role="user",
        content="Explain gradient checkpointing in simple terms.",
    ),
]

In [None]:
# Render the conversation to a plain string (tokenize=False), requesting a
# generation prompt from the chat template.
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)

In [None]:
# Display the rendered prompt string.
prompt

In [None]:
# Tokenize the prompt into PyTorch tensors on the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Sampling settings passed to generate(); EOS and pad ids come from the tokenizer.
sampling_options = dict(
    max_new_tokens=256,
    temperature=0.7,
    top_p=0.9,
    do_sample=True,
    repetition_penalty=1.1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id,
)

# Generate without tracking gradients.
with torch.no_grad():
    outputs = model.generate(**inputs, **sampling_options)

# Skip the prompt tokens so only the newly generated continuation is decoded.
prompt_token_count = inputs["input_ids"].shape[-1]
response = tokenizer.decode(outputs[0][prompt_token_count:], skip_special_tokens=True)

print(response)

## **Push Merged to HF**

In [None]:
# Merge the adapter into the base model via PEFT's merge_and_unload(); the
# result is pushed to the Hub in the next cell.
merged = model.merge_and_unload()

In [None]:
# Destination repo for the merged model.
repo_id = "omkarwazulkar/Merged-Llama-3.2-1B-LoRA-HuggingFaceH4_50"

# Upload the merged weights (private=False), then the tokenizer, to that repo.
merged.push_to_hub(repo_id, private=False)

tokenizer.push_to_hub(repo_id)

## **Inference**

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

In [None]:
# Hub repo of the merged model, loaded below for standalone inference.
repo_id = "omkarwazulkar/Merged-Llama-3.2-1B-LoRA-HuggingFaceH4_50"

In [None]:
# Tokenizer from the merged repo; EOS doubles as the pad token.
tokenizer = AutoTokenizer.from_pretrained(repo_id, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token

# Merged weights in float16 with device_map="auto"; no PEFT wrapper is used here.
model = AutoModelForCausalLM.from_pretrained(repo_id, device_map="auto", dtype=torch.float16)

In [None]:
# Two-turn test conversation: a system prompt followed by one user question.
messages = [
    dict(
        role="system",
        content="You are Llama, an AI assistant created by Omkar to be helpful and honest.",
    ),
    dict(
        role="user",
        content="Explain gradient checkpointing in simple terms.",
    ),
]

In [None]:
# Render the conversation to a plain string (tokenize=False), requesting a
# generation prompt from the chat template.
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)

In [None]:
# Display the rendered prompt string.
prompt

In [None]:
# Tokenize the prompt into PyTorch tensors on the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Sampling settings passed to generate(); EOS and pad ids come from the tokenizer.
sampling_options = dict(
    max_new_tokens=256,
    temperature=0.7,
    top_p=0.9,
    do_sample=True,
    repetition_penalty=1.1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id,
)

# Generate without tracking gradients.
with torch.no_grad():
    outputs = model.generate(**inputs, **sampling_options)

# Skip the prompt tokens so only the newly generated continuation is decoded.
prompt_token_count = inputs["input_ids"].shape[-1]
response = tokenizer.decode(outputs[0][prompt_token_count:], skip_special_tokens=True)

print(response)