# Llama-3.2-3B-Instruct Fine-tuning for Odia Language


## Installation

In [None]:
%%capture
import os
# Environment check: the Colab branch is taken when any environment variable
# name contains "COLAB_"; every other environment gets the plain install.
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    # The first and last installs use --no-deps, so pip does not pull in (or
    # replace) their dependencies; xformers is pinned to an exact version.
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
    !pip install --no-deps unsloth

In [None]:
from unsloth import FastLanguageModel
import torch
# Loading options shared with later cells (max_seq_length is reused by the trainer).
max_seq_length = 2048  # Any value works; RoPE scaling is handled internally.
dtype = None           # None = auto-detect. Float16 for Tesla T4/V100, Bfloat16 for Ampere+.
load_in_4bit = True    # 4-bit quantization to reduce memory usage; may be set to False.

# Collect the keyword arguments first so the load call itself stays on one line.
load_options = {
    "model_name": "meta-llama/Llama-3.2-3B-Instruct",
    "max_seq_length": max_seq_length,
    "dtype": dtype,
    "load_in_4bit": load_in_4bit,
}
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)

In [None]:
# Projection layers that receive LoRA adapters.
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# Wrap the base model with LoRA adapters.
model = FastLanguageModel.get_peft_model(
    model,
    r=32,  # Any rank > 0; suggested values are 8, 16, 32, 64, 128.
    target_modules=lora_target_modules,
    lora_alpha=32,
    lora_dropout=0,  # Any value is supported, but 0 is the optimized path.
    bias="none",     # Any value is supported, but "none" is the optimized path.
    # "unsloth" checkpointing uses 30% less VRAM and fits 2x larger batches;
    # pass True or "unsloth" for very long context.
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,   # Rank-stabilized LoRA is available but not used here.
    loftq_config=None,  # LoftQ is available but not used here.
)

## Data Preparation

In [None]:
# Import necessary libraries
import random
import pandas as pd
from datasets import Dataset, load_dataset

In [None]:
# Fetch the train split from the Hub and work with it as a pandas DataFrame.
dataset = load_dataset("sumankumarbhadra/alpaca-odia", split="train").to_pandas()

# Quick sanity check: row count plus the first record.
print(f"Original dataset size: {len(dataset)}")
print("Sample data:")
print(dataset.iloc[0])

In [None]:
# Data augmentation function
def _has_text(value):
    """Return True when *value* is neither missing (None/NaN) nor blank."""
    return bool(pd.notna(value)) and bool(str(value).strip())


def augment_dataset(df):
    """Expand every source row into up to three instruction-tuning samples.

    All samples produced from a row share that row's Odia ``output``:

    1. The English instruction wrapped in a randomly chosen "answer in Odia"
       template, paired with the English input ("" when it is missing).
    2. The plain English instruction paired with the Odia input. Emitted only
       when the Odia input actually contains text; an empty string is treated
       like a missing value, matching the blank-input check applied when the
       samples are later turned into conversations.
    3. The Odia instruction paired with the Odia input ("" when there is
       none). Emitted only when the Odia instruction is present.

    Args:
        df: DataFrame with the columns ``original_instruction``,
            ``original_input``, ``instruction``, ``input`` and ``output``.

    Returns:
        A new DataFrame with the columns ``instruction``, ``input`` and
        ``output``. The columns are present even when ``df`` has no rows.

    Note:
        Template selection uses the module-level ``random`` generator, so
        results are only reproducible if the caller seeds it beforehand.
    """
    # Prompt templates for English instructions
    odia_prompt_templates = (
        "Answer in Odia: {instruction}",
        "Provide your response in Odia: {instruction}",
        "Reply in Odia language: {instruction}",
        "Give an Odia response to this question: {instruction}",
        "Write your answer in Odia: {instruction}",
        "Respond using Odia language: {instruction}",
        "{instruction} Answer in Odia.",
        "{instruction} Respond in Odia only.",
        "Express your answer in Odia: {instruction}",
        "Present the information in Odia: {instruction}",
        "Use Odia language for this response: {instruction}",
        "Output in Odia: {instruction}",
        "{instruction} (Respond in Odia)",
        "{instruction} Please answer using Odia.",
        "Your task: {instruction} Format: Odia language",
        "Communicate this in Odia: {instruction}",
        "Craft an Odia response to: {instruction}",
        "{instruction} → Odia response required",
        "Using Odia script, answer: {instruction}",
        "In ଓଡ଼ିଆ only: {instruction}",
        "{instruction} Return information in Odia.",
        "Answer this question with Odia text: {instruction}",
        "Formulate your response in ଓଡ଼ିଆ: {instruction}",
        "Odia output requested: {instruction}",
        "{instruction} Your response should be in the Odia language.",
        "Generate Odia text for: {instruction}",
        "{instruction} Convey this information in Odia.",
    )

    augmented_data = []

    # Plain dict records avoid building a Series per row as iterrows() does.
    for row in df.to_dict("records"):
        english_inst = row["original_instruction"]
        english_inp = row["original_input"] if pd.notna(row["original_input"]) else ""
        odia_inst = row["instruction"]
        odia_inp = row["input"]
        odia_out = row["output"]
        odia_inp_present = _has_text(odia_inp)

        # Case 1: English instruction + English input -> Odia output
        augmented_data.append({
            "instruction": random.choice(odia_prompt_templates).format(instruction=english_inst),
            "input": english_inp,
            "output": odia_out,
        })

        # Case 2: English instruction + Odia input -> Odia output
        if odia_inp_present:
            augmented_data.append({
                "instruction": english_inst,
                "input": odia_inp,
                "output": odia_out,
            })

        # Case 3: Odia instruction (+ Odia input when available) -> Odia output
        if pd.notna(odia_inst):
            augmented_data.append({
                "instruction": odia_inst,
                "input": odia_inp if odia_inp_present else "",
                "output": odia_out,
            })

    # Explicit columns keep the schema stable even when no rows were produced.
    return pd.DataFrame(augmented_data, columns=["instruction", "input", "output"])

In [None]:
# Expand the raw rows into training samples, then wrap them as a HF dataset.
augmented_df = augment_dataset(dataset)
augmented_dataset = Dataset.from_pandas(augmented_df)

print(f"Augmented dataset size: {len(augmented_df)}")

In [None]:
# Convert to conversation format
def convert_to_conversations(examples):
    """Turn a batch of instruction/input/output columns into chat conversations.

    Each sample becomes a two-message conversation: a ``user`` message holding
    the instruction (followed by a blank line and the input when the input is
    non-blank) and an ``assistant`` message holding the output.

    Args:
        examples: Mapping with equally long ``instruction``, ``input`` and
            ``output`` sequences.

    Returns:
        ``{"conversations": [...]}`` with one message list per sample.
    """
    instructions = examples["instruction"]

    def user_prompt(idx):
        # Blank or missing inputs are skipped so the prompt has no trailing gap.
        extra = examples["input"][idx]
        if extra and extra.strip():
            return instructions[idx] + "\n\n" + extra
        return instructions[idx]

    return {
        "conversations": [
            [
                {"role": "user", "content": user_prompt(idx)},
                {"role": "assistant", "content": examples["output"][idx]},
            ]
            for idx in range(len(instructions))
        ]
    }

# Apply the conversion batch-wise; the result gains a "conversations" column.
conversation_dataset = augmented_dataset.map(
    convert_to_conversations,
    batched=True,
)

In [None]:
from unsloth.chat_templates import get_chat_template
# Attach the "llama-3.1" chat template to the tokenizer (it works for 3.2 as well).
tokenizer = get_chat_template(tokenizer, chat_template="llama-3.1")

def formatting_prompts_func(examples):
    """Render every conversation in the batch to one chat-template string.

    Uses the notebook-level ``tokenizer``. Output is plain text (not token
    ids) and no generation prompt is appended.

    Args:
        examples: Batch mapping containing a ``conversations`` column.

    Returns:
        ``{"text": [...]}`` with one rendered string per conversation.
    """
    rendered = []
    for convo in examples["conversations"]:
        rendered.append(
            tokenizer.apply_chat_template(
                convo,
                tokenize=False,
                add_generation_prompt=False,
            )
        )
    return {"text": rendered}

# Render all conversations to text, adding a "text" column.
formatted_dataset = conversation_dataset.map(
    formatting_prompts_func,
    batched=True,
)

# Preview the first rendered sample.
first_text = formatted_dataset[0]["text"]
print("\nSample formatted text:")
print(first_text)

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported

# Mixed precision: bf16 where supported, fp16 otherwise.
use_bf16 = is_bfloat16_supported()

# Optimizer / schedule settings for a single full pass over the data.
training_args = TrainingArguments(
    per_device_train_batch_size=32,
    gradient_accumulation_steps=4,
    warmup_steps=5,
    num_train_epochs=1,  # One full training run; a max_steps cap (e.g. 60) could be used instead.
    learning_rate=2e-4,
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="outputs",
    report_to="none",  # Change this to report to WandB etc.
)

# Supervised fine-tuning on the rendered "text" column.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=formatted_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer),
    dataset_num_proc=2,
    packing=False,  # Packing can make training 5x faster for short sequences.
    args=training_args,
)

In [None]:
from unsloth.chat_templates import train_on_responses_only
# Header strings that open a user turn and an assistant turn in the rendered text.
user_header = "<|start_header_id|>user<|end_header_id|>\n\n"
assistant_header = "<|start_header_id|>assistant<|end_header_id|>\n\n"

# Re-wrap the trainer so that (per the helper's name) only responses are trained on.
trainer = train_on_responses_only(
    trainer,
    instruction_part=user_header,
    response_part=assistant_header,
)

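Verify masking: decode one training example and its labels (masked label positions are shown as spaces) to check which part of the text is kept as the training target.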

In [None]:
# Decode the token ids of the training example at index 5 back to text for inspection.
tokenizer.decode(trainer.train_dataset[5]["input_ids"])

In [None]:
# First token id produced for a single space; used below as a visible placeholder.
space = tokenizer(" ", add_special_tokens = False).input_ids[0]
# Labels equal to -100 are swapped for the placeholder before decoding, so the
# decoded string shows text only at the positions that carry a real label.
tokenizer.decode([space if x == -100 else x for x in trainer.train_dataset[5]["labels"]])

In [None]:
# Start training; whatever trainer.train() returns is kept in trainer_stats.
trainer_stats = trainer.train()

## Inference

In [None]:
# Test inference
from unsloth.chat_templates import get_chat_template
from transformers import TextStreamer

# Enable inference mode
FastLanguageModel.for_inference(model)

# Set tokenizer for generation
tokenizer = get_chat_template(tokenizer, chat_template="llama-3.1")

# Prompts to try: an English request, an Odia question, and an English
# request that asks for an Odia answer.
test_examples = [
    "Translate the following to Odia: Hello, how are you today?",
    "ଆପଣଙ୍କ ନାମ କଣ?",
    "Tell me about Odisha in Odia language.",
]

# Generation settings shared by every prompt.
generation_kwargs = {
    "max_new_tokens": 128,
    "use_cache": True,
    "temperature": 0.7,
    "top_p": 0.9,
}

for test_example in test_examples:
    # Single-turn conversation, rendered to token ids with a generation prompt.
    messages = [{"role": "user", "content": test_example}]
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    )
    inputs = inputs.to("cuda")

    # A fresh streamer per prompt; skip_prompt=True keeps the prompt out of the output.
    text_streamer = TextStreamer(tokenizer, skip_prompt=True)

    print(f"\nUser: {test_example}")
    print("Assistant: ", end="")

    _ = model.generate(input_ids=inputs, streamer=text_streamer, **generation_kwargs)

## Save The Model

## Push to Hugging Face Hub

In [None]:
# Target names: local output directory and the Hub repository id.
model_name = "Llama-3.2-3B-Instruct-Odia"
HF_USERNAME = "sumankumarbhadra"
HF_MODEL_NAME = f"{HF_USERNAME}/{model_name}"

# The Hub token is read from Colab's userdata store under the key 'HF_TOKEN'
# (this import is only available inside Colab).
from google.colab import userdata
hf_token = userdata.get('HF_TOKEN')

# Merge to 16bit
model.save_pretrained_merged(model_name, tokenizer, save_method = "merged_16bit")
model.push_to_hub_merged(HF_MODEL_NAME, tokenizer, save_method = "merged_16bit", token = hf_token)

# Just LoRA adapters
# NOTE(review): this reuses the same local directory and the same Hub repo as
# the merged 16-bit export above -- confirm that mixing/overwriting is intended.
model.save_pretrained_merged(model_name, tokenizer, save_method = "lora")
model.push_to_hub_merged(HF_MODEL_NAME, tokenizer, save_method = "lora", token = hf_token)

# GGUF exports in two quantizations.
# Fixed: the token argument previously referenced the undefined name
# `hf_toke`, which raised NameError after the uploads above had already run.
model.push_to_hub_gguf(
    HF_MODEL_NAME,
    tokenizer,
    quantization_method = ["q5_k_m", "q8_0"],
    token = hf_token,
)

print(f"Model pushed to Hugging Face Hub as {HF_MODEL_NAME}")