## 2nd dataset

In [1]:
!pip install -q -U transformers accelerate
!pip install -q -U bitsandbytes
!pip install -q -U peft
!pip install -q -U datasets
!pip install -q -U wandb

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.2/11.2 MB[0m [31m118.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m120.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m93.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m60.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import os
import torch
import gc
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import (
    LoraConfig,
    get_peft_model,
    prepare_model_for_kbit_training
)
from functools import partial
import wandb
from huggingface_hub import notebook_login

In [3]:
# Log in to Hugging Face
notebook_login()

# Log in to Weights & Biases for experiment tracking
wandb.login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mscythe410[0m ([33mscythe410-informatics-institute-of-technology[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [4]:
# --- Model and Dataset Parameters ---
base_model_name = "RedQueenProtocol/sinhala-wiki-2025-LoRA-merged"

# The adapter you just trained and want to continue training
adapter_to_continue_from = "RedQueenProtocol/sinhala-QA-LoRA"

# The final repo where the adapter, now trained on BOTH datasets, will be saved
final_adapter_repo = "RedQueenProtocol/sinhala-QA-LoRA"

# --- NEW Dataset ---
new_dataset_name = "ihalage/sinhala-finetune-qa-eli5"

# --- Training Parameters (Keep them the same) ---
micro_batch_size = 1
gradient_accumulation_steps = 8
max_seq_length = 512
learning_rate = 2e-5
num_train_epochs = 3

# --- LoRA and Quantization Configs (No changes needed) ---
lora_config = LoraConfig(
    r=16, lora_alpha=32, lora_dropout=0.05, bias="none", task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16, bnb_4bit_use_double_quant=False,
)

In [5]:
# Load the new dataset from Hugging Face Hub
new_qa_dataset = load_dataset(new_dataset_name, split="train")

# Rename the columns to 'Question' and 'Answer' to reuse the formatting function
new_qa_dataset = new_qa_dataset.rename_column("sinhala_question", "Question")
new_qa_dataset = new_qa_dataset.rename_column("sinhala_answer", "Answer")

# Define the chat template formatting function (this can be the same as before)
def format_qa_prompt(example):
    """Formats a question-answer pair into the Llama 3 chat template."""
    return {
        "text": f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{example['Question']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{example['Answer']}<|eot_id|>"
    }

# Apply the formatting and create a train/test split
formatted_dataset = new_qa_dataset.map(format_qa_prompt, remove_columns=list(new_qa_dataset.features))
split_dataset = formatted_dataset.train_test_split(test_size=0.1, seed=42)

train_dataset = split_dataset['train']
eval_dataset = split_dataset['test']

print(f"Training on {len(train_dataset)} new samples, validating on {len(eval_dataset)} new samples.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

sinhala-finetune-qa-eli5.jsonl:   0%|          | 0.00/35.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/10000 [00:00<?, ? examples/s]

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

Training on 9000 new samples, validating on 1000 new samples.


In [7]:
# --- ADD THIS LINE TO VERIFY ---
print("\n--- Example of a formatted prompt from the new dataset ---")
print(train_dataset[13]['text'])


--- Example of a formatted prompt from the new dataset ---
<|begin_of_text|><|start_header_id|>user<|end_header_id|>

ඇමරිකානු සහ සෝවියට් හමුදා මූලධර්ම කෙතරම් වෙනස්ද?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

සෝවියට් ටැංකි මූලධර්මය Blitzkrieg ට සම්පූර්ණයෙන්ම වෙනස් විය; සෝවියට් සංගමය විශ්වාස කළේ අවදානමට ලක්විය හැකි සන්නාහය ඉතා පටු ලෙස මෙහෙයවන අසාර්ථක පද්ධතියකි. මේ සම්බන්ධයෙන් ඔවුන්ගේ අත්දැකීම් සහ අදහස් නිසැකව ම සැකසෙන්නේ WWII හි ඔවුන්ගේ අත්දැකීම් සහ ගැඹුරින් ආරක්ෂා කිරීම වඩාත් බලවත් සන්නාහ කේන්ද්‍රීය ප්‍රහාරය පවා කඩාකප්පල් කළ හැකි බව පිළිගැනීමෙනි. &quot;Blitzkrieg&quot; යුද්ධයේදී, බොහෝ විට ඉදිරි ගමනක් සිදු වන්නේ සන්නාහයෙනි, එනම්, සන්නාහය ප්‍රහාරයට නායකත්වය දෙයි; සහ බොහෝ විට පටු සංකේන්ද්රිත ප්රදේශයක. කම්පනය සහ වෙඩි බලයෙන් අදහස් කරන්නේ සතුරු රේඛා හරහා ඉදිරියට යාමට සහ ඔහුව ස්ථීර ලෙස සමතුලිත කිරීම සඳහා සාන්ද්‍රගත ආධාරක ආයුධ සමඟින් ටැංකිවලට තල්ලුව ලබා දීමයි. සංස්කරණය: මම ටිකක් ළඟා විය හැක, නමුත් අපි සීතල යුද්ධ යුගය හරහා ගමන් කරන විට සන්නාහ-පළමු ප්‍රහාරයක් භයානක විය හැකි දෝෂ ඇත

In [8]:
# Note: We don't need to import PeftModel directly anymore for this cell
# from peft import PeftModel

# Load the base model (your Wikipedia-tuned model)
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
model.config.use_cache = False
model.config.pretraining_tp = 1

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Prepare model for k-bit training
model = prepare_model_for_kbit_training(model)

# --- NEW AND IMPROVED LOADING STRATEGY ---

# 1. Attach a brand new LoRA adapter first.
# This correctly sets up the model with trainable parameters.
print("Attaching a new LoRA adapter structure...")
model = get_peft_model(model, lora_config)
print("New adapter attached. Parameters are now set to be trainable:")
model.print_trainable_parameters()


# 2. Now, load the weights from your previously trained QA adapter into this structure.
print(f"\nLoading saved weights from adapter: {adapter_to_continue_from}")
model.load_adapter(adapter_to_continue_from, adapter_name="default")
print("Adapter weights loaded successfully.")

# Final check (should show the same trainable parameter count)
print("\nFinal parameter check after loading weights:")
model.print_trainable_parameters()

config.json:   0%|          | 0.00/873 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/21.0k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/325 [00:00<?, ?B/s]

chat_template.jinja:   0%|          | 0.00/3.83k [00:00<?, ?B/s]

Attaching a new LoRA adapter structure...
New adapter attached. Parameters are now set to be trainable:
trainable params: 24,313,856 || all params: 3,237,063,680 || trainable%: 0.7511

Loading saved weights from adapter: RedQueenProtocol/sinhala-QA-LoRA


adapter_model.safetensors:   0%|          | 0.00/97.3M [00:00<?, ?B/s]

Adapter weights loaded successfully.

Final parameter check after loading weights:
trainable params: 24,313,856 || all params: 3,237,063,680 || trainable%: 0.7511


In [9]:
# Initialize W&B run
wandb.init(project="sinhala-qa-finetune", name="run-3-qa-lora-ihalage")

# Define tokenizer function for the Trainer
def tokenize_text(examples, tokenizer, max_length):
    tokenized_inputs = tokenizer(examples["text"], truncation=True, max_length=max_length, padding="max_length")
    # Add labels
    tokenized_inputs["labels"] = tokenized_inputs["input_ids"].copy()
    return tokenized_inputs

# Tokenize the datasets
tokenized_train_dataset = train_dataset.map(partial(tokenize_text, tokenizer=tokenizer, max_length=max_seq_length), batched=True)
tokenized_eval_dataset = eval_dataset.map(partial(tokenize_text, tokenizer=tokenizer, max_length=max_seq_length), batched=True)

# Set up TrainingArguments
training_args = TrainingArguments(
    output_dir="./qa_lora_checkpoints",
    per_device_train_batch_size=micro_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim="paged_adamw_8bit",
    learning_rate=learning_rate,
    num_train_epochs=num_train_epochs,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=10,
    fp16=True,
    load_best_model_at_end=True,
    report_to="wandb",
)

# Create Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_eval_dataset,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

# Train the model
trainer.train()

# End the W&B run
wandb.finish()

Map:   0%|          | 0/9000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss
1,0.5324,0.534252
2,0.5056,0.524914
3,0.4883,0.523303


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


0,1
eval/loss,█▂▁
eval/runtime,▁▁█
eval/samples_per_second,██▁
eval/steps_per_second,██▁
train/epoch,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇█
train/global_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇█████
train/grad_norm,▂▂▃▁▂▂▃▂▃▂▄▃▃▄▃▄▅▄▄▅▆▆▅▅▆▆▅▆▆▆▆▆▇▆▇▆██▇▇
train/learning_rate,█████▇▇▇▇▆▆▆▆▅▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁
train/loss,█▆▅▅▅▅▄▄▄▄▄▃▃▃▄▃▃▃▃▃▂▂▃▃▂▃▁▂▂▂▂▂▂▁▂▂▁▂▂▂

0,1
eval/loss,0.5233
eval/runtime,44.0538
eval/samples_per_second,22.7
eval/steps_per_second,2.837
total_flos,2.35814872154112e+17
train/epoch,3.0
train/global_step,3375.0
train/grad_norm,0.7933
train/learning_rate,0.0
train/loss,0.4883


In [10]:
# Push the newly trained QA adapter to the Hub
print(f"Pushing the new QA LoRA adapter to: {final_adapter_repo}")
trainer.model.push_to_hub(final_adapter_repo, commit_message="Fine-tuned on Sinhala QA dataset")

print("--- Done! ---")

Pushing the new QA LoRA adapter to: RedQueenProtocol/sinhala-QA-LoRA


README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...pd1qvaz58/adapter_model.safetensors:   0%|          | 45.8kB / 97.3MB            

--- Done! ---


## 3rd dataset

In [9]:
import os
import torch
import gc
from datasets import load_dataset, DatasetDict
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import (
    LoraConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training
)
from functools import partial
from huggingface_hub import HfApi
import wandb

# --- Configuration & Setup ---
base_model_name = "RedQueenProtocol/llama-3.2-3b-it-sinhala-rq"
qa_dataset_name = "janani-rane/SiQuAD"
hf_qa_lora_repo_id = "RedQueenProtocol/sinhala-QA-LoRA"
output_dir = "./qa_lora_local_checkpoints"

# Training Parameters
micro_batch_size = 4
gradient_accumulation_steps = 2
learning_rate = 2e-4
num_train_epochs = 3
logging_steps = 10
eval_steps = 500
warmup_ratio = 0.03
lr_scheduler_type = "cosine"
max_seq_length = 512

# LoRA Configuration
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
)

# Quantization Configuration
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

# Initialize Hugging Face API
api = HfApi()

def clear_gpu_memory():
    """Clears GPU memory and performs garbage collection."""
    print("Attempting to clear GPU memory...")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        print("CUDA cache emptied.")
    gc.collect()
    print("Garbage collection performed.")

def tokenize_qa_function(examples, tokenizer, max_seq_length):
    """
    Tokenizes the QA dataset into a structured prompt format.
    """
    full_text = []
    for context, question, answer in zip(examples['context'], examples['question'], examples['answer_text']):
        if isinstance(answer, list) and len(answer) > 0:
            answer = answer[0]
        elif not isinstance(answer, str):
            answer = str(answer)

        full_text.append(f"Context: {context}\nQuestion: {question}\nAnswer: {answer}")

    tokenized_output = tokenizer(
        full_text,
        truncation=True,
        max_length=max_seq_length,
        padding="max_length"
    )

    tokenized_output['labels'] = tokenized_output['input_ids'].copy()

    return tokenized_output

def run_qa_fine_tuning():

    # Initialize W&B run
    wandb.init(project="sinhala-qa-finetune", name="run-4-qa-lora-janani")

    clear_gpu_memory()

    print(f"\n--- Loading and processing the QA dataset: {qa_dataset_name} ---")

    # Load each split individually using their file names, and then create a DatasetDict
    train_dataset = load_dataset("csv", data_files=f"hf://datasets/{qa_dataset_name}/train.csv", split="train")
    validation_dataset = load_dataset("csv", data_files=f"hf://datasets/{qa_dataset_name}/dev.csv", split="train")

    full_dataset = DatasetDict({
        "train": train_dataset,
        "validation": validation_dataset
    })

    print(f"Loaded train split with {len(full_dataset['train'])} samples.")
    print(f"Loaded validation split with {len(full_dataset['validation'])} samples.")

    # Load base model and tokenizer
    print(f"\nLoading base model: {base_model_name}...")
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
    )
    base_model.config.use_cache = False
    base_model.config.pretraining_tp = 1

    tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    # Prepare base model for k-bit training
    base_model = prepare_model_for_kbit_training(base_model)

    # Load the existing LoRA adapter
    print(f"Loading previous QA LoRA adapter from Hub: {hf_qa_lora_repo_id}")
    peft_model = PeftModel.from_pretrained(base_model, hf_qa_lora_repo_id)

    # Re-initialize the PEFT model to ensure trainable parameters are properly set
    # This is the same fix from your previous incremental training.
    print("Re-initializing LoRA adapter to ensure trainable parameters are detected.")
    model = get_peft_model(peft_model, lora_config)

    # Verify trainable parameters
    model.train()
    print("LoRA adapter attached to the model:")
    model.print_trainable_parameters()

    # Tokenize the QA datasets
    print("\nTokenizing datasets...")
    tokenize_func_partial = partial(tokenize_qa_function, tokenizer=tokenizer, max_seq_length=max_seq_length)

    tokenized_train_dataset = full_dataset['train'].map(tokenize_func_partial, batched=True, num_proc=os.cpu_count(), remove_columns=full_dataset['train'].column_names)
    tokenized_validation_dataset = full_dataset['validation'].map(tokenize_func_partial, batched=True, num_proc=os.cpu_count(), remove_columns=full_dataset['validation'].column_names)

    tokenized_train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
    tokenized_validation_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

    # Training arguments
    training_args = TrainingArguments(
        output_dir=output_dir,
        per_device_train_batch_size=micro_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        gradient_checkpointing=True,
        optim="paged_adamw_8bit",
        learning_rate=learning_rate,
        num_train_epochs=num_train_epochs,
        logging_steps=logging_steps,
        save_strategy="epoch",
        eval_strategy="epoch",
        dataloader_num_workers=os.cpu_count(),
        report_to="wandb",
        fp16=False,
        bf16=True,
        push_to_hub=False,
        lr_scheduler_type=lr_scheduler_type,
        warmup_ratio=warmup_ratio,
    )

    # Create and run Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train_dataset,
        eval_dataset=tokenized_validation_dataset,
        data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False),
    )

    print("\n--- Starting fine-tuning on the SiQuAD dataset ---")
    trainer.train()

    # Save the final LoRA adapter locally
    final_lora_save_path = os.path.join(output_dir, "final_qa_adapter")
    trainer.model.save_pretrained(final_lora_save_path)
    print(f"\nFinal QA LoRA adapter saved locally to: {final_lora_save_path}")

    # Push to Hugging Face Hub to update the QA adapter
    print(f"Pushing updated QA LoRA adapter to Hugging Face Hub: {hf_qa_lora_repo_id}")
    trainer.model.push_to_hub(
        hf_qa_lora_repo_id,
        private=False,
        commit_message=f"Fine-tuned on {qa_dataset_name} for {num_train_epochs} epochs"
    )
    tokenizer.push_to_hub(hf_qa_lora_repo_id)
    print("QA LoRA adapter successfully pushed to Hub.")

    # End the W&B run
    wandb.finish()

# Execute the fine-tuning process
run_qa_fine_tuning()

Attempting to clear GPU memory...
CUDA cache emptied.
Garbage collection performed.

--- Loading and processing the QA dataset: janani-rane/SiQuAD ---
Loaded train split with 13500 samples.
Loaded validation split with 1250 samples.

Loading base model: RedQueenProtocol/llama-3.2-3b-it-sinhala-rq...


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading previous QA LoRA adapter from Hub: RedQueenProtocol/sinhala-QA-LoRA
Re-initializing LoRA adapter to ensure trainable parameters are detected.




LoRA adapter attached to the model:
trainable params: 24,313,856 || all params: 3,237,063,680 || trainable%: 0.7511

Tokenizing datasets...


Map (num_proc=12):   0%|          | 0/13500 [00:00<?, ? examples/s]

Map (num_proc=12):   0%|          | 0/1250 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.



--- Starting fine-tuning on the SiQuAD dataset ---


Epoch,Training Loss,Validation Loss
1,0.4314,0.450069
2,0.2841,0.296332
3,0.1474,0.242371



Final QA LoRA adapter saved locally to: ./qa_lora_local_checkpoints/final_qa_adapter
Pushing updated QA LoRA adapter to Hugging Face Hub: RedQueenProtocol/sinhala-QA-LoRA


README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...prp_730ko/adapter_model.safetensors:   1%|          |  554kB / 97.3MB            

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  /tmp/tmp5nq3ezkl/tokenizer.json       : 100%|##########| 17.2MB / 17.2MB            

QA LoRA adapter successfully pushed to Hub.


0,1
eval/loss,█▃▁
eval/runtime,▁██
eval/samples_per_second,█▁▁
eval/steps_per_second,█▁▁
train/epoch,▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇█
train/global_step,▁▁▁▁▂▂▂▂▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▇▇▇▇▇▇▇▇▇████
train/grad_norm,█▁▁▁▂▁▁▁▁▁▁▁▂▁▁▂▁▂▁▂▂▁▂▂▂▂▂▂▂▂▂▃▂▂▁▂▁▂▂▂
train/learning_rate,▄▇███████▇▇▇▇▆▆▆▆▆▆▆▅▅▅▅▅▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁
train/loss,██▇▇▆▆▅▅▅▅▄▄▄▄▄▄▄▄▄▃▃▃▃▃▃▁▂▂▁▁▂▁▂▁▁▁▁▂▁▁

0,1
eval/loss,0.24237
eval/runtime,52.3135
eval/samples_per_second,23.894
eval/steps_per_second,3.001
total_flos,3.53722308231168e+17
train/epoch,3.0
train/global_step,5064.0
train/grad_norm,0.57472
train/learning_rate,0.0
train/loss,0.1474


In [21]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Configuration
base_model_name = "RedQueenProtocol/llama-3.2-3b-it-sinhala-rq"
lora_adapter_repo_id = "RedQueenProtocol/sinhala-QA-LoRA"
merged_model_repo_id = "RedQueenProtocol/llama-3.2-3b-it-sinhala-qa-merged" # New repo for the merged model

# Check for GPU availability
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# 1. Load the base model
print(f"Loading base model from: {base_model_name}...")
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.bfloat16,
    device_map=device
)

# 2. Load the LoRA adapter
print(f"Loading LoRA adapter from: {lora_adapter_repo_id}...")
model = PeftModel.from_pretrained(model, lora_adapter_repo_id)

# 3. Merge and unload the LoRA weights
print("Merging LoRA weights into the base model...")
merged_model = model.merge_and_unload()
print("Merging complete.")

# 4. Load the tokenizer
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

# 5. Push the merged model and tokenizer to the Hugging Face Hub
print(f"\nPushing merged model and tokenizer to Hub: {merged_model_repo_id}")
merged_model.push_to_hub(merged_model_repo_id, private=False)
tokenizer.push_to_hub(merged_model_repo_id, private=False)
print("Model successfully pushed to Hub!")

Using device: cuda
Loading base model from: RedQueenProtocol/llama-3.2-3b-it-sinhala-rq...


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading LoRA adapter from: RedQueenProtocol/sinhala-QA-LoRA...
Merging LoRA weights into the base model...
Merging complete.
Loading tokenizer...

Pushing merged model and tokenizer to Hub: RedQueenProtocol/llama-3.2-3b-it-sinhala-qa-merged


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...u_/model-00002-of-00002.safetensors:   3%|2         | 41.8MB / 1.46GB            

  ...u_/model-00001-of-00002.safetensors:   1%|          | 41.9MB / 4.97GB            

No files have been modified since last commit. Skipping to prevent empty commit.


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  /tmp/tmp2jxde7dk/tokenizer.json       : 100%|##########| 17.2MB / 17.2MB            

No files have been modified since last commit. Skipping to prevent empty commit.


Model successfully pushed to Hub!
