## Install Libraries (with Flash Attention)

In [1]:
# Install the main libraries (flash-attn has been removed)
!pip install -q -U bitsandbytes
!pip install -q -U transformers
!pip install -q -U peft
!pip install -q -U accelerate
!pip install -q -U datasets
!pip install -q -U wandb

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 MB[0m [31m30.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m85.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m83.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m44.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m37.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# 1. Force uninstall flash-attn to be certain it's gone
#!pip uninstall -y flash-attn

# 2. Upgrade transformers and accelerate to the latest versions
# This often resolves underlying import conflicts.
#!pip install -q -U transformers accelerate

## Import Necessary Packages

In [2]:
import os
import torch
import gc
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import (
    LoraConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training
)
from functools import partial
import wandb
from huggingface_hub import notebook_login, HfApi

## Logins for Hugging Face and W&B

In [3]:
# Log in to Hugging Face
notebook_login()

# Log in to Weights & Biases for experiment tracking
wandb.login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mscythe410[0m ([33mscythe410-informatics-institute-of-technology[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

## Configuration and Parameters (Updated for A100)

In [4]:
# --- Model and Dataset Parameters ---
base_model_name = "RedQueenProtocol/llama-3.2-3b-it-sinhala-rq"
dataset_name = "RedQueenProtocol/all-articles-from-sinhala-wikipedia-2025-parquet"

# --- LoRA Adapter Repository on Hugging Face Hub ---
hf_repo_id = "RedQueenProtocol/sinhala-wiki-2025-LoRA" # Replace with your HF username/repo

# --- Output Directories ---
output_dir = "./incremental_lora_local_checkpoints"
final_merged_model_output_dir = "./final_merged_sinhala_model"

# --- Training Parameters (Tuned for A100) ---
# ✅ With an A100, you can increase batch size and sequence length for better quality
micro_batch_size = 8
gradient_accumulation_steps = 2
learning_rate = 2e-4
num_train_epochs_per_file = 1
logging_steps = 10
max_seq_length = 512 # The length of each chunk
chunk_stride = 256    # The overlap between chunks

# --- LoRA, Quantization, and Memory Cleanup (No changes needed here) ---
lora_config = LoraConfig(
    r=16, lora_alpha=32, lora_dropout=0.05, bias="none", task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16, bnb_4bit_use_double_quant=False,
)

def clear_gpu_memory():
    print("Attempting to clear GPU memory...")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        print("CUDA cache emptied.")
    gc.collect()
    print("Garbage collection performed.")

# --- ✨ New Chunking and Tokenization Function ---
def chunk_and_tokenize(examples, tokenizer, max_seq_length, stride):
    """Process a batch of examples by tokenizing and creating overlapping chunks."""
    all_chunks = {'input_ids': [], 'attention_mask': []}

    # Tokenize the entire batch of articles at once
    tokenized_articles = tokenizer(examples["article"], padding=False, truncation=False)

    for input_ids in tokenized_articles['input_ids']:
        # If the article is shorter than our chunk size, just pad it and add it
        if len(input_ids) <= max_seq_length:
            padded_ids = input_ids + [tokenizer.pad_token_id] * (max_seq_length - len(input_ids))
            attention_mask = [1] * len(input_ids) + [0] * (max_seq_length - len(input_ids))
            all_chunks['input_ids'].append(padded_ids)
            all_chunks['attention_mask'].append(attention_mask)
            continue

        # For long articles, create overlapping chunks
        for i in range(0, len(input_ids), stride):
            # Extract a chunk
            chunk = input_ids[i : i + max_seq_length]

            # If the last chunk is too short, we can ignore it or pad it. Here, we ignore.
            if len(chunk) < max_seq_length:
                continue

            all_chunks['input_ids'].append(chunk)
            all_chunks['attention_mask'].append([1] * max_seq_length)

    return all_chunks

## Load Model and LoRA Adapter (with Flash Attention)

In [5]:
# --- 1. Load Model and Tokenizer ---
print("--- Loading Base Model ---")

# The attn_implementation flag has been removed
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
base_model.config.use_cache = False
base_model.config.pretraining_tp = 1

base_model = prepare_model_for_kbit_training(base_model)

tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# --- 2. Load or Initialize LoRA Adapter ---
api = HfApi()
adapter_exists_on_hub = api.file_exists(repo_id=hf_repo_id, filename="adapter_config.json")

if adapter_exists_on_hub:
    print(f"Loading existing LoRA adapter from Hub: {hf_repo_id}")
    model = PeftModel.from_pretrained(base_model, hf_repo_id)
else:
    print("Initializing a new LoRA adapter.")
    model = get_peft_model(base_model, lora_config)

print("--- Initial LoRA Adapter Loaded ---")
model.print_trainable_parameters()
model.train()

--- Loading Base Model ---


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/872 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/21.0k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/2.92G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

chat_template.jinja:   0%|          | 0.00/3.83k [00:00<?, ?B/s]

Initializing a new LoRA adapter.
--- Initial LoRA Adapter Loaded ---
trainable params: 24,313,856 || all params: 3,237,063,680 || trainable%: 0.7511


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 3072)
        (layers): ModuleList(
          (0-27): 28 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=3072, out_features=3072, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3072, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=3072, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lor

## Incremental Training Loop (with Chunking)

In [6]:
# Initialize wandb ONCE before the training loop
import wandb
wandb.init(project="sin-lora-incremental", name="A100-Chunking-Run")

# --- Incremental Training Loop ---
parquet_file_names = [f"sinhala_articles_part{i}_cleaned.parquet" for i in range(1, 8)]

for i, file_name in enumerate(parquet_file_names):
    print(f"\n--- Processing file {i+1}/{len(parquet_file_names)}: {file_name} ---")

    current_dataset = load_dataset(dataset_name, data_files=file_name, split="train")

    # ✨ Use the new chunking function here
    # The map function will now create a much larger dataset of smaller chunks
    print("Chunking and tokenizing dataset...")
    chunked_dataset = current_dataset.map(
        partial(chunk_and_tokenize, tokenizer=tokenizer, max_seq_length=max_seq_length, stride=chunk_stride),
        batched=True,
        num_proc=os.cpu_count(),
        remove_columns=current_dataset.column_names
    )

    split_dataset = chunked_dataset.train_test_split(test_size=0.05, seed=42) # Use a smaller eval set
    tokenized_train_dataset = split_dataset['train']
    tokenized_eval_dataset = split_dataset['test']

    print(f"Original samples: {len(current_dataset)}. Chunked samples: {len(chunked_dataset)}")

    segment_output_dir = os.path.join(output_dir, f"segment_{i+1}")
    training_args = TrainingArguments(
        output_dir=segment_output_dir,
        per_device_train_batch_size=micro_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        gradient_checkpointing=True,
        optim="paged_adamw_8bit",
        learning_rate=learning_rate,
        num_train_epochs=num_train_epochs_per_file,
        logging_steps=logging_steps,
        save_strategy="epoch",
        eval_strategy="epoch",
        report_to="wandb",
        fp16=True,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train_dataset,
        eval_dataset=tokenized_eval_dataset,
        data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False),
    )

    print("Starting training for this segment...")
    trainer.train()
    print("Segment training complete.")

    print(f"Pushing updated LoRA adapter to Hub: {hf_repo_id}")
    model.push_to_hub(
        hf_repo_id,
        commit_message=f"Training on {file_name} (Segment {i+1}/{len(parquet_file_names)})",
        private=False
    )
    tokenizer.push_to_hub(hf_repo_id)

    # --- Aggressive Memory Cleanup ---
    del trainer, current_dataset, chunked_dataset, split_dataset
    del tokenized_train_dataset, tokenized_eval_dataset
    clear_gpu_memory()

print("\n--- Incremental training loop finished! ---")
wandb.finish()


--- Processing file 1/7: sinhala_articles_part1_cleaned.parquet ---


README.md:   0%|          | 0.00/1.44k [00:00<?, ?B/s]

sinhala_articles_part1_cleaned.parquet:   0%|          | 0.00/7.82M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Chunking and tokenizing dataset...


Map (num_proc=12):   0%|          | 0/3435 [00:00<?, ? examples/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (143868 > 131072). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (148456 > 131072). Running this sequence through the model will result in indexing errors
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Original samples: 3435. Chunked samples: 46769
Starting training for this segment...




Epoch,Training Loss,Validation Loss
1,0.6258,0.611063


Segment training complete.
Pushing updated LoRA adapter to Hub: RedQueenProtocol/sinhala-wiki-2025-LoRA


README.md:   0%|          | 0.00/24.0 [00:00<?, ?B/s]

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...p4mr3cwd7/adapter_model.safetensors:   0%|          | 45.8kB / 97.3MB            

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  /tmp/tmpx0rdhbes/tokenizer.json       : 100%|##########| 17.2MB / 17.2MB            

Attempting to clear GPU memory...
CUDA cache emptied.
Garbage collection performed.

--- Processing file 2/7: sinhala_articles_part2_cleaned.parquet ---


sinhala_articles_part2_cleaned.parquet:   0%|          | 0.00/7.86M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Chunking and tokenizing dataset...


Map (num_proc=12):   0%|          | 0/3418 [00:00<?, ? examples/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (133574 > 131072). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (133982 > 131072). Running this sequence through the model will result in indexing errors
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Original samples: 3418. Chunked samples: 47674
Starting training for this segment...


Epoch,Training Loss,Validation Loss
1,0.5486,0.561616


Segment training complete.
Pushing updated LoRA adapter to Hub: RedQueenProtocol/sinhala-wiki-2025-LoRA


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...p8gwjzkoo/adapter_model.safetensors:   0%|          | 45.8kB / 97.3MB            

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  /tmp/tmpqo1q3ten/tokenizer.json       : 100%|##########| 17.2MB / 17.2MB            

No files have been modified since last commit. Skipping to prevent empty commit.


Attempting to clear GPU memory...
CUDA cache emptied.
Garbage collection performed.

--- Processing file 3/7: sinhala_articles_part3_cleaned.parquet ---


sinhala_articles_part3_cleaned.parquet:   0%|          | 0.00/9.39M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Chunking and tokenizing dataset...


Map (num_proc=12):   0%|          | 0/3588 [00:00<?, ? examples/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (1038005 > 131072). Running this sequence through the model will result in indexing errors
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Original samples: 3588. Chunked samples: 56952
Starting training for this segment...


Epoch,Training Loss,Validation Loss
1,0.5878,0.555507


Segment training complete.
Pushing updated LoRA adapter to Hub: RedQueenProtocol/sinhala-wiki-2025-LoRA


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...pa9f6y0ae/adapter_model.safetensors:   0%|          | 45.8kB / 97.3MB            

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  /tmp/tmpt08sreqz/tokenizer.json       : 100%|##########| 17.2MB / 17.2MB            

No files have been modified since last commit. Skipping to prevent empty commit.


Attempting to clear GPU memory...
CUDA cache emptied.
Garbage collection performed.

--- Processing file 4/7: sinhala_articles_part4_cleaned.parquet ---


sinhala_articles_part4_cleaned.parquet:   0%|          | 0.00/7.95M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Chunking and tokenizing dataset...


Map (num_proc=12):   0%|          | 0/3737 [00:00<?, ? examples/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (132279 > 131072). Running this sequence through the model will result in indexing errors
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Original samples: 3737. Chunked samples: 47759
Starting training for this segment...


Epoch,Training Loss,Validation Loss
1,0.541,0.551042


Segment training complete.
Pushing updated LoRA adapter to Hub: RedQueenProtocol/sinhala-wiki-2025-LoRA


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...ph9xvqieg/adapter_model.safetensors:   0%|          | 45.8kB / 97.3MB            

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  /tmp/tmpgzte0y38/tokenizer.json       : 100%|##########| 17.2MB / 17.2MB            

No files have been modified since last commit. Skipping to prevent empty commit.


Attempting to clear GPU memory...
CUDA cache emptied.
Garbage collection performed.

--- Processing file 5/7: sinhala_articles_part5_cleaned.parquet ---


sinhala_articles_part5_cleaned.parquet:   0%|          | 0.00/8.65M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Chunking and tokenizing dataset...


Map (num_proc=12):   0%|          | 0/3680 [00:00<?, ? examples/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (141854 > 131072). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (133831 > 131072). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (131970 > 131072). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (157588 > 131072). Running this sequence through the model will result in indexing errors
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used

Original samples: 3680. Chunked samples: 52534
Starting training for this segment...


Epoch,Training Loss,Validation Loss
1,0.524,0.52346


Segment training complete.
Pushing updated LoRA adapter to Hub: RedQueenProtocol/sinhala-wiki-2025-LoRA


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...p1czo1mpb/adapter_model.safetensors:   0%|          | 45.8kB / 97.3MB            

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  /tmp/tmpztgu6f48/tokenizer.json       : 100%|##########| 17.2MB / 17.2MB            

No files have been modified since last commit. Skipping to prevent empty commit.


Attempting to clear GPU memory...
CUDA cache emptied.
Garbage collection performed.

--- Processing file 6/7: sinhala_articles_part6_cleaned.parquet ---


sinhala_articles_part6_cleaned.parquet:   0%|          | 0.00/8.19M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Chunking and tokenizing dataset...


Map (num_proc=12):   0%|          | 0/3573 [00:00<?, ? examples/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (566646 > 131072). Running this sequence through the model will result in indexing errors
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Original samples: 3573. Chunked samples: 49294
Starting training for this segment...


Epoch,Training Loss,Validation Loss
1,0.5356,0.53067


Segment training complete.
Pushing updated LoRA adapter to Hub: RedQueenProtocol/sinhala-wiki-2025-LoRA


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...p34i8v0k5/adapter_model.safetensors:   0%|          | 45.8kB / 97.3MB            

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  /tmp/tmpzpk02upy/tokenizer.json       : 100%|##########| 17.2MB / 17.2MB            

No files have been modified since last commit. Skipping to prevent empty commit.


Attempting to clear GPU memory...
CUDA cache emptied.
Garbage collection performed.

--- Processing file 7/7: sinhala_articles_part7_cleaned.parquet ---


sinhala_articles_part7_cleaned.parquet:   0%|          | 0.00/9.13M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Chunking and tokenizing dataset...


Map (num_proc=12):   0%|          | 0/3736 [00:00<?, ? examples/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (152796 > 131072). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (137785 > 131072). Running this sequence through the model will result in indexing errors
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Original samples: 3736. Chunked samples: 55193
Starting training for this segment...


Epoch,Training Loss,Validation Loss


Epoch,Training Loss,Validation Loss
1,0.5491,0.528974


Segment training complete.
Pushing updated LoRA adapter to Hub: RedQueenProtocol/sinhala-wiki-2025-LoRA


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...p0plmak7u/adapter_model.safetensors:   0%|          | 45.8kB / 97.3MB            

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  /tmp/tmpskolvt7e/tokenizer.json       : 100%|##########| 17.2MB / 17.2MB            

No files have been modified since last commit. Skipping to prevent empty commit.


Attempting to clear GPU memory...
CUDA cache emptied.
Garbage collection performed.

--- Incremental training loop finished! ---


0,1
eval/loss,█▄▄▃▁▂▁
eval/runtime,▁▂█▂▅▃▇
eval/samples_per_second,▃█▁▂▅▇█
eval/steps_per_second,▅▅▁▃▆█▅
train/epoch,▁▂▂▃▆█▁▃▄▄▅▅▅▇▇▂▄▄▅▆▇▇▁▃▆▇▂▃▃▅▅▆█▁▂▃▁▂▃▇
train/global_step,▁▂▅▅▇▃▃▄▄▄▆▇▇▁▁▃▅▅▆█▂▅▆▆▃▇▇██▃▁▁▂▃▄▆▆▆▆█
train/grad_norm,▃▃▆▇▂▇▅▇▅▂▆▄▃▃▆▅▆█▆▄▃▃▆▄▆▄▅▆▁▇▂▆▆▄▃▇▂▆▄█
train/learning_rate,██▂▁█▅▄▃▂▁▅▅▃▃▂▂▁█▇▇▅▅███▇▆▆▅▅▄▁▄▃▁▇▆▅▄▂
train/loss,██▇▆▅▅▆▅▆▄▃▃▃▃▅▃▃▂▆▄▃▃▂▂▂▁▅▃▄▃▁▃▃▁▄▃▂▂▂▂

0,1
eval/loss,0.52897
eval/runtime,118.0783
eval/samples_per_second,23.374
eval/steps_per_second,2.922
total_flos,4.5794374783913165e+17
train/epoch,1.0
train/global_step,3278.0
train/grad_norm,0.42761
train/learning_rate,0.0
train/loss,0.5491


## Final Step - Merge LoRA and Save Full Model

In [7]:
# --- 4. Final Step: Merge LoRA and Save/Push the Full Model ---
print("\n--- Freeing up memory before final merge ---")
# Clean up the model used for training to free VRAM
del model
del base_model
clear_gpu_memory()

# Load the original base model again, this time in higher precision for a clean merge
print("\n--- Merging final LoRA weights into the base model ---")
base_model_for_merge = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)

# Load the final, fully trained LoRA adapter from the Hugging Face Hub
print(f"Loading final LoRA adapter from {hf_repo_id} for merging...")
final_peft_model = PeftModel.from_pretrained(base_model_for_merge, hf_repo_id)

# Merge the adapter weights into the base model and unload the PEFT model
merged_model = final_peft_model.merge_and_unload()
print("LoRA weights merged successfully.")

# Save the complete, merged model locally
print(f"Saving final merged model locally to: {final_merged_model_output_dir}")
os.makedirs(final_merged_model_output_dir, exist_ok=True)
merged_model.save_pretrained(final_merged_model_output_dir)
tokenizer.save_pretrained(final_merged_model_output_dir)
print("Final model saved.")


--- Freeing up memory before final merge ---
Attempting to clear GPU memory...
CUDA cache emptied.
Garbage collection performed.

--- Merging final LoRA weights into the base model ---


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading final LoRA adapter from RedQueenProtocol/sinhala-wiki-2025-LoRA for merging...


adapter_config.json:   0%|          | 0.00/926 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/97.3M [00:00<?, ?B/s]

LoRA weights merged successfully.
Saving final merged model locally to: ./final_merged_sinhala_model
Final model saved.


## (Optional) Push Final Merged Model to Hugging Face Hub

In [8]:
# This pushes the complete, standalone model to a new repository.
# We create a new repo name to avoid overwriting the LoRA adapter repo.
hf_merged_repo_id = f"{hf_repo_id}-merged"
print(f"Pushing final merged model to new Hub repo: {hf_merged_repo_id}")

# Push the model and tokenizer to the new repository
merged_model.push_to_hub(hf_merged_repo_id, private=False, commit_message="Final merged model after incremental LoRA training")
tokenizer.push_to_hub(hf_merged_repo_id)

print(f"Final merged model successfully pushed to: https://huggingface.co/{hf_merged_repo_id}")

Pushing final merged model to new Hub repo: RedQueenProtocol/sinhala-wiki-2025-LoRA-merged


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...e1/model-00002-of-00002.safetensors:   0%|          |  608kB / 1.46GB            

  ...e1/model-00001-of-00002.safetensors:   1%|          | 33.5MB / 4.97GB            

README.md: 0.00B [00:00, ?B/s]

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  /tmp/tmpvp2xytiw/tokenizer.json       : 100%|##########| 17.2MB / 17.2MB            

Final merged model successfully pushed to: https://huggingface.co/RedQueenProtocol/sinhala-wiki-2025-LoRA-merged
