In [None]:
# %% [code] Install dependencies with precise versions
!pip install -q -U \
    transformers==4.40.0 \
    datasets==2.18.0 \
    accelerate==0.29.2 \
    peft==0.10.0 \
    bitsandbytes==0.43.0 \
    trl==0.8.0 \
    sentencepiece==0.1.99 \
    sacrebleu==2.3.1 \
    comet-ml==3.35.0 \
    tensorboardX==2.6.2.2

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.2/102.2 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m98.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m580.1/580.1 kB[0m [31m40.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m101.7 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
sentence-transformers 3.3.1 requires transformers<5.0.0,>=4.41.0, but you have transformers 4.40.0 which is incompatible.[0m[31m
[0m

In [None]:
# %% [code] Imports
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
from datasets import load_dataset
import numpy as np
from huggingface_hub import notebook_login, HfApi
import gc
import os

comet_ml is installed but `COMET_API_KEY` is not set.


In [None]:
# Evaluation metrics
from sacrebleu import corpus_bleu, CHRF
# from comet import download_model, load_from_checkpoint

In [None]:
# %% [code] Dataset loading
def load_itihasa_dataset():
    """Load the `rahular/itihasa` dataset and print the size of each split.

    Returns:
        The object returned by `load_dataset`, indexable by the split names
        'train', 'validation' and 'test'.
    """
    splits = load_dataset("rahular/itihasa")

    train_split = splits['train']
    print(f"Train samples: {len(train_split)}")

    validation_split = splits['validation']
    print(f"Validation samples: {len(validation_split)}")

    test_split = splits['test']
    print(f"Test samples: {len(test_split)}")

    return splits


# Module-level handle used by the formatting and evaluation cells.
dataset = load_itihasa_dataset()



Train samples: 75162
Validation samples: 6149
Test samples: 11722


In [None]:
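# Log in to the Hugging Face Hub. The base model used in this notebook is gated, so an authorized account is required.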

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# %% [code] Model setup with QLoRA
model_name = "meta-llama/Meta-Llama-3-8B"

# Pick the 4-bit compute dtype the same way the training arguments pick their
# mixed-precision mode: bf16 where the GPU supports it, fp16 otherwise.
# Hard-coding bf16 here would disagree with the fp16 fallback used for training.
compute_dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16

# NF4 quantization with double quantization (QLoRA-style 4-bit loading).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=True
)

# `trust_remote_code` is intentionally not set: the architecture ships with
# transformers, so there is no need to allow execution of code from the Hub repo.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse EOS as the padding token and pad on the right for training.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"




config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/177 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# %% [code] Optimized LoRA config
# Low-rank adapters on the attention query/value projections only; biases are
# left untouched and the adapter is configured for causal language modelling.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],  # Only critical layers
    r=32,  # Reduced from 64
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

In [None]:
# Prepare model for training
# First ready the quantized base model for k-bit training, then wrap it with the
# LoRA adapters described by `peft_config`.
model = get_peft_model(prepare_model_for_kbit_training(model), peft_config)

In [None]:
# %% [code] Training arguments optimized for speed
training_arguments = TrainingArguments(
    output_dir="./llama3-itihasa",

    # Batching: effective batch size per device = 4 * 2 = 8 sequences.
    per_device_train_batch_size=4,  # Increased batch size
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=2,  # Reduced accumulation
    group_by_length=True,

    # Optimisation
    optim="paged_adamw_8bit",
    learning_rate=1e-3,
    lr_scheduler_type="linear",
    warmup_ratio=0.1,
    num_train_epochs=2,
    weight_decay=0.01,
    max_grad_norm=0.5,
    gradient_checkpointing=True,

    # Mixed precision: bf16 where the GPU supports it, fp16 otherwise.
    fp16=not torch.cuda.is_bf16_supported(),
    bf16=torch.cuda.is_bf16_supported(),

    # Logging / evaluation / checkpointing.
    # save_steps is a multiple of eval_steps, as load_best_model_at_end requires.
    logging_steps=50,
    report_to="tensorboard",
    evaluation_strategy="steps",
    eval_steps=500,
    save_strategy="steps",
    save_steps=1000,
    # Keep only the most recent checkpoints (the best one is retained as well when
    # load_best_model_at_end=True) so output_dir does not accumulate a full
    # optimizer/scheduler state for every save step.
    save_total_limit=2,
    load_best_model_at_end=True,

    # Hub: use the same repository the notebook later creates and uploads to,
    # and keep it private like that repository.
    push_to_hub=True,
    hub_model_id="sarch7040/llama3-8b-itihasa-optimized",
    hub_private_repo=True
)

In [None]:
# %% [code] Corrected Dataset Handling
def format_translation(examples):
    """Render Sanskrit/English pairs into the prompt text used for fine-tuning.

    Supports both ways this notebook calls it through `Dataset.map`:

    * batched (`batched=True`): `examples['translation']` is a list of
      `{'sn': ..., 'en': ...}` dicts and the result is `{"text": [str, ...]}`;
    * unbatched: `examples['translation']` is a single such dict and the result
      is `{"text": str}`.

    The unbatched case previously failed with
    `TypeError: string indices must be integers`, because iterating a single
    pair yields its keys rather than pair dicts.

    Args:
        examples: Mapping with a 'translation' entry (one pair or a list of pairs).

    Returns:
        dict with a single 'text' entry: a list of strings for batched input,
        a single string for unbatched input.
    """
    def _render(pair):
        # Template: instruction line, Sanskrit source, then the English answer cue.
        return f"Translate Sanskrit to English:\n{pair['sn']}\nEnglish: {pair['en']}"

    translations = examples['translation']
    if isinstance(translations, dict):
        # Single row (unbatched map): return one string, not a list.
        return {"text": _render(translations)}
    return {"text": [_render(pair) for pair in translations]}

# Apply formatting correctly
# Both splits go through the same batched mapping; the raw 'translation' column
# is dropped so that only the rendered 'text' column remains.
train_dataset = dataset["train"].map(format_translation, batched=True, remove_columns=["translation"])
val_dataset = dataset["validation"].map(format_translation, batched=True, remove_columns=["translation"])

Map:   0%|          | 0/75162 [00:00<?, ? examples/s]

Map:   0%|          | 0/6149 [00:00<?, ? examples/s]

In [None]:
# %% [code] Revised Trainer Configuration
# Supervised fine-tuning on the pre-rendered "text" column, with packing enabled
# and a maximum sequence length of 512.
trainer = SFTTrainer(
    # Model, adapter configuration and tokenizer
    model=model,
    peft_config=peft_config,
    tokenizer=tokenizer,
    # Hyperparameters defined in the training-arguments cell
    args=training_arguments,
    # Data splits
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    # Text handling: the formatting function simply returns the "text" column
    dataset_text_field="text",
    formatting_func=lambda batch: batch["text"],
    packing=True,
    max_seq_length=512,
)

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
# %% [code] Train with memory cleanup
# Report how many parameters will actually receive gradients.
print(f"Total trainable params: {sum(weight.numel() for weight in model.parameters() if weight.requires_grad):,}")

print("Starting training...")
trainer.train()
print("Training complete!")

# Cleanup VRAM
# Drop the trainer reference, run the garbage collector, then release cached CUDA memory.
del trainer
gc.collect()
torch.cuda.empty_cache()


Total trainable params: 13,631,488
Starting training...


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss,Validation Loss
500,1.9819,1.97412
1000,1.8903,1.919243
1500,1.8491,1.876195
2000,1.8106,1.836914
2500,1.7183,1.808461
3000,1.7073,1.77706
3500,1.6737,1.749729
4000,1.6584,1.729505


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Training complete!


In [None]:
# %% [code] Corrected Save & Push
from huggingface_hub import create_repo, HfApi, notebook_login

# 1. Authenticate properly
# Shows the interactive Hugging Face login widget in the notebook; the following
# cell passes token=True to its Hub calls, which presumably picks up the token
# stored by this login (confirm against the huggingface_hub docs).
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# 2. Create repository first
repo_id = "sarch7040/llama3-8b-itihasa-optimized"
create_repo(
    repo_id=repo_id,
    repo_type="model",
    private=True,  # Set to False if public
    exist_ok=True,  # Do not fail if the repository already exists
    token=True  # Use your access token
)

# 3. Save model
# The adapter and tokenizer are written into the training output directory,
# which also contains the intermediate `checkpoint-*` folders.
model.save_pretrained("./llama3-itihasa")
tokenizer.save_pretrained("./llama3-itihasa")

# 4. Upload with progress
# Skip the intermediate checkpoints: they hold optimizer, scheduler and RNG state
# for every save step, which is only useful for resuming training and otherwise
# multiplies the upload size without adding anything to the published model.
api = HfApi()
api.upload_folder(
    folder_path="./llama3-itihasa",
    repo_id=repo_id,
    commit_message="Optimized fine-tuned model",
    commit_description="Llama 3 8B fine-tuned on Itihasa dataset",
    ignore_patterns=["checkpoint-*/*"],
    token=True
)


Cannot access gated repo for url https://huggingface.co/meta-llama/Meta-Llama-3-8B/resolve/main/config.json.
Access to model meta-llama/Meta-Llama-3-8B is restricted and you are not in the authorized list. Visit https://huggingface.co/meta-llama/Meta-Llama-3-8B to ask for access. - silently ignoring the lookup for the file config.json in meta-llama/Meta-Llama-3-8B.


adapter_model.safetensors:   0%|          | 0.00/54.5M [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/54.5M [00:00<?, ?B/s]

optimizer.pt:   0%|          | 0.00/27.5M [00:00<?, ?B/s]

rng_state.pth:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

Upload 25 LFS files:   0%|          | 0/25 [00:00<?, ?it/s]

scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.05k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/54.5M [00:00<?, ?B/s]

optimizer.pt:   0%|          | 0.00/27.5M [00:00<?, ?B/s]

rng_state.pth:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.05k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/54.5M [00:00<?, ?B/s]

rng_state.pth:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

optimizer.pt:   0%|          | 0.00/27.5M [00:00<?, ?B/s]

scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.05k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/54.5M [00:00<?, ?B/s]

optimizer.pt:   0%|          | 0.00/27.5M [00:00<?, ?B/s]

rng_state.pth:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.05k [00:00<?, ?B/s]

events.out.tfevents.1745476365.578579d8d773.33255.0:   0%|          | 0.00/6.38k [00:00<?, ?B/s]

events.out.tfevents.1745476921.578579d8d773.36032.0:   0%|          | 0.00/6.06k [00:00<?, ?B/s]

events.out.tfevents.1745478414.578579d8d773.38709.0:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.05k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/sarch7040/llama3-8b-itihasa-optimized/commit/5b61575b255287788640113458f18f032303f87c', commit_message='Optimized fine-tuned model', commit_description='Llama 3 8B fine-tuned on Itihasa dataset', oid='5b61575b255287788640113458f18f032303f87c', pr_url=None, repo_url=RepoUrl('https://huggingface.co/sarch7040/llama3-8b-itihasa-optimized', endpoint='https://huggingface.co', repo_type='model', repo_id='sarch7040/llama3-8b-itihasa-optimized'), pr_revision=None, pr_num=None)

In [None]:
# %% [code] Evaluation with METEOR and TER
# Evaluation-only dependencies.
# NOTE(review): these are installed unpinned and mid-notebook, unlike the pinned
# install cell at the top — consider pinning them there.
!pip install -q nltk pyter3
import nltk
from nltk.translate.meteor_score import meteor_score
import pyter

# Fetch the WordNet corpus; presumably needed by NLTK's METEOR implementation
# for synonym matching — confirm against the NLTK docs.
nltk.download('wordnet')

def generate_translation(model, tokenizer, input_text):
    """Generate a completion for `input_text` and return only the new text.

    Args:
        model: Causal LM exposing `.device` and `.generate(...)`.
        tokenizer: Tokenizer matching `model`; must expose `eos_token_id`
            and `decode`.
        input_text: Full prompt, expected to end with the "English:" cue used
            in the training template.

    Returns:
        The decoded continuation (prompt tokens excluded), stripped of
        surrounding whitespace.
    """
    # Place the inputs on the model's own device instead of assuming "cuda".
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        # temperature/top_p only apply when sampling; request it explicitly
        # rather than relying on the checkpoint's default generation config.
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
        pad_token_id=tokenizer.eos_token_id
    )
    # The output sequence starts with the prompt tokens. Slice them off instead of
    # splitting the decoded string on "English:", which returns the wrong segment
    # whenever the generated text itself contains that cue.
    prompt_length = inputs["input_ids"].shape[1]
    completion_ids = outputs[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [None]:
# %% [code] Corrected Model Loading
import os
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM

# Release the training-time model before loading a second copy of the base
# weights, so both do not have to fit in memory at the same time.
if "model" in globals():
    del model
gc.collect()
torch.cuda.empty_cache()

# Reload the base model with the same 4-bit quantization config used for training.
# Loading it unquantized in bf16 with disk offloading made this cell fail with
# "You are trying to offload the whole model to the disk" — presumably because
# the full-precision weights did not fit on the available device.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    low_cpu_mem_usage=True
)

# Attach the fine-tuned LoRA adapter saved in the training output directory.
model = PeftModel.from_pretrained(base_model, "./llama3-itihasa")
model.eval()  # Inference mode: disables dropout, including the LoRA dropout.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]



ValueError: You are trying to offload the whole model to the disk. Please use the `disk_offload` function instead.

In [None]:
# Prepare test data
# Evaluate on (at most) the first 1000 test pairs. The raw 'translation' pairs are
# used directly: splitting the rendered text on "English:" is wrong, because the
# instruction line "Translate Sanskrit to English:" already contains that marker,
# so the first split cuts the prompt short and the second piece is the Sanskrit
# source rather than the English reference.
test_data = dataset["test"].select(range(min(1000, len(dataset["test"]))))

# Fix the RNG so repeated runs score the same outputs if generation samples.
torch.manual_seed(42)

# Run evaluation
predictions = []
references = []

for idx, example in enumerate(test_data):  # Use enumerate for proper error reporting
    pair = example["translation"]
    # Same template as training, ending at the "English:" cue so the model
    # produces the translation as its continuation.
    input_text = f"Translate Sanskrit to English:\n{pair['sn']}\nEnglish:"
    ref_text = pair["en"].strip()

    # Only generation is best-effort; data-access errors above should surface.
    try:
        pred = generate_translation(model, tokenizer, input_text)
    except Exception as e:
        print(f"Error on example {idx}: {str(e)}")
        continue

    # Append both together so predictions and references stay aligned.
    predictions.append(pred)
    references.append(ref_text)

Map:   0%|          | 0/11722 [00:00<?, ? examples/s]

TypeError: string indices must be integers

In [None]:
# Calculate metrics
# Corpus-level BLEU and chrF++ come from sacrebleu; METEOR and TER are computed
# per sentence on whitespace tokens and then averaged.
if not predictions:
    raise ValueError("No predictions to score - run the evaluation loop first.")

chrf = CHRF(word_order=2)  # word_order=2 turns chrF into chrF++
bleu = corpus_bleu(predictions, [references]).score
# sacrebleu reports chrF on a 0-100 scale already; do not multiply by 100 again.
chrf_score = chrf.corpus_score(predictions, [references]).score

meteor_scores = []
ter_scores = []

for pred, ref in zip(predictions, references):
    pred_tokens = pred.split()
    ref_tokens = ref.split()

    # METEOR
    meteor_scores.append(meteor_score([ref_tokens], pred_tokens))

    # TER
    ter_scores.append(pyter.ter(pred_tokens, ref_tokens))

metrics = {
    "bleu": bleu,
    "chrf++": chrf_score,
    # The per-sentence METEOR/TER averages are fractions; scale them to percentages.
    "meteor": np.mean(meteor_scores) * 100,
    "ter": np.mean(ter_scores) * 100
}

print(f"BLEU: {metrics['bleu']:.2f}")
print(f"chrF++: {metrics['chrf++']:.2f}")
print(f"METEOR: {metrics['meteor']:.2f}")
print(f"TER: {metrics['ter']:.2f}")

NameError: name 'predictions' is not defined

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import torch

def load_model_no_offloading(adapter_path, base_model_name="meta-llama/Meta-Llama-3-8B"):
    """Load the 4-bit quantized base model and attach a saved LoRA adapter.

    Args:
        adapter_path: Directory (or Hub id) holding the saved adapter and tokenizer.
        base_model_name: Identifier of the base causal LM the adapter was trained on.

    Returns:
        (model, tokenizer): the adapter-wrapped model in eval mode and its tokenizer.
    """
    # Same precision rule as training: bf16 where supported, fp16 otherwise,
    # so inference does not run in a different compute dtype than training.
    compute_dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16

    # Configure quantization
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=compute_dtype
    )

    # Load the tokenizer saved next to the adapter; reuse EOS as the padding token.
    tokenizer = AutoTokenizer.from_pretrained(adapter_path)
    tokenizer.pad_token = tokenizer.eos_token

    # Load base model with 4-bit config only. `trust_remote_code` is not needed
    # for an architecture that ships with transformers.
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        quantization_config=bnb_config,
        device_map="auto"
    )

    # Load PEFT adapter
    model = PeftModel.from_pretrained(
        base_model,
        adapter_path,
        device_map="auto"
    )

    # Resize embeddings only if the tokenizer and the embedding matrix really
    # disagree; an unconditional resize touches the quantized model for nothing.
    if model.get_input_embeddings().weight.shape[0] != len(tokenizer):
        model.resize_token_embeddings(len(tokenizer))

    # Inference mode: disables dropout, including the LoRA dropout.
    model.eval()

    return model, tokenizer

# Usage
model, tokenizer = load_model_no_offloading("./llama3-itihasa")

# Test inference
# Wrap the Sanskrit sentence in the same template the model was fine-tuned on;
# a bare sentence without the instruction and the "English:" cue does not match
# the training format.
sanskrit_text = "सर्वे मानवाः स्वतन्त्राः समुत्पन्नाः वर्तन्ते।"
input_text = f"Translate Sanskrit to English:\n{sanskrit_text}\nEnglish:"
inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
outputs = model.generate(
    **inputs,
    max_new_tokens=50,
    pad_token_id=tokenizer.eos_token_id  # Set explicitly: the pad token is EOS here
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
