In [1]:
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import torch
import json


data_path = "dataset.json"
with open(data_path, 'r', encoding='utf-8') as f:
    raw_data = json.load(f)


qa_pairs = [{
    "prompt": f"প্রশ্ন: {item['input']}\nউত্তরের জন্য প্রাসঙ্গিক পাঠ্য:\n{item['instruction']}",
    "answer": item["output"]
} for item in raw_data]

dataset = Dataset.from_list(qa_pairs)


model_id = "meta-llama/Meta-Llama-3-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True, device_map="auto")
model = prepare_model_for_kbit_training(model)


lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)


def tokenize(example):
    full_input = f"{example['prompt']}\nউত্তর: {example['answer']}"
    return tokenizer(full_input, truncation=True, padding="max_length", max_length=512)

tokenized_dataset = dataset.map(tokenize)


training_args = TrainingArguments(
    output_dir="./llama3-qa-checkpoints",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    logging_dir="./logs",
    logging_steps=10,
    save_strategy="epoch",
    learning_rate=2e-5,
    fp16=True,
    report_to="none"
)


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer
)


trainer.train()

  from .autonotebook import tqdm as notebook_tqdm
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Downloading shards: 100%|██████████| 4/4 [59:51<00:00, 897.86s/it]   
Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00,  1.36s/it]
Map:   0%|          | 0/4385 [00:00<?, ? examples/s]


ValueError: Asking to pad but the tokenizer does not have a padding token. Please select a token to use as `pad_token` `(tokenizer.pad_token = tokenizer.eos_token e.g.)` or add a new pad token via `tokenizer.add_special_tokens({'pad_token': '[PAD]'})`.

In [4]:
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import torch
import json

# Load dataset
data_path = "dataset.json"
with open(data_path, 'r', encoding='utf-8') as f:
    raw_data = json.load(f)

# Prepare prompt-answer pairs
qa_pairs = [{
    "prompt": f"প্রশ্ন: {item['input']}\nউত্তরের জন্য প্রাসঙ্গিক পাঠ্য:\n{item['instruction']}",
    "answer": item["output"]
} for item in raw_data]

dataset = Dataset.from_list(qa_pairs)

# Load tokenizer and add pad token if missing BEFORE loading the model
model_id = "meta-llama/Meta-Llama-3-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)

if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    tokenizer.pad_token = '[PAD]'

# Save tokenizer with pad token so model loads it correctly
tokenizer.save_pretrained("./llama3-tokenizer")

# Load model in 4-bit with CPU offloading
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True  # optional but good for Meta models
)


# Prepare model for k-bit training and apply LoRA
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)

# Tokenization
def tokenize(example):
    full_input = f"{example['prompt']}\nউত্তর: {example['answer']}"
    return tokenizer(full_input, truncation=True, padding="max_length", max_length=512)

tokenized_dataset = dataset.map(tokenize)

# Training arguments
training_args = TrainingArguments(
    output_dir="./llama3-qa-checkpoints",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    logging_dir="./logs",
    logging_steps=10,
    save_strategy="epoch",
    learning_rate=2e-5,
    fp16=True,
    report_to="none"
)

# Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer
)

# Start training
trainer.train()


Loading checkpoint shards: 100%|██████████| 4/4 [00:06<00:00,  1.51s/it]
Map: 100%|██████████| 4385/4385 [00:06<00:00, 703.01 examples/s]
  trainer = Trainer(
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs

ValueError: The model did not return a loss from the inputs, only the following keys: logits. For reference, the inputs it received are input_ids,attention_mask.

In [6]:
from datasets import Dataset
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
    TrainingArguments, Trainer, BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import torch
import json

# Load dataset
data_path = "dataset.json"
with open(data_path, 'r', encoding='utf-8') as f:
    raw_data = json.load(f)

# Prepare prompt-answer pairs
qa_pairs = [{
    "prompt": f"প্রশ্ন: {item['input']}\nউত্তরের জন্য প্রাসঙ্গিক পাঠ্য:\n{item['instruction']}",
    "answer": item["output"]
} for item in raw_data]

dataset = Dataset.from_list(qa_pairs)

# Load tokenizer and add pad token if missing
model_id = "meta-llama/Meta-Llama-3-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)

if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    tokenizer.pad_token = '[PAD]'

# Save tokenizer with pad token
tokenizer.save_pretrained("./llama3-tokenizer")

# Load model in 4-bit with CPU offloading
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"": torch.cuda.current_device()},
    trust_remote_code=True
)

# Resize model embeddings in case pad_token was added
model.resize_token_embeddings(len(tokenizer))

# Prepare model for LoRA fine-tuning
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)

# Tokenization with labels for training
def tokenize(example):
    full_input = f"{example['prompt']}\nউত্তর: {example['answer']}"
    tokenized = tokenizer(full_input, truncation=True, padding="max_length", max_length=512)
    tokenized["labels"] = tokenized["input_ids"].copy()  # Essential for loss computation
    return tokenized

tokenized_dataset = dataset.map(tokenize, remove_columns=dataset.column_names)

# Training arguments
training_args = TrainingArguments(
    output_dir="./llama3-qa-checkpoints",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    logging_dir="./logs",
    logging_steps=10,
    save_strategy="epoch",
    learning_rate=2e-5,
    fp16=True,
    report_to="none"
)

# Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer
)

# Start training
trainer.train()


Loading checkpoint shards: 100%|██████████| 4/4 [00:06<00:00,  1.56s/it]
Map: 100%|██████████| 4385/4385 [00:06<00:00, 689.69 examples/s]
  trainer = Trainer(
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Step,Training Loss
10,0.7889
20,0.7492
30,0.7584
40,0.7434
50,0.7362
60,0.6986
70,0.6473
80,0.6771
90,0.5829
100,0.5601


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


TrainOutput(global_step=1644, training_loss=0.43994754763125216, metrics={'train_runtime': 8513.6035, 'train_samples_per_second': 1.545, 'train_steps_per_second': 0.193, 'total_flos': 3.0303598625685504e+17, 'train_loss': 0.43994754763125216, 'epoch': 2.9958960328317374})

In [1]:
from datasets import Dataset
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
    TrainingArguments, Trainer,
    BitsAndBytesConfig, EarlyStoppingCallback
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import torch
import json
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np
from tqdm.auto import tqdm

torch.cuda.empty_cache()

# Load dataset
data_path = "dataset.json"
with open(data_path, 'r', encoding='utf-8') as f:
    raw_data = json.load(f)

# Prepare prompt-answer pairs
qa_pairs = [{
    "prompt": f"প্রশ্ন: {item['input']}\nউত্তরের জন্য প্রাসঙ্গিক পাঠ্য:\n{item['instruction']}",
    "answer": item["output"]
} for item in raw_data]

dataset = Dataset.from_list(qa_pairs)

# Train-test split
dataset = dataset.train_test_split(test_size=0.1)
train_dataset = dataset["train"]
eval_dataset = dataset["test"]

# Load tokenizer
model_id = "meta-llama/Meta-Llama-3-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)

if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    tokenizer.pad_token = '[PAD]'
tokenizer.save_pretrained("./llama3-tokenizer")

# Load model with 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"": torch.cuda.current_device()},
    trust_remote_code=True
)
model.resize_token_embeddings(len(tokenizer))
model = prepare_model_for_kbit_training(model)

# Apply LoRA
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)

# Tokenization
def tokenize(example):
    full_input = f"{example['prompt']}\nউত্তর: {example['answer']}"
    tokenized = tokenizer(
        full_input,
        truncation=True,
        padding="max_length",
        max_length=256  # reduced from 512
    )
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

# Add progress bar using tqdm
print("Tokenizing training dataset...")
tokenized_train = train_dataset.map(tokenize, remove_columns=train_dataset.column_names, desc="Tokenizing train")

print("Tokenizing evaluation dataset...")
tokenized_eval = eval_dataset.map(tokenize, remove_columns=eval_dataset.column_names, desc="Tokenizing eval")

# Metric function
def compute_metrics(eval_pred):
    preds, labels = eval_pred
    preds = np.argmax(preds, axis=-1)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)

    pred_flat = preds.flatten()
    label_flat = labels.flatten()

    precision, recall, f1, _ = precision_recall_fscore_support(label_flat, pred_flat, average='macro')
    acc = accuracy_score(label_flat, pred_flat)

    # Clear cache to prevent OOM
    torch.cuda.empty_cache()

    return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall}

# Training configuration
training_args = TrainingArguments(
    output_dir="./llama3-qa-checkpoints",
    per_device_train_batch_size=1,  # Reduce batch size
    gradient_accumulation_steps=4,
    num_train_epochs=5,
    learning_rate=2e-5,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    eval_accumulation_steps=1,  # Prevents memory overflow during eval
    logging_dir="./logs",
    logging_steps=10,
    fp16=True,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
    report_to="none"
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
)

# Start training
trainer.train()

torch.cuda.empty_cache()


  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 4/4 [00:10<00:00,  2.58s/it]
The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
The new lm_head weights will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


Tokenizing training dataset...
Tokenizing evaluation dataset...


  trainer = Trainer(


ValueError: No columns in the dataset match the model's forward method signature. The following columns have been ignored: []. Please check the dataset and model. You may need to set `remove_unused_columns=False` in `TrainingArguments`.