In [1]:
!pip install -q -U bitsandbytes  datasets evaluate accelerate peft transformers trl

In [2]:
!pip install -q -U langchain

In [3]:
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    default_data_collator,
    pipeline
)
from peft import LoraConfig, get_peft_model, LoftQConfig
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.llms import HuggingFacePipeline
import gc, torch
from evaluate import load

# Load dataset (SQuAD example)
dataset = load_dataset("squad")

# Define LangChain prompt template
qa_template = """### Question: {question}
### Context: {context}
### Answer: """

prompt_template = PromptTemplate(
    input_variables=["question", "context"],
    template=qa_template
)

# Preprocessing function with LangChain template
def preprocess_function(examples):
    questions = examples['question']
    contexts = examples['context']
    answers = [a['text'][0] for a in examples['answers']]

    input_prompts = []
    full_texts = []
    for q, c, a in zip(questions, contexts, answers):
        input_prompt = prompt_template.format(question=q, context=c)
        full_text = input_prompt + a
        input_prompts.append(input_prompt)
        full_texts.append(full_text)

    # Tokenization
    tokenized_data = tokenizer(
        full_texts,
        padding="max_length",
        truncation=True,
        max_length=512,
        return_tensors="pt",
    ).to("cuda")

    # Create labels by masking input prompts
    labels = tokenized_data["input_ids"].clone()
    for i, input_prompt in enumerate(input_prompts):
        prompt_length = len(tokenizer(input_prompt, return_tensors="pt").input_ids[0])
        labels[i, :prompt_length-1] = -100

    return {
        'input_ids': tokenized_data['input_ids'],
        'attention_mask': tokenized_data['attention_mask'],
        'labels': labels,
        'question': questions,
        'context': contexts,
        'answers': answers,
        'id': examples['id']
    }

# Model configuration
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    trust_remote_code=True
)
loftq_config = LoftQConfig(loftq_bits=4)           # set 4bit quantization

lora_config = LoraConfig(
    r=32,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)


# Modified LoRA config to target all layer types

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# Process dataset with preserved columns for LangChain
tokenized_dataset = dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=['title', 'answers']  # Preserve original question/context
)

# Training arguments
training_args = TrainingArguments(
    output_dir="../models/deepseek-r1-finetuned",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,
    optim="paged_adamw_8bit",
    learning_rate=2e-4,
    fp16=True,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",
    eval_strategy="steps",
    eval_steps=500,
    save_strategy="steps",
    save_steps=500,
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    report_to="none",
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    data_collator=default_data_collator,
)
gc.collect()
torch.cuda.empty_cache()
model.config.use_cache = False
# Start training
trainer.train()

# Save adapter
model.save_pretrained("deepseek-r1-1.5B-squad-lora")

# Create LangChain pipeline for evaluation
model.eval()
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=model.device.index,
    max_new_tokens=100,
    pad_token_id=tokenizer.eos_token_id,
    no_repeat_ngram_size=2,
    do_sample=False
)

llm = HuggingFacePipeline(pipeline=pipe)
qa_chain = LLMChain(llm=llm, prompt=prompt_template)

# QA Evaluation using LangChain
squad_metric = load("squad")
tokenized_val = tokenized_dataset["validation"]

predictions = []
references = []

for example in tokenized_val:
    response = qa_chain.run(
        question=example["question"],
        context=example["context"]
    )
    answer = response.split("### Answer: ")[-1].strip()

    predictions.append({
        "prediction_text": answer,
        "id": example["id"]
    })

    references.append({
        "answers": {"text": [example["answers"]], "answer_start": [0]},
        "id": example["id"]
    })

# Calculate metrics
metrics = squad_metric.compute(predictions=predictions, references=references)
print(f"\nFinal Evaluation Metrics:")
print(f"Exact Match: {metrics['exact_match']:.2f}%")
print(f"F1 Score: {metrics['f1']:.2f}%")

Some parameters are on the meta device because they were offloaded to the cpu.


trainable params: 4,358,144 || all params: 1,781,446,144 || trainable%: 0.2446


Map:   0%|          | 0/87599 [00:00<?, ? examples/s]

Map:   0%|          | 0/10570 [00:00<?, ? examples/s]

NotImplementedError: Cannot copy out of meta tensor; no data! Please use torch.nn.Module.to_empty() instead of torch.nn.Module.to() when moving module from meta to a different device.