# GeneticLLM Fine-tuning with QLoRA

Fine-tune a small LLM on genetic research Q&A using QLoRA (4-bit quantization + LoRA).

**Requirements:** A free Google Colab runtime with a T4 GPU (Runtime → Change runtime type → T4 GPU)

**Base Model:** Qwen2-1.5B-Instruct (small, capable, Apache 2.0 license)

**Dataset:** [sachinbkale27/genetics-qa](https://huggingface.co/datasets/sachinbkale27/genetics-qa) (89k samples)

## 1. Setup Environment

In [None]:
# Install dependencies
# Step 1: install the released Unsloth package from PyPI (-q keeps pip output quiet).
!pip install -q unsloth
# Step 2: upgrade to the current GitHub version of Unsloth with its "colab-new" extra;
# --no-cache-dir makes pip fetch it fresh instead of reusing a cached build.
!pip install -q --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# Step 3: install the Hugging Face libraries and bitsandbytes imported by the cells below.
!pip install -q datasets transformers trl peft accelerate bitsandbytes

In [None]:
import torch
from unsloth import FastLanguageModel
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments

# Report the GPU attached to this runtime (device 0) and its total memory in GB
gpu_index = 0
gpu_name = torch.cuda.get_device_name(gpu_index)
print(f"GPU: {gpu_name}")
total_memory_gb = torch.cuda.get_device_properties(gpu_index).total_memory / 1e9
print(f"Memory: {total_memory_gb:.1f} GB")

## 2. Load Base Model with 4-bit Quantization

In [None]:
# Model configuration: checkpoint to load and the maximum sequence length to train with
MODEL_NAME = "unsloth/Qwen2-1.5B-Instruct-bnb-4bit"
MAX_SEQ_LENGTH = 2048

# Options handed to Unsloth's loader, kept together so they are easy to tweak
load_options = dict(
    model_name=MODEL_NAME,
    max_seq_length=MAX_SEQ_LENGTH,
    dtype=None,  # Auto-detect
    load_in_4bit=True,
)

# Load the 4-bit model together with its tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)

print(f"Model loaded: {MODEL_NAME}")

## 3. Configure LoRA Adapters

In [None]:
# Layers that receive LoRA adapters: the attention projections and the MLP projections
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the base model with LoRA adapters
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # LoRA rank (higher = more capacity, more memory)
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",  # Long context support
    random_state=42,
)

# Show how many parameters will actually be trained
model.print_trainable_parameters()

## 4. Load Dataset from Hugging Face

In [None]:
# Hub identifier of the genetics Q&A dataset
DATASET_NAME = "sachinbkale27/genetics-qa"

print(f"Loading dataset: {DATASET_NAME}")
dataset = load_dataset(DATASET_NAME)

# Report the size of each split this notebook expects
print(f"\nDataset loaded!")
train_split = dataset["train"]
print(f"Training samples: {len(train_split)}")
validation_split = dataset["validation"]
print(f"Validation samples: {len(validation_split)}")

In [None]:
# Format for training
def format_prompt(sample):
    """Render one dataset row as a single chat-formatted training string.

    Args:
        sample: A dataset row with a "messages" entry holding the conversation.

    Returns:
        A dict with one key, "text", containing the conversation rendered
        through the tokenizer's chat template (not tokenized, and without a
        trailing generation prompt).
    """
    rendered = tokenizer.apply_chat_template(
        sample["messages"],
        tokenize=False,
        add_generation_prompt=False,
    )
    return {"text": rendered}

# Run every training row through format_prompt to add the "text" column
train_dataset = dataset["train"].map(format_prompt)

print(f"Formatted {len(train_dataset)} training samples")

# Preview the first 500 characters of the first formatted example
first_text = train_dataset[0]["text"]
print(f"\nSample prompt:\n{first_text[:500]}...")

## 5. Train the Model

In [None]:
# Training configuration
# Note: With 89k samples, we use 1 epoch and adjust batch size

# Use bf16 when the GPU supports it, otherwise fall back to fp16
use_bf16 = torch.cuda.is_bf16_supported()

# Optimisation hyperparameters and checkpointing schedule
training_args = TrainingArguments(
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    warmup_steps=100,
    num_train_epochs=1,  # 1 epoch with 89k samples is sufficient
    learning_rate=2e-4,
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=50,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="cosine",
    seed=42,
    output_dir="outputs",
    save_strategy="steps",
    save_steps=500,
)

# Supervised fine-tuning trainer over the formatted "text" column
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    dataset_text_field="text",
    max_seq_length=MAX_SEQ_LENGTH,
    dataset_num_proc=2,
    packing=False,
    args=training_args,
)

In [None]:
# Start training
# Read the batch settings back from the trainer instead of repeating literals,
# so the summary below stays correct if the configuration cell is edited.
per_device_batch = trainer.args.per_device_train_batch_size
accumulation_steps = trainer.args.gradient_accumulation_steps
effective_batch = per_device_batch * accumulation_steps
# Rough estimate only: samples seen over all epochs divided by the effective batch.
estimated_steps = int(len(train_dataset) * trainer.args.num_train_epochs) // effective_batch

print("Starting fine-tuning...")
print(f"Total training samples: {len(train_dataset)}")
print(f"Batch size: {per_device_batch} x {accumulation_steps} (gradient accumulation) = {effective_batch} effective")
print(f"Estimated steps: {estimated_steps}")
print("-" * 50)

# Run the fine-tuning loop; the returned stats carry the step count and mean loss
trainer_stats = trainer.train()

print("\nTraining complete!")
print(f"Total steps: {trainer_stats.global_step}")
print(f"Training loss: {trainer_stats.training_loss:.4f}")

## 6. Test the Fine-tuned Model

In [None]:
# Switch to inference mode
FastLanguageModel.for_inference(model)

def ask_genetics_question(question: str, context: str = "") -> str:
    """Query the fine-tuned model and return only its answer text.

    Args:
        question: The question to ask.
        context: Optional background text. When non-empty it is placed before
            the question in the user message.

    Returns:
        The generated answer with special tokens removed and surrounding
        whitespace stripped. Sampling is enabled, so repeated calls with the
        same question can return different answers.
    """
    user_content = f"Context: {context}\n\nQuestion: {question}" if context else question
    messages = [
        {"role": "system", "content": "You are a genetic research assistant with expertise in molecular biology, genomics, and genetic analysis."},
        {"role": "user", "content": user_content},
    ]

    # return_dict=True yields input_ids together with the attention mask, so
    # generate() receives both instead of having to guess the mask.
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to("cuda")
    prompt_length = inputs["input_ids"].shape[-1]

    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.7,
        do_sample=True,
    )

    # Decode only the tokens produced after the prompt. Splitting the full
    # decoded text on the word "assistant" would cut off any answer that
    # happens to contain that word.
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

In [None]:
# Sample prompts used to spot-check the fine-tuned model
test_questions = [
    "What is the role of CRISPR-Cas9 in gene editing?",
    "How do single nucleotide polymorphisms (SNPs) affect disease risk?",
    "Explain the process of DNA methylation and its impact on gene expression.",
    "What is the difference between genotype and phenotype?",
    "How does mRNA translation work?",
]

# Print each question, then the model's answer, then a divider line
divider = "-" * 80
for question in test_questions:
    print(f"Q: {question}")
    answer = ask_genetics_question(question)
    print(f"A: {answer}")
    print(divider)

## 7. Save the Model

In [None]:
# Save LoRA adapters only (small, ~50MB), with the tokenizer alongside them
adapter_dir = "genetic-llm-lora"
for artifact in (model, tokenizer):
    artifact.save_pretrained(adapter_dir)

print("LoRA adapters saved to genetic-llm-lora/")

In [None]:
# Download the LoRA adapters
# Bundle the saved adapter folder into one zip archive (-r recurses into the directory).
!zip -r genetic-llm-lora.zip genetic-llm-lora/

# google.colab can only be imported inside a Colab runtime, so this cell is Colab-specific.
from google.colab import files
# Hand the archive to the browser as a file download.
files.download("genetic-llm-lora.zip")

## 8. Push to Hugging Face Hub (Recommended)

In [None]:
# Push your fine-tuned model to Hugging Face for easy sharing
import os
from getpass import getpass

from huggingface_hub import login

# Target repository on the Hub -- replace YOUR_USERNAME with your account name
HUB_REPO_ID = "YOUR_USERNAME/genetic-llm"

# Authenticate without writing the token into the notebook: use the HF_TOKEN
# environment variable when it is set, otherwise ask for it with a hidden prompt.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token (write scope): ")
login(token=hf_token)

# Push the model, then the tokenizer. The tokenizer needs its own call:
# push_to_hub on the model does not upload a tokenizer passed as a keyword.
model.push_to_hub(HUB_REPO_ID)
tokenizer.push_to_hub(HUB_REPO_ID)
print("Model pushed to Hugging Face Hub!")