# GeneticLLM Fine-tuning with QLoRA

Fine-tune Qwen2-1.5B on genetic research Q&A using QLoRA.

**GPU:** T4 (Free Colab) or V100/A100

**Dataset:** [sachinbkale27/genetics-qa](https://huggingface.co/datasets/sachinbkale27/genetics-qa) (89k samples)

**Tracking:** Weights & Biases (optional)

In [None]:
# Install dependencies
# Quiet (-q) install of the Unsloth package and the Weights & Biases client.
!pip install -q unsloth wandb
# Upgrade Unsloth to the current GitHub source with its "colab-new" extra;
# --no-cache-dir makes pip fetch it fresh instead of reusing a cached build.
!pip install -q --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# Remaining libraries imported by the cells below (datasets, transformers, trl)
# plus peft, accelerate and bitsandbytes.
!pip install -q datasets transformers trl peft accelerate bitsandbytes

In [None]:
# Setup Weights & Biases from Colab Secrets
import wandb
from google.colab import userdata

# Log in to W&B with the key stored in Colab Secrets. This is deliberately
# best-effort: any exception from the secret lookup or the login call only
# disables logging (USE_WANDB = False) so the rest of the notebook still runs.
# Later cells read USE_WANDB to decide whether to report metrics.
try:
    wandb_api_key = userdata.get('WANDB_API_KEY')
    wandb.login(key=wandb_api_key)
except Exception as e:
    USE_WANDB = False
    print(f"W&B login failed: {e}")
    print("Training will continue without W&B logging.")
    print("To enable: Add WANDB_API_KEY to Colab Secrets (🔑 icon in sidebar)")
else:
    # Only reached when both calls above completed without raising.
    USE_WANDB = True
    print("✓ W&B login successful!")

In [None]:
import torch
from unsloth import FastLanguageModel
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments
import time

# Check GPU
# Query device 0 and report its name and total memory (bytes / 1e9 -> GB).
gpu_name = torch.cuda.get_device_name(0)
gpu_mem = torch.cuda.get_device_properties(0).total_memory / 1e9
print(f"GPU: {gpu_name}")
print(f"Memory: {gpu_mem:.1f} GB")

# Auto-detect GPU and set batch sizes
# The per-device batch size is chosen from a substring match on the device
# name; anything that is neither an A100 nor a V100 gets the smallest value.
# Gradient accumulation is the same for every GPU class, so it is set once.
GRAD_ACCUM = 4
if "A100" in gpu_name:
    print("✓ A100 detected")
    BATCH_SIZE = 16
elif "V100" in gpu_name:
    print("✓ V100 detected")
    BATCH_SIZE = 8
else:
    print("✓ T4/Other detected")
    BATCH_SIZE = 4

In [None]:
# Model configuration
MAX_SEQ_LENGTH = 2048
MODEL_NAME = "unsloth/Qwen2-1.5B-Instruct-bnb-4bit"

# Load the 4-bit checkpoint together with its tokenizer. No explicit dtype is
# requested (dtype=None); the sequence length is reused by the trainer cell.
model, tokenizer = FastLanguageModel.from_pretrained(
    load_in_4bit=True,
    dtype=None,
    max_seq_length=MAX_SEQ_LENGTH,
    model_name=MODEL_NAME,
)

print("Model loaded: " + MODEL_NAME)

In [None]:
# Add LoRA adapters
# Names of the projection layers that receive LoRA adapters.
_lora_targets = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# Wrap the loaded model with the adapters: rank 16, alpha 16, no dropout,
# no bias terms, Unsloth's gradient-checkpointing mode, and a fixed seed.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=_lora_targets,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    random_state=42,
    use_gradient_checkpointing="unsloth",
)

# Report how many parameters are trainable after wrapping.
model.print_trainable_parameters()

In [None]:
# Load dataset
DATASET_NAME = "sachinbkale27/genetics-qa"

print("Loading dataset: " + DATASET_NAME)
dataset = load_dataset(DATASET_NAME)

# Report the size of each split this notebook expects to exist; the leading
# newline on the first label separates the counts from the loader's output.
for _label, _split in (("\nTraining", "train"), ("Validation", "validation")):
    print(f"{_label} samples: {len(dataset[_split])}")

In [None]:
# Format for training
def format_prompt(sample):
    """Render one sample's chat messages into a single training string.

    Args:
        sample: A dataset row; its ``"messages"`` entry is handed to the
            tokenizer's chat template.

    Returns:
        A dict with one key, ``"text"``, holding the templated conversation
        as plain text (not tokenized, no trailing generation prompt).
    """
    rendered = tokenizer.apply_chat_template(
        sample["messages"],
        add_generation_prompt=False,
        tokenize=False,
    )
    return {"text": rendered}

# Run the formatter over the training split with two worker processes; each
# row gains the "text" field that the trainer reads.
train_dataset = dataset["train"].map(format_prompt, num_proc=2)
print("Formatted", len(train_dataset), "samples")

In [None]:
# Training configuration with W&B
MAX_STEPS_CAP = 5000  # Safety limit for T4

effective_batch = BATCH_SIZE * GRAD_ACCUM
steps_per_epoch = len(train_dataset) // effective_batch
# Optimizer steps that will actually run: one epoch, truncated at the cap.
total_steps = min(steps_per_epoch, MAX_STEPS_CAP)

print(f"Effective batch size: {effective_batch}")
print(f"Total steps: {total_steps}")
print(f"W&B logging: {'Enabled' if USE_WANDB else 'Disabled'}")

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    dataset_text_field="text",  # column produced by format_prompt
    max_seq_length=MAX_SEQ_LENGTH,
    dataset_num_proc=2,
    packing=False,
    args=TrainingArguments(
        per_device_train_batch_size=BATCH_SIZE,
        gradient_accumulation_steps=GRAD_ACCUM,
        warmup_steps=100,
        num_train_epochs=1,
        # A positive max_steps overrides num_train_epochs. Passing the cap
        # unconditionally would stretch training to several epochs on GPUs
        # with a larger effective batch, so it is applied only when a single
        # epoch would exceed it; -1 leaves num_train_epochs in charge.
        max_steps=MAX_STEPS_CAP if steps_per_epoch > MAX_STEPS_CAP else -1,
        learning_rate=2e-4,
        # Use bf16 where the GPU supports it, otherwise fall back to fp16.
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        logging_steps=50,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="cosine",
        seed=42,
        output_dir="outputs",
        save_strategy="steps",
        save_steps=1000,
        # Report to W&B only when the login cell succeeded.
        report_to="wandb" if USE_WANDB else "none",
        run_name="genetic-llm" if USE_WANDB else None,
    ),
)

In [None]:
# Start training
print("Starting fine-tuning...")
start_time = time.time()
trainer_stats = trainer.train()
elapsed = (time.time() - start_time) / 60  # wall-clock seconds -> minutes

# Summarise the run from the values returned by trainer.train().
print("\nTraining complete!")
print("Total time: {:.1f} minutes".format(elapsed))
print("Total steps: {}".format(trainer_stats.global_step))
print("Final loss: {:.4f}".format(trainer_stats.training_loss))

# Close the W&B run only if logging was enabled by the login cell.
if USE_WANDB:
    wandb.finish()

In [None]:
# Test the model
# Switch the fine-tuned model to Unsloth's inference mode before ask() below
# calls model.generate().
FastLanguageModel.for_inference(model)

def ask(question):
    """Generate the model's answer to a single question.

    The question is wrapped in a fixed system + user conversation, rendered
    with the tokenizer's chat template, and sampled from the model.

    Args:
        question: The user question as plain text.

    Returns:
        The generated reply with special tokens removed and surrounding
        whitespace stripped.
    """
    messages = [
        {"role": "system", "content": "You are a genetic research assistant."},
        {"role": "user", "content": question},
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cuda")
    outputs = model.generate(
        input_ids=inputs,
        max_new_tokens=256,
        temperature=0.7,
        do_sample=True,
    )
    # generate() returns the prompt followed by the completion. Slice off the
    # prompt by token count instead of splitting the decoded text on the word
    # "assistant", which would cut any reply that itself contains that word.
    completion_ids = outputs[0][inputs.shape[-1]:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

# Smoke test: print one sample question and the model's answer to it.
_demo_question = "What is CRISPR-Cas9?"
print("Q: " + _demo_question)
print("A: " + ask(_demo_question))

In [None]:
# Save and download model
model.save_pretrained("genetic-llm-lora")
tokenizer.save_pretrained("genetic-llm-lora")
print("Model saved!")

!zip -r genetic-llm-lora.zip genetic-llm-lora/
from google.colab import files
files.download("genetic-llm-lora.zip")

In [None]:
# Optional: Push to HuggingFace
# from huggingface_hub import login
# login(token="YOUR_HF_TOKEN")
# model.push_to_hub("sachinbkale27/genetic-llm", tokenizer=tokenizer)