# Kart LoRA Fine-Tuning

Train a local Kart model on conversation data.

**Requirements:**
- Google Colab with GPU (T4 free tier works)
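- Upload `kart_combined.jsonl` to Colab (JSON Lines: one object per line with a `messages` list of `{"role", "content"}` entries, where `role` is `system`, `user`, or `assistant`)
- A Hugging Face account that has been granted access to the gated `meta-llama` models, with a token available in the Colab session (e.g. an `HF_TOKEN` secret)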

**Output:**
- LoRA adapter files (download and use with Ollama)

In [None]:
# Install dependencies
# NOTE(review): versions are unpinned, so every run installs the latest
# releases. The SFTTrainer call later in this notebook passes `tokenizer`,
# `dataset_text_field` and `max_seq_length` directly to the constructor;
# confirm the installed trl release still accepts those keywords, or pin trl.
!pip install -q transformers datasets peft accelerate bitsandbytes trl
!pip install -q huggingface_hub

In [None]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from datasets import load_dataset
from trl import SFTTrainer

# Report whether a CUDA device is visible and, if so, which one.
gpu_present = torch.cuda.is_available()
print(f"CUDA available: {gpu_present}")
if gpu_present:
    print(f"GPU: {torch.cuda.get_device_name(0)}")

In [None]:
# Upload your training data
# Interactive step: pick kart_combined.jsonl when prompted. The `files`
# module imported here is used again by the download cell at the end of
# the notebook.
from google.colab import files
uploaded = files.upload()  # Upload kart_combined.jsonl

In [None]:
# Configuration
# Base checkpoint to fine-tune; passed to from_pretrained() for both the
# model and the tokenizer.
MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"  # Smaller, faster
# MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"  # Larger, better quality

# Used as the Trainer output_dir and as the directory the adapter and
# tokenizer are saved to.
OUTPUT_DIR = "./kart-lora"
# JSON Lines training file read by load_dataset().
TRAINING_FILE = "kart_combined.jsonl"

In [None]:
# Load dataset
dataset = load_dataset('json', data_files=TRAINING_FILE, split='train')
print(f"Loaded {len(dataset)} examples")

# Fail early with a readable message instead of an IndexError on dataset[0]
# below, or a KeyError deep inside the formatting step that reads
# example['messages'].
if len(dataset) == 0:
    raise ValueError(f"{TRAINING_FILE} contains no training examples")
if 'messages' not in dataset.column_names:
    raise ValueError(
        f"{TRAINING_FILE} must provide a 'messages' field per line; "
        f"found columns: {dataset.column_names}"
    )

print("\nExample:")
print(dataset[0])

In [None]:
# Format messages for training
def format_chat(example, *, add_bos=False):
    """Flatten one chat record into a single Llama 3 formatted training string.

    Each message becomes
    ``<|start_header_id|>{role}<|end_header_id|>\\n\\n{content}<|eot_id|>``,
    i.e. the header is followed by a blank line as in the official Llama 3
    prompt format. Message content is inserted verbatim.

    Args:
        example: Mapping with a ``'messages'`` list; every message is a
            mapping with ``'role'`` and ``'content'`` keys.
        add_bos: When true, prefix the text with ``<|begin_of_text|>`` exactly
            once. Leave it off (the default) when the text is later encoded
            with the tokenizer's default settings: the Llama 3 tokenizers
            prepend the BOS token themselves, so embedding it here as well
            would duplicate it. Enable it only when tokenizing with
            ``add_special_tokens=False``.

    Returns:
        ``{'text': formatted}`` so the result can be merged into the record
        by ``Dataset.map``.

    Messages whose role is not ``system``, ``user`` or ``assistant`` are
    skipped.
    """
    known_roles = ("system", "user", "assistant")
    turns = [
        f"<|start_header_id|>{msg['role']}<|end_header_id|>\n\n{msg['content']}<|eot_id|>"
        for msg in example['messages']
        if msg['role'] in known_roles
    ]
    prefix = "<|begin_of_text|>" if add_bos else ""
    return {'text': prefix + "".join(turns)}

# Add the flattened 'text' column to every record, then preview the first one.
dataset = dataset.map(format_chat)
print("Formatted dataset")
first_text = dataset[0]['text']
print("\nExample text (first 500 chars):")
print(first_text[:500])

In [None]:
# Quantization config (4-bit for memory efficiency)
# bfloat16 is only natively supported on GPUs with compute capability >= 8.0
# (Ampere and newer). A Colab T4 is 7.5, so fall back to float16 there.
has_native_bf16 = (
    torch.cuda.is_available()
    and torch.cuda.get_device_capability(0)[0] >= 8
)
compute_dtype = torch.bfloat16 if has_native_bf16 else torch.float16

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=True,
)

# Load model and tokenizer
# trust_remote_code is intentionally not set: Llama is implemented inside
# transformers, so there is no reason to allow code from the Hub repository
# to execute.
print(f"Loading {MODEL_NAME}...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Padding token: prefer the dedicated fine-tuning pad token shipped with the
# Llama 3.1/3.2 vocabularies. Reusing EOS as padding makes language-modelling
# collators that mask pad positions also mask the real end-of-turn tokens, so
# the model is never trained to stop. EOS is kept only as a last resort for
# tokenizers that have neither the dedicated token nor a pad token.
DEDICATED_PAD_TOKEN = "<|finetune_right_pad_id|>"
if DEDICATED_PAD_TOKEN in tokenizer.get_vocab():
    tokenizer.pad_token = DEDICATED_PAD_TOKEN
elif tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

print("Model loaded!")

In [None]:
# Run PEFT's k-bit training preparation on the quantized model before
# attaching adapters.
model = prepare_model_for_kbit_training(model)

# Attention projection layers that receive LoRA adapters.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]

# Adapter hyperparameters: rank 16, alpha 32, 5% dropout, no bias terms.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=attention_projections,
)

# Wrap the model with the adapters and print the trainable-parameter summary.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

In [None]:
# Training arguments
# Mixed precision: use bf16 on GPUs with native support (compute capability
# >= 8.0, i.e. Ampere and newer) and fp16 otherwise (e.g. a Colab T4, 7.5).
use_bf16 = (
    torch.cuda.is_available()
    and torch.cuda.get_device_capability(0)[0] >= 8
)

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # effective batch size of 8 per device
    learning_rate=2e-4,
    weight_decay=0.01,
    logging_steps=10,
    save_steps=100,
    save_total_limit=2,  # keep only the two most recent checkpoints
    fp16=not use_bf16,
    bf16=use_bf16,
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",
    report_to="none",
)

# Trainer
# NOTE(review): `tokenizer`, `dataset_text_field` and `max_seq_length` are
# passed straight to SFTTrainer here; confirm the installed trl release
# accepts them as constructor keywords.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    args=training_args,
    tokenizer=tokenizer,
    dataset_text_field="text",
    max_seq_length=2048,
)

In [None]:
# Train!
# Runs the fine-tuning loop on the `trainer` built in the previous cell,
# using the settings in `training_args`.
print("Starting training...")
trainer.train()
print("Training complete!")

In [None]:
# Save the LoRA adapter
# The adapter and the tokenizer files are both written to OUTPUT_DIR
# ("./kart-lora" by default), the directory that is zipped for download.
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
print(f"Saved to {OUTPUT_DIR}")

In [None]:
# Test the model
# Build the prompt with format_chat so it always uses exactly the same layout
# as the training text. The empty assistant turn supplies the assistant
# header; its closing <|eot_id|> is stripped so generation continues from it.
END_OF_TURN = "<|eot_id|>"
test_messages = [
    {"role": "system", "content": "You are Kartikeya (Kart), CMD of the Die-Namic System."},
    {"role": "user", "content": "How do I fix a Python import error?"},
    {"role": "assistant", "content": ""},
]
test_prompt = format_chat({"messages": test_messages})["text"]
if test_prompt.endswith(END_OF_TURN):
    test_prompt = test_prompt[: -len(END_OF_TURN)]

# Training leaves the model in train mode; switch to eval so LoRA dropout is
# disabled while sampling.
model.eval()

# Follow the device the model was placed on instead of assuming "cuda".
inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)
outputs = model.generate(
    **inputs,
    max_new_tokens=200,
    do_sample=True,  # explicit, so temperature applies regardless of the checkpoint's generation defaults
    temperature=0.7,
    pad_token_id=tokenizer.pad_token_id,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=False))

In [None]:
# Download the adapter
!zip -r kart-lora.zip ./kart-lora
files.download('kart-lora.zip')

## Using with Ollama

After downloading `kart-lora.zip`:

1. Extract the adapter files
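2. Create a Modelfile next to the extracted `kart-lora` directory. The `FROM` model must be the same base model the adapter was trained on (`llama3.2` for the default 3B configuration; use `llama3.1:8b` if you trained the 8B variant):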

```
FROM llama3.2:latest
ADAPTER ./kart-lora

SYSTEM "You are Kartikeya (Kart), CMD of the Die-Namic System..."
```

3. Create the model:
```bash
ollama create kart -f Modelfile
```

4. Run:
```bash
ollama run kart
```