In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling 
from datasets import load_dataset
from peft import get_peft_model, LoraConfig, TaskType
import torch.nn.functional as F
import torch
from math import ceil



In [None]:
model_name = "microsoft/phi-2"

# Tokenizer: reuse EOS as the padding token, since Phi-2 defines none by default.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# Base model: weights are loaded in half precision (float16) to reduce memory;
# device_map="auto" leaves device placement to the library (GPU if available).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)

In [None]:
# --- 0.  Dataset -----------------------------------------------------------
# Load the local JSON file; the records are accessed below through raw_ds["train"].
raw_ds = load_dataset("json", data_files="cleaned_data.json")        # split "train"

def format_alpaca(ex):
    """Add a ``text`` field holding the full training prompt for one record.

    Records with a non-blank ``instruction`` use the three-section
    Instruction / Input / Response layout; records whose instruction is
    blank or missing fall back to the two-section Question / Answer layout.

    Args:
        ex: Mapping with ``instruction``, ``input`` and ``output`` keys.
            A value of ``None`` is treated as an empty string.

    Returns:
        The same mapping, with ``ex["text"]`` set.
    """
    # None is mapped to "" so a missing field neither crashes on .strip()
    # nor leaks the literal text "None" into the training prompt.
    instruction = "" if ex["instruction"] is None else ex["instruction"]
    context = "" if ex["input"] is None else ex["input"]
    answer = "" if ex["output"] is None else ex["output"]

    if instruction.strip():
        ex["text"] = (
            f"### Instruction:\n{instruction}\n\n"
            f"### Input:\n{context}\n\n"
            f"### Response:\n{answer}"
        )
    else:
        ex["text"] = (
            f"### Question:\n{context}\n\n"
            f"### Answer:\n{answer}"
        )
    return ex

# Attach the formatted "text" column to every record.
raw_ds = raw_ds.map(format_alpaca)

# 90 / 10 train/test split BEFORE tokenising; the fixed seed keeps the split repeatable.
ds = raw_ds["train"].train_test_split(test_size=0.1, seed=42)

# --- 1.  Tokenise ----------------------------------------------------------
def _mask_padding(input_ids, attention_mask):
    """Return labels for one sequence: token ids, with padded positions set to -100."""
    return [
        token_id if keep else -100
        for token_id, keep in zip(input_ids, attention_mask)
    ]


def tok_fn(ex):
    """Tokenise ``ex["text"]`` to fixed-length sequences and attach loss labels.

    An EOS token is appended to each text so the model still sees one genuine
    end-of-sequence target. Padding positions (attention_mask == 0) are then
    labelled -100, the index the loss ignores, so the pad tokens (which share
    the EOS id) do not dominate the training loss.

    Args:
        ex: Mapping whose ``"text"`` value is either a list of strings
            (batched ``map``) or a single string.

    Returns:
        The tokenizer output with an added ``"labels"`` entry shaped like
        ``"input_ids"``.
    """
    texts = ex["text"]
    batched = not isinstance(texts, str)
    eos = tokenizer.eos_token
    texts = [text + eos for text in texts] if batched else texts + eos

    tok = tokenizer(
        texts,
        truncation=True,
        padding="max_length",
        max_length=512,                 # sequences longer than this are cut (EOS included)
    )

    if batched:
        tok["labels"] = [
            _mask_padding(ids, mask)
            for ids, mask in zip(tok["input_ids"], tok["attention_mask"])
        ]
    else:
        tok["labels"] = _mask_padding(tok["input_ids"], tok["attention_mask"])
    return tok

# Tokenise both splits in batches; the raw columns (including "text") are dropped
# so only the values returned by tok_fn remain.
ds = ds.map(tok_fn, batched=True, remove_columns=raw_ds["train"].column_names)

# --- 2.  LoRA --------------------------------------------------------------
lora_cfg = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "v_proj"],   # Phi-2 module names that receive adapters
    r=8,
    lora_alpha=16,
    lora_dropout=0.025,
    bias="none",
)

# Wrap the base model with the adapters and report how many weights will train.
model = get_peft_model(model, lora_cfg)
model.print_trainable_parameters()         # ~2.6 M trainable
# Gradient checkpointing is left off; enable with model.gradient_checkpointing_enable().
model.config.use_cache = False

# --- 3.  Trainer -----------------------------------------------------------
training_args = TrainingArguments(
    output_dir="karteek_lora",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,        # effective batch of 8 per device; adjust for GPU RAM
    num_train_epochs=5,
    learning_rate=2e-4,
    fp16=True,
    # Log, evaluate and checkpoint once per epoch. load_best_model_at_end needs
    # an evaluation at every checkpoint, so the eval and save strategies must
    # match. (The argument is `evaluation_strategy` on older transformers.)
    # logging_steps is omitted: it only applies when logging_strategy="steps".
    logging_strategy="epoch",
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    save_total_limit=2,
    report_to="none",
    remove_unused_columns=False,          # pass dataset columns to the collator untouched
)

def simple_collator(features):
    """Stack a list of per-example dicts into one LongTensor per column.

    The column names are taken from the first example; every example must
    provide equally sized integer lists for each of those columns.
    """
    batch = {}
    for column in features[0]:
        rows = [example[column] for example in features]
        batch[column] = torch.tensor(rows, dtype=torch.long)
    return batch

class CausalTrainer(Trainer):
    """Trainer whose loss is read directly from the model's forward output."""

    def compute_loss(
        self, model, inputs, return_outputs=False,
        num_items_in_batch: int | None = None, **kwargs
    ):
        """Run a forward pass and return the loss the model computed.

        Args:
            model: The model to call with ``inputs``.
            inputs: Keyword arguments for the forward pass; expected to
                already contain ``labels`` so the model returns a loss.
            return_outputs: When true, return ``(loss, outputs)`` instead of
                just the loss.
            num_items_in_batch: Accepted for signature compatibility; unused.
                NOTE(review): recent Trainer versions may use this value to
                normalise the loss under gradient accumulation - confirm.
            **kwargs: Ignored.

        Returns:
            The loss, or a ``(loss, outputs)`` tuple when ``return_outputs``
            is true.
        """
        outputs = model(**inputs)                # labels already inside inputs
        loss = outputs.loss
        return (loss, outputs) if return_outputs else loss

# Wire the model, data splits and collator into the custom trainer.
trainer = CausalTrainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,             # optional; NOTE(review): newer transformers may expect `processing_class` - confirm
    data_collator=simple_collator,   # builds tensors for every column, 'labels' included
    train_dataset=ds["train"],
    eval_dataset=ds["test"],
)



In [None]:
# Sanity check before training: fetch one collated batch and run a forward pass.
batch = next(iter(trainer.get_train_dataloader()))
print("labels" in batch)                 # expected: True
print(trainer.model(**batch).loss)       # expected: a finite loss tensor

In [None]:
# Show which columns a collated training batch carries.
batch = next(iter(trainer.get_train_dataloader()))
print("Batch keys:", batch.keys())


In [None]:
# Report the train/eval flag of the wrapped model object.
print("Model training mode?", model.training)   # author expects True - TODO confirm on a fresh kernel


In [None]:
# Forward one more batch and keep its loss in `loss`; this cell prints nothing.
batch = next(iter(trainer.get_train_dataloader()))
loss = trainer.model(**batch).loss


In [None]:
# Run fine-tuning with the trainer built earlier.
trainer.train()

# Save the LoRA adapter; the inference cell loads it back from this directory.
model.save_pretrained("karteek_v1_adapter")

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Inference setup: base checkpoint plus the saved LoRA adapter.
model_name = "microsoft/phi-2"

# Tokenizer, with EOS reused as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# Base weights are loaded with library defaults (no dtype or device_map given),
# then the adapter directory written after training is applied on top.
base_model = AutoModelForCausalLM.from_pretrained(model_name)
model = PeftModel.from_pretrained(base_model, "./karteek_v1_adapter")


In [None]:
# Interactive chat loop. Type "exit" or "quit" (or interrupt the kernel) to stop.
while True:
    try:
        question = input("Ask me something: ")
    except (EOFError, KeyboardInterrupt):
        break
    if question.strip().lower() in {"exit", "quit"}:
        break

    # Same layout as the training text produced by format_alpaca for a record
    # with an instruction and an empty input, up to and including the newline
    # after the "### Response:" header.
    full_prompt = (
        f"### Instruction:\n{question}\n\n"
        "### Input:\n\n\n"
        "### Response:\n"
    )

    # Use the training-time length limit: a 50-token cap would cut the end of
    # the prompt (the "### Response:" header) off any longer question.
    inputs = tokenizer(
        full_prompt,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    ).to(model.device)                  # keep inputs on the same device as the model

    output = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=50,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
        pad_token_id=tokenizer.pad_token_id
    )

    # generate() returns prompt + continuation; slice off the prompt by token
    # count instead of relying on the decoded text starting with the prompt.
    prompt_len = inputs.input_ids.shape[1]
    bot_response = tokenizer.decode(
        output[0][prompt_len:], skip_special_tokens=True
    ).strip()

    print(f"\n🤖 {bot_response}\n")
