In [5]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, default_data_collator
from peft import PeftModel, get_peft_model, LoraConfig, TaskType
from datasets import load_dataset
import torch

# Load dataset
dataset = load_dataset("json", data_files="testalpaca_ready.json")

# Format prompt template
def format_alpaca(ex):
    if ex["input"].strip():
        text = (
            f"### Instruction:\n{ex['instruction']}\n\n"
            f"### Input:\n{ex['input']}\n\n"
            f"### Response:\n{ex['output']}"
        )
    else:
        text = (
            f"### Instruction:\n{ex['instruction']}\n\n"
            f"### Response:\n{ex['output']}"
        )
    return {"text": text}


dataset = dataset.map(format_alpaca)


tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2", torch_dtype="auto", trust_remote_code=True)
#model = PeftModel.from_pretrained(model, "./karteek_v1_adapter")
model.config.pad_token_id = tokenizer.pad_token_id

model.config.use_cache = False  # Disable cache for training


# LoRA Config
lora_config = LoraConfig(
    r=4,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"], 
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, lora_config)

def tokenize_function(example):
    tokens = tokenizer(
        example["text"],
        truncation=True,
        padding="max_length",
        max_length=512
    )
    tokens["labels"] = tokens["input_ids"].copy()
    return tokens

tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Training setup
training_args = TrainingArguments(
    output_dir="karteek_lora",
    per_device_train_batch_size=1,
    num_train_epochs=3,
    logging_steps=10,
    save_strategy="epoch",
    save_total_limit=1,
    learning_rate=1e-4,
    fp16=False
)

trainer = Trainer ( 
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    tokenizer=tokenizer
    data_collator=default_data_collator,
    label_names=["labels"]
)

# Train!
trainer.train()

# Save LoRA adapter
model.save_pretrained("karteek_v2_adapter")


SyntaxError: invalid syntax. Perhaps you forgot a comma? (2202914353.py, line 77)

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
import torch

# 1. Prepare device & quant config
device = "cuda" if torch.cuda.is_available() else "cpu"
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,            # 8‑bit weight quantization
    llm_int8_threshold=6.0,       # when to switch to FP16 for very large weights
    llm_int8_has_fp16_weight=False
)

# 2. Load base model quantized
base = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2",
    quantization_config=bnb_config,
    device_map="auto",            # auto‑map layers to devices / offload
)

# 3. Load tokenizer & LoRA adapter
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
tokenizer.pad_token = tokenizer.eos_token
model = PeftModel.from_pretrained(
    base,
    "./karteek_v2_adapter",
    device_map="auto"
)
model.config.pad_token_id = tokenizer.eos_token_id

# 4. Move to device & set eval
model.to(device)
model.config.use_cache = True
model.eval()

# 5. Inference
inputs = tokenizer(
    "Who is Karteek Varma?",
    return_tensors="pt",
    truncation=True,
    max_length=512
).to(device)

with torch.no_grad():
    out = model.generate(
        **inputs,
        max_new_tokens=100,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

# Strip prompt tokens
gen = out[0]
prompt_len = inputs["input_ids"].shape[-1]
print(tokenizer.decode(gen[prompt_len:], skip_special_tokens=True))


In [None]:
#REPL loop
try:
    while True:
        prompt = input("Ask me something: ")

        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512
        )
        inputs = {k: v.to(device) for k, v in inputs.items()}

        with torch.no_grad():
            output = model.generate(
                **inputs,
                max_new_tokens=100,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
                do_sample=True,
                top_k=50,
                top_p=0.95,
                temperature=0.7,
            )

        gen = output[0]
        prompt_len = inputs["input_ids"].shape[-1]
        new_tokens = gen[prompt_len:]
        response = tokenizer.decode(new_tokens, skip_special_tokens=True)

        print(f"\n🤖 {response}\n")

except KeyboardInterrupt:
    print("\nBye!")

