In [None]:
!pip install -q transformers peft accelerate bitsandbytes torch

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

In [None]:
# Base model checkpoint on the Hugging Face Hub (the repo name indicates a
# bitsandbytes 4-bit build).
BASE_MODEL = "unsloth/gemma-3-12b-it-bnb-4bit"

# LoRA adapter repositories. The adapter-loading cell further down references
# EXAM_LORA and GUIDED_LORA directly and attaches both to the same base model.
EXAM_LORA = "walterwhite91/ask-m-gemma3-exam-lora"
GUIDED_LORA = "walterwhite91/ask-m-gemma3-guide-lora"

# NOTE(review): USE_LORA is not read by any other cell visible in this
# notebook, so changing it has no effect on which adapter is used.
USE_LORA = EXAM_LORA  # or GUIDED_LORA

In [None]:
# Load the tokenizer that ships with the base checkpoint.
# NOTE(review): trust_remote_code=True lets code from the model repo execute
# locally; confirm it is actually required for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    BASE_MODEL,
    trust_remote_code=True,
)
# Fall back to EOS for padding only when the tokenizer defines no pad token of
# its own, instead of unconditionally overwriting an existing one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load the base model; device_map="auto" lets accelerate place the weights.
# NOTE(review): Gemma 3 is commonly run in bfloat16 - confirm float16 is intended.
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)

# Inference only: switch off dropout and other training-time behaviour.
model.eval()
# No adapter has been attached at this point, so report only the base model.
print("Base model loaded (LoRA adapters not attached yet)")

In [None]:
# Attach both LoRA adapters to the base model under stable names.
# The cell is safe to re-run: the base model is wrapped in a PeftModel only
# once, and an adapter that is already registered is not loaded again.
_adapter_repos = {"exam": EXAM_LORA, "guided": GUIDED_LORA}

for _adapter_name, _adapter_repo in _adapter_repos.items():
    if not isinstance(model, PeftModel):
        # First adapter: this call creates the PeftModel wrapper and leaves
        # this adapter as the active one.
        model = PeftModel.from_pretrained(
            model, _adapter_repo, adapter_name=_adapter_name
        )
    elif _adapter_name not in model.peft_config:
        # Further adapters are added to the existing wrapper; loading one
        # does not change which adapter is active.
        model.load_adapter(_adapter_repo, adapter_name=_adapter_name)

print("Adapters loaded:", ", ".join(model.peft_config))

In [None]:
def build_exam_prompt(subject: str, question: str, marks: int) -> str:
    """Assemble the prompt text for an exam-style answer.

    Args:
        subject: Subject label inserted after ``Subject:``.
        question: Question text inserted after ``Question:``.
        marks: Mark value; appears in the instruction sentence and on the
            ``Marks:`` line.

    Returns:
        The instruction line, a blank line, the three labelled fields, a
        blank line and a final ``Answer:`` line terminated by a newline.
    """
    instruction = (
        "You are an exam-answering assistant. "
        f"Write an answer appropriate for a {marks}-mark question. "
        "Be clear, correct, and concise. "
        "Do not add follow-up questions."
    )
    prompt_lines = [
        instruction,
        "",
        f"Subject: {subject}",
        f"Marks: {marks}",
        f"Question: {question}",
        "",
        "Answer:",
        "",  # yields the trailing newline after "Answer:"
    ]
    return "\n".join(prompt_lines)


def build_guided_prompt(subject: str, question: str) -> str:
    """Assemble the prompt text for a tutor-style explanation.

    Args:
        subject: Subject label inserted after ``Subject:``.
        question: Question text inserted after ``Question:``.

    Returns:
        The tutor instruction, a blank line, the two labelled fields, a
        blank line and a final ``Explanation:`` line terminated by a newline.
    """
    header = (
        "You are a tutor. "
        "Explain the concept clearly and step-by-step for learning. "
        "Use simple language and structure the explanation well."
    )
    fields = f"Subject: {subject}\nQuestion: {question}"
    # Sections are separated by one blank line; the prompt ends with a newline.
    return header + "\n\n" + fields + "\n\n" + "Explanation:" + "\n"

In [None]:
import torch

def generate(
    prompt,
    max_new_tokens=300,
    temperature=0.7,
    top_p=0.9,
    return_full_text=True,
):
    """Generate text for ``prompt`` with the notebook-level model.

    Uses the module-level ``tokenizer`` and ``model`` objects, including
    whichever adapter is currently active on ``model``.

    Args:
        prompt: Text passed to the tokenizer and then to ``model.generate``.
        max_new_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature. A positive value enables sampling;
            ``None`` or a value <= 0 switches to greedy decoding instead of
            passing an invalid temperature to the sampler.
        top_p: Nucleus-sampling threshold; only used when sampling.
        return_full_text: When True (the default, matching the original
            behaviour) the whole sequence returned by ``model.generate`` is
            decoded. When False, the leading tokens that correspond to the
            encoded prompt are dropped before decoding.

    Returns:
        The decoded text with special tokens removed.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "eos_token_id": tokenizer.eos_token_id,
    }
    if temperature is not None and temperature > 0:
        # Coerce to float so an integer temperature is accepted by the sampler.
        generation_kwargs.update(
            do_sample=True,
            temperature=float(temperature),
            top_p=top_p,
        )
    else:
        # Sampling parameters are omitted on purpose for greedy decoding.
        generation_kwargs["do_sample"] = False

    with torch.no_grad():
        output = model.generate(**inputs, **generation_kwargs)

    sequence = output[0]
    if not return_full_text:
        prompt_length = inputs["input_ids"].shape[-1]
        sequence = sequence[prompt_length:]

    return tokenizer.decode(sequence, skip_special_tokens=True)


In [None]:
# Select the exam adapter explicitly so this cell gives the same result no
# matter which adapter an earlier cell left active.
model.set_adapter("exam")

prompt = build_exam_prompt(
    subject="PHYS101",
    marks=5,
    question="State and derive Bernoulli's equation for fluid flow."
)

print(generate(prompt, max_new_tokens=250))


In [None]:
# Switch to the guided adapter: loading it earlier did not make it active, so
# without this call the guided prompt would run through the "exam" adapter.
model.set_adapter("guided")

prompt = build_guided_prompt(
    subject="PHYS101",
    question="Explain Bernoulli's principle with intuition."
)

print(generate(prompt, max_new_tokens=500))
