In [None]:
!pip install -q --upgrade transformers accelerate bitsandbytes peft sentence-transformers faiss-cpu gradio

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# Base checkpoint that the LoRA adapter is attached to.
model_name = "mistralai/Mistral-7B-Instruct-v0.2"

# 4-bit NF4 quantization with double quantization; matmuls are computed in fp16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

# `trust_remote_code` is intentionally NOT enabled: the Mistral architecture is
# implemented inside transformers itself, so there is no reason to allow
# repository-supplied Python code to run at load time.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)

# The tokenizer is configured to reuse EOS as the padding token and to pad on
# the right.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Directory expected to contain the trained adapter files.
# NOTE(review): hard-coded path — presumably a Colab working directory; confirm.
adapter_path = "/content"

# Wrap the quantized base model with the adapter loaded from `adapter_path`.
model = PeftModel.from_pretrained(base_model, adapter_path)



In [None]:
# Calls PEFT's merge_and_unload() on the adapter-wrapped model and keeps the result.
# NOTE(review): `merged_model` is not referenced by any later visible cell — the
# generation code calls `model.generate(...)`, and the UI title says "LoRA Attached".
# Confirm whether this merge is wanted at all; the base model here is loaded in
# 4-bit, so a merge presumably goes through quantized weights — TODO verify.
merged_model = model.merge_and_unload()

In [None]:
from sentence_transformers import SentenceTransformer
import faiss
import pickle
import gradio as gr

In [None]:
# Load the prebuilt FAISS index from disk.
index = faiss.read_index("/content/faiss.index")

# Load the chunk records that the index positions refer to (they are later
# looked up as `chunks[idx]` and read via the "text" / "id" keys).
# SECURITY: pickle.load executes arbitrary code embedded in the file — only
# load a chunks.pkl that you produced yourself or otherwise fully trust.
with open("/content/chunks.pkl", "rb") as f:
    chunks = pickle.load(f)

# Sentence embedding model used to encode queries for the index search.
# NOTE(review): presumably the same model that built the index — confirm.
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


In [None]:
def retrieve(query: str, k: int = 5):
    """Return up to ``k`` chunk records matching ``query``.

    The query is encoded with the module-level ``embedder`` (normalized
    embeddings) and searched against the module-level ``index``. Result slots
    whose index is ``-1`` are skipped.

    Args:
        query: Free-text search query.
        k: Number of neighbours requested from the index.

    Returns:
        A list of dicts with keys ``"id"`` (the chunk's own ``"id"`` or a
        ``chunk-NNNNN`` fallback built from its position), ``"text"`` and
        ``"score"`` (the value returned by the index search, as a float), in
        the order the index returned them.
    """
    query_vec = embedder.encode([query], normalize_embeddings=True)
    scores, positions = index.search(query_vec, k)
    ranked = zip(scores[0].tolist(), positions[0].tolist())
    return [
        {
            "id": chunks[pos].get("id", f"chunk-{pos:05d}"),
            "text": chunks[pos]["text"],
            "score": float(sim),
        }
        for sim, pos in ranked
        if pos != -1
    ]

In [None]:
# Number of passages retrieved per question and placed into the prompt.
K_INTERNAL   = 5
# Upper / lower bounds on the number of tokens generated per answer.
MAX_NEW_TOKS = 600
MIN_NEW_TOKS = 50

# System instruction embedded in every prompt. It restricts the model to the
# retrieved context and tells it to admit when the context has no answer.
TUTOR_SYSTEM = (
    "You are AlgoTutor, an algorithms & data-structures tutor.\n"
    "Answer in detail USING ONLY the provided context passages.\n"
    "If the answer is not in the context, say you don't know."
)

def build_prompt_no_cite(question: str, passages: list[str]) -> str:
    """Assemble the instruction prompt for one question.

    The prompt opens with a literal ``<s>[INST]`` marker and a ``<<SYS>>``
    section holding ``TUTOR_SYSTEM``, followed by the question, the passages
    (separated by blank lines), and a closing instruction ending in
    ``[/INST]``.

    Args:
        question: The user's question, inserted verbatim.
        passages: Context passage texts, inserted verbatim in the given order.

    Returns:
        The complete prompt string.
    """
    context_block = "\n\n".join(passages)
    sections = [
        f"<s>[INST] <<SYS>>\n{TUTOR_SYSTEM}\n<</SYS>>",
        f"Question: {question}",
        f"Context:\n{context_block}",
        "Now answer using only the context above. [/INST]",
    ]
    return "\n\n".join(sections)

@torch.inference_mode()
def generate_attached(prompt: str, temperature: float = 0.3) -> str:
    """Generate a completion for ``prompt`` with the module-level ``model``.

    Args:
        prompt: Fully formatted prompt text. It may already begin with the
            tokenizer's BOS marker (for example a literal ``<s>``).
        temperature: Sampling temperature. Values ``<= 0`` select greedy
            decoding instead of sampling.

    Returns:
        Only the newly generated text (the prompt is excluded), with
        surrounding whitespace stripped.
    """
    temperature = float(temperature)

    # If the prompt text already starts with the BOS marker, do not let the
    # tokenizer prepend a second one; otherwise keep its default behaviour.
    bos = getattr(tokenizer, "bos_token", None)
    prompt_has_bos = bool(bos) and prompt.startswith(bos)
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        add_special_tokens=not prompt_has_bos,
    ).to(model.device)

    gen_kwargs = dict(
        min_new_tokens=int(MIN_NEW_TOKS),
        max_new_tokens=int(MAX_NEW_TOKS),
        repetition_penalty=1.1,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )
    if temperature > 0:
        gen_kwargs.update(do_sample=True, temperature=temperature, top_p=0.9)
    else:
        # Sampling needs a strictly positive temperature; fall back to greedy.
        gen_kwargs.update(do_sample=False)

    out = model.generate(**inputs, **gen_kwargs)

    # Decode only the tokens produced after the prompt. Splitting the decoded
    # text on "[/INST]" would cut the answer short whenever the generation
    # itself contains that marker.
    prompt_len = inputs["input_ids"].shape[1]
    new_tokens = out[0][prompt_len:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

def tutor_answer_attached(question: str, temperature: float = 0.3) -> str:
    """Answer ``question`` from retrieved context.

    Retrieves ``K_INTERNAL`` chunks for the question, builds the prompt from
    their ``"text"`` fields, and returns the generated completion.

    Args:
        question: The user's question.
        temperature: Sampling temperature forwarded to ``generate_attached``.

    Returns:
        The generated answer text.
    """
    hits = retrieve(question, k=K_INTERNAL)
    prompt = build_prompt_no_cite(question, [hit["text"] for hit in hits])
    return generate_attached(prompt, temperature=temperature)


In [None]:
import os, time, torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from gradio.themes import Soft

# Gradio "Soft" theme with an indigo / violet / slate palette; passed to gr.Blocks.
theme = Soft(primary_hue="indigo", secondary_hue="violet", neutral_hue="slate")
# Custom stylesheet passed to gr.Blocks. It hides the page footer, styles the
# element with id "header" (the gr.HTML banner) and the element with id
# "askbtn" (the Ask button's elem_id), sets the container font size, and
# rounds / spaces the listed component classes.
# NOTE(review): the `.gr-*` class selectors depend on Gradio's generated class
# names — confirm they still match the installed Gradio version.
CSS = """
footer {visibility: hidden;}
#header {
  background: linear-gradient(135deg, #eef2ff, #f5f3ff);
  border: 1px solid #ececf6; padding: 16px 18px; border-radius: 16px;
  box-shadow: 0 6px 20px rgba(60,64,67,.08);
}
.gradio-container { font-size: 16px; }
button, .gr-box, .gr-panel, .gr-textbox, .gr-slider { border-radius: 14px !important; }
.gr-group, .gr-row, .gr-column { gap: 14px; }
#askbtn { min-height: 44px; font-weight: 600; }
"""

def stream_answer(history, user_text, temperature):
    """Yield successive chat histories that reveal the tutor's answer word by word.

    The full answer is generated first and then replayed incrementally to give
    a typing effect. Whitespace between words (including newlines) is kept, so
    markdown structure such as lists and code blocks survives in the chatbot.

    Args:
        history: Existing chat history as a list of ``[user, bot]`` pairs, or
            ``None`` / empty for a fresh conversation.
        user_text: The question typed by the user.
        temperature: Sampling temperature forwarded to ``tutor_answer_attached``.

    Yields:
        The chat history list with the last pair's bot message growing until
        it holds the whole answer. Blank input yields the unchanged history
        once and stops without calling the model.
    """
    import re  # local import so this block does not depend on the import cell

    if not (user_text or "").strip():
        # Nothing to ask: leave the conversation as it is.
        yield history or []
        return

    history = (history or []) + [[user_text, ""]]
    # Show the user's message right away; generation can take a while, and an
    # empty answer would otherwise never update the chat at all.
    yield history

    full = tutor_answer_attached(user_text, temperature)
    acc = ""
    # Each piece is one word together with the whitespace that precedes it.
    for piece in re.findall(r"\s*\S+", full):
        acc += piece
        history[-1][1] = acc
        time.sleep(0.012)
        yield history

# Build the tutor UI: a question box, temperature slider, action buttons and
# example questions on the left; the conversation on the right.
# NOTE(review): the banner says "AlgoBot" while the page title uses
# "AlgoTutor" — confirm which product name is intended.
with gr.Blocks(theme=theme, css=CSS, title="AlgoTutor — LoRA Attached") as demo:
    gr.HTML("<div id='header'><h2>📚 AlgoBot</h2>"
            "<div>Clear, grounded explanations for any algorithm question</div></div>")
    with gr.Row():
        with gr.Column(scale=3):
            q = gr.Textbox(label="Ask the Tutor", placeholder="e.g., Explain amortized analysis…", lines=3)
            temp = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.1, step=0.1,
                label="Temperature",
            )
            with gr.Row():
                ask = gr.Button("Ask Tutor", variant="primary", elem_id="askbtn")
                clear = gr.Button("Clear")
            gr.Examples(
                examples=[
                    "What is amortized analysis for dynamic arrays?",
                    "Explain union-find with path compression and union by rank.",
                    "When is BFS different from Dijkstra’s algorithm?"
                ],
                inputs=[q],
                label="Try these"
            )
        with gr.Column(scale=4):
            chat = gr.Chatbot(label="Conversation", height=420, render_markdown=True, show_copy_button=True)

    # The button and the textbox's submit event both run the same handler.
    ask.click(stream_answer, inputs=[chat, q, temp], outputs=chat)
    q.submit(stream_answer, inputs=[chat, q, temp], outputs=chat)
    # One handler resets both the conversation and the question box.
    clear.click(lambda: ([], ""), inputs=None, outputs=[chat, q])

# stream_answer is a generator; enable the queue explicitly so streaming
# updates work regardless of the installed Gradio version's default.
demo.queue()
demo.launch(debug=True)


In [None]:
# Put the adapter-wrapped model into evaluation mode.
# NOTE(review): this cell sits after the cell that calls demo.launch(debug=True),
# so on a top-to-bottom run it only executes once that cell has returned —
# presumably it belongs right after the model-loading cell; confirm and move.
model.eval()