<a href="https://colab.research.google.com/github/mesami8/ArchInternship/blob/main/task3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# --- Setup: Install Required Packages ---
!pip install -U unsloth bitsandbytes transformers faiss-cpu sentence-transformers
# Re-run this cell first
!pip install --upgrade --quiet unsloth bitsandbytes transformers faiss-cpu sentence-transformers



Collecting unsloth
  Downloading unsloth-2025.8.1-py3-none-any.whl.metadata (47 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.3/47.3 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting transformers
  Downloading transformers-4.54.1-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting faiss-cpu
  Downloading faiss_cpu-1.11.0.post1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.0 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-5.0.0-py3-none-any.whl.metadata (16 kB)
Collecting unsloth_zoo>=2025.8.1 (from unsloth)
  Downloading unsloth_zoo-2025.8.1-py3-none-any.whl.metadata (8.1 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.31.post1-cp39-abi3-many

In [None]:
# --- Imports ---
import torch
import faiss
import numpy as np
from transformers import pipeline
from sentence_transformers import SentenceTransformer
from IPython.display import display, Markdown
# from unsloth.hf_trainer import load_model # Removed as it's not available
from unsloth import FastLanguageModel # Use FastLanguageModel

In [None]:
# --- Load Unsloth 4-bit Quantized Model ---
from unsloth import FastLanguageModel

# Checkpoint to load; any other 4-bit model published by Unsloth can be
# substituted here.
MODEL_NAME = "unsloth/mistral-7b-bnb-4bit"
# Maximum sequence length passed to the loader.
MAX_SEQ_LENGTH = 4096

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_NAME,
    max_seq_length=MAX_SEQ_LENGTH,
    dtype=None,  # Let Unsloth decide
    load_in_4bit=True,
)

In [None]:
# --- Load Embedding Model ---
# Sentence encoder used for both the documents and the incoming queries.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"  # small and fast
embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)


In [None]:
# --- Knowledge base: replace these with your own domain-specific documents ---
# Each entry is embedded and retrieved as a single unit.
documents = [
    "The mitochondria is the powerhouse of the cell.",  # biology
    "Transformers are models based on self-attention mechanisms.",  # ML architecture
    "Python is a programming language used for many AI tasks.",  # tooling
    "Unsloth is a library that enables loading 4-bit quantized models efficiently.",  # tooling
    "Retrieval-Augmented Generation (RAG) combines search and generation.",  # RAG
]


In [None]:
# --- Embed Documents ---
# The documents above are short, so each one is embedded whole. For long
# documents, add a chunking step here and embed the chunks instead.
doc_embeddings = embedding_model.encode(
    documents,
    convert_to_numpy=True,
)

In [None]:
# --- FAISS Indexing ---
# Flat L2 index sized to the embedding width (second axis of the matrix),
# then populated with every document vector.
dimension = doc_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(doc_embeddings)

In [None]:
# --- Mapping Index to Document Chunks ---
# Position in `documents` (the order the vectors were embedded in) -> text.
index_to_doc = dict(enumerate(documents))

In [None]:

# --- RAG Function ---
def rag_query(query, top_k=2, max_new_tokens=100, temperature=0.7):
    """Answer ``query`` using the documents retrieved from the FAISS index.

    Relies on the notebook-level ``embedding_model``, ``index``,
    ``index_to_doc`` and ``text_gen`` objects, which must exist before the
    function is called.

    Args:
        query: The question to answer.
        top_k: Maximum number of documents to retrieve as context. Clamped to
            the number of vectors stored in the index.
        max_new_tokens: Upper bound on generated tokens, forwarded to
            ``text_gen``.
        temperature: Sampling temperature, forwarded to ``text_gen``.

    Returns:
        The generated answer text with surrounding whitespace stripped.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError("top_k must be a positive integer")

    # Step 1: Embed the query (FAISS expects a 2-D float32 array).
    query_embedding = np.asarray(embedding_model.encode([query]), dtype=np.float32)

    # Step 2: Retrieve the nearest documents. Never ask for more neighbours
    # than the index holds: FAISS pads missing results with the label -1,
    # which is not a key of index_to_doc.
    k = min(top_k, index.ntotal)
    retrieved_docs = []
    if k > 0:
        _, neighbor_ids = index.search(query_embedding, k)
        retrieved_docs = [index_to_doc[int(i)] for i in neighbor_ids[0] if i >= 0]

    # Step 3: Join the retrieved documents into the context block.
    context = "\n".join(retrieved_docs)

    # Step 4: Build the prompt.
    prompt = f"""Answer the question using the information below:\n\n{context}\n\nQuestion: {query}\nAnswer:"""

    # Step 5: Generate. return_full_text=False makes the pipeline return only
    # the continuation, so the prompt does not need to be cut away by
    # searching for "Answer:" (which picks the wrong segment whenever the
    # model, the query or the context contains that marker again).
    generated = text_gen(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        return_full_text=False,
    )[0]["generated_text"]

    return generated.strip()


In [None]:
# --- Example Query ---
query = "The mitochondria is the powerhouse of the cell?"
answer = rag_query(query)
display(Markdown(f"**Q:** {query}\n\n**A:** {answer}"))

In [None]:
# --- Create Text Generation Pipeline ---
text_gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=100,
    do_sample=True,
    temperature=0.7,
)