# ✅ RAG Chatbot
# Uses a free Hugging Face LLM (Mistral-7B-Instruct); the model repo is gated, so a Hugging Face access token with approved access is required

In [6]:
# ✅ Install dependencies
!pip install -q transformers accelerate sentence-transformers faiss-cpu PyPDF2

In [7]:
# ✅ Imports
import os
from getpass import getpass

import faiss
import torch
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer

In [8]:
# ✅ Load the sentence-embedding model (weights are downloaded from the Hub on first use)
embedding_model_name = "all-MiniLM-L6-v2"
embed_model = SentenceTransformer(embedding_model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [9]:
# ✅ Load the local HuggingFace LLM
# The Mistral-7B-Instruct repo is gated: loading it anonymously fails with a
# 401 error. Your Hugging Face account must have been granted access on the
# model page, and an access token must be supplied here.
model_id = "mistralai/Mistral-7B-Instruct-v0.2"

# Resolve the token from, in order: the HF_TOKEN environment variable, the
# Colab secret of the same name, and finally an interactive (hidden) prompt.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    try:
        # Imported lazily because this module only exists on Google Colab.
        from google.colab import userdata
        hf_token = userdata.get("HF_TOKEN")
    except Exception:
        # Best effort: not running on Colab, or the secret is missing or not
        # shared with this notebook. Fall through to the prompt below.
        hf_token = None
if not hf_token:
    # An empty answer becomes None so that from_pretrained falls back to its
    # default credential lookup instead of sending an empty token.
    hf_token = getpass("Hugging Face access token (account needs access to the gated model): ") or None

tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # half precision to reduce memory use
    device_map="auto",          # let accelerate place the weights on the available device(s)
    token=hf_token,
)


OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2.
401 Client Error. (Request ID: Root=1-6885b20e-53678ad90bd542bb10684594;759edb0b-efa0-4bbd-ae00-fceafa53f618)

Cannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/resolve/main/config.json.
Access to model mistralai/Mistral-7B-Instruct-v0.2 is restricted. You must have access to it and be authenticated to access it. Please log in.

In [None]:
# ✅ Upload the PDF (Colab file picker)
from google.colab import files

# files.upload() returns a dict keyed by uploaded file name; it is empty when
# the dialog is cancelled, so fail with a clear message instead of an IndexError.
uploaded = files.upload()
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and choose a PDF.")

# Only the first uploaded file is used.
pdf_path = next(iter(uploaded))

In [None]:
# Extract the text of every page and join the non-empty pages with newlines.
reader = PdfReader(pdf_path)
# Text extraction is the expensive step, so call it exactly once per page
# (the generator is consumed lazily by the join below).
page_texts = (page.extract_text() for page in reader.pages)
doc_text = "\n".join(text for text in page_texts if text)

# Without extractable text (for example an image-only scan) there is nothing
# to chunk, embed or retrieve, so stop here with an explicit error.
if not doc_text.strip():
    raise ValueError(f"No extractable text found in {pdf_path!r}.")

In [None]:
# ✅ Chunking
def chunk_text(text, max_len=500):
    """Split ``text`` into chunks of whole sentences, each at most ``max_len`` characters.

    Sentences are delimited by ``". "``. Consecutive sentences are packed
    greedily into the current chunk, separated by a single space, until adding
    the next one would exceed ``max_len``.

    Args:
        text: The document text to split.
        max_len: Maximum chunk length in characters. A single sentence longer
            than this is kept intact as its own (oversized) chunk.

    Returns:
        A list of non-empty chunk strings in document order; an empty list
        when ``text`` contains no non-blank sentence.
    """
    pieces = text.split(". ")
    final_idx = len(pieces) - 1
    chunks, chunk = [], ""
    for idx, piece in enumerate(pieces):
        sentence = piece.strip()
        if not sentence:
            # Blank fragments (e.g. from a trailing ". ") carry no content.
            continue
        if idx != final_idx:
            # split() removed the period of every piece except the last one;
            # the last piece keeps whatever punctuation it already had.
            sentence += "."
        candidate = f"{chunk} {sentence}" if chunk else sentence
        if len(candidate) <= max_len:
            chunk = candidate
        else:
            # Flush only a non-empty chunk, so an oversized first sentence
            # does not produce an empty chunk.
            if chunk:
                chunks.append(chunk)
            chunk = sentence
    if chunk:
        chunks.append(chunk)
    return chunks

# Split the extracted document text into chunks using chunk_text's default max_len.
chunks = chunk_text(doc_text)

In [None]:
# ✅ Embed every chunk and store the vectors in a flat L2 FAISS index
embeddings = embed_model.encode(chunks)
# The index dimensionality is the length of one embedding vector.
embedding_dim = embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(embeddings)

In [None]:
# ✅ Ask a question
question = "What is this document about?"

# Never request more neighbours than the index holds: FAISS pads missing
# results with the id -1, and chunks[-1] would silently return the last chunk.
top_k = min(3, index.ntotal)
if top_k == 0:
    raise ValueError("The FAISS index is empty; add at least one chunk before searching.")

q_embed = embed_model.encode([question])
_, I = index.search(q_embed, k=top_k)
# I has one row per query; keep only valid (non-negative) chunk ids.
retrieved = [chunks[i] for i in I[0] if i >= 0]

# Build the instruction prompt from the retrieved chunks and the question.
context = "\n".join(retrieved)
prompt = f"<s>[INST] You are a helpful assistant. Answer the question using the context.\n\nContext:\n{context}\n\nQuestion: {question} [/INST]"


In [None]:
# ✅ Generate answer
# The prompt already starts with a literal "<s>" (BOS) marker, so special
# tokens are not added again here; otherwise the sequence would start with
# two BOS tokens.
inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)
with torch.no_grad():  # inference only, no gradients needed
    outputs = model.generate(**inputs, max_new_tokens=300, do_sample=True, temperature=0.7)

# generate() returns the prompt tokens followed by the continuation; decode
# only the newly generated part so the answer does not echo the whole prompt.
prompt_len = inputs["input_ids"].shape[1]
answer = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

print("\n\n📌 Answer:\n", answer)