# libraries

In [None]:
!pip install colab-xterm

Collecting colab-xterm
  Downloading colab_xterm-0.2.0-py3-none-any.whl.metadata (1.2 kB)
Downloading colab_xterm-0.2.0-py3-none-any.whl (115 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/115.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.6/115.6 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: colab-xterm
Successfully installed colab-xterm-0.2.0


In [None]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl (31.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m66.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.11.0


In [None]:
!pip install langchain_ollama

Collecting langchain_ollama
  Downloading langchain_ollama-0.3.3-py3-none-any.whl.metadata (1.5 kB)
Collecting ollama<1.0.0,>=0.4.8 (from langchain_ollama)
  Downloading ollama-0.5.1-py3-none-any.whl.metadata (4.3 kB)
Downloading langchain_ollama-0.3.3-py3-none-any.whl (21 kB)
Downloading ollama-0.5.1-py3-none-any.whl (13 kB)
Installing collected packages: ollama, langchain_ollama
Successfully installed langchain_ollama-0.3.3 ollama-0.5.1


In [7]:
!pip install langchain langchain_community unsloth[cu118] accelerate bitsandbytes transformers huggingface-hub -U

Collecting langchain_community
  Downloading langchain_community-0.3.26-py3-none-any.whl.metadata (2.9 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.46.0-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting transformers
  Downloading transformers-4.53.0-py3-none-any.whl.metadata (39 kB)
Collecting huggingface-hub
  Downloading huggingface_hub-0.33.1-py3-none-any.whl.metadata (14 kB)
Collecting unsloth[cu118]
  Downloading unsloth-2025.6.8-py3-none-any.whl.metadata (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.9/46.9 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.1-py3-none

# Loading Data

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Loading ollama

In [9]:
%load_ext colabxterm
%xterm

The colabxterm extension is already loaded. To reload it, use:
  %reload_ext colabxterm


Launching Xterm...

<IPython.core.display.Javascript object>

In [None]:
%reload_ext colabxterm

# RAG ChatBot

In [None]:
import json
import numpy as np
import faiss
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import ChatOllama
from langchain_community.embeddings import OllamaEmbeddings
from langchain.prompts import PromptTemplate

# --- Load JSON dataset ---
def load_dataset(json_path):
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return data

# --- Chunk text + label + id ---
def prepare_chunks(data):
    splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=200)
    chunks = []
    for item in data:
        split_texts = splitter.split_text(item["text"])
        for chunk in split_texts:
            chunks.append({
                "text": chunk,
                "label": item["label"],

            })
    return chunks

# --- Embed all chunks ---
def embed_chunks(embedding_model, chunks):
    texts = [chunk["text"] for chunk in chunks]
    embeddings = embedding_model.embed_documents(texts)
    return np.array(embeddings).astype("float32")

# --- Build FAISS index ---
def build_faiss_index(embeddings):
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)
    return index

# --- Retrieve top-k relevant chunks ---
def retrieve_chunks(embedding_model, query, chunks, index, k=3):
    q_emb = np.array([embedding_model.embed_query(query)]).astype("float32")
    distances, indices = index.search(q_emb, k)
    results = []
    max_distance = max(distances[0]) if len(distances[0]) > 0 else 1
    for rank, i in enumerate(indices[0]):
        score = 1 - distances[0][rank] / max_distance if max_distance > 0 else 1
        results.append({
            "chunk": chunks[i],
            "score": round(score * 100, 2)
        })
    return results

# --- QA Prompt Template ---
qa_prompt_template = PromptTemplate(
    template="""
You are an intelligent assistant answering questions in Farsi using ONLY the context below.

Rules:
- You must answer **strictly and only** based on the provided context.
- Do NOT use prior knowledge.
- If the answer is not in the context, respond with: "پاسخ در متن موجود نیست."
-The answer might be in a different phrasing.
-Look for answer carefully
- Answer clearly and accurately, in Farsi only.


Context:
{context}

Question:
{question}

Answer:
""",
    input_variables=["context", "question"]
)

# --- Main QA Loop ---
def main():
    json_path = "/content/drive/MyDrive/Farsi_Sports_Dataset/Final/voting_classifier_small.json"
    print("Loading dataset...")
    data = load_dataset(json_path)

    print("Preparing chunks...")
    chunks = prepare_chunks(data)
    print(f"Total chunks: {len(chunks)}")

    print("Initializing model...")
    llm = ChatOllama(model="llama3.1", temperature=0.3)
    embedding_model = OllamaEmbeddings(model="llama3.1")

    print("Embedding chunks...")
    embeddings = embed_chunks(embedding_model, chunks)

    print("Building FAISS index...")
    index = build_faiss_index(embeddings)

    while True:
        query = input("❓ Ask your question . Type 'exit' to quit: ").strip()
        if query.lower() == "exit":
            print("Goodbye!")
            break

        print("🔎 Retrieving best matching chunks...")
        top_chunks = retrieve_chunks(embedding_model, query, chunks, index, k=5)  # k=5 برای نتایج بهتر

        context = "\n\n".join([c['chunk']['text'] for c in top_chunks])

        prompt = qa_prompt_template.format(context=context, question=query)

        print("🧠 Generating answer...")
        response = llm.invoke(prompt).content

        print("\n📘 Answer:", response)
        print("🔍 Match Info:")
        for i, result in enumerate(top_chunks, 1):
            chunk_meta = result['chunk']
            print(f"\nResult {i}:")
            print(f"🔹 Label: {chunk_meta['label']}")
            print(f"🔹 Score: {result['score']}%")
            print(f"🔹 Matched Text:\n{chunk_meta['text'][:500]}...")
            print("-" * 40)

if __name__ == "__main__":
    main()

Loading dataset...
Preparing chunks...
Total chunks: 100
Initializing model...
Embedding chunks...
Building FAISS index...
❓ Ask your question . Type 'exit' to quit: تیم ملی فوتبال ایران در مسابقات مقدماتی جام جهانی چه نتیجه‌ای گرفت؟
🔎 Retrieving best matching chunks...
🧠 Generating answer...

📘 Answer: شاگردان با نتیجه ۲ بر ۰ برنده شدند.
🔍 Match Info:

Result 1:
🔹 Label: ورزشی
🔹 Score: 2.809999942779541%
🔹 Matched Text:
عنوان: تیم ملی فوتبال ایران در مسابقات مقدماتی جام جهانی به پیروزی رسید.
خلاصه: شاگردان با نتیجه ۲ بر ۰ برنده شدند.
متن: این دیدار حساس در ورزشگاه آزادی برگزار شد و بازیکنان با تلاش فراوان موفق شدند....
----------------------------------------

Result 2:
🔹 Label: ورزشی
🔹 Score: 1.4900000095367432%
🔹 Matched Text:
عنوان: مسابقات تیراندازی با کمان در تهران برگزار شد.
خلاصه: ورزشکاران ملی‌پوش در این مسابقات شرکت کردند.
متن: رقابت‌ها با حضور داوران بین‌المللی برگزار شد....
----------------------------------------

Result 3:
🔹 Label: ورزشی
🔹 Score: 1.4900000095367432%
🔹 Mat

KeyboardInterrupt: Interrupted by user