In [1]:
from datasets import load_dataset
from langchain.schema import Document
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import CrossEncoder
import numpy as np
import os

import warnings
warnings.filterwarnings('ignore')

dataset = load_dataset("neural-bridge/rag-dataset-12000", split="train")

contexts = [
    Document(page_content=item["context"], metadata={"orig_id": str(i)})
    for i, item in enumerate(dataset)
]

print("Chunking...")
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=120,
    length_function=len
)
docs = text_splitter.split_documents(contexts)

W0822 11:54:33.241000 25400 torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.



Chunking...


In [2]:
import os
from dotenv import load_dotenv

load_dotenv()

True

In [3]:
# 3) Sparse retriever (BM25)
bm25_retriever = BM25Retriever.from_documents(docs)
bm25_retriever.k = 20


# 4) Dense retriever (OpenAI Embeddings + FAISS)
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

DB_FAISS_PATH = "faiss_index_hybrid"  # eski indeks metadata'sızsa bu klasörü silin ve tekrar oluşturun

if os.path.exists(DB_FAISS_PATH):
    print("Mevcut FAISS veritabanı yükleniyor...")
    vectorstore = FAISS.load_local(DB_FAISS_PATH, embeddings, allow_dangerous_deserialization=True)
else:
    print("Yeni FAISS veritabanı oluşturuluyor...")
    vectorstore = FAISS.from_documents(docs, embeddings)
    vectorstore.save_local(DB_FAISS_PATH)

faiss_retriever = vectorstore.as_retriever(search_kwargs={"k": 20})



Mevcut FAISS veritabanı yükleniyor...


In [4]:
hybrid_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, faiss_retriever],
    weights=[0.5, 0.5]
    )

### Reranking

In [6]:
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

def rerank_with_crossencoder(query, docs, top_k=5):
    pairs = [[query, d.page_content] for d in docs]
    scores = cross_encoder.predict(pairs)
    ranked = sorted(zip(docs, scores), key=lambda x: x[1], reverse=True)
    return [doc for doc, _ in ranked[:top_k]]

In [7]:
template = """
You are a powerful assistant. Use the following context to answer the question.
If the answer is not present, say you don't know.


Context:
{context}


Question: {question}
Answer:
"""
rag_prompt = PromptTemplate(
template=template, input_variables=["context", "question"]
)


llm = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash", 
            temperature=0,
            max_tokens=4000
        )


def hybrid_rag_with_rerank(query: str):

    initial_docs = hybrid_retriever.get_relevant_documents(query)

    reranked_docs = rerank_with_crossencoder(query, initial_docs, top_k=3)

    context = "\n\n".join([d.page_content for d in reranked_docs])

    prompt = rag_prompt.format(context=context, question=query)
    response = llm.invoke(prompt)
    return response.content, context

In [8]:
query = "What is the author's opinion about small-time politics?"
answer, context = hybrid_rag_with_rerank(query)
print("\n--- Final Answer ---\n")
print(answer)


--- Final Answer ---

The author finds small-time politics amusing and charming, like an off-Broadway show. They believe it's entertaining to watch puffed-up citizens participate in local government.


In [9]:
print(context)

Wars in Iraq aside, there is nothing more amusing than governmental ineptitude on a federal level. Let's make that clear up front. The programming on C-Spans 1 and 2 is testament to that. But just like some off-Broadway shows are worth the trip downtown, so too does small-time politics have its charms.
It's the beauty of democracy, really: Every day, puffed-up, self-important citizens are invited to participate in local government, and every day, these mentally imbalanced people accept the challenge. Whether playing the role of maniacal elected official or just answering the call of duty as an unreasonably concerned citizen, there is a place where you can leave your unique footprint in the annals of "things that won't make much of a difference in the long run anyway."
But if it's simply entertainment you're after, you could do a lot worse than kicking back with a bucket of popcorn at your next city council meeting. With any luck, someone there will be as certifiable as the folks below.

### DeepEval Değerlendirme

In [20]:
from deepeval import evaluate
from deepeval.models import OllamaModel
from deepeval.metrics import FaithfulnessMetric, AnswerRelevancyMetric
from deepeval.test_case import LLMTestCase
import random

print("\n--- DeepEval Değerlendirme Süreci Başlatılıyor ---")

# 1) Değerlendirme verisini hazırla
df = dataset.to_pandas()
sample_df = df.sample(n=10, random_state=42)

# 2) DeepEval için değerlendirme modelini ve metrikleri tanımla
print("Değerlendirme modeli ve metrikler hazırlanıyor...")
try:
    # **ÖNEMLİ**: Terminalde `ollama run ibm/granite-code:3b` komutunu çalıştırın.
    eval_model = OllamaModel(model="granite3.3:8b")
    
    faithfulness_metric = FaithfulnessMetric(model=eval_model, threshold=0.7)
    answer_relevancy_metric = AnswerRelevancyMetric(model=eval_model, threshold=0.7)
    
except Exception as e:
    print(f"HATA: Ollama modeli yüklenemedi. {e}")
    exit()

# 3) Test caselerini oluştur
print("Test case'leri oluşturuluyor...")
test_cases = []
for index, row in sample_df.iterrows():
    question = row['question']
    expected_output = row['answer']

    print(f"\nİşleniyor: Soru {index+1}/10 -> '{question[:50]}...'")

    actual_output, retrieval_context = hybrid_rag_with_rerank(question)

    # DÜZELTME: TestCase yerine LLMTestCase kullanıldı.
    test_case = LLMTestCase(
        input=question,
        actual_output=actual_output,
        expected_output=expected_output,
        retrieval_context=[retrieval_context]
    )
    test_cases.append(test_case)

# 4) Değerlendirmeyi çalıştır
print("\n--- Değerlendirme Başlatılıyor ---")
results = evaluate(test_cases=test_cases, metrics=[faithfulness_metric, answer_relevancy_metric])


--- DeepEval Değerlendirme Süreci Başlatılıyor ---
Değerlendirme modeli ve metrikler hazırlanıyor...
Test case'leri oluşturuluyor...

İşleniyor: Soru 1319/10 -> 'What is the reaction of the user upon discovering ...'

İşleniyor: Soru 9505/10 -> 'What is the purpose of split testing in marketing ...'

İşleniyor: Soru 7223/10 -> 'What measures are being taken by major Asian econo...'

İşleniyor: Soru 5392/10 -> 'What is the central setting of the movie "BREAKER ...'

İşleniyor: Soru 9167/10 -> 'What was the main issue in the 2006 lawsuit filed ...'

İşleniyor: Soru 8941/10 -> 'What is Ed Yardeni's prediction for the S&P 500 by...'

İşleniyor: Soru 3709/10 -> 'What is the Equinox Ski Challenge?...'

İşleniyor: Soru 3574/10 -> 'What is Boz Scaggs' current fascination in music s...'

İşleniyor: Soru 684/10 -> 'What is the main character of the context doing?...'

İşleniyor: Soru 9038/10 -> 'What are some of the features and services offered...'

--- Değerlendirme Başlatılıyor ---


Output()



Metrics Summary

  - ❌ Faithfulness (score: 0.5, threshold: 0.7, strict: False, evaluation model: granite3.3:8b (Ollama), reason: The score is 0.50 because the actual output uses the term 'much' to describe the extent of flashbacks in the story, which contradicts the retrieval context stating 'The story is told in flashbacks'. The context implies a significant portion, not just 'much', thus causing a partial misalignment., error: None)
  - ❌ Answer Relevancy (score: 0.5, threshold: 0.7, strict: False, evaluation model: granite3.3:8b (Ollama), reason: The score is 0.50 because although the response correctly identifies 'Breaker Morant' as a movie, it fails to directly address the central setting as requested in the input., error: None)

For test case:

  - input: What is the central setting of the movie "BREAKER MORANT"?
  - actual output: The central setting is a military court room with much of the story told in flashback.
  - expected output: The central setting is a military court