In [1]:
import os
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.docstore.document import Document
from datasets import load_dataset
from sentence_transformers import CrossEncoder, SentenceTransformer
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS

W0822 11:36:04.556000 24464 torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.



In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
def create_faiss_db_from_dataset(chunk_size, chunk_overlap, embeddings, db_path="faiss_index"):
    """Load the FAISS index saved at ``db_path``, or build and save it from the RAG dataset.

    If ``db_path`` already exists, the saved index is loaded and returned as-is.
    ``chunk_size`` and ``chunk_overlap`` are NOT consulted in that case, so an
    index built with different settings (or an older one without ``orig_id``
    metadata) must be deleted by hand before it can be rebuilt.

    Otherwise the ``train`` split of ``neural-bridge/rag-dataset-12000`` is
    downloaded, each row's ``context`` field is wrapped in a ``Document`` tagged
    with its row position (``orig_id``), the documents are split into chunks,
    embedded into a new FAISS index, and the index is written to ``db_path``.

    Args:
        chunk_size: Maximum chunk length passed to the splitter (measured with ``len``).
        chunk_overlap: Overlap between consecutive chunks, in the same unit.
        embeddings: Embedding object handed to FAISS for loading or building the index.
        db_path: Location of the saved index. Defaults to ``"faiss_index"``.

    Returns:
        The loaded or newly built FAISS vector store.
    """
    if os.path.exists(db_path):
        print("Mevcut FAISS veritabanı yükleniyor...")
        # SECURITY: allow_dangerous_deserialization=True opts in to unpickling the
        # saved docstore. Only point db_path at an index you created yourself.
        return FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)

    print("Veriseti indiriliyor: 'neural-bridge/rag-dataset-12000'...")
    dataset = load_dataset("neural-bridge/rag-dataset-12000", split="train")
    print("Veriseti başarıyla indirildi.")

    # The row position is stored as "orig_id" so every chunk can be traced back
    # to the dataset row it came from.
    contexts = [
        Document(page_content=row["context"], metadata={"orig_id": str(row_index)})
        for row_index, row in enumerate(dataset)
    ]

    print("Chunking...")
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    # Per the original author's note, the splitter carries the document metadata
    # over to each chunk, so no extra bookkeeping is done here.
    docs = text_splitter.split_documents(contexts)
    print(f"{len(docs)} adet doküman parçası oluşturuldu.")

    print("FAISS veritabanı oluşturuluyor ve kaydediliyor...")
    db = FAISS.from_documents(docs, embeddings)
    db.save_local(db_path)
    print("FAISS başarıyla oluşturuldu ve kaydedildi.")
    return db


In [4]:
class Retrievers:
    """Retrieval strategies (HyDE, query rewriting, cross-encoder reranking) over one FAISS index.

    Construction is expensive: it creates the Gemini chat model and embedding
    client, loads the cross-encoder, and loads or builds the FAISS index through
    ``create_faiss_db_from_dataset``.
    """

    def __init__(self, chunk_size=1000, chunk_overlap=120):
        """Create the models, the vector store, and the prompt chains.

        Args:
            chunk_size: Chunk length forwarded to the index builder; also the
                document length requested by the HyDE prompt.
            chunk_overlap: Chunk overlap forwarded to the index builder.
        """
        self.llm = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash",
            temperature=0,
            max_tokens=4000,
        )

        self.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
        self.cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap

        self.vectorstore = create_faiss_db_from_dataset(
            chunk_size=self.chunk_size,
            chunk_overlap=self.chunk_overlap,
            embeddings=self.embeddings,
        )

        # NOTE(review): "has be" in the template below looks like a typo for
        # "has to be"; it is left untouched so the prompt sent to the model is unchanged.
        self.hyde_prompt = PromptTemplate(
            input_variables=["query", "chunk_size"],
            template="""Given the question <query>'{query}'</query>, generate a hypothetical document that directly answers this question. The document should be detailed and in-depth. The document size has be exactly {chunk_size} characters.""",
        )

        # The leading whitespace inside the templates below is part of the prompt text.
        self.query_rewrite_prompt = PromptTemplate(
            input_variables=["original_query"],
            template="""You are an AI assistant tasked with reformulating user queries to improve retrieval in a RAG system. 
            Given the original query, rewrite it to be more specific, detailed, and likely to retrieve relevant information.

            Original query: {original_query}

            Rewritten query:"""
        )

        rag_prompt = PromptTemplate(
            input_variables=["context", "question"],
            template="""
                You are a helpful assistant. Use the following context to answer the question.
                If the answer cannot be found in the context, say "I could not find the answer in the provided context."

                Context:
                {context}

                Question:
                {question}

                Answer:
                """
        )

        # Each chain pipes a prompt into the shared chat model.
        self.rag_chain = rag_prompt | self.llm
        self.hyde_chain = self.hyde_prompt | self.llm
        self.query_rewriter_chain = self.query_rewrite_prompt | self.llm

    def query_rewrite_retrieve(self, query, k=3, verbose=False):
        """Rewrite ``query`` with the LLM, then search the index with the rewritten text.

        Args:
            query: The user's original question.
            k: Number of results requested from the vector store.
            verbose: When True, print the rewritten query. Off by default,
                matching the previously commented-out debug prints.

        Returns:
            Whatever ``vectorstore.similarity_search_with_score`` returns for the
            rewritten query (the notebook consumes it as ``(document, score)`` pairs).
        """
        rewritten_query = self.query_rewriter_chain.invoke({"original_query": query}).content

        if verbose:
            print("\n--- Üretilen Sorgu ---\n")
            print(rewritten_query)
            print("\n----------------------------------\n")

        return self.vectorstore.similarity_search_with_score(rewritten_query, k=k)

    def hyde_retrieve(self, query, k=3, verbose=True):
        """Generate a hypothetical answer document (HyDE) and search the index with it.

        Args:
            query: The user's question.
            k: Number of results requested from the vector store.
            verbose: When True (the default, preserving earlier behaviour),
                print the generated hypothetical document.

        Returns:
            Whatever ``vectorstore.similarity_search_with_score`` returns for the
            hypothetical document (the notebook consumes it as ``(document, score)`` pairs).
        """
        prompt_inputs = {"query": query, "chunk_size": self.chunk_size}
        hypothetical_doc = self.hyde_chain.invoke(prompt_inputs).content

        if verbose:
            print("\n--- Üretilen Hipotetik Doküman ---\n")
            print(hypothetical_doc)
            print("\n----------------------------------\n")

        return self.vectorstore.similarity_search_with_score(hypothetical_doc, k=k)

    def reranking(self, query, initial_docs_with_scores, n=5):
        """Re-score candidates against ``query`` with the cross-encoder and keep the best ``n``.

        Args:
            query: The question every candidate is scored against.
            initial_docs_with_scores: ``(document, score)`` pairs; each document
                must expose ``page_content``. The incoming score is discarded.
            n: Maximum number of pairs to return.

        Returns:
            A list of ``(document, cross_encoder_score)`` tuples sorted by score,
            highest first, truncated to ``n`` entries. Empty when there are no
            candidates.
        """
        candidates = list(initial_docs_with_scores)
        if not candidates:
            # Nothing to score; avoid handing the cross-encoder an empty batch.
            return []

        cross_inp = [[query, doc.page_content] for doc, _ in candidates]
        cross_scores = self.cross_encoder.predict(cross_inp)

        reranked_docs = sorted(
            ((doc, new_score) for (doc, _), new_score in zip(candidates, cross_scores)),
            key=lambda pair: pair[1],
            reverse=True,
        )
        return reranked_docs[:n]



In [5]:
import os
from dotenv import load_dotenv

# Load the .env file
load_dotenv()

True

In [6]:
# Instantiate the retriever bundle used by the demo cells that follow.
retriever = Retrievers()

Mevcut FAISS veritabanı yükleniyor...


### Basic Retrieval

In [7]:
# Baseline: search the vector store directly with the raw question.
soru = "What is the author's opinion about small-time politics?"
doc_basic = retriever.vectorstore.similarity_search_with_score(soru, k=10)

# One print per hit: header, passage text, separator (each on its own line).
for rank, (doc, score) in enumerate(doc_basic, start=1):
    print(
        f"Doküman {rank} | Skor: {score:.4f}\n",
        f"{doc.page_content}\n",
        "-" * 20,
        sep="\n",
    )

Doküman 1 | Skor: 0.6268

Those of you that know me at all know that I’m not big on voicing my political opinions on things. It must be the Yankee in me (see, I’m not totally Southern), but I consider one’s politics about like one’s genitals: I’m happy you’ve got them, and am sure that you are proud of them, but don’t go sticking them in my face (no matter how well I know you) or waving them around in public (except at certain kinds of parties, likely involving alcohol).
It isn’t shame, or feeling unsure, or anything like that. It comes from a very pragmatic place. When entering into any political debate, imagine the chances your opponent has of convincing you that you’re wrong. Those are the same chances (more or less) you have for changing their mind. You are probably not going to convince them of anything, so the whole thing is rather masturbatory ((as a side note, this metaphor for politics and genitals keeps amusing me)).
And yet.

--------------------
Doküman 2 | Skor: 0.6507

I 

In [8]:
soru = "What is the author's opinion about small-time politics?"
doc_basic = retriever.vectorstore.similarity_search_with_score(soru, k=3)

# Keep only the passage texts and join them into a single context string.
context = "\n\n".join(doc.page_content for doc, _ in doc_basic)

# Run the RAG chain on that context.
cevap = retriever.rag_chain.invoke({"context": context, "question": soru})
print(cevap.content)


I could not find the answer in the provided context.


### HYDE

In [9]:
# HyDE: search with an LLM-generated hypothetical answer instead of the raw question.
soru = "What is the author's opinion about small-time politics?"
docs_with_scores = retriever.hyde_retrieve(soru, k=10)

print("--- Bulunan Benzer Dokümanlar (Metrik: L2 Mesafesi, Düşük = Daha İyi) ---\n")

# One print per hit: header, passage text, separator (each on its own line).
for rank, (doc, score) in enumerate(docs_with_scores, start=1):
    print(
        f"Doküman {rank} | Skor: {score:.4f}\n",
        f"{doc.page_content}\n",
        "-" * 20,
        sep="\n",
    )


--- Üretilen Hipotetik Doküman ---

```text
Author views small-time politics with cynical amusement. Sees it as a microcosm of larger issues, but amplified by petty grievances and personal vendettas. Believes genuine progress is often sacrificed for ego and short-term gains. Finds the earnestness of participants both endearing and frustrating, highlighting the disconnect between lofty ideals and the reality of local power struggles. Ultimately, the author considers it a necessary, albeit flawed, component of democracy.
```

----------------------------------

--- Bulunan Benzer Dokümanlar (Metrik: L2 Mesafesi, Düşük = Daha İyi) ---

Doküman 1 | Skor: 0.6645

Time, as a possible element, often serves being a source regarding conflict involving the press as well as the government. The latter would like an interval when policy could possibly be resolved just before being declared, while the particular former works on desperation and can so right away. Nevertheless, it really is this “ann

In [None]:
soru = "What is the author's opinion about small-time politics?"

# Relies on `docs_with_scores` assigned by the HyDE retrieval cell; it is not recomputed here.
# NOTE(review): the saved output of this cell shows a generated hypothetical document,
# which this code does not print — the output appears to predate the current cell content.
context = "\n\n".join(doc.page_content for doc, _ in docs_with_scores)

cevap = retriever.rag_chain.invoke({"context": context, "question": soru})
print(f"Cevap: {cevap.content}")


--- Üretilen Hipotetik Doküman ---

```text
The author views small-time politics with disdain. They see it as breeding ground for petty squabbles, prioritizing personal gain over community welfare. Local issues become weaponized, fostering division. Honest debate is rare, replaced by self-serving agendas. The author believes this hinders progress, creating apathy.
```

----------------------------------

Cevap: The author finds small-time politics amusing and charming, like some off-Broadway shows. They believe it's the beauty of democracy that allows ordinary citizens to participate in local government, even if they are mentally imbalanced. They suggest that attending city council meetings can be entertaining.


### Query Rewriting

In [13]:
# Query rewriting: let the LLM reformulate the question before searching the index.
soru = "What is the author's opinion about small-time politics?"
docs_with_scores = retriever.query_rewrite_retrieve(soru, k=10)

print("--- Bulunan Benzer Dokümanlar (Metrik: L2 Mesafesi, Düşük = Daha İyi) ---\n")

# One print per hit: header, passage text, separator (each on its own line).
for rank, (doc, score) in enumerate(docs_with_scores, start=1):
    print(
        f"Doküman {rank} | Skor: {score:.4f}\n",
        f"{doc.page_content}\n",
        "-" * 20,
        sep="\n",
    )

--- Bulunan Benzer Dokümanlar (Metrik: L2 Mesafesi, Düşük = Daha İyi) ---

Doküman 1 | Skor: 0.6810

Those of you that know me at all know that I’m not big on voicing my political opinions on things. It must be the Yankee in me (see, I’m not totally Southern), but I consider one’s politics about like one’s genitals: I’m happy you’ve got them, and am sure that you are proud of them, but don’t go sticking them in my face (no matter how well I know you) or waving them around in public (except at certain kinds of parties, likely involving alcohol).
It isn’t shame, or feeling unsure, or anything like that. It comes from a very pragmatic place. When entering into any political debate, imagine the chances your opponent has of convincing you that you’re wrong. Those are the same chances (more or less) you have for changing their mind. You are probably not going to convince them of anything, so the whole thing is rather masturbatory ((as a side note, this metaphor for politics and genitals keep

### HYDE + Reranking

In [14]:
# HyDE retrieval over a wide candidate pool (k=20), then cross-encoder reranking
# with the reranker's default cut-off.
soru = "What is the author's opinion about small-time politics?"
docs_hyde = retriever.hyde_retrieve(soru, k=20)
results = retriever.reranking(soru, docs_hyde)

# One print per kept passage: score line, passage text, separator.
for doc, score in results:
    print(f"Score: {score:.4f}", doc.page_content, "=" * 100, sep="\n")


--- Üretilen Hipotetik Doküman ---

```text
Author views small-time politics with cynical amusement. Sees it as a microcosm of larger issues, but amplified by petty grievances and personal vendettas. Believes genuine progress is often sacrificed for ego and short-term gains. Finds the earnestness of participants both endearing and frustrating, highlighting the disconnect between lofty ideals and the reality of local power struggles. Ultimately, the author considers it a necessary, albeit flawed, component of democracy.
```

----------------------------------

Score: 1.1510
Wars in Iraq aside, there is nothing more amusing than governmental ineptitude on a federal level. Let's make that clear up front. The programming on C-Spans 1 and 2 is testament to that. But just like some off-Broadway shows are worth the trip downtown, so too does small-time politics have its charms.
It's the beauty of democracy, really: Every day, puffed-up, self-important citizens are invited to participate in l

In [15]:
# Join only the passage texts of the reranked results into one context string.
# Uses `results` and `soru` from the HyDE + reranking cell.
context = "\n\n".join(doc.page_content for doc, _ in results)

# Run the RAG chain on that context.
cevap = retriever.rag_chain.invoke({"context": context, "question": soru})
print(cevap.content)


The author finds small-time politics amusing and charming, comparing it to off-Broadway shows that are worth the trip downtown. They also suggest that city council meetings can be entertaining.


### Evaluation with DeepEval (Vectorstore + Rerank)

In [16]:
import pandas as pd
from datasets import load_dataset

# Draw a small, reproducible sample of dataset rows to use as evaluation cases.
# NOTE(review): the rows come from the same "train" split that
# create_faiss_db_from_dataset indexes, so these questions are not held-out data.
EVAL_SAMPLE_SIZE = 10
EVAL_RANDOM_STATE = 42

dataset = load_dataset("neural-bridge/rag-dataset-12000", split="train")
test_df = dataset.to_pandas().sample(n=EVAL_SAMPLE_SIZE, random_state=EVAL_RANDOM_STATE)
test_data = test_df.to_dict('records')

print(f"Değerlendirme için {len(test_data)} adet test örneği hazırlandı.")

Değerlendirme için 10 adet test örneği hazırlandı.


In [17]:
from deepeval.metrics import FaithfulnessMetric, AnswerRelevancyMetric
from deepeval import evaluate
from deepeval.test_case import LLMTestCase
import time
from deepeval.models import OllamaModel
# Wrapper that exposes the retrieve -> rerank -> answer pipeline in the shape the
# DeepEval evaluation loop consumes.
class RAGSystem:
    """Run the vector-store + rerank RAG pipeline of a retriever object."""

    def __init__(self, retriever_instance):
        """Keep a reference to the object providing ``vectorstore``, ``reranking`` and ``rag_chain``."""
        self.retriever = retriever_instance

    def run_rag_chain(self, question, k=20, n=3):
        """Answer ``question`` from the top reranked passages.

        Args:
            question: The question to answer.
            k: Number of candidates fetched from the vector store.
            n: Number of passages kept after reranking.

        Returns:
            A tuple ``(answer_text, context, reranked_docs)``: the ``content`` of
            the chain's response, the joined context string, and the reranker's
            output unchanged.
        """
        pipeline = self.retriever

        # Step 1: plain similarity search with the question itself (no HyDE here).
        candidates = pipeline.vectorstore.similarity_search_with_score(question, k=k)

        # Step 2: rerank the candidates and keep the best n.
        reranked_docs = pipeline.reranking(question, candidates, n=n)

        # Step 3: join the kept passages, separated by blank lines.
        passages = [doc.page_content for doc, _ in reranked_docs]
        context = "\n\n".join(passages)

        # Step 4: ask the question-answering chain.
        answer = pipeline.rag_chain.invoke({"context": context, "question": question})

        return answer.content, context, reranked_docs

def run_deepeval_evaluation(rag_system, test_dataset, delay_seconds=20,
                            eval_model_name="granite3.3:8b", threshold=0.7):
    """Run every test item through the RAG system and score the answers with DeepEval.

    For each item the RAG system is asked the item's ``question``; the generated
    answer, the retrieved passages and the item's reference ``answer`` are
    packed into an ``LLMTestCase``. All cases are then evaluated with the
    Faithfulness and Answer Relevancy metrics, judged by an Ollama model.

    Args:
        rag_system: Object whose ``run_rag_chain(question)`` returns
            ``(answer_text, context_string, docs)``, where ``docs`` yields
            ``(document, score)`` pairs and each document has ``page_content``.
        test_dataset: Iterable of mappings with ``'question'`` and ``'answer'`` keys.
        delay_seconds: Pause inserted between consecutive RAG calls (presumably
            to stay under an API rate limit — TODO confirm). No pause is added
            before the first call or after the last one. Defaults to 20.
        eval_model_name: Name of the Ollama model used as the judge.
        threshold: Pass threshold applied to both metrics.

    Returns:
        None. DeepEval reports the results itself.
    """
    print("DeepEval değerlendirmesi başlatılıyor...")
    test_cases = []

    for position, item in enumerate(test_dataset):
        if position and delay_seconds:
            # Space out consecutive calls to the RAG system.
            time.sleep(delay_seconds)

        question = item['question']
        ground_truth_answer = item['answer']

        # The joined context string is not needed here; DeepEval receives the
        # individual passages instead.
        generated_answer, _, retrieved_docs = rag_system.run_rag_chain(question)

        test_cases.append(
            LLMTestCase(
                input=question,
                actual_output=generated_answer,
                retrieval_context=[doc.page_content for doc, _ in retrieved_docs],
                # The reference answer is passed as `expected_output`.
                expected_output=ground_truth_answer,
            )
        )

    # Both metrics share the same judge model and threshold.
    model = OllamaModel(model=eval_model_name)
    faithfulness_metric = FaithfulnessMetric(model=model, threshold=threshold)
    answer_relevancy_metric = AnswerRelevancyMetric(model=model, threshold=threshold)

    evaluate(
        test_cases=test_cases,
        metrics=[faithfulness_metric, answer_relevancy_metric]
    )
    print("Değerlendirme tamamlandı!")

# Reuse the `retriever` built earlier in the notebook instead of constructing a second
# Retrievers(), which would load the FAISS index and the cross-encoder a second time.
retriever_instance = retriever
rag_system_wrapper = RAGSystem(retriever_instance)

# Run the evaluation
run_deepeval_evaluation(rag_system_wrapper, test_data)

Mevcut FAISS veritabanı yükleniyor...
DeepEval değerlendirmesi başlatılıyor...


Output()



Metrics Summary

  - ✅ Faithfulness (score: 1.0, threshold: 0.7, strict: False, evaluation model: granite3.3:8b (Ollama), reason: The score is 1.00 because there are no contradictions detected between the actual output and the retrieval context., error: None)
  - ✅ Answer Relevancy (score: 1.0, threshold: 0.7, strict: False, evaluation model: granite3.3:8b (Ollama), reason: The score is 1.00 because the response directly and accurately addresses the question by providing a concise summary of the user's potential reaction upon discovering the site, without any irrelevant information., error: None)

For test case:

  - input: What is the reaction of the user upon discovering the site?
  - actual output: The user is very happy to discover the site.
  - expected output: The user is very happy to discover the site.
  - context: None
  - retrieval context: ['In the Name of Allah, the Most Gracious, Most Merciful\nWisdom ‘Anqa’)\n(From Islamic Topics)\nDecember 26, 2008 at 11:39\nAssalamu A

Değerlendirme tamamlandı!
