In [None]:
def build_retrieval_eval_chain(retriever, prompt_template, temperature, deployment_name):
    """
    Build a staged retrieval + generation chain for evaluation runs.

    The chain takes ``{"question": ...}`` as input and returns a dict with:

    - ``"question"``: the input question, passed through unchanged
    - ``"retrieved_texts"``: list with the ``page_content`` of each retrieved document
    - ``"context"``: the retrieved texts joined by blank lines
    - ``"llm_response"``: the object returned by ``llm.invoke`` for the filled prompt

    Args:
        retriever: Object whose ``invoke(question)`` returns documents exposing
            ``page_content``.
        prompt_template: Template with ``context`` and ``question`` placeholders.
        temperature: Sampling temperature forwarded to the model.
        deployment_name: Azure OpenAI deployment to call.

    Returns:
        The composed runnable chain.
    """
    llm = AzureChatOpenAI(
        api_key=AZURE_OPENAI_API_KEY,
        azure_endpoint=AZURE_OPENAI_ENDPOINT,
        api_version="2023-05-15",
        deployment_name=deployment_name,
        temperature=temperature,
    )

    def _render_prompt(context, question):
        # A chat prompt template's .format() collapses the conversation into a
        # single string that carries a "Human: " role prefix, which would then
        # be sent to the model as part of the message text. format_prompt()
        # keeps the message structure intact. Plain string templates only
        # offer .format(), so keep that as the fallback.
        if hasattr(prompt_template, "format_prompt"):
            return prompt_template.format_prompt(context=context, question=question)
        return prompt_template.format(context=context, question=question)

    # Each stage must re-emit every key a later stage (or the caller) needs,
    # because a RunnableMap outputs only the keys it declares.
    retrieval_chain = (
        # 1) fetch documents for the question
        RunnableMap({
            "retrieved_documents": lambda x: retriever.invoke(x["question"]),
            "question": lambda x: x["question"],
        })
        # 2) reduce documents to their raw text
        | RunnableMap({
            "retrieved_texts": lambda x: [doc.page_content for doc in x["retrieved_documents"]],
            "question": lambda x: x["question"],
        })
        # 3) assemble a single context string
        | RunnableMap({
            "context": lambda x: "\n\n".join(x["retrieved_texts"]),
            "retrieved_texts": lambda x: x["retrieved_texts"],
            "question": lambda x: x["question"],
        })
        # 4) query the model with the filled prompt
        | RunnableMap({
            "llm_response": lambda x: llm.invoke(_render_prompt(x["context"], x["question"])),
            "retrieved_texts": lambda x: x["retrieved_texts"],
            "context": lambda x: x["context"],
            "question": lambda x: x["question"],
        })
    )
    return retrieval_chain


def run_batch_evaluation(df, retrieval_chain):
    """
    Run every question of the evaluation dataset through the chain and collect the results.

    Args:
        df: DataFrame with the columns ``question``, ``relevant_text`` and ``answer``.
        retrieval_chain: Object whose ``invoke({"question": ...})`` returns a mapping
            with the keys ``retrieved_texts``, ``context`` and ``llm_response``.

    Returns:
        A DataFrame with one row per input row and the columns ``question``,
        ``relevant_text``, ``relevant_text_llm``, ``retrieved_context``,
        ``answer`` and ``answer_llm``. The columns are present even when ``df``
        has no rows, so downstream column access does not fail on an empty
        evaluation set.

    Raises:
        KeyError: If a row lacks one of the expected input columns.
    """
    result_columns = [
        "question",
        "relevant_text",
        "relevant_text_llm",
        "retrieved_context",
        "answer",
        "answer_llm",
    ]
    results = []

    # Records keep each column's own values; iterating row-wise Series would
    # coerce every row to one common dtype and yields an index we never use.
    for row in df.to_dict("records"):
        # Read all inputs before calling the chain so a malformed row fails
        # before any retrieval/LLM call is made for it.
        query = row["question"]
        relevant_text = row["relevant_text"]
        expected_answer = row["answer"]

        output = retrieval_chain.invoke({"question": query})

        results.append({
            "question": query,
            "relevant_text": relevant_text,
            "relevant_text_llm": output["retrieved_texts"],  # list of chunks
            "retrieved_context": output["context"],          # joined context
            "answer": expected_answer,
            "answer_llm": output["llm_response"],
        })

    return pd.DataFrame(results, columns=result_columns)

In [None]:
# Retriever variants, keyed by the label that later appears in the result file names.
# The search_kwargs values are the parameters to tune.
retrievers = {
    "similarity_k3": vector_store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 3},  # top-k
    ),
    "similarity_k5": vector_store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 5},  # top-k
    ),
    "mmr_k5": vector_store.as_retriever(
        search_type="mmr",  # Maximal Marginal Relevance
        search_kwargs={"k": 5, "fetch_k": 20},  # fetch a few more, then pick diverse ones
    ),
    "mmr_k7": vector_store.as_retriever(
        search_type="mmr",  # Maximal Marginal Relevance
        search_kwargs={"k": 7, "fetch_k": 20},  # fetch a few more, then pick diverse ones
    ),
}

# Standalone names for the individual retrievers (same objects as in the dict).
retriever_similarity_k3 = retrievers["similarity_k3"]
retriever_similarity_k5 = retrievers["similarity_k5"]
retriever_mmr_k5_fetch20 = retrievers["mmr_k5"]
retriever_mmr_k7_fetch20 = retrievers["mmr_k7"]

# Prompt variants
prompt_simple = ChatPromptTemplate.from_template("""
Answer the following question based on the context provided.
If you cannot find an answer, please state that you do not know.

Kontext:
{context}

Frage:
{question}
""")

prompt_chain_of_thought = ChatPromptTemplate.from_template("""
Given the context below, think carefully and step-by-step before answering the question.
If the answer cannot be found explicitly, clearly state that.

Kontext:
{context}

Frage:
{question}

Please explain your reasoning before giving a final answer.
""")

# Prompt variants collected in a dict so results can be stored per label.
prompts = dict(
    simple=prompt_simple,
    chain_of_thought=prompt_chain_of_thought,
)

In [None]:
from pathlib import Path

results = []
temperature = 0.5  # tunable parameter

# Drop the decimal point so the value can be embedded in a file name (0.5 -> "05").
temperature_str = str(temperature).replace(".", "")

# Create the output folder before any evaluation starts; otherwise a missing
# directory would only surface at save time, after a complete evaluation pass
# (and its model calls) has already been spent.
output_dir = "../data_mc1/data_eval"
Path(output_dir).mkdir(parents=True, exist_ok=True)

# Evaluate every retriever x prompt combination and store one parquet file each.
for retriever_name, retriever in retrievers.items():
    for prompt_name, prompt in prompts.items():

        print(f"Starte Kombination: {retriever_name} + {prompt_name}")

        # Build the chain for this combination
        retrieval_eval_chain = build_retrieval_eval_chain(
            retriever=retriever,
            prompt_template=prompt,
            temperature=temperature,
            deployment_name="gpt-4o",  # a mini deployment may be enough for evaluation
        )

        # Run the evaluation
        df_result = run_batch_evaluation(df_eval, retrieval_eval_chain)

        # Store answer_llm as plain text: unwrap objects that carry a
        # `content` attribute, leave everything else as it is.
        df_result["answer_llm"] = df_result["answer_llm"].apply(
            lambda x: getattr(x, "content", x)
        )

        # Save the result
        file_name = f"{output_dir}/df_results_{retriever_name}_{prompt_name}_temp{temperature_str}.parquet"
        df_result.to_parquet(file_name, index=False)
        print(f"Gespeichert unter: {file_name}")

        results.append(df_result)


In [None]:
# Spot-check one saved result set. The evaluation loop writes files named
# df_results_<retriever>_<prompt>_temp<T>.parquet (e.g. similarity_k3 + simple
# at temperature 0.5 -> "..._similarity_k3_simple_temp05.parquet"); the former
# name "df_results_similarity_simple.parquet" is never produced by that loop.
df_test_print = pd.read_parquet("../data_mc1/data_eval/df_results_similarity_k3_simple_temp05.parquet")
df_test_print.head()