In [None]:
%pip install -qqqU matplotlib

In [None]:
from dotenv import load_dotenv

# Load environment variables from the .env file one directory above the notebook.
# A later cell reads VECTOR_STORE_URL and VECTOR_STORE_API_KEY from os.environ;
# presumably they (and the AWS credentials used by Bedrock) are defined in this
# file -- TODO confirm.
load_dotenv('../.env')

In [None]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from langchain_aws import BedrockEmbeddings, ChatBedrock
from qdrant_client.http.models import Distance, VectorParams
from langchain_core.rate_limiters import InMemoryRateLimiter

In [None]:
# Embedding model used by the vector store to embed queries.
# No arguments are passed, so model, region and credentials all come from the
# library defaults / environment -- NOTE(review): confirm the default model is
# the same one that was used to index the collection.
embeddings = BedrockEmbeddings()

In [None]:
import os

# Qdrant connection: endpoint and API key are taken from the environment so
# that no credential is ever written into the notebook itself.
client = QdrantClient(location=os.environ["VECTOR_STORE_URL"], api_key=os.environ["VECTOR_STORE_API_KEY"])

# LangChain wrapper around the "regulamento-semantic" collection; queries are
# embedded with the `embeddings` object created in an earlier cell.
vector_store = QdrantVectorStore(embedding=embeddings, collection_name="regulamento-semantic", client=client)

In [None]:
from datasets import Dataset

def pandas_to_ragas(df):
    '''
    Converts a Pandas DataFrame into a Ragas-compatible dataset

    The input DataFrame is left untouched: all conversions are applied to a copy.

    Inputs:
        - df (Pandas DataFrame): The input DataFrame to be converted. It must contain
          the columns 'reference_contexts' and 'retrieved_contexts', each cell holding
          either a list or the textual representation of a list (e.g. "['a', 'b']").
          Missing / empty cells in those two columns become empty lists.

    Returns:
        - ragas_testset (Hugging Face Dataset): A Hugging Face dataset compatible with the Ragas framework

    Raises:
        - KeyError: if one of the two context columns is missing
        - ValueError / SyntaxError: if a context cell is not a valid Python literal
    '''
    import ast

    def _parse_context_list(text):
        # Turn the textual form of a list back into a real list of contexts.
        text = text.strip()
        if not text:
            # Missing cell (NaN was replaced by '' below): no contexts.
            return []
        # literal_eval only accepts Python literals, so text coming from the CSV
        # can never execute code (unlike the built-in eval).
        parsed = ast.literal_eval(text)
        if isinstance(parsed, (list, tuple)):
            return list(parsed)
        # A single scalar literal is treated as a one-element context list.
        return [parsed]

    # Work on a copy so the caller's DataFrame is not modified as a side effect.
    df = df.copy()

    # Ensure all columns are strings and handle NaN values
    for col in df.columns:
        df[col] = df[col].fillna('').astype(str)

    # Convert the context columns to a list of lists
    for col in ('reference_contexts', 'retrieved_contexts'):
        df[col] = df[col].apply(_parse_context_list)

    # Converting the DataFrame to a dictionary
    data_dict = df.to_dict('list')

    # Loading the dictionary as a Hugging Face dataset
    ragas_testset = Dataset.from_dict(data_dict)

    return ragas_testset

In [None]:
import pandas as pd
from tqdm import tqdm
from ragas import evaluate
from ragas.metrics import NonLLMContextRecall, NonLLMContextPrecisionWithReference

# --- Retrieval evaluation sweep over K -------------------------------------
# For every K, retrieve the top-K chunks for each question of the test set and
# score them against the reference contexts with Ragas' non-LLM metrics.

# Values of K (number of retrieved chunks) to evaluate.
K_VALUES = [2, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
# Threshold passed to both Ragas metrics.
MATCH_THRESHOLD = 0.95
# Test set; the loop reads its 'user_input' column, and pandas_to_ragas
# additionally requires a 'reference_contexts' column.
DATASET_PATH = '../data/dataset_potiguana.csv'

ks = []
recalls = []
per_k_results = []

# The test set does not depend on K, so it is read only once.
dataset_df = pd.read_csv(DATASET_PATH)

for k in K_VALUES:

    # Iterating the column (instead of a bare iterrows() generator) lets tqdm
    # know the total and show real progress.
    retrieved_contexts = [
        [doc.page_content for doc in vector_store.similarity_search(query, k=k)]
        for query in tqdm(dataset_df['user_input'], desc=f'k={k}')
    ]

    # Fresh copy per K so the pristine test set is never modified.
    df = dataset_df.copy()
    df['retrieved_contexts'] = pd.Series(retrieved_contexts, index=df.index)
    eval_dataset = pandas_to_ragas(df)

    metrics = [
        NonLLMContextPrecisionWithReference(threshold=MATCH_THRESHOLD),
        NonLLMContextRecall(threshold=MATCH_THRESHOLD)
        ]

    results = evaluate(dataset=eval_dataset, metrics=metrics)
    result_df = results.to_pandas()
    result_df['k'] = k
    per_k_results.append(result_df)

    ks.append(k)
    recalls.append(result_df['non_llm_context_recall'].mean())

# Concatenate once at the end instead of growing a DataFrame inside the loop.
all_result_df = pd.concat(per_k_results, ignore_index=True)

In [None]:
import matplotlib.pyplot as plt

# Average each metric over all evaluated questions, for every value of K.
mean_scores = all_result_df.groupby('k')[
    ['non_llm_context_recall', 'non_llm_context_precision_with_reference']
].mean()
mean_recall = mean_scores['non_llm_context_recall']
mean_precision = mean_scores['non_llm_context_precision_with_reference']

# One figure, two curves: recall (solid blue) and precision (dashed red).
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(mean_recall.index, mean_recall.values, marker='o', linestyle='-', color='b', label='Mean Context Recall')
ax.plot(mean_precision.index, mean_precision.values, marker='s', linestyle='--', color='r', label='Mean Context Precision')
ax.set_title('Mean Context Recall and Precision vs. K', fontsize=16)
ax.set_xlabel('K', fontsize=14)
ax.set_ylabel('Mean Score', fontsize=14)
ax.grid(True)
ax.legend(fontsize=12)

# Show every evaluated value of K on the x axis.
ax.set_xticks(mean_recall.index)
plt.show()