In [None]:
%pip install -qqqU matplotlib

In [None]:
from dotenv import load_dotenv

# Load environment variables from '../.env' (path is relative to the working directory).
# NOTE(review): presumably this is what supplies VECTOR_STORE_URL / VECTOR_STORE_API_KEY
# read further down - confirm against the .env file.
load_dotenv('../.env')

In [None]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from langchain_aws import BedrockEmbeddings, ChatBedrock
from qdrant_client.http.models import Distance, VectorParams
from langchain_core.rate_limiters import InMemoryRateLimiter

In [None]:
# Embedding model passed to the Qdrant vector store below; built with the library's default arguments.
embeddings = BedrockEmbeddings()

In [None]:
import os

# Qdrant client. The endpoint and API key are read from the environment so that no
# credentials live in the notebook; a KeyError is raised if either variable is unset.
client = QdrantClient(
    location=os.environ["VECTOR_STORE_URL"],
    api_key=os.environ["VECTOR_STORE_API_KEY"]
)

# Vector store over the "regulamento-semantic" collection, using the `embeddings`
# object defined earlier in the notebook.
vector_store = QdrantVectorStore(
    client=client,
    collection_name="regulamento-semantic",
    embedding=embeddings
)

In [None]:
from datasets import Dataset

def pandas_to_ragas(df):
    '''
    Converts a Pandas DataFrame into a Ragas-compatible dataset

    The input DataFrame is not modified. Every column other than the two
    context columns is converted to strings, with missing values replaced by ''.
    The context columns are converted to lists: a cell may already be a
    list/tuple, or be the string literal of one (as produced by a CSV round
    trip). String cells are parsed with ast.literal_eval, so only Python
    literals are accepted and no code embedded in the data is executed.
    Missing or blank context cells become an empty list.

    Inputs:
        - df (Pandas DataFrame): The input DataFrame to be converted. It must
          contain the 'reference_contexts' and 'retrieved_contexts' columns.

    Returns:
        - ragas_testset (Hugging Face Dataset): A Hugging Face dataset compatible with the Ragas framework

    Raises:
        - KeyError: if one of the context columns is missing
        - ValueError: if a context cell is not a valid Python literal or does
          not evaluate to a list/tuple
    '''
    # Local import keeps this cell runnable on its own.
    import ast

    context_columns = ('reference_contexts', 'retrieved_contexts')

    absent = [col for col in context_columns if col not in df.columns]
    if absent:
        raise KeyError(f"Missing required context column(s): {absent}")

    def _parse_contexts(value, column):
        # Cells assigned in memory are already lists; no string round trip needed.
        if isinstance(value, (list, tuple)):
            return list(value)

        text = str(value).strip()
        if not text:
            return []

        try:
            # literal_eval only accepts Python literals, unlike eval.
            parsed = ast.literal_eval(text)
        except (ValueError, SyntaxError) as exc:
            raise ValueError(
                f"Column '{column}' holds a value that is not a valid list literal: {text[:80]!r}"
            ) from exc

        if not isinstance(parsed, (list, tuple)):
            raise ValueError(
                f"Column '{column}' must contain lists, got {type(parsed).__name__}: {text[:80]!r}"
            )
        return list(parsed)

    # Build the column dictionary directly so the caller's frame is never mutated.
    data_dict = {}
    for col in df.columns:
        series = df[col]
        if col in context_columns:
            data_dict[col] = [
                [] if is_missing else _parse_contexts(value, col)
                for value, is_missing in zip(series, series.isna())
            ]
        else:
            # Ensure text columns are strings and NaN values become ''
            data_dict[col] = series.fillna('').astype(str).tolist()

    # Loading the dictionary as a Hugging Face dataset
    ragas_testset = Dataset.from_dict(data_dict)

    return ragas_testset

In [None]:
import pandas as pd
from tqdm import tqdm
from ragas import evaluate
from ragas.metrics import NonLLMContextRecall, NonLLMContextPrecisionWithReference
from rerank import BedrockRerank

# Reranker model ids compared in this experiment.
models = [
    'amazon.rerank-v1:0',
    'cohere.rerank-v3-5:0',
    ]

# Experiment configuration, kept in one place so it is easy to tune.
RESULTS_PATH = '../rerank_models_in_k_eval.csv'
DATASET_PATH = '../data/dataset_potiguana.csv'
K_VALUES = [60, 70, 80, 90, 100]   # number of documents retrieved and reranked per query
MATCH_THRESHOLD = 0.95             # threshold passed to both non-LLM context metrics

# Results from earlier runs are extended, not overwritten. On a fresh checkout the
# file does not exist yet, so start from an empty frame instead of crashing.
try:
    previous_results = pd.read_csv(RESULTS_PATH)
except FileNotFoundError:
    previous_results = pd.DataFrame()

# Older versions of this cell saved the index, which comes back as 'Unnamed: N'
# columns on every reload; drop them so they stop accumulating.
stale_index_columns = [
    col for col in previous_results.columns if str(col).startswith('Unnamed:')
]
previous_results = previous_results.drop(columns=stale_index_columns)

# The evaluation set is the same for every (model, k) pair: read it once and
# hand a fresh copy to each run.
base_df = pd.read_csv(DATASET_PATH)

# NOTE(review): one reranker instance is reused for all calls because the model id
# is passed per call to rerank(); confirm BedrockRerank keeps no per-call state.
reranker = BedrockRerank()

# Collect per-run frames and concatenate once at the end (avoids quadratic concat).
result_frames = [] if previous_results.empty else [previous_results]

for model_name in models:
    for k in K_VALUES:
        df = base_df.copy()

        retrieved_contexts = []

        for query in tqdm(df['user_input'], total=len(df), desc=f'{model_name} k={k}'):
            context_docs = vector_store.similarity_search(query, k=k)
            contexts = [c.page_content for c in context_docs]

            # Rerank all k retrieved documents (top_k=k keeps every document, only the order changes)
            reranked_contexts = reranker.rerank(query=query, documents=contexts, top_k=k, model=model_name)

            retrieved_contexts.append(reranked_contexts)

        # Explicit index keeps the contexts aligned with their rows.
        df['retrieved_contexts'] = pd.Series(retrieved_contexts, index=df.index)
        eval_dataset = pandas_to_ragas(df)

        metrics = [
            NonLLMContextPrecisionWithReference(threshold=MATCH_THRESHOLD),
            NonLLMContextRecall(threshold=MATCH_THRESHOLD)
            ]

        results = evaluate(dataset=eval_dataset, metrics=metrics)
        result_df = results.to_pandas()
        result_df['k'] = k
        result_df['model'] = model_name
        result_frames.append(result_df)

all_result_df = pd.concat(result_frames, ignore_index=True)

# index=False: the row index carries no information and must not be read back as a column.
all_result_df.to_csv(RESULTS_PATH, index=False)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the consolidated results
all_result_df = pd.read_csv("../rerank_models_in_k_eval.csv")

# Keep only the rows whose context recall is exactly 1.0
has_full_recall = all_result_df['non_llm_context_recall'].eq(1.0)
all_result_df = all_result_df.loc[has_full_recall]

# Draw a single figure with one subplot per model
def plot_metrics_with_subplots(results_df=None):
    '''
    Plots the mean context precision against k, one subplot per model

    Inputs:
        - results_df (Pandas DataFrame, optional): Frame with the columns
          'model', 'k' and 'non_llm_context_precision_with_reference'.
          When omitted, the notebook-level `all_result_df` is used.

    Returns:
        - None. The figure is displayed with plt.show(). If the frame has no
          rows, a message is printed and nothing is drawn.
    '''
    if results_df is None:
        results_df = all_result_df

    model_names = results_df['model'].unique()
    num_models = len(model_names)

    # plt.subplots cannot create a grid with zero columns.
    if num_models == 0:
        print('No rows to plot: the results frame is empty.')
        return

    # squeeze=False always returns a 2-D grid of axes, so a single model
    # (a 1x1 grid) is indexed the same way as several models.
    fig, axes = plt.subplots(1, num_models, figsize=(16, 6), sharey=True, squeeze=False)

    for i, (ax, model_name) in enumerate(zip(axes[0], model_names)):
        model_data = results_df[results_df['model'] == model_name]

        # Mean precision for each value of k
        mean_precision = model_data.groupby('k')['non_llm_context_precision_with_reference'].mean()

        # Plot the metric on this model's subplot
        ax.plot(mean_precision.index, mean_precision.values, marker='s', linestyle='--', color='green', label='Context Precision')

        # Subplot settings
        ax.set_title(f'{model_name} - Metrics vs. K', fontsize=14)
        ax.set_xlabel('K', fontsize=12)
        if i == 0:  # The y axis is shared, so label it only on the first subplot
            ax.set_ylabel('Score', fontsize=12)
        ax.grid(True)
        ax.legend(fontsize=10)

    # Adjust the overall layout
    plt.tight_layout()
    plt.show()

# Render the subplots
plot_metrics_with_subplots()
