### Imports and Functions

In [1]:
import json
from datasets import Dataset  # Ensure the `datasets` library is installed
from ragas.metrics import LLMContextRecall, Faithfulness, LLMContextPrecisionWithReference, ResponseRelevancy, FactualCorrectness
from ragas import evaluate, EvaluationDataset
from ragas.cost import get_token_usage_for_openai
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_openai import ChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_groq import ChatGroq
from langchain_openai import OpenAIEmbeddings
import os
from dotenv import load_dotenv

import logging
import sys

# Send INFO-level (and above) log records to stderr. force=True removes any
# handlers already attached to the root logger, so re-running this cell
# re-applies the configuration instead of being silently ignored.
logging.basicConfig(stream=sys.stderr, level=logging.INFO, force=True)

# Load environment variables from a .env file, if one is found
# (presumably this is where the OpenAI API key is supplied — confirm).
load_dotenv()

def _load_and_convert_to_dataset(json_path):
    """
    Read a RAG results JSON file and reshape it into a Hugging Face ``Dataset``.

    The file must contain a top-level ``"results"`` list. Each item must have a
    ``"question"``; ``"response"``, ``"gt_answer"`` and ``"retrieved_context"``
    (a list of objects with a ``"text"`` field) are optional.

    Parameters:
        json_path (str): Path to the JSON file to load.

    Returns:
        Dataset: Columns ``user_input``, ``retrieved_contexts``, ``response``
        and ``reference``, one row per item in ``"results"``.

    Raises:
        KeyError: If ``"results"`` or an item's ``"question"`` is missing.
    """
    # Explicit UTF-8 so non-ASCII questions/contexts decode identically on
    # every platform instead of depending on the locale default.
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    records = data["results"]

    def _text_or(value, default):
        # JSON `null` should fall back to the placeholder just like a missing key.
        return default if value is None else value

    formatted_data = {
        "user_input": [item["question"] for item in records],
        # A missing or null context list becomes an empty list rather than
        # aborting the whole evaluation with a KeyError/TypeError.
        "retrieved_contexts": [
            [context["text"] for context in (item.get("retrieved_context") or [])]
            for item in records
        ],
        "response": [
            _text_or(item.get("response"), "No response generated") for item in records
        ],
        "reference": [
            _text_or(item.get("gt_answer"), "No ground truth provided") for item in records
        ],
    }

    return Dataset.from_dict(formatted_data)


def evaluate_json_path(json_path, model="gpt-4o-mini"):
    """
    Load a JSON dataset from the specified path, convert it to the required
    EvaluationDataset format, and evaluate it using Ragas metrics.

    Metrics computed: LLMContextPrecisionWithReference, LLMContextRecall,
    ResponseRelevancy, Faithfulness and FactualCorrectness.

    Parameters:
        json_path (str): Path to the JSON file containing the data to be evaluated.
        model (str): Name of the OpenAI chat model used as the evaluator LLM.
            Defaults to "gpt-4o-mini".

    Returns:
        The result object returned by ``ragas.evaluate`` (not a plain dict);
        the notebook calls ``to_pandas()`` and ``total_tokens()`` on it.
    """
    # Load the custom dataset and convert it to the format Ragas expects
    dataset = _load_and_convert_to_dataset(json_path)
    eval_dataset = EvaluationDataset.from_hf_dataset(dataset)

    # Instantiate LLM and embeddings wrappers
    evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model=model))
    evaluator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())

    # Define metrics for evaluation. ResponseRelevancy is the embedding-based
    # metric, so it is given the embeddings wrapper explicitly.
    metrics = [
        LLMContextPrecisionWithReference(llm=evaluator_llm),
        LLMContextRecall(llm=evaluator_llm),
        ResponseRelevancy(llm=evaluator_llm, embeddings=evaluator_embeddings),
        Faithfulness(llm=evaluator_llm),
        FactualCorrectness(llm=evaluator_llm),
    ]

    # Perform the evaluation. The embeddings wrapper is passed through so the
    # embedding model in use is the one configured here, and the token usage
    # parser lets the result report token counts afterwards.
    results = evaluate(
        dataset=eval_dataset,
        metrics=metrics,
        llm=evaluator_llm,
        embeddings=evaluator_embeddings,
        token_usage_parser=get_token_usage_for_openai,
    )

    return results

  from .autonotebook import tqdm as notebook_tqdm


### OpenParse

#### Self-RAG

In [2]:
# Evaluate the Self-RAG run on the OpenParse output with the Ragas metrics.
json_path = 'eval_results/selfRAG_openParser.json'  # Results file read by evaluate_json_path
evaluation_results = evaluate_json_path(json_path)

# Print the aggregate metric scores, then save the results table as a CSV
# alongside the input JSON.
print("Evaluation Results:", evaluation_results)
temp = evaluation_results.to_pandas()
temp.to_csv("eval_results/selfRAG_openParser.csv", index=False)

Evaluating:   0%|          | 0/195 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Evaluating:   1%|          | 1/195 [00:02<09:26,  2.92s/it]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 

Evaluation Results: {'llm_context_precision_with_reference': 0.5245, 'context_recall': 0.2725, 'answer_relevancy': 0.3648, 'faithfulness': 0.6598, 'factual_correctness': 0.2800}


In [3]:
# Token usage of the evaluation run above, collected through the
# token_usage_parser that evaluate_json_path passes to evaluate().
evaluation_results.total_tokens()

TokenUsage(input_tokens=686763, output_tokens=155110, model='')

#### Basic RAG

In [5]:
# Evaluate the Basic RAG run on the OpenParse output with the Ragas metrics.
json_path = 'eval_results/basic_rag_openParser.json'  # Results file read by evaluate_json_path
evaluation_results = evaluate_json_path(json_path)

# Print the aggregate metric scores, then save the results table as a CSV
# alongside the input JSON.
print("Evaluation Results:", evaluation_results)
temp = evaluation_results.to_pandas()
temp.to_csv("eval_results/basic_rag_openParser.csv", index=False)

Evaluating:   0%|          | 0/190 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Evaluating:   1%|          | 1/190 [00:02<09:10,  2.91s/it]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 

Evaluation Results: {'llm_context_precision_with_reference': 0.5217, 'context_recall': 0.2531, 'answer_relevancy': 0.3743, 'faithfulness': 0.6228, 'factual_correctness': 0.2729}


In [6]:
# Token usage of the evaluation run above, collected through the
# token_usage_parser that evaluate_json_path passes to evaluate().
evaluation_results.total_tokens()

TokenUsage(input_tokens=721692, output_tokens=151121, model='')

### EnhancedParser

#### Self-RAG

In [2]:
# Evaluate the Self-RAG run on the EnhancedParser output with the Ragas metrics.
json_path = 'eval_results/selfRAG_enhancedParser.json'  # Results file read by evaluate_json_path
evaluation_results = evaluate_json_path(json_path)

# Print the aggregate metric scores, then save the results table as a CSV
# alongside the input JSON.
print("Evaluation Results:", evaluation_results)
temp = evaluation_results.to_pandas()
temp.to_csv("eval_results/selfRAG_enhancedParser.csv", index=False)

Evaluating:   0%|          | 0/195 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Evaluating:   1%|          | 1/195 [00:02<08:58,  2.78s/it]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Evaluating:   1%|          | 2/195 [00:07<12:10,  3.79s/it]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:H

Evaluation Results: {'llm_context_precision_with_reference': 0.7785, 'context_recall': 0.3644, 'answer_relevancy': 0.7880, 'faithfulness': 0.5991, 'factual_correctness': 0.3508}


In [3]:
# Token usage of the evaluation run above, collected through the
# token_usage_parser that evaluate_json_path passes to evaluate().
evaluation_results.total_tokens()

TokenUsage(input_tokens=650575, output_tokens=150721, model='')

#### Basic RAG

In [4]:
# Evaluate the Basic RAG run on the EnhancedParser output with the Ragas metrics.
json_path = 'eval_results/basic_rag_enhancedParser.json'  # Results file read by evaluate_json_path
evaluation_results = evaluate_json_path(json_path)

# Print the aggregate metric scores, then save the results table as a CSV
# alongside the input JSON.
print("Evaluation Results:", evaluation_results)
temp = evaluation_results.to_pandas()
temp.to_csv("eval_results/basic_rag_enhancedParser.csv", index=False)

Evaluating:   0%|          | 0/195 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://a

Evaluation Results: {'llm_context_precision_with_reference': 0.7782, 'context_recall': 0.3557, 'answer_relevancy': 0.7119, 'faithfulness': 0.7859, 'factual_correctness': 0.4236}


In [5]:
# Token usage of the evaluation run above, collected through the
# token_usage_parser that evaluate_json_path passes to evaluate().
evaluation_results.total_tokens()

TokenUsage(input_tokens=755838, output_tokens=166100, model='')

### HiRes Parser

#### Self-RAG

In [2]:
# Evaluate the Self-RAG run on the HiRes Parser output with the Ragas metrics.
json_path = 'eval_results/selfRAG_hiResParser.json'  # Results file read by evaluate_json_path
evaluation_results = evaluate_json_path(json_path)

# Print the aggregate metric scores, then save the results table as a CSV
# alongside the input JSON.
print("Evaluation Results:", evaluation_results)
temp = evaluation_results.to_pandas()
temp.to_csv("eval_results/selfRAG_hiResParser.csv", index=False)

Evaluating:   0%|          | 0/195 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.ope

Evaluation Results: {'llm_context_precision_with_reference': 0.4994, 'context_recall': 0.2589, 'answer_relevancy': 0.2697, 'faithfulness': 0.5964, 'factual_correctness': 0.2692}


In [3]:
# Token usage of the evaluation run above, collected through the
# token_usage_parser that evaluate_json_path passes to evaluate().
evaluation_results.total_tokens()

TokenUsage(input_tokens=649022, output_tokens=145929, model='')

#### Basic RAG

In [4]:
# Evaluate the Basic RAG run on the HiRes Parser output with the Ragas metrics.
json_path = 'eval_results/basic_rag_hiResParser.json'  # Results file read by evaluate_json_path
evaluation_results = evaluate_json_path(json_path)

# Print the aggregate metric scores, then save the results table as a CSV
# alongside the input JSON.
print("Evaluation Results:", evaluation_results)
temp = evaluation_results.to_pandas()
temp.to_csv("eval_results/basic_rag_hiResParser.csv", index=False)

Evaluating:   0%|          | 0/195 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://a

Evaluation Results: {'llm_context_precision_with_reference': 0.5241, 'context_recall': 0.3135, 'answer_relevancy': 0.3192, 'faithfulness': 0.5029, 'factual_correctness': 0.2756}


In [5]:
# Token usage of the evaluation run above, collected through the
# token_usage_parser that evaluate_json_path passes to evaluate().
evaluation_results.total_tokens()

TokenUsage(input_tokens=697460, output_tokens=156432, model='')