In [5]:
from indoxJudge.piplines import RagEvaluator

In [6]:
import os
from dotenv import load_dotenv

from indoxJudge.models import IndoxApi

# Load credentials from the local env file so the key never appears in the notebook.
load_dotenv("api.env")
INDOX_API_KEY = os.getenv("INDOX_API_KEY")

# os.getenv returns None when the variable is missing; fail here with a clear
# message instead of handing a None key to the client.
if not INDOX_API_KEY:
    raise RuntimeError(
        "INDOX_API_KEY is not set; add it to api.env or export it in the "
        "environment before running this notebook."
    )

# Judge model handed to the evaluator below.
# NOTE(review): earlier commented-out code pointed at an OpenAI alternative
# (OPENAI_API_KEY with indoxJudge.models.OpenAi) — confirm against the library
# docs before switching.
model = IndoxApi(api_key=INDOX_API_KEY)

In [7]:
# Sample inputs for the evaluation: the user question, the passages returned
# by retrieval, and the generated answer that will be judged against them.
query = "What are the main benefits and drawbacks of remote work?"

retrieval_context = [
    "Remote work allows employees to work from anywhere, reducing the need for commuting. However, it may lead to a sense of isolation and reduced team cohesion.",
    "Studies have shown that remote work can lead to increased productivity in some cases. However, it can also result in longer working hours and a blurred line between work and personal life.",
    "Managers often believe that remote workers are less productive, which can result in biased evaluations and reduced career advancement opportunities for these employees.",
    "Some people, such as parents with young children, find remote work challenging due to the need to balance work and family responsibilities.",
    "While remote work offers flexibility, it is not suitable for all types of jobs, particularly those requiring high levels of collaboration or access to specialized equipment.",
]

response = (
    "Remote work offers flexibility and reduced commute time, which can lead to a better work-life balance. However, it can also lead to isolation, reduced collaboration, and a lack of clear boundaries between work and personal life. Some studies suggest that remote work can reduce productivity, especially in creative fields. There is also a concern about managers having biased views towards remote workers, assuming they are less committed or productive. Additionally, certain groups may face challenges, such as parents with young children or individuals with disabilities."
)

In [8]:
# Build the evaluator from the judge model, the generated response, the
# retrieved passages, and the query, passed positionally in that order.
# The recorded log reports that the model is set for all metrics.
evaluator = RagEvaluator(model, response, retrieval_context, query)

[32mINFO[0m: [1mRagEvaluator initialized with model and metrics.[0m
[32mINFO[0m: [1mModel set for all metrics.[0m


In [None]:
# Run the evaluation; the recorded log shows metrics evaluated one at a time
# (Faithfulness, AnswerRelevancy, ContextualRelevancy, GEval, ...).
# NOTE(review): this cell has no execution count (In [None]) and its saved log
# stops at GEval, while the following cells restart numbering at In [6].
# The outputs shown further down may come from a different run; confirm
# with Restart Kernel -> Run All.
result = evaluator.judge()

[32mINFO[0m: [1mEvaluating metric: Faithfulness[0m
[32mINFO[0m: [1mCompleted evaluation for metric: Faithfulness[0m
[32mINFO[0m: [1mEvaluating metric: AnswerRelevancy[0m
[32mINFO[0m: [1mCompleted evaluation for metric: AnswerRelevancy[0m
[32mINFO[0m: [1mEvaluating metric: ContextualRelevancy[0m
[32mINFO[0m: [1mCompleted evaluation for metric: ContextualRelevancy[0m
[32mINFO[0m: [1mEvaluating metric: GEval[0m


In [6]:
# Show the evaluation output. The recorded repr is a dict keyed by metric
# name; the 'Faithfulness' entry carries 'claims' and 'truths' lists.
result

{'Faithfulness': {'claims': ['Remote work offers flexibility and reduced commute time.',
   'Remote work can lead to a better work-life balance.',
   'Remote work can lead to isolation, reduced collaboration, and a lack of clear boundaries between work and personal life.',
   'Some studies suggest that remote work can reduce productivity, especially in creative fields.',
   'There is a concern about managers having biased views towards remote workers, assuming they are less committed or productive.',
   'Certain groups may face challenges with remote work, such as parents with young children or individuals with disabilities.'],
  'truths': ['Remote work offers flexibility and reduced commute time.',
   'Remote work can lead to a better work-life balance.',
   'Remote work can lead to isolation.',
   'Remote work can lead to reduced collaboration.',
   'Remote work can lead to a lack of clear boundaries between work and personal life.',
   'Some studies suggest that remote work can redu

In [7]:
# Read the scores from the evaluator; the recorded output is a dict mapping
# each metric name to a numeric score.
scores = evaluator.metrics_score
scores

{'Faithfulness': 0.4,
 'AnswerRelevancy': 1.0,
 'ContextualRelevancy': 1.0,
 'GEval': 0.75,
 'Hallucination': 0.19999999999999996,
 'KnowledgeRetention': 0.0,
 'precision': 0.65,
 'recall': 0.77,
 'f1_score': 0.71,
 'METEOR': 0.57}

In [8]:
from indoxJudge.graph.visualization import Visualization

In [9]:
# Hand-written example results for three models. Each entry carries a display
# name, an overall score, and a dict of per-metric values.
models = [
    {
        "name": "Model_1",
        "score": 0.50,
        "metrics": {
            "Faithfulness": 0.55,
            "AnswerRelevancy": 1.0,
            "Bias": 0.45,
            "Hallucination": 0.8,
            "KnowledgeRetention": 0.0,
            "Toxicity": 0.0,
            "precision": 0.64,
            "recall": 0.77,
            "f1_score": 0.70,
            "BLEU": 0.11,
        },
    },
    {
        "name": "Model_2",
        "score": 0.61,
        "metrics": {
            "Faithfulness": 1.0,
            "AnswerRelevancy": 1.0,
            "Bias": 0.0,
            "Hallucination": 0.8,
            "KnowledgeRetention": 1.0,
            "Toxicity": 0.0,
            "precision": 0.667,
            "recall": 0.77,
            "f1_score": 0.71,
            "BLEU": 0.14,
        },
    },
    {
        "name": "Model_3",
        # NOTE(review): 0.050 is an order of magnitude below the other two
        # scores even though the metrics are similar — confirm this is not a
        # typo for 0.50.
        "score": 0.050,
        "metrics": {
            "Faithfulness": 1.0,
            "AnswerRelevancy": 1.0,
            "Bias": 0.0,
            "Hallucination": 0.83,
            "KnowledgeRetention": 0.0,
            "Toxicity": 0.0,
            "precision": 0.64,
            "recall": 0.76,
            "f1_score": 0.70,
            "BLEU": 0.10,
        },
    },
]
# Plot the per-model comparison in 'llm' mode. The recorded output shows that
# plot() starts a Dash app at http://127.0.0.1:8050/.
# NOTE(review): every plotting cell in this notebook rebinds `viz` and reports
# the same address — confirm how an already-running app is handled.
viz = Visualization(models, mode='llm')
viz.plot()

Dash app running on http://127.0.0.1:8050/


In [10]:
# Example flat mapping of metric name -> score, used as input for the
# 'rag' visualization mode.
rag_data = {
    "Precision": 0.85,
    "Recall": 0.78,
    "F1 Score": 0.81,
    "Accuracy": 0.89,
    "METEOR": 0.82,
}
# Plot the flat metric mapping in 'rag' mode. The recorded output shows that
# plot() starts a Dash app at http://127.0.0.1:8050/.
# NOTE(review): every plotting cell in this notebook rebinds `viz` and reports
# the same address — confirm how an already-running app is handled.
viz = Visualization(rag_data, mode='rag')
viz.plot()

Dash app running on http://127.0.0.1:8050/


In [11]:
# Example safety results for two models. Each entry carries a display name,
# a dict of per-metric values, and an overall score.
safety_data = [
    {
        "name": "Model A",
        "metrics": {"Toxicity": 0.2, "Bias": 0.3, "Misinformation": 0.1},
        "score": 0.8,
    },
    {
        "name": "Model B",
        "metrics": {"Toxicity": 0.3, "Bias": 0.2, "Misinformation": 0.2},
        "score": 0.7,
    },
]

# Plot the safety comparison in 'safety' mode. The recorded output shows that
# plot() starts a Dash app at http://127.0.0.1:8050/.
# NOTE(review): every plotting cell in this notebook rebinds `viz` and reports
# the same address — confirm how an already-running app is handled.
viz = Visualization(safety_data, mode='safety')
viz.plot()

Dash app running on http://127.0.0.1:8050/
