In [2]:
import os
import warnings

from raga_llm_hub import RagaLLMEval

# Take the OpenAI key from the environment instead of hard-coding it in the
# notebook, so a real key never has to be typed into (or committed with) a cell.
# An unset variable falls back to the empty string.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
if not OPENAI_API_KEY:
    # NOTE(review): tests configured with an OpenAI model (e.g. "gpt-4")
    # presumably need a valid key -- confirm against the library docs.
    warnings.warn(
        "OPENAI_API_KEY is not set; tests that call an OpenAI model may fail."
    )

# Single evaluator instance shared by the cells below.
evaluator = RagaLLMEval(
    api_keys={"OPENAI_API_KEY": OPENAI_API_KEY}
)

  from .autonotebook import tqdm as notebook_tqdm



🌟 Welcome to RagaLLMHub! 🌟
The most comprehensive LLM (Large Language Models) testing library at your service.

Launching Evaluation: '1d5f0018-c740-4fdd-8d1f-992c99feba7f'
Keep this identifier handy for tracking your progress!



In [4]:
# Register one example test ("relevancy_test") on hand-written sample data
# and execute it right away.
evaluator.add_test(
    test_names=["relevancy_test"],
    # Custom data: each field is given as a one-element list.
    data=dict(
        prompt=["How are you?"],
        context=["You are a student, answering your teacher."],
        response=["I am fine. Thank you"],
    ),
    # Arguments passed along with the test: model name and threshold.
    arguments=dict(model="gpt-4", threshold=0.6),
).run()

# Print the test results in a pretty format.
evaluator.print_results()


🚀 Starting execution of 1 tests...

🔍 Test 1 of 1: relevancy_test starts...
🚫 Error while running the tests. Resetting...
Test run details saved/updated under eval name '1d5f0018-c740-4fdd-8d1f-992c99feba7f' with timestamp 2024-06-23T18:30:21.767590.


ValueError: 🚫 No results to print.

In [5]:
# Chunk Impact Test
# Context passages in the order they are submitted; each passage is wrapped
# in its own single-element list to form `contexts`.
chunk_texts = [
    "Leonardo da Vinci's engineering designs were visionary, encompassing ideas for flying machines, military weaponry, and architectural innovations. While many of his inventions were not realized in his lifetime, they continue to inspire scientists and inventors today.",
    "Leonardo's interdisciplinary approach to knowledge and his relentless curiosity exemplify Renaissance humanism, emphasizing the potential of human intellect and creativity. His legacy continues to captivate people worldwide, leaving an enduring mark on Western culture and inspiring generations beyond his death in 1519.",
    "Leonardo da Vinci (1452–1519) was an Italian polymath of the Renaissance period, renowned for his diverse talents in painting, sculpture, architecture, engineering, science, and invention.",
    "Born in Vinci, Italy, in 1452, Leonardo's artistic prowess is epitomized by iconic works such as the Mona Lisa and The Last Supper, which are globally recognized masterpieces.",
    "Apart from his artistic achievements, Leonardo made significant contributions to science, conducting pioneering studies in anatomy, engineering, mathematics, and physics. His anatomical drawings, ahead of their time, remain invaluable to medical science.",
]
contexts = [[text] for text in chunk_texts]

# The single response that is evaluated together with the passages above.
response = "Leonardo Da Vinci was born in Vinci, Italy in 1452."

# Queue the chunk impact test with a 0.6 threshold and run it.
evaluator.add_test(
    test_names=["chunk_impact_test"],
    data=dict(context=contexts, response=response),
    arguments=dict(threshold=0.6),
).run()

evaluator.print_results()

🚀 Starting execution of 1 tests...

🔍 Test 1 of 1: chunk_impact_test starts...
✅ Test completed: chunk_impact_test.
✨ All tests completed. Total tests executed: 1.
Test run details saved/updated under eval name '1d5f0018-c740-4fdd-8d1f-992c99feba7f' with timestamp 2024-06-23T18:31:49.968647.

📊 Results:

Test Name: chunk_impact_test

+-------------------+---------------------------+-----------+--------+---------------------------+-----------+---------------------------+
|     Test Name     |          Response         |   Score   | Result |           Reason          | Threshold |          Context          |
+-------------------+---------------------------+-----------+--------+---------------------------+-----------+---------------------------+
| chunk_impact_test |   Leonardo Da Vinci was   | 0.6449599 |   ✅   |  Score: 0.764 -> Born in  |    0.60   |  [["Born in Vinci, Italy, |
|                   |  born in Vinci, Italy in  |           |        |   Vinci, Italy, in 1452,  |           

In [6]:
# Contextual relevancy test
# Context passages supplied as a flat list of strings.
context = [
    "Leonardo da Vinci (1452–1519) was an Italian polymath of the Renaissance period, renowned for his diverse talents in painting, sculpture, architecture, engineering, science, and invention.",
    "Born in Vinci, Italy, in 1452, Leonardo's artistic prowess is epitomized by iconic works such as the Mona Lisa and The Last Supper, which are globally recognized masterpieces.",
    "Apart from his artistic achievements, Leonardo made significant contributions to science, conducting pioneering studies in anatomy, engineering, mathematics, and physics. His anatomical drawings, ahead of their time, remain invaluable to medical science.",
    "Leonardo da Vinci's engineering designs were visionary, encompassing ideas for flying machines, military weaponry, and architectural innovations. While many of his inventions were not realized in his lifetime, they continue to inspire scientists and inventors today.",
    "Leonardo's interdisciplinary approach to knowledge and his relentless curiosity exemplify Renaissance humanism, emphasizing the potential of human intellect and creativity. His legacy continues to captivate people worldwide, leaving an enduring mark on Western culture and inspiring generations beyond his death in 1519.",
]

# The question, the reference answer, and the answer under evaluation.
prompt = "Where and when was leonardo da vinci born?"
expected_response = "In Vinci, in 1452"
response = "Leonardo Da Vinci was born in Vinci, Italy in 1452."

# Queue the test with the gpt-4 model and a 0.6 threshold, then run it.
evaluator.add_test(
    test_names=["contextual_relevancy_test"],
    data=dict(
        prompt=prompt,
        response=response,
        expected_response=expected_response,
        context=context,
    ),
    arguments=dict(model="gpt-4", threshold=0.6),
).run()

evaluator.print_results()

🚀 Starting execution of 1 tests...

🔍 Test 1 of 1: contextual_relevancy_test starts...
🚫 Error while running the tests. Resetting...
Test run details saved/updated under eval name '1d5f0018-c740-4fdd-8d1f-992c99feba7f' with timestamp 2024-06-23T18:34:16.305998.

📊 Results:

Test Name: chunk_impact_test

+-------------------+---------------------------+-----------+--------+---------------------------+-----------+---------------------------+
|     Test Name     |          Response         |   Score   | Result |           Reason          | Threshold |          Context          |
+-------------------+---------------------------+-----------+--------+---------------------------+-----------+---------------------------+
| chunk_impact_test |   Leonardo Da Vinci was   | 0.6449599 |   ✅   |  Score: 0.764 -> Born in  |    0.60   |  [["Born in Vinci, Italy, |
|                   |  born in Vinci, Italy in  |           |        |   Vinci, Italy, in 1452,  |           |    in 1452, Leonardo's    |
|

In [7]:
# Hallucination Test
# Two candidate responses are checked against the same prompt and context in
# a single run: `response`, and `contradiction`, which additionally says the
# blond person is a woman while the context describes a man.
prompt = "What was the blond doing?"
response = "A blond drinking water in public."
contradiction = "A blond woman is drinking water in public."
context = [
    "A man with blond-hair, and a brown shirt drinking out of a public water fountain."
]

# `test_names` is passed as a list so this cell uses the same call shape as
# every other add_test call in the notebook.
evaluator.add_test(
    test_names=["hallucination_test"],
    data={
        "prompt": prompt,
        "response": response,
        "context": context,
    },
    arguments={"model": "gpt-4", "threshold": 0.6},
).add_test(
    test_names=["hallucination_test"],
    data={
        "prompt": prompt,
        "response": contradiction,
        "context": context,
    },
    arguments={"model": "gpt-4", "threshold": 0.6},
).run()

evaluator.print_results()

🚀 Starting execution of 2 tests...

🔍 Test 1 of 2: hallucination_test starts...
🚫 Error while running the tests. Resetting...
Test run details saved/updated under eval name '1d5f0018-c740-4fdd-8d1f-992c99feba7f' with timestamp 2024-06-24T07:25:58.460071.

📊 Results:

Test Name: chunk_impact_test

+-------------------+---------------------------+-----------+--------+---------------------------+-----------+---------------------------+
|     Test Name     |          Response         |   Score   | Result |           Reason          | Threshold |          Context          |
+-------------------+---------------------------+-----------+--------+---------------------------+-----------+---------------------------+
| chunk_impact_test |   Leonardo Da Vinci was   | 0.6449599 |   ✅   |  Score: 0.764 -> Born in  |    0.60   |  [["Born in Vinci, Italy, |
|                   |  born in Vinci, Italy in  |           |        |   Vinci, Italy, in 1452,  |           |    in 1452, Leonardo's    |
|       