<a href="https://colab.research.google.com/github/safakatakancelik/rag-system-00-and-eval/blob/master/evaluation/RAG_Evaluation_with_Ragas_%2B_Ollama.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# Install Ollama and essential libraries
!curl -fsSL https://ollama.com/install.sh | sh
!pip install ragas langchain_ollama datasets langchain_community -q

import subprocess
import time

# Start Ollama server in the background
subprocess.Popen(['ollama', 'serve'])
time.sleep(5) # Give it a moment to wake up

# Pull the smarter judge model
!ollama pull qwen2.5:7b
!ollama pull nomic-embed-text

>>> Cleaning up old version at /usr/local/lib/ollama
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
######################################################################## 100.0%
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25

In [2]:
import pandas as pd
from datasets import Dataset

# Ten hand-written RAG operations, one record per operation:
# (question, retrieved contexts, generated answer, reference answer)
rag_samples = [
    (
        "What is the capital of France?",
        ["Paris is the capital and most populous city of France."],
        "The capital of France is Paris.",
        "Paris",
    ),
    (
        "Who wrote '1984'?",
        ["George Orwell wrote the dystopian novel 1984."],
        "The book 1984 was written by George Orwell.",
        "George Orwell",
    ),
    (
        "What is the boiling point of water?",
        ["The boiling point of water is 100 degrees Celsius at sea level."],
        "Water boils at 100°C.",
        "100°C",
    ),
    (
        "Which planet is known as the Red Planet?",
        ["Mars is often referred to as the Red Planet due to its reddish appearance."],
        "Mars is the Red Planet.",
        "Mars",
    ),
    (
        "What is the largest ocean on Earth?",
        ["The Pacific Ocean is the largest and deepest of Earth's oceanic divisions."],
        "The Pacific Ocean is the largest.",
        "Pacific Ocean",
    ),
    (
        "Who painted the Mona Lisa?",
        ["Leonardo da Vinci is the artist behind the Mona Lisa."],
        "Mona Lisa was painted by Leonardo da Vinci.",
        "Leonardo da Vinci",
    ),
    (
        "What is the speed of light?",
        ["The speed of light in a vacuum is approximately 299,792,458 meters per second."],
        "Light travels at about 300,000 km/s.",
        "299,792,458 m/s",
    ),
    (
        "What is the chemical symbol for gold?",
        ["Au is the chemical symbol for gold, derived from the Latin word aurum."],
        "Gold's symbol is Au.",
        "Au",
    ),
    (
        "In which year did World War II end?",
        ["World War II officially ended on September 2, 1945."],
        "WWII ended in 1945.",
        "1945",
    ),
    (
        "What is the tallest mountain in the world?",
        ["Mount Everest is Earth's highest mountain above sea level."],
        "Mount Everest is the tallest peak.",
        "Mount Everest",
    ),
]

# Pivot the records into the column-oriented dict: one list per field,
# with the fields in the same order as the tuple positions above.
column_names = ("question", "contexts", "answer", "ground_truth")
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*rag_samples))
}

# Wrap the columns in a Hugging Face Dataset, the object handed to Ragas later
eval_dataset = Dataset.from_dict(data)

In [None]:
import os
from openai import OpenAI
from ragas.llms import llm_factory
from ragas import evaluate
from ragas.metrics import faithfulness, answer_correctness, context_recall
from ragas.run_config import RunConfig
from langchain_ollama import OllamaEmbeddings # Import this directly

# Endpoint and model names used by the judge, gathered in one place.
OLLAMA_OPENAI_ENDPOINT = "http://localhost:11434/v1"
JUDGE_MODEL_NAME = "qwen2.5:7b"
EMBEDDING_MODEL_NAME = "nomic-embed-text"

# OpenAI-style client pointed at the local Ollama endpoint.
# NOTE(review): the api_key looks like a placeholder required by the client,
# not a real secret -- confirm the local server does not validate it.
client = OpenAI(api_key="ollama", base_url=OLLAMA_OPENAI_ENDPOINT)

# Judge LLM built through the Ragas factory on top of that client.
judge_llm = llm_factory(JUDGE_MODEL_NAME, client=client, provider="openai")

# Embedding model: LangChain's OllamaEmbeddings, handed to evaluate() as-is.
judge_embeddings = OllamaEmbeddings(model=EMBEDDING_MODEL_NAME)

# One worker, a 600-unit timeout and up to 3 retries, intended to avoid
# timeouts and parsing errors during evaluation.
config = RunConfig(timeout=600, max_retries=3, max_workers=1)

# Everything evaluate() needs besides the dataset itself.
evaluation_options = {
    "metrics": [faithfulness, answer_correctness, context_recall],
    "llm": judge_llm,
    "embeddings": judge_embeddings,
    "run_config": config,
}

# Score the sample dataset with the three metrics.
print("Evaluating with Qwen 2.5 7B & Nomic Embeddings...")
results = evaluate(eval_dataset, **evaluation_options)

In [8]:
# Display the aggregate score of each metric from the evaluation run.
results

{'faithfulness': 0.9000, 'answer_correctness': 0.8801, 'context_recall': 1.0000}