<a href="https://colab.research.google.com/github/safakatakancelik/rag-system-00-and-eval/blob/master/evaluation/RAG_Evaluation_with_Ragas_%2B_Ollama.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Setup: install Ollama, Python dependencies, and models

In [None]:
# Install Ollama and essential libraries
!curl -fsSL https://ollama.com/install.sh | sh
!pip install ragas langchain_ollama datasets langchain_community -q

import subprocess
import time

# Start Ollama server in the background
subprocess.Popen(['ollama', 'serve'])
time.sleep(5) # Give it a moment to wake up

# Pull the smarter judge model
!ollama pull qwen2.5:7b
!ollama pull nomic-embed-text

### Prepare the eval dataset

In [None]:
import pandas as pd
import numpy as np
from datasets import Dataset

In [None]:
# Location of the CSV with the RAG system's test responses to be evaluated.
TEST_RESPONSES_URL = "https://raw.githubusercontent.com/safakatakancelik/rag-system-00-and-eval/refs/heads/master/evaluation/test_responses.csv"

# Load the responses straight from the repository into a DataFrame.
df = pd.read_csv(TEST_RESPONSES_URL)

### Run Evaluation

In [None]:
import ast


def _parse_contexts(value):
    """Turn the CSV's string form of a contexts list into a real Python list.

    Values that are already lists are returned unchanged so this cell can be
    re-run without error; `ast.literal_eval` raises ValueError when handed a
    list instead of a string. Any other value is still passed to
    `ast.literal_eval`, so malformed or missing entries keep failing loudly.
    """
    if isinstance(value, list):
        return value
    return ast.literal_eval(value)


# Typecast the 'contexts' column to lists to prevent an eval pipeline error.
df['contexts'] = df['contexts'].apply(_parse_contexts)
eval_dataset = Dataset.from_pandas(df)

In [None]:
import os
from openai import OpenAI
from ragas.llms import llm_factory
from ragas import evaluate
from ragas.metrics import faithfulness, answer_correctness, context_recall
from ragas.run_config import RunConfig
from langchain_ollama import OllamaEmbeddings

# Names of the models served by the local Ollama instance.
JUDGE_LLM_MODEL = "qwen2.5:7b"
JUDGE_EMBEDDING_MODEL = "nomic-embed-text"

# OpenAI-compatible client pointed at the local Ollama endpoint.
client = OpenAI(
    base_url="http://localhost:11434/v1",
    api_key="ollama",
)

# Judge LLM, wrapped through ragas' llm_factory on top of that client.
judge_llm = llm_factory(JUDGE_LLM_MODEL, provider="openai", client=client)

# Judge embeddings, provided by langchain's OllamaEmbeddings.
judge_embeddings = OllamaEmbeddings(model=JUDGE_EMBEDDING_MODEL)

# Run settings intended to prevent timeouts and parsing errors:
# a single worker, a timeout of 600 and up to 3 retries.
config = RunConfig(
    max_workers=1,
    timeout=600,
    max_retries=3,
)

# Metrics scored for every sample of the evaluation dataset.
judge_metrics = [faithfulness, answer_correctness, context_recall]

# Run the evaluation.
print("Evaluating with Qwen 2.5 7B & Nomic Embeddings...")
results = evaluate(
    eval_dataset,
    metrics=judge_metrics,
    llm=judge_llm,
    embeddings=judge_embeddings,
    run_config=config,
)

In [None]:
# Export the evaluation results.
# Use the result object's own tabular export rather than wrapping the whole
# object in a one-element list for pandas.
# NOTE(review): per the ragas docs, to_pandas() yields one row per evaluated
# sample with a column per metric; the CSV is therefore per-sample, not a single
# aggregate row. Confirm this is the desired export format.
results_df = results.to_pandas()
results_df.to_csv("EvaluationResults.csv")