In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import nest_asyncio
nest_asyncio.apply()

In [3]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core.llama_dataset.generator import RagDatasetGenerator
from llama_index.llms.ollama import Ollama

In [4]:
!mkdir  -p ../data
!wget "https://arxiv.org/pdf/2405.00247.pdf" -O "../data/non_traditional_credentials.pdf"

--2025-05-12 14:01:35--  https://arxiv.org/pdf/2405.00247.pdf
Resolving arxiv.org (arxiv.org)... 151.101.195.42, 151.101.131.42, 151.101.67.42, ...
Connecting to arxiv.org (arxiv.org)|151.101.195.42|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: http://arxiv.org/pdf/2405.00247 [following]
--2025-05-12 14:01:35--  http://arxiv.org/pdf/2405.00247
Connecting to arxiv.org (arxiv.org)|151.101.195.42|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1524369 (1.5M) [application/pdf]
Saving to: ‘../data/non_traditional_credentials.pdf’


2025-05-12 14:01:35 (24.5 MB/s) - ‘../data/non_traditional_credentials.pdf’ saved [1524369/1524369]



In [5]:
docs = SimpleDirectoryReader("../data/").load_data(show_progress=True)

Loading files: 100%|██████████| 1/1 [00:00<00:00,  1.94file/s]


In [6]:
data_gen = RagDatasetGenerator.from_documents(
    docs,
    llm= Ollama("qwen2.5"),
    question_gen_query="You are a teacher/professor. Using the provided context, formulat a single question and its answer",
    num_questions_per_chunk=10
)

In [8]:
qa_dataset = data_gen.generate_dataset_from_nodes()



In [None]:
qa_dataset.examples

[LabelledRagDataExample(query='**Question:** What was the observed effect of encouraging learners to share their MOOC credentials on their job outcomes according to the study?', query_by=CreatedBy(model_name='qwen2.5', type=<CreatedByType.AI: 'ai'>), reference_contexts=['The value of non-traditional credentials in the labor market*\nSusan Athey & Emil Palikot\nMay 2, 2024\nAbstract\nThis study investigates the labor market value of credentials obtained from Massive Open On-\nline Courses (MOOCs) and shared on business networking platforms. We conducted a random-\nized experiment involving more than 800,000 learners, primarily from developing countries and\nwithout college degrees, who completed technology or business-related courses on the Coursera\nplatform between September 2022 and March 2023. The intervention targeted learners who had\nrecently completed their courses, encouraging them to share their credentials and simplifying the\nsharing process. One year after the intervention,

In [15]:
import json

def serialize_to_jsonl(examples, out_path="train.jsonl"):
    """
    examples: list of LabelledRagDataExample,
              each with .query (str) and .reference_answer (str)
    out_path:  path to write the JSONL file
    """
    def strip_prefix(text):
        # remove leading **Question:** or **Answer:** if present
        for p in ("**Question:**", "**Answer:**"):
            if text.strip().startswith(p):
                return text.strip()[len(p):].strip()
        return text

    with open(out_path, "w", encoding="utf8") as f:
        for ex in examples:
            q_raw = ex.query or ""
            a_raw = getattr(ex, "reference_answer", None)
            # only serialize if this is a 'Question' example and has an answer
            if q_raw.lower().startswith("**question") and a_raw:
                q = strip_prefix(q_raw)
                a = a_raw.strip()
                obj = {
                    "messages": [
                        {"role": "user",      "content": q},
                        {"role": "assistant", "content": a}
                    ]
                }
                f.write(json.dumps(obj, ensure_ascii=False) + "\n")

In [16]:
serialize_to_jsonl(qa_dataset.examples)

## Evaluate RAG

In [23]:
from llama_index.embeddings.ollama import OllamaEmbedding

embed_model = OllamaEmbedding(model_name="nomic-embed-text")

In [24]:
from llama_index.core import VectorStoreIndex

index = VectorStoreIndex.from_documents(docs, embed_model=embed_model)
query_engine = index.as_query_engine(similarity_top_k=6, llm = Ollama("qwen2.5"))

In [27]:
from llama_index.core.llama_pack import download_llama_pack

RagEvaluatorPack = download_llama_pack("RagEvaluatorPack", "./pack")
rag_evaluator = RagEvaluatorPack(
    query_engine=query_engine, 
    rag_dataset=qa_dataset,
    judge_llm=Ollama("qwen3"),
    embed_model=OllamaEmbedding(model_name="nomic-text-embed")
)

Processing /Users/tituslim/Documents/Personal Learning Folder/Personal Projects/ideal-palm-tree/notebooks/pack
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: llama-index-packs-rag-evaluator
  Building wheel for llama-index-packs-rag-evaluator (pyproject.toml): started
  Building wheel for llama-index-packs-rag-evaluator (pyproject.toml): finished with status 'done'
  Created wheel for llama-index-packs-rag-evaluator: filename=llama_index_packs_rag_evaluator-0.3.0-py3-none-any.whl size=4929 sha256=5e36c5b106ef33696bbfafe9ac35b343b71542a4411152f9f15bbac843b5b52a
  Stored in directory: /private/var/folders/zb/r15p7t_d62d8m2s0623s22gh0000gn/T/pip-ephem-wheel-cache-m0y2_dpg

In [None]:
benchmark_df = await rag_evaluator.run()

100%|██████████| 10/10 [01:43<00:00, 10.30s/it]
100%|██████████| 10/10 [01:28<00:00,  8.83s/it]
100%|██████████| 10/10 [02:57<00:00, 17.76s/it]
100%|██████████| 10/10 [02:47<00:00, 16.78s/it]
100%|██████████| 10/10 [01:32<00:00,  9.22s/it]
 30%|███       | 3/10 [00:33<01:31, 13.03s/it]