# Generating a Testset
Based on the UCL Computer Science Handbook

In [7]:
# Alternative: Direct AI Foundry Configuration

from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_openai import AzureChatOpenAI, OpenAIEmbeddings
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# AI Foundry deployment details (from your deployment info)
endpoint = "https://llm-benchmarking.cognitiveservices.azure.com/"
deployment = "gpt-4.1-mini"
api_version = "2024-12-01-preview"
model_version = "2025-04-14"  # reported in the summary printed at the end of this cell

# Get API key from environment variable.
# os.environ.get returns None when the variable is missing, so surface that
# immediately instead of letting it pass unnoticed into the client below.
subscription_key = os.environ.get("AZURE_OPENAI_API_KEY_4_1")
if not subscription_key:
    # NOTE(review): depending on the langchain_openai version, a None key may
    # either raise at construction time or fall back to other environment
    # variables - confirm which, and tighten this into a hard error if wanted.
    print("⚠️ AZURE_OPENAI_API_KEY_4_1 is not set - check your .env file before running the cells below.")

# Create Azure LLM for your AI Foundry deployment
azure_llm = AzureChatOpenAI(
    azure_deployment=deployment,
    api_version=api_version,
    azure_endpoint=endpoint,
    api_key=subscription_key,
    temperature=1.0,  # Ragas recommends temperature=1.0 for testset generation
    max_tokens=4000
)

# Wrap for Ragas
generator_llm = LangchainLLMWrapper(azure_llm)

# For embeddings, you can use OpenAI's embedding models
# If you have an embedding model deployed in AI Foundry, you can use that instead
generator_embeddings = LangchainEmbeddingsWrapper(
    OpenAIEmbeddings(
        model="text-embedding-3-small"
        # API key is loaded from environment variables
    )
)

# Summary of the configuration used by the rest of the notebook.
print("✅ AI Foundry LLM and embeddings configured successfully!")
print(f"📡 Using Azure endpoint: {endpoint}")
print(f"🤖 Using deployment: {deployment}")
print(f"📊 Model version: {model_version} (from your deployment info)")


✅ AI Foundry LLM and embeddings configured successfully!
📡 Using Azure endpoint: https://llm-benchmarking.cognitiveservices.azure.com/
🤖 Using deployment: gpt-4.1-mini
📊 Model version: 2025-04-14 (from your deployment info)


  generator_embeddings = LangchainEmbeddingsWrapper(


In [8]:
from langchain_community.document_loaders import DirectoryLoader

# Load every file matching **/*.md under the handbook directory.
# The path is relative to the notebook's working directory.
path = "../../data/cs-handbook"
loader = DirectoryLoader(path, glob="**/*.md")
docs = loader.load()

# Fail fast: an empty result here would otherwise only show up later as an
# empty knowledge graph.
assert docs, f"No Markdown documents found under {path!r} - check the working directory."

# Show how many documents were loaded rather than dumping their full text.
len(docs)

In [None]:
# load the documents
from langchain_community.document_loaders import TextLoader

# Alternative to the DirectoryLoader cell: read the handbook from one Markdown file.
# NOTE: running this cell overwrites `path`, `loader` and `docs` from the cell above.
# The path is relative to the notebook's working directory (project root is two
# levels up) instead of an absolute, machine-specific location.
path = "../../data/cs-handbook.md"
loader = TextLoader(path)
docs = loader.load()

# Show how many documents were loaded rather than dumping their full text.
len(docs)

In [9]:
# create a knowledge graph
from ragas.testset.graph import KnowledgeGraph
from ragas.testset.graph import Node, NodeType

# Start from an empty graph and add one DOCUMENT node per loaded document,
# carrying over the document text and its metadata as node properties.
kg = KnowledgeGraph()
kg.nodes.extend(
    Node(
        type=NodeType.DOCUMENT,
        properties={
            "page_content": source_doc.page_content,
            "document_metadata": source_doc.metadata,
        },
    )
    for source_doc in docs
)

# Display the graph summary.
kg

KnowledgeGraph(nodes: 24, relationships: 0)

In [10]:
from ragas.testset.transforms import apply_transforms
from ragas.testset.transforms import HeadlinesExtractor, HeadlineSplitter, KeyphrasesExtractor

# Transform steps, applied to the knowledge graph in the order listed below.
# (A previous variant passed max_num=20 to HeadlinesExtractor; the default is used here.)
headline_extractor = HeadlinesExtractor(llm=generator_llm)
headline_splitter = HeadlineSplitter(max_tokens=1500)
keyphrase_extractor = KeyphrasesExtractor(llm=generator_llm)

transforms = [headline_extractor, headline_splitter, keyphrase_extractor]

# The return value is not captured; the effect is observed on `kg` itself.
apply_transforms(kg, transforms=transforms)

Applying KeyphrasesExtractor:  40%|███▉      | 37/93 [00:06<00:08,  6.56it/s]Property 'keyphrases' already exists in node 'f207be'. Skipping!
Applying KeyphrasesExtractor:  81%|████████  | 75/93 [00:11<00:02,  8.55it/s]Property 'keyphrases' already exists in node '4b4f05'. Skipping!
Property 'keyphrases' already exists in node '2db82a'. Skipping!
Applying KeyphrasesExtractor:  96%|█████████▌| 89/93 [00:13<00:00,  8.72it/s]Property 'keyphrases' already exists in node '242e83'. Skipping!
                                                                             

In [11]:
# Inspect the knowledge graph again now that the transforms have been applied.
# apply_transforms was called without capturing a return value, so any change
# made by the transforms is visible on `kg` itself.
kg

KnowledgeGraph(nodes: 93, relationships: 110)

In [12]:
# Configure personas for UCL Computer Science Handbook testset generation
from ragas.testset.persona import Persona

# UCL-specific personas based on common scenarios, kept as plain
# (name, role_description) pairs and converted to Persona objects below.
_persona_specs = [
    (
        "injured_undergraduate",
        "An undergraduate student who has hurt themselves and wants to know how to get an extension or support for missed deadlines.",
    ),
    (
        "struggling_postgraduate",
        "A Master's student who is worried about failing or missing coursework and needs clarity on resits, condonement, or progression rules.",
    ),
    (
        "international_student",
        "An international student concerned about visa requirements, travel, and how academic issues like resits or deferrals may affect their status.",
    ),
    (
        "disabled_student",
        "A student with a disability or long-term condition seeking information about reasonable adjustments, SORA, and who to contact for support.",
    ),
    (
        "module_choice_student",
        "A student unsure about how to select optional modules, late registration, or interdisciplinary modules, and wants guidance on the process.",
    ),
    (
        "worried_exam_candidate",
        "A student anxious about exams, asking about timetable clashes, exam formats, or what happens if they are ill during exams.",
    ),
    (
        "faculty_member",
        "A staff or faculty member who wants to check policies about communication with students, extenuating circumstances procedures, or progression rules.",
    ),
    (
        "everyday_undergraduate",
        "A first-year undergraduate who has everyday concerns such as where to find timetables, how to contact admin staff, or how teaching is structured.",
    ),
]

# One Persona per spec, in the order listed above.
personas = [
    Persona(name=persona_name, role_description=persona_role)
    for persona_name, persona_role in _persona_specs
]

In [13]:
from ragas.testset.synthesizers.single_hop.specific import (
    SingleHopSpecificQuerySynthesizer,
)

# Two single-hop synthesizers, one per node property, each weighted 0.5.
# (The variable name is kept as spelled because the generation cell references it.)
query_distibution = [
    (
        SingleHopSpecificQuerySynthesizer(llm=generator_llm, property_name=node_property),
        0.5,
    )
    for node_property in ("headlines", "keyphrases")
]

In [14]:
# setup testset generator

from ragas.testset import TestsetGenerator

# Collect everything the generator needs: the wrapped LLM and embeddings,
# the transformed knowledge graph, and the persona list defined above.
generator_kwargs = {
    "llm": generator_llm,
    "embedding_model": generator_embeddings,
    "knowledge_graph": kg,
    "persona_list": personas,
}

generator = TestsetGenerator(**generator_kwargs)

In [15]:
# Number of samples requested from the generator.
TESTSET_SIZE = 10

testset = generator.generate(
    testset_size=TESTSET_SIZE,
    query_distribution=query_distibution,
)

# Display the generated samples as a DataFrame.
testset.to_pandas()

Generating Scenarios: 100%|██████████| 2/2 [00:30<00:00, 15.29s/it]
Generating Samples: 100%|██████████| 10/10 [00:17<00:00,  1.73s/it]


Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,"Could you please explain what is meant by ""10....",[10. Module information & selection\n\nAdvice ...,"""10.4 Module information"" refers to details ab...",single_hop_specific_query_synthesizer
1,hw can i get my transcrips and degree certifca...,[23. After study\n\nInformation on confirmatio...,"Students may request transcripts, degree certi...",single_hop_specific_query_synthesizer
2,Could you please explain what support is avail...,[15. Student support and wellbeing\n\nInformat...,"The 15.3 Equity, diversity, and inclusion supp...",single_hop_specific_query_synthesizer
3,can u tell me how 13.2 Feedback works and when...,[13. Assessment and feedback\n\nInformation on...,13.2 Feedback explains that students should ge...,single_hop_specific_query_synthesizer
4,wher can i find 4. Key dates and comunication?,[4. Key dates and communication\n\nAn overview...,"The section titled ""4. Key dates and communica...",single_hop_specific_query_synthesizer
5,"What are compulsory modules, and how do they d...",[10. Module information & selection Advice on ...,Compulsory modules are essential to a programm...,single_hop_specific_query_synthesizer
6,What does approval by the Module Leader and Pr...,[10.3 Module selection 10.3.1 Choosing modules...,All module selections are ultimately subject t...,single_hop_specific_query_synthesizer
7,who i ask for academic advice on module select...,[10.4 Module information Information about mod...,"For academic advice on module selection, such ...",single_hop_specific_query_synthesizer
8,Where can I find information on confirmation o...,[23. After study Information on confirmation o...,Information on confirmation of student status ...,single_hop_specific_query_synthesizer
9,how can i get academic reference plz?,[23.1 Summary of options for confirmation of s...,An academic reference is a personal academic r...,single_hop_specific_query_synthesizer


In [16]:
from datetime import datetime
import json
import os

# Save dataset as json for rag eval
# Navigate to project root (two levels up from src/testset/); this is derived
# from the current working directory, so run the notebook from its own folder.
project_root = os.path.abspath(os.path.join(os.getcwd(), "..", ".."))
out_dir = os.path.join(project_root, "data", "testset")
os.makedirs(out_dir, exist_ok=True)

print(f"💾 Saving to: {out_dir}")

# File name includes the deployment name (with ':' and '/' replaced) and a timestamp.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
model_slug = deployment.replace(":", "_").replace("/", "_")
out_path = os.path.join(
    out_dir,
    f"cs-handbook_single_hop_testset_{model_slug}_{timestamp}.json"
)

# Materialise the samples once so the file contents, the reported count and
# the preview below all come from the same list.
samples = testset.to_list()

with open(out_path, "w", encoding="utf-8") as f:
    json.dump(samples, f, indent=2, ensure_ascii=False)

print(f"\n🎉 Success! Saved {len(samples)} high-quality single-hop samples to {out_path}")

# Show sample questions (first three at most)
if samples:
    print("\n📋 Sample Questions Generated:")
    for i, sample in enumerate(samples[:3], 1):
        print(f"  {i}. {sample['user_input']}")


💾 Saving to: /Users/Malik/code/malikbou/ucl/thesis/on-premise-slm/data/testset

🎉 Success! Saved 10 high-quality single-hop samples to /Users/Malik/code/malikbou/ucl/thesis/on-premise-slm/data/testset/cs-handbook_single_hop_testset_gpt-4.1-mini_20250911_150427.json

📋 Sample Questions Generated:
  1. Could you please explain what is meant by "10.4 Module information" and how it can help me as a struggling postgraduate student in making informed module choices?
  2. hw can i get my transcrips and degree certifcate from UCL?
  3. Could you please explain what support is available under the 15.3 Equity, diversity, and inclusion support at UCL, and how the university and the Computer Science department promote equity, diversity, and inclusion for students?
