Imports

In [16]:
import json
from typing import List, Dict, Tuple
from ragas import SingleTurnSample
from ragas.metrics import NonLLMContextPrecisionWithReference


  from .autonotebook import tqdm as notebook_tqdm


Read the ground truth and build (query, ground-truth texts) pairs

In [7]:
# Ground-truth file. From how it is consumed below, it is expected to be a JSON
# list of objects with a "question" string and an optional
# "ground_truth_segments" list of {"text": ...} objects.
# NOTE(review): absolute, machine-specific path — consider a configurable data dir.
path = r"C:/Users/NCABALLERO/OneDrive - HBK/Thesis/Code/masterThesis/testing/gt.json"

# Load the JSON safely
with open(path, "r", encoding="utf-8") as f:
    data = json.load(f)

# Build a list of tuples: (query, [list of texts]).
# Items without "ground_truth_segments" get an empty list of texts.
query_texts_pairs = [
    (
        item["question"].strip(),
        [seg["text"].strip() for seg in item.get("ground_truth_segments", [])]
    )
    for item in data
]

# Example: show the first query and its texts.
# Guarded so the preview cannot raise IndexError when the file is empty or the
# first item has no segments (a case the comprehension above explicitly allows).
if query_texts_pairs:
    first_query, first_texts = query_texts_pairs[0]
    print("Query:", first_query)
    print("Number of ground truth texts:", len(first_texts))
    if first_texts:
        print("First text snippet:", first_texts[0][:200].replace("\n", " "), "...\n")
    else:
        print("First text snippet: <none>\n")
else:
    print("No ground truth entries found in:", path)

Query: Which solution should a researcher use if they want High accuracy and data export?
Number of ground truth texts: 17
First text snippet: You can simultaneously measure source (L1) and receiving room levels (L2) by connecting two HBK 2255 Sound Level Meters to the app.  This feature is designed to save time and enhance efficiency, espec ...



Read the dense retrieval JSON and build (query, retrieved texts) pairs

In [8]:
# Path to the dense-retrieval (RAG) output JSON.
# NOTE(review): absolute, machine-specific path — consider a configurable data dir.
path = r"C:/Users/NCABALLERO/OneDrive - HBK/Thesis/Code/masterThesis/testing/retrieval_results.json"

# Load the JSON. From how it is consumed below, it is expected to be an object
# mapping each query string to a list of {"text": ...} results.
with open(path, "r", encoding="utf-8") as f:
    rag_data_dense = json.load(f)

# Build a list of tuples: (query, [list of retrieved texts]).
# The query is stripped, like the ground-truth questions, so that pairs can be
# matched by query text without whitespace mismatches.
query_retrieved_pairs_dense = [
    (
        query.strip(),
        [item["text"].strip() for item in texts]
    )
    for query, texts in rag_data_dense.items()
]

# Example: show the first query and its retrieved texts.
# Guarded so the preview cannot raise IndexError when there are no queries or
# the first query retrieved nothing.
if query_retrieved_pairs_dense:
    first_query, first_texts = query_retrieved_pairs_dense[0]
    print("Query:", first_query)
    print("Number of retrieved texts:", len(first_texts))
    if first_texts:
        print("First retrieved text snippet:", first_texts[0][:200].replace("\n", " "), "...\n")
    else:
        print("First retrieved text snippet: <none>\n")
else:
    print("No dense retrieval entries found in:", path)

Query: Which solution should a researcher use if they want High accuracy and data export?
Number of retrieved texts: 2
First retrieved text snippet: Pros and cons of measurement methods Scans can be faster than measuring at fixed pos- You are able to listen to the sound field as you You can control measurements from outside the room, without intro ...



Do the same for the sparse (BM25) retrieval results

In [10]:
# Path to the sparse-retrieval (BM25) output JSON.
# NOTE(review): absolute, machine-specific path — consider a configurable data dir.
path = r"C:/Users/NCABALLERO/OneDrive - HBK/Thesis/Code/masterThesis/testing/retrieval_results_sparse copy.json"

# Load the JSON. From how it is consumed below, it is expected to be a list of
# objects with a "query" string and a "results" list of {"window": ...} objects.
with open(path, "r", encoding="utf-8") as f:
    bm25_data = json.load(f)

# Build a list of tuples: (query, [list of retrieved texts]).
# The query is stripped, like the ground-truth questions, so that pairs can be
# matched by query text without whitespace mismatches.
query_retrieved_pairs_sparse = [
    (
        item["query"].strip(),
        [res["window"].strip() for res in item["results"]]
    )
    for item in bm25_data
]

# Example: show the first query and its retrieved texts.
# Guarded so the preview cannot raise IndexError when there are no queries or
# the first query retrieved nothing.
if query_retrieved_pairs_sparse:
    first_query, first_texts = query_retrieved_pairs_sparse[0]
    print("Query:", first_query)
    print("Number of retrieved texts:", len(first_texts))
    if first_texts:
        print("First retrieved text snippet:", first_texts[0][:200].replace("\n", " "), "...\n")
    else:
        print("First retrieved text snippet: <none>\n")
else:
    print("No sparse retrieval entries found in:", path)

Query: Which solution should a researcher use if they want High accuracy and data export?
Number of retrieved texts: 2
First retrieved text snippet: It is possible to override all data.  Adjust  the slope of the decay, if needed, or edit data to test theories  about the effects of changes you can make to get specific results.   Data edited in this ...



LLM responses

In [15]:
# Chat-log file with the logged dense/sparse RAG interactions.
# NOTE(review): absolute, machine-specific path — consider a configurable data dir.
path = r"C:/Users/NCABALLERO/OneDrive - HBK/Thesis/Code/masterThesis/testing/chat_logs.json"

# Read the chat logs explicitly from `path` into their own variable. Iterating
# over a leftover `data` variable would, on a fresh kernel, walk the
# ground-truth JSON instead (which has no "mode"/"query" keys) and silently
# produce two empty lists.
# Assumes the file is a JSON list of objects — TODO confirm.
with open(path, "r", encoding="utf-8") as f:
    chat_logs = json.load(f)

dense_pairs = []   # (query, response text) for entries logged in "dense rag" mode
sparse_pairs = []  # (query, [window texts]) for entries logged in "sparse rag" mode

for item in chat_logs:
    query = item.get("query", "").strip()
    mode = item.get("mode", "").strip().lower()  # normalize to lowercase

    if mode == "dense rag":
        response = item.get("response", "").strip()
        dense_pairs.append((query, response))
    elif mode == "sparse rag":
        # NOTE(review): sparse entries collect the retrieved "window" texts
        # rather than a "response" string — confirm this asymmetry is intended.
        results = item.get("results", [])
        texts = [res.get("window", "").strip() for res in results]  # even if empty
        sparse_pairs.append((query, texts))
    # Entries with any other mode are ignored.

print("Dense pairs found:", len(dense_pairs))
print("Sparse pairs found:", len(sparse_pairs))


Dense pairs found: 18
Sparse pairs found: 18


### Context Precision

In [None]:
# Inspect the first sparse entry: a (query, [retrieved window texts]) tuple.
query_retrieved_pairs_sparse[0]

[('Which solution should a researcher use if they want High accuracy and data export?',
  ['It is possible to override all data.  Adjust \nthe slope of the decay, if needed, or edit data to test theories \nabout the effects of changes you can make to get specific results. \n Data edited in this way is marked so the manual changes are \nvisible.  Should you want to remove the edits, the original data will \nstill be available.\n Export the data.  To perform custom analysis or check \ncalculations, or to upload your data to a database, you can export \ndata to Microsoft Excel®.\n Create reports.',
   '3.  Specify the name and location of the export file in the Destination file field.\n 4.  Choose the format into which you want to export the measurement data:\nFile extension *.xlsx is compatible with Microsoft Excel 2007 and newer.\n l Tab separated values (*.txt)\n5.  If you are exporting to a workbook, you have the option to use a master file.\n When you export a measurement to a workbo

In [None]:
context_precision = NonLLMContextPrecisionWithReference()

sample = SingleTurnSample(
    retrieved_contexts=,
    reference_contexts=["Paris is the capital of France.", "The Eiffel Tower is one of the most famous landmarks in Paris."]
)

await context_precision.single_turn_ascore(sample)