# PSTUTS_RAG RAG evaluation

Wall of imports first.

In [48]:
import asyncio
import getpass
import json
import logging
import os
from dataclasses import dataclass
from typing import Any, Dict, List

import chainlit as cl
import requests
from dotenv import load_dotenv
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.language_models import BaseChatModel
from langchain_core.runnables import Runnable
from langchain_openai import ChatOpenAI
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient

import pstuts_rag.datastore
import pstuts_rag.rag
from pstuts_rag.loader import load_json_files

# Keep the notebook output readable: only warnings and above from these chatty libraries.
for _noisy_logger in ("httpx", "langchain"):
    logging.getLogger(_noisy_logger).setLevel(logging.WARNING)

# Pull settings (e.g. API keys) from a local .env file into the environment, if one exists.
load_dotenv()

def set_api_key_if_not_present(key_name, prompt_message=""):
    """
    Ensure the environment variable ``key_name`` holds a non-empty value.

    When the variable is missing or empty, the value is read interactively
    with ``getpass.getpass`` and stored in ``os.environ`` for the current
    process. When it is already set, nothing happens and no prompt is shown.

    Args:
        key_name (str): Name of the environment variable to check/populate.
        prompt_message (str): Prompt passed to ``getpass``. When empty, the
            variable name itself is used as the prompt.
    """
    # An unset variable and an empty one are treated the same way.
    if os.environ.get(key_name):
        return
    os.environ[key_name] = getpass.getpass(prompt_message or key_name)


# Make sure the OpenAI key is available; the user is prompted only if the
# environment (including anything loaded from .env) does not already provide it.
set_api_key_if_not_present("OPENAI_API_KEY")


We're going to mimic the structure from `app_simple_rag.py`.

Yes, this should be reused/loaded from a common library, 
but right now let's get it right before it gets pretty.

In [64]:
class RAGChainInstance:
    """
    A class that encapsulates a Retrieval-Augmented Generation (RAG) chain.

    This class holds the components needed for a RAG system (embeddings,
    Qdrant client, datastore manager, chain factory, LLM) and provides
    ``build_chain`` to populate the datastore from JSON data and assemble
    the chain.

    Attributes:
        embeddings (Embeddings): The embedding model handed to the constructor.
        docs (List[Document]): Per-instance list of documents (starts empty).
        qdrant_client (QdrantClient): Client for interacting with Qdrant vector database.
        vector_store (QdrantVectorStore): Vector store handle; not assigned by this class.
        datastore_manager (DatastoreManager): Created by ``build_chain``; ``None`` before.
        rag_factory (RAGChainFactory): Created by ``build_chain``; ``None`` before.
        llm (BaseChatModel): Language model used for generating responses.
        rag_chain (Runnable): The assembled RAG chain; ``None`` until ``build_chain`` ran.
        name (str): Identifier for this RAG chain instance.
        pointsLoaded (int): Value returned by ``populate_database`` (0 if it was not called).
    """

    # Annotations are quoted forward references: they document the intended
    # types without being evaluated when the class body executes.
    embeddings: "Embeddings | None" = None
    # No class-level default on purpose: a shared ``[]`` here would be the same
    # list object for every instance. Each instance gets its own in __init__.
    docs: "List[Document]"
    qdrant_client: "QdrantClient | None" = None
    vector_store: "QdrantVectorStore | None" = None
    datastore_manager: "pstuts_rag.datastore.DatastoreManager | None" = None
    rag_factory: "pstuts_rag.rag.RAGChainFactory | None" = None
    llm: "BaseChatModel"
    rag_chain: "Runnable | None" = None
    name: str

    pointsLoaded: int = 0

    def __init__(self, name, qdrant_client, llm, embeddings) -> None:
        """
        Initialize a new RAG chain instance.

        Only stores the collaborators; nothing is created or loaded until
        ``build_chain`` is awaited.

        Args:
            name (str): Identifier for this RAG chain instance.
            qdrant_client (QdrantClient): Client for Qdrant vector database.
            llm (BaseChatModel): Language model for response generation.
            embeddings (Embeddings): Embedding model for text vectorization.
        """
        self.name = name
        self.qdrant_client = qdrant_client
        self.llm = llm
        self.embeddings = embeddings

        # Per-instance state (never shared between instances).
        self.docs = []
        self.datastore_manager = None
        self.rag_factory = None
        self.rag_chain = None
        self.pointsLoaded = 0

    async def build_chain(self, json_payload: List[Dict[str, Any]]):
        """
        Build the RAG chain using the provided JSON data.

        Creates the datastore manager, populates the database only when it
        reports zero documents, creates the RAG factory from the manager's
        retriever, and assembles the final chain with ``self.llm``.

        Args:
            json_payload (List[Dict[str,Any]]): List of JSON documents to be processed.

        Returns:
            Runnable: The assembled RAG chain (also stored in ``self.rag_chain``).
        """
        # NOTE(review): self.embeddings is stored but never forwarded here, so
        # the datastore manager presumably falls back to its own default
        # embedding model. Confirm DatastoreManager's signature and pass the
        # embeddings through if that is the intent.
        self.datastore_manager = pstuts_rag.datastore.DatastoreManager(
            qdrant_client=self.qdrant_client, name=self.name
        )

        # Skip the (expensive) load when the collection already has content;
        # pointsLoaded then keeps its previous value.
        if self.datastore_manager.count_docs() == 0:
            self.pointsLoaded = await self.datastore_manager.populate_database(
                raw_docs=json_payload
            )

        self.rag_factory = pstuts_rag.rag.RAGChainFactory(
            retriever=self.datastore_manager.get_retriever()
        )
        self.rag_chain = self.rag_factory.get_rag_chain(self.llm)
        return self.rag_chain



The raw data is now stored on Hugging Face, so we can download it directly.

In [45]:
import pstuts_rag.loader

# Raw transcript data, fetched straight from the Hugging Face dataset repo.
url = "https://huggingface.co/datasets/mbudisic/PsTuts-VQA/raw/main/train.json"
# requests waits forever by default; bound the call so a stalled connection
# fails the cell instead of hanging the notebook.
resp = requests.get(url, timeout=60)
resp.raise_for_status()
# File stem of the URL ("train"); passed to the loader as the group label.
group = url.split('/')[-1].split('.')[0]
docs_json = pstuts_rag.loader.load_json_string(resp.content.decode('utf-8'), group)



Now, let's create the base chain.

In [43]:
from langchain_openai import OpenAIEmbeddings
from sentence_transformers import SentenceTransformer

# Embedding model for the baseline chain.
# NOTE(review): a raw SentenceTransformer is not a LangChain `Embeddings`
# object, and the retriever repr printed when the chain is built is tagged
# 'OpenAIEmbeddings' — so this model does not appear to be used by the
# datastore at the moment. Confirm before comparing embedding models.
model_id = "Snowflake/snowflake-arctic-embed-l"
base_embedding = SentenceTransformer(model_id)

# In-memory Qdrant instance: the collection lives only as long as this kernel.
qdrant_client = QdrantClient(":memory:")

base_rag = RAGChainInstance(
    name="base",
    qdrant_client=qdrant_client,
    llm=ChatOpenAI(model="gpt-4.1-nano"),
    embeddings=base_embedding,
)



2025-05-18 00:47:02 - Use pytorch device_name: cuda:0
2025-05-18 00:47:02 - Load pretrained SentenceTransformer: Snowflake/snowflake-arctic-embed-l
2025-05-18 00:47:03 - 1 prompts are loaded, with the keys: ['query']


Now, let's populate the datastore of the first chain and create the chain.


In [None]:
# Populate the datastore (only if it is still empty) and assemble the chain.
# The chain is also kept on base_rag.rag_chain, so the return value is discarded.
_ = await base_rag.build_chain(docs_json)

<built-in function repr>


RunnableLambda(itemgetter('question'))
| {
    context: VectorStoreRetriever(tags=['QdrantVectorStore', 'OpenAIEmbeddings'], vectorstore=<langchain_qdrant.qdrant.QdrantVectorStore object at 0x725adf8d4f90>, search_kwargs={'k': 2}),
    question: RunnablePassthrough()
  }
| {
    input: RunnablePassthrough(),
    answer: ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a helpful and friendly Photoshop expert.\n\nYour job is to answer user questions based **only** on transcript excerpts from training videos. These transcripts include **timestamps** that indicate when in the video the information was spoken.\n\nThe transcript is from **spoken audio**, so it may include informal phrasing, filler words, or fragmented sentences. You may interpret meaning **only to the extent it is clearly implied**, but you mu

Testing it out...

In [52]:
# Smoke test: ask a single question and print the answer message.
response = base_rag.rag_chain.invoke({"question":"What is a layer?"})
response.pretty_print()


A layer is a level in the stacking order of your project that can contain different elements like images, colors, or adjustments. Making a new layer is helpful for organizing your work, and you can change its order, opacity, or delete it as needed (00:01:41).
**REFERENCES**
[
  {
    "title": "Learn layer basics",
    "source": "https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/01a575ae-f8b7-486c-987b-bcb4f2f4e57d/3868e305-c73c-4931-82a0-5e46f5eb41e5_20170727011800.1280x720at2400_h264.mp4",
    "start": 141.29,
    "stop": 156.87
  },
  {
    "title": "Unlock the Background layer",
    "source": "https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/696245e0-aaad-42df-b48f-8b44b1f5211a/22729011-a533-48a4-a7a2-0b5f86d4eedd_20170727011751.1280x720at2400_h264.mp4",
    "start": 113.65,
    "stop": 227.99
  }
]


Formal evaluation goes through the "golden" dataset also stored 
on HF.

We're going to evaluate only on a portion of it.


In [70]:
from datasets import load_dataset
from ragas import EvaluationDataset

# Only the first 10 rows of the "train" split are used to keep the evaluation small.
golden_small_hf = load_dataset("mbudisic/pstuts_rag_qa",split="train[:10]")
# Convert the Hugging Face dataset into the ragas evaluation container.
golden_small_base = EvaluationDataset.from_hf_dataset(golden_small_hf)

# Show the dataset summary (features and length).
golden_small_base

EvaluationDataset(features=['user_input', 'reference_contexts', 'reference'], len=10)

In [68]:
def apply_rag_chain_inplace(rag_chain, ragas_ds):
    """
    Run every sample of ``ragas_ds`` through ``rag_chain`` and store the results on the sample.

    For each sample the chain is invoked with ``{"question": sample.user_input}``;
    the reply's ``content`` is written to ``sample.response`` and the
    ``page_content`` of each document found under
    ``additional_kwargs['context']`` is written to ``sample.retrieved_contexts``.

    Args:
        rag_chain: Object with an ``invoke`` method returning a message-like reply.
        ragas_ds: Iterable of samples exposing ``user_input``; mutated in place.

    Returns:
        None. The samples are modified in place.
    """
    for sample in ragas_ds:
        reply = rag_chain.invoke({"question": sample.user_input})
        sample.response = reply.content
        # The chain is expected to attach the retrieved documents to the reply.
        retrieved_docs = reply.additional_kwargs['context']
        sample.retrieved_contexts = [doc.page_content for doc in retrieved_docs]

In [71]:
# Fill in `response` and `retrieved_contexts` on every golden sample (mutates the dataset in place).
apply_rag_chain_inplace(base_rag.rag_chain, golden_small_base)

In [72]:
# Inspect the samples, now including the generated responses and retrieved contexts.
golden_small_base.to_pandas()

Unnamed: 0,user_input,retrieved_contexts,reference_contexts,response,reference
0,how i use adobe photoshop creative cloud for d...,[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,To use Perspective Warp in Photoshop CC:\n\n1....,"in adobe photoshop creative cloud, to use pers..."
1,wut is Adobee Photoshoop Cretive Cloud?,[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,Adobe Photoshop Creative Cloud is a version of...,Adobe Photoshop Creative Cloud is a version of...
2,"As a beginner Photoshop user, can you explain ...",[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,The Perspective Warp feature in Adobe Photosho...,Adobe Photoshop Creative Cloud's Perspective W...
3,Who is PhotoSpin in relation to the image used...,[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,PhotoSpin is the company that took the photogr...,PhotoSpin is the company that took the photogr...
4,"How you use Perspective Warp in Photoshop, wha...","[If I turn it on and off, you can see the befo...",[>> What I want to show you in this video is s...,Perspective Warp in Photoshop allows you to ch...,Perspective Warp in Photoshop let you change t...
5,What does the Perspective Warp feature in Phot...,"[If I turn it on and off, you can see the befo...",[>> What I want to show you in this video is s...,The Perspective Warp feature in Photoshop allo...,Perspective Warp in Photoshop allows you to ch...
6,As a Photoshop trainer developing step-by-step...,[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,To use the Perspective Warp feature effectivel...,The new Perspective Warp feature in Adobe Phot...
7,wut is adobee fotoshop cretive clowd?,[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,Adobe Photoshop Creative Cloud is a version of...,Adobe Photoshop Creative Cloud is a version of...
8,Wut duz Perspectiv Warp do in Photoshop?,"[If I turn it on and off, you can see the befo...",[>> What I want to show you in this video is s...,Perspectiv Warp in Photoshop allows you to re-...,Perspective Warp in Photoshop lets yu change t...
9,"How can I, as a Photoshop trainer, explain to ...",[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,"In the Perspective Warp tutorial, the trainer ...","In the Perspective Warp tutorial, the image us..."


Since we now have the dataset, let's run it through the evaluators.

In [39]:
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
# Judge model for the ragas metrics, wrapped so ragas can drive a LangChain chat model.
evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4.1-mini"))

In [73]:
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness, ResponseRelevancy, ContextEntityRecall, NoiseSensitivity
from ragas import evaluate, RunConfig

# Raise the per-call timeout (360 seconds) for the LLM-judged metrics.
custom_run_config = RunConfig(timeout=360)

# Score the baseline chain's answers with six ragas metrics.
result_base = evaluate(
    dataset=golden_small_base,
    metrics=[
        LLMContextRecall(),
        Faithfulness(),
        FactualCorrectness(),
        ResponseRelevancy(),
        ContextEntityRecall(),
        NoiseSensitivity(),
    ],
    llm=evaluator_llm,
    run_config=custom_run_config,
)


Evaluating:   0%|          | 0/60 [00:00<?, ?it/s]

In [75]:
result_base.to_pandas()

Unnamed: 0,user_input,retrieved_contexts,reference_contexts,response,reference,context_recall,faithfulness,factual_correctness(mode=f1),answer_relevancy,context_entity_recall,noise_sensitivity(mode=relevant)
0,how i use adobe photoshop creative cloud for d...,[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,To use Perspective Warp in Photoshop CC:\n\n1....,"in adobe photoshop creative cloud, to use pers...",0.3,0.333333,0.48,0.0,0.2,0.066667
1,wut is Adobee Photoshoop Cretive Cloud?,[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,Adobe Photoshop Creative Cloud is a version of...,Adobe Photoshop Creative Cloud is a version of...,1.0,1.0,0.8,0.876445,1.0,0.166667
2,"As a beginner Photoshop user, can you explain ...",[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,The Perspective Warp feature in Adobe Photosho...,Adobe Photoshop Creative Cloud's Perspective W...,0.4,0.75,0.47,0.933397,0.222222,0.0
3,Who is PhotoSpin in relation to the image used...,[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,PhotoSpin is the company that took the photogr...,PhotoSpin is the company that took the photogr...,1.0,0.5,0.67,0.914422,0.5,0.0
4,"How you use Perspective Warp in Photoshop, wha...","[If I turn it on and off, you can see the befo...",[>> What I want to show you in this video is s...,Perspective Warp in Photoshop allows you to ch...,Perspective Warp in Photoshop let you change t...,0.25,0.666667,0.33,0.959503,0.5,0.333333
5,What does the Perspective Warp feature in Phot...,"[If I turn it on and off, you can see the befo...",[>> What I want to show you in this video is s...,The Perspective Warp feature in Photoshop allo...,Perspective Warp in Photoshop allows you to ch...,0.5,0.333333,0.75,0.982226,0.5,0.0
6,As a Photoshop trainer developing step-by-step...,[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,To use the Perspective Warp feature effectivel...,The new Perspective Warp feature in Adobe Phot...,0.272727,0.923077,0.61,0.91709,0.4,0.461538
7,wut is adobee fotoshop cretive clowd?,[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,Adobe Photoshop Creative Cloud is a version of...,Adobe Photoshop Creative Cloud is a version of...,1.0,0.857143,0.6,0.816509,1.0,0.285714
8,Wut duz Perspectiv Warp do in Photoshop?,"[If I turn it on and off, you can see the befo...",[>> What I want to show you in this video is s...,Perspectiv Warp in Photoshop allows you to re-...,Perspective Warp in Photoshop lets yu change t...,1.0,0.5,0.5,0.926159,1.0,0.333333
9,"How can I, as a Photoshop trainer, explain to ...",[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,"In the Perspective Warp tutorial, the trainer ...","In the Perspective Warp tutorial, the image us...",1.0,0.583333,0.47,0.910802,0.333333,0.25


In [121]:
from pstuts_rag.stats import with_summary

In [122]:
# Append the summary rows to the per-sample scores, then print the aggregate
# rows for the numeric columns.
# NOTE(review): in the recorded run the "StdDev" row is about
# (sample std) / sqrt(10) of the per-sample scores (context_recall: std ~0.352,
# row shows 0.111), i.e. it looks like a standard error of the mean — confirm
# what with_summary actually computes.
summary_base = with_summary(result_base.to_pandas())
numeric_summary = summary_base.select_dtypes(include="number")
for stat_row in ("Mean", "StdDev"):
    print(numeric_summary.loc[stat_row])

user_input                               NaN
retrieved_contexts                       NaN
reference_contexts                       NaN
response                                 NaN
reference                                NaN
context_recall                      0.672273
faithfulness                        0.644689
factual_correctness(mode=f1)        0.568000
answer_relevancy                    0.823655
context_entity_recall               0.565556
noise_sensitivity(mode=relevant)    0.189725
Name: Mean, dtype: float64
user_input                               NaN
retrieved_contexts                       NaN
reference_contexts                       NaN
response                                 NaN
reference                                NaN
context_recall                      0.111424
faithfulness                        0.074534
factual_correctness(mode=f1)        0.045748
answer_relevancy                    0.092614
context_entity_recall               0.100662
noise_sensitivity(mode=relev