# PSTUTS_RAG RAG evaluation

Wall of imports first.

In [1]:
import os
import logging

import requests
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI

from qdrant_client import QdrantClient

from pstuts_rag.rag import RAGChainInstance


from dataclasses import dataclass
from datasets import load_dataset
from langsmith import EvaluationResult
from ragas import EvaluationDataset
from pstuts_rag.evaluator_utils import apply_rag_chain_inplace, summary_stats
from pandas import DataFrame
from langchain_core.runnables import Runnable

# Pull in settings from a local .env file (if any) before the API-key check below.
load_dotenv()

def set_api_key_if_not_present(key_name, prompt_message=""):
    """Ensure an API key is available in ``os.environ``, prompting if needed.

    Args:
        key_name: Name of the environment variable to check and populate.
        prompt_message: Text shown at the interactive prompt. Falls back to
            ``key_name`` when left empty.

    Side effects:
        Sets ``os.environ[key_name]`` from a hidden ``getpass`` prompt when the
        variable is missing or empty; otherwise leaves it untouched.
    """
    # Imported locally because the notebook's import cell never brings in
    # `getpass`; without it the prompt branch fails with a NameError.
    import getpass

    if len(prompt_message) == 0:
        prompt_message = key_name
    # `.get` treats an unset variable and an empty value the same way.
    if not os.environ.get(key_name):
        os.environ[key_name] = getpass.getpass(prompt_message)


# Make sure the OpenAI key is set before any model client is constructed.
set_api_key_if_not_present("OPENAI_API_KEY")

# Raise the log threshold for chatty libraries so cell output stays readable.
for noisy_logger in ("httpx", "langchain"):
    logging.getLogger(noisy_logger).setLevel(logging.WARNING)



The raw data is now stored on Hugging Face, so we can download it directly.

In [2]:
import pstuts_rag.loader

# Location of the train.json file of the PsTuts-VQA dataset on Hugging Face.
url = "https://huggingface.co/datasets/mbudisic/PsTuts-VQA/raw/main/train.json"
# Bound the request so a stalled connection fails instead of hanging the kernel.
resp = requests.get(url, timeout=60)
resp.raise_for_status()
# Group label is the file name up to its first dot ("train" for this URL).
group = url.split('/')[-1].split('.')[0]
docs_json = pstuts_rag.loader.load_json_string(resp.content.decode('utf-8'), group)



Now, let's create the base chain.

In [3]:
from langchain_openai import OpenAIEmbeddings
from sentence_transformers import SentenceTransformer

# Model identifier passed to SentenceTransformer to load the embedding model.
model_id = "Snowflake/snowflake-arctic-embed-l"
base_embedding = SentenceTransformer(model_id)

# ":memory:" selects Qdrant's in-memory local mode, so the collection does not
# outlive the kernel and is rebuilt on each run.
qdrant_client = QdrantClient(":memory:")


In [4]:
from dataclasses import dataclass
from dataclasses import field


@dataclass
class DataGroup:
    """Bundle of the objects produced while evaluating one RAG configuration.

    Every field is excluded from ``__init__`` and is assigned step by step
    after construction. Each one defaults to ``None`` so that a partially
    populated instance can still be displayed or compared: without a default,
    the generated ``__repr__``/``__eq__`` raise ``AttributeError`` on any
    field that has not been assigned yet.
    """

    # Annotations are quoted so they are not evaluated when the class is created.
    # The chain under evaluation.
    rag: "RAGChainInstance | None" = field(init=False, default=None)
    # Evaluation samples fed through the chain.
    dataset: "EvaluationDataset | None" = field(init=False, default=None)
    # NOTE(review): `EvaluationResult` is imported from langsmith, while this
    # field is assigned the return value of ragas `evaluate` — confirm the type.
    result: "EvaluationResult | None" = field(init=False, default=None)
    # Summary table returned by `summary_stats`.
    statistics: "DataFrame | None" = field(init=False, default=None)

In [5]:
# Container for the baseline configuration.
base = DataGroup()

# Baseline chain: shared Qdrant client, the sentence-transformers embedding
# model, and an OpenAI chat model.
base.rag = RAGChainInstance(
    name="base",
    qdrant_client=qdrant_client,
    llm=ChatOpenAI(model="gpt-4.1-nano"),
    embeddings=base_embedding,
)



Now, let's populate the datastore of the first chain and create the chain.


In [6]:
# Build the chain from the downloaded documents. Top-level `await` relies on
# the notebook kernel's event loop; the return value is deliberately discarded.
_ = await base.rag.build_chain(docs_json)

<built-in function repr>


Testing it out...

In [7]:
# Smoke test: send a single question through the chain and print the reply.
response = base.rag.rag_chain.invoke({"question":"What is a layer?"})
response.pretty_print()


A layer is like a separate piece of your image that you can work on independently. You can create a new one, rename it, move it around, and change its opacity. It acts as a separate element that you can organize and edit without affecting other parts of your project. (See timestamps 1:41 to 1:56)
**REFERENCES**
[
  {
    "title": "Learn layer basics",
    "source": "https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/01a575ae-f8b7-486c-987b-bcb4f2f4e57d/3868e305-c73c-4931-82a0-5e46f5eb41e5_20170727011800.1280x720at2400_h264.mp4",
    "start": 141.29,
    "stop": 156.87
  },
  {
    "title": "Unlock the Background layer",
    "source": "https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/696245e0-aaad-42df-b48f-8b44b1f5211a/22729011-a533-48a4-a7a2-0b5f86d4eedd_20170727011751.1280x720at2400_h264.mp4",
    "start": 113.65,
    "stop": 227.99
  }
]


Formal evaluation uses the "golden" dataset, which is also stored
on Hugging Face.

We're going to evaluate only on a portion of it.


In [8]:

# The "train[:10]" split slice limits the load to the first 10 rows.
golden_small_hf = load_dataset("mbudisic/pstuts_rag_qa",split="train[:10]")

# Wrap the rows in a ragas EvaluationDataset for the evaluation steps.
base.dataset = EvaluationDataset.from_hf_dataset(golden_small_hf)

In [9]:
# Run the chain over the dataset. Going by the helper's name it updates
# `base.dataset` in place; its return value is not used here.
apply_rag_chain_inplace(base.rag.rag_chain, base.dataset )
# Show the samples as a DataFrame.
base.dataset.to_pandas()

Unnamed: 0,user_input,retrieved_contexts,reference_contexts,response,reference
0,how i use adobe photoshop creative cloud for d...,[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,Here's how you can use Perspective Warp in Pho...,"in adobe photoshop creative cloud, to use pers..."
1,wut is Adobee Photoshoop Cretive Cloud?,[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,I don't know. This isn’t covered in the traini...,Adobe Photoshop Creative Cloud is a version of...
2,"As a beginner Photoshop user, can you explain ...",[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,The Perspective Warp feature in Adobe Photosho...,Adobe Photoshop Creative Cloud's Perspective W...
3,Who is PhotoSpin in relation to the image used...,[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,PhotoSpin is the company that took the photogr...,PhotoSpin is the company that took the photogr...
4,"How you use Perspective Warp in Photoshop, wha...","[If I turn it on and off, you can see the befo...",[>> What I want to show you in this video is s...,Perspective Warp in Photoshop allows you to ch...,Perspective Warp in Photoshop let you change t...
5,What does the Perspective Warp feature in Phot...,"[If I turn it on and off, you can see the befo...",[>> What I want to show you in this video is s...,The Perspective Warp feature in Photoshop allo...,Perspective Warp in Photoshop allows you to ch...
6,As a Photoshop trainer developing step-by-step...,[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,"Based on the transcript, here’s how the Perspe...",The new Perspective Warp feature in Adobe Phot...
7,wut is adobee fotoshop cretive clowd?,[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,Adobe Photoshop Creative Cloud is a version of...,Adobe Photoshop Creative Cloud is a version of...
8,Wut duz Perspectiv Warp do in Photoshop?,"[If I turn it on and off, you can see the befo...",[>> What I want to show you in this video is s...,Perspective Warp in Photoshop allows you to ch...,Perspective Warp in Photoshop lets yu change t...
9,"How can I, as a Photoshop trainer, explain to ...",[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,As a Photoshop trainer explaining the role of ...,"In the Perspective Warp tutorial, the image us..."


Now that we have the dataset, let's run it through the evaluators.

In [10]:
from ragas.llms import LangchainLLMWrapper
# Judge model for the ragas metrics; a different model than the chain's "gpt-4.1-nano".
evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4.1-mini"))

In [11]:
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness, ResponseRelevancy, ContextEntityRecall, NoiseSensitivity
from ragas import evaluate, RunConfig

# Timeout handed to ragas; presumably seconds — TODO confirm against RunConfig.
custom_run_config = RunConfig(timeout=360)

# Score the baseline answers with six ragas metrics, judged by `evaluator_llm`.
base.result = evaluate(
    dataset=base.dataset,
    metrics=[
        LLMContextRecall(),
        Faithfulness(),
        FactualCorrectness(),
        ResponseRelevancy(),
        ContextEntityRecall(),
        NoiseSensitivity(),
    ],
    llm=evaluator_llm,
    run_config=custom_run_config,
)


Evaluating:   0%|          | 0/60 [00:00<?, ?it/s]

In [12]:
# Display the evaluation result as a DataFrame (samples plus metric columns).
base.result.to_pandas()

Unnamed: 0,user_input,retrieved_contexts,reference_contexts,response,reference,context_recall,faithfulness,factual_correctness(mode=f1),answer_relevancy,context_entity_recall,noise_sensitivity(mode=relevant)
0,how i use adobe photoshop creative cloud for d...,[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,Here's how you can use Perspective Warp in Pho...,"in adobe photoshop creative cloud, to use pers...",0.3,0.222222,0.42,0.890559,0.6,0.166667
1,wut is Adobee Photoshoop Cretive Cloud?,[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,I don't know. This isn’t covered in the traini...,Adobe Photoshop Creative Cloud is a version of...,1.0,0.0,0.0,0.0,1.0,0.0
2,"As a beginner Photoshop user, can you explain ...",[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,The Perspective Warp feature in Adobe Photosho...,Adobe Photoshop Creative Cloud's Perspective W...,0.4,0.666667,0.4,0.933397,0.285714,0.0
3,Who is PhotoSpin in relation to the image used...,[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,PhotoSpin is the company that took the photogr...,PhotoSpin is the company that took the photogr...,1.0,1.0,1.0,0.914422,0.5,0.0
4,"How you use Perspective Warp in Photoshop, wha...","[If I turn it on and off, you can see the befo...",[>> What I want to show you in this video is s...,Perspective Warp in Photoshop allows you to ch...,Perspective Warp in Photoshop let you change t...,0.25,0.666667,0.46,0.959503,0.5,0.0
5,What does the Perspective Warp feature in Phot...,"[If I turn it on and off, you can see the befo...",[>> What I want to show you in this video is s...,The Perspective Warp feature in Photoshop allo...,Perspective Warp in Photoshop allows you to ch...,0.5,0.666667,0.67,0.982226,1.0,0.25
6,As a Photoshop trainer developing step-by-step...,[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,"Based on the transcript, here’s how the Perspe...",The new Perspective Warp feature in Adobe Phot...,0.272727,0.909091,0.36,0.931184,0.4,0.444444
7,wut is adobee fotoshop cretive clowd?,[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,Adobe Photoshop Creative Cloud is a version of...,Adobe Photoshop Creative Cloud is a version of...,1.0,1.0,0.8,0.795962,1.0,0.333333
8,Wut duz Perspectiv Warp do in Photoshop?,"[If I turn it on and off, you can see the befo...",[>> What I want to show you in this video is s...,Perspective Warp in Photoshop allows you to ch...,Perspective Warp in Photoshop lets yu change t...,1.0,0.666667,0.4,0.926159,1.0,0.333333
9,"How can I, as a Photoshop trainer, explain to ...",[>> What I want to show you in this video is s...,[>> What I want to show you in this video is s...,As a Photoshop trainer explaining the role of ...,"In the Perspective Warp tutorial, the image us...",1.0,0.615385,0.67,0.930633,0.333333,0.307692


In [13]:
# Aggregate the per-sample scores, then report only the numeric columns.
base.statistics = summary_stats(base.result.to_pandas())
numeric_stats = base.statistics.select_dtypes(include="number")
print(numeric_stats.loc["Mean"])
print(numeric_stats.loc["StdDev"])

context_recall                      0.672273
faithfulness                        0.641336
factual_correctness(mode=f1)        0.518000
answer_relevancy                    0.826404
context_entity_recall               0.661905
noise_sensitivity(mode=relevant)    0.183547
Name: Mean, dtype: float64
context_recall                      0.111424
faithfulness                        0.101060
factual_correctness(mode=f1)        0.087696
answer_relevancy                    0.093153
context_entity_recall               0.096123
noise_sensitivity(mode=relevant)    0.054550
Name: StdDev, dtype: float64


  retval = retval.apply(partial(pd.to_numeric, **{"errors": "ignore"}))
