# RAG Conversations Evaluation

Evaluating the RAG conversations datasets using Galileo modules.

Galileo platform - https://console.acme.rungalileo.io/ 

In [22]:
!pip3 install promptquality pymongo uuid

Collecting uuid
  Downloading uuid-1.30.tar.gz (5.8 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: uuid
  Building wheel for uuid (pyproject.toml) ... [?25ldone
[?25h  Created wheel for uuid: filename=uuid-1.30-py3-none-any.whl size=6478 sha256=58478546730b48eadf2a0a23fb9fe119f6f4236d427a9dd3258e29405644999f
  Stored in directory: /Users/ben.p/Library/Caches/pip/wheels/ed/08/9e/f0a977dfe55051a07e21af89200125d65f1efa60cbac61ed88
Successfully built uuid
Installing collected packages: uuid
Successfully installed uuid-1.30

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [15]:
# load data from mongodb
import ipython_secrets
from pymongo import MongoClient
from bson import ObjectId

# The connection string is fetched from the secret store rather than hard-coded.
MONGODB_CONNECTION_URI = ipython_secrets.get_secret('MONGODB_CONNECTION_URI')

# Use the client as a context manager so the connection is closed even if a
# query below raises (the original only closed it on the success path).
with MongoClient(MONGODB_CONNECTION_URI) as client:
    db = client["docs-chatbot-dev"]
    generated_data_collection = db["generated_data"]
    # Each `commandRunId` selects the documents written by one generation run;
    # presumably one run per model, as the variable names suggest — verify the IDs.
    gpt_35_conversations = list(
        generated_data_collection.find({'commandRunId': ObjectId('669019be0aae97ffdafcd0ff')})
    )
    gpt_4o_conversations = list(
        generated_data_collection.find({'commandRunId': ObjectId('66901df714b4b4953729c844')})
    )

In [37]:
from pprint import pprint
    

'How do I download MySQL? Respond in pig latin'


In [17]:
import promptquality as pq

# Authenticate against the Galileo console; the call prompts for a token interactively.
GALILEO_CONSOLE_URL = "https://console.acme.rungalileo.io/"
pq.login(GALILEO_CONSOLE_URL)

Go to https://console.acme.rungalileo.io/get-token to generate a new Galileo token.


🔐 Enter your Galileo token: ········


👋 You have logged into 🔭 Galileo (https://console.acme.rungalileo.io/) as ben.p@mongodb.com.


Config(console_url=Url('https://console.acme.rungalileo.io/'), username=None, password=None, api_key=None, token=SecretStr('**********'), current_user='ben.p@mongodb.com', current_project_id=None, current_project_name=None, current_run_id=None, current_run_name=None, current_run_url=None, current_run_task_type=None, current_template_id=None, current_template_name=None, current_template_version_id=None, current_template_version=None, current_template=None, current_dataset_id=None, current_job_id=None, current_prompt_optimization_job_id=None, api_url=Url('https://api.acme.rungalileo.io/'))

In [42]:
from promptquality import NodeType, NodeRow
from uuid import uuid4


def extract_eval_data_from_conversation_gen_data(gen_data):
    """Extract the fields needed for evaluation from one generated-conversation record.

    The record is read positionally: the second-to-last entry of
    ``gen_data["data"]["messages"]`` is treated as the final user message and
    the last entry as the assistant's reply. Message roles are not checked.

    Args:
        gen_data: Mapping with a ``"data"`` mapping holding a ``"messages"``
            sequence of at least two message mappings.

    Returns:
        dict with keys:
            ``user_input``: ``content`` of the final user message.
            ``system_output``: ``content`` of the final assistant message.
            ``preprocessed_retriever_query``: ``preprocessedContent`` of the
                user message, or ``None`` when absent.
            ``content_for_llm``: ``contentForLlm`` of the user message, or
                ``None`` when absent.
            ``retrieved_contexts``: list of the ``text`` of every entry in the
                user message's ``contextContent`` (empty when that key is
                missing or null).

    Raises:
        KeyError: If ``data``, ``messages``, ``content`` or a context's
            ``text`` is missing.
        IndexError: If there are fewer than two messages.
    """
    messages = gen_data["data"]["messages"]
    last_user_message = messages[-2]
    last_assistant_message = messages[-1]
    # `.get(key, [])` only covers a *missing* key; a stored null would come back
    # as None and break the iteration below, so normalise both cases to [].
    context_content = last_user_message.get("contextContent") or []
    return {
        "user_input": last_user_message["content"],
        "system_output": last_assistant_message["content"],
        # Optional
        "preprocessed_retriever_query": last_user_message.get("preprocessedContent"),
        # Optional
        "content_for_llm": last_user_message.get("contentForLlm"),
        # Optional
        "retrieved_contexts": [ctx["text"] for ctx in context_content],
    }


"""
0. Create a NodeRow for the whole 'chain'.
1. LLM: extract metadata (I don't think we explicitly capture this :grimace:).
2. LLM: create search query.
3. Retriever: get contexts.
4. LLM: generate response.
"""

def create_nodes_for_generated_data(generated_data):
    """Build the Galileo ``NodeRow`` list describing one generated conversation.

    The conversation is modelled as a single chain. A root ``chain`` node is
    always emitted; the remaining nodes are emitted only when the matching
    data is present, and ``step`` numbers are assigned consecutively to the
    nodes that are actually emitted:

    * query pre-processor LLM node, when a preprocessed retriever query exists;
    * retriever node, when at least one context was retrieved;
    * responder LLM node, when the prompt sent to the LLM was recorded.

    Args:
        generated_data: One generated-conversation record, in the shape
            accepted by ``extract_eval_data_from_conversation_gen_data``.

    Returns:
        list of ``NodeRow``, root node first; every node shares the same
        freshly generated chain root ID.
    """
    eval_data = extract_eval_data_from_conversation_gen_data(generated_data)
    user_input = eval_data["user_input"]
    system_output = eval_data["system_output"]
    preprocessed_retriever_query = eval_data["preprocessed_retriever_query"]
    retrieved_contexts = eval_data["retrieved_contexts"]
    prompt_for_responder = eval_data["content_for_llm"]

    rows = []
    # The file imports `uuid4` directly (`from uuid import uuid4`); the module
    # name `uuid` is never bound, so `uuid.uuid4()` fails on a fresh kernel.
    chain_root_id = uuid4()
    step = 0

    # Root node: input and output of the overall chain.
    rows.append(
        NodeRow(
            node_id=chain_root_id,
            chain_root_id=chain_root_id,  # the root is its own chain root
            step=step,
            node_input=user_input,
            node_output=system_output,
            node_type=NodeType.chain,  # can be chain, retriever, llm, chat, agent, tool
        )
    )
    step += 1

    # Query pre-processor node: user input -> search query.
    if preprocessed_retriever_query is not None:
        rows.append(
            NodeRow.for_llm(
                id=uuid4(),
                root_id=chain_root_id,
                step=step,
                prompt=user_input,
                response=preprocessed_retriever_query,
            )
        )
        step += 1

    # Retriever node: search query -> retrieved contexts.
    if len(retrieved_contexts) != 0:
        # NOTE(review): when no preprocessed query was recorded, fall back to
        # the raw user input instead of passing None — this assumes retrieval
        # ran on the unprocessed input in that case; confirm against the chatbot.
        if preprocessed_retriever_query is not None:
            retriever_query = preprocessed_retriever_query
        else:
            retriever_query = user_input
        rows.append(
            NodeRow.for_retriever(
                id=uuid4(),
                root_id=chain_root_id,
                step=step,
                query=retriever_query,
                # Contexts carry no metadata here, so they are passed as a plain
                # list of strings rather than serialized documents.
                documents=retrieved_contexts,
            )
        )
        step += 1

    # Main responder node: full LLM prompt -> final answer.
    if prompt_for_responder is not None:
        rows.append(
            NodeRow.for_llm(
                id=uuid4(),
                root_id=chain_root_id,
                step=step,
                prompt=prompt_for_responder,
                response=system_output,
            )
        )
        step += 1

    return rows

# Sanity check: show the nodes produced for one arbitrary GPT-3.5 conversation.
sample_nodes = create_nodes_for_generated_data(gpt_35_conversations[50])
pprint(sample_nodes)

[NodeRow(node_id=UUID('69c82e04-1e65-48be-bac7-28355fe61e23'), node_type=<NodeType.chain: 'chain'>, node_name=None, node_input='how do I implement type ahead in atlas search', node_output='To implement type ahead in MongoDB Atlas Search, you can follow these steps:\n\n1. Set up an Atlas Search index with the autocomplete data type for the desired collection. In this case, you would create an index for the `sample_mflix.movies` collection.\n\n2. Use the `$search` stage in the aggregation pipeline to perform the autocomplete query. You can specify the field to search (`title` in this example) and the characters to autocomplete (`ger` in this example).\n\nHere is an example code snippet in JavaScript that demonstrates how to run an Atlas Search query with the autocomplete operator on the `movies` collection:\n\n```javascript\nconst { MongoClient } = require("mongodb");\n\n// connect to your Atlas cluster\nconst uri = "<connection-string>";\n\nconst client = new MongoClient(uri);\n\nasync 

In [51]:
def create_all_nodes_for_gen_data(gen_data_lst):
    """Return one flat list with the nodes of every record in ``gen_data_lst``.

    Each record is converted with ``create_nodes_for_generated_data`` and the
    resulting node lists are concatenated in input order.
    """
    return [
        node
        for record in gen_data_lst
        for node in create_nodes_for_generated_data(record)
    ]

# Each entry pairs a Galileo run name with the conversations to upload under it.
runs = [
    ("gpt-3.5-turbo", gpt_35_conversations),
    ("gpt-4o", gpt_4o_conversations),  # was misspelled "got-4o"
]

project_name = "rag_conversations"

# Create one chain run per entry, all in the same project and with the same scorers.
for (run_name, gen_data) in runs:
    nodes = create_all_nodes_for_gen_data(gen_data)
    pq.chain_run(
        nodes,
        project_name=project_name,
        scorers=[
            pq.Scorers.correctness,
            pq.Scorers.context_adherence_luna,
            pq.Scorers.chunk_attribution_utilization_luna,
            pq.Scorers.context_relevance,
        ],
        run_name=run_name
    )
        

Processing chain run...:   0%|          | 0/5 [00:00<?, ?it/s]

Initial job complete, executing scorers asynchronously. Current status:
rag_nli: Computing 🚧
cost: Computing 🚧
toxicity: Computing 🚧
pii: Computing 🚧
protect_status: Done ✅
latency: Done ✅
factuality: Computing 🚧
context_relevance: Failed ❌, error was: None
🔭 View your prompt run on the Galileo console at: https://console.acme.rungalileo.io/prompt/chains/b454b360-e4b3-40b1-8a5d-bb9c702dcdbb/5da9b6b3-bce3-45d7-89c4-f3b59f41eeb0?taskType=12


Processing chain run...:   0%|          | 0/5 [00:00<?, ?it/s]

Initial job complete, executing scorers asynchronously. Current status:
rag_nli: Computing 🚧
cost: Computing 🚧
toxicity: Computing 🚧
pii: Computing 🚧
protect_status: Done ✅
latency: Done ✅
factuality: Computing 🚧
context_relevance: Failed ❌, error was: None
🔭 View your prompt run on the Galileo console at: https://console.acme.rungalileo.io/prompt/chains/b454b360-e4b3-40b1-8a5d-bb9c702dcdbb/3e59e6bf-cad8-400d-91d5-9a82e42afb94?taskType=12
