[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openlayer-ai/openlayer-python/blob/main/examples/tracing/rag/rag_tracing.ipynb)


# <a id="top">Tracing a RAG system</a>

In [None]:
import os

# Environment variables for OpenAI and Openlayer.
# Replace each placeholder with your own value before running this cell.
os.environ.update(
    {
        # OpenAI
        "OPENAI_API_KEY": "YOUR_OPENAI_API_KEY_HERE",
        # Openlayer
        "OPENLAYER_API_KEY": "YOUR_OPENLAYER_API_KEY_HERE",
        "OPENLAYER_INFERENCE_PIPELINE_ID": "YOUR_OPENLAYER_INFERENCE_PIPELINE_ID_HERE",
    }
)

## Defining and decorating our RAG system

In [None]:
%%bash

# Download the context file unless a non-empty copy is already present.
# `-s` (instead of `-e`) also retries when a previous run left an empty file.
if [ ! -s "context.txt" ]; then
    # --fail makes curl exit non-zero on HTTP errors instead of saving the
    # server's error page as context.txt; --location follows redirects.
    if ! curl --fail --location "https://raw.githubusercontent.com/openlayer-ai/templates/refs/heads/main/python/llms/azure-openai-rag/app/model/contexts.txt" --output "context.txt"; then
        # Remove any partial download so the next run tries again, and
        # surface the failure to the notebook.
        rm -f "context.txt"
        exit 1
    fi
fi

In [None]:
from typing import List

import numpy as np
from openai import OpenAI
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

from openlayer.lib import trace, trace_openai

In [None]:
class RagPipeline:
    """Minimal retrieval-augmented generation (RAG) pipeline traced with Openlayer.

    The context file is split into sections on blank lines, the sections are
    indexed with TF-IDF, and each query is answered by an OpenAI chat model
    that receives the single most similar section as context.
    """

    def __init__(self, context_path: str):
        """Load the context file and build the TF-IDF index.

        Args:
            context_path: Path to a UTF-8 text file whose sections are
                separated by blank lines.

        Raises:
            ValueError: If the file contains no non-blank section.
        """
        # Wrap OpenAI client with Openlayer's `trace_openai` to trace it
        self.openai_client = trace_openai(OpenAI())

        self.vectorizer = TfidfVectorizer()
        with open(context_path, "r", encoding="utf-8") as file:
            raw_sections = file.read().split("\n\n")
        # Drop whitespace-only sections (e.g. produced by trailing newlines or
        # runs of blank lines) so a blank string is never indexed and can never
        # be returned as "the most relevant context".
        self.context_sections = [section for section in raw_sections if section.strip()]
        if not self.context_sections:
            raise ValueError(f"No context sections found in {context_path!r}.")
        self.tfidf_matrix = self.vectorizer.fit_transform(self.context_sections)

    # Decorate the functions you'd like to trace with @trace()
    @trace()
    def query(self, user_query: str) -> str:
        """Main method.

        Answers a user query with the LLM: retrieves the context, builds the
        prompt and forwards it to the model.

        Args:
            user_query: The question asked by the user.

        Returns:
            The answer generated by the model.
        """
        context = self.retrieve_contexts(user_query)
        prompt = self.inject_prompt(user_query, context)
        answer = self.generate_answer_with_gpt(prompt)
        return answer

    @trace()
    def retrieve_contexts(self, query: str) -> List[str]:
        """Context retriever.

        Given the query, returns the most similar context (using TFIDF).

        Args:
            query: The user query to match against the context sections.

        Returns:
            A one-element list holding the context section with the highest
            cosine similarity to the query.
        """
        query_vector = self.vectorizer.transform([query])
        cosine_similarities = cosine_similarity(query_vector, self.tfidf_matrix).flatten()
        # Plain `int` keeps the list index independent of NumPy scalar types.
        most_relevant_idx = int(np.argmax(cosine_similarities))
        contexts = [self.context_sections[most_relevant_idx]]
        return contexts

    # You can also specify the name of the `context_kwarg` to unlock RAG metrics that
    # evaluate the performance of the context retriever. The value of the `context_kwarg`
    # should be a list of strings.
    @trace(context_kwarg="contexts")
    def inject_prompt(self, query: str, contexts: List[str]) -> List[dict]:
        """Combines the query with the context and returns
        the prompt (formatted to conform with OpenAI models).

        Args:
            query: The user query.
            contexts: Retrieved context sections; only the first one is
                injected into the prompt.

        Returns:
            A list of chat messages (system message followed by user message).
        """
        return [
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": f"Answer the user query using only the following context: {contexts[0]}. \nUser query: {query}",
            },
        ]

    @trace()
    def generate_answer_with_gpt(self, prompt: List[dict]) -> str:
        """Forwards the prompt to GPT and returns the answer.

        Args:
            prompt: Chat messages in the format expected by the OpenAI chat
                completions endpoint.

        Returns:
            The text of the first choice with surrounding whitespace removed,
            or an empty string if the model returned no text content.
        """
        response = self.openai_client.chat.completions.create(
            messages=prompt,
            model="gpt-3.5-turbo",
        )
        # The OpenAI SDK types `message.content` as optional, so guard against
        # `None` instead of failing with an AttributeError on `.strip()`.
        content = response.choices[0].message.content
        return content.strip() if content is not None else ""

In [None]:
# Build the pipeline from the context file fetched by the bash cell above.
rag = RagPipeline(context_path="context.txt")

In [None]:
# First traced inference.
rag.query(user_query="Who were the founders of Apple?")

In [None]:
# Second traced inference.
rag.query(user_query="When did Apple IPO?")

That's it! After each inference, the traces are uploaded to Openlayer. If you navigate to your project, you should see the traces for these two inferences with our RAG system.