[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openlayer-ai/examples-gallery/blob/main/monitoring/llms/rag-tracing/rag_tracer.ipynb)


# <a id="top">Tracing a RAG system</a>

In [None]:
import os
import openai

# Set every credential in one call; replace the placeholders with your own values.
os.environ.update(
    {
        # OpenAI env variable
        "OPENAI_API_KEY": "YOUR_OPENAI_KEY_HERE",
        # Openlayer env variables
        "OPENLAYER_API_KEY": "YOUR_OPENLAYER_API_KEY_HERE",
        # Where the traces will be uploaded to
        "OPENLAYER_PROJECT_NAME": "YOUR_OPENLAYER_PROJECT_NAME_HERE",
    }
)

## Defining and decorating our RAG system

In [None]:
%%bash

# Download the context file only if it is not already present.
if [ ! -e "context.txt" ]; then
    # --fail makes curl exit non-zero on HTTP errors (e.g. 404) instead of
    # saving the server's error page as context.txt. On any failure, remove a
    # partial file so the next run retries the download rather than reusing it.
    curl --fail "https://raw.githubusercontent.com/openlayer-ai/examples-gallery/main/monitoring/llms/rag-tracing/context.txt" --output "context.txt" || { rm -f "context.txt"; exit 1; }
fi

In [None]:
import random
import time

import numpy as np
from openai import OpenAI
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

from openlayer import llm_monitors
from openlayer.tracing import tracer

In [None]:
class RagPipeline:
    """Minimal retrieval-augmented generation (RAG) pipeline traced with Openlayer.

    The context file is split into sections on double newlines, the sections
    are indexed with TF-IDF, and each query is answered by an OpenAI chat
    model that receives the most similar section as context.
    """

    def __init__(self, context_path: str, model: str = "gpt-3.5-turbo"):
        """Load the context file and fit the TF-IDF index.

        Args:
            context_path: Path to a UTF-8 text file whose sections are
                separated by double newlines.
            model: Name of the OpenAI chat model used to generate answers.
        """
        # Wrap OpenAI client with Openlayer's OpenAIMonitor to trace it
        self.openai_client = OpenAI()
        llm_monitors.OpenAIMonitor(client=self.openai_client)
        self.model = model

        self.vectorizer = TfidfVectorizer()
        with open(context_path, 'r', encoding='utf-8') as file:
            self.context_sections = file.read().split('\n\n')
        self.tfidf_matrix = self.vectorizer.fit_transform(self.context_sections)

    # Decorate the functions you'd like to trace with @tracer.trace()
    @tracer.trace()
    def query(self, user_query: str) -> str:
        """Main method.

        Answers a user query with the LLM: retrieves the most relevant
        context section, builds the prompt, and returns the generated answer.
        """
        context = self.retrieve_context(user_query)
        prompt = self.inject_prompt(user_query, context)
        answer = self.generate_answer_with_gpt(prompt)
        return answer

    @tracer.trace()
    def retrieve_context(self, query: str) -> str:
        """Context retriever.

        Given the query, returns the most similar context section, ranked by
        cosine similarity between TF-IDF vectors.
        """
        query_vector = self.vectorizer.transform([query])
        cosine_similarities = cosine_similarity(query_vector, self.tfidf_matrix).flatten()
        # argmax returns the first maximum, so ties resolve to the earliest section
        most_relevant_idx = np.argmax(cosine_similarities)
        return self.context_sections[most_relevant_idx]

    @tracer.trace()
    def inject_prompt(self, query: str, context: str) -> list:
        """Combines the query with the context and returns
        the prompt (formatted to conform with OpenAI models).

        The result is a list of chat messages: a system message followed by a
        user message carrying both the context and the query.
        """
        return [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Answer the user query using only the following context: {context}. \nUser query: {query}"}
        ]

    @tracer.trace()
    def generate_answer_with_gpt(self, prompt: list) -> str:
        """Forwards the prompt to GPT and returns the answer.

        Returns an empty string when the response message has no text content.
        """
        response = self.openai_client.chat.completions.create(
            messages=prompt,
            model=self.model,
        )
        # The message content may be None; guard so .strip() cannot raise
        # AttributeError and the method keeps returning a str.
        content = response.choices[0].message.content
        return content.strip() if content is not None else ""

In [None]:
# Build the pipeline: reads context.txt and fits the TF-IDF index over its sections.
rag = RagPipeline("context.txt")

In [None]:
# First traced query: retrieves context, builds the prompt, and asks the LLM.
rag.query("Who were the founders of Apple?")

In [None]:
# Second traced query, run through the same pipeline instance.
rag.query("When did Apple IPO?")

That's it! After each inference, the traces are uploaded to Openlayer. If you navigate to your project, you should see the traces for these two inferences with our RAG system.