# RAG Tracer Example: OpenAI + pgvector Integration

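This notebook demonstrates how to integrate the RAG Tracer SDK with an OpenAI-based RAG pipeline. The retrieval step is modeled on a pgvector-backed document store; to keep the example self-contained, the pgvector lookup is simulated in memory rather than run against a real PostgreSQL database.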

In [None]:
# Install required packages
!pip install openai pgvector psycopg2-binary scikit-learn rag-tracer-sdk

In [None]:
import os
import openai
import psycopg2
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from tracer_sdk.tracer import RAGTracer, EmbeddingData, RetrievalData, ResponseData, TelemetryData
import time

In [None]:
# Configure the OpenAI SDK with the key read from the environment
# (this is None when OPENAI_API_KEY is not set).
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Tracer client pointed at the local RAG Tracer API endpoint
tracer = RAGTracer(
    api_url="http://localhost:8000",
)

In [None]:
# Sample documents for our RAG system
# Each (id, content) pair below becomes one {"id": ..., "content": ...} record.
documents = [
    {"id": doc_id, "content": text}
    for doc_id, text in (
        ("doc1", "Elon Musk is the CEO of Tesla and SpaceX."),
        ("doc2", "Tesla is an electric vehicle manufacturer founded by Elon Musk."),
        ("doc3", "SpaceX is a space exploration company founded by Elon Musk."),
        ("doc4", "Apple Inc. is a technology company led by CEO Tim Cook."),
        ("doc5", "Microsoft is a technology company led by CEO Satya Nadella."),
    )
]

In [None]:
# Function to generate embeddings using OpenAI
def generate_embedding(text):
    """Return the embedding OpenAI produces for ``text``.

    Sends ``text`` to ``openai.Embedding.create`` using the
    ``text-embedding-ada-002`` model and returns the ``embedding`` entry of
    the first item in the response's ``data`` list.
    """
    api_result = openai.Embedding.create(
        model="text-embedding-ada-002",
        input=text,
    )
    first_item = api_result['data'][0]
    return first_item['embedding']

In [None]:
# Function to retrieve documents using pgvector
# Embeddings already computed for document texts, keyed by the document content.
# Without this cache every query re-embeds the whole corpus (one API call per
# document), which also inflates the retrieval latency reported in the trace.
_doc_embedding_cache = {}


def retrieve_documents(query_embedding, top_k=3):
    """Return the ``top_k`` documents most similar to ``query_embedding``.

    In a real implementation this would query a PostgreSQL database with
    pgvector; here retrieval is simulated in memory over the module-level
    ``documents`` list using cosine similarity.

    Args:
        query_embedding: Embedding vector of the query (a sequence of floats).
        top_k: Maximum number of documents to return. ``None`` returns every
            document; zero or a negative value returns an empty list.

    Returns:
        A list of dicts, most similar first, each with the keys
        ``document_id``, ``similarity_score`` (a Python float) and ``content``.
    """
    # Nothing to rank: avoid embedding calls and a cosine_similarity error on
    # an empty matrix.
    if top_k is not None and top_k <= 0:
        return []
    if not documents:
        return []

    # Embed each document at most once; later queries reuse the cached vector.
    doc_embeddings = []
    for doc in documents:
        content = doc["content"]
        if content not in _doc_embedding_cache:
            _doc_embedding_cache[content] = generate_embedding(content)
        doc_embeddings.append(_doc_embedding_cache[content])

    # Calculate similarity scores (row 0 is the single query vector)
    similarities = cosine_similarity([query_embedding], doc_embeddings)[0]

    # argsort is ascending, so reverse it before taking the first top_k indices
    top_indices = np.argsort(similarities)[::-1][:top_k]

    return [
        {
            "document_id": documents[i]["id"],
            "similarity_score": float(similarities[i]),
            "content": documents[i]["content"],
        }
        for i in top_indices
    ]

In [None]:
# Function to generate response using OpenAI
def generate_response(query, retrieved_docs, model="gpt-3.5-turbo-instruct"):
    """Generate an answer to ``query`` grounded in the retrieved documents.

    Args:
        query: The user's question.
        retrieved_docs: Iterable of dicts, each with a ``content`` key, whose
            text is joined (one per line) to form the prompt context.
        model: Name of the completions model to call. The default replaces
            ``text-davinci-003``, which OpenAI has retired; pass another
            completions-capable model name to override it.

    Returns:
        A ``(response_text, prompt)`` tuple: the stripped completion text and
        the exact prompt that was sent.
    """
    context = "\n".join(doc["content"] for doc in retrieved_docs)
    prompt = f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"

    # `model=` is used instead of the deprecated `engine=` keyword.
    response = openai.Completion.create(
        model=model,
        prompt=prompt,
        max_tokens=100,
        temperature=0.7
    )

    return response.choices[0].text.strip(), prompt

In [None]:
# Main RAG pipeline with tracing
def rag_pipeline(query):
    """Run the RAG pipeline for ``query`` and submit a trace of the run.

    The pipeline embeds the query, retrieves documents, generates a response,
    and then passes the inputs, outputs and per-stage latencies to
    ``tracer.trace_complete``.

    Args:
        query: The user's question.

    Returns:
        A ``(response_text, trace_result)`` tuple, where ``trace_result`` is
        whatever ``tracer.trace_complete`` returned.
    """
    # perf_counter() is a monotonic, high-resolution clock; time.time() is the
    # wall clock and can jump (NTP/DST adjustments), corrupting latency values.
    start_time = time.perf_counter()

    # 1. Generate embedding for query
    embedding_start = time.perf_counter()
    query_embedding = generate_embedding(query)
    embedding_latency = (time.perf_counter() - embedding_start) * 1000  # ms

    # 2. Retrieve documents
    retrieval_start = time.perf_counter()
    retrieved_docs = retrieve_documents(query_embedding)
    retrieval_latency = (time.perf_counter() - retrieval_start) * 1000  # ms

    # 3. Generate response
    llm_start = time.perf_counter()
    response_text, final_prompt = generate_response(query, retrieved_docs)
    llm_latency = (time.perf_counter() - llm_start) * 1000  # ms

    total_latency = (time.perf_counter() - start_time) * 1000  # ms

    # 4. Trace the pipeline execution
    trace_result = tracer.trace_complete(
        user_query=query,
        final_prompt=final_prompt,
        embedding=EmbeddingData(
            vector=query_embedding,
            retrieval_candidates=[
                {"doc_id": doc["document_id"], "score": doc["similarity_score"]}
                for doc in retrieved_docs
            ]
        ),
        retrievals=[
            RetrievalData(
                document_id=doc["document_id"],
                similarity_score=doc["similarity_score"],
                metadata={"text": doc["content"]}
            )
            for doc in retrieved_docs
        ],
        response=ResponseData(
            text=response_text,
            # Whitespace-separated words stand in for the streamed tokens
            token_stream=response_text.split()
        ),
        telemetry=TelemetryData(
            embedding_latency_ms=embedding_latency,
            retrieval_latency_ms=retrieval_latency,
            llm_latency_ms=llm_latency,
            total_latency_ms=total_latency,
            # Token counts are approximated by whitespace word counts,
            # not by the model's tokenizer.
            embedding_tokens=len(query.split()),
            completion_tokens=len(response_text.split()),
            api_cost=0.002  # Example cost
        )
    )

    return response_text, trace_result

In [None]:
# Example usage
# Runs the full pipeline once: rag_pipeline calls the OpenAI-backed helpers
# and submits a trace through `tracer`.
query = "Who is the CEO of Tesla?"
response, trace = rag_pipeline(query)

# `trace` is the value returned by tracer.trace_complete; the .get() call
# below assumes it is dict-like — TODO confirm against the SDK.
print(f"Query: {query}")
print(f"Response: {response}")
print(f"Trace ID: {trace.get('id', 'N/A')}")

## Async Tracing Example

For high-throughput applications, you can use async tracing to avoid blocking the main pipeline:

In [None]:
# Set up async tracer
async_tracer = RAGTracer(api_url="http://localhost:8000", async_mode=True)

# Async version of the RAG pipeline
def rag_pipeline_async(query):
    """Run the RAG pipeline for ``query`` and trace it with ``async_tracer``.

    The pipeline steps themselves run synchronously, exactly as in the
    regular pipeline; only the trace submission goes through the tracer that
    was constructed with ``async_mode=True``.

    Args:
        query: The user's question.

    Returns:
        A ``(response_text, trace_result)`` tuple, where ``trace_result`` is
        whatever ``async_tracer.trace_complete`` returned.
    """
    # perf_counter() is a monotonic, high-resolution clock; time.time() is the
    # wall clock and can jump (NTP/DST adjustments), corrupting latency values.
    start_time = time.perf_counter()

    # 1. Generate embedding for query
    embedding_start = time.perf_counter()
    query_embedding = generate_embedding(query)
    embedding_latency = (time.perf_counter() - embedding_start) * 1000  # ms

    # 2. Retrieve documents
    retrieval_start = time.perf_counter()
    retrieved_docs = retrieve_documents(query_embedding)
    retrieval_latency = (time.perf_counter() - retrieval_start) * 1000  # ms

    # 3. Generate response
    llm_start = time.perf_counter()
    response_text, final_prompt = generate_response(query, retrieved_docs)
    llm_latency = (time.perf_counter() - llm_start) * 1000  # ms

    total_latency = (time.perf_counter() - start_time) * 1000  # ms

    # 4. Trace the pipeline execution (async)
    trace_result = async_tracer.trace_complete(
        user_query=query,
        final_prompt=final_prompt,
        embedding=EmbeddingData(
            vector=query_embedding,
            retrieval_candidates=[
                {"doc_id": doc["document_id"], "score": doc["similarity_score"]}
                for doc in retrieved_docs
            ]
        ),
        retrievals=[
            RetrievalData(
                document_id=doc["document_id"],
                similarity_score=doc["similarity_score"],
                metadata={"text": doc["content"]}
            )
            for doc in retrieved_docs
        ],
        response=ResponseData(
            text=response_text,
            # Whitespace-separated words stand in for the streamed tokens
            token_stream=response_text.split()
        ),
        telemetry=TelemetryData(
            embedding_latency_ms=embedding_latency,
            retrieval_latency_ms=retrieval_latency,
            llm_latency_ms=llm_latency,
            total_latency_ms=total_latency,
            # Token counts are approximated by whitespace word counts,
            # not by the model's tokenizer.
            embedding_tokens=len(query.split()),
            completion_tokens=len(response_text.split()),
            api_cost=0.002  # Example cost
        )
    )

    return response_text, trace_result

In [None]:
# Example usage with async tracing
# Note: this rebinds the `query`, `response` and `trace` names used by the
# earlier example cell.
query = "Who is the CEO of SpaceX?"
response, trace = rag_pipeline_async(query)

# `trace` is the value returned by async_tracer.trace_complete; the .get()
# call below assumes it is dict-like with a 'status' key — TODO confirm
# against the SDK's async mode.
print(f"Query: {query}")
print(f"Response: {response}")
print(f"Trace submission status: {trace.get('status', 'N/A')}")

## WebSocket Real-time Tracing

For real-time monitoring, you can also use WebSockets to send trace data:

In [None]:
import websocket
import json

# WebSocket client for real-time tracing
def trace_via_websocket(trace_data, url="ws://localhost:8000/ws/traces"):
    """Send one trace over a WebSocket and return the server's reply.

    Opens a new connection for the call, sends ``trace_data`` serialized as
    JSON, waits for a single message in response, and closes the connection.

    Args:
        trace_data: JSON-serializable trace payload.
        url: WebSocket endpoint to connect to.

    Returns:
        The first message received from the server after sending the trace.
    """
    ws = websocket.WebSocket()
    ws.connect(url)
    try:
        ws.send(json.dumps(trace_data))
        return ws.recv()
    finally:
        # Always release the socket, even if serialization, send or recv fails.
        ws.close()