In [1]:
%pip install langfuse==2.57.5
%pip install deepeval

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
# !deepeval set-local-model --model-name="codesmart.ide" \
#     --base-url="https://api.codesmart.app/v1" \
#     --api-key="sk-LgRP10Z1i16SPuGgcSxQMg"


In [3]:
import os
 
# get keys for your project from https://cloud.langfuse.com
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["LANGFUSE_HOST"] = "https://cloud.langfuse.com"

# openai key
os.environ["OPENAI_API_KEY"] = ""
os.environ["OPENAI_API_BASE"]="https://api.openai.com/v1"

In [4]:
from typing import List
from langfuse import Langfuse
from datetime import datetime, timedelta
import os

from langfuse.api.resources.commons.types.trace_with_details import TraceWithDetails
 
BATCH_SIZE = 10
TOTAL_TRACES = 50
 
langfuse = Langfuse(
    secret_key=os.environ["LANGFUSE_SECRET_KEY"],
    public_key=os.environ["LANGFUSE_PUBLIC_KEY"],
    host=os.environ["LANGFUSE_HOST"]
)
 
now = datetime.now()
five_am_today = datetime(now.year, now.month, now.day, 5, 0)
five_am_yesterday = five_am_today - timedelta(days=3)
 
traces_batch: List[TraceWithDetails] = langfuse.fetch_traces(page=1,
                                     limit=BATCH_SIZE,
                                     name="query_rag",
                                     from_timestamp=five_am_yesterday,
                                     to_timestamp=datetime.now()
                                   ).data

Langfuse client is disabled since no public_key was provided as a parameter or environment variable 'LANGFUSE_PUBLIC_KEY'. See our docs: https://langfuse.com/docs/sdk/python/low-level-sdk#initialize-client
Unauthorized. Please check your public/private host settings. Refer to our installation and setup guide: https://langfuse.com/docs/sdk/typescript/guide for details on SDK configuration.


UnauthorizedError: status_code: 401, body: {'message': "Invalid authorization header. Confirm that you've configured the correct host."}

In [88]:
from typing import Any, Dict
from deepeval.metrics import HallucinationMetric, AnswerRelevancyMetric
from deepeval.test_case import LLMTestCase
from langchain_openai import ChatOpenAI
from langchain_core.messages import AIMessage
from langfuse.api.resources.commons.types.trace_with_details import TraceWithDetails

def evaluate_response(response: str, expected_output: str, context: str, query: str) -> Dict[str, Any]:
    """
    Evaluate the response using deepeval metrics and return scores
    """
    # Initialize metrics
    hallucination_metric = HallucinationMetric(
        threshold=0.7
    )
    
    relevancy_metric = AnswerRelevancyMetric(
        threshold=0.7
    )
    
    context=[context]

    actual_output=response

    test_case = LLMTestCase(
        input=query,
        expected_output=expected_output,
        actual_output=actual_output,
        context=context,

    )
    relevancy_metric.measure(test_case=test_case)
    hallucination_metric.measure(test_case=test_case)
    
    return {
        "hallucination_score": hallucination_metric.score,
        "relevancy_score": relevancy_metric.score,
        "hallucination_reason": hallucination_metric.reason,
        "relevancy_reason": relevancy_metric.reason
    }

def run_with_evaluation(trace: TraceWithDetails) -> Dict[str, Any]:
    """
    Run the RAG chain with evaluation and log to Langfuse
    """
    
    chat_model = ChatOpenAI(
        openai_api_base=os.getenv("OPENAI_API_BASE", "https://api.openai.com/v1"),
        openai_api_key=os.getenv("OPENAI_API_KEY"),
    )
    
    response: AIMessage = chat_model.invoke(input=trace.input)

    try:
                
        # Evaluate the response
        evaluation_span = trace.span(name="evaluation")
        evaluation_results = evaluate_response(
            response=response,
            expected_output=response.content,
            context="",
            query=trace.output
        )
        evaluation_span.end()
        
        # Update trace with scores
        langfuse.score(
            trace_id=trace.id,
            name="hallucination_score",
            value=evaluation_results["hallucination_score"],
            comment=evaluation_results["hallucination_reason"]
        )
        langfuse.score(
            trace_id=trace.id,
            name="relevancy_score",
            value=evaluation_results["relevancy_score"],
            comment="test_revelancy"
        )
        
        return {
            "response": response,
            "evaluation": evaluation_results,
            "trace_id": trace.id
        }
        
    except Exception as e:
        print(e)
        raise
    finally:
        print("Complete tracing")
        langfuse.flush()

In [89]:
import math
import trace
 
for page_number in range(1, math.ceil(TOTAL_TRACES/BATCH_SIZE)):
 
 
    print(traces_batch)
    
    for item in traces_batch:
        if trace.output is None:
            print(f"Warning: \n Trace {trace.name} had no generated output, \
            it was skipped")
            continue

        print(trace)
        print(trace.input)

        jscore = run_with_evaluation(trace=trace)
        print(jscore)
 
    print(f"Batch {page_number} processed 🚀 \n")

[TraceWithDetails(id='1ce32753-0479-4ce7-b6bc-fbc5e135b288', timestamp=datetime.datetime(2025, 1, 9, 3, 13, 26, 720000, tzinfo=datetime.timezone.utc), name='query_rag', input='{"query":"Can you tell maximum how many players can play the monopoly game?"}', output='["The maximum number of players that can play the Monopoly game is 8."]', session_id=None, release=None, version=None, user_id=None, metadata={'query': 'Can you tell maximum how many players can play the monopoly game?', 'context': 'MONOPOLY \nProperty Trading Game from Parker Brothers" \nAGES 8+ \n2 to 8 Players \nContents: Gameboard, 3 dice, tokens, 32 houses, I2 hotels, Chance \nand Community Chest cards, Title Deed cards, play money and a Banker\'s tray. \nNow there\'s a faster way to play MONOPOLY. Choose to play by \nthe classic rules for buying, renting and selling properties or use the \nSpeed Die to get into the action faster. If you\'ve never played the classic \nMONOPOLY game, refer to the Classic Rules beginning on