In [1]:
import os
from getpass import getpass

import openai

# Use the key from the environment when one is set; otherwise ask for it
# interactively (getpass does not echo what is typed).
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    openai_api_key = getpass("🔑 Enter your OpenAI API key: ")

# Publish the key both through the process environment and on the `openai` module.
os.environ["OPENAI_API_KEY"] = openai_api_key
openai.api_key = openai_api_key

In [4]:
import os
from dotenv import load_dotenv
from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
from opentelemetry import trace as trace_api
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk import trace as trace_sdk
from opentelemetry.sdk.trace.export import SimpleSpanProcessor

# Load variables from a local .env file into the process environment, so the
# `os.getenv` lookup in `instrument()` below can see them.
load_dotenv()

def _grafana_auth_headers(raw_value):
    """Turn the ``OTEL_EXPORTER_OTLP_HEADERS`` value into exporter headers.

    Two spellings are accepted:

    * the standard OpenTelemetry form ``Authorization=Basic%20<token>``
      (comma-separated ``key=value`` pairs, values URL-encoded), from which
      the ``Authorization`` entry is extracted and URL-decoded;
    * a bare header value such as ``Basic <token>``, which is used as-is.

    Returns:
        ``{"Authorization": <value>}``, or ``None`` when ``raw_value`` is
        missing or blank.
    """
    from urllib.parse import unquote

    if not raw_value or not raw_value.strip():
        return None
    raw_value = raw_value.strip()
    for item in raw_value.split(","):
        key, sep, value = item.partition("=")
        # Only an explicit "Authorization=" key marks the key=value form; a bare
        # base64 token may itself end in "=" padding and must not be split.
        if sep and key.strip().lower() == "authorization":
            return {"Authorization": unquote(value.strip())}
    return {"Authorization": raw_value}


def instrument():
    """Install a global tracer provider and instrument LlamaIndex.

    Spans are always exported to the Phoenix collector at
    ``http://phoenix:6006/v1/traces``. They are additionally exported to
    Grafana Cloud when ``OTEL_EXPORTER_OTLP_HEADERS`` provides credentials;
    without credentials the Grafana exporter is skipped with a warning
    instead of registering an exporter that sends an empty Authorization.
    """
    tracer_provider = trace_sdk.TracerProvider()

    # Phoenix
    phoenix_exporter = OTLPSpanExporter(endpoint="http://phoenix:6006/v1/traces")
    tracer_provider.add_span_processor(SimpleSpanProcessor(phoenix_exporter))

    # Grafana Cloud
    grafana_headers = _grafana_auth_headers(os.getenv("OTEL_EXPORTER_OTLP_HEADERS"))
    if grafana_headers is not None:
        # An endpoint passed explicitly is used verbatim by the HTTP exporter
        # (the signal path is only appended to env-var endpoints), so the
        # "/v1/traces" suffix has to be spelled out here.
        grafana_exporter = OTLPSpanExporter(
            endpoint="https://otlp-gateway-prod-us-east-0.grafana.net/otlp/v1/traces",
            headers=grafana_headers,
        )
        tracer_provider.add_span_processor(SimpleSpanProcessor(grafana_exporter))
    else:
        import warnings

        warnings.warn(
            "OTEL_EXPORTER_OTLP_HEADERS is not set; skipping the Grafana Cloud trace exporter.",
            stacklevel=2,
        )

    trace_api.set_tracer_provider(tracer_provider)
    LlamaIndexInstrumentor().instrument()

In [5]:
# Install the tracer provider and the LlamaIndex instrumentation defined above.
instrument()

In [6]:
import pandas as pd
import phoenix as px
from phoenix.trace import TraceDataset, using_project


# Display the complete contents of dataframe cells.
# pd.set_option("display.max_colwidth", None)

In [7]:
# Pause for three seconds before continuing. Done in pure Python so the cell
# does not depend on a POSIX `sleep` binary or on IPython's `!` shell escape.
# NOTE(review): presumably this gives the tracing setup time to settle — confirm.
import time

time.sleep(3)

In [8]:
# Hex ID of the saved Ragas test set; used below to load both the trace dataset
# and the matching `<id>-testset.csv` file from ./data.
testset_hex = "e4de504721b64dd1b1b2cd762b46b628"

In [9]:
# Load the previously saved trace dataset identified by `testset_hex` from ./data
# and start the Phoenix app with it, all under the "ragas-testset" project name.
with using_project("ragas-testset"):
    trace_dataset = TraceDataset.load(testset_hex, directory='./data')
    session = px.launch_app(trace=trace_dataset)

🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📺 To view the Phoenix app in a notebook, run `px.active_session().view()`
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


In [10]:
from llama_index.core import SimpleDirectoryReader

# Folder holding the source papers; `num_files_limit=2` caps how many files are read.
# The loader returns several Document objects per file (41 in the recorded run,
# each carrying a `page_label` in its metadata).
dir_path = "./data/prompt-engineering-papers"
reader = SimpleDirectoryReader(dir_path, num_files_limit=2)
documents = reader.load_data()

In [11]:
# Number of Document objects returned by the reader.
len(documents)

41

In [12]:
# Inspect one loaded Document (its text plus the file metadata attached to it).
documents[1]

Document(id_='22114005-4e0f-4440-8f1d-73bcb73afe89', embedding=None, metadata={'page_label': '2', 'file_name': '1605.08386v1.Heat_bath_random_walks_with_Markov_bases.pdf', 'file_path': '/home/peter-legion-wsl2/peter-projects/regen-ai/nbs/data/prompt-engineering-papers/1605.08386v1.Heat_bath_random_walks_with_Markov_bases.pdf', 'file_type': 'application/pdf', 'file_size': 289178, 'creation_date': '2024-04-13', 'last_modified_date': '2024-04-13'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='2 CAPRICE STANLEY AND TOBIAS WINDISCH\nreached by a random walk that uses moves from M, whereas for the continuous version, a\nrandom sampling from the unit sphere suﬃces. However, in man y situations where a Markov\nbasis is known, the heat-bath random walk is evid

Set the global trace handler for LlamaIndex and instrument LangChain (Ragas uses LangChain under the hood).

In [13]:

from llama_index.core import set_global_handler
from phoenix.trace.langchain import LangChainInstrumentor


# Setup instrumentation for both llama-index and LangChain (used by Ragas)
# NOTE(review): `instrument()` earlier in this notebook already calls
# `LlamaIndexInstrumentor().instrument()`; confirm that also registering the
# "arize_phoenix" global handler does not record LlamaIndex spans twice.
set_global_handler("arize_phoenix")
LangChainInstrumentor().instrument()

In [14]:
from llama_index.core import VectorStoreIndex
from llama_index.embeddings.openai import OpenAIEmbedding
from phoenix.trace import using_project


def build_query_engine(documents):
    """Embed ``documents`` into a vector index and return a query engine over it.

    The index is built with the OpenAI ``text-embedding-3-small`` embedding
    model, and the returned engine is created with ``similarity_top_k=2``.
    """
    embedding_model = OpenAIEmbedding(model="text-embedding-3-small")
    index = VectorStoreIndex.from_documents(documents, embed_model=embedding_model)
    return index.as_query_engine(similarity_top_k=2)


with using_project("indexing"):
    # Building the index inside this context sends the embedding spans it
    # produces to the "indexing" project, where later cells query them by name.
    query_engine = build_query_engine(documents)

In [15]:
# Export the spans recorded under the "indexing" project and save them under ./data.
# NOTE: `my_traces` holds whatever `save()` returns. Judging by the output below
# and the `.hex` access in the next cell, that is the saved dataset's ID rather
# than the trace dataset itself.
my_traces = px.Client().get_trace_dataset(project_name="indexing").save(directory="./data")

💾 Trace dataset saved to under ID: 28ad8281-0784-4865-b6dd-b227dc47b6d7
📂 Trace dataset path: data/trace_dataset-28ad8281-0784-4865-b6dd-b227dc47b6d7.parquet


In [16]:
# Dash-free hex form of the ID printed when the trace dataset was saved.
my_traces.hex

'28ad828107844865b6ddb227dc47b6d7'

In [17]:
# Peek at the first span recorded under the "indexing" project.
px.Client().query_spans(project_name="indexing").head(n=1)


Unnamed: 0_level_0,name,span_kind,parent_id,start_time,end_time,status_code,status_message,events,conversation,context.trace_id,context.span_id,attributes.embedding.embeddings,attributes.embedding.model_name
context.span_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
f3af7c71a9716557,embedding,EMBEDDING,,2024-04-15T09:08:10.420547+00:00,2024-04-15T09:08:11.650588+00:00,OK,,[],,ccd06ddaae16c1afb2b853edba001779,f3af7c71a9716557,"[{'embedding.vector': [-0.027629341930150986, ...",text-embedding-3-small


In [18]:
# Embeddings column for every span in the "indexing" project; spans that carry
# no embeddings show up as NaN in the output.
px.Client().query_spans(project_name="indexing")["attributes.embedding.embeddings"]

context.span_id
f3af7c71a9716557    [{'embedding.vector': [-0.027629341930150986, ...
ed3fde2f556ccd12                                                  NaN
72340bc8c5ac77f6                                                  NaN
45215b0e738e9995                                                  NaN
e2bddbb7eca1e0ae                                                  NaN
7d7ff000fee46a44                                                  NaN
d42220f7ed7e1b74                                                  NaN
97e18f2244914661                                                  NaN
87c35398bbe9b28d                                                  NaN
819737c0fd20e377                                                  NaN
351a74b2ed6b92db                                                  NaN
72333dda20dfb507                                                  NaN
37bc8812194fcb60                                                  NaN
acd90611d8927ab9                                                  NaN
f1f6

Export the corpus embeddings recorded during indexing to a DataFrame for use in the visualization.

In [19]:
from phoenix.trace.dsl.helpers import SpanQuery

# Create the Phoenix client once and reuse it for the query below
# (a later cell also relies on `client`), instead of building a second one.
client = px.Client()

# Explode each span's `embedding.embeddings` list into one row per entry,
# exposing the embedded text and its vector as the `text` / `vector` columns.
corpus_df = client.query_spans(
    SpanQuery().explode(
        "embedding.embeddings",
        text="embedding.text",
        vector="embedding.vector",
    ),
    project_name="indexing",
)
corpus_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,text,vector
context.span_id,position,Unnamed: 2_level_1,Unnamed: 3_level_1
f3af7c71a9716557,0,page_label: 1\nfile_path: /home/peter-legion-w...,"[-0.027629341930150986, 0.0020648075733333826,..."
f3af7c71a9716557,1,page_label: 2\nfile_path: /home/peter-legion-w...,"[-0.008014542050659657, -0.0056237997487187386..."
f3af7c71a9716557,2,page_label: 2\nfile_path: /home/peter-legion-w...,"[0.022452350705862045, 0.012023705057799816, 0..."
f3af7c71a9716557,3,page_label: 3\nfile_path: /home/peter-legion-w...,"[-0.004942848347127438, -0.01833670400083065, ..."
f3af7c71a9716557,4,page_label: 3\nfile_path: /home/peter-legion-w...,"[-0.0169712845236063, 0.0017904225969687104, 0..."


In [20]:
# Read the Ragas test set that belongs to `testset_hex`.
# The CSV carries leftover index columns (`Unnamed: 0`, `Unnamed: 0.1`), visible
# in the output; later cells only use `question`, `ground_truth` and `evolution_type`.
testset_csv_path = f"./data/{testset_hex}-testset.csv"
test_df = pd.read_csv(testset_csv_path)
test_df.head()

Unnamed: 0.1,Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,0,What aspects of training strategies are discus...,['ment aims to improve the scalability and eff...,The context discusses aspects of training stra...,simple,"[{'page_label': '13', 'file_name': '2301.00234...",True
1,1,How did Li et al. (2023d) enhance long-range l...,"[' (Zhou et al., 2023), and itera-\ntive forwa...",Li et al. (2023d) enhanced long-range language...,simple,"[{'page_label': '12', 'file_name': '2301.00234...",True


In [21]:
import pandas as pd
from datasets import Dataset
from phoenix.trace import using_project
from tqdm.auto import tqdm


def generate_response(query_engine, question):
    """Run ``question`` through ``query_engine`` and package the result.

    Returns:
        A dict with the engine's answer under ``"answer"`` and, under
        ``"contexts"``, the content of every retrieved source node in the
        order the engine returned them.
    """
    result = query_engine.query(question)
    answer_text = result.response
    retrieved_texts = []
    for source in result.source_nodes:
        retrieved_texts.append(source.node.get_content())
    return {"answer": answer_text, "contexts": retrieved_texts}


def generate_ragas_dataset(query_engine, test_df):
    """Answer every test question and assemble a dataset for Ragas.

    Each value of ``test_df["question"]`` is sent through
    ``generate_response`` (with a progress bar). The returned ``Dataset`` has
    the columns ``question``, ``answer``, ``contexts`` and ``ground_truth``,
    the last one copied from ``test_df["ground_truth"]``.
    """
    questions = test_df["question"].values

    answers = []
    contexts = []
    for question in tqdm(questions):
        result = generate_response(query_engine, question)
        answers.append(result["answer"])
        contexts.append(result["contexts"])

    return Dataset.from_dict(
        {
            "question": questions,
            "answer": answers,
            "contexts": contexts,
            "ground_truth": test_df["ground_truth"].values.tolist(),
        }
    )


# Run every test question through the query engine under the "llama-index"
# project name, so the resulting spans can be fetched by that name later.
with using_project("llama-index"):
    ragas_eval_dataset = generate_ragas_dataset(query_engine, test_df)

# DataFrame copy of the same records, used for display here and joined with the
# evaluation scores further down.
ragas_evals_df = pd.DataFrame(ragas_eval_dataset)
ragas_evals_df.head()

  0%|          | 0/2 [00:00<?, ?it/s]

Unnamed: 0,question,answer,contexts,ground_truth
0,What aspects of training strategies are discus...,Supervised in-context training strategies disc...,[In-context LearningTraining Warmup (§4)Superv...,The context discusses aspects of training stra...
1,How did Li et al. (2023d) enhance long-range l...,Li et al. (2023d) enhanced long-range language...,[2022. Lamda: Language mod-\nels for dialog ap...,Li et al. (2023d) enhanced long-range language...


#### Export the query embeddings to a DataFrame for visualization

In [22]:
# Query-side embeddings recorded under the "llama-index" project, one row per
# embedded text, for use in the embedding visualization.
query_embedding_query = SpanQuery().explode(
    "embedding.embeddings",
    text="embedding.text",
    vector="embedding.vector",
)
query_embeddings_df = px.Client().query_spans(
    query_embedding_query,
    project_name="llama-index",
)
query_embeddings_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,text,vector
context.span_id,position,Unnamed: 2_level_1,Unnamed: 3_level_1
705192fc8365722f,0,How did Li et al. (2023d) enhance long-range l...,"[0.01767698861658573, -0.02579161524772644, 0...."
179e419e17d2b0d4,0,What aspects of training strategies are discus...,"[-0.04597451165318489, 0.020289277657866478, 0..."


In [23]:

from phoenix.session.evaluation import get_qa_with_reference

# dataset containing span data for evaluation with Ragas
# (indexed by span ID, with `input`, `output` and `reference` columns, as the
# output below shows). `client` is the Phoenix client created in an earlier cell.
spans_dataframe = get_qa_with_reference(client, project_name="llama-index")
spans_dataframe.head()

Unnamed: 0_level_0,input,output,reference
context.span_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
51cc20b4798719e5,How did Li et al. (2023d) enhance long-range l...,Li et al. (2023d) enhanced long-range language...,2022. Lamda: Language mod-\nels for dialog app...
e586fb8107e71bac,What aspects of training strategies are discus...,Supervised in-context training strategies disc...,In-context LearningTraining Warmup (§4)Supervi...


#### Evaluate the LLM traces with Ragas and collect the scores in a DataFrame

In [24]:

from phoenix.trace import using_project
from ragas import evaluate
from ragas.metrics import (
    answer_correctness,
    context_precision,
    context_recall,
    faithfulness,
)

# Log the traces to the project "ragas-evals" just to view
# how Ragas works under the hood
with using_project("ragas-evals"):
    evaluation_result = evaluate(
        dataset=ragas_eval_dataset,
        metrics=[faithfulness, answer_correctness, context_recall, context_precision],
    )
# Per-question scores as a DataFrame, one column per metric listed above.
eval_scores_df = pd.DataFrame(evaluation_result.scores)

Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

#### Submit the evaluations to Phoenix so the scores are visible on the spans

In [25]:
from phoenix.trace import SpanEvaluations

# Assign span ids to your ragas evaluation scores (needed so Phoenix knows where to attach the spans).
eval_data_df = pd.DataFrame(evaluation_result.dataset)
eval_questions = eval_data_df["question"].to_list()
span_inputs = spans_dataframe["input"].to_list()
span_ids = spans_dataframe.index.to_list()

if eval_questions == span_inputs[::-1]:
    # Usual case: the spans come back in reverse order of the questions.
    aligned_span_ids = span_ids[::-1]
elif eval_questions == span_inputs:
    # Spans already line up with the questions.
    aligned_span_ids = span_ids
else:
    # Any other ordering: match each question to its span by text. That is only
    # unambiguous when every span input is distinct and both sides hold exactly
    # the same texts; otherwise fail with the same error as before.
    span_id_by_input = dict(zip(span_inputs, span_ids))
    texts_match = (
        len(span_id_by_input) == len(span_ids) == len(eval_questions)
        and set(eval_questions) == set(span_id_by_input)
    )
    if not texts_match:
        raise AssertionError(
            "Phoenix spans are in an unexpected order. Re-start the notebook and try again."
        )
    aligned_span_ids = [span_id_by_input[question] for question in eval_questions]

eval_scores_df.index = pd.Index(aligned_span_ids, name=spans_dataframe.index.name)

# Log the evaluations to Phoenix under the project "llama-index"
# This will allow you to visualize the scores alongside the spans in the UI
evals_client = px.Client()  # one client for all metrics instead of one per iteration
for eval_name in eval_scores_df.columns:
    evals_df = eval_scores_df[[eval_name]].rename(columns={eval_name: "score"})
    evals = SpanEvaluations(eval_name, evals_df)
    evals_client.log_evaluations(evals, project_name="llama-index")

#### Visualize and analyze the embeddings

In [26]:
# The query embeddings come back in reverse order of the questions. Bind the
# reversed view to a NEW name and leave `query_embeddings_df` untouched, so that
# re-running this cell does not flip the order a second time and trip the check.
query_embeddings_ordered_df = query_embeddings_df.iloc[::-1]
assert ragas_evals_df.question.tolist() == query_embeddings_ordered_df.text.tolist(), (
    "Query embeddings do not line up with the evaluated questions."
)
assert test_df.question.tolist() == ragas_evals_df.question.tolist(), (
    "Test-set questions do not line up with the evaluated questions."
)

# Every piece is re-indexed 0..n-1 so the column-wise concat pairs rows by position.
query_df = pd.concat(
    [
        ragas_evals_df[["question", "answer", "ground_truth"]].reset_index(drop=True),
        query_embeddings_ordered_df[["vector"]].reset_index(drop=True),
        test_df[["evolution_type"]].reset_index(drop=True),
        eval_scores_df.reset_index(drop=True),
    ],
    axis=1,
)
query_df.head()

Unnamed: 0,question,answer,ground_truth,vector,evolution_type,faithfulness,answer_correctness,context_recall,context_precision
0,What aspects of training strategies are discus...,Supervised in-context training strategies disc...,The context discusses aspects of training stra...,"[-0.04597451165318489, 0.020289277657866478, 0...",simple,0.8,0.727399,1.0,1.0
1,How did Li et al. (2023d) enhance long-range l...,Li et al. (2023d) enhanced long-range language...,Li et al. (2023d) enhanced long-range language...,"[0.01767698861658573, -0.02579161524772644, 0....",simple,1.0,0.616301,0.0,0.5


In [27]:

# Tell Phoenix which columns hold the raw text and the embedding vector on the
# corpus side and on the query side (the query side also names the response column).
corpus_schema = px.Schema(
    prompt_column_names=px.EmbeddingColumnNames(
        vector_column_name="vector",
        raw_data_column_name="text",
    )
)
query_schema = px.Schema(
    response_column_names="answer",
    prompt_column_names=px.EmbeddingColumnNames(
        vector_column_name="vector",
        raw_data_column_name="question",
    ),
)

# relaunch phoenix with a primary and corpus dataset to view embeddings
px.close_app()
query_inferences = px.Inferences(query_df, query_schema, "query")
corpus_inferences = px.Inferences(corpus_df.reset_index(drop=True), corpus_schema, "corpus")
session = px.launch_app(primary=query_inferences, corpus=corpus_inferences)
   

🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📺 To view the Phoenix app in a notebook, run `px.active_session().view()`
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix
