In [None]:
import pandas as pd

# Remove the column-width cap so long cell values are shown in full
# instead of being truncated when a dataframe is rendered.
pd.options.display.max_colwidth = None

In [None]:

import os
from getpass import getpass

import openai

# Prefer a key that is already set in the environment; only prompt for one
# interactively (without echoing it) when the variable is missing or empty.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    openai_api_key = getpass("🔑 Enter your OpenAI API key: ")

# Publish the key both through the environment variable and on the openai
# module, so either lookup path finds it.
os.environ["OPENAI_API_KEY"] = openai_api_key
openai.api_key = openai_api_key

In [None]:

import phoenix as px
from llama_index.core import set_global_handler
from phoenix.trace.langchain import LangChainInstrumentor

# Launch the Phoenix app and keep the handle it returns.
session = px.launch_app()

# Set up instrumentation for both llama-index and LangChain (used by Ragas).
set_global_handler("arize_phoenix")
LangChainInstrumentor().instrument()

In [None]:
from llama_index.core import SimpleDirectoryReader

# Folder containing the source papers; the reader is capped at two files.
dir_path = "./data/prompt-engineering-papers"
reader = SimpleDirectoryReader(input_dir=dir_path, num_files_limit=2)

# Load the selected files into llama-index documents.
documents = reader.load_data()

In [None]:
from phoenix.trace import using_project
from ragas.testset.evolutions import multi_context, reasoning, simple
from ragas.testset.generator import TestsetGenerator

# Number of test samples requested from the generator.
TEST_SIZE = 1

# Build a Ragas test-set generator whose generator LLM, critic LLM and
# embedding model are all OpenAI models.
generator = TestsetGenerator.with_openai(
    generator_llm="gpt-3.5-turbo-0125",
    critic_llm="gpt-3.5-turbo-0125",
    embeddings="text-embedding-3-large",
)

# Relative weight of each question type: half simple, and a quarter each
# for reasoning and multi-context questions.
distribution = {
    simple: 0.5,
    reasoning: 0.25,
    multi_context: 0.25,
}

# Run the generation inside the "ragas-testset" project context so it can
# later be queried under that project name.
with using_project("ragas-testset"):
    testset = generator.generate_with_llamaindex_docs(
        documents,
        test_size=TEST_SIZE,
        distributions=distribution,
    )

# Convert the generated test set to a dataframe and preview its first rows.
test_df = testset.to_pandas()
test_df.head()

In [None]:
# Create a Phoenix client; the cells below use it to fetch span dataframes.
client = px.Client()

In [None]:
# Fetch the spans recorded under the "ragas-testset" project as a dataframe.
spans_df = client.get_spans_dataframe(project_name="ragas-testset")

In [None]:
# Persist the spans dataframe as CSV text.
# NOTE(review): the target path has no ".csv" extension — confirm this is intended.
spans_df.to_csv("./data/ragas-testset")

In [None]:
# Fetch spans again, this time without a project_name argument. This rebinds
# spans_df, so the "ragas-testset" dataframe fetched earlier is no longer
# reachable under this name.
# NOTE(review): presumably this reads the client's default project — confirm.
spans_df = client.get_spans_dataframe()

In [None]:
# Preview only the first rows instead of rendering the whole spans dataframe:
# column width is uncapped in this notebook, so a full dump is very large.
spans_df.head()

In [None]:
# Pull the trace dataset through a new Phoenix client and save it under ./data.
# NOTE(review): my_traces holds whatever save() returns, which may be an
# identifier for the saved dataset rather than the traces themselves — confirm.
my_traces = (
    px.Client()
    .get_trace_dataset()
    .save(directory="./data")
)

In [None]:
# Display the trace dataset returned by a newly created Phoenix client.
px.Client().get_trace_dataset()

In [None]:
# Fetch a spans dataframe through a newly created client, with no project_name
# given. The result is stored as span_df (singular), a different name from the
# spans_df used in earlier cells.
span_df = px.Client().get_spans_dataframe()

In [None]:
# Show the first rows of span_df.
span_df.head()