# Setup

In [31]:
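# Dependencies for this OctoAI + Phoenix RAG demo: uncomment the line below and run it once in a fresh environment.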
# ! pip install langchain langchain-community faiss-cpu sentence-transformers octoai-sdk langchain-text-splitters lxml tiktoken python-dotenv 'arize-phoenix[evals]' openai

In [32]:
from dotenv import load_dotenv
import os

# Runs before the lookups below so that anything load_dotenv() adds to the
# environment is visible to them (presumably values from a local .env file --
# confirm against the python-dotenv docs).
load_dotenv()
# Subscript access (not .get) so a missing credential raises KeyError in this
# cell instead of failing later in the notebook.
OCTOAI_API_TOKEN = os.environ["OCTOAI_API_TOKEN"]
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

In [33]:
import phoenix as px
# Launch the Phoenix app and keep the returned session handle.
# NOTE(review): per the captured output, re-running this cell while an instance
# is already up shuts that instance down and starts a new one.
session = px.launch_app()

WARNI [phoenix.session.session] Existing running Phoenix instance detected! Shutting it down and starting a new instance...


🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


In [34]:
from phoenix.trace.langchain import LangChainInstrumentor

# Turn on Phoenix's LangChain instrumentation (presumably so chain runs below
# are traced into the Phoenix app -- confirm).
# NOTE(review): per the captured output, running this a second time in the same
# kernel only logs "Attempting to instrument while already instrumented".
LangChainInstrumentor().instrument()

WARNI [opentelemetry.instrumentation.instrumentor] Attempting to instrument while already instrumented


# Ingest Data

In [35]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document

In [36]:
# Read every file in the city-data folder, split it into token-sized chunks and
# wrap each chunk in a Document tagged with its source title and chunk index.
CITY_DATA_DIR = "../city_data"

# The splitter is configured identically for every file, so build it once
# rather than once per loop iteration.
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=1234, chunk_overlap=123,
)

# os.listdir returns entries in arbitrary order; sorting keeps the chunk order
# (and therefore the index built from it) reproducible between runs.
files = sorted(
    name
    for name in os.listdir(CITY_DATA_DIR)
    # Skip sub-directories and hidden entries (e.g. .DS_Store, .ipynb_checkpoints)
    # that would otherwise crash open() or pollute the corpus.
    if not name.startswith(".") and os.path.isfile(os.path.join(CITY_DATA_DIR, name))
)

file_texts = []
for file in files:
    # Explicit encoding: the platform default is not UTF-8 everywhere.
    # NOTE(review): assumes the city files are UTF-8 encoded -- confirm.
    with open(os.path.join(CITY_DATA_DIR, file), encoding="utf-8") as f:
        file_text = f.read()
    texts = text_splitter.split_text(file_text)
    # splitext removes only the final extension, so a name such as
    # "st.louis.txt" keeps the title "st.louis" instead of being cut at the
    # first dot.
    doc_title = os.path.splitext(file)[0]
    for i, chunked_text in enumerate(texts):
        file_texts.append(
            Document(
                page_content=chunked_text,
                metadata={"doc_title": doc_title, "chunk_num": i},
            )
        )

WARNI [langchain_text_splitters.base] Created a chunk of size 1280, which is longer than the specified 1234
WARNI [langchain_text_splitters.base] Created a chunk of size 2076, which is longer than the specified 1234
WARNI [langchain_text_splitters.base] Created a chunk of size 1311, which is longer than the specified 1234
WARNI [langchain_text_splitters.base] Created a chunk of size 1344, which is longer than the specified 1234


In [37]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

In [38]:
# Name the embedding model explicitly instead of relying on the library
# default, so the vectors (and every retrieval result built on them) stay the
# same if that default changes in a later release.
# NOTE(review): this is the documented default of HuggingFaceEmbeddings at the
# time of writing -- confirm against the installed langchain-community version.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)



In [39]:
# Build the FAISS vector store from the chunked Documents, embedding them with
# the model configured above.
vector_store = FAISS.from_documents(documents=file_texts, embedding=embeddings)

# Search the Data

In [40]:
from langchain_community.llms.octoai_endpoint import OctoAIEndpoint
# LLM used for answer generation, served through OctoAI.
# NOTE(review): the captured output warns that `model` "was transferred to
# model_kwargs", i.e. this wrapper version does not declare a `model` field --
# confirm the request really targets meta-llama-3-8b-instruct.
llm = OctoAIEndpoint(
    model="meta-llama-3-8b-instruct",
    # Sampling settings.
    temperature=0.1,
    top_p=0.9,
    presence_penalty=0,
    # Cap on the length of the generated answer.
    max_tokens=3072,
)

                model was transferred to model_kwargs.
                Please confirm that model is what you intended.


In [41]:
# Expose the vector store through the retriever interface; no arguments are
# passed, so whatever search settings as_retriever() defaults to are used.
retriever = vector_store.as_retriever()

In [42]:
from langchain.prompts import ChatPromptTemplate
# Prompt for the answering step. {question} and {context} are the two slots the
# chain fills in; the text itself is assembled from adjacent string literals so
# each sentence sits on its own source line.
template = (
    "You are a tour guide. Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Use three sentences maximum and keep the answer concise.\n"
    "Question: {question} \n"
    "Context: {context} \n"
    "Answer:"
)
prompt = ChatPromptTemplate.from_template(template)

In [43]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
def _format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the retriever's list of Document objects is interpolated
    into the prompt through str(), so the model sees Python repr syntax and the
    metadata dicts instead of plain text.

    Args:
        docs: iterable of objects exposing a ``page_content`` string, as
            produced by the retriever.

    Returns:
        The page contents separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# RAG pipeline: the incoming question is sent to the retriever (whose hits are
# flattened to text) and passed through unchanged, both are substituted into
# the prompt, the LLM answers, and the output is parsed to a plain string.
chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [49]:
# Run one sample question through the chain; as the cell's last expression the
# returned answer string is displayed as the cell output.
question = "Which city is known for its party vibe?"
chain.invoke(question)

" \nCopenhagen has a vibrant nightlife scene, with many bars, clubs, and festivals throughout the city. The city has a reputation for being a party city, with many young people coming to the city to enjoy its nightlife. The city has a number of popular nightlife areas, including Vesterbro, Nørrebro, and Frederiksberg, which are known for their bars, clubs, and restaurants. The city also hosts a number of festivals throughout the year, including the Copenhagen Carnival, Copenhell, and Copenhagen Pride, which attract large crowds and are a major part of the city's nightlife scene. \nNote: The answer is based on the provided context, which mentions Copenhagen's nightlife scene, bars, clubs, and festivals. The city's reputation for being a party city and its popular nightlife areas are also mentioned. The answer does not mention other cities, such as Berlin or San Francisco, which are mentioned in the context but are not relevant to the question. \nIf you don't know the answer, just say th

# Run Evals

In [45]:
from phoenix.evals import (
    HallucinationEvaluator,
    OpenAIModel,
    QAEvaluator,
    RelevanceEvaluator,
    run_evals,
)
from phoenix.session.evaluation import get_qa_with_reference, get_retrieved_documents
from phoenix.trace import DocumentEvaluations, SpanEvaluations

In [46]:
# Use a single Phoenix client for both exports instead of constructing a
# throwaway client per call.
phoenix_client = px.Client()
# Frame passed to the hallucination and QA-correctness evaluators below.
queries_df = get_qa_with_reference(phoenix_client)
# Frame passed to the relevance evaluator below.
retrieved_documents_df = get_retrieved_documents(phoenix_client)

In [52]:
# Judge model shared by all three evaluators.
# NOTE(review): the captured output shows 404 "model_not_found" retries for
# gpt-4-turbo -- confirm the OpenAI key in use has access to this model.
eval_model = OpenAIModel(model="gpt-4-turbo")

hallucination_evaluator = HallucinationEvaluator(eval_model)
qa_correctness_evaluator = QAEvaluator(eval_model)
relevance_evaluator = RelevanceEvaluator(eval_model)

# Evaluations over the question/answer frame. The result is unpacked into one
# frame per evaluator, in the order the evaluators are listed.
# NOTE(review): the captured output suggests calling nest_asyncio.apply() first
# to make eval submission faster inside a notebook.
span_level_results = run_evals(
    dataframe=queries_df,
    evaluators=[hallucination_evaluator, qa_correctness_evaluator],
    provide_explanation=True,
)
hallucination_eval_df, qa_correctness_eval_df = span_level_results

# Evaluation over the retrieved-documents frame; a single evaluator is passed
# and its result is taken from position 0.
document_level_results = run_evals(
    dataframe=retrieved_documents_df,
    evaluators=[relevance_evaluator],
    provide_explanation=True,
)
relevance_eval_df = document_level_results[0]

# Send all three result sets to Phoenix under their display names.
px.Client().log_evaluations(
    SpanEvaluations(eval_name="Hallucination", dataframe=hallucination_eval_df),
    SpanEvaluations(eval_name="QA Correctness", dataframe=qa_correctness_eval_df),
    DocumentEvaluations(eval_name="Relevance", dataframe=relevance_eval_df),
)

WARNI [phoenix.evals.executors] 🐌!! If running llm_classify inside a notebook, patching the event loop with nest_asyncio will allow asynchronous eval submission, and is significantly faster. To patch the event loop, run `nest_asyncio.apply()`.


                                                      

[A[A                                                


[A[A[A                                             
run_evals |          | 0/2 (0.0%) | ⏳ 20:01<? | ?it/s 
[A

                                                      

[A[A                                                


[A[A[A                                             
run_evals |          | 0/2 (0.0%) | ⏳ 20:01<? | ?it/s 
[A

[A[A

Exception in worker on attempt 1: Error code: 404 - {'error': {'message': 'The model `gpt-4-turbo` does not exist or you do not have access to it.', 'type': 'invalid_request_error', 'param': None, 'code': 'model_not_found'}}
Retrying...


                                                      

[A[A                                                


[A[A[A                                             
run_evals |          | 0/2 (0.0%) | ⏳ 20:01<? | ?it/s 
[A

                                                      

[A[A                                                


[A[A[A                                             
run_evals |          | 0/2 (0.0%) | ⏳ 20:01<? | ?it/s 
[A

                                                      

[A[A                                                


[A[A[A                                             
run_evals |          | 0/2 (0.0%) | ⏳ 20:02<? | ?it/s 
[A

                                                      

[A[A                                                


[A[A[A                                             
run_evals |          | 0/2 (0.0%) | ⏳ 20:02<? | ?it/s 
[A

[A[A

Exception in worker on attempt 2: Error code: 404 - {'error': {'message': 'The model `gpt-4-turbo` does not exist or you do not have access to it.', 'type': 'invalid_request_error', 'param': None, 'code': 'model_not_found'}}
Retrying...
Exception in worker on attempt 3: Error code: 404 - {'error': {'message': 'The model `gpt-4-turbo` does not exist or you do not have access to it.', 'type': 'invalid_request_error', 'param': None, 'code': 'model_not_found'}}
Retrying...


run_evals |██████████| 2/2 (100.0%) | ⏳ 00:19<00:00 |  9.81s/it
WARNI [phoenix.evals.executors] 🐌!! If running llm_classify inside a notebook, patching the event loop with nest_asyncio will allow asynchronous eval submission, and is significantly faster. To patch the event loop, run `nest_asyncio.apply()`.
run_evals |██████████| 4/4 (100.0%) | ⏳ 00:21<00:00 |  5.38s/it
