In [17]:
from os import environ
from dotenv import load_dotenv

load_dotenv()

OPENAI_API_KEY = environ["OPENAI_API_KEY"]

In [18]:
from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage,
)
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.agent import ReActAgent
from llama_index.llms.openai import OpenAI


# Create an llm object to use for the QueryEngine and the ReActAgent
llm = OpenAI(model="gpt-4o-mini")

In [19]:
import phoenix as px
session = px.launch_app()

Existing running Phoenix instance detected! Shutting it down and starting a new instance...


🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


In [20]:
from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
from phoenix.otel import register

tracer_provider = register()
LlamaIndexInstrumentor().instrument(tracer_provider=tracer_provider)

Overriding of current TracerProvider is not allowed
Attempting to instrument while already instrumented


🔭 OpenTelemetry Tracing Details 🔭
|  Phoenix Project: default
|  Span Processor: SimpleSpanProcessor
|  Collector Endpoint: localhost:4317
|  Transport: gRPC
|  Transport Headers: {'user-agent': '****'}
|  
|  Using a default SpanProcessor. `add_span_processor` will overwrite this default.
|  
|  `register` has set this TracerProvider as the global OpenTelemetry default.
|  To disable this behavior, call `register` with `set_global_tracer_provider=False`.



In [21]:
from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
from phoenix.otel import register

tracer_provider = register()
LlamaIndexInstrumentor().instrument(tracer_provider=tracer_provider)

Overriding of current TracerProvider is not allowed
Attempting to instrument while already instrumented


🔭 OpenTelemetry Tracing Details 🔭
|  Phoenix Project: default
|  Span Processor: SimpleSpanProcessor
|  Collector Endpoint: localhost:4317
|  Transport: gRPC
|  Transport Headers: {'user-agent': '****'}
|  
|  Using a default SpanProcessor. `add_span_processor` will overwrite this default.
|  
|  `register` has set this TracerProvider as the global OpenTelemetry default.
|  To disable this behavior, call `register` with `set_global_tracer_provider=False`.



In [22]:
try:
    storage_context = StorageContext.from_defaults(
        persist_dir="./storage/lyft"
    )
    lyft_index = load_index_from_storage(storage_context)

    storage_context = StorageContext.from_defaults(
        persist_dir="./storage/uber"
    )
    uber_index = load_index_from_storage(storage_context)

    index_loaded = True
except:
    index_loaded = False

In [23]:
if not index_loaded:
    # load data
    lyft_docs = SimpleDirectoryReader(
        input_files=["./10k/lyft_2021.pdf"]
    ).load_data()
    uber_docs = SimpleDirectoryReader(
        input_files=["./10k/uber_2021.pdf"]
    ).load_data()

    # build index
    lyft_index = VectorStoreIndex.from_documents(lyft_docs, show_progress=True)
    uber_index = VectorStoreIndex.from_documents(uber_docs, swow_progress=True)
    VectorStoreIndex

    # persist index
    lyft_index.storage_context.persist(persist_dir="./storage/lyft")
    uber_index.storage_context.persist(persist_dir="./storage/uber")

In [24]:
lyft_engine = lyft_index.as_query_engine(similarity_top_k=3, llm=llm)
uber_engine = uber_index.as_query_engine(similarity_top_k=3, llm=llm)

In [25]:
query_engine_tools = [
    QueryEngineTool(
        query_engine=lyft_engine,
        metadata=ToolMetadata(
            name="lyft_10k",
            description=(
                "Provides information about Lyft financials for year 2021. "
                "Use a detailed plain text question as input to the tool."
            ),
        ),
    ),
    QueryEngineTool(
        query_engine=uber_engine,
        metadata=ToolMetadata(
            name="uber_10k",
            description=(
                "Provides information about Uber financials for year 2021. "
                "Use a detailed plain text question as input to the tool."
            ),
        ),
    ),
]

In [26]:
agent = ReActAgent.from_tools(
    query_engine_tools,
    llm=llm,
    verbose=True,
    max_turns=10,
)

In [27]:
response = agent.chat("Who had more profit in 2021, Lyft or Uber?")
print(str(response))

> Running step 7c1efee4-6e20-409c-9eba-8d7c03502b9a. Step input: Who had more profit in 2021, Lyft or Uber?
[1;3;38;5;200mThought: The current language of the user is: English. I need to use tools to find the financial information for Lyft and Uber in 2021 to compare their profits.
Action: lyft_10k
Action Input: {'input': "What was Lyft's profit in 2021?"}
[0m[1;3;34mObservation: Lyft reported a net loss of $1.009 billion in 2021.
[0m> Running step 8e2e8cdf-a4ec-4653-a855-a123ca0ffdc1. Step input: None
[1;3;38;5;200mThought: I have the profit information for Lyft. Now, I need to find the profit information for Uber in 2021 to make a comparison.
Action: uber_10k
Action Input: {'input': "What was Uber's profit in 2021?"}
[0m[1;3;34mObservation: Uber reported a net loss attributable to the company of $496 million in 2021.
[0m> Running step 2e13f8ed-935d-4e70-a18c-c51f2dc4907c. Step input: None
[1;3;38;5;200mThought: I can answer without using any more tools. I'll use the user's l

# Audio process with Whisper

I'm gonna try and get a transcript from an ai model and read that accordingly.

In [8]:
from openai import OpenAI

client = OpenAI(api_key=OPENAI_API_KEY)

file_path = "./realtime-audio/can-you-help-me-find-a-bathroom.mp3"

with open(file_path, "rb") as audio_file:
    response = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file,
        language="en",
        response_format="text"
    )

if response:
    print("Transcription:", response)


Transcription: Hey AI, I'm looking for a bathroom but I can't find one. Can you find one for me on Google Maps? Thank you.

