# Earnings Call Transcript Analysis

## Setup

In [2]:
# %%writefile requirements.txt
# llama-index
# llama-index-llms-huggingface
# llama-index-embeddings-fastembed
# fastembed
# Unstructured[md]
# chromadb
# llama-index-vector-stores-chroma
# llama-index-llms-groq
# einops
# accelerate
# sentence-transformers
# llama-index-llms-mistralai
# llama-index-llms-openai

In [3]:
from helper import get_openai_api_key
OPENAI_API_KEY = get_openai_api_key()

In [4]:
import nest_asyncio
nest_asyncio.apply()

## 1. Setup an agent over 3 papers

We can create vector search and summary tools respectively for each of the 3 papers, which then result in 6 tools in total. These tools can then be made available to the agent worker. 

In [5]:
import os
root_path = "transcripts"
file_name = []
file_path = []
for file in os.listdir(root_path):
  if file.endswith(".pdf"):
    file_name.append(file.split(".")[0])
    file_path.append(os.path.join(root_path,file))
#
print(file_name)
print(file_path)

['shop', 'low']
['transcripts/shop.pdf', 'transcripts/low.pdf']


In [6]:
from utils import get_doc_tools
from pathlib import Path

papers_to_tools_dict = {}
for name,filename in zip(file_name,file_path):
  vector_tool,summary_tool = get_doc_tools(filename,name)
  papers_to_tools_dict[name] = [vector_tool,summary_tool]

In [17]:
initial_tools = [t for f in file_name for t in papers_to_tools_dict[f]]

In [18]:
from llama_index.llms.openai import OpenAI

llm = OpenAI(model="gpt-3.5-turbo")

In [19]:
len(initial_tools)

4

In [20]:
from llama_index.core.agent import FunctionCallingAgentWorker
from llama_index.core.agent import AgentRunner

system_message = """ \
You are an agent designed to answer queries over a set of given earnings call transcripts reports.
Please always use the tools provided to answer a question. Do not rely on prior knowledge.\
"""

agent_worker = FunctionCallingAgentWorker.from_tools(
    initial_tools, 
    llm=llm, 
    system_prompt=system_message,
    verbose=True
)
agent = AgentRunner(agent_worker)

In [25]:
# The agent can then choose the appropriate tools for each steps 
response = agent.query(
    "Tell me about how shopify uses AI in their business (please use original words and as detailed as possible),"
    "and then tell me their initial results on using AI"
)

Added user message to memory: Tell me about how shopify uses AI in their business (please use original words and as detailed as possible),and then tell me their initial results on using AI
=== Calling Function ===
Calling function: summary_tool_shop with args: {"input": "Shopify's use of AI in their business and their initial results"}
=== Function Output ===
Shopify has utilized AI in their business to optimize marketing strategies, enhance customer acquisition, and improve customer engagement. Their AI-driven marketing systems have enabled them to maximize returns on existing channels and explore new ones effectively. By leveraging data analytics, Shopify has identified successful channels and capitalized on them, leading to increased merchant acquisition. The strategic use of AI, particularly in their Shop App, has driven traffic, engagement, and ultimately growth for the company.
=== Calling Function ===
Calling function: vector_tool_shop with args: {"query": "initial results of us

In [None]:
# we can chat/query about the LLM's reasoning process
response = agent.chat(
    "Tell me about the tools you leverage to answer the above question."
)

## 2. Setup an agent over more files

In [7]:
from utils import get_doc_tools

paper_to_tools_dict = {}
for name,filename in zip(file_name,file_path):
  print(f"Getting tools for paper: {filename}")
  vector_tool, summary_tool = get_doc_tools(filename,name)
  paper_to_tools_dict[name] = [vector_tool, summary_tool]

Getting tools for paper: transcripts/shop.pdf
Getting tools for paper: transcripts/low.pdf


### Extend the Agent with Tool Retrieval

In [8]:
all_tools = [t for f in file_name for t in papers_to_tools_dict[f]]

In [9]:
# define an "object" index over these tools
from llama_index.core import VectorStoreIndex
from llama_index.core.objects import ObjectIndex

obj_index = ObjectIndex.from_objects(
    all_tools,
    index_cls=VectorStoreIndex,
)

In [10]:
# define the "retriever" over the index with specified retrieval method
obj_retriever = obj_index.as_retriever(similarity_top_k=3)

In [12]:
tools = obj_retriever.retrieve(
    "Tell me about the eval dataset used in self rag and crag"
)

In [13]:
# check for the top 3 tools selected
tools[0].metadata

ToolMetadata(description='Use ONLY IF you want to get a holistic summary of MetaGPT. Do NOT use if you have specific questions over MetaGPT.', name='summary_tool_shop', fn_schema=<class 'llama_index.core.tools.types.DefaultToolFnSchema'>, return_direct=False)

In [14]:
system_message = """ \
You are an agent designed to answer queries over a set of given earnings call transcripts reports.
Please always use the tools provided to answer a question. Do not rely on prior knowledge.\
"""

In [15]:
from llama_index.core.agent import FunctionCallingAgentWorker, AgentRunner

# Define the agentWorker and agentRunner
agent_worker = FunctionCallingAgentWorker.from_tools(
    tool_retriever=obj_retriever,
    llm=llm, 
    system_prompt=system_message,
    verbose=True
)

# Initialize the AgentRunner with the agent worker
agent = AgentRunner(agent_worker)

NameError: name 'llm' is not defined

In [None]:
response = agent.query(
    "Tell me about the evaluation dataset used "
    "in self RAG against the dataset used in CRAG"
)
print(str(response))