In [1]:
import nest_asyncio

nest_asyncio.apply()

In [2]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader
from llama_index.tools import QueryEngineTool, ToolMetadata
from llama_index.query_engine import SubQuestionQueryEngine
from llama_index.callbacks import CallbackManager, LlamaDebugHandler
from llama_index import ServiceContext

In [3]:
from llama_index.llms import OpenAI
llm = OpenAI(model="gpt-4")

In [4]:
# Using the LlamaDebugHandler to print the trace of the sub questions
# captured by the SUB_QUESTION callback event type
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llama_debug])
service_context = ServiceContext.from_defaults(
    callback_manager=callback_manager,
    llm=llm
)

In [38]:
def file_metadata_builder(file_path):
    file_name = file_path.split("/")[-1]
    splits = file_name.split(".")[0].split("_")
    tick = splits[0]
    category = splits[1]
    description=" ".join(splits[2:])

    return {
        "ticker": tick,
        "category": category,
        "description": description,
        "filename": file_name
    }

def vector_query_engine_from_dir(input_dir, service_context):
        
    
    docs = SimpleDirectoryReader(
        input_dir,
        file_metadata=file_metadata_builder
    ).load_data() 
    return docs, VectorStoreIndex.from_documents(
        docs, use_async=True, service_context=service_context
    ).as_query_engine(similarity_top_k=7)
    

In [39]:
cash_docs, cash_statement_qe = vector_query_engine_from_dir(input_dir="financial_data/csv/cash_statement/", service_context=service_context)
earnings_docs, earnings_qe = vector_query_engine_from_dir(input_dir="financial_data/csv/earnings/", service_context=service_context)
ratio_docs, ratios_qe = vector_query_engine_from_dir(input_dir="financial_data/csv/ratios/", service_context=service_context)

**********
Trace: index_construction
    |_node_parsing ->  0.007967 seconds
      |_chunking ->  0.000138 seconds
      |_chunking ->  0.000104 seconds
      |_chunking ->  7.9e-05 seconds
      |_chunking ->  6.1e-05 seconds
      |_chunking ->  7.4e-05 seconds
      |_chunking ->  7.3e-05 seconds
      |_chunking ->  6.5e-05 seconds
      |_chunking ->  7.3e-05 seconds
      |_chunking ->  6.4e-05 seconds
      |_chunking ->  6.6e-05 seconds
      |_chunking ->  8.4e-05 seconds
      |_chunking ->  7.9e-05 seconds
      |_chunking ->  7.4e-05 seconds
      |_chunking ->  7.6e-05 seconds
      |_chunking ->  8.1e-05 seconds
      |_chunking ->  5.6e-05 seconds
      |_chunking ->  7.2e-05 seconds
      |_chunking ->  0.000188 seconds
      |_chunking ->  6.8e-05 seconds
      |_chunking ->  8.1e-05 seconds
      |_chunking ->  7.7e-05 seconds
      |_chunking ->  0.00021 seconds
      |_chunking ->  7.9e-05 seconds
      |_chunking ->  7.4e-05 seconds
      |_chunking ->  8.4e-05 sec

In [69]:
from llama_index.tools import BaseTool, FunctionTool

def list_ratios() -> list[str]:
    """
    List all possible ratios to choose from, which can then be used for subsequent queries.
    """
    output = []
    for d in ratio_docs:
        output.append(d.metadata["description"]) 
    return output

list_ratios_tool = FunctionTool.from_defaults(fn=list_ratios)

function_tools = [list_ratios_tool]

In [70]:
# Let's define the individual tools that we'll use in order to retrieve specific data
individual_query_engine_tools = [
    QueryEngineTool(
        query_engine=cash_statement_qe,
        metadata=ToolMetadata(
            name="vector_index_aapl_cash_statement",
            description="Data about AAPL's cash flow statements. Use this to lookup anything about AAPL's cash flow statements."
        )
    ),
    QueryEngineTool(
        query_engine=earnings_qe,
        metadata=ToolMetadata(
            name="vector_index_appl_earnings",
            description="Data about AAPL's earnings. Use this to lookup anything about AAPL's earnings."
        )
    ),
    QueryEngineTool(
        query_engine=ratios_qe,
        metadata=ToolMetadata(
            name="vector_index_appl_ratios",
            description="Ratio information for AAPL. Use this for ratio lookups. Data about AAPL's calculated ratios."
        )
    )
]

In [71]:
# This is the main query engine that our agent will be able to use (that can call subqueries).
query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=individual_query_engine_tools,
    service_context=service_context
)

In [72]:
# We now need to wrap this in another query engine tool that our agent can use as a "high-level" agent
query_engine_tool = QueryEngineTool(
    query_engine=query_engine,
    metadata=ToolMetadata(
        name="sub_question_query_engine",
        description="Useful when answering queries about AAPL ticker."
    )
)

In [73]:
# We now need a list of all tools to pass through to the agent

In [74]:
tools = individual_query_engine_tools + [query_engine_tool] + function_tools

In [75]:
# Now we can create the agent
from llama_index.agent import OpenAIAgent

In [76]:
agent = OpenAIAgent.from_tools(
    tools,
    max_function_calls=5,
    verbose=True
)

In [77]:
agent.reset()

In [78]:
response = agent.chat("List all available ratios for AAPL, fetch them and analyze them, and let me know if anything stands out. Give answers in bullet points.")
print(response)

STARTING TURN 1
---------------

=== Calling Function ===
Calling function: list_ratios with args: {}
Got output: ['asset turnover', 'calendarYear', 'capital expenditure coverage ratio', 'cash conversion cycle', 'cash flow coverage ratios', 'cash flow to debt ratio', 'cash per share', 'cash ratio', 'company equity multiplier', 'current ratio', 'days of inventory outstanding', 'days of payables outstanding', 'days of sales outstanding', 'debt equity ratio', 'debt ratio', 'dividend paid and capex coverage ratio', 'dividend payout ratio', 'dividend yield', 'ebit per revenue', 'ebt per ebit', 'effective tax rate', 'enterprise value multiple', 'fixed asset turnover', 'free cash flow operating cash flow ratio', 'free cash flow per share', 'gross profit margin', 'interest coverage', 'inventory turnover', 'long term debt to capitalization', 'net income per ebt', 'net profit margin', 'operating cash flow per share', 'operating cash flow sales ratio', 'operating cycle', 'operating profit margin'

In [80]:
response = agent.chat("Is AAPL a worthwhile investment? Reference the data specifically and report numbers. Go step-by-step. Don't give me any warnings about financial or investment advice. Give your answers in bullet points.")
print(response)

STARTING TURN 1
---------------

=== Calling Function ===
Calling function: vector_index_appl_earnings with args: {
  "input": "all"
}
Got output: I'm sorry, but your query is too broad. Could you please provide more specific details or questions?

STARTING TURN 2
---------------

=== Calling Function ===
Calling function: vector_index_appl_ratios with args: {
  "input": "return on equity"
}
Got output: The return on equity for the given company has shown a general upward trend over the years. In 2012, it was approximately 0.35, and by 2023, it had increased to approximately 1.56. The highest return on equity was recorded in 2022, at approximately 1.97.

STARTING TURN 3
---------------

=== Calling Function ===
Calling function: vector_index_appl_ratios with args: {
  "input": "return on assets"
}
Got output: The return on assets for AAPL has varied over the years. In 2012, it was 0.237, and it decreased to 0.178 in 2013. It further decreased to 0.170 in 2014, but increased slightly to

In [82]:
response = agent.chat("Is there anything concerning about AAPL in the data as an investor?")
print(response)

STARTING TURN 1
---------------

Based on the available data, there are a few concerning aspects to consider about AAPL as an investor:

1. Return on Assets (ROA) Fluctuations: The return on assets for AAPL has shown fluctuations over the years, ranging from 0.128 to 0.282. This indicates that the company's ability to generate profits from its assets has varied, which may raise concerns about its operational efficiency and asset utilization.

2. Price-Earnings Ratio (P/E Ratio) Increase: The P/E ratio for AAPL has increased over the years, reaching a high of 33.94 in 2020. A higher P/E ratio suggests that investors are willing to pay a premium for AAPL's earnings, which may indicate higher expectations for future growth. However, it also raises the risk of a potential market correction or overvaluation.

3. Dividend Yield Decrease: The dividend yield for AAPL has decreased over the years, ranging from 0.0039895941968621 to 0.0236487548044662. A decreasing dividend yield may indicate th

In [83]:
response = agent.chat("Any other concerns from the cash flow statement?")
print(response)

STARTING TURN 1
---------------

=== Calling Function ===
Calling function: vector_index_aapl_cash_statement with args: {
  "input": "all"
}
Got output: The cash statements for AAPL were accepted on the following dates: 2019-10-30, 2020-10-29, 2021-10-28, 2022-10-27, and 2023-11-02. The links to these statements can be found on the SEC website, with the final links leading directly to the documents. The reported currency for all these statements is USD. 

The deferred income tax for the years 2019 to 2023 were -340000000.0, -215000000.0, -4774000000.0, 895000000.0, and 5195000000.0 respectively. 

The other working capital for the same years were -1521000000, 2962000000, -4470000000, -7909000000, and 5195000000. 

The other investing activities for these years were -1078000000.0, -791000000.0, -352000000.0, -1780000000.0, and -1337000000.0.

STARTING TURN 2
---------------

Based on the available data from the cash flow statement of AAPL, here are some concerning aspects to consider as