In [1]:
import nest_asyncio

nest_asyncio.apply()

In [2]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader
from llama_index.tools import QueryEngineTool, ToolMetadata
from llama_index.query_engine import SubQuestionQueryEngine
from llama_index.callbacks import CallbackManager, LlamaDebugHandler
from llama_index import ServiceContext

In [42]:
from llama_index.llms import OpenAI
llm = OpenAI(model="gpt-4-1106-preview")
llm = OpenAI(model="gpt-4")

In [43]:
# Using the LlamaDebugHandler to print the trace of the sub questions
# captured by the SUB_QUESTION callback event type
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llama_debug])
service_context = ServiceContext.from_defaults(
    callback_manager=callback_manager,
    llm=llm
)

In [20]:
def file_metadata_builder(file_path):
    file_name = file_path.split("/")[-1]
    splits = file_name.split(".")[0].split("_")
    tick = splits[0]
    category = splits[1]
    description=" ".join(splits[2:])

    return {
        "ticker": tick,
        "category": category,
        "description": description,
        "filename": file_name
    }

def vector_query_engine_from_dir(input_dir, service_context):
        
    
    docs = SimpleDirectoryReader(
        input_dir,
        file_metadata=file_metadata_builder
    ).load_data() 
    return docs, VectorStoreIndex.from_documents(
        docs, use_async=True, service_context=service_context
    ).as_query_engine(similarity_top_k=7)
    

In [34]:
TICKER = "TSLA"

cash_docs, cash_statement_qe = vector_query_engine_from_dir(input_dir=f"financial_data/csv/{TICKER}/cash_statement/", service_context=service_context)
earnings_docs, earnings_qe = vector_query_engine_from_dir(input_dir=f"financial_data/csv/{TICKER}/earnings/", service_context=service_context)
ratio_docs, ratios_qe = vector_query_engine_from_dir(input_dir=f"financial_data/csv/{TICKER}/ratios/", service_context=service_context)

**********
Trace: index_construction
    |_node_parsing ->  0.008236 seconds
      |_chunking ->  0.00022 seconds
      |_chunking ->  0.000103 seconds
      |_chunking ->  9.4e-05 seconds
      |_chunking ->  6.9e-05 seconds
      |_chunking ->  8.3e-05 seconds
      |_chunking ->  8.6e-05 seconds
      |_chunking ->  8.3e-05 seconds
      |_chunking ->  7.8e-05 seconds
      |_chunking ->  7.5e-05 seconds
      |_chunking ->  8.4e-05 seconds
      |_chunking ->  7.5e-05 seconds
      |_chunking ->  9.5e-05 seconds
      |_chunking ->  7.5e-05 seconds
      |_chunking ->  8.9e-05 seconds
      |_chunking ->  7.2e-05 seconds
      |_chunking ->  0.000102 seconds
      |_chunking ->  9e-05 seconds
      |_chunking ->  0.000229 seconds
      |_chunking ->  8e-05 seconds
      |_chunking ->  8.5e-05 seconds
      |_chunking ->  9.1e-05 seconds
      |_chunking ->  0.00023 seconds
      |_chunking ->  9.6e-05 seconds
      |_chunking ->  8.5e-05 seconds
      |_chunking ->  7.9e-05 seconds

In [35]:
from llama_index.tools import BaseTool, FunctionTool


# TODO: Automate this

def list_ratios() -> list[str]:
    """
    List all possible ratios to choose from.
    """
    output = []
    for d in ratio_docs:
        output.append(d.metadata["description"]) 
    return output

def list_earnings() -> list[str]:
    """
    List all possible earnings data to choose from.
    """
    output = []
    for d in earnings_docs:
        output.append(d.metadata["description"]) 
    return output

list_ratios_tool = FunctionTool.from_defaults(fn=list_ratios)
list_earnings_tool = FunctionTool.from_defaults(fn=list_earnings)

function_tools = [list_ratios_tool, list_earnings_tool]

In [36]:
# Let's define the individual tools that we'll use in order to retrieve specific data
individual_query_engine_tools = [
    QueryEngineTool(
        query_engine=cash_statement_qe,
        metadata=ToolMetadata(
            name=f"vector_index_{TICKER}_cash_statement",
            description=f"Data about {TICKER} cash flow statements. Use this to lookup anything about {TICKER}'s cash flow statements."
        )
    ),
    QueryEngineTool(
        query_engine=earnings_qe,
        metadata=ToolMetadata(
            name=f"vector_index_{TICKER}_earnings",
            description=f"Data about {TICKER} earnings. Use this to lookup anything about {TICKER}'s earnings."
        )
    ),
    QueryEngineTool(
        query_engine=ratios_qe,
        metadata=ToolMetadata(
            name=f"vector_index_{TICKER}_ratios",
            description=f"Ratio information for {TICKER}. Use this for ratio lookups. Data about {TICKER}'s calculated ratios."
        )
    )
]

In [37]:
individual_query_engine_tools

[<llama_index.tools.query_engine.QueryEngineTool at 0x16ad9fdc0>,
 <llama_index.tools.query_engine.QueryEngineTool at 0x16ad9f850>,
 <llama_index.tools.query_engine.QueryEngineTool at 0x16ad9f730>]

In [38]:
# This is the main query engine that our agent will be able to use (that can call subqueries).
query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=individual_query_engine_tools,
    service_context=service_context
)

In [39]:
# We now need to wrap this in another query engine tool that our agent can use as a "high-level" agent
query_engine_tool = QueryEngineTool(
    query_engine=query_engine,
    metadata=ToolMetadata(
        name="sub_question_query_engine",
        description=f"Useful when answering queries about {TICKER}."
    )
)

In [40]:
# We now need a list of all tools to pass through to the agent
tools = individual_query_engine_tools + [query_engine_tool] + function_tools
tools

[<llama_index.tools.query_engine.QueryEngineTool at 0x16ad9fdc0>,
 <llama_index.tools.query_engine.QueryEngineTool at 0x16ad9f850>,
 <llama_index.tools.query_engine.QueryEngineTool at 0x16ad9f730>,
 <llama_index.tools.query_engine.QueryEngineTool at 0x16a7c6560>,
 <llama_index.tools.function_tool.FunctionTool at 0x16ab8a950>,
 <llama_index.tools.function_tool.FunctionTool at 0x16a883910>]

In [41]:
# Now we can create the agent
from llama_index.agent import ReActAgent
from llama_index.llms import OpenAI

In [42]:
llm = OpenAI("gpt-4-1106-preview")
agent = ReActAgent.from_tools(tools, verbose=True)

In [43]:
agent.reset()

In [45]:
response = agent.chat(
    """
    You are a financial and investment analyst. What can you tell me about TSLA's cash flow from the data?
    """
)

print('=======')
print(response)

[1;3;38;5;200mThought: I can use the tool vector_index_TSLA_cash_statement to get information about TSLA's cash flow statements.
Action: vector_index_TSLA_cash_statement
Action Input: {'input': 'cash_flow'}
[0m[1;3;34mObservation: The cash flow information for TSLA over several years is categorized into different types of cash flows as follows:

Net Cash Flow from Financing Activities:
- 2018: $573,755,000
- 2019: $1,529,000,000
- 2020: $9,973,000,000
- 2021: -$5,203,000,000
- 2022: -$3,527,000,000

Net Cash Flow from Investing Activities:
- 2018: -$2,337,428,000
- 2019: -$1,436,000,000
- 2020: -$3,132,000,000
- 2021: -$7,868,000,000
- 2022: -$11,973,000,000

Operating Cash Flow:
- 2018: $2,097,802,000
- 2019: $2,405,000,000
- 2020: $5,943,000,000
- 2021: $11,497,000,000
- 2022: $14,724,000,000

Net Cash Flow from Operating Activities:
- 2018: $2,097,802,000
- 2019: $2,405,000,000
- 2020: $5,943,000,000
- 2021: $11,497,000,000
- 2022: $14,724,000,000

Free Cash Flow:
- 2018: -$221,7

In [46]:
response = agent.chat(
    """
    What can you tell me about TSLA's ratios from the data? 
    """
)
print(response)

[1;3;38;5;200mThought: I need to use a tool to help me answer the question.
Action: list_ratios
Action Input: {}
[0m[1;3;34mObservation: ['asset turnover', 'calendarYear', 'capital expenditure coverage ratio', 'cash conversion cycle', 'cash flow coverage ratios', 'cash flow to debt ratio', 'cash per share', 'cash ratio', 'company equity multiplier', 'current ratio', 'days of inventory outstanding', 'days of payables outstanding', 'days of sales outstanding', 'debt equity ratio', 'debt ratio', 'dividend paid and capex coverage ratio', 'dividend payout ratio', 'dividend yield', 'ebit per revenue', 'ebt per ebit', 'effective tax rate', 'enterprise value multiple', 'fixed asset turnover', 'free cash flow operating cash flow ratio', 'free cash flow per share', 'gross profit margin', 'interest coverage', 'inventory turnover', 'long term debt to capitalization', 'net income per ebt', 'net profit margin', 'operating cash flow per share', 'operating cash flow sales ratio', 'operating cycle',

In [47]:
response = agent.chat(
    """
    What about TSLA's margin? How large is it? Is it increasing over time?
    """
)
print(response)

[1;3;38;5;200mThought: I need to use a tool to help me answer the question.
Action: list_ratios
Action Input: {}
[0m[1;3;34mObservation: ['asset turnover', 'calendarYear', 'capital expenditure coverage ratio', 'cash conversion cycle', 'cash flow coverage ratios', 'cash flow to debt ratio', 'cash per share', 'cash ratio', 'company equity multiplier', 'current ratio', 'days of inventory outstanding', 'days of payables outstanding', 'days of sales outstanding', 'debt equity ratio', 'debt ratio', 'dividend paid and capex coverage ratio', 'dividend payout ratio', 'dividend yield', 'ebit per revenue', 'ebt per ebit', 'effective tax rate', 'enterprise value multiple', 'fixed asset turnover', 'free cash flow operating cash flow ratio', 'free cash flow per share', 'gross profit margin', 'interest coverage', 'inventory turnover', 'long term debt to capitalization', 'net income per ebt', 'net profit margin', 'operating cash flow per share', 'operating cash flow sales ratio', 'operating cycle',

In [49]:
response = agent.chat(
    """
    Is TLSA profitable? How profitable? Is the margin growing or decreasing?
    """
)
print(response)

[1;3;38;5;200mThought: I need to use a tool to help me answer the question.
Action: vector_index_TSLA_earnings
Action Input: {'input': 'net_income'}
[0m[1;3;34mObservation: 2018-12-31, -1062582000.0
2019-12-31, -775000000.0
2020-12-31, 862000000.0
2021-12-31, 5644000000.0
2022-12-31, 12587000000.0
[0m[1;3;38;5;200mThought: I can answer without using any more tools.
Response: TSLA has been profitable in recent years. The net income for TSLA was negative in 2018 and 2019, but turned positive in 2020 with a net income of $862 million. The profitability further increased in 2021 with a net income of $5.644 billion and in 2022 with a net income of $12.587 billion. Therefore, TSLA has shown significant profitability growth over the years.
[0mTSLA has been profitable in recent years. The net income for TSLA was negative in 2018 and 2019, but turned positive in 2020 with a net income of $862 million. The profitability further increased in 2021 with a net income of $5.644 billion and in 2

In [50]:
response = agent.chat(
    """
    Does TSLA have debt? How much? Is it expensive for the company to finance?
    """
)
print(response)

[1;3;38;5;200mThought: I need to use a tool to help me answer the question.
Action: vector_index_TSLA_cash_statement
Action Input: {'input': 'debt'}
[0m[1;3;34mObservation: The debt repayment figures for the specified periods are as follows:

- On December 31, 2018, the debt repayment amount was $6,087,029,000.
- On December 31, 2019, the debt repayment amount was $389,000,000.
- On December 31, 2020, the debt repayment amount was $12,201,000,000.
- On December 31, 2021, the debt repayment amount was $14,615,000,000.
- On December 31, 2022, the debt repayment amount was $3,866,000,000.
[0m[1;3;38;5;200mThought: I need to use a tool to help me answer the question.
Action: vector_index_TSLA_ratios
Action Input: {'input': 'interest_coverage_ratio'}
[0m[1;3;34mObservation: 29.062039155860667
[0m[1;3;38;5;200mThought: I can answer without using any more tools.
Response: Yes, TSLA has debt. The debt repayment amounts for TSLA have varied over the years, with the highest repayment am

In [71]:
response = agent.chat(
    """
    From the data available to you via function calling, what financial risks are there for TSLA?
    """
)
print(response)

[1;3;38;5;200mThought: I can use the sub_question_query_engine tool to help me analyze the financial risks for TSLA.
Action: sub_question_query_engine
Action Input: {'input': 'What are the financial risks for TSLA?'}
[0mGenerated 6 sub questions.
[1;3;38;2;237;90;200m[vector_index_TSLA_ratios] Q: What is the current debt-to-equity ratio for TSLA?
[0m[1;3;38;2;90;149;237m[vector_index_TSLA_cash_statement] Q: What are the trends in TSLA's operating cash flow over the past five years?
[0m[1;3;38;2;11;159;203m[vector_index_TSLA_earnings] Q: Has TSLA's earnings been volatile over the past few quarters?
[0m[1;3;38;2;155;135;227m[vector_index_TSLA_ratios] Q: What is the interest coverage ratio for TSLA?
[0m[1;3;38;2;237;90;200m[vector_index_TSLA_cash_statement] Q: What are the major components of TSLA's cash outflows?
[0m[1;3;38;2;90;149;237m[vector_index_TSLA_earnings] Q: How does TSLA's earnings compare to its debt obligations?
[0m[1;3;38;2;155;135;227m[vector_index_TSLA_rati

In [1]:
response = query_engine.query(
    """
    Summarize all of your previous findings above in a neat, succint bullet-point list.
    """
)

print("=======")
print(response)

NameError: name 'query_engine' is not defined

In [58]:
print(response)

TSLA has $7,552,000,000 of free cash flow on hand for the year ending December 31, 2022. This amount is growing, as evidenced by the consistent upward trend in free cash flow over the past four quarters, increasing from -$221,714,000 in 2018 to $968,000,000 in 2019, $2,701,000,000 in 2020, $3,483,000,000 in 2021, and then to $7,552,000,000 in 2022.
