# Chain-of-Abstraction (CoA) Rundown

In [1]:

import getpass
import os

import nest_asyncio

# Keep an OPENAI_API_KEY that is already exported in the environment; only
# prompt when it is missing or empty. Assigning "" unconditionally would wipe
# a valid key on every run and leave a blank credential in the notebook.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# Patch asyncio for use inside the notebook kernel.
# NOTE(review): presumably needed because later cells run async code while
# Jupyter's own event loop is already running -- confirm.
nest_asyncio.apply()

In [2]:
from llama_index.core import Settings
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

# Register the embedding model and the LLM as global defaults on Settings.
# Settings.llm is read again later when the CoA agent pack is constructed.
embedding_model = OpenAIEmbedding(
    model="text-embedding-3-small",
    embed_batch_size=256,
)
chat_llm = OpenAI(model="gpt-4-turbo", temperature=0.1)

Settings.embed_model = embedding_model
Settings.llm = chat_llm

In [3]:
# Create the local data directory (-p: no error if it already exists) and
# download the Uber and Lyft 2021 filings (PDF) from the llama_index GitHub
# repository into data/10k/. These are the two files indexed further below.
!mkdir -p 'data/10k/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/uber_2021.pdf' -O 'data/10k/uber_2021.pdf'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/lyft_2021.pdf' -O 'data/10k/lyft_2021.pdf'

--2024-08-20 17:47:23--  https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/uber_2021.pdf
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1880483 (1.8M) [application/octet-stream]
Saving to: ‘data/10k/uber_2021.pdf’


2024-08-20 17:47:23 (41.1 MB/s) - ‘data/10k/uber_2021.pdf’ saved [1880483/1880483]

--2024-08-20 17:47:23--  https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/lyft_2021.pdf
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1440303 (1.4M) [appl

In [4]:
from llama_index.core import StorageContext, load_index_from_storage

# Try to reuse the indexes persisted under ./storage by an earlier run.
# If either one cannot be loaded, index_loaded is set to False so that the
# indexes are rebuilt from the PDFs in the following cell.
try:
    storage_context = StorageContext.from_defaults(
        persist_dir="./storage/lyft"
    )
    lyft_index = load_index_from_storage(storage_context)

    storage_context = StorageContext.from_defaults(
        persist_dir="./storage/uber"
    )
    uber_index = load_index_from_storage(storage_context)

    index_loaded = True
except Exception:
    # Catch Exception instead of using a bare `except:` so that
    # KeyboardInterrupt / SystemExit still propagate and the cell can be
    # interrupted; any ordinary load failure falls back to a rebuild.
    index_loaded = False

In [5]:
from llama_parse import LlamaParse
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

# (OPTIONAL) -- Use LlamaParse for loading PDF documents.
# This mapping (file extension -> parser) is not used by default: the
# `file_extractor=file_extractor` arguments in the readers below are commented
# out. To enable it, replace the placeholder api_key with a real key and
# uncomment those arguments.
# NOTE(review): prefer reading the key from an environment variable rather
# than pasting it into the notebook.
file_extractor = {
    ".pdf": LlamaParse(
        result_type="markdown",
        api_key="llx-...",
    )
}

if not index_loaded:
    # No persisted indexes were found: read each filing from disk.
    lyft_reader = SimpleDirectoryReader(
        input_files=["./data/10k/lyft_2021.pdf"],
        # file_extractor=file_extractor,
    )
    lyft_docs = lyft_reader.load_data()

    uber_reader = SimpleDirectoryReader(
        input_files=["./data/10k/uber_2021.pdf"],
        # file_extractor=file_extractor,
    )
    uber_docs = uber_reader.load_data()

    # Build one vector index per company.
    lyft_index = VectorStoreIndex.from_documents(lyft_docs)
    uber_index = VectorStoreIndex.from_documents(uber_docs)

    # Persist both indexes so the next run can load them instead of rebuilding.
    for built_index, target_dir in (
        (lyft_index, "./storage/lyft"),
        (uber_index, "./storage/uber"),
    ):
        built_index.storage_context.persist(persist_dir=target_dir)

In [6]:
from llama_index.core.tools import QueryEngineTool

# One query engine per company index; each query retrieves the top 2 matches.
lyft_engine = lyft_index.as_query_engine(similarity_top_k=2)
uber_engine = uber_index.as_query_engine(similarity_top_k=2)

# Wrap each engine as a named tool. The name and description are what the
# agent sees when it decides which tool to call.
lyft_tool = QueryEngineTool.from_defaults(
    query_engine=lyft_engine,
    name="lyft_10k",
    description=(
        "Provides information about Lyft financials for year 2021. "
        "Use a detailed plain text question as input to the tool."
    ),
)
uber_tool = QueryEngineTool.from_defaults(
    query_engine=uber_engine,
    name="uber_10k",
    description=(
        "Provides information about Uber financials for year 2021. "
        "Use a detailed plain text question as input to the tool."
    ),
)

query_engine_tools = [lyft_tool, uber_tool]

In [11]:
# Requires the llama_index-packs-agents-coa package to be installed.
from llama_index.packs.agents_coa import CoAAgentPack

# Build the Chain-of-Abstraction agent pack over the two 10-K tools, using
# the LLM configured globally on Settings.
coa_llm = Settings.llm
pack = CoAAgentPack(tools=query_engine_tools, llm=coa_llm)

In [12]:
# Ask the agent a question that needs both tools.
question = "How did Ubers revenue growth compare to Lyfts in 2021?"
response = pack.run(question)

==== Available Parsed Functions ====
def lyft_10k(input: string):
   """Provides information about Lyft financials for year 2021. Use a detailed plain text question as input to the tool."""
    ...
def uber_10k(input: string):
   """Provides information about Uber financials for year 2021. Use a detailed plain text question as input to the tool."""
    ...
==== Generated Chain of Abstraction ====
To compare Uber's revenue growth to Lyft's in 2021, we need to obtain the revenue growth figures for both companies for that year.

1. Retrieve Uber's revenue growth for 2021 by querying the Uber financial tool with a specific question about revenue growth:
   [FUNC uber_10k("What was Uber's revenue growth in 2021?") = y1]

2. Retrieve Lyft's revenue growth for 2021 by querying the Lyft financial tool with a similar question:
   [FUNC lyft_10k("What was Lyft's revenue growth in 2021?") = y2]

3. Compare the outputs y1 and y2 to determine which company had higher revenue growth in 2021. This co

In [13]:
# print() converts the response with str() itself.
print(response)

In 2021, Uber's revenue growth was reported at 57%. To compare this with Lyft's growth, we calculate Lyft's percentage increase from their 2020 revenue of $2,364,681,000 to their 2021 revenue of $3,208,323,000. This calculation [(3,208,323,000 - 2,364,681,000) / 2,364,681,000 * 100] results in approximately a 35.7% increase in revenue for Lyft. Therefore, Uber's revenue growth of 57% was higher than Lyft's growth of about 35.7% in 2021.


# Chain-of-Abstraction (CoA) Breakdown

In [54]:
import re
from collections import defaultdict
from typing import Dict, Tuple
import networkx as nx
import json


In [55]:
# Sample chain-of-abstraction plan text used as input for the parsing steps
# below. Tool calls are embedded as `[FUNC tool_name("argument") = placeholder]`,
# which is the pattern the regular expressions in the following cells extract.
# NOTE(review): this appears to be pasted from an agent run similar to the one
# above (the wording differs slightly from the captured output) -- confirm.
reasoning_plans = """To compare Uber's revenue growth to Lyft's in 2021, we need to obtain the revenue growth figures for both companies for that year.

1. Retrieve Uber's revenue growth for 2021 by querying the Uber financial tool with a specific question about revenue growth:
   - [FUNC uber_10k("What was Uber's revenue growth in 2021?") = y1]

2. Retrieve Lyft's revenue growth for 2021 by querying the Lyft financial tool with a similar question about revenue growth:
   - [FUNC lyft_10k("What was Lyft's revenue growth in 2021?") = y2]

3. Compare the revenue growth figures obtained (y1 and y2) to determine which company had higher growth in 2021. This comparison will be done by the reader after the function calls have been executed."""

In [56]:
# Collect the output placeholder (third capture group, e.g. "y1") of every
# `[FUNC name(args) = placeholder]` call found in the plan text.
func_pattern = re.compile(r"\[FUNC (\w+)\((.*?)\) = (\w+)\]")
placeholders = {
    found.group(3) for found in func_pattern.finditer(reasoning_plans)
}

print(placeholders)

{'y1', 'y2'}


In [57]:
# Extract every tool call as a (function name, raw argument text, placeholder)
# tuple, in the order the calls appear in the plan.
func_call_pattern = re.compile(r"\[FUNC (\w+)\((.*?)\) = (\w+)\]")
func_calls = func_call_pattern.findall(reasoning_plans)
print(func_calls)

[('uber_10k', '"What was Uber\'s revenue growth in 2021?"', 'y1'), ('lyft_10k', '"What was Lyft\'s revenue growth in 2021?"', 'y2')]


In [58]:
# Plain-list preview of what will go into the dependency graph: for each call,
# one (placeholder, function name, parsed inputs) record followed by one
# (input, placeholder) pair per input.
graph = []
for tool_name, raw_args, placeholder in func_calls:
    # The raw argument text is a comma-separated list of JSON literals, so
    # wrapping it in brackets lets json.loads parse it as a list.
    parsed_args = json.loads(f"[{raw_args}]")
    graph.append((placeholder, tool_name, parsed_args))
    graph.extend((arg, placeholder) for arg in parsed_args)

In [59]:
# Display the list-based preview. Note that the name `graph` is rebound to a
# networkx DiGraph in the next cell.
graph

[('y1', 'uber_10k', ["What was Uber's revenue growth in 2021?"]),
 ("What was Uber's revenue growth in 2021?", 'y1'),
 ('y2', 'lyft_10k', ["What was Lyft's revenue growth in 2021?"]),
 ("What was Lyft's revenue growth in 2021?", 'y2')]

In [60]:
# Build the dependency graph: each placeholder becomes a node annotated with
# the function to call and its parsed inputs, and every input gets an edge
# pointing at the placeholder that depends on it.
graph = nx.DiGraph()
for tool_name, raw_args, placeholder in func_calls:
    parsed_args = json.loads(f"[{raw_args}]")
    graph.add_node(placeholder, func_name=tool_name, inputs=parsed_args)
    graph.add_edges_from((arg, placeholder) for arg in parsed_args)

In [61]:

# Assign each node an execution level: nodes without predecessors get level 0,
# every other node sits one level above its deepest predecessor. Walking the
# graph in topological order means all predecessors already have a level when
# a node is reached.
# NOTE(review): the `list` default factory does not match the int values stored
# here; it is never triggered because predecessors are always assigned first.
execution_levels = defaultdict(list)
for node in nx.topological_sort(graph):
    parent_levels = (
        execution_levels[pred] for pred in graph.predecessors(node)
    )
    level = max(parent_levels, default=-1) + 1
    execution_levels[node] = level

In [62]:
# Display the node -> execution level mapping computed above.
execution_levels

defaultdict(list,
            {"What was Uber's revenue growth in 2021?": 0,
             "What was Lyft's revenue growth in 2021?": 0,
             'y1': 1,
             'y2': 1})