# 10K Analysis

In [418]:
from llama_index import SimpleDirectoryReader, LLMPredictor, ServiceContext, GPTVectorStoreIndex
from llama_index.response.pprint_utils import pprint_response
from langchain import OpenAI

In [419]:
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="text-davinci-003", max_tokens=-1, streaming=True))
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)

# Lyft

In [420]:
lyft_docs = SimpleDirectoryReader(input_files=["../data/10k/lyft_2021.pdf"]).load_data()

In [240]:
lyft_index = GPTVectorStoreIndex.from_documents(lyft_docs)

In [421]:
lyft_engine = lyft_index.as_query_engine(similarity_top_k=3)

In [451]:
response = lyft_engine.query('what is the revenue growth in the last year, show me the reference page number')

In [452]:
pprint_response(response, show_source=True)

Final Response: The revenue growth in the last year is 36%, as
referenced on page 63.
______________________________________________________________________
Source Node 1/3
Document ID: 6a1711e2-51be-46c1-8152-72361c192ed9
Similarity: 0.8045102189652867
Text: page_label: 63  Results of OperationsThe following table summar
izes our historical consolidated statements of operations data:Year
Ended December 31, 2021 2020 2019 (in thousands) Revenue $ 3,208,323 $
2,364,681 $ 3,615,960 Costs and expenses Cost of revenue 1,649,532
1,447,516 2,176,469 Operations and support 402,233 453,963 636,116
Research a...
______________________________________________________________________
Source Node 2/3
Document ID: c15baf48-6db4-4892-88ba-8be2a6f195a5
Similarity: 0.8030623987053549
Text: page_label: 19  changing industries. If our assumptions
regarding these risks and uncertainties, which we use to plan and
operate ourbusiness,  are incorrect or change, or if we do not address
these risks successful

# Uber

In [424]:
uber_docs = SimpleDirectoryReader(input_files=["../data/10k/uber_2021.pdf"]).load_data()

In [425]:
uber_index = GPTVectorStoreIndex.from_documents(uber_docs)

In [429]:
uber_engine = uber_index.as_query_engine(similarity_top_k=3)

In [430]:
response = uber_engine.query("what is the revenue growth in the last year, show me the reference page number")

In [431]:
pprint_response(response, show_source=True)

Final Response: The revenue growth in the last year was 57%, as
referenced on page 57.
______________________________________________________________________
Source Node 1/3
Document ID: 0c8abeea-57cd-41bf-a94c-1c5d195bb3ae
Similarity: 0.8059365413378133
Text: page_label: 57  The following table sets forth the components of
our consolidated statements of operations for each of the periods
presented as a percentage of revenue : Year Ended December 31, 2020
2021 Revenue 100 %100 %Costs and expenses Cost of revenue, exclusive
of dep reciation and amortization shown separately below46 %54
%Operations and ...
______________________________________________________________________
Source Node 2/3
Document ID: 012bb7d8-36ca-42ee-8d9e-9d4754ce599f
Similarity: 0.7980283554264312
Text: page_label: 60  Provision for (Benefit from) Income TaxesYear
Ended December 31, 2020 to 2021 % Change (In millions, except
percentages) 2020 2021 Provision for (benefit fro m) income taxes$
(192) $ (492) (156) % E

# Compare and Contrast

In [149]:
from llama_index import ComposableGraph, GPTListIndex

In [471]:
graph = ComposableGraph.from_indices(
    GPTListIndex,
    children_indices=[lyft_index, uber_index],
    index_summaries=[
        "Provides information about Lyft financials for year 2021",
        "Provides information about Uber financials for year 2021",
    ]
)

In [472]:
from llama_index.query_engine.transform_query_engine import TransformQueryEngine
from llama_index.indices.query.query_transform.base import DecomposeQueryTransform

# define decompose_transform
decompose_transform = DecomposeQueryTransform(verbose=True)

# define custom query engines
custom_query_engines = {}
for index in [lyft_index, uber_index]:
    query_engine = index.as_query_engine(service_context=service_context)
    query_engine = TransformQueryEngine(
        query_engine,
        query_transform=decompose_transform,
        transform_extra_info={'index_summary': index.index_struct.summary},
    )
    custom_query_engines[index.index_id] = query_engine

custom_query_engines[graph.root_id] = graph.root_index.as_query_engine(
    service_context=service_context,
    streaming=True,
)

In [473]:
# define graph
g_engine = graph.as_query_engine(
    custom_query_engines=custom_query_engines
)

In [489]:
response = g_engine.query('Compare Uber and Lyft revenue growth in 2021.')

[33;1m[1;3m> Current query: Compare Uber and Lyft revenue growth in 2021.
[0m[38;5;200m[1;3m> New query:  What is Lyft's revenue growth in 2021?
[0m[33;1m[1;3m> Current query: Compare Uber and Lyft revenue growth in 2021.
[0m[38;5;200m[1;3m> New query:  What is Lyft's revenue growth in 2021?
[0m[33;1m[1;3m> Current query: Compare Uber and Lyft revenue growth in 2021.
[0m[38;5;200m[1;3m> New query:  What is Uber's revenue growth in 2021?
[0m[33;1m[1;3m> Current query: Compare Uber and Lyft revenue growth in 2021.
[0m[38;5;200m[1;3m> New query:  What is Uber's revenue growth in 2021?
[0m

In [490]:
response.print_response_stream()


Uber's revenue growth in 2021 is higher than Lyft's revenue growth in 2021, at 57% compared to 38.2%.

# Appendix

## Using Agent

In [491]:
llm = OpenAI(temperature=0., max_tokens=-1)

In [497]:
from llama_index.langchain_helpers.agents import IndexToolConfig, LlamaIndexTool, LlamaToolkit

uber_config = IndexToolConfig(
    query_engine=uber_engine, 
    name=f"Uber 10K 2021",
    description=f"Provides information about Lyft financials for year 2021",
    tool_kwargs={"return_direct": False}
)

lyft_config = IndexToolConfig(
    query_engine=lyft_engine,
    name=f"Lyft 10K 2021",
    description=f"Provides information about Uber financials for year 2021",
    tool_kwargs={"return_direct": False}
)

In [498]:
toolkit = LlamaToolkit(
    index_configs=[uber_config, lyft_config],
)

In [499]:
from llama_index.langchain_helpers.agents import create_llama_agent

agent_chain = create_llama_agent(
    toolkit,
    llm,
    memory=memory,
    verbose=True
)

In [500]:
agent_chain.run(input="Compare Uber and Lyft revenue growth in 2021.")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to look at the financials of both companies.
Action: Uber 10K 2021
Action Input: Revenue growth[0m
Observation: [36;1m[1;3min 2021 was primarily driven by what?

Revenue growth in 2021 was primarily driven by an increase in Delivery Gross Bookings of 71%, or 66% on a constant currency basis, due to an increase in food delivery orders and higher basket sizes as a result of stay-at-home order demand related to COVID-19, as well as continued expansion across U.S. and international markets. Additionally, Mobility Gross Bookings growth of 38%, or 36% on a constant currency basis, due to increases in Trip volumes as the business recovers from the impacts of COVID-19, contributed to the revenue growth.[0m
Thought:[32;1m[1;3m I need to compare this to Lyft's revenue growth.
Action: Lyft 10K 2021
Action Input: Revenue growth[0m
Observation: [33;1m[1;3m
Revenue increased $843.6 million, or 36%, in 2021 as compared to t

"Uber's revenue growth in 2021 was primarily driven by an increase in Delivery Gross Bookings of 71%, or 66% on a constant currency basis, while Lyft's revenue increased $843.6 million, or 36%, in 2021 as compared to the prior year, driven primarily by the significant increase in the number of Active Riders in 2021 as compared to the prior year."

## Playground 

In [533]:
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader
from llama_index.tools import QueryEngineTool, ToolMetadata
from llama_index.query_engine.sub_question_query_engine import SubQuestionQueryEngine

ImportError: cannot import name 'ToolMetadata' from 'llama_index.tools' (/Users/suo/dev/llama_index/llama_index/tools/__init__.py)

In [523]:
prompt_template = """
Given the user question, and a list of tools, output a list of relevant sub-questions that when composed can help answer the full question:


# Example 1

<Tools>
```json
{{
    "uber 10k": "Provides information about Uber financials for year 2021"
    "lyft 10k": "Provides information about Lyft financials for year 2021"
}}

<User Question>
Compare and constrast the revenue growth and EBITDA of Uber and Lyft for year 2021

<Output>
```json
[
    {{
        "question": "What is the revenue growth of Uber",
        "tool": "uber 10k"
    }},
    {{
        "question": "What is the EbITDA of Uber",
        "tool": "uber 10k"
    }},
    {{
        "question": "What is the revenue growth of Lyft",
        "tool": "lyft 10k"
    }},
    {{
        "question": "What is the EbITDA of Lyft",
        "tool": "lyft 10k"
    }},
    
]
```

# Example 2
<Tools>
```json
{{
    "uber 10k": "Provides information about Uber financials for year 2021"
    "lyft 10k": "Provides information about Lyft financials for year 2021"
}}

<User Question>
{question}

<Output>
```
"""

In [530]:
response = llm.predict(prompt_template.format(question='Which customer segments and geographies grew the fastest?'))

In [531]:
print(response)

[
    {
        "question": "What customer segments grew the fastest for Uber",
        "tool": "uber 10k"
    },
    {
        "question": "What geographies grew the fastest for Uber",
        "tool": "uber 10k"
    },
    {
        "question": "What customer segments grew the fastest for Lyft",
        "tool": "lyft 10k"
    },
    {
        "question": "What geographies grew the fastest for Lyft",
        "tool": "lyft 10k"
    },
    
]


In [532]:
lyft_engine.query("What customer segments grew the fastest for Lyft")

Response(response="\nThe customer segments that grew the fastest for Lyft were riders and drivers. Riders are diverse and dynamic, representing all adult age groups and backgrounds, and drivers are active members of their communities. Both riders and drivers benefit from Lyft's technology, insurance, community standards, and support.", source_nodes=[NodeWithScore(node=Node(text=' demographic  shifts,  such  as  the  growingpercentage of the U.S. population \nthat is accustomed to on-demand services and has digital-first preferences.•\nGrow Our Share of Consumers’ Transportation Spend. Lyft’s  transportation network is designed to address a wide range of mobility needs. The Lyft networkspans\n rideshare, car rentals, bikes, scooters, transit and vehicle services. By integrating the fragmented transportation ecosystem, we are well positioned to deliverthe best holistic exper\nience to all of our riders and to capture significantly more of our market opportunity.•\nDeliver  Increasing  Va