In [1]:
from llama_index import SimpleDirectoryReader

  from .autonotebook import tqdm as notebook_tqdm


In [94]:
from llama_index import LLMPredictor, ServiceContext
from langchain import OpenAI
# Configure the completion model once, then wrap it in a ServiceContext
# so other cells can refer to it through `service_context`.
llm_predictor = LLMPredictor(
    llm=OpenAI(
        temperature=0,
        model_name="text-davinci-003",
        max_tokens=512,
    )
)
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
)

# Lyft

In [4]:
# Point the reader at the single Lyft PDF under ../data/10k.
reader = SimpleDirectoryReader(
    input_files=["../data/10k/lyft_2021.pdf"],
)

In [5]:
# Load the file configured on `reader`; the result is kept in `docs`
# and handed to the index builder in a later cell.
docs = reader.load_data()

In [8]:
from llama_index import GPTVectorStoreIndex

In [10]:
# Build a vector-store index from the loaded Lyft documents.
# NOTE(review): no service_context argument is passed, so the
# `service_context` configured near the top of the notebook is not used
# by this index -- confirm that is intended.
index = GPTVectorStoreIndex.from_documents(docs)

In [11]:
# Create a query engine over the Lyft index; no options are passed,
# so whatever defaults the library applies are in effect.
query_engine = index.as_query_engine()

In [39]:
# Ask for the latest revenue growth and request the page it came from.
response = query_engine.query(
    "what is the revenue growth in the last year, show me the reference page number"
)

In [40]:
from llama_index.response.pprint_utils import pprint_response

In [41]:
# Pretty-print the answer together with the source nodes it was drawn from.
pprint_response(response, show_source=True)

Final Response: The revenue growth in the last year is 36%, as
referenced on page 63.
______________________________________________________________________
Source Node 1/2
Document ID: e100795b-0e47-4516-b613-47838060fced
Similarity: 0.8044517437695049
Text: page_label: 63  Results of OperationsThe following table summar
izes our historical consolidated statements of operations data:Year
Ended December 31, 2021 2020 2019 (in thousands) Revenue $ 3,208,323 $
2,364,681 $ 3,615,960 Costs and expenses Cost of revenue 1,649,532
1,447,516 2,176,469 Operations and support 402,233 453,963 636,116
Research a...
______________________________________________________________________
Source Node 2/2
Document ID: 4a47a6b4-c6af-40a2-8c55-4b324687c69f
Similarity: 0.8030018672359885
Text: page_label: 19  changing industries. If our assumptions
regarding these risks and uncertainties, which we use to plan and
operate ourbusiness,  are incorrect or change, or if we do not address
these risks successful

# Uber

In [44]:
# Same loading steps as for Lyft, this time for the Uber PDF.
# `reader` is rebound here; the loaded documents go to `uber_docs`.
reader = SimpleDirectoryReader(
    input_files=["../data/10k/uber_2021.pdf"],
)
uber_docs = reader.load_data()

In [45]:
# Build a separate vector-store index from the Uber documents.
# NOTE(review): as with the Lyft index, no service_context is passed,
# so the notebook's configured `service_context` is not applied -- confirm.
uber_index = GPTVectorStoreIndex.from_documents(uber_docs)

In [48]:
# Rebinds `query_engine` to the Uber index. From this cell onward the
# Lyft engine created earlier is no longer reachable under this name.
query_engine = uber_index.as_query_engine()

In [49]:
# Repeat the revenue-growth question, now against the Uber index.
response = query_engine.query(
    "what is the revenue growth in the last year, "
    "show me the reference page number"
)

In [50]:
# Pretty-print the Uber answer along with its source nodes.
pprint_response(response, show_source=True)

Final Response: The revenue growth in the last year was 57%, as
referenced on page 57.
______________________________________________________________________
Source Node 1/2
Document ID: d27df4a8-52a0-42e5-9236-54578bd527de
Similarity: 0.8059365413378133
Text: page_label: 57  The following table sets forth the components of
our consolidated statements of operations for each of the periods
presented as a percentage of revenue : Year Ended December 31, 2020
2021 Revenue 100 %100 %Costs and expenses Cost of revenue, exclusive
of dep reciation and amortization shown separately below46 %54
%Operations and ...
______________________________________________________________________
Source Node 2/2
Document ID: 689b7af6-964b-4cac-814b-0594848210c2
Similarity: 0.7980283554264312
Text: page_label: 60  Provision for (Benefit from) Income TaxesYear
Ended December 31, 2020 to 2021 % Change (In millions, except
percentages) 2020 2021 Provision for (benefit fro m) income taxes$
(192) $ (492) (156) % E

In [51]:
# Follow-up question against the Uber query engine.
response = query_engine.query(
    "what's key factor for revenue growth",
)

In [52]:
# Print only the answer text; show_source is not passed here, and the
# captured output below contains no source-node sections.
pprint_response(response)

Final Response: Growing supply and demand on the platform, increasing
existing platform users' activity on the platform, continuing to
introduce the platform to new markets, providing high-quality support
to Drivers, consumers, merchants, shippers, and carriers, expanding
the business and increasing market share and category position,
competing with the products and offerings of, and pricing and
incentives offered by, competitors, developing new products,
offerings, and technologies, identifying and acquiring or investing in
businesses, products, offerings, or technologies that could complement
or expand the platform, penetrating suburban and rural areas and
increasing the number of rides taken on the platform outside
metropolitan areas, reducing the costs of the Mobility offering to
better compete with personal vehicle ownership and usage and other
low-cost alternatives like public transportation, maintaining existing
local regulations in key markets where the company operates, enteri

In [None]:
# Dump the raw text of the third loaded Uber document (index 2); the same
# text is later substituted into the table-of-contents prompt.
# NOTE(review): presumably this is the table-of-contents page -- confirm.
print(uber_docs[2].text)

In [95]:
# Prompt for extracting a table of contents from one parsed PDF page.
# `{pdf_text}` is filled in with str.format(); the doubled braces `{{ }}`
# render as literal braces around the example JSON object.
# The page text is wrapped in an opening and a closing marker so it cannot
# run into the instructions, and the example is a closed ```json fence
# holding valid JSON (no trailing comma), so a reply that mirrors it can be
# parsed as-is.
prompt_template = """
Extract the table of contents given the following parsed PDF page

<PDF Input>
{pdf_text}
</PDF Input>

Return response formatted as Markdown json block, similar to 
```json
{{
    "Item 1. Business": 4,
    "Item 1A. Risk Factors": 11
}}
```
"""

# Actually call `predict` (rather than only referencing the method) so that
# `response` holds the model's output for the table-of-contents prompt.
# The prompt is filled with the text of uber_docs[2], the same document the
# notebook prints for inspection, and is sent to the LLM configured on
# `service_context`.
response = service_context.llm_predictor.llm.predict(
    prompt_template.format(pdf_text=uber_docs[2].text)
)


In [98]:
from langchain import OpenAI

In [99]:
# Fresh OpenAI LLM built with no arguments, i.e. library defaults. Unlike the
# LLM configured for `service_context`, no temperature, model_name or
# max_tokens is specified here.
llm = OpenAI()

In [None]:
# Fill the prompt with the text of uber_docs[2] and send it to `llm`.
# As the cell's last expression, the returned value is what gets displayed.
llm.predict(
    prompt_template.format(
        pdf_text=uber_docs[2].text,
    )
)