In [35]:
import os
from getpass import getpass

import nest_asyncio

# SECURITY: never hardcode the API key in the notebook. The key that used to be
# written here must be treated as leaked and revoked in the OpenAI dashboard.
# Prefer a key already exported in the environment; otherwise prompt for it
# without echoing, so it never ends up in the saved notebook.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Patch asyncio so that nested event loops are allowed inside the notebook.
nest_asyncio.apply()

In [2]:
# set text wrapping
from IPython.display import HTML, display


def set_css(info=None):
    """Inject CSS so that ``<pre>`` output wraps long lines.

    Args:
        info: Optional argument supplied by IPython when this function runs as
            a ``pre_run_cell`` callback. It is ignored; the default of ``None``
            keeps a direct ``set_css()`` call working.
    """
    # The rule must live inside a <style> element: without it the browser
    # shows the CSS source as visible text and never applies it.
    display(
        HTML(
            """
  <style>
    pre {
        white-space: pre-wrap;
    }
  </style>
  """
        )
    )


# Register set_css as a callback for IPython's "pre_run_cell" event.
get_ipython().events.register("pre_run_cell", set_css)

In [34]:
from llama_index.readers.file import UnstructuredReader
from pathlib import Path

# Years whose Uber filings are loaded (one HTML file per year).
years = [2022, 2021, 2020, 2019]

loader = UnstructuredReader()
doc_set = {}  # year -> documents loaded for that year
all_docs = []  # documents of every year, in the order of `years`
for year in years:
    filing_path = Path(f"../data/UBER/UBER_{year}.html")
    year_docs = loader.load_data(file=filing_path, split_documents=False)
    # Replace each document's metadata with just the filing year.
    for doc in year_docs:
        doc.metadata = {"year": year}
    doc_set[year] = year_docs
    all_docs += year_docs

In [73]:
# Total number of documents loaded across all years.
len(all_docs)

4

In [36]:
# Initialize one simple vector index per year.
# This cell is safe to re-run: a year is only embedded (paid API calls) when no
# persisted index exists for it yet; otherwise the copy on disk is reused.
# Delete ./storage/<year> to force a rebuild, e.g. after changing the chunking
# or embedding settings below.
import os

from llama_index.core import (
    Settings,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

Settings.chunk_size = 512
Settings.chunk_overlap = 64
Settings.llm = OpenAI(model="gpt-3.5-turbo")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")

index_set = {}
for year in years:
    persist_dir = f"./storage/{year}"
    if os.path.isdir(persist_dir):
        # Reuse the index persisted by a previous run.
        storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
        cur_index = load_index_from_storage(storage_context)
    else:
        # First run for this year: embed the documents and persist the result.
        storage_context = StorageContext.from_defaults()
        cur_index = VectorStoreIndex.from_documents(
            doc_set[year],
            storage_context=storage_context,
        )
        storage_context.persist(persist_dir=persist_dir)
    index_set[year] = cur_index

In [76]:
# Number of documents loaded for the 2019 filing.
len(doc_set[2019])

1

In [None]:
# Rebuild `index_set` from the indices stored under ./storage/.
from llama_index.core import load_index_from_storage

index_set = {}
for year in years:
    # Each year is read from its own directory.
    year_dir = f"./storage/{year}"
    storage_context = StorageContext.from_defaults(persist_dir=year_dir)
    cur_index = load_index_from_storage(storage_context)
    index_set[year] = cur_index

In [37]:
from llama_index.core.tools import QueryEngineTool, ToolMetadata

def _year_tool(year):
    """Wrap the vector index of ``year`` as a query-engine tool."""
    return QueryEngineTool(
        query_engine=index_set[year].as_query_engine(),
        metadata=ToolMetadata(
            name=f"vector_index_{year}",
            description=(
                "useful for when you want to answer queries about the"
                f" {year} SEC 10-K for Uber"
            ),
        ),
    )


# One tool per filing year, in the same order as `years`.
individual_query_engine_tools = [_year_tool(year) for year in years]

In [39]:
from llama_index.core.query_engine import SubQuestionQueryEngine

# Sub-question query engine built on top of the per-year tools.
query_engine = SubQuestionQueryEngine.from_defaults(query_engine_tools=individual_query_engine_tools)

In [40]:
# Expose the sub-question engine itself as a tool for multi-document questions.
sub_question_metadata = ToolMetadata(
    name="sub_question_query_engine",
    description=(
        "useful for when you want to answer queries that require analyzing"
        " multiple SEC 10-K documents for Uber"
    ),
)
query_engine_tool = QueryEngineTool(
    query_engine=query_engine,
    metadata=sub_question_metadata,
)

In [41]:
# Full toolbox: the per-year tools followed by the sub-question tool.
tools = [*individual_query_engine_tools, query_engine_tool]

In [42]:
from llama_index.agent.openai import OpenAIAgent

# Build the OpenAI agent over every tool; with verbose=True the recorded
# outputs below include the function calls and their results.
agent = OpenAIAgent.from_tools(tools, verbose=True)

In [43]:
# Single-year question.
question = "What were some of the biggest risk factors in 2020 for Uber?"
response = agent.chat(question)
print(response)

Added user message to memory: What were some of the biggest risk factors in 2020 for Uber?
=== Calling Function ===
Calling function: vector_index_2020 with args: {"input":"biggest risk factors"}
Got output: The biggest risk factors include the adverse effects of the COVID-19 pandemic on the business, potential reclassification of Drivers, intense competition in the industries, significant losses and uncertain path to profitability, challenges in attracting and retaining platform users, operational and compliance challenges, negative impact on brand reputation, difficulties in managing growth, safety incidents affecting platform users, risks associated with substantial investments in new offerings and technologies, and the uncertainty surrounding the ultimate impact of the pandemic on business operations and financial results.

Some of the biggest risk factors for Uber in 2020 included the adverse effects of the COVID-19 pandemic on the business, potential reclassification of Drivers, 

In [45]:
# Question that compares two filing years.
question = "What were some of the biggest risk factors in 2020 for Uber compare to 2019?"
response = agent.chat(question)
print(response)

Added user message to memory: What were some of the biggest risk factors in 2020 for Uber compare to 2019?
=== Calling Function ===
Calling function: vector_index_2020 with args: {"input": "biggest risk factors"}
Got output: The biggest risk factors include the adverse effects of the COVID-19 pandemic on the business, potential reclassification of Drivers, intense competition in the industries, significant losses and uncertain path to profitability, challenges in maintaining a critical mass of platform users, operational, compliance, and cultural challenges, negative impact on brand reputation, difficulties in optimizing organizational structure and managing growth, safety incidents affecting platform users, risky investments in new offerings and technologies, and the uncertainty surrounding the pandemic's impact on business operations and financial results.

=== Calling Function ===
Calling function: vector_index_2019 with args: {"input": "biggest risk factors"}
Got output: The bigges

In [63]:
# Ask the agent to describe what it can do.
question = "What can you help me?"
response = agent.chat(question)
print(response)

Added user message to memory: What can you help me?
I can assist you with a wide range of tasks and queries. Here are some examples of how I can help:

1. Providing information and insights on various topics.
2. Answering questions related to specific companies, industries, or financial data.
3. Conducting research on specific subjects or trends.
4. Summarizing and analyzing data from reports or documents.
5. Assisting with decision-making by providing relevant information.
6. Offering guidance on best practices or strategies in different areas.
7. Exploring and comparing data from different sources.
8. Generating reports or summaries based on specific criteria.

Feel free to ask me anything you need help with, and I'll do my best to assist you!


In [47]:
# Check whether the agent recalls the earlier turn of the conversation.
question = "Do you remember my previuous question?"
response = agent.chat(question)
print(response)

Added user message to memory: Do you remember my previuous question?
Yes, your previous question was about the biggest risk factors for Uber in 2020 compared to 2019. If you have any more questions or need further assistance, feel free to let me know!


In [49]:
# Off-topic question, asked in French.
question = "Quelle est la température moyenne en France ce jour là?"
response = agent.chat(question)
print(response)

Added user message to memory: Quelle est la température moyenne en France ce jour là?
Je suis désolé, mais je ne suis pas capable de fournir des informations en temps réel telles que la température actuelle en France. Pour obtenir des données météorologiques précises, je vous recommande de consulter un site Web météorologique fiable ou une application météo. Si vous avez d'autres questions ou besoin d'aide sur un autre sujet, n'hésitez pas à me le faire savoir.


In [55]:
# NOTE(review): the output recorded under this cell echoes a different user
# message than the one sent here, so it appears to come from an earlier
# version of the cell - re-run to refresh it.
question = "Comment"
response = agent.chat(question)
print(response)

Added user message to memory: pouvez vous me fournir la suite de cette phrase:'Because the techniques used to obtain unauthorized access, disable or degrade services, or sabotage systems change frequently and are often unrecognizable unt'?
Je suis désolé, mais je ne dispose pas de la suite de la phrase que vous avez fournie. Si vous avez besoin d'aide pour compléter la phrase ou si vous avez d'autres questions, n'hésitez pas à me le faire savoir afin que je puisse vous aider davantage.


In [82]:
# Inspect the message at index 3 of the agent's chat history.
agent.chat_history[3]

ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, content='Some of the biggest risk factors for Uber in 2020 included the adverse effects of the COVID-19 pandemic on the business, potential reclassification of Drivers, intense competition in the industries, significant losses and uncertain path to profitability, challenges in attracting and retaining platform users, operational and compliance challenges, negative impact on brand reputation, difficulties in managing growth, safety incidents affecting platform users, risks associated with substantial investments in new offerings and technologies, and the uncertainty surrounding the ultimate impact of the pandemic on business operations and financial results.', additional_kwargs={})

In [66]:
from llama_index.core.agent import ReActAgent
# ReAct agent over the same tools, with an explicitly chosen LLM.
custom_agent = ReActAgent.from_tools(
    tools,
    llm=OpenAI(model="gpt-3.5-turbo"),
    verbose=True,
)

In [67]:
# Same off-topic French question, this time sent to the ReAct agent.
question = "Quelle est la température moyenne en France ce jour là?"
response = custom_agent.chat(question)
print(response)

[1;3;38;5;200mThought: The current language of the user is: French. I need to use a tool to help me answer the question.
Action: sub_question_query_engine
Action Input: {'input': 'Quelle est la température moyenne en France ce jour là?'}
[0mGenerated 4 sub questions.
[1;3;38;2;237;90;200m[vector_index_2022] Q: What is the average temperature in France on that day?
[0m[1;3;38;2;90;149;237m[vector_index_2021] Q: What is the average temperature in France on that day?
[0m[1;3;38;2;11;159;203m[vector_index_2020] Q: What is the average temperature in France on that day?
[0m[1;3;38;2;155;135;227m[vector_index_2019] Q: What is the average temperature in France on that day?
[0m[1;3;38;2;155;135;227m[vector_index_2019] A: I'm unable to provide an answer to that query as it is not related to the context information provided.
[0m[1;3;38;2;237;90;200m[vector_index_2022] A: I'm unable to provide an answer to the query as there is no information in the context provided regarding the aver

In [60]:
# Ask the ReAct agent to describe what it can do.
question = "What can you help me?"
response = custom_agent.chat(question)
print(response)

[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: sub_question_query_engine
Action Input: {'input': 'What types of assistance can you provide?'}
[0mGenerated 4 sub questions.
[1;3;38;2;237;90;200m[vector_index_2022] Q: What information is available in the 2022 SEC 10-K for Uber?
[0m[1;3;38;2;90;149;237m[vector_index_2021] Q: What information is available in the 2021 SEC 10-K for Uber?
[0m[1;3;38;2;11;159;203m[vector_index_2020] Q: What information is available in the 2020 SEC 10-K for Uber?
[0m[1;3;38;2;155;135;227m[vector_index_2019] Q: What information is available in the 2019 SEC 10-K for Uber?
[0m[1;3;38;2;237;90;200m[vector_index_2022] A: The 2022 SEC 10-K for Uber includes information about the company's pilot programs for Drivers to engage in dialogue, regular meetings with Driver associations, surveys for feedback on the app and support services, as well as a risk factor related to attrac

In [64]:
from llama_index.core.agent import AgentRunner

# Third variant: an agent created through AgentRunner.from_llm.
agent3 = AgentRunner.from_llm(
    tools,
    llm=OpenAI(model="gpt-3.5-turbo"),
    verbose=True,
)

In [65]:
# Ask the AgentRunner-based agent to describe what it can do.
question = "What can you help me?"
response = agent3.chat(question)
print(response)

Added user message to memory: What can you help me?
I can assist you with a variety of tasks related to analyzing SEC 10-K documents for Uber. You can ask me questions about specific information in the 10-K reports, compare data across different years, or analyze trends and insights from the reports. Just let me know what specific information or analysis you are looking for, and I'll do my best to help you!


In [68]:
# Same off-topic French question, sent to the AgentRunner-based agent.
question = "Quelle est la température moyenne en France ce jour là?"
response = agent3.chat(question)
print(response)

Added user message to memory: Quelle est la température moyenne en France ce jour là?
Je suis désolé, mais je ne suis pas en mesure de fournir des informations en temps réel telles que la température actuelle en France. Je suis spécialisé dans l'analyse des documents SEC 10-K pour Uber. Si vous avez des questions ou des requêtes liées à ces rapports, n'hésitez pas à me les poser. Je serai ravi de vous aider avec des informations sur Uber à partir de ces documents.


In [98]:
# Show the messages currently held in the ReAct agent's chat history.
custom_agent.chat_history

[ChatMessage(role=<MessageRole.USER: 'user'>, content='Quelle est la température moyenne en France ce jour là?', additional_kwargs={}),
 ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, content="Je ne peux pas répondre à la question car il n'y a pas d'informations spécifiques sur la température moyenne en France ce jour-là.", additional_kwargs={})]

In [87]:
from llama_index.core.storage.chat_store import SimpleChatStore
from llama_index.core.memory import ChatMemoryBuffer

# Memory buffer backed by an explicit chat store, using the "user1" key.
chat_store = SimpleChatStore()
chat_memory = ChatMemoryBuffer.from_defaults(
    chat_store=chat_store,
    chat_store_key="user1",
    token_limit=3000,
)

# Rebinds `agent3` to a new runner that uses this memory.
agent3 = AgentRunner.from_llm(
    tools,
    llm=OpenAI(model="gpt-3.5-turbo"),
    memory=chat_memory,
    verbose=True,
)


In [116]:
# Short French greeting sent to the memory-backed agent.
question = "Salut?"
response = agent3.chat(question)
print(response)

Added user message to memory: Salut?


Retrying llama_index.llms.openai.base.OpenAI._chat in 0.2410541831543428 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}.
Retrying llama_index.llms.openai.base.OpenAI._chat in 1.3119118499648639 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}.


RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [89]:
# Display the current contents of the chat store.
chat_store

SimpleChatStore(store={})

In [100]:
# List the attributes available on the agent's memory object.
dir(agent.memory)

['Config',
 '__abstractmethods__',
 '__annotations__',
 '__class__',
 '__class_vars__',
 '__config__',
 '__custom_root_type__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__exclude_fields__',
 '__fields__',
 '__fields_set__',
 '__format__',
 '__ge__',
 '__get_validators__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__include_fields__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__json_encoder__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__post_root_validators__',
 '__pre_root_validators__',
 '__pretty__',
 '__private_attributes__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__repr_args__',
 '__repr_name__',
 '__repr_str__',
 '__rich_repr__',
 '__schema_cache__',
 '__setattr__',
 '__setstate__',
 '__signature__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__try_update_forward_refs__',
 '__validators__',
 '_abc_impl',
 '_calculate_keys',
 '_copy_and_set_values',
 '_decompose_class',
 '_enfo

In [111]:
# Keep only the first two messages stored under the "chat_history" key.
chat_test = agent.memory.chat_store.store["chat_history"][:2]

In [112]:
# Size of the truncated slice.
len(chat_test)

2

In [113]:
# Number of messages stored under "chat_history" before the memory is replaced.
len(agent.memory.chat_store.store["chat_history"])

33

In [114]:
# Replace the agent's stored history with the two-message slice.
# NOTE(review): the memory repr shown at the end of the notebook has the second
# kept message as an assistant tool call with no tool response after it -
# confirm the next agent.chat() call accepts such a truncated history.
agent.memory.set(chat_test)

In [115]:
# Number of messages stored under "chat_history" after the memory is replaced.
len(agent.memory.chat_store.store["chat_history"])

2

In [118]:
# Display the agent's memory object, including the messages it now holds.
agent.memory

ChatMemoryBuffer(chat_store=SimpleChatStore(store={'chat_history': [ChatMessage(role=<MessageRole.USER: 'user'>, content='What were some of the biggest risk factors in 2020 for Uber?', additional_kwargs={}), ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, content=None, additional_kwargs={'tool_calls': [ChatCompletionMessageToolCall(id='call_hdHHsuRhqWt8wpNUHXMKH20r', function=Function(arguments='{"input":"biggest risk factors"}', name='vector_index_2020'), type='function')]})]}), chat_store_key='chat_history', token_limit=12288, tokenizer_fn=functools.partial(<bound method Encoding.encode of <Encoding 'cl100k_base'>>, allowed_special='all'))