In [1]:
import os
import sys
import random
from langchain_community.vectorstores import Chroma
from langchain_experimental.utilities import PythonREPL
from IPython.display import display, Markdown
import jsonlines
import chromadb
import yaml

# Resolve the kit directory (one level above the notebook's working directory)
# and the repository root (one level above the kit directory).
current_dir = os.getcwd()
kit_dir = os.path.abspath(os.path.join(current_dir, ".."))
repo_dir = os.path.abspath(os.path.join(kit_dir, ".."))

# Put both directories on the import path, kit first, so the project-local
# imports that follow can be resolved.
sys.path.extend([kit_dir, repo_dir])

from enterprise_knowledge_retriever.src.document_retrieval import DocumentRetrieval
from utils.agents.static_RAG_with_coding import CodeRAG

# Example list handed to CodeRAG further down; left empty in this notebook.
examples = []

# Location of the kit-level YAML configuration file (<kit_dir>/config.yaml),
# passed to CodeRAG as its `configs` argument.
CONFIG_PATH = os.path.join(kit_dir, "config.yaml")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def load_config(filename: str) -> dict:
    """Read a YAML configuration file and return its parsed contents.

    Args:
        filename: Path to the YAML file to load.

    Returns:
        The object produced by ``yaml.safe_load`` for the file (expected to be
        a dict for a mapping-style config).

    Raises:
        FileNotFoundError: If ``filename`` does not exist. The message names
            the missing file and the original error is kept as ``__cause__``.
        RuntimeError: If the file content cannot be parsed as YAML. The
            underlying ``yaml.YAMLError`` is kept as ``__cause__``.
    """
    try:
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's default locale encoding.
        with open(filename, "r", encoding="utf-8") as file:
            return yaml.safe_load(file)
    except FileNotFoundError as e:
        raise FileNotFoundError(
            f"The YAML configuration file {filename} was not found."
        ) from e
    except yaml.YAMLError as e:
        raise RuntimeError(f"Error parsing YAML file: {e}") from e

In [6]:
# Helper object from the enterprise_knowledge_retriever kit; used here to read
# its configuration and to load the embedding model.
documentRetrieval = DocumentRetrieval()
# Keep the third-from-last and second-to-last items of the returned sequence
# and discard the rest. Neither name is referenced again in the cells shown
# in this notebook.
*_, embedding_model_info, retrieval_info, _ = documentRetrieval.get_config_info()
# Embedding model shared by the Chroma vector store and the CodeRAG instance
# created in the following cells.
embeddings = documentRetrieval.load_embedding_model()

2024-07-12 10:52:18,894 [INFO] - Load pretrained SentenceTransformer: intfloat/e5-large-v2


load INSTRUCTOR_Transformer


2024-07-12 10:52:20,921 [INFO] - Use pytorch device: cpu


max_seq_length  512


In [7]:
# Chroma vector store backed by the directory <kit_dir>/data/uber_lyft.chromadb,
# using the embedding model loaded in the previous cell.
# NOTE(review): presumably this directory must already hold the indexed
# Uber/Lyft documents for retrieval to return results - confirm.
vectorstore = Chroma(
    persist_directory=kit_dir
    + "/data/uber_lyft.chromadb",  # Create for your own experiments.
    embedding_function=embeddings,
)

2024-07-12 10:52:20,942 [INFO] - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


In [8]:
# Instantiate the CodeRAG pipeline with the config file path, the embedding
# model, the vector store, and the (empty) example list defined earlier.
rag = CodeRAG(
    configs=CONFIG_PATH,
    embeddings=embeddings,
    vectorstore=vectorstore,
    examples=examples,
)

# Initialize chains (must run before the nodes and graph are built below)
rag.initialize()

# Build nodes; the recorded output shows this returns a langgraph StateGraph.
workflow = rag.create_rag_nodes()
# Printed only to confirm what was built.
print(workflow)

# Build graph: `app` is the object later passed to rag.call_rag.
app = rag.build_rag_graph(workflow)

<langgraph.graph.state.StateGraph object at 0x2b72e4f10>


In [9]:
# rag.display_graph(app)  # Uncomment to display graph

In [10]:
def log_answers(question, output_file):
    """Run one question through the RAG app, display the answer, and log it.

    Relies on the notebook-level ``rag``, ``app`` and ``kit_dir`` objects
    defined in earlier cells.

    Args:
        question: The question passed to ``rag.call_rag``.
        output_file: Name of the JSON Lines file, inside ``<kit_dir>/outputs``,
            that the question/answer pair is appended to.

    Returns:
        None. The answer is rendered with ``display`` and appended to the log.
    """
    try:
        response = rag.call_rag(app, question)
    except Exception as exc:
        # Best effort: one failing question must not abort a whole batch, but
        # the reason is reported instead of being silently discarded.
        # Catching Exception rather than using a bare `except:` lets
        # KeyboardInterrupt and SystemExit still stop the run.
        print(f"call_rag failed for question {question!r}: {exc!r}")
        response = {"answer": "Failed"}

    display(Markdown("---Response---"))
    display(Markdown(response["answer"]))

    output_dir = kit_dir + "/outputs"
    # exist_ok=True avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)

    # Append mode: repeated calls accumulate one JSON object per question.
    with jsonlines.open(output_dir + f"/{output_file}", "a") as writer:
        writer.write({"question": question, "response": response["answer"]})

In [11]:
# Questions sent through the RAG app one at a time by the loop in the next
# cell; they cover single-company lookups, numeric comparisons, and
# two-company comparisons.
questions = [
    "Could the trading price of lyft stock be volatile?",
    "What was the difference in revenue for Uber between 2020 and 2021?",
    "What was the change in revenue for Lyft for 2020 and 2021 as a percentage?",
    "Provide the business overviews for Uber and Lyft.",
    "What are the growth strategies for Lyft and Uber?",
    "How do the growth strategies differ between Lyft and Uber?",
]

In [12]:
# Answer every question in order and append each result to one JSON Lines file
# under <kit_dir>/outputs. The file is opened in append mode, so re-running
# this cell adds further entries rather than replacing earlier ones.
# NOTE(review): the file name suggests a Llama3 70B Instruct model was used;
# the model is not set in this notebook - confirm against config.yaml.
for question in questions:
    log_answers(question, "lyft_stock_price_Llama3_70B_Instruct.jsonl")

---Initializing---
Could the trading price of lyft stock be volatile?
---Decision---
{'generate_or_example': 'answer_generation'}
---ROUTE ORIGINAL QUESTION---
---ROUTING QUESTION---
answer_generation
---ROUTE QUESTION TO LLM---
---DETERMINING ENTITIES---
entities:  ['lyft']
['lyft.pdf']
---ANSWERING QUESTION---
Could the trading price of lyft stock be volatile?
---RERANKING DOCUMENTS---
---NUM DOCUMENTS RETRIEVED---
3
As of December 31, 2021, there were six stockholders of record of our Class B common stock. All shares of Class B common stock are beneficially owned by either Logan Green or John Zimmer.

Dividend Policy

We have never paid cash dividends on our capital stock and we do not anticipate paying any cash dividends in the foreseeable future.

Stock Performance Graph

This performance graph shall not be deemed “filed” with the SEC for purposes of Section 18 of the Exchange Act or incorporated by reference into any filing of Lyft, Inc. under the Securities Act.

The graph below

---Response---

Yes, the trading price of Lyft stock could be volatile.

---Initializing---
What was the difference in revenue for Uber between 2020 and 2021?
---Decision---
{'generate_or_example': 'answer_generation'}
---ROUTE ORIGINAL QUESTION---
---ROUTING QUESTION---
subquery_generation
---ROUTE QUESTION TO SUBQUERY_GENERATION---
---SUBQUESTIONS---
• What was the revenue for Uber in 2020?
• What was the revenue for Uber in 2021?
---DETERMINING ENTITIES---
entities:  ['Uber']
['uber.pdf']
---ANSWERING SUBQUESTIONS---
• What was the revenue for Uber in 2020?
---RERANKING DOCUMENTS---
---NUM DOCUMENTS RETRIEVED---
3
<table><thead><th rowspan="4">(In millions, except percentages) Monthly Active Platform Consumers (“MAPCs”) 2) 3) Trips @) Gross Bookings )</th><th colspan="3" rowspan="3">2020 93 5,025</th><th>2021</th><th colspan="2">2020 to 2021 % Change</th><th rowspan="4">2020 to 2021 %</th><th rowspan="4">Change 53%</th></thead><thead><th></th><th colspan="3"></th><th rowspan="3">118 6,368 90,415</th><th>27</th><th>%</th><th></th><th></th></thead><thead><



Generated code: 
 revenue_2020 = 11139
revenue_2021 = 17455

difference = revenue_2021 - revenue_2020

print("The difference in revenue for Uber between 2020 and 2021 was ${} million.".format(difference))
---QCING CODE---
The difference in revenue for Uber between 2020 and 2021 was $6316 million.



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


{'runnable': 'exception'}
Code status:  exception
---DETERMINING IF REFACTOR IS NEEDED---
Attempt:  0
exception
--ROUTING TO CODE REFACTOR---
--REFACTORING CODE---
---CODE TO REFACTOR---
revenue_2020 = 11139
revenue_2021 = 17455

difference = revenue_2021 - revenue_2020

print("The difference in revenue for Uber between 2020 and 2021 was ${} million.".format(difference))
updated codegen counter:  1
---CODE--- 
 revenue_2020 = 11139
revenue_2021 = 17455

difference = revenue_2021 - revenue_2020

print("The difference in revenue for Uber between 2020 and 2021 was ${} million.".format(difference))
---REFACTORED CODE--- 

revenue_2020 = 11139
revenue_2021 = 17445

difference = revenue_2021 - revenue_2020

print("The difference in revenue for Uber between 2020 and 2021 was ${} million.".format(difference))
---TESTING CODE---
---QCING CODE---


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


The difference in revenue for Uber between 2020 and 2021 was $6306 million.

{'runnable': 'executed'}
Code status:  executed
---DETERMINING IF REFACTOR IS NEEDED---
Attempt:  1
executed
---EXECUTABLE---
---Final Generation---
The difference in revenue for Uber between 2020 and 2021 was $6306 million.



---Response---

The difference in revenue for Uber between 2020 and 2021 was $6306 million.

---Initializing---
What was the change in revenue for Lyft for 2020 and 2021 as a percentage?
---Decision---
{'generate_or_example': 'answer_generation'}
---ROUTE ORIGINAL QUESTION---
---ROUTING QUESTION---
subquery_generation
---ROUTE QUESTION TO SUBQUERY_GENERATION---
---SUBQUESTIONS---
• What was the revenue for Lyft in 2020?
• What was the revenue for Lyft in 2021?
---DETERMINING ENTITIES---
entities:  ['Lyft', '2020']
['lyft.pdf', '2020.pdf']
---ANSWERING SUBQUESTIONS---
• What was the revenue for Lyft in 2020?
---RERANKING DOCUMENTS---
---NUM DOCUMENTS RETRIEVED---
3
Cash flows from investing activities Purchases of marketable securities Purchase of non-marketable security Purchases of term deposits Proceeds from sales of marketable securities Proceeds from maturities of marketable securities Proceeds from maturities of term deposits Purchases of property and equipment and scooter fleet Cash paid for acquisitions, net of cash acquired Sales of property and equipment Proceeds from

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


{'runnable': 'exception'}
Code status:  exception
---DETERMINING IF REFACTOR IS NEEDED---
Attempt:  0
exception
--ROUTING TO CODE REFACTOR---
--REFACTORING CODE---
---CODE TO REFACTOR---
print("I apologize, but the exact revenue figure for Lyft in 2021 was not found. Please refer to the suggested sources to obtain the necessary information.")
updated codegen counter:  1
---CODE--- 
 print("I apologize, but the exact revenue figure for Lyft in 2021 was not found. Please refer to the suggested sources to obtain the necessary information.")
---REFACTORED CODE--- 

```
python
print("I apologize, but the exact revenue figure for Lyft in 2021 was not found. Please refer to the suggested sources to obtain the necessary information.")
```
The code provided does not contain any errors, and the exception message is "None", which means there is no error. The code is a simple print statement and does not require any imports or corrections.
---TESTING CODE---
---QCING CODE---


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


SyntaxError('invalid syntax', ('<string>', 2, 1, '```\n', 2, 2))
{'runnable': 'exception'}
Code status:  exception
---DETERMINING IF REFACTOR IS NEEDED---
Attempt:  1
exception
--ROUTING TO CODE REFACTOR---
--REFACTORING CODE---
---CODE TO REFACTOR---
```
python
print("I apologize, but the exact revenue figure for Lyft in 2021 was not found. Please refer to the suggested sources to obtain the necessary information.")
```
The code provided does not contain any errors, and the exception message is "None", which means there is no error. The code is a simple print statement and does not require any imports or corrections.
updated codegen counter:  2
---CODE--- 
 ```
python
print("I apologize, but the exact revenue figure for Lyft in 2021 was not found. Please refer to the suggested sources to obtain the necessary information.")
```
The code provided does not contain any errors, and the exception message is "None", which means there is no error. The code is a simple print statement and does

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


I apologize, but the exact revenue figure for Lyft in 2021 was not found. Please refer to the suggested sources to obtain the necessary information.

{'runnable': 'exception'}
Code status:  exception
---DETERMINING IF REFACTOR IS NEEDED---
Attempt:  2
exception
--ROUTING TO CODE REFACTOR---
--REFACTORING CODE---
---CODE TO REFACTOR---
```
python
print("I apologize, but the exact revenue figure for Lyft in 2021 was not found. Please refer to the suggested sources to obtain the necessary information.")
```
updated codegen counter:  3
---CODE--- 
 ```
python
print("I apologize, but the exact revenue figure for Lyft in 2021 was not found. Please refer to the suggested sources to obtain the necessary information.")
```
---REFACTORED CODE--- 

```
python
print("I apologize, but the exact revenue figure for Lyft in 2021 was not found. Please refer to the suggested sources to obtain the necessary information.")
```
The code provided does not have any syntax errors and does not require any impo

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


SyntaxError('invalid syntax', ('<string>', 2, 1, '```\n', 2, 2))
{'runnable': 'exception'}
Code status:  exception
---DETERMINING IF REFACTOR IS NEEDED---
Attempt:  3
exception
---Final Generation---
SyntaxError('invalid syntax', ('<string>', 2, 1, '```\n', 2, 2))


---Response---

I apologize, but I cannot provide the final answer due to a lack of grounding in documents or an ability to generate and run code that is required to solve the task. The provided generation appears to be a syntax error and does not contain any relevant information about Lyft's revenue for 2020 and 2021. To find the answer, I would suggest searching for Lyft's annual reports or quarterly earnings reports for 2020 and 2021, which should provide the necessary revenue data to calculate the percentage change.

---Initializing---
Provide the business overviews for Uber and Lyft.
---Decision---
{'generate_or_example': 'answer_generation'}
---ROUTE ORIGINAL QUESTION---
---ROUTING QUESTION---
subquery_generation
---ROUTE QUESTION TO SUBQUERY_GENERATION---
---SUBQUESTIONS---
• What is the business overview for Uber?
• What is the business overview for Lyft?
---DETERMINING ENTITIES---
entities:  Uber
['uber.pdf']
---ANSWERING SUBQUESTIONS---
• What is the business overview for Uber?
---RERANKING DOCUMENTS---
---NUM DOCUMENTS RETRIEVED---
3
which the primary purpose is to operate, commercialize or develop autonomous or self-driving vehicles, or technology related thereto (including Apparate International C.V., Apparate Canada, Inc., UATC, LLC and their respective subsidiaries), (d) entities for which the primary purpose is to operate, commercialize or develop class 6 or above trucking or freight brokerage services, or technology related thereto (including Uber Freight, LLC and its subsidiaries), (e

---Response---

Here are the business overviews for Uber and Lyft:

**Uber:** Uber is a technology platform that connects consumers with independent providers of ride services, restaurants, and stores, offering a range of services including ridesharing, meal delivery, and freight transportation. The company's proprietary technology applications and operational excellence enable it to facilitate these connections efficiently.

**Lyft:** Lyft is a multimodal transportation network that connects riders with drivers through its mobile application, providing a range of transportation options to improve people's lives.

---Initializing---
What are the growth strategies for Lyft and Uber?
---Decision---
{'generate_or_example': 'answer_generation'}
---ROUTE ORIGINAL QUESTION---
---ROUTING QUESTION---
subquery_generation
---ROUTE QUESTION TO SUBQUERY_GENERATION---
---SUBQUESTIONS---
• What are the growth strategies for Lyft?
• What are the growth strategies for Uber?
---DETERMINING ENTITIES---
entities:  Lyft
['lyft.pdf']
---ANSWERING SUBQUESTIONS---
• What are the growth strategies for Lyft?
---RERANKING DOCUMENTS---
---NUM DOCUMENTS RETRIEVED---
3
Demand for our offerings is highly sensitive to the price of rides, the rates for time and distance driven, incentives paid to drivers and the fees we charge drivers. Many factors, including operating costs, legal and regulatory requirements or constraints and our current and future competitors’ pricing and marketing strategies including increased incentives for drivers, could significantly affect our pricing strategies. Certain of our competitors offer, or m

---Response---

The growth strategies for Lyft and Uber include:

**Lyft:**
Unfortunately, I couldn't find specific growth strategies for Lyft. However, you can try searching for Lyft's company reports, investor presentations, news articles, research firm reports, and social media channels to find more information on their growth strategies.

**Uber:**

1. Growing supply and demand on the platform
2. Increasing existing platform users' activity
3. Expanding to new markets
4. Providing high-quality support to users
5. Expanding business and increasing market share
6. Competing with competitors
7. Developing new products and technologies
8. Acquiring or investing in complementary businesses
9. Penetrating suburban and rural areas
10. Reducing costs to compete with personal vehicle ownership and public transportation.

I apologize, but I cannot provide a comprehensive answer for Lyft's growth strategies due to a lack of information.

---Initializing---
How do the growth strategies differ between Lyft and Uber?
---Decision---
{'generate_or_example': 'answer_generation'}
---ROUTE ORIGINAL QUESTION---
---ROUTING QUESTION---
subquery_generation
---ROUTE QUESTION TO SUBQUERY_GENERATION---
---SUBQUESTIONS---
• What is the growth strategy of Lyft?
• What is the growth strategy of Uber?
---DETERMINING ENTITIES---
entities:  Lyft
['lyft.pdf']
---ANSWERING SUBQUESTIONS---
• What is the growth strategy of Lyft?
---RERANKING DOCUMENTS---
---NUM DOCUMENTS RETRIEVED---
3
Demand for our offerings is highly sensitive to the price of rides, the rates for time and distance driven, incentives paid to drivers and the fees we charge drivers. Many factors, including operating costs, legal and regulatory requirements or constraints and our current and future competitors’ pricing and marketing strategies including increased incentives for drivers, could significantly affect our pricing strategies. Certain of our competitors offer, or may 

---Response---

The growth strategies of Lyft and Uber differ in their focus and approach. Lyft's strategy is centered around optimizing its core ridesharing business by leveraging its technology platform, scale, and user network density to improve efficiency and develop new offerings. In contrast, Uber's strategy is more diversified, focusing on expanding into various businesses beyond ridesharing, such as autonomous vehicles, trucking, food delivery, and personal mobility devices, among others.