In [6]:
from datasets import load_dataset
import random

# Fixed seed and sample size, named so they are easy to find and tune.
# Without a seed every kernel run would evaluate a different, unrepeatable
# set of questions, making the token/cost figures impossible to reproduce.
SAMPLE_SEED = 42
NUM_QUESTIONS = 10

ds = load_dataset("corbyrosset/researchy_questions")

# A dedicated Random instance keeps the draw reproducible without disturbing
# the global `random` state. `list(...)` guarantees random.sample receives a
# real sequence (presumably a no-op copy on `datasets` versions that already
# return a list here -- verify against the installed version).
questions = random.Random(SAMPLE_SEED).sample(
    list(ds["train"]["question"]), NUM_QUESTIONS
)

In [7]:
from ReAct.react_agent import ReAct_agent

In [8]:
# System prompt handed to the ReAct agent. It describes the reason/act rules,
# the two function-calling tools the model may use (`search` and
# `final_answer`), how to treat search results, and one worked example of each
# tool call. The text is sent to the model verbatim, so edits here change
# agent behaviour.
react_system_prompt = """You are an AI Agent based on the ReAct framework, and your task is to answer questions through the "observation -> reasoning -> action" cycle.

## **Reasoning and Action Rules**
1. **Reasoning (Reason)**
    - You should think about why you are performing an action before each action.
    - You should make a brief summary of the current situation and decide what to do next.
    - Your reasoning should be based on existing information and clearly explain why this is the best next step.
    - Your reasoning should be embedded in the `reason` parameter and executed within the `search` or `final_answer` tools.  

2. **Action (Act)**
    - You **cannot simply respond with text**, but must **use Function Calling to perform actions**.
    - You have the following tools available:
        - **`search(reason: str, query: str) -> List[Dict]`**
            - Use the `search` tool when you need additional information.
            - The `reason` parameter should explain why this search is necessary.
            - The `query` parameter is the keyword you want to search for.
        - **`final_answer(reason: str) -> str`**
            - Use the `final_answer` tool when you have collected enough information to answer the question.
            - The `reason` parameter should explain why you believe the current information is sufficient to answer the question.

## **Observation and Response Rules**
- You will receive the last search result each time, and you need to update your reasoning based on this information.
- You should analyze this information and decide:
    - Whether further search is needed?
    - Whether there is enough information to answer the question?

## **Example**

### **Using `search` to obtain additional information**
'''
Based on the question, I need to search "Who discovered dark energy?" to obtain background knowledge.
'''
_(At this point, you should use the `search` tool to perform the search)_

**Correct Usage**
```json
{
    "tool_call": {
        "name": "search",
        "arguments": {
            "reason": "Based on the question, I need to search 'Who discovered dark energy?' to obtain background knowledge.",
            "query": "Who discovered dark energy?"
        }
    }
}
```

### Using final_answer to provide the final answer
'''
Based on all the information collected, I can now answer the question.
'''
_(At this point, you should use the final_answer tool to respond)_

**Correct Usage**
```json
{
  "tool_call": {
    "name": "final_answer",
    "arguments": {
      "reason": "Based on all the information collected, I can now answer the question."
    }
  }
}
```
"""

In [9]:
# Constructor arguments shared by every agent built in the loop below.
agent_settings = {
    "llm": "gpt-4o",
    "system_prompt": react_system_prompt,
    "max_turns": 9,
}

# A new ReAct_agent is constructed for each sampled question and then run on it.
for question in questions:
    react_agent = ReAct_agent(**agent_settings)
    react_agent.run(question=question)

Error processing https://www.ganvc.org/research/EVAL-2011-Final.pdf: 'NoneType' object has no attribute 'get_text'
Error processing https://iopscience.iop.org/article/10.1088/1748-9326/abe88f: 'NoneType' object has no attribute 'get_text'
Error processing https://ijcrt.org/papers/IJCRT22A6128.pdf: 'NoneType' object has no attribute 'get_text'
Error processing https://www.therec.io/unlocking-the-power-of-curiosity-a-comprehensive-guide-to-cultivating-a-curious-mindset/: HTTPSConnectionPool(host='www.therec.io', port=443): Max retries exceeded with url: /unlocking-the-power-of-curiosity-a-comprehensive-guide-to-cultivating-a-curious-mindset/ (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1006)')))
Error processing https://www.therec.io/unlocking-the-power-of-curiosity-how-exploring-the-unknown-benefits-our-lives/: HTTPSConnectionPool(host='www.therec.io', port=443): Max retries exceeded with url

In [19]:
import json

# Summarise the token usage stored in react_summary.json: echo every per-call
# usage record, report per-conversation averages for the first three LLM calls,
# print grand totals, and print two cost estimates.

# Explicit encoding so the JSON is decoded the same way on every platform.
with open('react_summary.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# The averages below divide by len(data); fail with a clear message rather
# than a bare ZeroDivisionError when the summary file has no conversations.
if not data:
    raise ValueError("react_summary.json contains no conversations")

# Only the first three calls of each conversation get a per-turn breakdown;
# any later calls still count towards the grand totals.
TRACKED_TURNS = ("first", "second", "third")

# Per-token prices used by the first cost estimate.
# NOTE(review): 0.000125 == 50 * 2.50 / 1e6 and 0.0005 == 50 * 10.00 / 1e6, so
# these look like per-1M-token prices with the same x50 scale-up that the
# second estimate applies explicitly -- confirm the intended model and scale.
SCALED_PROMPT_PRICE = 0.000125
SCALED_COMPLETION_PRICE = 0.0005

# Inputs to the second cost estimate: a x50 multiplier and per-1M-token prices.
# NOTE(review): presumably an alternative model's pricing and a scale-up from
# this sample to a larger run -- confirm what the 50 represents.
SCALE_FACTOR = 50
ALT_PROMPT_PRICE_PER_MILLION = 0.59
ALT_COMPLETION_PRICE_PER_MILLION = 0.79
TOKENS_PER_MILLION = 1000000

prompt_tokens = 0
completion_tokens = 0
turn_prompt_totals = [0] * len(TRACKED_TURNS)
turn_completion_totals = [0] * len(TRACKED_TURNS)

for conversation in data:
    token_usage = conversation["token_usage"]
    for turn_index, log in enumerate(token_usage):
        print(log)
        prompt_tokens += log["prompt_tokens"]
        completion_tokens += log["completion_tokens"]
        if turn_index < len(TRACKED_TURNS):
            turn_prompt_totals[turn_index] += log["prompt_tokens"]
            turn_completion_totals[turn_index] += log["completion_tokens"]
    print('---')

# Keep the original per-turn variable names bound for any later cell that
# reads them.
first_prompt_tokens, second_prompt_tokens, third_prompt_tokens = turn_prompt_totals
first_completion_tokens, second_completion_tokens, third_completion_tokens = turn_completion_totals

# Averages are taken over ALL conversations, including any that made fewer
# than three calls (those contribute zero to the missing turns).
num_conversations = len(data)
for label, turn_prompt, turn_completion in zip(
    TRACKED_TURNS, turn_prompt_totals, turn_completion_totals
):
    print(f"average {label}_prompt_tokens: {turn_prompt / num_conversations}")
    print(f"average {label}_completion_tokens: {turn_completion / num_conversations}")
print('---')

print(f"Total prompt tokens: {prompt_tokens}")
print(f"Total completion tokens: {completion_tokens}")

# First cost estimate (pre-scaled per-token prices).
cost_prompt_tokens = prompt_tokens * SCALED_PROMPT_PRICE
cost_completion_tokens = completion_tokens * SCALED_COMPLETION_PRICE
print(f"Cost of prompt tokens: ${cost_prompt_tokens}")
print(f"Cost of completion tokens: ${cost_completion_tokens}")
print(f"Total cost: ${cost_prompt_tokens + cost_completion_tokens}")

# Second cost estimate (explicit scale factor and per-1M-token prices).
# Operand order is kept as tokens * scale * price / 1e6 so the floating-point
# results match the original computation exactly.
lcost_prompt_tokens = prompt_tokens * SCALE_FACTOR * ALT_PROMPT_PRICE_PER_MILLION / TOKENS_PER_MILLION
lcost_completion_tokens = completion_tokens * SCALE_FACTOR * ALT_COMPLETION_PRICE_PER_MILLION / TOKENS_PER_MILLION
print(f"Cost of prompt tokens: ${lcost_prompt_tokens}")
print(f"Cost of completion tokens: ${lcost_completion_tokens}")
print(f"Total cost: ${lcost_prompt_tokens + lcost_completion_tokens}")

{'prompt_tokens': 674, 'completion_tokens': 44, 'total_tokens': 718}
{'prompt_tokens': 17568, 'completion_tokens': 240, 'total_tokens': 17808}
{'prompt_tokens': 109, 'completion_tokens': 115, 'total_tokens': 224}
---
{'prompt_tokens': 681, 'completion_tokens': 46, 'total_tokens': 727}
{'prompt_tokens': 12315, 'completion_tokens': 284, 'total_tokens': 12599}
{'prompt_tokens': 118, 'completion_tokens': 283, 'total_tokens': 401}
---
{'prompt_tokens': 675, 'completion_tokens': 43, 'total_tokens': 718}
{'prompt_tokens': 3986, 'completion_tokens': 256, 'total_tokens': 4242}
{'prompt_tokens': 109, 'completion_tokens': 86, 'total_tokens': 195}
---
{'prompt_tokens': 674, 'completion_tokens': 48, 'total_tokens': 722}
{'prompt_tokens': 11186, 'completion_tokens': 366, 'total_tokens': 11552}
{'prompt_tokens': 113, 'completion_tokens': 235, 'total_tokens': 348}
---
{'prompt_tokens': 673, 'completion_tokens': 55, 'total_tokens': 728}
{'prompt_tokens': 5522, 'completion_tokens': 178, 'total_tokens': 