In [None]:
! pip install -q litellm tqdm PyMuPDF Pillow instructor openai python-dotenv weaviate-client pandas

In [27]:
import json
from openai import OpenAI
import os
import weaviate
from weaviate.classes.init import Auth
from litellm import completion
import pandas as pd

client = OpenAI()
O1_MODEL = 'o1-mini'
GPT_MODEL = 'gpt-4o-mini'

In [28]:
def get_weaviate_client():
    wcd_url = os.environ["WCD_URL"]
    wcd_api_key = os.environ["WCD_API_KEY"]
    openai_api_key = os.environ["OPENAI_API_KEY"]
    voyageai_api_key = os.environ["VOYAGEAI_API_KEY"]

    headers = {
        "X-VoyageAI-Api-Key": voyageai_api_key,
        "X-OpenAI-Api-Key": openai_api_key
    }

    client = weaviate.connect_to_weaviate_cloud(
        cluster_url=wcd_url,
        auth_credentials=Auth.api_key(wcd_api_key),
        headers=headers,
    )

    if client.is_ready():
        print("Weaviate client is ready")
        return client
    else:
        print("Weaviate client is not ready")
        return None

workspace_name = "Agent_RAG"
weaviate_client = get_weaviate_client()

Weaviate client is ready


            Please make sure to close the connection using `client.close()`.


In [29]:
o1_prompt = """
You are a financial analysis assistant. 
The first input you will receive will be a complex financial task that needs to be carefully reasoned through to solve. 
Your task is to review the challenge, and create a detailed plan to analyze financial statements, assess risks, and provide insights.

You will have access to an LLM agent that is responsible for executing the plan that you create and will return results.


The LLM agent has access to the following functions:
    - fetch_context(Question)
        - The function fetches context data from relevant financial statements based on the provided question.
        
When creating a plan for the LLM to execute, break your instructions into a logical, step-by-step order, using the specified format:
    - **Main actions are numbered** (e.g., 1, 2, 3).
    - **Sub-actions are lettered** under their relevant main actions (e.g., 1a, 1b).
        - **Sub-actions should start on new lines**
    - **Specify conditions using clear 'if...then...else' statements** (e.g., 'If the financial statement shows a profit, then...').
    - **For actions that require using one of the above functions defined**, write a step to call a function using backticks for the function name (e.g., `call the fetch_context function`).
        - Ensure that the proper input arguments are given to the model for instruction. There should not be any ambiguity in the inputs.
    - **The last step** in the instructions should always be calling the `instructions_complete` function. This is necessary so we know the LLM has completed all of the instructions you have given it.
    - **Detailed steps** The plan generated must be extremely detailed and thorough with explanations at every step.
Use markdown format when generating the plan with each step and sub-step.

Please find the scenario below.
"""

In [30]:
gpt4o_system_prompt = """
You are a helpful assistant responsible for executing the policy on handling financial analysis tasks. 
Your task is to follow the policy exactly as it is written and perform the necessary actions.

You must explain your decision-making process across various steps.

# Steps

1. **Read and Understand Policy**: Carefully read and fully understand the given policy on handling financial analysis tasks.
2. **Identify the exact step in the policy**: Determine which step in the policy you are at, and execute the instructions according to the policy.
3. **Decision Making**: Briefly explain your actions and why you are performing them.
4. **Action Execution**: Perform the actions required by calling any relevant functions and input parameters.
5. **Use of Own Knowledge**: If specific industry trends or data are required and not available in the policy or context, use your own knowledge to provide insights.

POLICY:
{policy}
"""

In [31]:
TOOLS = [
{
        "type": "function",
        "function": {
            "name": "fetch_context",
            "description": "Fetches context data from relevant financial statements based on the provided question.",
            "parameters": {
                "type": "object",
                "properties": {
                    "Question": {
                        "type": "string",
                        "description": "The question to fetch context data for."
                    }
                },
                "required": ["Question"],
                "additionalProperties": False,
                "strict": True,
            },
        },
    },    
    {
        "type": "function",
        "function": {
            "name": "instructions_complete",
            "description": "Function should be called when we have completed ALL of the instructions.",
        },
    }
]

In [32]:
def fetch_context(Question):
    questions = weaviate_client.collections.get(workspace_name)

    response = questions.query.hybrid(
        query=Question,
        limit=5,
        alpha=0.5
    )

    def weaviate_objects_to_text(objects):
        text = ""
        for obj in objects:
            text += f"<Content>\n{obj.properties['content']}\n</Content>\n\n"
        return text

    llm_input = weaviate_objects_to_text(response.objects)

    SYSTEM_PROMPT = """
    You are a specialized Financial Analysis AI focused on extracting concise, data-driven insights from financial documents. Provide short, precise answers with a brief explanation.

      **Remember**:  
      - Stick strictly to information from the documents.  
      - Keep responses focused on numerical data and short insights.  
      - Avoid speculation or overly detailed elaboration.  
      - Provide a brief but clear thinking process, then a concise final answer.
    """

    USER_PROMPT = f"""
      Context:
      <Context>
         {llm_input}
      </Context>

      Question: 
      <Question>
         {Question}
      </Question>

      Please provide a concise, reader-friendly answer.
      - Provide direct, concise answers suitable for FAQ format
      - Focus on accuracy and readability
      - Include only information found in the provided context
      - If information is incomplete or unclear, state this explicitly
    """

    response = completion(
        model="openrouter/google/gemini-flash-1.5",
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": USER_PROMPT}
        ]
    )
    print("Question: \n")
    print(Question)
    print("Answer: \n")
    print(response.choices[0].message.content.strip())
    return response.choices[0].message.content.strip()

function_mapping = {
    'fetch_context': fetch_context,
}

In [33]:
def append_message(message_list, message):
    message_list.append(message)
    # Optionally, print the message for immediate feedback
    message_type = message.get('type', '')
    if message_type == 'status':
        print(message['message'])
    elif message_type == 'plan':
        print("\nPlan:\n", message['content'])
    elif message_type == 'assistant':
        print("\nAssistant:\n", message['content'])
    elif message_type == 'function_call':
        print(f"\nFunction call: {message['function_name']} with arguments {message['arguments']}")
    elif message_type == 'function_response':
        print(f"\nFunction response for {message['function_name']}: {message['response']}")
    else:
        # Handle any other message types or default case
        print(message.get('content', ''))

In [34]:
def call_o1(scenario):
    prompt = f"""
    {o1_prompt}
        
    Scenario:
    {scenario}

    Please provide the next steps in your plan.
    """
    
    response = client.chat.completions.create(
        model=O1_MODEL,
        messages=[{'role': 'user', 'content': prompt}]
    )
    plan = response.choices[0].message.content
    return plan

In [35]:
def call_gpt4o(message_list, plan):
    gpt4o_policy_prompt = gpt4o_system_prompt.replace("{policy}", plan)
    messages = [
        {'role': 'system', 'content': gpt4o_policy_prompt},
    ]

    while True:
        response = client.chat.completions.create(
            model=GPT_MODEL,
            messages=messages,
            tools=TOOLS,
            parallel_tool_calls=False
        )
        
        assistant_message = response.choices[0].message.to_dict()
        print(assistant_message)
        messages.append(assistant_message)

        append_message(message_list, {'type': 'assistant', 'content': assistant_message.get('content', '')})

        if (response.choices[0].message.tool_calls and
            response.choices[0].message.tool_calls[0].function.name == 'instructions_complete'):
            break

        if not response.choices[0].message.tool_calls:
            continue

        for tool in response.choices[0].message.tool_calls:
            tool_id = tool.id
            function_name = tool.function.name
            input_arguments_str = tool.function.arguments

            append_message(message_list, {'type': 'tool_call', 'function_name': function_name, 'arguments': input_arguments_str})

            try:
                input_arguments = json.loads(input_arguments_str)
            except (ValueError, json.JSONDecodeError):
                continue

            if function_name in function_mapping:
                print("## Function Call", function_name)
                try:
                    function_response = function_mapping[function_name](**input_arguments)
                except Exception as e:
                    function_response = {'error': str(e)}
            else:
                function_response = {'error': f"Function '{function_name}' not implemented."}

            try:
                serialized_output = json.dumps(function_response)
            except (TypeError, ValueError):
                serialized_output = str(function_response)

            messages.append({
                "role": "tool",
                "tool_call_id": tool_id,
                "content": serialized_output
            })

            append_message(message_list, {'type': 'tool_response', 'function_name': function_name, 'response': serialized_output})

    return messages

In [36]:
def process_scenario(message_list, scenario):
    append_message(message_list, {'type': 'status', 'message': 'Generating plan...'})

    plan = call_o1(scenario)

    append_message(message_list, {'type': 'plan', 'content': plan})

    append_message(message_list, {'type': 'status', 'message': 'Executing plan...'})

    messages = call_gpt4o(message_list, plan)

    append_message(message_list, {'type': 'status', 'message': 'Processing complete.'})

    return messages

In [37]:
# scenario_text = ("If we exclude the impact of M&A, which segment has dragged down 3M's overall growth in 2022?")

# messages = process_scenario(scenario_text)

In [38]:
df_questions = pd.read_json("financebench_open_source.jsonl", lines=True)
df_questions.head(3)

filtered_df = df_questions[df_questions["doc_name"].str.contains("3M", case=False, na=False)]
len(filtered_df)

8

In [39]:
from tqdm import tqdm

answers = []

# Use tqdm to wrap the iterable for a progress bar
for question in tqdm(filtered_df['question'], desc="Processing Questions"):
    message_list = [] 
    answer = process_scenario(message_list, question)
    print(answer[-2])
    answers.append(answer)

Processing Questions:   0%|          | 0/8 [00:00<?, ?it/s]

Generating plan...

Plan:
 ```markdown
# Detailed Analysis Plan for Determining 3M's FY2018 Capital Expenditure

## 1. Initiate Data Retrieval Process
   a. **Action:** Utilize the `fetch_context` function to obtain the cash flow statement for 3M for the fiscal year 2018.
      - **Reasoning:** The capital expenditure amount is typically detailed within the cash flow statement under investing activities.
      - **Implementation:** Call the function as follows:
        ```
        `fetch_context("Provide the cash flow statement for 3M for FY2018.")`
        ```
   b. **Condition:** 
      - **If** the cash flow statement is successfully retrieved,
        - **Then** proceed to the next step.
      - **Else**, terminate the process and log an error indicating the inability to retrieve the cash flow statement.

## 2. Extract Capital Expenditure Data
   a. **Action:** Analyze the retrieved cash flow statement to locate the section detailing investing activities.
      - **Reasoning:** Cap

Processing Questions:  12%|█▎        | 1/8 [00:38<04:28, 38.36s/it]

{'content': None, 'refusal': None, 'role': 'assistant', 'tool_calls': [{'id': 'call_oUg0I8tvgXTbdyhw5uOUVZrw', 'function': {'arguments': '{}', 'name': 'instructions_complete'}, 'type': 'function'}]}

Assistant:
 None
Processing complete.
{'content': '### Step 1: Review of Data Retrieval\nThe retrieval of the cash flow statement for 3M FY2018 was unsuccessful in providing a complete document. The response indicates that while there are fragments of the statement on page 60, the entire cash flow statement is missing, making it impossible to extract the necessary capital expenditure data accurately.\n\n### Step 2: Conditional Termination\nAccording to the policy, since the cash flow statement could not be fully retrieved, I need to terminate the process and log an error.\n\n### Decision Making\nSince the primary data source (the cash flow statement) is incomplete, I cannot proceed with the extraction of capital expenditure data. Therefore, I will log this issue and conclude the analysis p

Processing Questions:  25%|██▌       | 2/8 [01:23<04:13, 42.20s/it]

{'content': None, 'refusal': None, 'role': 'assistant', 'tool_calls': [{'id': 'call_ngErhYKgERQsQSRchuCAZrJ8', 'function': {'arguments': '{}', 'name': 'instructions_complete'}, 'type': 'function'}]}

Assistant:
 None
Processing complete.
{'content': '1. **Read and Understand Policy**: I have successfully fetched the total Property, Plant, and Equipment (PP&E) for 3M for FY2018, which is $8,738 million.\n\n2. **Identify the exact step in the policy**: Now, I will proceed to step 1b, which involves fetching the accumulated depreciation for PP&E as reported in the balance sheet for 3M in FY2018.\n\n3. **Decision Making**: Since the accumulated depreciation information is already provided in the response I received, I will extract this data and move forward to the calculation of the net PP&E.\n\n4. **Action Execution**: I now have the following data:\n   - Total PP&E: $24,873 million\n   - Accumulated Depreciation: $16,135 million\n\n   To proceed, I will verify whether both the Total PP&E



{'content': "### Decision-Making Explanation\n\n1. **Data Retrieval**: I have successfully retrieved the income statement and cash flow statement for 3M for FY2022. However, I faced challenges with the balance sheet and industry financial ratios, as the data retrieved does not constitute complete documents or industry benchmarks.\n  \n2. **Analyzing Available Data**:\n   - **Income Statement**: Key figures include net sales of $34,229 million and net income of $5,777 million.\n   - **Cash Flow Statement**: The net cash provided by operating activities was $7,454 million, and capital expenditure (CapEx) reflected purchases of property, plant, and equipment at -$1,603 million.\n\n### Next Steps\n\nGiven that I have the required CapEx from the Cash Flow Statement but not the Total Assets from the balance sheet — which is essential for calculating the capital intensity ratio — I will need to identify alternative reliable sources for the missing data about Total Assets and Fixed Assets.\n\n

Processing Questions:  38%|███▊      | 3/8 [02:39<04:49, 57.93s/it]

{'content': None, 'refusal': None, 'role': 'assistant', 'tool_calls': [{'id': 'call_fnMZFWpcFXLcBKF2CZYan9iX', 'function': {'arguments': '{}', 'name': 'instructions_complete'}, 'type': 'function'}]}

Assistant:
 None
Processing complete.
{'content': "### Decision-Making Explanation\n\n1. **Data Retrieval**: I have successfully retrieved the income statement and cash flow statement for 3M for FY2022. However, I faced challenges with the balance sheet and industry financial ratios, as the data retrieved does not constitute complete documents or industry benchmarks.\n  \n2. **Analyzing Available Data**:\n   - **Income Statement**: Key figures include net sales of $34,229 million and net income of $5,777 million.\n   - **Cash Flow Statement**: The net cash provided by operating activities was $7,454 million, and capital expenditure (CapEx) reflected purchases of property, plant, and equipment at -$1,603 million.\n\n### Next Steps\n\nGiven that I have the required CapEx from the Cash Flow S

Processing Questions:  50%|█████     | 4/8 [05:20<06:33, 98.29s/it]

{'content': None, 'refusal': None, 'role': 'assistant', 'tool_calls': [{'id': 'call_zoUYXFzY22hOU9uc0mSpk2DQ', 'function': {'arguments': '{}', 'name': 'instructions_complete'}, 'type': 'function'}]}

Assistant:
 None
Processing complete.
{'content': "1. **Read and Understand Policy**: The relevant metrics for net profit are acquired, but the details for EBITDA, ROA, and ROE for 3M in FY2022 remain unavailable. \n\n2. **Identify the exact step in the policy**: I am now at Step 5, where I am assessing the relevance of the operating margin as a performance metric for 3M.\n\n3. **Decision Making**: The outcome of the previous analyses indicates that 3M's net profit for FY2022 is $5,777 million. However, without EBITDA, ROA, and ROE, I do not have a comprehensive range of metrics for a solid comparison.\n\n4. **Action Execution**:\n   - **Insights from Metrics**:\n     - Operating Margin (FY2022): 19.1%\n     - Net Profit Margin = \\(\\left( \\frac{5,777}{34,229} \\right) \\times 100 \\appr

Processing Questions:  62%|██████▎   | 5/8 [06:21<04:15, 85.16s/it]

{'content': None, 'refusal': None, 'role': 'assistant', 'tool_calls': [{'id': 'call_JEDZ5JnMWpgLLIEX8gEsZq9P', 'function': {'arguments': '{}', 'name': 'instructions_complete'}, 'type': 'function'}]}

Assistant:
 None
Processing complete.
{'content': "Now that I have retrieved all the necessary financial data, I can proceed to the next part of the analysis according to the policy.\n\n1. **Consolidated Income Statement Summary for 3M in 2022**:\n   - Net Sales: $34,229 million\n   - Total Operating Expenses: $27,690 million\n   - Operating Income: $6,539 million\n   - Income Before Income Taxes: $6,392 million\n   - Net Income Attributable to 3M: $5,777 million\n   - Basic Earnings per Share: $10.21\n   - Diluted Earnings per Share: $10.18\n\n2. **Segment-Wise Revenue for 3M in 2022**:\n   - Safety and Industrial: $11,981 million\n   - Transportation and Electronics: $9,262 million\n   - Health Care: $8,597 million\n   - Consumer: $5,513 million\n   - Corporate and Unallocated: $2 millio

Processing Questions:  75%|███████▌  | 6/8 [07:49<02:52, 86.10s/it]

{'content': None, 'refusal': None, 'role': 'assistant', 'tool_calls': [{'id': 'call_VJIrYq3PL0nOm8eQz5Xx454j', 'function': {'arguments': '{}', 'name': 'instructions_complete'}, 'type': 'function'}]}

Assistant:
 None
Processing complete.
{'content': "## Step 3: Decision Making \nI have attempted to retrieve information on the components of the Quick Ratio and its relevance as a liquidity measure for 3M but was unable to find this information in the source documents.\n\nGiven that we have no specific data on the Quick Ratio itself or its components, I will now proceed to a general evaluation of the relevance of the Quick Ratio. \n\n## Step 3a: Assess the Relevance of the Quick Ratio as a Liquidity Measure\n### Quick Ratio Assessment:\n- As mentioned earlier, the Quick Ratio is calculated using the formula: \n  \\[\n  \\text{Quick Ratio} = \\frac{\\text{Current Assets} - \\text{Inventories} - \\text{Prepaid Expenses}}{\\text{Current Liabilities}}\n  \\]\n- The Quick Ratio is considered a

Processing Questions:  88%|████████▊ | 7/8 [08:54<01:19, 79.24s/it]

{'content': None, 'refusal': None, 'role': 'assistant', 'tool_calls': [{'id': 'call_UyE7AB6jnGK09SYlU4O38Zns', 'function': {'arguments': '{}', 'name': 'instructions_complete'}, 'type': 'function'}]}

Assistant:
 None
Processing complete.
{'role': 'tool', 'tool_call_id': 'call_gipZX73FgFTEWepZR6vRybcU', 'content': '"Thinking process: The provided text mentions 1.500% Notes due 2031 in several places,  but lacks specific financial details for Q2 2023.  The 2022 10-K shows  \\u20ac500 million were initially issued with a carrying value of \\u20ac563 million as of December 31, 2022.  The Q2 2023 10-Q lists it as one of the registered securities traded on the New York Stock Exchange under the symbol MMM31, confirming its existence. However, no Q2 2023 financial data is available.\\n\\nFinal Answer:  The 1.500% Notes due 2031 had an initial issuance of \\u20ac500 million;  a carrying value of \\u20ac563 million as of December 31, 2022 is noted. No Q2 2023  financial information, such as carr

Processing Questions: 100%|██████████| 8/8 [11:55<00:00, 89.45s/it] 

{'content': None, 'refusal': None, 'role': 'assistant', 'tool_calls': [{'id': 'call_AjiWZY9fwlA2guhJt8jUtZhT', 'function': {'arguments': '{}', 'name': 'instructions_complete'}, 'type': 'function'}]}

Assistant:
 None
Processing complete.
{'content': "1. **Read and Understand Response**: The information retrieved only includes partial financial data for 3M, specifically for the years 2019 and 2020, along with some data for the first half of 2023. The available information is as follows:\n\n   **2020:**\n   - Net Debt: $13,727 million\n   - Free Cash Flow: $6,612 million\n   - Debt-to-Equity Ratio: 1.06\n\n   **2019:**\n   - Net Debt: $17,819 million\n   - Free Cash Flow: $5,371 million\n   - Debt-to-Equity Ratio: 1.75\n\n   **First Half of 2023:**\n   - Net Debt: $11,650 million\n   - Free Cash Flow: $1,932 million\n\n   There is no comprehensive data for 2018 or prior, leaving gaps in the analysis for those years.\n\n2. **Identify the exact step in the policy**: I am now at Step 6, whe




In [40]:
i = 0
# Print each row's information
for idx, row in filtered_df.iterrows():
    print(f"\nQuestion {i+1}:")
    print("Q:", row['question'])
    print("Reference Answer:", row['answer'])
    print("Generated Answer:", answers[i])
    i = i + 1
    print("-" * 80)  # Print a separator line


Question 1:
Q: What is the FY2018 capital expenditure amount (in USD millions) for 3M? Give a response to the question by relying on the details shown in the cash flow statement.
Reference Answer: $1577.00
Generated Answer: [{'role': 'system', 'content': '\nYou are a helpful assistant responsible for executing the policy on handling financial analysis tasks. \nYour task is to follow the policy exactly as it is written and perform the necessary actions.\n\nYou must explain your decision-making process across various steps.\n\n# Steps\n\n1. **Read and Understand Policy**: Carefully read and fully understand the given policy on handling financial analysis tasks.\n2. **Identify the exact step in the policy**: Determine which step in the policy you are at, and execute the instructions according to the policy.\n3. **Decision Making**: Briefly explain your actions and why you are performing them.\n4. **Action Execution**: Perform the actions required by calling any relevant functions and inp