In [1]:
# ! pip install -q litellm tqdm PyMuPDF Pillow instructor openai python-dotenv weaviate-client pandas anthropic

In [54]:
import json
from openai import OpenAI
import os
import weaviate
from weaviate.classes.init import Auth
from weaviate.classes.query import Filter
from litellm import completion
import re
from anthropic import Anthropic
import pandas as pd

# --- API clients -------------------------------------------------------------
# Anthropic client; the key is read from the environment, so a missing
# ANTHROPIC_API_KEY fails here with a KeyError rather than at the first request.
anthropic_client = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])

# OpenAI client constructed without explicit arguments.
client = OpenAI()

# --- Model identifiers -------------------------------------------------------
O1_MODEL = "o1-mini"
# NOTE(review): GPT_MODEL is not referenced anywhere in the visible notebook.
GPT_MODEL = "gpt-4o-mini"

In [55]:
def search_web(query):
    """Answer ``query`` via the Perplexity chat API and inline the cited sources.

    The model's answer contains numeric markers such as ``[1]``; each marker
    whose number maps to an entry of the response's citation list is replaced
    with ``(<citation>)``. Markers without a matching citation are left as-is.

    Args:
        query: Free-text search query, sent as the user message.

    Returns:
        The answer text with resolvable citation markers replaced. An empty
        string is returned when the response carries no text content.

    Raises:
        KeyError: If ``PERPLEXITYAI_API_KEY`` is not set in the environment.
    """
    # Dedicated client for the Perplexity endpoint; named distinctly so it is
    # not mistaken for the module-level OpenAI client.
    perplexity_client = OpenAI(
        api_key=os.environ["PERPLEXITYAI_API_KEY"],
        base_url="https://api.perplexity.ai",
    )

    messages = [
        {
            "role": "system",
            "content": (
                "You are an artificial intelligence assistant and you need to "
                "answer like a Data collection engine with as much information as possible."
            ),
        },
        {
            "role": "user",
            "content": query,
        },
    ]

    response = perplexity_client.chat.completions.create(
        model="sonar",
        messages=messages,
    )

    # Content may be None; normalise so the regex substitution cannot fail.
    content = response.choices[0].message.content or ""
    # `citations` is read defensively: if the response object does not expose
    # it (or it is None) we simply have nothing to link.
    citations = getattr(response, "citations", None) or []

    def replace_citation_with_link(match):
        # Markers are 1-based; the citation list is 0-based.
        citation_idx = int(match.group(1)) - 1
        if 0 <= citation_idx < len(citations):
            return f"({citations[citation_idx]})"
        return match.group(0)

    return re.sub(r'\[(\d+)\]', replace_citation_with_link, content)


def calculate(num1: float, num2: float, operator: str) -> str:
    """Apply a basic arithmetic operator to two numbers.

    Args:
        num1: Left-hand operand.
        num2: Right-hand operand.
        operator: One of ``'+'``, ``'-'``, ``'*'``, ``'/'``.

    Returns:
        The result converted with ``str()`` (e.g. ``'3'`` or ``'2.0'``). The
        value is a string, not a float.

    Raises:
        ValueError: If ``operator`` is not one of the four supported symbols.
        ZeroDivisionError: If ``operator`` is ``'/'`` and ``num2`` is zero.
    """
    operations = {
        '+': lambda x, y: x + y,
        '-': lambda x, y: x - y,
        '*': lambda x, y: x * y,
        '/': lambda x, y: x / y,
    }
    if operator not in ('+', '-', '*', '/'):
        raise ValueError("Invalid operator. Expected one of '+', '-', '*', '/'.")
    # Checked explicitly so the error message stays stable and the function
    # does not rely on a helper defined elsewhere in the notebook.
    if operator == '/' and num2 == 0:
        raise ZeroDivisionError("Cannot divide by zero.")

    return str(operations[operator](num1, num2))

def raise_(ex):
    """Raise ``ex``.

    Exists so that an expression context (for example a lambda body), where a
    ``raise`` statement is not allowed, can still throw an exception.
    """
    raise ex

In [None]:
def get_weaviate_client():
    """Connect to the Weaviate Cloud cluster configured through the environment.

    Reads ``WCD_URL``, ``WCD_API_KEY``, ``OPENAI_API_KEY`` and
    ``VOYAGEAI_API_KEY`` from the environment; the two provider keys are
    forwarded as request headers.

    Returns:
        The connected client when ``is_ready()`` reports true, otherwise
        ``None``. In the not-ready case the connection is closed before
        returning so it is not leaked.

    Raises:
        KeyError: If any of the four environment variables is missing.
    """
    wcd_url = os.environ["WCD_URL"]
    wcd_api_key = os.environ["WCD_API_KEY"]
    openai_api_key = os.environ["OPENAI_API_KEY"]
    voyageai_api_key = os.environ["VOYAGEAI_API_KEY"]

    headers = {
        "X-VoyageAI-Api-Key": voyageai_api_key,
        "X-OpenAI-Api-Key": openai_api_key,
    }

    client = weaviate.connect_to_weaviate_cloud(
        cluster_url=wcd_url,
        auth_credentials=Auth.api_key(wcd_api_key),
        headers=headers,
    )

    if client.is_ready():
        print("Weaviate client is ready")
        return client

    print("Weaviate client is not ready")
    # The caller only receives None here, so nobody else could close this
    # connection; release it before giving up.
    client.close()
    return None

# Name passed to `collections.get(...)` when context is fetched.
workspace_name: str = "Test2"
# Shared connection created once at cell execution time. May be None when the
# cluster reports not ready (see get_weaviate_client's return value).
weaviate_client = get_weaviate_client()

In [57]:
# Load the file catalogue: a list of records, each providing at least the
# 'file_name' and 'description' keys used below.
with open("file_data.json", 'r') as json_file:
    existing_data = json.load(json_file)

# One "**<file_name>**: <description>" line per record, newline-terminated.
formatted_content = "".join(
    f"**{item['file_name']}**: {item['description']}\n"
    for item in existing_data
)

In [58]:
# Planner prompt. It is an f-string: `formatted_content` (the file catalogue)
# is interpolated once, when this cell runs, so the catalogue cell must be
# executed first.
# NOTE(review): the planner is told about `fetch_context` and `search_web`
# only; the executor's tool list also contains `calculate` — confirm whether
# the planner should know about it.
o1_prompt = f"""
You are a financial analysis assistant. The first input you will receive will be a complex financial task that needs to be carefully reasoned through to solve. 
Your task is to review the challenge, and create a focused and concise plan to analyze financial statements, assess risks, and provide insights.

You will have access to an LLM agent that is responsible for executing the plan that you create and will return results.

The LLM agent has access to the following functions:
    - fetch_context(Question, file_name)
        - The function fetches context data from relevant financial statements based on the provided question, which should include the file name.
    - search_web(query)
        - This function has access to the latest information from the web and can be used to fetch industry benchmarks and various other data that cannot be found in the statement documents.

**Available Files:**
    {formatted_content}

When creating a plan for the LLM to execute, break your instructions into a logical, step-by-step order, using the specified format:
    - **Main actions are numbered** (e.g., 1, 2, 3).
    - **Sub-actions are lettered** under their relevant main actions (e.g., 1a, 1b).
        - **Sub-actions should start on new lines**
    - **Specify conditions using clear 'if...then...else' statements** (e.g., 'If the financial statement shows a profit, then...').
    - **For actions that require using one of the above functions defined**, write a step to call a function using backticks for the function name (e.g., `call the fetch_context function`).
        - Ensure that the proper input arguments are given to the model for instruction. There should not be any ambiguity in the inputs.
    - **The last step** in the instructions should always be calling the `instructions_complete` function. This is necessary so we know the LLM has completed all of the instructions you have given it.
    - **Detailed steps** The plan generated must be extremely detailed and thorough with explanations at every step.
    - **Response Depth Analysis**
        - Provide only the requested information when the user asks for simple identification or specific numbers, avoiding any additional analysis or interpretation. 

    <example>
    <scenario>
    What was the percentage contribution of the healthcare segment to 3M's total revenue in 2022?
    </scenario>
    <response>
        # Financial Analysis Plan to Calculate Healthcare Segment's Revenue Contribution in 2022
        ## 1. Gather Relevant Financial Data  
        a. `fetch_context(Question="Provide total revenue for 3M in 2022.", file_name="3M_2022_10K.pdf")`  
        b. `fetch_context(Question="Provide revenue for the healthcare segment in 2022.", file_name="3M_2022_10K.pdf")`  
            
        ## 2. Calculate Percentage Contribution  
        a. Divide the healthcare segment's revenue by the total revenue for 2022:   
        b. Multiply the result by 100 to express the contribution as a percentage:  

        ## 3. Finalize and Complete Instructions  
        - `instructions_complete()`  
    </response>
    </example>
Use markdown format when generating the plan with each step and sub-step.
Please find the scenario below.
"""

In [59]:
# System prompt template for the Claude executor.
# NOTE: this is a plain string literal (not an f-string), so `{policy}` below
# is a literal placeholder at definition time. It only receives a value if the
# code that uses this template substitutes it, e.g. with
# `claude_system_prompt.format(policy=...)`.
claude_system_prompt = """
You are a helpful assistant responsible for executing the policy on handling financial analysis tasks. 
Your task is to follow the policy exactly as it is written and perform the necessary actions.

You must explain your decision-making process across various steps.

# Steps

1. **Read and Understand Policy**: Carefully read and fully understand the given policy on handling financial analysis tasks.
2. **Identify the exact step in the policy**: Determine which step in the policy you are at, and execute the instructions according to the policy.
3. **Decision Making**: Briefly explain your actions and why you are performing them.
4. **Action Execution**: Perform the actions required by calling any relevant functions and input parameters.
5. **Use of External Knowledge**: If specific industry trends or benchmark data are required and not available in the policy or context, use search_web function to fetch information.

POLICY:
{policy}
"""

In [60]:
def _param(json_type, description, **extra):
    """Describe one tool argument: its JSON type, description and any extra keys."""
    return {"type": json_type, "description": description, **extra}


def _tool(name, description, properties, required):
    """Assemble a tool definition whose input schema is an object with the given properties."""
    return {
        "name": name,
        "description": description,
        "input_schema": {
            "type": "object",
            "properties": properties,
            "required": required,
        },
    }


# Tool definitions handed to the model via the `tools=` argument.
TOOLS = [
    _tool(
        "fetch_context",
        "Fetches context data from relevant financial statements based on the provided question.",
        {
            "Question": _param("string", "The question to fetch context data for."),
            "file_name": _param("string", "Name of the file to fetch the context from."),
        },
        ["Question", "file_name"],
    ),
    _tool(
        "search_web",
        "Function performs a web search and returns relevant information based on the provided query",
        {
            "query": _param("string", "The search query to be processed"),
        },
        ["query"],
    ),
    _tool(
        "calculate",
        "Perform basic arithmetic calculations between two numbers",
        {
            "num1": _param("number", "The first number in the calculation"),
            "num2": _param("number", "The second number in the calculation"),
            "operator": _param(
                "string",
                "The arithmetic operator to use",
                enum=["+", "-", "*", "/"],
            ),
        },
        ["num1", "num2", "operator"],
    ),
    _tool(
        "instructions_complete",
        "Function should be called when we have completed ALL of the instructions.",
        {
            "final_report": _param("string", "Final Report based on the analysis."),
        },
        ["final_report"],
    ),
]

In [61]:
def fetch_context(Question, file_name):
    """Answer ``Question`` from the stored chunks of one source file.

    Runs a hybrid query (``limit=15``, ``alpha=0.5``) against the
    ``workspace_name`` collection, restricted to objects whose ``source``
    property equals ``file_name``, wraps each hit's ``content`` in
    ``<Content>`` tags and asks an LLM to answer from that context only.

    Args:
        Question: The question to answer; also used as the search query.
        file_name: Value the ``source`` property must equal.

    Returns:
        The LLM's answer with surrounding whitespace stripped. On any
        exception the error is printed and an error-description string is
        returned instead of raising.
    """
    try:
        collection = weaviate_client.collections.get(workspace_name)

        search_result = collection.query.hybrid(
            query=Question,
            limit=15,
            alpha=0.5,
            filters=Filter.by_property("source").equal(file_name)
        )

        # Each retrieved chunk becomes its own <Content> section.
        llm_input = "".join(
            f"<Content>\n{hit.properties['content']}\n</Content>\n\n"
            for hit in search_result.objects
        )

        system_prompt = """
        You are a specialized Financial Analysis AI focused on extracting concise, data-driven insights from financial documents. 
        Provide short, precise answers with a brief explanation.

        **Remember**:  
        - Stick strictly to information from the documents.  
        - Keep responses focused on numerical data and short insights.  
        - Avoid speculation or overly detailed elaboration.  
        - Provide a brief but clear thinking process, then a concise final answer.
        """

        user_prompt = f"""
        Context:
        <Context>
            {llm_input}
        </Context>

        Question: 
        <Question>
            {Question}
        </Question>

        Please provide a concise, reader-friendly answer.
        - Provide direct, concise answers suitable for FAQ format
        - Focus on accuracy and readability
        - Include only information found in the provided context
        - If information is incomplete or unclear, state this explicitly
        """

        chat = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
        llm_reply = completion(
            model="openrouter/google/gemini-flash-1.5",
            messages=chat
        )
        return llm_reply.choices[0].message.content.strip()
    except Exception as e:
        # Best-effort by design: the failure is reported as text to the caller.
        error_text = str(e)
        print(f"Error occurred: {error_text}")
        return f"An error occurred while processing: {error_text}"

In [62]:
# Dispatch table: tool name -> local callable, keyed by each function's own name.
# `instructions_complete` has no entry here.
function_mapping = {
    tool_fn.__name__: tool_fn
    for tool_fn in (fetch_context, search_web, calculate)
}

In [63]:
def append_message(message_list, message):
    """Record ``message`` in ``message_list`` and echo it to stdout.

    The message is appended first, then printed in a layout chosen by its
    ``'type'`` key: ``status``, ``plan``, ``assistant``, ``function_call`` and
    ``function_response`` each have their own format; any other (or missing)
    type prints the ``'content'`` value, or an empty line if there is none.

    Args:
        message_list: List that receives the message (mutated in place).
        message: Dict describing the event.
    """
    message_list.append(message)

    kind = message.get('type', '')
    if kind == 'status':
        parts = (message['message'],)
    elif kind == 'plan':
        parts = ("\nPlan:\n", message['content'])
    elif kind == 'assistant':
        parts = ("\nAssistant:\n", message['content'])
    elif kind == 'function_call':
        parts = (f"\nFunction call: {message['function_name']} with arguments {message['arguments']}",)
    elif kind == 'function_response':
        parts = (f"\nFunction response for {message['function_name']}: {message['response']}",)
    else:
        parts = (message.get('content', ''),)

    # A single print keeps the separator behaviour of the multi-argument forms.
    print(*parts)

In [64]:
def call_o1(scenario):
    """Ask the planner model for a plan covering ``scenario``.

    The planner instructions (``o1_prompt``) and the scenario text are merged
    into a single user message sent to ``O1_MODEL``.

    Args:
        scenario: The task description to plan for.

    Returns:
        The content of the first choice's message.
    """
    planner_request = f"""
    {o1_prompt}
        
    Scenario:
    {scenario}

    Please provide the next steps in your plan.
    """

    planner_reply = client.chat.completions.create(
        model=O1_MODEL,
        messages=[{'role': 'user', 'content': planner_request}]
    )
    return planner_reply.choices[0].message.content

In [65]:
def call_anthropic(message_list, plan, model="claude-3-5-haiku-20241022", max_tokens=1024):
    """Execute ``plan`` with Claude, running requested tools until it finishes.

    The plan is sent as the first user message and is also substituted into
    the ``{policy}` placeholder of ``claude_system_prompt``. While the model
    stops with ``stop_reason == "tool_use"``, every ``tool_use`` block of the
    turn is executed through ``function_mapping`` and answered with a
    ``tool_result``. The loop ends when the model calls
    ``instructions_complete`` or stops for any other reason.

    Args:
        message_list: Accepted for interface compatibility; not used here.
        plan: Plan text produced by the planner.
        model: Model identifier for the Messages API.
        max_tokens: Per-turn output token limit.

    Returns:
        ``(messages, final_answer)``. ``messages`` is the conversation sent to
        the API; ``final_answer`` is the input of the ``instructions_complete``
        call, or ``""`` if the model never called it.

    Raises:
        ValueError: If the model requests a tool missing from ``function_mapping``.
    """
    # The template is a plain string, so without this substitution the model
    # would be shown the literal text "{policy}".
    system_prompt = claude_system_prompt.format(policy=plan)

    messages = [
        {'role': 'user', 'content': plan}
    ]

    def request_turn():
        # One model turn over the conversation accumulated so far.
        return anthropic_client.messages.create(
            model=model,
            max_tokens=max_tokens,
            system=system_prompt,
            tools=TOOLS,
            messages=messages
        )

    final_answer = ""
    response = request_turn()
    messages.append({"role": "assistant", "content": response.content})

    while response.stop_reason == "tool_use":
        tool_results = []
        finished = False

        # A turn may mix text with one or more tool_use blocks; handle each
        # block by its type rather than by its position in the list.
        for block in response.content:
            block_type = getattr(block, "type", None)

            if block_type == "text":
                if block.text:
                    print("\nModel Response:", block.text)
                    print("-" * 80)
                continue
            if block_type != "tool_use":
                continue

            print("\nCalling Tool:", block.name)
            print("-" * 80)

            if block.name == 'instructions_complete':
                final_answer = block.input
                print("\n", final_answer)
                print("-" * 80)
                finished = True
                break

            if block.name not in function_mapping:
                raise ValueError(f"Unknown tool: {block.name}")

            print("\nInput Arguments:", block.input)
            print("-" * 80)
            result = {"type": "tool_result", "tool_use_id": block.id}
            try:
                res = str(function_mapping[block.name](**block.input))
            except Exception as exc:
                # Report the failure to the model instead of aborting the
                # whole run (e.g. a division by zero inside `calculate`).
                res = f"Tool '{block.name}' failed: {exc}"
                result["is_error"] = True
            result["content"] = res
            print("\nTool Response:", res)
            print("-" * 80)
            tool_results.append(result)

        # Stop when the plan is complete, or when there is nothing to answer
        # (an empty user message would be rejected by the API).
        if finished or not tool_results:
            break

        messages.append({"role": "user", "content": tool_results})
        response = request_turn()
        messages.append({"role": "assistant", "content": response.content})

    return messages, final_answer
    

In [66]:
def process_scenario(message_list, scenario):
    """Plan and then execute one scenario, logging progress along the way.

    Args:
        message_list: List that collects the status and plan messages.
        scenario: Task text handed to the planner.

    Returns:
        The ``(messages, answer)`` pair returned by ``call_anthropic``.
    """
    separator = "-" * 80

    def status(text):
        # Status updates all share the same message shape.
        append_message(message_list, {'type': 'status', 'message': text})

    status('Generating plan...')
    plan = call_o1(scenario)
    print(separator)
    append_message(message_list, {'type': 'plan', 'content': plan})
    print(separator)

    status('Executing plan...')
    print(separator)
    messages, answer = call_anthropic(message_list, plan)

    status('Processing complete.')
    return messages, answer

In [67]:
# scenario_text = ("What is the FY2019 fixed asset turnover ratio for Activision Blizzard? Fixed asset turnover ratio is defined as: FY2019 revenue / (average PP&E between FY2018 and FY2019). Round your answer to two decimal places. Base your judgments on the information provided primarily in the statement of income and the statement of financial position.")

# message_list = []
# messages = process_scenario(message_list, scenario_text)

In [None]:
# Alternatives matched against `doc_name`; joined into one regex.
# Matching is case-insensitive and unanchored, so a short token such as "AES"
# matches any doc_name that merely contains those letters.
doc_name_pattern = "|".join(["AES", "ADOBE", "ACTIVISIONBLIZZARD", "AMAZON", "AMCOR"])

df_questions = pd.read_json("financebench_open_source.jsonl", lines=True)
doc_name_matches = df_questions["doc_name"].str.contains(doc_name_pattern, case=False, na=False)
filtered_df = df_questions[doc_name_matches]
len(filtered_df)

In [69]:
# Question counter and the list of generated answers, kept in their own cell
# so re-running the generation loop does not reset them.
i, answers = 0, []

In [None]:
# Run the plan-and-execute pipeline once per selected question. `i` and
# `answers` come from an earlier cell and are advanced in step.
for idx, row in filtered_df.iterrows():
    scenario_text = row['question']
    print(f"\nQuestion {i+1}:")
    print("Q:", scenario_text)

    message_list = []  # fresh log per question
    messages, answer = process_scenario(message_list, scenario_text)

    answers.append(answer)
    i += 1

In [None]:
# i = 8
# for idx, row in filtered_df.iloc[8:].iterrows(): 
#     print(f"\nQuestion {i+1}:")
#     print("Q:", row['question'])
#     scenario_text = (row['question'])
#     message_list = []
#     messages, answer = process_scenario(message_list, scenario_text)
#     answers.append(answer)
#     i += 1

In [None]:
# Side-by-side view of reference and generated answers; `answers` is indexed
# by position, in the same order the questions were iterated.
i = 0

for idx, row in filtered_df.iterrows():
    print(f"\nQuestion {i+1}:")
    print("Q:", row['question'])
    print("Reference Answer:", row['answer'])
    # print(row['justification'], row['evidence'])

    print("Generated Answer:", answers[i])
    print("-" * 80)
    i += 1

In [None]:
results = []

if len(answers) != len(filtered_df):
    # Surface a partial run instead of exporting a silently shortened file.
    print(f"Warning: {len(answers)} answers for {len(filtered_df)} questions; "
          "only questions that have an answer are exported.")

# Pair each question with its answer by position. The numbering is derived
# here rather than taken from the notebook-global counter `i`, whose value is
# whatever an earlier cell left behind (which made `answers[i]` go out of range).
for question_number, ((idx, row), answer) in enumerate(zip(filtered_df.iterrows(), answers), start=1):
    print(f"\nQuestion {question_number}:")
    print("Q:", row['question'])
    print("Reference Answer:", row['answer'])
    print("Generated Answer:", answer)

    # An answer is either the dict passed to `instructions_complete`, a JSON
    # string of such a dict, or free text; extract the report where possible.
    if isinstance(answer, dict):
        final_report = answer.get('final_report', str(answer))
    else:
        try:
            parsed = json.loads(answer)
        except (json.JSONDecodeError, TypeError):
            final_report = answer
        else:
            # Valid JSON that is not an object (e.g. "123") has no `.get`.
            final_report = parsed.get('final_report', answer) if isinstance(parsed, dict) else answer

    results.append({
        "question_number": question_number,
        "question": row['question'],
        "reference_answer": row['answer'],
        "generated_answer": final_report
    })

    print("-" * 80)

with open('evaluation_results.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=4, ensure_ascii=False)