In [1]:
# Step 1: Import necessary libraries
# --------------------------------------
# Import required libraries for document retrieval, reranking, and logging setup.
from sentence_transformers import CrossEncoder
import logging
import datetime
# link a path to ../financerag
import os

from financerag.rerank import CrossEncoderReranker
from financerag.retrieval import DenseRetrieval, SentenceTransformerEncoder
from financerag.tasks import FinDER, FinQABench, FinanceBench, TATQA, FinQA, ConvFinQA, MultiHiertt

import os  # Importing os to handle directory and file paths

# Setup basic logging configuration to show info level messages.
logging.basicConfig(level=logging.INFO)

  from tqdm.autonotebook import tqdm, trange


In [None]:
# Step 3: Initialize DenseRetriever model
# -------------------------------------
# Initialize the retrieval model using SentenceTransformers. This model will be responsible
# for encoding both the queries and documents into embeddings.
#
# You can replace 'intfloat/e5-large-v2' with any other model supported by SentenceTransformers.
# For example: 'intfloat/e5-large-v2', 'Linq-AI-Research/Linq-Embed-Mistral', etc.
encoder_model = SentenceTransformerEncoder(
    #model_name_or_path='intfloat/e5-large-v2',
    model_name_or_path='/home/ec2-user/FinanceRAG/notebook/fine_tuned_sentence_transformer_e5-large-v2_valanced/',
    query_prompt='query: ',
    doc_prompt='passage: ',
)
"""
import torch
torch.cuda.empty_cache()
encoder_model = SentenceTransformerEncoder(
    model_name_or_path='Linq-AI-Research/Linq-Embed-Mistral',
    #model_name_or_path='ProsusAI/finbert',
    query_prompt='query: ',
    #query_prompt = "financial question about the company's financial status: ",
    doc_prompt='passage: ',
    #doc_prompt = 'inquiry about financial metrics and performance: '
)
"""


INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: /home/ec2-user/FinanceRAG/notebook/fine_tuned_sentence_transformer_e5-large-v2/


'\nimport torch\ntorch.cuda.empty_cache()\nencoder_model = SentenceTransformerEncoder(\n    model_name_or_path=\'Linq-AI-Research/Linq-Embed-Mistral\',\n    #model_name_or_path=\'ProsusAI/finbert\',\n    query_prompt=\'query: \',\n    #query_prompt = "financial question about the company\'s financial status: ",\n    doc_prompt=\'passage: \',\n    #doc_prompt = \'inquiry about financial metrics and performance: \'\n)\n'

In [3]:
# Step 5: Initialize CrossEncoder Reranker
# --------------------------------------
# The CrossEncoder model will be used to rerank the retrieved documents based on relevance.
#
# You can replace 'cross-encoder/ms-marco-MiniLM-L-12-v2' with any other model supported by CrossEncoder.
# For example: 'cross-encoder/ms-marco-TinyBERT-L-2', 'cross-encoder/stsb-roberta-large', etc.
reranker = CrossEncoderReranker(
    model=CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')
    #model=CrossEncoder('cross-encoder/stsb-roberta-large')
)


INFO:sentence_transformers.cross_encoder.CrossEncoder:Use pytorch device: cuda


In [None]:
yyyy_mm_dd = datetime.datetime.now().strftime("%Y-%m-%d-%H")
output_dir = './results/'+yyyy_mm_dd
# make folder if not exist
os.makedirs(output_dir, exist_ok=True)

# Step 2: Initialize Multiple Tasks and Perform Steps 4-7
# -------------------------------------------------------
tasks = [FinDER, FinQABench, FinanceBench, TATQA, FinQA, ConvFinQA, MultiHiertt]

for task in tasks:
    task_name = task.__name__  # Get the name of the task
    print(f"Running task: {task_name}")
    
    # Step 2: Initialize the task
    finder_task = task()

    # Step 4: Perform retrieval
    retrieval_model = DenseRetrieval(
        model=encoder_model  # Ensure that encoder_model is defined earlier in the notebook
    )

    retrieval_result = finder_task.retrieve(
        retriever=retrieval_model
    )

    print(f"Retrieved results for {len(retrieval_result)} queries for {task_name}. Here's an example of the top 5 documents for the first query:")
    for q_id, result in retrieval_result.items():
        print(f"\nQuery ID: {q_id}")
        sorted_results = sorted(result.items(), key=lambda x: x[1], reverse=True)
        for i, (doc_id, score) in enumerate(sorted_results[:5]):
            print(f"  Document {i + 1}: Document ID = {doc_id}, Score = {score}")
        break  # Only show the first query

    # Step 6: Perform reranking
    # -------------------------
    # Rerank the top 100 retrieved documents using the CrossEncoder model.
    reranking_result = finder_task.rerank(
        reranker=reranker,
        results=retrieval_result,
        top_k=100,  # Rerank the top 100 documents
        batch_size=64
    )

    # Print a portion of the reranking results to verify the output.
    print(f"Reranking results for {len(reranking_result)} queries. Here's an example of the top 5 documents for the first query:")

    for q_id, result in reranking_result.items():
        print(f"\nQuery ID: {q_id}")
        # Sort the result to print the top 5 document ID and its score
        sorted_results = sorted(result.items(), key=lambda x: x[1], reverse=True)

        for i, (doc_id, score) in enumerate(sorted_results[:5]):
            print(f"  Document {i + 1}: Document ID = {doc_id}, Score = {score}")

        break  # Only show the first query


    # Step 7: Save results with the task-specific name
    # ------------------------------------------------ 
    task_output_dir = os.path.join(output_dir, task_name)

    # Ensure the directory exists
    os.makedirs(task_output_dir, exist_ok=True)

    # Save the results with the task-specific filename
    finder_task.save_results(output_dir=task_output_dir)

    # Confirm the results have been saved with the correct task name
    print(f"Results have been saved to {task_output_dir}/results.csv")


INFO:financerag.common.loader:Loading Corpus...


Running task: FinDER


INFO:financerag.common.loader:Loaded 13867 Documents.
INFO:financerag.common.loader:Corpus Example: {'id': 'ADBE20230004', 'title': 'ADBE OVERVIEW', 'text': 'Adobe is a global technology company with a mission to change the world through personalized digital experiences. For over four decades, Adobe’s innovations have transformed how individuals, teams, businesses, enterprises, institutions, and governments engage and interact across all types of media. Our products, services and solutions are used around the world to imagine, create, manage, deliver, measure, optimize and engage with content across surfaces and fuel digital experiences. We have a diverse user base that includes consumers, communicators, creative professionals, developers, students, small and medium businesses and enterprises. We are also empowering creators by putting the power of artificial intelligence (“AI”) in their hands, and doing so in ways we believe are responsible. Our products and services help unleash crea

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

INFO:financerag.retrieval.dense:Sorting corpus by document length...
INFO:financerag.retrieval.dense:Encoding corpus in batches... This may take a while.
INFO:financerag.retrieval.dense:Encoding batch 1/1...


Batches:   0%|          | 0/217 [00:00<?, ?it/s]

INFO:financerag.rerank.cross_encoder:Starting To Rerank Top-100....


Retrieved results for 216 queries for FinDER. Here's an example of the top 5 documents for the first query:

Query ID: q00001
  Document 1: Document ID = JNJ20232101, Score = 0.9447249174118042
  Document 2: Document ID = JNJ20230392, Score = 0.9394426345825195
  Document 3: Document ID = JNJ20230894, Score = 0.9382066130638123
  Document 4: Document ID = JNJ20232185, Score = 0.9378847479820251
  Document 5: Document ID = JNJ20232340, Score = 0.936808705329895


Batches:   0%|          | 0/338 [00:00<?, ?it/s]

INFO:financerag.tasks.BaseTask:Output directory set to: ./results/2024-11-09-09/FinDER/FinDER
INFO:financerag.tasks.BaseTask:Saving top 10 results to CSV file: ./results/2024-11-09-09/FinDER/FinDER/results.csv
INFO:financerag.tasks.BaseTask:Writing header ['query_id', 'corpus_id'] to CSV.
INFO:financerag.tasks.BaseTask:Top 10 results saved successfully to ./results/2024-11-09-09/FinDER/FinDER/results.csv
INFO:financerag.common.loader:Loading Corpus...


Reranking results for 216 queries. Here's an example of the top 5 documents for the first query:

Query ID: q00001
  Document 1: Document ID = MSFT20230512, Score = -8.349985122680664
  Document 2: Document ID = ADBE20230484, Score = -10.789011001586914
  Document 3: Document ID = JNJ20230926, Score = -11.020696640014648
  Document 4: Document ID = JNJ20230404, Score = -11.096385955810547
  Document 5: Document ID = JNJ20232175, Score = -11.107455253601074
Results have been saved to ./results/2024-11-09-09/FinDER/results.csv
Running task: FinQABench


INFO:financerag.common.loader:Loaded 92 Documents.
INFO:financerag.common.loader:Corpus Example: {'id': 'd4aa0660c', 'title': '', 'text': 'Apple Inc.\nCONSOLIDATED STATEMENTS OF OPERATIONS\n(In millions, except number of shares which are reflected in thousands and per share amounts)\nYears ended\nSeptember 24,\n2022September 25,\n2021September 26,\n2020\nNet sales:\n   Products $ 316,199 $ 297,392 $ 220,747 \n   Services  78,129  68,425  53,768 \nTotal net sales  394,328  365,817  274,515 \nCost of sales:\n   Products  201,471  192,266  151,286 \n   Services  22,075  20,715  18,273 \nTotal cost of sales  223,546  212,981  169,559 \nGross margin  170,782  152,836  104,956 \nOperating expenses:\nResearch and development  26,251  21,914  18,752 \nSelling, general and administrative  25,094  21,973  19,916 \nTotal operating expenses  51,345  43,887  38,668 \nOperating income  119,437  108,949  66,288 \nOther income/(expense), net  (334)  258  803 \nIncome before provision for income taxes 

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:financerag.retrieval.dense:Sorting corpus by document length...
INFO:financerag.retrieval.dense:Encoding corpus in batches... This may take a while.
INFO:financerag.retrieval.dense:Encoding batch 1/1...


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

INFO:financerag.rerank.cross_encoder:Starting To Rerank Top-100....


Retrieved results for 100 queries for FinQABench. Here's an example of the top 5 documents for the first query:

Query ID: q4aa0b116
  Document 1: Document ID = d4aa10922, Score = 0.012769793160259724
  Document 2: Document ID = d4aa05798, Score = 0.0027122662868350744
  Document 3: Document ID = d4aa0a52c, Score = 0.0026893301401287317
  Document 4: Document ID = d4a9ffd70, Score = 0.0025680395774543285
  Document 5: Document ID = d4aa0985c, Score = 0.0025219961535185575


Batches:   0%|          | 0/144 [00:00<?, ?it/s]

INFO:financerag.tasks.BaseTask:Output directory set to: ./results/2024-11-09-09/FinQABench/FinQABench
INFO:financerag.tasks.BaseTask:Saving top 10 results to CSV file: ./results/2024-11-09-09/FinQABench/FinQABench/results.csv
INFO:financerag.tasks.BaseTask:Writing header ['query_id', 'corpus_id'] to CSV.
INFO:financerag.tasks.BaseTask:Top 10 results saved successfully to ./results/2024-11-09-09/FinQABench/FinQABench/results.csv
INFO:financerag.common.loader:Loading Corpus...
INFO:financerag.common.loader:Loaded 180 Documents.
INFO:financerag.common.loader:Corpus Example: {'id': 'dd2af2336', 'title': 'PEPSICO_2022_10K', 'text': '6) Africa, Middle East and South Asia (AMESA), which includes all of our beverage and convenient food businesses in\nAfrica, the Middle East and South Asia; and\n7) Asia Pacific, Australia and New Zealand and China Region (APAC), which includes all of our beverage and convenient\nfood businesses in Asia Pacific, Australia and New Zealand, and China region.'}
INF

Reranking results for 100 queries. Here's an example of the top 5 documents for the first query:

Query ID: q4aa0b116
  Document 1: Document ID = d4aa0b1f2, Score = 7.899673938751221
  Document 2: Document ID = d4aa0a52c, Score = 5.015963554382324
  Document 3: Document ID = d4aa0a7d4, Score = 4.912598609924316
  Document 4: Document ID = d4aa10314, Score = 4.468316555023193
  Document 5: Document ID = d4aa0a9e6, Score = 4.2903971672058105
Results have been saved to ./results/2024-11-09-09/FinQABench/results.csv
Running task: FinanceBench


INFO:financerag.common.loader:Loaded 150 Queries.
INFO:financerag.common.loader:Query Example: {'id': 'qd2ac917a', 'text': 'What is the FY2019 - FY2020 total revenue growth rate for Block (formerly known as Square)? Answer in units of percents and round to one decimal place. Approach the question asked by assuming the standpoint of an investment banking analyst who only has access to the statement of income.'}
INFO:financerag.retrieval.dense:Encoding queries...


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

INFO:financerag.retrieval.dense:Sorting corpus by document length...
INFO:financerag.retrieval.dense:Encoding corpus in batches... This may take a while.
INFO:financerag.retrieval.dense:Encoding batch 1/1...


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

INFO:financerag.rerank.cross_encoder:Starting To Rerank Top-100....


Retrieved results for 150 queries for FinanceBench. Here's an example of the top 5 documents for the first query:

Query ID: qd2ac917a
  Document 1: Document ID = dd2acf5c0, Score = 0.014626741409301758
  Document 2: Document ID = dd2acf638, Score = 0.013571225106716156
  Document 3: Document ID = dd2acf912, Score = 0.013375810347497463
  Document 4: Document ID = dd2ac1222, Score = 0.012981723994016647
  Document 5: Document ID = dd2acf692, Score = 0.012376612983644009


Batches:   0%|          | 0/235 [00:00<?, ?it/s]

INFO:financerag.tasks.BaseTask:Output directory set to: ./results/2024-11-09-09/FinanceBench/FinanceBench
INFO:financerag.tasks.BaseTask:Saving top 10 results to CSV file: ./results/2024-11-09-09/FinanceBench/FinanceBench/results.csv
INFO:financerag.tasks.BaseTask:Writing header ['query_id', 'corpus_id'] to CSV.
INFO:financerag.tasks.BaseTask:Top 10 results saved successfully to ./results/2024-11-09-09/FinanceBench/FinanceBench/results.csv
INFO:financerag.common.loader:Loading Corpus...


Reranking results for 150 queries. Here's an example of the top 5 documents for the first query:

Query ID: qd2ac917a
  Document 1: Document ID = dd2acce74, Score = -0.29550787806510925
  Document 2: Document ID = dd2ac8f0e, Score = -2.4054250717163086
  Document 3: Document ID = dd2ac285c, Score = -5.096299648284912
  Document 4: Document ID = dd2ac8626, Score = -5.100713729858398
  Document 5: Document ID = dd2abf562, Score = -6.20806884765625
Results have been saved to ./results/2024-11-09-09/FinanceBench/results.csv
Running task: TATQA


INFO:financerag.common.loader:Loaded 2756 Documents.
INFO:financerag.common.loader:Corpus Example: {'id': 'd1b2e74c0', 'title': '', 'text': 'The following tables present the recorded investment by portfolio segment and by class, excluding commercial financing receivables and other miscellaneous financing receivables at December 31, 2019 and 2018. Commercial financing receivables are excluded from the presentation of financing receivables by portfolio segment, as they are short term in nature and the current estimated risk of loss and resulting impact to the company’s financing results are not material.\nWrite-offs of lease receivables and loan receivables were $16 million and $47 million, respectively, for the year ended December 31, 2019. Provisions for credit losses recorded for lease receivables and loan receivables were a release of $6 million and an addition of $2 million, respectively, for the year ended December 31, 2019.\nThe average recorded investment of impaired leases and l

Batches:   0%|          | 0/26 [00:00<?, ?it/s]

INFO:financerag.retrieval.dense:Sorting corpus by document length...
INFO:financerag.retrieval.dense:Encoding corpus in batches... This may take a while.
INFO:financerag.retrieval.dense:Encoding batch 1/1...


Batches:   0%|          | 0/44 [00:00<?, ?it/s]

Retrieved results for 1663 queries for TATQA. Here's an example of the top 5 documents for the first query:

Query ID: q1a73c1d4
  Document 1: Document ID = d1a71726c, Score = -0.013570000417530537
  Document 2: Document ID = d1a735c44, Score = -0.013681773096323013
  Document 3: Document ID = d1b39b600, Score = -0.013695847243070602
  Document 4: Document ID = d1b355fa6, Score = -0.013698129914700985
  Document 5: Document ID = d1b350d80, Score = -0.013710126280784607


INFO:financerag.rerank.cross_encoder:Starting To Rerank Top-100....


Batches:   0%|          | 0/2599 [00:00<?, ?it/s]

In [None]:
# Rerank the top 100 retrieved documents using the CrossEncoder model.
reranking_result = finder_task.rerank(
    reranker=reranker,
    results=retrieval_result,
    top_k=100,  # Rerank the top 100 documents
    batch_size=64
)

In [None]:
# merge all results
import pandas as pd
import glob

all_files = glob.glob(output_dir + "/*/*/results.csv")
li = []

for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header=0)
    li.append(df)

frame = pd.concat(li, axis=0, ignore_index=True)
frame.columns = ['query_id', 'corpus_id']
frame.to_csv(output_dir + "/all_results.csv", index=False)

In [None]:
#　Read the ground truth file