In [1]:
from langchain_ollama import OllamaLLM
from tqdm import tqdm
import pandas as pd
import json

# Local Ollama model that answers every risk question.
llm = OllamaLLM(model="llama3.2")

# Number of rows kept for this trial run.
SAMPLE_SIZE = 10

df = pd.read_csv('prospectuses_data.csv')

# Filter out rows that have "failed parsing" in the Section ID column.
# The explicit .copy() makes the result an independent frame, so the column
# assignments below do not trigger SettingWithCopyWarning.
df = df[df['Section ID'] != "failed parsing"].copy()

# Risk questions, keyed by the name of the column that will hold the answer.
questions = {
    "Market Dynamics - a": "Exposure to cyclical products",
    "Market Dynamics - b": "Impact of demographic and structural trends",
    "Market Dynamics - c": "Seasonal industry volatility"
}

# Create the answer and evidence columns up front as empty strings so they
# have a string-compatible dtype before any value is written into them.
for column_name in questions:
    df[column_name] = ""
    df[f"{column_name} - Evidence"] = ""


print(df.shape)
# Keep only the first SAMPLE_SIZE rows; copy again so later cell-wise writes
# (df.at[...]) target this frame rather than a slice of the full one.
df = df.head(SAMPLE_SIZE).copy()
print(df.shape)

(74, 18)
(10, 18)


In [2]:
def _extract_json_object(response):
    """Return the JSON object contained in an LLM reply, or None if there is none."""
    text = response if isinstance(response, str) else str(response)
    candidates = [text]
    # Models often wrap the JSON in a code fence or surround it with prose,
    # so also try the outermost {...} span of the reply.
    start, end = text.find("{"), text.rfind("}")
    if 0 <= start < end:
        candidates.append(text[start:end + 1])
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        # Only a JSON object can carry the "Answer"/"Evidence" keys.
        if isinstance(parsed, dict):
            return parsed
    return None


def _as_clean_text(value):
    """Convert a JSON field to a stripped string; list items are joined with '; '."""
    if value is None:
        return ""
    if isinstance(value, (list, tuple)):
        return "; ".join(_as_clean_text(item) for item in value)
    return str(value).strip()


def analyze_prospectus_row_single_question(row, question, model=None):
    """Ask the LLM whether one prospectus excerpt indicates one risk.

    Args:
        row: Mapping-like record (for example a DataFrame row) providing the
            'Subsubsection Title', 'Subsubsection Text' and 'Prospectus ID'
            fields.
        question: Description of the risk to look for in the excerpt.
        model: Optional object exposing ``invoke(input=...)`` and returning
            the reply text. Defaults to the notebook-level ``llm``.

    Returns:
        Tuple ``(answer, evidence)`` of strings. ``answer`` is the reply's
        "Answer" value with surrounding whitespace removed, or
        "Parsing Error" when no JSON object could be read from the reply.
        ``evidence`` is the reply's "Evidence" value, or "" when it is absent
        or the reply could not be parsed.
    """
    if model is None:
        model = llm

    # System and user prompts
    system_prompt = "You are an expert in analyzing bond prospectuses and identifying specific risk factors. Provide clear 'Yes' or 'No' answers to questions. If 'Yes', provide exact phrases or sentences from the text that support your answer."

    # Format the user prompt using the row's data
    prompt = f"""
    {system_prompt}

    Given the following excerpt from a bond prospectus:

    Subsubsection Title: {row['Subsubsection Title']}
    Subsubsection Text: {row['Subsubsection Text']}

    Please determine whether the text indicates the following risk:

    "{question}"

    Answer "Yes" or "No". If "Yes", please provide the exact phrases or sentences from the text that support your answer.

    Please provide your answer in the following JSON format:

    {{
      "Answer": "Yes" or "No",
      "Evidence": "The exact phrases or sentences from the text if 'Yes', otherwise leave blank"
    }}
    """
    # Run the prompt through the model
    response = model.invoke(input=prompt)

    # Parse the response; anything that is not a JSON object is a parsing error
    result = _extract_json_object(response)
    if result is None:
        answer, evidence = "Parsing Error", ""
    else:
        # Fields may come back as null or as a list of phrases, so normalise
        # them to text instead of calling .strip() on them directly.
        answer = _as_clean_text(result.get("Answer", ""))
        evidence = _as_clean_text(result.get("Evidence", ""))

    # Print intermediary results for positive findings. The comparison is
    # case-insensitive because the prompt asks the model for "Yes", not "yes".
    if answer.lower() == "yes":
        print(f"Processing Prospectus ID: {row['Prospectus ID']}, Question: {question}")
        print(f"Answer: {answer}")
        print(f"Evidence: {evidence}\n{'-'*60}\n")

    return answer, evidence

In [3]:
# Ask every question about every row and write the model's verdict and its
# supporting quote back into that row's answer / evidence columns.
progress = tqdm(df.iterrows(), total=len(df))
for row_label, record in progress:
    for answer_column, risk_question in questions.items():
        verdict, quote = analyze_prospectus_row_single_question(record, risk_question)
        df.at[row_label, answer_column] = verdict
        df.at[row_label, f"{answer_column} - Evidence"] = quote

100%|██████████| 10/10 [00:21<00:00,  2.12s/it]


In [5]:
# Show the sampled frame, including the answer and evidence columns written by the loop.
df

Unnamed: 0,Prospectus ID,Original Filename,Section ID,Section Title,Subsection ID,Subsection Title,Subsubsection ID,Subsubsection Title,Subsubsection Text,Market Dynamics - a,Market Dynamics - b,Market Dynamics - c,LLM Answer,Evidence Text,Parsing Error,Market Dynamics - a - Evidence,Market Dynamics - b - Evidence,Market Dynamics - c - Evidence
3,4,FR0014001YE4.pdf,1,RISK FACTORS,1.1,1. Risks related to the Issuer,1.1.1,,Risk factors relating to the Issuer and the Gr...,No,No,No,,,,,,
4,4,FR0014001YE4.pdf,1,RISK FACTORS,1.2,2. Risks related to the Bonds 2.1 Risks relati...,1.2.1,2.1.1 The Bonds may be redeemed prior to maturity,The Issuer reserves the right to purchase Bond...,No,No,No,,,,,,
5,4,FR0014001YE4.pdf,1,RISK FACTORS,1.2,2. Risks related to the Bonds 2.1 Risks relati...,1.2.2,2.1.2 Change of control put option,"In accordance with each Condition 4(d), upon t...",No,No,No,,,,,,
6,4,FR0014001YE4.pdf,1,RISK FACTORS,1.2,2. Risks related to the Bonds 2.1 Risks relati...,1.2.3,2.1.3 Interest rate risks,As provided for in Condition 3 of the Terms an...,No,No,No,,,,,,
7,4,FR0014001YE4.pdf,1,RISK FACTORS,1.3,2.2 Risks for the Bondholders as creditors of ...,1.3.1,2.2.1 French insolvency law,"As a société anonyme incorporated in France, F...",No,No,No,,,,,,
8,4,FR0014001YE4.pdf,1,RISK FACTORS,1.3,2.2 Risks for the Bondholders as creditors of ...,1.3.2,2.2.2 Modification of the Terms and Conditions...,"As provided by each Condition 8, there are pro...",No,No,No,,,,,,
9,4,FR0014001YE4.pdf,1,RISK FACTORS,1.4,2.3 Risks relating to the market,1.4.1,2.3.1 No active secondary or market trading fo...,Application has been made for the Bonds to be ...,No,No,No,,,,,,
10,4,FR0014001YE4.pdf,1,RISK FACTORS,1.4,2.3 Risks relating to the market,1.4.2,2.3.2 Market value of the Bonds,The market value of the Bonds will be affected...,No,No,No,,,,,,
11,4,FR0014001YE4.pdf,1,RISK FACTORS,1.4,2.3 Risks relating to the market,1.4.3,2.3.3 Exchange rate risks and exchange controls,The Issuer will pay principal and interest on ...,No,No,No,,,,,,
12,4,FR0014001YE4.pdf,1,RISK FACTORS,1.5,Currency”) other than Euro. These include the ...,1.5.1,2.3.4 Potential conflict of interest,Certain of the Joint Lead Managers (as defined...,No,No,No,,,,,,
