**In this Notebook we extract the remaining SubScore Labels**

In [19]:
from langchain_ollama import OllamaLLM
from tqdm import tqdm

import json
import numpy as np
import os
import pandas as pd
import re

**Code to extract unique score combinations**

In [None]:
!pip install --upgrade numpy

In [None]:
rms_with_fundamental_score = pd.read_pickle('./rms_with_fundamental_score.pkl')
rms_with_fundamental_score.head(1)

In [None]:
unique_score_combinations = rms_with_fundamental_score[['CategoryGroup', 'Category', 'TaggedCharacteristics']].drop_duplicates()
# Replace all types of newlines and excessive whitespace in TaggedCharacteristics
unique_score_combinations['TaggedCharacteristics'] = unique_score_combinations['TaggedCharacteristics'].str.replace(r'[\r\n]+', ' ', regex=True)

# Function to expand TaggedCharacteristics if it's a JSON string with multiple items
def expand_tagged_characteristics(row):
    try:
        characteristics = json.loads(row['TaggedCharacteristics'])
        if isinstance(characteristics, list):
            # Replace newlines within each CharacteristicText
            return pd.DataFrame([{
                'CategoryGroup': row['CategoryGroup'],
                'Category': row['Category'],
                'TaggedCharacteristics': char['CharacteristicText'].replace('\r', ' ').replace('\n', ' '),
                'CharacteristicInfluence': char.get('CharacteristicInfluence', None)  # Handle missing keys
            } for char in characteristics])
        else:
            # If it's a single item or not a list, replace newlines if it's a string
            if isinstance(characteristics, str):
                characteristics = characteristics.replace('\r', ' ').replace('\n', ' ')
            return pd.DataFrame([{
                'CategoryGroup': row['CategoryGroup'],
                'Category': row['Category'],
                'TaggedCharacteristics': characteristics,
                'CharacteristicInfluence': row.get('CharacteristicInfluence', None)
            }])
    except (json.JSONDecodeError, TypeError):
        # If parsing fails, replace newlines in the original TaggedCharacteristics
        cleaned_text = row['TaggedCharacteristics'].replace('\r', ' ').replace('\n', ' ')
        return pd.DataFrame([{
            'CategoryGroup': row['CategoryGroup'],
            'Category': row['Category'],
            'TaggedCharacteristics': cleaned_text,
            'CharacteristicInfluence': row.get('CharacteristicInfluence', None)
        }])

# Applying the function to each row and combining results
expanded_unique_score_combinations = pd.concat(
    unique_score_combinations.apply(expand_tagged_characteristics, axis=1).to_list(),
    ignore_index=True
)

# Sort, drop duplicates, and save to CSV
unique_score_combinations = expanded_unique_score_combinations.sort_values(by=['CategoryGroup', 'Category', 'CharacteristicInfluence']).drop_duplicates()
unique_score_combinations.to_csv('unique_score_combinations.csv', index=False)

**Defining the questions and instantiating the LLM**

In [20]:
# Define the questions corresponding to each column
questions_market_dynamics = {
    "Market Dynamics - a": "Does the text mention that the company is exposed to risks associated with cyclical products?",
    "Market Dynamics - b": "Does the text mention risks related to demographic or structural trends affecting the market?",
    "Market Dynamics - c": "Does the text mention risks due to seasonal volatility in the industry?"
}
questions_intra_industry_competition = {
    "Intra-Industry Competition - a": "Does the text mention that market pricing for the company's products or services is irrational or not based on fundamental factors?",
    "Intra-Industry Competition - b": "Does the text mention that the market is highly fragmented with no clear leader or that there is only one dominant leader?",
    "Intra-Industry Competition - c": "Does the text mention low barriers to entry in the industry, making it easy for new competitors to enter the market?"
}
questions_regulatory_framework = {
    "Regulatory Framework - a": "Does the text mention that the industry is subject to a high degree of regulatory scrutiny?",
    "Regulatory Framework - b": "Does the text mention a high dependency on regulation or being a beneficiary from regulation in an unstable regulatory environment?"
}
questions_technology_risk = {
    "Technology Risk - a": "Does the text mention that the industry is susceptible to rapid technological advances or innovations?",
    "Technology Risk - b": "Does the text mention that the company is perceived as a disruptor or is threatened by emerging technological changes?"
}

all_question_dicts = [
    questions_market_dynamics,
    questions_intra_industry_competition,
    questions_regulatory_framework,
    questions_technology_risk
]

# Original questions
questions_market_dynamics_original = {
    "Market Dynamics - a": "Exposure to cyclical products",
    "Market Dynamics - b": "Impact of demographic and structural trends",
    "Market Dynamics - c": "Seasonal industry volatility"
}
questions_intra_industry_competition_original = {
    "Intra-Industry Competition - a": "Market pricing has not shown to be rational",
    "Intra-Industry Competition - b": "Highly fragmented market with no clear leader or only one leader",
    "Intra-Industry Competition - c": "Low barriers to entry"
}
questions_regulatory_framework_original = {
    "Regulatory Framework - a": "Industry has high degree of regulatory scrutiny",
    "Regulatory Framework - b": "High dependency on regulation or is a beneficiary from regulation in an unstable regulatory environment"
}
questions_technology_risk_original = {
    "Technology Risk - a": "Industry susceptibility to technological advances",
    "Technology Risk - b": "Company viewed as a disruptee/threatened by technological change"
}


In [21]:
# Initialize the language model
llm = OllamaLLM(model="llama3.2")

# Check if the processed file exists; if not, process the raw data
processed_file_path = '../data/prospectuses_data_processed.csv'
raw_file_path = '../data/prospectuses_data.csv'

# Check if processed file exists
if os.path.exists(processed_file_path):
    df = pd.read_csv(processed_file_path)
else:
    print("Processed file not found. Processing raw data...")
    df = pd.read_csv(raw_file_path)
    # Filter out rows that have "failed parsing" in the Section ID column
    df = df[df['Section ID'] != "failed parsing"]

# Ensure the relevance and evidence columns are created with a compatible data type
for question_dict in all_question_dicts:
    # Iterate through each question key in the current dictionary
    for column_name in question_dict.keys():
        if column_name in df.columns:
            df[column_name] = df[column_name].astype('string')
        else:
            df[column_name] = ""

df.head(2)
# print(df.shape)

Unnamed: 0,Prospectus ID,Original Filename,Section ID,Section Title,Subsection ID,Subsection Title,Subsubsection ID,Subsubsection Title,Subsubsection Text,Market Dynamics - a,Market Dynamics - b,Market Dynamics - c,Parsing Error,Intra-Industry Competition - a,Intra-Industry Competition - b,Intra-Industry Competition - c,Regulatory Framework - a,Regulatory Framework - b,Technology Risk - a,Technology Risk - b
0,235,Final Offerings 2020.pdf,1,RISK FACTORS,1.1,,1.1.1,,_An investment in the Notes involves a high de...,Not Relevant,Highly Relevant: the risks described below,Not Relevant,,Not Relevant,Not Relevant,Not Relevant,Highly Relevant: Subsubsection Title: ... and ...,Highly Relevant,Not Relevant,Not Relevant
1,16,Final Offerings 2020.pdf,1,RISK FACTORS,1.1,"Risks Relating to the Group’s Business, Techno...",1.1.1,The Group faces significant competition in eac...,The French telecommunications market is a matu...,Highly Relevant: Various evidence throughout t...,Highly Relevant,Highly Relevant,,Highly Relevant,Highly Relevant: ...the Group also competes wi...,Highly Relevant: The exact phrases or sentence...,Highly Relevant: Several evidence are presente...,Highly Relevant,Highly Relevant: This is a highly relevant ans...,Highly Relevant: The Group also faces competit...


In [22]:
def extract_fields(response):
    # Remove any newlines and extra spaces
    response = ' '.join(response.strip().split())

    # Extract the Relevance field
    relevance_match = re.search(r'"Relevance"\s*:\s*"([^"]+)"', response)
    if relevance_match:
        relevance = relevance_match.group(1).strip()
    else:
        relevance = "Parsing Error"

    # Extract the Evidence field(s)
    evidence_match = re.search(r'"Evidence"\s*:\s*(.+?)(?:,?\s*"[^"]+"\s*:|\s*}$)', response)
    if evidence_match:
        evidence_str = evidence_match.group(1).strip()
        # Remove any trailing commas or braces
        evidence_str = evidence_str.rstrip(', }')
        # Split the evidence_str into individual evidence items
        # Evidence items are strings enclosed in double quotes
        evidence_items = re.findall(r'"([^"]+)"', evidence_str)
        evidence = evidence_items
    else:
        evidence = []

    return relevance, evidence


def analyze_prospectus_row_single_question(row, question):
    # System and user prompts
    system_prompt = "You are an expert in analyzing bond prospectuses and identifying specific risk factors."

    # Format the user prompt using the row's data
    prompt = f"""
{system_prompt}

For the following question and text, judge whether the text is "Highly Relevant", "Somewhat Relevant", or "Not Relevant".

Question:
{question}

Text:
Subsubsection Title: {row['Subsubsection Title']}
Subsubsection Text: {row['Subsubsection Text']}


Please provide your answer in the following JSON format:

{{
  "Relevance": "Highly Relevant", "Somewhat Relevant", or "Not Relevant",
  "Evidence": "The exact phrases or sentences from the document that support your assessment; otherwise, leave blank."
}}

Note: Only provide the JSON response without any additional text.
"""
    # Run the prompt through the model
    response = llm.invoke(input=prompt)

    # Parse the response
    try:
        # Extract the Relevance and Evidence fields
        relevance, evidence_list = extract_fields(response)
        # Join multiple evidence items into a single string
        evidence = '; '.join(evidence_list)
    except Exception as e:
        relevance = "Parsing Error"
        evidence = ""

    # Combine relevance and evidence
    if relevance in ["Highly Relevant", "Somewhat Relevant"] and evidence:
        combined_answer = f"{relevance}: {evidence}"
    elif relevance in ["Highly Relevant", "Somewhat Relevant"]:
        combined_answer = relevance
    elif relevance == "Not Relevant":
        combined_answer = "Not Relevant"
    else:
        combined_answer = "Parsing Error"

    # For debugging
    if combined_answer == "Parsing Error":
        print("Parsing Error encountered. Response was:")
        print(response)

    return combined_answer

**Run the LLM Processing**

In [26]:
import time

# Initialize counter for new rows processed
new_rows_processed = 0

# Iterate over each row in the DataFrame with a progress bar
for index, row in tqdm(df.iterrows(), total=df.shape[0], desc="Processing Rows"):
    row_processed = False  # Flag to check if we processed any new data in this row

    for question_dict in all_question_dicts:
        for column_name, question in question_dict.items():
            # Check if the answer column is already filled
            if pd.notnull(df.at[index, column_name]) and df.at[index, column_name] != "":
                # Skip processing this row for this question
                continue
            combined_answer = analyze_prospectus_row_single_question(row, question)
            df.at[index, column_name] = combined_answer
            row_processed = True  # We processed new data in this row

    if row_processed:
        new_rows_processed += 1

    # Save progress every 50 rows
    if (index + 1) % 50 == 0:
        df.to_csv(processed_file_path, index=False)
        # print(f"Progress saved at row {index + 1}")

    # After processing 10 new rows, sleep for 30 seconds
    if new_rows_processed >= 10:
        df.to_csv(processed_file_path, index=False)  # Save before sleeping
        print(f"Processed 10 new rows. Pausing for 30 seconds.")
        time.sleep(30)
        new_rows_processed = 0  # Reset counter

# Save the final DataFrame after processing all rows
df.to_csv(processed_file_path, index=False)
print("All rows have been processed and saved.")

Processing Rows:  52%|█████▏    | 4168/7952 [03:15<4:52:02,  4.63s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  53%|█████▎    | 4178/7952 [06:47<16:20:02, 15.58s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  53%|█████▎    | 4188/7952 [09:15<12:57:02, 12.39s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  53%|█████▎    | 4198/7952 [11:59<14:04:01, 13.49s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  53%|█████▎    | 4208/7952 [15:41<19:57:27, 19.19s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  53%|█████▎    | 4218/7952 [18:29<14:53:32, 14.36s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  53%|█████▎    | 4228/7952 [22:36<25:53:15, 25.03s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  53%|█████▎    | 4238/7952 [25:25<15:05:28, 14.63s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  53%|█████▎    | 4248/7952 [28:22<15:15:03, 14.82s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  54%|█████▎    | 4258/7952 [31:33<17:35:35, 17.15s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  54%|█████▎    | 4268/7952 [34:52<17:06:03, 16.71s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  54%|█████▍    | 4277/7952 [37:25<12:37:14, 12.36s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  54%|█████▍    | 4288/7952 [41:37<21:17:07, 20.91s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  54%|█████▍    | 4298/7952 [44:01<11:22:17, 11.20s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  54%|█████▍    | 4308/7952 [46:55<17:01:20, 16.82s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  54%|█████▍    | 4318/7952 [50:05<14:08:43, 14.01s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  54%|█████▍    | 4328/7952 [53:06<14:59:45, 14.90s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  55%|█████▍    | 4338/7952 [56:41<21:36:24, 21.52s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  55%|█████▍    | 4348/7952 [1:00:43<20:43:28, 20.70s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  55%|█████▍    | 4358/7952 [1:04:19<22:46:11, 22.81s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  55%|█████▍    | 4368/7952 [1:09:03<25:19:28, 25.44s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  55%|█████▌    | 4378/7952 [1:12:28<18:25:18, 18.56s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  55%|█████▌    | 4388/7952 [1:15:27<14:22:29, 14.52s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  55%|█████▌    | 4398/7952 [1:18:23<13:32:37, 13.72s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  55%|█████▌    | 4408/7952 [1:22:25<22:56:23, 23.30s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  56%|█████▌    | 4418/7952 [1:26:37<20:52:31, 21.27s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  56%|█████▌    | 4428/7952 [1:31:10<18:05:30, 18.48s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  56%|█████▌    | 4438/7952 [1:34:31<22:37:58, 23.19s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  56%|█████▌    | 4448/7952 [1:39:40<25:15:18, 25.95s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  56%|█████▌    | 4458/7952 [1:42:34<12:04:26, 12.44s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  56%|█████▌    | 4468/7952 [1:46:01<20:04:09, 20.74s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  56%|█████▋    | 4478/7952 [1:49:24<17:24:28, 18.04s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  56%|█████▋    | 4488/7952 [1:52:07<15:01:10, 15.61s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  57%|█████▋    | 4498/7952 [1:54:59<12:46:25, 13.31s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  57%|█████▋    | 4508/7952 [1:58:30<15:27:37, 16.16s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  57%|█████▋    | 4518/7952 [2:02:05<17:11:35, 18.02s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  57%|█████▋    | 4528/7952 [2:04:56<13:18:29, 13.99s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  57%|█████▋    | 4538/7952 [2:08:11<15:58:32, 16.85s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  57%|█████▋    | 4548/7952 [2:12:46<17:28:47, 18.49s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  57%|█████▋    | 4558/7952 [2:16:03<15:17:12, 16.21s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  57%|█████▋    | 4568/7952 [2:20:38<25:31:06, 27.15s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  58%|█████▊    | 4578/7952 [2:23:37<14:23:30, 15.36s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  58%|█████▊    | 4588/7952 [2:26:52<13:31:16, 14.47s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  58%|█████▊    | 4598/7952 [2:30:59<18:22:25, 19.72s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  58%|█████▊    | 4608/7952 [2:35:01<18:29:14, 19.90s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  58%|█████▊    | 4618/7952 [2:38:30<17:46:48, 19.20s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  58%|█████▊    | 4628/7952 [2:42:55<18:29:47, 20.03s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  58%|█████▊    | 4638/7952 [2:45:33<11:27:32, 12.45s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  58%|█████▊    | 4648/7952 [2:49:09<21:31:33, 23.45s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  59%|█████▊    | 4658/7952 [2:51:56<12:15:01, 13.39s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  59%|█████▊    | 4668/7952 [2:55:02<14:32:17, 15.94s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  59%|█████▉    | 4678/7952 [2:58:16<16:18:59, 17.94s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  59%|█████▉    | 4688/7952 [3:01:21<13:40:56, 15.09s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  59%|█████▉    | 4698/7952 [3:04:45<15:47:09, 17.46s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  59%|█████▉    | 4708/7952 [3:08:20<16:54:19, 18.76s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  59%|█████▉    | 4718/7952 [3:12:01<17:17:05, 19.24s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  59%|█████▉    | 4728/7952 [3:15:55<15:27:20, 17.26s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  60%|█████▉    | 4738/7952 [3:19:36<18:25:27, 20.64s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  60%|█████▉    | 4748/7952 [3:23:08<17:43:33, 19.92s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  60%|█████▉    | 4758/7952 [3:25:54<11:45:21, 13.25s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  60%|█████▉    | 4768/7952 [3:30:26<19:54:02, 22.50s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  60%|██████    | 4778/7952 [3:33:36<19:57:55, 22.64s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  60%|██████    | 4788/7952 [3:38:15<21:07:36, 24.04s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  60%|██████    | 4798/7952 [3:42:03<23:26:00, 26.75s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  60%|██████    | 4808/7952 [3:45:31<15:57:00, 18.26s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  61%|██████    | 4818/7952 [3:48:38<15:23:00, 17.67s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  61%|██████    | 4828/7952 [3:52:18<14:55:10, 17.19s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  61%|██████    | 4838/7952 [3:55:48<17:10:37, 19.86s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  61%|██████    | 4848/7952 [3:59:14<16:05:48, 18.67s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  61%|██████    | 4858/7952 [4:04:14<27:05:39, 31.53s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  61%|██████    | 4868/7952 [4:08:40<24:51:20, 29.01s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  61%|██████▏   | 4878/7952 [4:13:13<22:59:10, 26.92s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  61%|██████▏   | 4888/7952 [4:17:57<20:03:08, 23.56s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  62%|██████▏   | 4898/7952 [4:21:55<18:55:41, 22.31s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  62%|██████▏   | 4908/7952 [4:25:04<16:15:33, 19.23s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  62%|██████▏   | 4918/7952 [4:29:10<18:20:32, 21.76s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  62%|██████▏   | 4928/7952 [4:33:57<32:22:41, 38.55s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  62%|██████▏   | 4938/7952 [4:37:21<13:31:05, 16.15s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  62%|██████▏   | 4948/7952 [4:40:08<11:23:16, 13.65s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  62%|██████▏   | 4958/7952 [4:43:35<22:21:19, 26.88s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  62%|██████▏   | 4968/7952 [4:48:31<21:23:43, 25.81s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  63%|██████▎   | 4978/7952 [4:52:36<17:54:08, 21.67s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  63%|██████▎   | 4988/7952 [4:55:45<15:05:10, 18.32s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  63%|██████▎   | 4998/7952 [4:59:46<23:01:51, 28.07s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  63%|██████▎   | 5008/7952 [5:04:29<21:41:00, 26.52s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  63%|██████▎   | 5018/7952 [5:09:04<24:23:20, 29.93s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  63%|██████▎   | 5028/7952 [5:13:08<15:12:00, 18.71s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  63%|██████▎   | 5038/7952 [5:16:09<16:07:01, 19.91s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  63%|██████▎   | 5048/7952 [5:19:47<14:52:26, 18.44s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  64%|██████▎   | 5058/7952 [5:22:38<13:01:13, 16.20s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  64%|██████▎   | 5068/7952 [5:26:15<17:02:27, 21.27s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  64%|██████▍   | 5078/7952 [5:30:50<17:02:08, 21.34s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  64%|██████▍   | 5088/7952 [5:34:35<16:09:07, 20.30s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  64%|██████▍   | 5098/7952 [5:38:31<21:00:54, 26.51s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  64%|██████▍   | 5108/7952 [5:42:44<15:08:37, 19.17s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  64%|██████▍   | 5118/7952 [5:46:12<14:20:26, 18.22s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  64%|██████▍   | 5128/7952 [5:49:38<16:21:18, 20.85s/it]

Parsing Error encountered. Response was:
{"Relevance": "Highely Relevant", "Evidence": "We cannot assure you that our business will generate sufficient cash flows from operations... (refinancing of debt and restructuring obligations)"}
Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  65%|██████▍   | 5138/7952 [5:54:31<25:01:53, 32.02s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  65%|██████▍   | 5148/7952 [5:58:55<20:02:36, 25.73s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  65%|██████▍   | 5158/7952 [6:03:16<21:09:38, 27.27s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  65%|██████▍   | 5168/7952 [6:06:03<9:58:41, 12.90s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  65%|██████▌   | 5178/7952 [6:09:54<12:34:00, 16.31s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  65%|██████▌   | 5188/7952 [6:14:15<24:21:06, 31.72s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  65%|██████▌   | 5198/7952 [6:18:20<18:04:08, 23.62s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  65%|██████▌   | 5208/7952 [6:22:33<17:36:22, 23.10s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  66%|██████▌   | 5218/7952 [6:26:56<18:56:52, 24.95s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  66%|██████▌   | 5228/7952 [6:30:22<10:19:49, 13.65s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  66%|██████▌   | 5238/7952 [6:34:20<17:27:14, 23.15s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  66%|██████▌   | 5248/7952 [6:38:08<15:20:34, 20.43s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  66%|██████▌   | 5258/7952 [6:42:22<21:41:39, 28.99s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  66%|██████▌   | 5268/7952 [6:45:48<13:49:00, 18.53s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  66%|██████▋   | 5278/7952 [6:49:08<13:12:41, 17.79s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  66%|██████▋   | 5288/7952 [6:53:15<18:41:20, 25.26s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  67%|██████▋   | 5298/7952 [6:56:18<10:27:12, 14.18s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  67%|██████▋   | 5308/7952 [6:59:52<14:45:07, 20.09s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  67%|██████▋   | 5309/7952 [7:01:21<3:29:45,  4.76s/it] 


KeyboardInterrupt: 