**In this Notebook we extract the remaining SubScore Labels**

In [27]:
from langchain_ollama import OllamaLLM
from tqdm import tqdm

import json
import numpy as np
import os
import pandas as pd
import re

**Code to extract unique score combinations**

In [None]:
!pip install --upgrade numpy

In [None]:
rms_with_fundamental_score = pd.read_pickle('./rms_with_fundamental_score.pkl')
rms_with_fundamental_score.head(1)

In [None]:
unique_score_combinations = rms_with_fundamental_score[['CategoryGroup', 'Category', 'TaggedCharacteristics']].drop_duplicates()
# Replace all types of newlines and excessive whitespace in TaggedCharacteristics
unique_score_combinations['TaggedCharacteristics'] = unique_score_combinations['TaggedCharacteristics'].str.replace(r'[\r\n]+', ' ', regex=True)

# Function to expand TaggedCharacteristics if it's a JSON string with multiple items
def expand_tagged_characteristics(row):
    try:
        characteristics = json.loads(row['TaggedCharacteristics'])
        if isinstance(characteristics, list):
            # Replace newlines within each CharacteristicText
            return pd.DataFrame([{
                'CategoryGroup': row['CategoryGroup'],
                'Category': row['Category'],
                'TaggedCharacteristics': char['CharacteristicText'].replace('\r', ' ').replace('\n', ' '),
                'CharacteristicInfluence': char.get('CharacteristicInfluence', None)  # Handle missing keys
            } for char in characteristics])
        else:
            # If it's a single item or not a list, replace newlines if it's a string
            if isinstance(characteristics, str):
                characteristics = characteristics.replace('\r', ' ').replace('\n', ' ')
            return pd.DataFrame([{
                'CategoryGroup': row['CategoryGroup'],
                'Category': row['Category'],
                'TaggedCharacteristics': characteristics,
                'CharacteristicInfluence': row.get('CharacteristicInfluence', None)
            }])
    except (json.JSONDecodeError, TypeError):
        # If parsing fails, replace newlines in the original TaggedCharacteristics
        cleaned_text = row['TaggedCharacteristics'].replace('\r', ' ').replace('\n', ' ')
        return pd.DataFrame([{
            'CategoryGroup': row['CategoryGroup'],
            'Category': row['Category'],
            'TaggedCharacteristics': cleaned_text,
            'CharacteristicInfluence': row.get('CharacteristicInfluence', None)
        }])

# Applying the function to each row and combining results
expanded_unique_score_combinations = pd.concat(
    unique_score_combinations.apply(expand_tagged_characteristics, axis=1).to_list(),
    ignore_index=True
)

# Sort, drop duplicates, and save to CSV
unique_score_combinations = expanded_unique_score_combinations.sort_values(by=['CategoryGroup', 'Category', 'CharacteristicInfluence']).drop_duplicates()
unique_score_combinations.to_csv('unique_score_combinations.csv', index=False)

**Defining the questions and instantiating the LLM**

In [28]:
# Define the questions corresponding to each column
questions_market_dynamics = {
    "Market Dynamics - a": "Does the text mention that the company is exposed to risks associated with cyclical products?",
    "Market Dynamics - b": "Does the text mention risks related to demographic or structural trends affecting the market?",
    "Market Dynamics - c": "Does the text mention risks due to seasonal volatility in the industry?"
}
questions_intra_industry_competition = {
    "Intra-Industry Competition - a": "Does the text mention that market pricing for the company's products or services is irrational or not based on fundamental factors?",
    "Intra-Industry Competition - b": "Does the text mention that the market is highly fragmented with no clear leader or that there is only one dominant leader?",
    "Intra-Industry Competition - c": "Does the text mention low barriers to entry in the industry, making it easy for new competitors to enter the market?"
}
questions_regulatory_framework = {
    "Regulatory Framework - a": "Does the text mention that the industry is subject to a high degree of regulatory scrutiny?",
    "Regulatory Framework - b": "Does the text mention a high dependency on regulation or being a beneficiary from regulation in an unstable regulatory environment?"
}
questions_technology_risk = {
    "Technology Risk - a": "Does the text mention that the industry is susceptible to rapid technological advances or innovations?",
    "Technology Risk - b": "Does the text mention that the company is perceived as a disruptor or is threatened by emerging technological changes?"
}

all_question_dicts = [
    questions_market_dynamics,
    questions_intra_industry_competition,
    questions_regulatory_framework,
    questions_technology_risk
]

# Original questions
questions_market_dynamics_original = {
    "Market Dynamics - a": "Exposure to cyclical products",
    "Market Dynamics - b": "Impact of demographic and structural trends",
    "Market Dynamics - c": "Seasonal industry volatility"
}
questions_intra_industry_competition_original = {
    "Intra-Industry Competition - a": "Market pricing has not shown to be rational",
    "Intra-Industry Competition - b": "Highly fragmented market with no clear leader or only one leader",
    "Intra-Industry Competition - c": "Low barriers to entry"
}
questions_regulatory_framework_original = {
    "Regulatory Framework - a": "Industry has high degree of regulatory scrutiny",
    "Regulatory Framework - b": "High dependency on regulation or is a beneficiary from regulation in an unstable regulatory environment"
}
questions_technology_risk_original = {
    "Technology Risk - a": "Industry susceptibility to technological advances",
    "Technology Risk - b": "Company viewed as a disruptee/threatened by technological change"
}


In [29]:
# Initialize the language model
llm = OllamaLLM(model="llama3.2")

# Check if the processed file exists; if not, process the raw data
processed_file_path = '../data/prospectuses_data_processed.csv'
raw_file_path = '../data/prospectuses_data.csv'

# Check if processed file exists
if os.path.exists(processed_file_path):
    df = pd.read_csv(processed_file_path)
else:
    print("Processed file not found. Processing raw data...")
    df = pd.read_csv(raw_file_path)
    # Filter out rows that have "failed parsing" in the Section ID column
    df = df[df['Section ID'] != "failed parsing"]

# Ensure the relevance and evidence columns are created with a compatible data type
for question_dict in all_question_dicts:
    # Iterate through each question key in the current dictionary
    for column_name in question_dict.keys():
        if column_name in df.columns:
            df[column_name] = df[column_name].astype('string')
        else:
            df[column_name] = ""

df.head(2)
# print(df.shape)

Unnamed: 0,Prospectus ID,Original Filename,Section ID,Section Title,Subsection ID,Subsection Title,Subsubsection ID,Subsubsection Title,Subsubsection Text,Market Dynamics - a,Market Dynamics - b,Market Dynamics - c,Parsing Error,Intra-Industry Competition - a,Intra-Industry Competition - b,Intra-Industry Competition - c,Regulatory Framework - a,Regulatory Framework - b,Technology Risk - a,Technology Risk - b
0,235,Final Offerings 2020.pdf,1,RISK FACTORS,1.1,,1.1.1,,_An investment in the Notes involves a high de...,Not Relevant,Highly Relevant: the risks described below,Not Relevant,,Not Relevant,Not Relevant,Not Relevant,Highly Relevant: Subsubsection Title: ... and ...,Highly Relevant,Not Relevant,Not Relevant
1,16,Final Offerings 2020.pdf,1,RISK FACTORS,1.1,"Risks Relating to the Group’s Business, Techno...",1.1.1,The Group faces significant competition in eac...,The French telecommunications market is a matu...,Highly Relevant: Various evidence throughout t...,Highly Relevant,Highly Relevant,,Highly Relevant,Highly Relevant: ...the Group also competes wi...,Highly Relevant: The exact phrases or sentence...,Highly Relevant: Several evidence are presente...,Highly Relevant,Highly Relevant: This is a highly relevant ans...,Highly Relevant: The Group also faces competit...


In [30]:
def extract_fields(response):
    # Remove any newlines and extra spaces
    response = ' '.join(response.strip().split())

    # Extract the Relevance field
    relevance_match = re.search(r'"Relevance"\s*:\s*"([^"]+)"', response)
    if relevance_match:
        relevance = relevance_match.group(1).strip()
    else:
        relevance = "Parsing Error"

    # Extract the Evidence field(s)
    evidence_match = re.search(r'"Evidence"\s*:\s*(.+?)(?:,?\s*"[^"]+"\s*:|\s*}$)', response)
    if evidence_match:
        evidence_str = evidence_match.group(1).strip()
        # Remove any trailing commas or braces
        evidence_str = evidence_str.rstrip(', }')
        # Split the evidence_str into individual evidence items
        # Evidence items are strings enclosed in double quotes
        evidence_items = re.findall(r'"([^"]+)"', evidence_str)
        evidence = evidence_items
    else:
        evidence = []

    return relevance, evidence


def analyze_prospectus_row_single_question(row, question):
    # System and user prompts
    system_prompt = "You are an expert in analyzing bond prospectuses and identifying specific risk factors."

    # Format the user prompt using the row's data
    prompt = f"""
{system_prompt}

For the following question and text, judge whether the text is "Highly Relevant", "Somewhat Relevant", or "Not Relevant".

Question:
{question}

Text:
Subsubsection Title: {row['Subsubsection Title']}
Subsubsection Text: {row['Subsubsection Text']}


Please provide your answer in the following JSON format:

{{
  "Relevance": "Highly Relevant", "Somewhat Relevant", or "Not Relevant",
  "Evidence": "The exact phrases or sentences from the document that support your assessment; otherwise, leave blank."
}}

Note: Only provide the JSON response without any additional text.
"""
    # Run the prompt through the model
    response = llm.invoke(input=prompt)

    # Parse the response
    try:
        # Extract the Relevance and Evidence fields
        relevance, evidence_list = extract_fields(response)
        # Join multiple evidence items into a single string
        evidence = '; '.join(evidence_list)
    except Exception as e:
        relevance = "Parsing Error"
        evidence = ""

    # Combine relevance and evidence
    if relevance in ["Highly Relevant", "Somewhat Relevant"] and evidence:
        combined_answer = f"{relevance}: {evidence}"
    elif relevance in ["Highly Relevant", "Somewhat Relevant"]:
        combined_answer = relevance
    elif relevance == "Not Relevant":
        combined_answer = "Not Relevant"
    else:
        combined_answer = "Parsing Error"

    # For debugging
    if combined_answer == "Parsing Error":
        print("Parsing Error encountered. Response was:")
        print(response)

    return combined_answer

**Run the LLM Processing**

In [None]:
import time

# Initialize counter for new rows processed
new_rows_processed = 0

# Iterate over each row in the DataFrame with a progress bar
for index, row in tqdm(df.iterrows(), total=df.shape[0], desc="Processing Rows"):
    row_processed = False  # Flag to check if we processed any new data in this row

    for question_dict in all_question_dicts:
        for column_name, question in question_dict.items():
            # Check if the answer column is already filled
            if pd.notnull(df.at[index, column_name]) and df.at[index, column_name] != "":
                # Skip processing this row for this question
                continue
            combined_answer = analyze_prospectus_row_single_question(row, question)
            df.at[index, column_name] = combined_answer
            row_processed = True  # We processed new data in this row

    if row_processed:
        new_rows_processed += 1

    # Save progress every 50 rows
    if (index + 1) % 50 == 0:
        df.to_csv(processed_file_path, index=False)
        # print(f"Progress saved at row {index + 1}")

    # After processing 10 new rows, sleep for 30 seconds
    if new_rows_processed >= 10:
        df.to_csv(processed_file_path, index=False)  # Save before sleeping
        print(f"Processed 10 new rows. Pausing for 30 seconds.")
        # time.sleep(30)
        new_rows_processed = 0  # Reset counter

# Save the final DataFrame after processing all rows
df.to_csv(processed_file_path, index=False)
print("All rows have been processed and saved.")

Processing Rows:  67%|██████▋   | 5318/7952 [03:38<4:15:28,  5.82s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  67%|██████▋   | 5328/7952 [07:06<11:48:16, 16.20s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  67%|██████▋   | 5338/7952 [10:57<18:54:56, 26.05s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  67%|██████▋   | 5348/7952 [14:38<13:31:24, 18.70s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  67%|██████▋   | 5358/7952 [18:24<11:40:35, 16.20s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  68%|██████▊   | 5368/7952 [21:18<12:01:54, 16.76s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  68%|██████▊   | 5378/7952 [25:12<14:27:47, 20.23s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  68%|██████▊   | 5388/7952 [29:17<13:47:04, 19.35s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  68%|██████▊   | 5398/7952 [33:49<18:29:38, 26.07s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  68%|██████▊   | 5408/7952 [37:12<11:18:02, 15.99s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  68%|██████▊   | 5418/7952 [40:52<10:24:19, 14.78s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  68%|██████▊   | 5428/7952 [46:38<26:59:31, 38.50s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  68%|██████▊   | 5438/7952 [50:53<16:37:53, 23.82s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  69%|██████▊   | 5448/7952 [55:11<16:24:44, 23.60s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  69%|██████▊   | 5458/7952 [58:47<15:29:42, 22.37s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  69%|██████▉   | 5468/7952 [1:02:33<12:02:53, 17.46s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  69%|██████▉   | 5478/7952 [1:05:46<12:29:56, 18.19s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  69%|██████▉   | 5488/7952 [1:10:00<13:00:36, 19.01s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  69%|██████▉   | 5498/7952 [1:13:08<9:30:39, 13.95s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  69%|██████▉   | 5508/7952 [1:17:50<18:28:23, 27.21s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  69%|██████▉   | 5518/7952 [1:20:35<9:44:09, 14.40s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  70%|██████▉   | 5528/7952 [1:24:47<20:17:24, 30.13s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  70%|██████▉   | 5538/7952 [1:28:52<14:11:25, 21.16s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  70%|██████▉   | 5548/7952 [1:32:09<11:01:51, 16.52s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  70%|██████▉   | 5558/7952 [1:37:51<21:26:55, 32.25s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  70%|███████   | 5568/7952 [1:41:47<12:50:05, 19.38s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  70%|███████   | 5578/7952 [1:45:26<10:57:41, 16.62s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  70%|███████   | 5588/7952 [1:48:31<9:57:02, 15.15s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  70%|███████   | 5598/7952 [1:51:49<10:45:55, 16.46s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  71%|███████   | 5608/7952 [1:56:12<18:10:50, 27.92s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  71%|███████   | 5618/7952 [2:01:15<16:19:48, 25.19s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  71%|███████   | 5628/7952 [2:06:03<15:02:58, 23.31s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  71%|███████   | 5638/7952 [2:09:34<13:28:38, 20.97s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  71%|███████   | 5648/7952 [2:13:38<13:36:02, 21.25s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  71%|███████   | 5658/7952 [2:17:03<10:26:46, 16.39s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  71%|███████▏  | 5668/7952 [2:20:22<9:35:40, 15.12s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  71%|███████▏  | 5678/7952 [2:23:24<9:51:52, 15.62s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  72%|███████▏  | 5688/7952 [2:25:46<7:42:12, 12.25s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  72%|███████▏  | 5698/7952 [2:30:03<10:21:38, 16.55s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  72%|███████▏  | 5708/7952 [2:32:54<7:02:22, 11.29s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  72%|███████▏  | 5718/7952 [2:36:07<9:55:41, 16.00s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  72%|███████▏  | 5728/7952 [2:40:43<19:05:09, 30.89s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  72%|███████▏  | 5738/7952 [2:43:51<8:46:08, 14.26s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  72%|███████▏  | 5748/7952 [2:48:34<18:27:03, 30.14s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  72%|███████▏  | 5758/7952 [2:51:26<8:40:32, 14.24s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  73%|███████▎  | 5768/7952 [2:54:54<8:43:14, 14.37s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  73%|███████▎  | 5778/7952 [2:58:06<13:33:32, 22.45s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  73%|███████▎  | 5788/7952 [3:01:36<8:56:42, 14.88s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  73%|███████▎  | 5798/7952 [3:04:13<9:42:23, 16.22s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  73%|███████▎  | 5808/7952 [3:07:38<11:09:54, 18.75s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  73%|███████▎  | 5818/7952 [3:11:56<13:15:03, 22.35s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  73%|███████▎  | 5828/7952 [3:16:03<18:20:57, 31.10s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  73%|███████▎  | 5838/7952 [3:19:28<9:45:57, 16.63s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  74%|███████▎  | 5848/7952 [3:22:24<10:15:46, 17.56s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  74%|███████▎  | 5858/7952 [3:25:34<10:22:40, 17.84s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  74%|███████▍  | 5868/7952 [3:28:12<7:26:30, 12.86s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  74%|███████▍  | 5878/7952 [3:32:15<11:25:53, 19.84s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  74%|███████▍  | 5888/7952 [3:35:49<11:36:05, 20.24s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  74%|███████▍  | 5898/7952 [3:39:58<15:29:49, 27.16s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  74%|███████▍  | 5908/7952 [3:43:47<12:15:54, 21.60s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  74%|███████▍  | 5918/7952 [3:47:04<9:19:45, 16.51s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  75%|███████▍  | 5928/7952 [3:50:04<9:04:13, 16.13s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  75%|███████▍  | 5938/7952 [3:53:10<8:23:29, 15.00s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  75%|███████▍  | 5948/7952 [3:55:59<9:41:13, 17.40s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  75%|███████▍  | 5958/7952 [3:59:18<8:51:52, 16.00s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  75%|███████▌  | 5968/7952 [4:03:24<12:30:10, 22.69s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  75%|███████▌  | 5978/7952 [4:06:28<10:29:45, 19.14s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  75%|███████▌  | 5988/7952 [4:09:28<8:50:14, 16.20s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  75%|███████▌  | 5998/7952 [4:12:01<6:42:48, 12.37s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  76%|███████▌  | 6008/7952 [4:15:29<9:42:13, 17.97s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  76%|███████▌  | 6018/7952 [4:18:03<6:56:10, 12.91s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  76%|███████▌  | 6028/7952 [4:20:25<6:13:41, 11.65s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  76%|███████▌  | 6038/7952 [4:23:46<9:37:57, 18.12s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  76%|███████▌  | 6043/7952 [4:25:34<8:58:10, 16.91s/it] 

Parsing Error encountered. Response was:
{
  "Relevance": "Very Highly Relevant",
  "Evidence": "English insolvency law may affect transactions entered into or payments made by any of the Guarantors during the period prior to its liquidation or administration. In addition, if it can be shown that a transaction entered into by an English company was made for less than fair value and was made to shield assets from creditors, then the transaction may be set aside under English insolvency law as a transaction defrauding creditors."
}


Processing Rows:  76%|███████▌  | 6048/7952 [4:27:59<12:04:24, 22.83s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  76%|███████▌  | 6058/7952 [4:31:52<9:18:51, 17.70s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  76%|███████▋  | 6068/7952 [4:35:02<8:28:25, 16.19s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  76%|███████▋  | 6078/7952 [4:38:21<9:53:24, 19.00s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  77%|███████▋  | 6088/7952 [4:41:18<7:38:39, 14.76s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  77%|███████▋  | 6098/7952 [4:44:59<9:25:53, 18.31s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  77%|███████▋  | 6108/7952 [4:48:35<9:50:51, 19.23s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  77%|███████▋  | 6118/7952 [4:52:27<11:51:18, 23.27s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  77%|███████▋  | 6128/7952 [4:55:24<7:42:14, 15.21s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  77%|███████▋  | 6138/7952 [4:58:08<7:19:40, 14.54s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  77%|███████▋  | 6148/7952 [5:00:51<6:02:48, 12.07s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  77%|███████▋  | 6158/7952 [5:04:46<11:32:21, 23.16s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  78%|███████▊  | 6168/7952 [5:08:25<8:51:08, 17.86s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  78%|███████▊  | 6178/7952 [5:11:43<8:33:55, 17.38s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  78%|███████▊  | 6188/7952 [5:15:05<7:39:52, 15.64s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  78%|███████▊  | 6198/7952 [5:18:27<12:18:35, 25.27s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  78%|███████▊  | 6208/7952 [5:23:00<12:52:30, 26.58s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  78%|███████▊  | 6218/7952 [5:27:21<9:59:11, 20.73s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  78%|███████▊  | 6228/7952 [5:32:16<14:19:35, 29.92s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  78%|███████▊  | 6238/7952 [5:35:53<9:24:03, 19.75s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  79%|███████▊  | 6248/7952 [5:39:50<9:12:32, 19.46s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  79%|███████▊  | 6258/7952 [5:42:54<8:18:30, 17.66s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  79%|███████▉  | 6268/7952 [5:46:20<8:26:00, 18.03s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  79%|███████▉  | 6278/7952 [5:50:13<8:11:57, 17.63s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  79%|███████▉  | 6288/7952 [5:53:48<11:58:26, 25.91s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  79%|███████▉  | 6298/7952 [5:57:57<9:06:16, 19.82s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  79%|███████▉  | 6308/7952 [6:00:55<6:56:05, 15.19s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  79%|███████▉  | 6318/7952 [6:04:33<8:42:37, 19.19s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  80%|███████▉  | 6328/7952 [6:08:16<7:34:34, 16.79s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  80%|███████▉  | 6338/7952 [6:11:55<7:49:06, 17.44s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  80%|███████▉  | 6348/7952 [6:16:06<9:44:50, 21.88s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  80%|███████▉  | 6358/7952 [6:20:04<8:10:56, 18.48s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  80%|████████  | 6368/7952 [6:24:08<8:16:28, 18.81s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  80%|████████  | 6378/7952 [6:26:57<7:27:00, 17.04s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  80%|████████  | 6388/7952 [6:30:28<8:07:18, 18.69s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  80%|████████  | 6398/7952 [6:33:40<9:08:47, 21.19s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  81%|████████  | 6408/7952 [6:37:47<8:16:59, 19.31s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  81%|████████  | 6418/7952 [6:41:00<7:02:36, 16.53s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  81%|████████  | 6428/7952 [6:44:27<8:02:19, 18.99s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  81%|████████  | 6438/7952 [6:48:38<8:09:08, 19.38s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  81%|████████  | 6448/7952 [6:51:41<6:13:17, 14.89s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  81%|████████  | 6458/7952 [6:54:53<7:33:43, 18.22s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  81%|████████▏ | 6468/7952 [6:58:18<5:36:39, 13.61s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  81%|████████▏ | 6478/7952 [7:02:24<7:40:41, 18.75s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  82%|████████▏ | 6488/7952 [7:05:34<6:39:40, 16.38s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  82%|████████▏ | 6498/7952 [7:07:56<4:46:05, 11.81s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  82%|████████▏ | 6508/7952 [7:10:54<6:19:45, 15.78s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  82%|████████▏ | 6518/7952 [7:14:38<8:12:11, 20.59s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  82%|████████▏ | 6528/7952 [7:17:13<4:38:43, 11.74s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  82%|████████▏ | 6538/7952 [7:20:45<6:56:03, 17.65s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  82%|████████▏ | 6548/7952 [7:23:35<5:18:02, 13.59s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  82%|████████▏ | 6558/7952 [7:26:22<5:39:32, 14.61s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  83%|████████▎ | 6568/7952 [7:29:40<6:10:08, 16.05s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  83%|████████▎ | 6578/7952 [7:32:36<5:08:52, 13.49s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  83%|████████▎ | 6588/7952 [7:35:11<4:40:33, 12.34s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  83%|████████▎ | 6598/7952 [7:39:02<7:22:33, 19.61s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  83%|████████▎ | 6608/7952 [7:41:52<5:20:43, 14.32s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  83%|████████▎ | 6618/7952 [7:45:48<6:26:47, 17.40s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  83%|████████▎ | 6628/7952 [7:49:41<8:13:57, 22.39s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  83%|████████▎ | 6638/7952 [7:54:00<11:05:47, 30.40s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  84%|████████▎ | 6648/7952 [7:57:34<5:20:31, 14.75s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  84%|████████▎ | 6658/7952 [8:00:50<6:12:43, 17.28s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  84%|████████▍ | 6668/7952 [8:03:50<4:31:46, 12.70s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  84%|████████▍ | 6673/7952 [8:05:52<7:50:40, 22.08s/it]

Parsing Error encountered. Response was:
{"Relevance": "Highely Relevant", "Evidence": "If we are unable to maintain or increase our customer base or engagement, or effectively monetize our customer base\u2019s use of our products and product offerings, our revenue may be adversely affected. If our AMP growth rate slows, we become increasingly dependent on our ability to maintain or increase levels of customer engagement and monetization in order to drive revenue growth.\n\n- we fail to effectively anticipate or respond to customers' continuously changing and dynamic needs, demands and preferences, such as new casino games or poker variants, or innovative types of sports betting or betting related to new or popular sporting events, as well as emerging technological trends, or where our competitors more effectively anticipate or respond to the same."}


Processing Rows:  84%|████████▍ | 6678/7952 [8:07:38<7:01:19, 19.84s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  84%|████████▍ | 6688/7952 [8:11:17<5:37:16, 16.01s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  84%|████████▍ | 6698/7952 [8:16:14<10:03:04, 28.86s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  84%|████████▍ | 6708/7952 [8:21:31<13:38:08, 39.46s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  84%|████████▍ | 6718/7952 [8:25:46<6:16:29, 18.31s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  85%|████████▍ | 6728/7952 [8:31:37<10:54:47, 32.10s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  85%|████████▍ | 6738/7952 [8:35:05<5:56:46, 17.63s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  85%|████████▍ | 6748/7952 [8:38:55<5:41:42, 17.03s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  85%|████████▍ | 6758/7952 [8:42:48<6:19:32, 19.07s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  85%|████████▌ | 6768/7952 [8:47:42<14:42:05, 44.70s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  85%|████████▌ | 6778/7952 [8:53:25<9:54:07, 30.36s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  85%|████████▌ | 6788/7952 [8:58:36<7:42:12, 23.83s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  85%|████████▌ | 6798/7952 [9:03:22<6:58:02, 21.74s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  86%|████████▌ | 6808/7952 [9:08:17<8:34:41, 26.99s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  86%|████████▌ | 6818/7952 [9:12:45<7:48:24, 24.78s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  86%|████████▌ | 6828/7952 [9:16:40<5:41:30, 18.23s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  86%|████████▌ | 6838/7952 [9:20:08<6:00:51, 19.44s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  86%|████████▌ | 6848/7952 [9:23:00<4:04:52, 13.31s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  86%|████████▌ | 6858/7952 [9:26:55<5:23:12, 17.73s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  86%|████████▋ | 6868/7952 [9:34:03<9:56:44, 33.03s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  86%|████████▋ | 6878/7952 [9:38:44<6:40:25, 22.37s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  87%|████████▋ | 6888/7952 [9:43:02<6:36:17, 22.35s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  87%|████████▋ | 6898/7952 [9:47:39<6:59:22, 23.87s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  87%|████████▋ | 6908/7952 [9:51:56<7:30:37, 25.90s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  87%|████████▋ | 6918/7952 [9:56:16<5:01:21, 17.49s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  87%|████████▋ | 6928/7952 [10:00:05<5:38:55, 19.86s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  87%|████████▋ | 6938/7952 [10:04:07<6:50:41, 24.30s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  87%|████████▋ | 6948/7952 [10:07:19<6:15:56, 22.47s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  88%|████████▊ | 6958/7952 [10:10:41<4:44:34, 17.18s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  88%|████████▊ | 6968/7952 [10:14:56<6:14:58, 22.86s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  88%|████████▊ | 6978/7952 [10:19:05<7:00:00, 25.87s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  88%|████████▊ | 6988/7952 [10:23:31<6:28:34, 24.19s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  88%|████████▊ | 6998/7952 [10:27:34<7:20:50, 27.73s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  88%|████████▊ | 7008/7952 [10:32:31<6:50:56, 26.12s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  88%|████████▊ | 7018/7952 [10:36:20<5:24:08, 20.82s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  88%|████████▊ | 7028/7952 [10:41:03<7:37:35, 29.71s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  89%|████████▊ | 7038/7952 [10:44:12<3:39:10, 14.39s/it] 

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  89%|████████▊ | 7048/7952 [10:48:01<4:43:23, 18.81s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  89%|████████▉ | 7058/7952 [10:51:23<4:21:48, 17.57s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  89%|████████▉ | 7068/7952 [10:54:20<3:21:09, 13.65s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  89%|████████▉ | 7078/7952 [10:57:38<5:07:47, 21.13s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  89%|████████▉ | 7088/7952 [11:01:10<5:22:38, 22.41s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  89%|████████▉ | 7098/7952 [11:03:58<3:06:07, 13.08s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  89%|████████▉ | 7108/7952 [11:06:40<2:55:09, 12.45s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  90%|████████▉ | 7118/7952 [11:10:22<4:12:35, 18.17s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  90%|████████▉ | 7128/7952 [11:14:29<3:44:48, 16.37s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  90%|████████▉ | 7130/7952 [11:15:35<5:13:19, 22.87s/it]

Parsing Error encountered. Response was:
{"Relevance": "Highely Relevant", "Evidence": "You should consider carefully the risks and uncertainties described below, as well as under “Risk Factors” in Item 1A of the 2019 Annual Report and Item 1A of the Q2 Quarterly Report,"}


Processing Rows:  90%|████████▉ | 7138/7952 [11:17:41<3:29:14, 15.42s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  90%|████████▉ | 7148/7952 [11:20:49<3:38:03, 16.27s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  90%|█████████ | 7158/7952 [11:24:01<3:06:14, 14.07s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  90%|█████████ | 7168/7952 [11:27:34<3:06:46, 14.29s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  90%|█████████ | 7178/7952 [11:30:46<3:10:46, 14.79s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  90%|█████████ | 7188/7952 [11:34:40<5:42:26, 26.89s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  91%|█████████ | 7198/7952 [11:38:40<3:42:51, 17.73s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  91%|█████████ | 7208/7952 [11:41:56<3:57:04, 19.12s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  91%|█████████ | 7218/7952 [11:44:50<3:07:56, 15.36s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  91%|█████████ | 7228/7952 [11:48:03<2:49:20, 14.03s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  91%|█████████ | 7238/7952 [11:51:05<3:45:09, 18.92s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  91%|█████████ | 7248/7952 [11:54:35<3:05:03, 15.77s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  91%|█████████▏| 7258/7952 [11:58:17<4:18:17, 22.33s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  91%|█████████▏| 7268/7952 [12:01:25<2:51:21, 15.03s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  91%|█████████▏| 7273/7952 [12:03:23<4:04:03, 21.57s/it]

Parsing Error encountered. Response was:
{
  "Relevance": "Very Relevant",
  "Evidence": "Schemes of arrangement, examinership, liquidation, dividend, secured and unsecured creditors"
}


Processing Rows:  91%|█████████▏| 7274/7952 [12:04:20<6:04:17, 32.24s/it]

Parsing Error encountered. Response was:
{
  "Relevance": "Very Relevant",
  "Evidence": "Schemes of arrangement, Examinership, Trustee, Collateral Trustee, Notes, Guarantees, Security documents, Section 604 of the Companies Act, Section 608 of the Companies Act"
}


Processing Rows:  92%|█████████▏| 7278/7952 [12:05:05<3:05:35, 16.52s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  92%|█████████▏| 7288/7952 [12:08:44<2:33:54, 13.91s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  92%|█████████▏| 7298/7952 [12:13:15<4:39:36, 25.65s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  92%|█████████▏| 7308/7952 [12:16:43<2:47:43, 15.63s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  92%|█████████▏| 7318/7952 [12:20:22<3:23:42, 19.28s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  92%|█████████▏| 7323/7952 [12:22:25<1:03:46,  6.08s/it]


KeyboardInterrupt: 