**In this notebook we extract the remaining sub-score labels by asking an LLM to rate each prospectus subsection against every sub-score question**

In [None]:
import json
import os
import re
import time

import numpy as np
import pandas as pd
from langchain_ollama import OllamaLLM
from tqdm import tqdm

**Code to extract unique score combinations**

In [None]:
!pip install --upgrade numpy

In [None]:
# Load the pickled frame that carries the fundamental-score columns used below,
# then preview its first record as a quick sanity check of the schema.
# NOTE(review): unpickling executes arbitrary code - only load trusted files.
rms_with_fundamental_score = pd.read_pickle('./rms_with_fundamental_score.pkl')
rms_with_fundamental_score.head(n=1)

In [None]:
# Distinct (CategoryGroup, Category, TaggedCharacteristics) triples, with every
# run of carriage returns / line feeds inside TaggedCharacteristics collapsed
# into a single space.
unique_score_combinations = (
    rms_with_fundamental_score[['CategoryGroup', 'Category', 'TaggedCharacteristics']]
    .drop_duplicates()
    .assign(
        TaggedCharacteristics=lambda frame: frame['TaggedCharacteristics'].str.replace(
            r'[\r\n]+', ' ', regex=True
        )
    )
)

# Function to expand TaggedCharacteristics if it's a JSON string with multiple items
def expand_tagged_characteristics(row):
    """Expand one score-combination row into one output row per tagged characteristic.

    Parameters
    ----------
    row : mapping-like (typically a ``pd.Series``)
        Must provide ``'CategoryGroup'``, ``'Category'`` and
        ``'TaggedCharacteristics'``; ``'CharacteristicInfluence'`` is optional.

    Returns
    -------
    pd.DataFrame
        Columns ``CategoryGroup``, ``Category``, ``TaggedCharacteristics`` and
        ``CharacteristicInfluence``.

        * If ``TaggedCharacteristics`` is a JSON list of objects, one row per
          object (text from ``'CharacteristicText'``, influence from
          ``'CharacteristicInfluence'`` when present). An empty JSON list
          yields an empty frame.
        * If it is any other valid JSON value, a single row holding that value.
        * If it is not valid JSON, is not a string at all (e.g. NaN/None), or
          is a list whose items do not have a usable ``'CharacteristicText'``,
          a single row holding the raw value.

        Carriage returns and line feeds in text values are replaced by spaces.
    """
    def strip_newlines(value):
        # Only strings can carry newlines; pass NaN/None/numbers/dicts through untouched.
        if isinstance(value, str):
            return value.replace('\r', ' ').replace('\n', ' ')
        return value

    def single_row(value):
        # One-row frame used for every "could not expand into several items" case.
        return pd.DataFrame([{
            'CategoryGroup': row['CategoryGroup'],
            'Category': row['Category'],
            'TaggedCharacteristics': strip_newlines(value),
            'CharacteristicInfluence': row.get('CharacteristicInfluence', None)
        }])

    raw_value = row['TaggedCharacteristics']
    try:
        characteristics = json.loads(raw_value)
        if not isinstance(characteristics, list):
            return single_row(characteristics)
        return pd.DataFrame([{
            'CategoryGroup': row['CategoryGroup'],
            'Category': row['Category'],
            'TaggedCharacteristics': char['CharacteristicText'].replace('\r', ' ').replace('\n', ' '),
            'CharacteristicInfluence': char.get('CharacteristicInfluence', None)  # Handle missing keys
        } for char in characteristics])
    except (json.JSONDecodeError, TypeError, KeyError, AttributeError):
        # JSONDecodeError: not JSON. TypeError: raw value is not a string (NaN/None)
        # or a list item is not a mapping. KeyError/AttributeError: an item has no
        # (string) 'CharacteristicText'. In every case keep the raw value as one row
        # instead of aborting the whole expansion.
        return single_row(raw_value)

# Expand every combination into its per-characteristic rows and stack the pieces
per_row_frames = unique_score_combinations.apply(expand_tagged_characteristics, axis=1).to_list()
expanded_unique_score_combinations = pd.concat(per_row_frames, ignore_index=True)

# Order the rows, remove exact duplicates, and write the result to CSV
sort_keys = ['CategoryGroup', 'Category', 'CharacteristicInfluence']
unique_score_combinations = expanded_unique_score_combinations.sort_values(by=sort_keys)
unique_score_combinations = unique_score_combinations.drop_duplicates()
unique_score_combinations.to_csv('unique_score_combinations.csv', index=False)

**Defining the questions and instantiating the LLM**

In [11]:
# Questions sent to the LLM. Each key is also the name of the DataFrame column
# that stores the answer for that question.
questions_market_dynamics = {
    "Market Dynamics - a": "Does the text mention that the company is exposed to risks associated with cyclical products?",
    "Market Dynamics - b": "Does the text mention risks related to demographic or structural trends affecting the market?",
    "Market Dynamics - c": "Does the text mention risks due to seasonal volatility in the industry?"
}
questions_intra_industry_competition = {
    "Intra-Industry Competition - a": "Does the text mention that market pricing for the company's products or services is irrational or not based on fundamental factors?",
    "Intra-Industry Competition - b": "Does the text mention that the market is highly fragmented with no clear leader or that there is only one dominant leader?",
    "Intra-Industry Competition - c": "Does the text mention low barriers to entry in the industry, making it easy for new competitors to enter the market?"
}
questions_regulatory_framework = {
    "Regulatory Framework - a": "Does the text mention that the industry is subject to a high degree of regulatory scrutiny?",
    "Regulatory Framework - b": "Does the text mention a high dependency on regulation or being a beneficiary from regulation in an unstable regulatory environment?"
}
questions_technology_risk = {
    "Technology Risk - a": "Does the text mention that the industry is susceptible to rapid technological advances or innovations?",
    # The underlying label is about the company being the *disruptee* (the one
    # being disrupted), so the question must not ask whether it is a disruptor.
    "Technology Risk - b": "Does the text mention that the company is at risk of being disrupted (a disruptee) or is threatened by emerging technological changes?"
}

# Every question group, in the order the answer columns are created and filled.
all_question_dicts = [
    questions_market_dynamics,
    questions_intra_industry_competition,
    questions_regulatory_framework,
    questions_technology_risk
]

# Original sub-score labels that the questions above were phrased from
# (same keys as the question dictionaries; kept for reference).
questions_market_dynamics_original = {
    "Market Dynamics - a": "Exposure to cyclical products",
    "Market Dynamics - b": "Impact of demographic and structural trends",
    "Market Dynamics - c": "Seasonal industry volatility"
}
questions_intra_industry_competition_original = {
    "Intra-Industry Competition - a": "Market pricing has not shown to be rational",
    "Intra-Industry Competition - b": "Highly fragmented market with no clear leader or only one leader",
    "Intra-Industry Competition - c": "Low barriers to entry"
}
questions_regulatory_framework_original = {
    "Regulatory Framework - a": "Industry has high degree of regulatory scrutiny",
    "Regulatory Framework - b": "High dependency on regulation or is a beneficiary from regulation in an unstable regulatory environment"
}
questions_technology_risk_original = {
    "Technology Risk - a": "Industry susceptibility to technological advances",
    "Technology Risk - b": "Company viewed as a disruptee/threatened by technological change"
}


In [12]:
# Initialize the Ollama-backed language model used for every question
llm = OllamaLLM(model="llama3.2")

# The processed CSV doubles as a checkpoint: when it exists we resume from it,
# otherwise we start from the raw export.
processed_file_path = '../data/prospectuses_data_processed.csv'
raw_file_path = '../data/prospectuses_data.csv'

if os.path.exists(processed_file_path):
    df = pd.read_csv(processed_file_path)
else:
    print("Processed file not found. Processing raw data...")
    df = pd.read_csv(raw_file_path)
    # Filter out rows that have "failed parsing" in the Section ID column.
    # .copy() makes the result an independent frame, so the column assignments
    # below do not write into a filtered slice of the raw data.
    df = df[df['Section ID'] != "failed parsing"].copy()

# Make sure every answer column exists and uses the same nullable string dtype,
# whether it was loaded from the checkpoint or is created here.
for question_dict in all_question_dicts:
    for column_name in question_dict:
        if column_name in df.columns:
            df[column_name] = df[column_name].astype('string')
        else:
            df[column_name] = pd.Series("", index=df.index, dtype='string')

df.head(2)

Unnamed: 0,Prospectus ID,Original Filename,Section ID,Section Title,Subsection ID,Subsection Title,Subsubsection ID,Subsubsection Title,Subsubsection Text,Market Dynamics - a,Market Dynamics - b,Market Dynamics - c,Parsing Error,Intra-Industry Competition - a,Intra-Industry Competition - b,Intra-Industry Competition - c,Regulatory Framework - a,Regulatory Framework - b,Technology Risk - a,Technology Risk - b
0,235,Final Offerings 2020.pdf,1,RISK FACTORS,1.1,,1.1.1,,_An investment in the Notes involves a high de...,Not Relevant,Highly Relevant: the risks described below,Not Relevant,,Not Relevant,Not Relevant,Not Relevant,Highly Relevant: Subsubsection Title: ... and ...,Highly Relevant,Not Relevant,Not Relevant
1,16,Final Offerings 2020.pdf,1,RISK FACTORS,1.1,"Risks Relating to the Group’s Business, Techno...",1.1.1,The Group faces significant competition in eac...,The French telecommunications market is a matu...,Highly Relevant: Various evidence throughout t...,Highly Relevant,Highly Relevant,,Highly Relevant,Highly Relevant: ...the Group also competes wi...,Highly Relevant: The exact phrases or sentence...,Highly Relevant: Several evidence are presente...,Highly Relevant,Highly Relevant: This is a highly relevant ans...,Highly Relevant: The Group also faces competit...


In [13]:
def extract_fields(response):
    """Pull the ``Relevance`` label and the ``Evidence`` items out of an LLM reply.

    The reply is first whitespace-normalised (all runs of whitespace, including
    newlines, become one space). The text between the first ``{`` and the last
    ``}`` is then parsed as JSON, which copes with escaped quotes inside the
    evidence, code fences and trailing prose around the object. If that text is
    not a valid JSON object, the function falls back to regex extraction.

    Parameters
    ----------
    response : str
        Raw text returned by the model.

    Returns
    -------
    tuple[str, list[str]]
        ``(relevance, evidence)``. ``relevance`` is the stripped label, or
        ``"Parsing Error"`` when no non-empty string label is found.
        ``evidence`` is a list of non-empty evidence strings (possibly empty).
    """
    # Remove any newlines and extra spaces
    response = ' '.join(response.strip().split())

    # Preferred path: the reply contains a well-formed JSON object.
    object_start = response.find('{')
    object_end = response.rfind('}')
    if object_start != -1 and object_end > object_start:
        try:
            payload = json.loads(response[object_start:object_end + 1])
        except json.JSONDecodeError:
            payload = None
        if isinstance(payload, dict):
            label = payload.get("Relevance")
            if isinstance(label, str) and label.strip():
                relevance = label.strip()
            else:
                relevance = "Parsing Error"

            raw_evidence = payload.get("Evidence")
            if isinstance(raw_evidence, str):
                raw_evidence = [raw_evidence]
            elif not isinstance(raw_evidence, list):
                raw_evidence = []
            # Keep only non-empty strings, mirroring what the regex path can return.
            evidence = [item for item in raw_evidence if isinstance(item, str) and item]
            return relevance, evidence

    # Fallback: best-effort regex extraction for replies that are not valid JSON.
    relevance_match = re.search(r'"Relevance"\s*:\s*"([^"]+)"', response)
    if relevance_match:
        relevance = relevance_match.group(1).strip()
    else:
        relevance = "Parsing Error"

    # The evidence value runs up to the next `"key":` or the closing brace at end of text.
    evidence_match = re.search(r'"Evidence"\s*:\s*(.+?)(?:,?\s*"[^"]+"\s*:|\s*}$)', response)
    if evidence_match:
        evidence_str = evidence_match.group(1).strip()
        # Remove any trailing commas or braces
        evidence_str = evidence_str.rstrip(', }')
        # Evidence items are strings enclosed in double quotes
        evidence = re.findall(r'"([^"]+)"', evidence_str)
    else:
        evidence = []

    return relevance, evidence


def analyze_prospectus_row_single_question(row, question):
    """Ask the LLM how relevant one prospectus subsection is to one question.

    Relies on the module-level ``llm`` (its ``invoke`` method) and on
    ``extract_fields`` to parse the reply.

    Parameters
    ----------
    row : mapping-like (typically a ``pd.Series``)
        Must provide ``'Subsubsection Title'`` and ``'Subsubsection Text'``.
        Missing values (NaN/None) are sent to the model as empty text.
    question : str
        The question the text is judged against.

    Returns
    -------
    str
        ``"<label>: <evidence>"`` or just ``"<label>"`` for "Highly Relevant" /
        "Somewhat Relevant", ``"Not Relevant"``, or ``"Parsing Error"`` when no
        recognised label could be extracted (the raw reply is printed then).
    """
    # System and user prompts
    system_prompt = "You are an expert in analyzing bond prospectuses and identifying specific risk factors."

    # Missing cells come out of pandas as NaN; interpolating them directly would
    # put the literal text "nan" in the prompt, so send empty text instead.
    subsubsection_title = "" if pd.isna(row['Subsubsection Title']) else row['Subsubsection Title']
    subsubsection_text = "" if pd.isna(row['Subsubsection Text']) else row['Subsubsection Text']

    # Format the user prompt using the row's data
    prompt = f"""
{system_prompt}

For the following question and text, judge whether the text is "Highly Relevant", "Somewhat Relevant", or "Not Relevant".

Question:
{question}

Text:
Subsubsection Title: {subsubsection_title}
Subsubsection Text: {subsubsection_text}


Please provide your answer in the following JSON format:

{{
  "Relevance": "Highly Relevant", "Somewhat Relevant", or "Not Relevant",
  "Evidence": "The exact phrases or sentences from the document that support your assessment; otherwise, leave blank."
}}

Note: Only provide the JSON response without any additional text.
"""
    # Run the prompt through the model
    response = llm.invoke(input=prompt)

    # Parse the response; any failure while parsing is recorded as a parsing error
    try:
        relevance, evidence_list = extract_fields(response)
        # Join multiple evidence items into a single string
        evidence = '; '.join(evidence_list)
    except Exception:
        relevance = "Parsing Error"
        evidence = ""

    # Accept the three expected labels regardless of letter case; anything else
    # (e.g. an invented label) is left as-is and ends up as a parsing error below.
    canonical_labels = {
        "highly relevant": "Highly Relevant",
        "somewhat relevant": "Somewhat Relevant",
        "not relevant": "Not Relevant",
    }
    relevance = canonical_labels.get(relevance.strip().lower(), relevance)

    # Combine relevance and evidence
    if relevance in ("Highly Relevant", "Somewhat Relevant"):
        combined_answer = f"{relevance}: {evidence}" if evidence else relevance
    elif relevance == "Not Relevant":
        combined_answer = "Not Relevant"
    else:
        combined_answer = "Parsing Error"

    # For debugging
    if combined_answer == "Parsing Error":
        print("Parsing Error encountered. Response was:")
        print(response)

    return combined_answer

**Run the LLM Processing**

In [17]:
# Checkpointing / throttling settings for the long-running labelling loop
SAVE_EVERY_N_ROWS = 50      # checkpoint whenever (row index + 1) is a multiple of this
PAUSE_AFTER_NEW_ROWS = 10   # pause once this many rows needed new LLM calls
PAUSE_SECONDS = 30          # length of each pause

new_rows_processed = 0      # rows with new answers since the last pause
unsaved_changes = False     # True while df holds answers not yet written to disk

try:
    # Iterate over each row in the DataFrame with a progress bar
    for index, row in tqdm(df.iterrows(), total=df.shape[0], desc="Processing Rows"):
        row_processed = False  # did this row need at least one new answer?

        for question_dict in all_question_dicts:
            for column_name, question in question_dict.items():
                existing_answer = df.at[index, column_name]
                # Skip questions that already have an answer (resume support)
                if pd.notnull(existing_answer) and existing_answer != "":
                    continue
                df.at[index, column_name] = analyze_prospectus_row_single_question(row, question)
                row_processed = True

        if row_processed:
            new_rows_processed += 1
            unsaved_changes = True

        # Periodic checkpoint; skipped when nothing changed since the last save
        if (index + 1) % SAVE_EVERY_N_ROWS == 0 and unsaved_changes:
            df.to_csv(processed_file_path, index=False)
            unsaved_changes = False

        # After enough newly processed rows, save and pause before continuing
        if new_rows_processed >= PAUSE_AFTER_NEW_ROWS:
            df.to_csv(processed_file_path, index=False)  # Save before sleeping
            unsaved_changes = False
            print(f"Processed {PAUSE_AFTER_NEW_ROWS} new rows. Pausing for {PAUSE_SECONDS} seconds.")
            time.sleep(PAUSE_SECONDS)
            new_rows_processed = 0  # Reset counter
finally:
    # Always persist what we have, including when the run is interrupted or an
    # LLM call fails, so answers since the last checkpoint are not lost.
    df.to_csv(processed_file_path, index=False)

print("All rows have been processed and saved.")

Processing Rows:  10%|▉         | 760/7952 [03:22<12:14:19,  6.13s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  10%|▉         | 770/7952 [06:34<27:05:56, 13.58s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  10%|▉         | 780/7952 [10:17<37:40:45, 18.91s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  10%|▉         | 790/7952 [13:45<39:32:24, 19.87s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  10%|█         | 800/7952 [17:36<35:24:58, 17.83s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  10%|█         | 810/7952 [20:11<28:40:52, 14.46s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  10%|█         | 820/7952 [23:37<32:52:51, 16.60s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  10%|█         | 830/7952 [26:25<31:26:50, 15.90s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  11%|█         | 840/7952 [29:24<29:52:05, 15.12s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  11%|█         | 850/7952 [32:15<25:43:28, 13.04s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  11%|█         | 860/7952 [34:41<28:04:20, 14.25s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  11%|█         | 870/7952 [37:26<28:31:41, 14.50s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  11%|█         | 880/7952 [40:02<30:19:17, 15.44s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  11%|█         | 890/7952 [42:36<25:00:31, 12.75s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  11%|█▏        | 900/7952 [46:52<44:35:20, 22.76s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  11%|█▏        | 910/7952 [49:39<24:28:47, 12.51s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  12%|█▏        | 920/7952 [52:35<35:41:34, 18.27s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  12%|█▏        | 930/7952 [55:39<29:58:07, 15.36s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  12%|█▏        | 940/7952 [59:28<37:25:35, 19.22s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  12%|█▏        | 950/7952 [1:02:53<32:09:03, 16.53s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  12%|█▏        | 960/7952 [1:07:15<68:18:23, 35.17s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  12%|█▏        | 970/7952 [1:11:44<55:33:04, 28.64s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  12%|█▏        | 980/7952 [1:15:44<30:16:38, 15.63s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  12%|█▏        | 990/7952 [1:20:23<49:26:48, 25.57s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  13%|█▎        | 1000/7952 [1:23:24<33:06:04, 17.14s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  13%|█▎        | 1010/7952 [1:27:59<50:44:44, 26.32s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  13%|█▎        | 1020/7952 [1:31:43<33:53:38, 17.60s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  13%|█▎        | 1030/7952 [1:35:53<36:45:03, 19.11s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  13%|█▎        | 1040/7952 [1:39:37<46:47:00, 24.37s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  13%|█▎        | 1050/7952 [1:45:08<50:47:07, 26.49s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  13%|█▎        | 1060/7952 [1:49:03<45:22:57, 23.71s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  13%|█▎        | 1070/7952 [1:53:36<39:54:36, 20.88s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  14%|█▎        | 1080/7952 [1:57:39<46:38:32, 24.43s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  14%|█▎        | 1090/7952 [2:01:47<39:57:13, 20.96s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  14%|█▍        | 1100/7952 [2:06:08<37:02:09, 19.46s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  14%|█▍        | 1110/7952 [2:09:45<52:24:01, 27.57s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  14%|█▍        | 1120/7952 [2:14:42<50:42:02, 26.72s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  14%|█▍        | 1130/7952 [2:19:02<42:24:50, 22.38s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  14%|█▍        | 1140/7952 [2:23:55<52:03:29, 27.51s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  14%|█▍        | 1150/7952 [2:27:28<38:12:20, 20.22s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  15%|█▍        | 1160/7952 [2:31:58<55:21:22, 29.34s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  15%|█▍        | 1170/7952 [2:36:02<43:07:35, 22.89s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  15%|█▍        | 1180/7952 [2:39:01<25:21:59, 13.48s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  15%|█▍        | 1190/7952 [2:42:33<47:28:21, 25.27s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  15%|█▌        | 1200/7952 [2:46:53<41:34:22, 22.17s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  15%|█▌        | 1210/7952 [2:50:42<34:14:57, 18.29s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  15%|█▌        | 1220/7952 [2:54:59<39:28:44, 21.11s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  15%|█▌        | 1230/7952 [2:59:43<44:54:05, 24.05s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  16%|█▌        | 1240/7952 [3:03:28<36:28:37, 19.56s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  16%|█▌        | 1250/7952 [3:06:04<23:23:26, 12.56s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  16%|█▌        | 1260/7952 [3:09:45<30:39:06, 16.49s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  16%|█▌        | 1270/7952 [3:12:40<24:55:14, 13.43s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  16%|█▌        | 1280/7952 [3:15:18<27:20:07, 14.75s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  16%|█▌        | 1290/7952 [3:18:14<26:38:32, 14.40s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  16%|█▋        | 1300/7952 [3:21:15<27:50:18, 15.07s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  16%|█▋        | 1310/7952 [3:23:59<20:23:24, 11.05s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  17%|█▋        | 1320/7952 [3:26:56<29:47:50, 16.17s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  17%|█▋        | 1330/7952 [3:29:34<28:43:09, 15.61s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  17%|█▋        | 1340/7952 [3:32:14<25:02:35, 13.64s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  17%|█▋        | 1350/7952 [3:35:14<29:14:20, 15.94s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  17%|█▋        | 1360/7952 [3:38:40<36:37:42, 20.00s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  17%|█▋        | 1370/7952 [3:43:34<39:02:15, 21.35s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  17%|█▋        | 1380/7952 [3:47:15<35:08:43, 19.25s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  17%|█▋        | 1390/7952 [3:51:15<35:50:46, 19.67s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  18%|█▊        | 1400/7952 [3:54:34<32:28:46, 17.85s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  18%|█▊        | 1410/7952 [3:57:46<27:19:41, 15.04s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  18%|█▊        | 1420/7952 [4:01:31<43:00:04, 23.70s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  18%|█▊        | 1430/7952 [4:05:51<38:00:01, 20.98s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  18%|█▊        | 1440/7952 [4:09:05<23:53:26, 13.21s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  18%|█▊        | 1450/7952 [4:12:19<25:21:38, 14.04s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  18%|█▊        | 1460/7952 [4:14:48<22:28:35, 12.46s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  18%|█▊        | 1470/7952 [4:17:43<35:01:02, 19.45s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  19%|█▊        | 1480/7952 [4:20:19<24:27:33, 13.61s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  19%|█▊        | 1486/7952 [4:23:56<62:06:54, 34.58s/it]

Parsing Error encountered. Response was:
{
  "Relevance": "Very Relevant",
  "Evidence": "Increased uncertainty exists on our ability to enforce collection terms on business to consumer secured loans. In November 2020, the EU adopted a directive on representative actions (Directive 2020/1828) to facilitate class actions both for injunctions and compensations, replacing the so-called \"injunctions directive\" (Directive 2009/22/EC), which could also entail higher levels of litigation and in turn jeopardize our collection ability."
}
Parsing Error encountered. Response was:
{
  "Relevance": "Very Relevant",
  "Evidence": "consumer credit market, installment plans, debt collection services, licensing requirements, NPL Directive, insolvency or debt reorganization proceedings"
}


Processing Rows:  19%|█▊        | 1490/7952 [4:26:09<53:16:53, 29.68s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  19%|█▉        | 1500/7952 [4:30:25<34:56:24, 19.50s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  19%|█▉        | 1510/7952 [4:34:07<30:29:27, 17.04s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  19%|█▉        | 1520/7952 [4:37:31<28:46:04, 16.10s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  19%|█▉        | 1530/7952 [4:41:27<38:38:47, 21.66s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  19%|█▉        | 1540/7952 [4:44:17<25:51:20, 14.52s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  19%|█▉        | 1550/7952 [4:47:39<33:23:37, 18.78s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  20%|█▉        | 1560/7952 [4:52:22<44:11:49, 24.89s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  20%|█▉        | 1570/7952 [4:55:59<38:06:45, 21.50s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  20%|█▉        | 1580/7952 [4:59:20<30:42:10, 17.35s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  20%|█▉        | 1590/7952 [5:02:36<27:35:07, 15.61s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  20%|██        | 1600/7952 [5:05:52<34:28:03, 19.53s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  20%|██        | 1610/7952 [5:09:29<30:44:21, 17.45s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  20%|██        | 1620/7952 [5:12:16<23:16:29, 13.23s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  20%|██        | 1630/7952 [5:16:52<41:32:58, 23.66s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  21%|██        | 1640/7952 [5:20:51<36:14:56, 20.67s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  21%|██        | 1650/7952 [5:25:02<44:30:29, 25.43s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  21%|██        | 1660/7952 [5:28:46<36:15:05, 20.74s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  21%|██        | 1670/7952 [5:33:40<50:22:13, 28.87s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  21%|██        | 1680/7952 [5:38:46<51:42:42, 29.68s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  21%|██▏       | 1690/7952 [5:43:59<57:04:17, 32.81s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  21%|██▏       | 1700/7952 [5:47:10<26:06:40, 15.04s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  22%|██▏       | 1710/7952 [5:50:37<36:13:59, 20.90s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  22%|██▏       | 1720/7952 [5:55:39<41:18:36, 23.86s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  22%|██▏       | 1730/7952 [5:58:53<28:11:50, 16.31s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  22%|██▏       | 1740/7952 [6:01:57<27:19:26, 15.83s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  22%|██▏       | 1750/7952 [6:06:14<36:56:40, 21.44s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  22%|██▏       | 1760/7952 [6:09:28<29:29:58, 17.15s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  22%|██▏       | 1770/7952 [6:12:42<27:32:05, 16.03s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  22%|██▏       | 1776/7952 [6:15:41<51:09:22, 29.82s/it]

Parsing Error encountered. Response was:
{"Relevance": "Very Relevant", "Evidence": "increasing competition from non-traditional mobile voice and data services based on new mobile VoIP, particularly over the top services (“OTT services”), such as Facebook Messenger, FaceTime, Google Talk, Skype, Snapchat, Viber and WhatsApp."}
Parsing Error encountered. Response was:
{
  "Relevance": "Very High",
  "Evidence": "All of the above"
}


Processing Rows:  22%|██▏       | 1780/7952 [6:19:11<80:19:09, 46.85s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  23%|██▎       | 1790/7952 [6:23:37<42:15:53, 24.69s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  23%|██▎       | 1800/7952 [6:27:47<35:29:51, 20.77s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  23%|██▎       | 1810/7952 [6:31:57<34:57:00, 20.49s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  23%|██▎       | 1820/7952 [6:35:39<32:17:45, 18.96s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  23%|██▎       | 1830/7952 [6:40:06<33:41:08, 19.81s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  23%|██▎       | 1840/7952 [6:43:59<42:03:19, 24.77s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  23%|██▎       | 1850/7952 [6:49:08<37:45:43, 22.28s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  23%|██▎       | 1860/7952 [6:52:40<31:49:02, 18.80s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  24%|██▎       | 1870/7952 [6:55:50<24:22:08, 14.42s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  24%|██▎       | 1880/7952 [6:58:28<20:53:24, 12.39s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  24%|██▍       | 1890/7952 [7:01:02<21:57:32, 13.04s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  24%|██▍       | 1900/7952 [7:05:02<25:37:08, 15.24s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  24%|██▍       | 1910/7952 [7:08:29<28:00:26, 16.69s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  24%|██▍       | 1920/7952 [7:11:28<29:05:02, 17.36s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  24%|██▍       | 1930/7952 [7:15:23<52:03:58, 31.13s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  24%|██▍       | 1940/7952 [7:18:25<31:48:27, 19.05s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  25%|██▍       | 1950/7952 [7:23:13<46:16:07, 27.75s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  25%|██▍       | 1960/7952 [7:26:45<26:26:58, 15.89s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  25%|██▍       | 1970/7952 [7:30:29<33:30:01, 20.16s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  25%|██▍       | 1980/7952 [7:34:22<32:56:46, 19.86s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  25%|██▌       | 1990/7952 [7:38:44<37:11:26, 22.46s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  25%|██▌       | 2000/7952 [7:42:42<32:13:40, 19.49s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  25%|██▌       | 2010/7952 [7:46:29<32:56:57, 19.96s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  25%|██▌       | 2020/7952 [7:50:43<42:33:45, 25.83s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  26%|██▌       | 2030/7952 [7:54:14<27:30:11, 16.72s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  26%|██▌       | 2040/7952 [7:57:39<34:02:49, 20.73s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  26%|██▌       | 2050/7952 [8:01:15<24:22:33, 14.87s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  26%|██▌       | 2060/7952 [8:04:18<24:40:33, 15.08s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  26%|██▌       | 2070/7952 [8:07:55<40:02:29, 24.51s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  26%|██▌       | 2080/7952 [8:11:39<29:52:47, 18.32s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  26%|██▋       | 2090/7952 [8:14:57<25:36:48, 15.73s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  26%|██▋       | 2100/7952 [8:19:17<32:39:40, 20.09s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  27%|██▋       | 2110/7952 [8:22:02<20:57:41, 12.92s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  27%|██▋       | 2120/7952 [8:27:25<44:43:30, 27.61s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  27%|██▋       | 2130/7952 [8:32:04<38:53:25, 24.05s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  27%|██▋       | 2140/7952 [8:35:17<25:09:23, 15.58s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  27%|██▋       | 2150/7952 [8:39:21<32:00:33, 19.86s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  27%|██▋       | 2160/7952 [8:43:49<34:44:42, 21.60s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  27%|██▋       | 2170/7952 [8:47:30<32:39:20, 20.33s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  27%|██▋       | 2180/7952 [8:51:02<20:47:57, 12.97s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  28%|██▊       | 2190/7952 [8:54:13<25:10:20, 15.73s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  28%|██▊       | 2200/7952 [8:57:36<24:24:06, 15.27s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  28%|██▊       | 2210/7952 [9:01:50<38:02:06, 23.85s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  28%|██▊       | 2220/7952 [9:04:51<25:11:07, 15.82s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  28%|██▊       | 2230/7952 [9:07:59<22:34:27, 14.20s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  28%|██▊       | 2240/7952 [9:10:40<20:37:39, 13.00s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  28%|██▊       | 2250/7952 [9:13:15<22:38:34, 14.30s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  28%|██▊       | 2260/7952 [9:16:13<24:02:31, 15.21s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  29%|██▊       | 2270/7952 [9:19:43<31:53:41, 20.21s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  29%|██▊       | 2280/7952 [9:22:23<18:18:28, 11.62s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  29%|██▉       | 2290/7952 [9:25:57<30:50:20, 19.61s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  29%|██▉       | 2300/7952 [9:29:55<32:00:20, 20.39s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  29%|██▉       | 2310/7952 [9:33:37<28:14:02, 18.02s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  29%|██▉       | 2320/7952 [9:37:22<26:35:48, 17.00s/it]

Processed 10 new rows. Pausing for 30 seconds.


Processing Rows:  29%|██▉       | 2320/7952 [9:37:53<23:22:51, 14.95s/it]


KeyboardInterrupt: 