# Generating SALSA-Annotations with LLMs

In [1]:
import json
import random
from dotenv import load_dotenv
import os
import csv
from anthropic import Anthropic
from openai import AzureOpenAI, OpenAI
import replicate
from anthropic import Anthropic
from dotenv import load_dotenv
import pandas as pd
from datetime import datetime
import time

from helper_functions import get_api_type, QUALITY_EXPANDED_MAPPING

import en_core_web_sm

import concurrent.futures
import threading

nlp = en_core_web_sm.load()

In [None]:
# API credentials used by the client objects created in later cells:
# `azure_key` feeds the AzureOpenAI client, `anthropic_key` the Anthropic client and
# `replicate_key` the Replicate client.
# NOTE(review): these are placeholders; keep real keys out of the notebook before sharing it.
azure_key = "INSERT_KEY_HERE"
anthropic_key = "INSERT_KEY_HERE"
replicate_key = "INSERT_KEY_HERE"

In [3]:
# set seed
# random.seed() returns None, so keep the seed value in its own variable and
# pass it to the generator; `seed` then holds the number that was actually used.
seed = 1337
random.seed(seed)

In [None]:
# Setup AI APIs
# Creates the module-level client objects used by the annotation functions below.

from anthropic import Anthropic
import openai
from openai import OpenAI



# OpenAI-compatible client pointed at a server on localhost:1234; the api_key
# value here is a fixed placeholder string.
LMstudio_client = OpenAI(
    base_url="http://localhost:1234/v1",
    api_key="lm-studio"
)
# Model/deployment name passed as `model=` in the Azure OpenAI calls further down.
model_gpt ="gpt-4o-2024-08-06"

from openai import AzureOpenAI

# Azure OpenAI client; the endpoint is a placeholder and the key comes from `azure_key`.
openai_client = AzureOpenAI(
        azure_endpoint = "YOUR_URL_HERE", 
        api_key=azure_key,  
        api_version="2024-08-01-preview"
        )


clientAnthropic = Anthropic(
    # Reads the key from the CLAUDE_API_KEY environment variable (None if unset);
    # the `anthropic_key` variable defined above is not used here.
    # NOTE(review): the SDK's own default variable is presumably ANTHROPIC_API_KEY,
    # so this argument is probably not redundant - confirm before removing it.
    api_key=os.environ.get("CLAUDE_API_KEY"),
)

## Actual Edit Identification Prompts

In [5]:
# read in prompt files (txt)
SYSTEM_file_name_to_use = "system_prompt_annotations.txt"
PROMPT_file_name_to_use = "salsa_prompt_instructions.txt"

# Context managers close each file handle as soon as its text has been read
# (the previous bare open(...).read() left the handles open).
with open(f"prompts_LLM_annotations/{SYSTEM_file_name_to_use}", "r") as system_prompt_file:
    system_prompt_import = system_prompt_file.read()
print(f"System prompt: {system_prompt_import}")

with open(f"prompts_LLM_annotations/{PROMPT_file_name_to_use}", "r") as salsa_prompt_file:
    salsa_prompt_import = salsa_prompt_file.read()
print(f"Salsa prompt: {salsa_prompt_import}")

#response_format_import = open("prompts_LLM_annotations/response_format_annotations.txt", "r").read()
#print(f"Response format: {response_format_import}")

# instruction prompt about SALSA Framework

System prompt: You are a helpful assistant and expert in text simplification annotations that can identify the changes between two sentences and annotate them using the specified framework.
Salsa prompt: You are tasked with annotating sentence pairs from a text simplification dataset. 
Your goal is to identify and categorize the edits made to simplify the original sentence, as well as to note any errors introduced in the process. 
Follow these instructions carefully:

1. You will be presented with two sentences:
<original_sentence>
{{ORIGINAL_SENTENCE}}
</original_sentence>

<simplified_sentence>
{{SIMPLIFIED_SENTENCE}}
</simplified_sentence>

2. Analyze the differences between the original and simplified sentences, focusing on the following categories of edits:

a) Phrase-level Edits:
   - Deletion Edits
   - Insertion Edits
   - Substitution Edits

b) Sentence-level Edits:
   - Splitting Edits
   - Reordering Edits
   - Structural Edits

When identifying edits, focus on the smallest 

In [6]:
# few-shot examples of SALSA annotations
prompt_few_shots_intro = "Here are three examples of how to annotate a sentence pair:"

# read in few-shot examples JSON file (indices have been converted to words from original character-based indices)
with open("prompts_LLM_annotations/example_data_SALSA_official_WordIndices.json", 'r') as file:
    few_shot_examples = json.load(file)

def generate_n_random_examples(n):
    """Return `n` few-shot examples drawn without replacement from `few_shot_examples`.

    The draw uses the `random` module, so it follows whatever state that
    generator is in when the function is called.
    """
    chosen_examples = random.sample(few_shot_examples, n)
    return chosen_examples

fewshot_test = generate_n_random_examples(1)
fewshot_test

[{'source': 'Widespread blackouts are reported in Western Ukraine as Russian forces launch another wave of cruise missile strikes, including the city of Lviv which has experienced a total blackout.',
  'target': 'Widespread blackouts are reported in Western Ukraine as Russian forces launch another wave of cruise missile strikes.',
  'metadata': {'annotator': 'annotator_1', 'system': 'new-wiki-1/T5-11B'},
  'edits': [{'id': 1,
    'category': 'deletion',
    'input_idx': [[17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]],
    'annotation': {'deletion_type': {'val': 'bad_deletion',
      'bad_deletion': 'minor'},
     'coreference': 'no',
     'grammar_error': 'no'}}]}]

In [7]:
def prompt_insert_sentence_pair(sentence1, sentence2):
    """Build the prompt section that presents one sentence pair to annotate.

    `sentence1` is placed in the <original_sentence> tag and `sentence2` in the
    <simplified_sentence> tag. Each sentence is followed by an <index_map> block
    whose content comes from `generate_word_index_map_simple` (defined elsewhere).

    Returns the assembled text as a string. The template is whitespace-sensitive:
    it starts with a newline and the first text line carries a leading space.
    """
    return f"""
 Here is the sentence pair you need to annotate:
<original_sentence>{sentence1}</original_sentence>
<simplified_sentence>{sentence2}</simplified_sentence>

And here is the index map for all words in the original sentence:
<index_map>{generate_word_index_map_simple(sentence1)}</index_map>

And here is the index map for all words in the simplified sentence:
<index_map>{generate_word_index_map_simple(sentence2)}</index_map>
"""

In [8]:
# read in prompt files (txt)
# Context managers close each file handle as soon as its text has been read
# (the previous bare open(...).read() left the handles open).
with open("prompts_LLM_annotations/system_prompt_annotations.txt", "r") as system_prompt_file:
    system_prompt_import = system_prompt_file.read()
print(f"System prompt: {system_prompt_import}")

with open("prompts_LLM_annotations/SALSA_Prompt_CoT_V01.txt", "r") as salsa_cot_prompt_file:
    salsa_prompt_COT_import = salsa_cot_prompt_file.read()
print(f"Salsa prompt: {salsa_prompt_COT_import}")

#response_format_import = open("prompts_LLM_annotations/response_format_annotations.txt", "r").read()
#print(f"Response format: {response_format_import}")

# instruction prompt about SALSA Framework

System prompt: You are a helpful assistant and expert in text simplification annotations that can identify the changes between two sentences and annotate them using the specified framework.
Salsa prompt: You are an expert text simplification analyst using the SALSA framework for a research project. 
Your task is to analyze the differences between an original sentence and its simplified version. Utmost careful work is paramount here.

You are first given some information on the SALSA Framework for Text Simplification Annotations:
<SALSA_Information>
The SALSA (Structured Annotation for Linguistic Simplification Analysis) Framework is a comprehensive method for evaluating text simplifications. 
It provides a structured approach to annotating and analyzing changes made between an original text and its simplified version.

SALSA recognizes 6 primary types of edits:
Deletion, Insertion, Substitution, Splitting, Reordering, and Structural Changes.

- Deletion:
       - Insignificant Deletion

In [9]:
# read in few-shot examples JSON file
with open("prompts_LLM_annotations/example_data_SALSA_official_WordIndices.json", 'r') as file:
    few_shot_examples = json.load(file)

def generate_n_random_examples(n):
    """Return `n` randomly drawn few-shot examples as one string.

    Each example from `few_shot_examples` is serialised with json.dumps and
    followed by a blank line, so consecutive examples are visually separated.
    """
    drawn_examples = random.sample(few_shot_examples, n)
    return "".join(json.dumps(item) + "\n\n" for item in drawn_examples)

fewshot_test = generate_n_random_examples(1)
fewshot_test

'{"source": "The last president to run after leaving office was Theodore Roosevelt, who came in second in the 1912 election as the presidential nominee of the Progressive Party, although Herbert Hoover did briefly seek the Republican presidential nomination at several national conventions subsequent to leaving office in 1933.", "target": "The last president to run after leaving office was Theodore Roosevelt.", "metadata": {"annotator": "annotator_2", "system": "new-wiki-1/T5-3B"}, "edits": [{"id": 1, "category": "deletion", "input_idx": [[11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46]], "annotation": {"deletion_type": {"val": "bad_deletion", "bad_deletion": "a lot"}, "coreference": "no", "grammar_error": "no"}}]}\n\n'

In [10]:
def prompt_insert_sentence_pair(sentence1, sentence2):
    """Build the prompt section that presents one sentence pair to annotate.

    `sentence1` is placed in the <original_sentence> tag and `sentence2` in the
    <simplified_sentence> tag. Each sentence is followed by an <index_map> block
    whose content comes from `generate_word_index_map_simple` (defined elsewhere).

    Returns the assembled text as a string. This definition replaces the earlier
    one of the same name; its template begins with an extra whitespace-only line.
    """
    return f"""
    
 Here is the sentence pair you need to annotate:
<original_sentence>{sentence1}</original_sentence>
<simplified_sentence>{sentence2}</simplified_sentence>

And here is the index map for all words in the original sentence:
<index_map>{generate_word_index_map_simple(sentence1)}</index_map>

And here is the index map for all words in the simplified sentence:
<index_map>{generate_word_index_map_simple(sentence2)}</index_map>
"""

In [11]:
import json
import re

def word_indices_to_char_indices(sentence, word_indices):
    """Translate a run of word indices into a [start_char, end_char] pair.

    Only the first and the last entry of `word_indices` are used: the words at
    those positions (after whitespace-splitting `sentence`) are looked up in the
    map returned by `generate_word_index_map` (defined elsewhere), taking
    element 0 of the first word's entry and element 1 of the last word's entry.

    Returns None when `word_indices` is empty.

    NOTE(review): the lookup is keyed by the word text, so a word occurring
    more than once presumably resolves to a single entry - confirm against
    `generate_word_index_map`.
    """
    span_lookup = generate_word_index_map(sentence)
    tokens = sentence.split()

    if not word_indices:
        return None

    first_token = tokens[word_indices[0]]
    last_token = tokens[word_indices[-1]]
    return [span_lookup[first_token][0], span_lookup[last_token][1]]

def convert_indices_in_edit(edit, source_sentence, target_sentence):
    """Convert the word-index groups of one edit to character indices, in place.

    'input_idx' groups are resolved against `source_sentence` and 'output_idx'
    groups against `target_sentence`, both on the edit itself and on every entry
    of its optional 'constituent_edits' list. Keys that are absent are left
    alone. The same (mutated) `edit` object is returned.
    """
    sentence_by_key = (
        ('input_idx', source_sentence),
        ('output_idx', target_sentence),
    )

    def _convert_record(record):
        # Replace each index group of the record with its character-level span.
        for key, sentence in sentence_by_key:
            if key in record:
                record[key] = [
                    word_indices_to_char_indices(sentence, group)
                    for group in record[key]
                ]

    _convert_record(edit)
    if 'constituent_edits' in edit:
        for constituent in edit['constituent_edits']:
            _convert_record(constituent)
    return edit

def assemble_JSON_output(response_text, original_sentence, simplified_sentence):
    """Extract the edits from a model response and wrap them in the output record.

    The JSON payload is located in this order:
      1. a ```json fenced block containing an object,
      2. the text between <JSON_OUTPUT> and </JSON_OUTPUT>,
      3. the whole response text (used when neither marker is present; before,
         this case failed with an uninformative IndexError).

    Parameters:
    - response_text (str): raw text returned by the model.
    - original_sentence (str): the source sentence of the pair.
    - simplified_sentence (str): the target sentence of the pair.

    Returns:
    - dict with the keys "source", "target", "metadata" (empty string) and
      "edits" (each edit passed through `convert_indices_in_edit`).

    Raises:
    - json.JSONDecodeError: if the located payload is not valid JSON.
    - KeyError: if the parsed payload has no "edits" key.
    """
    # Extract the JSON content from the response
    json_match = re.search(r'```json\s*(\{.*?\})\s*```', response_text, re.DOTALL)
    if json_match:
        print("DEBUG: found json block")
        json_content = json_match.group(1)
    elif "<JSON_OUTPUT>" in response_text:
        print("DEBUG: no json block found, using entire <JSON_OUTPUT>")
        # If no JSON block is found, use the entire <JSON_OUTPUT> content
        json_content = response_text.split("<JSON_OUTPUT>")[1].split("</JSON_OUTPUT>")[0].strip()
    else:
        # Neither marker is present: try the raw text so that a bare JSON answer
        # still parses, and anything else fails below with the content printed.
        print("DEBUG: no json block or <JSON_OUTPUT> tags found, using entire response")
        json_content = response_text.strip()

    # Parse the extracted JSON
    try:
        edits = json.loads(json_content)["edits"]
    except json.JSONDecodeError:
        print("Error parsing JSON. Content:", json_content)
        raise

    # Convert word indices to character indices
    edits = [convert_indices_in_edit(edit, original_sentence, simplified_sentence) for edit in edits]

    # Assemble the full JSON output and return it as a Python dictionary
    return {
        "source": original_sentence,
        "target": simplified_sentence,
        "metadata": "",  # You can add metadata if needed
        "edits": edits,
    }

# Usage
# assembled_output = assemble_JSON_output(response_extracted, original_sentence_testing, simplified_sentence_testing)
# print(assembled_output)

In [12]:
# first draft for data model
from typing import List, Dict, Any, Optional, Union, Literal
from pydantic import BaseModel, Field

# Deletion sub-classification: `val` names the chosen type and at most one of
# the rating fields is filled in (the few-shot example in this notebook carries
# only 'val' and 'bad_deletion').
# The rating fields default to None so that the absent one does not fail
# validation; in pydantic v2 an Optional annotation alone leaves a field required.
# (Comments are used instead of a docstring because a class docstring would be
# emitted as the description in the generated JSON schema.)
class DeletionType(BaseModel):
    val: str
    good_deletion: Optional[str] = None
    bad_deletion: Optional[str] = None

# Annotation payload of a deletion edit: the nested deletion type plus two
# string flags (the few-shot example in this notebook uses 'no' for both).
class DeletionAnnotation(BaseModel):
    deletion_type: DeletionType
    coreference: str
    grammar_error: str

# Annotation payload of a substitution edit: a mapping nested up to three levels
# under `substitution_info_change`, plus a grammar-error string flag.
class SubstitutionAnnotation(BaseModel):
    substitution_info_change: Dict[str, Union[str, Dict[str, Union[str, Dict[str, str]]]]]
    grammar_error: str

# Annotation payload of an insertion edit: a mapping nested up to three levels
# under `insertion_type`, plus a grammar-error string flag.
class InsertionAnnotation(BaseModel):
    insertion_type: Dict[str, Union[str, Dict[str, Union[str, Dict[str, str]]]]]
    grammar_error: str

# Annotation payload of a reorder edit: a mapping nested up to three levels
# under `reorder_level`, plus a grammar-error string flag.
class ReorderAnnotation(BaseModel):
    reorder_level: Dict[str, Union[str, Dict[str, Union[str, Dict[str, str]]]]]
    grammar_error: str

# Annotation payload of a structure edit: a flat string mapping for the
# structure type, a mapping nested up to two levels for the impact, and a
# grammar-error string flag.
class StructureAnnotation(BaseModel):
    structure_type: Dict[str, str]
    impact: Dict[str, Union[str, Dict[str, str]]]
    grammar_error: str

# Annotation payload of a split edit: a mapping nested up to two levels for the
# impact, plus a grammar-error string flag.
class SplitAnnotation(BaseModel):
    impact: Dict[str, Union[str, Dict[str, str]]]
    grammar_error: str

# A sub-edit listed under an Edit's `constituent_edits`.
# Both index fields default to None so that a constituent carrying only one of
# them (or neither) validates; in pydantic v2 an Optional annotation alone
# leaves a field required.
# NOTE(review): the index items are typed as strings here while Edit uses
# List[List[int]] - confirm which representation is intended.
class ConstituentEdit(BaseModel):
    id: int
    category: str
    input_idx: Optional[List[Union[str, List[str]]]] = None
    output_idx: Optional[List[Union[str, List[str]]]] = None

# Define allowed categories
EditCategory = Literal[
    'deletion',
    'substitution', 
    'insertion',
    'reorder',
    'structure',
    'split'
]

# One simplification edit: its category, id, category-specific annotation and
# the affected index groups in the source (`input_idx`) and target (`output_idx`).
# The index and constituent fields default to None because not every edit has
# all of them (the deletion example in this notebook carries only `input_idx`);
# in pydantic v2 an Optional annotation alone leaves a field required.
class Edit(BaseModel):
    category: EditCategory  # Only allows the predefined values
    id: int
    annotation: Union[
        DeletionAnnotation,
        SubstitutionAnnotation,
        InsertionAnnotation,
        ReorderAnnotation,
        StructureAnnotation,
        SplitAnnotation
    ]
    input_idx: Optional[List[List[int]]] = None
    output_idx: Optional[List[List[int]]] = None
    constituent_edits: Optional[List[ConstituentEdit]] = None

# New main data model that includes source, target, and metadata
# Top-level record for one annotated sentence pair.
# `thresh_id` is described as "leave empty", so it gets an explicit None
# default; without it pydantic v2 treats the field as required.
class DataModel(BaseModel):
    source: str = Field(description="The original complex sentence")
    target: str = Field(description="The simplified sentence")
    metadata: Dict[str, str] = Field(description="Metadata about the annotation, including annotator and system")
    edits: List[Edit] = Field(description="List of simplification edits")
    thresh_id: Optional[int] = Field(default=None, description=" ID for the annotation - leave empty")

# Semi-Complex Format (JSON) - Also for Taxonomy Validation

If we cannot use Heineman's (somewhat opaquely organized) schema, can we replicate the final data we extract from it?


## Function Definitions

In [13]:
from openai import AzureOpenAI

# Rebinds `openai_client`, replacing the client created in the earlier setup cell.
# Endpoint and key are literal placeholders here; the `azure_key` variable is not used.
openai_client = AzureOpenAI(
        azure_endpoint = "INSERT_HERE", 
        api_key="YOUR_KEY",  
        api_version="2024-08-01-preview"
        )

In [14]:
# Numeric significance codes -> labels used in the few-shot examples.
significance_map = {
    0: "trivial",
    1: "minor",
    2: "medium",
    3: "major"
}

# load df_edits from official SALSA EXAMPLES
df_edits = pd.read_csv("../data/salsa_annotations/df_edits_SALSA_ExampleData.csv")
#display(df_edits.head())

# filter out WikIDE (for now):
df_edits = df_edits[df_edits["Dataset"] != "wikiDE"]
# and DisSim
df_edits = df_edits[df_edits["System"] != "DisSim"]

# Sentence identifier: "<Sentence ID>_<Dataset>".
df_edits['SentUID'] = df_edits['Sentence ID'].astype(str) + "_" + df_edits['Dataset'].astype(str)


# Keep only the columns below, in this order. generate_n_random_examples_dfEdits
# later renames the columns by position, so the order matters.
df_edits = df_edits[[
    "SentUID",
    "Source",
    "Target",    
    "Input Text",
    "Output Text",
    #"Edit Classification", 
   "Edit Type",  
    "Quality",
     "Significance"
    ]]

# drop rows whose Edit Type contains "ERROR"
df_edits = df_edits[~df_edits["Edit Type"].str.contains("ERROR")]

# turn significance from 0,1,2,3 to "trivial", "minor", "medium", "major"
# (values that are not keys of significance_map become NaN)
df_edits["Significance"] = df_edits["Significance"].map(significance_map)

# Quality labels of the source data -> short labels used in the prompts.
quality_map = {
    "No Error": "good",
    "Error": "bad",
    "Trivial": "trivial"
}

# turn the Quality column (No Error, Error, Trivial) into "good", "bad", "trivial"
# (values that are not keys of quality_map become NaN)
df_edits["Quality"] = df_edits["Quality"].map(quality_map)

# Independent copy used as the few-shot example pool by prompt_assembly_semicomplex.
df_edits_fewShotSemiComplex = df_edits.copy()

df_edits.head()

Unnamed: 0,SentUID,Source,Target,Input Text,Output Text,Edit Type,Quality,Significance
0,SALSA_EXAMPLES_001_SALSA_EXAMPLES,Further important aspects of Fungi in Art rela...,An important aspect of Fungi in Art is the pro...,(as for example from the 'Massee Art Grant' by...,,deletion,good,major
1,SALSA_EXAMPLES_001_SALSA_EXAMPLES,Further important aspects of Fungi in Art rela...,An important aspect of Fungi in Art is the pro...,"(fungal researchers),",,deletion,bad,minor
2,SALSA_EXAMPLES_001_SALSA_EXAMPLES,Further important aspects of Fungi in Art rela...,An important aspect of Fungi in Art is the pro...,"and contamination,",,deletion,good,medium
3,SALSA_EXAMPLES_001_SALSA_EXAMPLES,Further important aspects of Fungi in Art rela...,An important aspect of Fungi in Art is the pro...,initiatives fostering and supporting works able,,deletion,good,major
4,SALSA_EXAMPLES_001_SALSA_EXAMPLES,Further important aspects of Fungi in Art rela...,An important aspect of Fungi in Art is the pro...,Further important aspects,An important aspect,substitution,trivial,trivial


In [15]:
# read in prompt files (txt)
# Context managers close each file handle as soon as its text has been read
# (the previous bare open(...).read() left the handles open).
with open("prompts_LLM_annotations/system_prompt_annotations.txt", "r") as system_prompt_file:
    system_prompt_import = system_prompt_file.read()
print(f"System prompt: {system_prompt_import}")
print()

with open("prompts_LLM_annotations/SemiComplex_Prompt_CoT.txt", "r") as semicomplex_prompt_file:
    SemiComplex_Prompt_CoT_import = semicomplex_prompt_file.read()
print(f"Salsa prompt: {SemiComplex_Prompt_CoT_import}")

def prompt_assembly_semicomplex(sentence1, sentence2, n_fewshot_samples=3, prompt=SemiComplex_Prompt_CoT_import):
    """Assemble the full annotation prompt for one sentence pair.

    The template `prompt` is expected to contain an <examples>...</examples>
    section. That section is either dropped (`n_fewshot_samples == 0`) or
    replaced by freshly sampled few-shot examples, and the sentence pair is
    appended at the end.

    Parameters:
    - sentence1: the original (complex) sentence.
    - sentence2: the simplified sentence.
    - n_fewshot_samples: number of example sentence pairs to insert; 0 removes
      the examples section.
    - prompt: the prompt template text.

    Returns:
    - str: the assembled prompt.

    Raises:
    - IndexError: if `n_fewshot_samples` is non-zero and the template has no
      "</examples>" marker (unchanged from the previous behaviour).
    """
    sentence_pair_block = prompt_insert_sentence_pair_semicomplex(sentence1, sentence2)
    text_before_examples = prompt.split("<examples>")[0]

    if n_fewshot_samples == 0:
        sections = prompt.split("</examples>")
        if len(sections) > 1:
            # Cut the examples section out of the template.
            return text_before_examples + sections[1] + sentence_pair_block
        # No closing marker: use the template unchanged. This explicit check
        # replaces a bare `except:` that also swallowed unrelated errors.
        return prompt + sentence_pair_block

    pieces = [
        text_before_examples,
        "\n\n",
        "<examples>",
        "\n\n",
        # samples of edits per sentence pair
        str(generate_n_random_examples_dfEdits(
            df_edits_fewShotSemiComplex,
            n=n_fewshot_samples)),
        "</examples>",
        "\n\n",
        prompt.split("</examples>")[1],
        sentence_pair_block,
    ]
    return "".join(pieces)

def prompt_insert_sentence_pair_semicomplex(sentence1, sentence2):
    """Return the prompt section presenting one sentence pair to annotate.

    The text starts with two newlines, followed by an introductory line and the
    two sentences wrapped in <original_sentence> / <simplified_sentence> tags,
    each on its own line.
    """
    section_lines = [
        "",
        "",
        "Here is the sentence pair you need to annotate:",
        "<original_sentence>" + f"{sentence1}" + "</original_sentence>",
        "<simplified_sentence>" + f"{sentence2}" + "</simplified_sentence>",
        "",
    ]
    return "\n".join(section_lines)



System prompt: You are a helpful assistant and expert in text simplification annotations that can identify the changes between two sentences and annotate them using the specified framework.

Salsa prompt: You are an expert text simplification analyst using the following framework for a research project. 
Your task is to analyze the differences between an original sentence and its simplified version. Utmost careful work is paramount here.

You are first given some information on the Framework to be used for Text Simplification Annotations:
<Framework_Information>
The following framework is a comprehensive method for evaluating text simplifications. 
It provides a structured approach to annotating and analyzing changes made between an original text and its simplified version.

The framework recognizes 6 primary types of edits:
Deletion, Insertion, Substitution, Split, Reordering, and Structural Changes.

- Deletion:
       - Good Deletion: Did it remove INSIGNIFICANT information (thus im

In [16]:
import numpy as np

def generate_n_random_examples_dfEdits(df_in, n,
                                     header_names = ['SentUID', "source", "target", "input_segment", 
                                                   "output_segment", "edit_type", "quality", "significance"]):
    """Format `n` randomly chosen sentence pairs and their edits as few-shot text.

    For each draw, one sentence id is picked with np.random.choice (so the same
    sentence can be picked more than once). Its source and target sentences are
    written inside <complex_sentence> / <simplified_sentence> tags, followed by
    a CSV table of its edits and a "---" separator.

    The edit rows are written with the csv module, so segments that contain
    commas, quotes or line breaks are quoted and each row keeps exactly five
    fields. Rows without such characters come out exactly as before.

    Parameters:
    - df_in: DataFrame with one row per edit; its columns are renamed by
      position to `header_names` on a copy (the input is not modified).
    - n: number of sentence pairs to emit.
    - header_names: the column names applied to the copy, in column order.

    Returns:
    - str: the formatted examples.
    """
    # Local imports keep the function self-contained within the notebook.
    import csv
    import io

    df = df_in.copy()
    df.columns = header_names
    grouped = df.groupby('SentUID')
    sentence_ids = df['SentUID'].unique()  # loop-invariant, computed once

    edit_cols = ['input_segment', 'output_segment', 'edit_type', 'quality', 'significance']

    buffer = io.StringIO()
    # "\n" line endings keep the layout of the previous hand-built rows.
    writer = csv.writer(buffer, lineterminator="\n")

    for _ in range(n):
        sent_id = np.random.choice(sentence_ids)
        sent_group = grouped.get_group(sent_id)

        # Sentence pair formatting
        buffer.write(f"<complex_sentence>\n{sent_group['source'].iloc[0]}\n</complex_sentence>\n\n")
        buffer.write(f"<simplified_sentence>\n{sent_group['target'].iloc[0]}\n</simplified_sentence>\n\n")

        # CSV header, then one row per edit
        writer.writerow(edit_cols)
        for _, edit in sent_group[edit_cols].iterrows():
            writer.writerow([str(edit[col]) for col in edit_cols])

        buffer.write("\n---\n\n")

    return buffer.getvalue()

#print(generate_n_random_examples_dfEdits(df_edits_fewShotSemiComplex, 10))

In [17]:
import re
import csv
import pandas as pd
from io import StringIO

def extract_all_output_blocks(response_text):
    """
    Collect the content of every <OUTPUT>...</OUTPUT> section in `response_text`.

    Tag matching ignores case and the content may span several lines. A code
    fence directly after the opening tag (optionally labelled csv, plaintext or
    xml) and one directly before the closing tag are left out of the result.
    Returns a list with one whitespace-stripped string per section, which is
    empty when there is no section.
    """
    output_block_re = re.compile(
        r"<OUTPUT>"
        r"(?:```(?:csv|plaintext|xml)?\s*)?"  # Optional code fence marker
        r"(.*?)"                              # Capture block content
        r"(?:```)?"
        r"</OUTPUT>",
        re.DOTALL | re.IGNORECASE
    )
    stripped_blocks = []
    for found in output_block_re.finditer(response_text):
        stripped_blocks.append(found.group(1).strip())
    return stripped_blocks




def parse_all_csv_blocks(response_text):
    """
    Parse every <OUTPUT> block of `response_text` and stack the results.

    Each block found by extract_all_output_blocks is handed to
    parse_single_csv_block; empty frames are discarded and the remaining ones
    are concatenated with a fresh index. When nothing usable remains, an empty
    DataFrame with the five edit columns is returned.
    """
    raw_blocks = extract_all_output_blocks(response_text)
    parsed_frames = (parse_single_csv_block(raw) for raw in raw_blocks)
    usable_frames = [frame for frame in parsed_frames if not frame.empty]

    if not usable_frames:
        # Nothing parsed: hand back an empty frame that still has the expected columns
        return pd.DataFrame(
            columns=["input_segment","output_segment","edit_type","quality","significance"]
        )
    return pd.concat(usable_frames, ignore_index=True)


def process_api_response(response):
    """
    Turn one chat-completion response into a single DataFrame of edits.

    Reads the text of the first choice and passes it to parse_all_csv_blocks,
    which parses and combines all <OUTPUT> CSV blocks.
    """
    first_choice_text = response.choices[0].message.content
    return parse_all_csv_blocks(first_choice_text)

## Two-Step Pipeline
### More Function Definitions

In [18]:
# The context manager closes the file handle as soon as the text has been read
# (the previous bare open(...).read() left the handle open).
with open("prompts_LLM_annotations/SemiComplex_Prompt_ValidationStep.txt", "r") as validation_prompt_file:
    validation_prompt_import = validation_prompt_file.read()
validation_prompt_import

'You are part of a text simplification research project.\nPlease validate the extracted CSV/output content that is provided below and convert it\nto the specified JSON data model. Additionally, verify the correct handling of split edits as explained below.\n\nJSON data model:\n{\n     "source": "<Original complex sentence>",\n     "target": "<Simplified sentence>",\n     "edits": [\n     {\n     "category": "<Edit type: Deletion, Insertion, Substitution, Reordering, Split, Structural>",\n     "input_text": "<Affected words (if any) in the original sentence>",\n     "output_text": "<Affected words (if any) in the simplified sentence>",\n     "quality": "<Quality rating: good, bad, or trivial>",\n     "significance": <Significance level: 0 (trivial), 1 (minor), 2 (medium), 3 (major)>\n     }\n     ]\n}\n\nImportant Note About input_text and output_text:\nIf the edit affects a continuous span of words, store them as a single string inside the array.\nExample: ["The quick brown fox"], NOT 

In [19]:
from typing import List, Literal
from pydantic import BaseModel, Field

# Define allowed categories
EditCategory = Literal[
    'deletion',
    'substitution', 
    'insertion',
    'reorder',
    'structure',
    'split'
]

# Define quality types
EditQuality = Literal['good', 'bad', 'trivial']

# Define significance levels
SignificanceLevel = Literal[0, 1, 2, 3]

# One edit in the semi-complex (text-based) format: category, the affected words
# on each side, a quality label and a numeric significance level.
# The JSON schema of this model is sent to the API as the response format in
# generate_semicomplex_annotations_for_csv, so the Field descriptions below are
# part of what the model sees. Comments are used instead of a docstring because
# a class docstring would be emitted as the schema description.
class EditSemiComplex(BaseModel):
    category: EditCategory
    input_text: List[str] = Field(description="The words in the complex (input) sentence affected by the edit")
    output_text: List[str] = Field(description="The words in the simplified (output) sentence affected by the edit")
    quality: EditQuality = Field(description="Quality of edit: good, bad, or trivial")
    significance: SignificanceLevel = Field(description="Significance level: 0 for trivial, 1-3 for good/bad depending on severity (minor: 1, medium: 2, major: 3)")

# Top-level record of the validation step: the sentence pair and its edits.
# Default value of `datamodel_validationstep` in
# generate_semicomplex_annotations_for_csv, where its JSON schema is used as the
# response format.
class DataModelSemiComplex(BaseModel):
    source: str = Field(description="The original complex sentence")
    target: str = Field(description="The simplified sentence") 
    edits: List[EditSemiComplex] = Field(description="List of simplification edits")

In [20]:
import re
import logging
import pandas as pd
import openai
from typing import Optional
from io import StringIO

import re

def extract_all_output_blocks(response_text):
    """
    Pull the annotation content out of a model response.

    Three strategies are tried in order:
      1. every <OUTPUT>...</OUTPUT> section (case-insensitive, multi-line), with
         an optional code fence (csv, plaintext, json, xml or unlabelled) and
         surrounding whitespace removed;
      2. everything after the first occurrence of the word "output" (any case);
      3. the whole response text.

    Parameters:
    - response_text (str): The full text response from the API.

    Returns:
    - List[str]: The stripped content strings; always at least one element.
    """
    tagged_sections = re.findall(
        r"<OUTPUT>\s*(?:```(?:csv|plaintext|json|xml)?\s*)?(.*?)\s*(?:```)?\s*</OUTPUT>",
        response_text,
        re.DOTALL | re.IGNORECASE,
    )
    if tagged_sections:
        return [section.strip() for section in tagged_sections]

    # Fallback: keep whatever follows the first "output" keyword
    keyword_position = response_text.lower().find("output")
    if keyword_position == -1:
        # Final fallback: nothing recognisable, return the full text
        return [response_text.strip()]

    remainder = response_text[keyword_position + len("output"):]
    return [remainder.strip()]



def isolate_csv_content(output_block):
    """
    Return the CSV/output content of a single <OUTPUT> block.

    Currently a pass-through: the block is handed back unchanged.

    Parameters:
    - output_block (str): The content within a single <OUTPUT>...</OUTPUT> block.

    Returns:
    - str: The same content that was passed in.
    """
    isolated_content = output_block
    return isolated_content

def process_api_response(response):
    """
    Extract the CSV/output annotation strings from one API response.

    The text of the first choice is passed to extract_all_output_blocks. A
    warning is logged if that call returns an empty list.

    Parameters:
    - response: The API response object.

    Returns:
    - List[str]: The extracted CSV/output content strings.
    """
    extracted_blocks = extract_all_output_blocks(response.choices[0].message.content)

    if not extracted_blocks:
        logging.warning("No <OUTPUT> blocks found in the API response.")

    return extracted_blocks



def generate_semicomplex_annotations_for_csv(
    csv_file_path: str,
    output_json_path: str,
    output_csv_path: str,
    num_samples: Optional[int] = None,
    seed: int = 42,
    prompt: str = SemiComplex_Prompt_CoT_import,
    validation_prompt: str = validation_prompt_import,
    datamodel_validationstep = DataModelSemiComplex,
    n_fewshot_samples: int = 6,
    modelname: str = "gpt"
) -> None:
    """
    Generate semicomplex annotations, validate them, and output a structured JSON file.

    For every sentence pair in the CSV, two requests are sent through the
    module-level `openai_client` using the module-level `model_gpt`:
      1. a generation request with the assembled few-shot prompt, and
      2. a validation request that converts the extracted annotations to the
         JSON schema of `datamodel_validationstep`.
    Rows for which either request fails are reported and skipped.

    Parameters:
    - csv_file_path: CSV with the columns "complex_sentence" and
      "simplified_sentence". "Original" / "original_sentence" and "Simplified"
      are accepted as alternative names and renamed.
    - output_json_path: where the list of annotation records is written as JSON.
    - output_csv_path: where the same records are written as CSV.
    - num_samples: if set and smaller than the number of rows, that many rows
      are sampled.
    - seed: random_state used for the row sampling.
    - prompt: prompt template for the generation step.
    - validation_prompt: instructions for the validation step.
    - datamodel_validationstep: pydantic model whose JSON schema is used as the
      response format of the validation step.
    - n_fewshot_samples: number of few-shot examples inserted into the prompt.
    - modelname: currently unused; the model is taken from `model_gpt`.

    Returns:
    - None. Nothing is written if the CSV cannot be read or the required
      columns are missing.
    """
    try:
        df = pd.read_csv(csv_file_path)
        print(f"Successfully read CSV file: {csv_file_path}")
    except Exception as e:
        print(f"Error reading CSV file: {e}")
        return

    required_columns = {"complex_sentence", "simplified_sentence"}
    if not required_columns.issubset(df.columns):
        # Try the known alternative column names first ...
        df = df.rename(columns={
            "Original": "complex_sentence",
            "Simplified": "simplified_sentence",
            "original_sentence": "complex_sentence",
        })
        # ... and stop only if the required columns are still missing. Before,
        # the message was printed even when the rename succeeded, and a CSV
        # without usable columns went on to fail with a KeyError in the loop.
        if not required_columns.issubset(df.columns):
            print(f"CSV must contain the columns: {required_columns}")
            return

    if num_samples and num_samples < len(df):
        df = df.sample(n=num_samples, random_state=seed)
        print(f"Sampled {num_samples} rows.")

    annotations_list = []
    system_message = {"role": "system", "content": "You are a helpful assistant in a text simplification research project."}

    for index, row in df.iterrows():
        original_sentence = row["complex_sentence"]
        simplified_sentence = row["simplified_sentence"]

        print(f"\n### Processing row {index + 1}:")
        print(f"Original: {original_sentence}")
        print(f"Simplified: {simplified_sentence}")

        prompt_out = prompt_assembly_semicomplex(original_sentence, simplified_sentence, n_fewshot_samples=n_fewshot_samples, prompt=prompt)
        print("\n--- Generated Prompt ---\n", prompt_out)

        # Step 1: free-form generation of the annotations
        try:
            response = openai_client.chat.completions.create(
                model=model_gpt,
                messages=[system_message,
                          {"role": "user", "content": prompt_out}],
            )
            extracted_contents = process_api_response(response)
            print("\n--- Extracted Content ---\n", extracted_contents)

        except Exception as e:
            print(f"API request failed for row {index + 1}: {e}")
            continue

        # Step 2: validation and conversion to the structured data model
        try:
            validation_request = (
                validation_prompt +
                f"\n\nOriginal Sentence: {original_sentence}\n\nSimplified Sentence: {simplified_sentence}\n\n" +
                "--- Extracted Annotations to validate and transform: ---\n\n" +
                "\n\n".join(extracted_contents)
            )
            validation_response = openai_client.chat.completions.create(
                model=model_gpt,
                messages=[system_message,
                          {"role": "user", "content": validation_request}],
                response_format={
                    "type": "json_schema",
                    "json_schema": {"name": "name1", "schema": datamodel_validationstep.model_json_schema()}
                }
            )
            validated_annotations = json.loads(validation_response.choices[0].message.content)

            print("\n--- Validated Annotations ---\n", json.dumps(validated_annotations, indent=4, ensure_ascii=False))

            annotation_entry = {
                "source": original_sentence,
                "target": simplified_sentence,
                "metadata": {"annotator": "annotator_0", "system": "UNDEFINED"}, 
                "edits": validated_annotations.get("edits", []),
                "_thresh_id": index + 1
            }
            annotations_list.append(annotation_entry)

        except Exception as e:
            print(f"⚠️ Validation request failed for row {index + 1}: {e}")
            continue

    # Save aggregated results as JSON
    with open(output_json_path, "w", encoding="utf-8") as json_file:
        json.dump(annotations_list, json_file, ensure_ascii=False, indent=4)
    print(f"\n Aggregated annotations saved to {output_json_path}")

    # Convert JSON to CSV and save
    output_df = pd.DataFrame(annotations_list)
    output_df.to_csv(output_csv_path, index=False, encoding="utf-8")
    print(f"\n Aggregated annotations saved to {output_csv_path}")

#### Enhanced API calls for multiple models

In [21]:
# Setup API Clients
replicateClient = replicate.Client(api_token=replicate_key)

clientAnthropic = Anthropic(api_key=anthropic_key)

LMstudio_client = OpenAI(
    base_url="http://localhost:1234/v1",
    api_key="lm-studio"
)

In [45]:
def call_generation_api_concurrent(prompt: str, model: str) -> str:
    """
    Send `prompt` to the API that serves `model` and return the raw reply text.

    The API family is resolved with `get_api_type(model)`. The whole call,
    including the random pre-request delay, runs while holding that family's
    entry in `API_LOCKS`, so only one request per API type is active at a time.

    Args:
        prompt: Fully assembled user prompt.
        model: Model identifier. It is forwarded as the model name to Replicate,
            Anthropic and Azure OpenAI; the local LM Studio call sends a
            placeholder model name instead.

    Returns:
        The response text as delivered by the respective client.

    Raises:
        KeyError: If `get_api_type` returns a type that has no entry in `API_LOCKS`.
        Exception: Errors raised by the underlying API client are not caught here.
    """
    # Function-scope import: the jitter below needs numpy, which the notebook's
    # import cell does not bring in. Harmless if `np` is already defined globally.
    import numpy as np

    system_prompt = "You are a helpful assistant in a text simplification research project."
    api_type = get_api_type(model)

    with API_LOCKS[api_type]:
        raw_message = ""
        if api_type == "replicate":
            print(f"[{model}] Sending request to Replicate...")
            output = replicateClient.run(model, input={"prompt": prompt})
            # The output is joined piecewise into a single string.
            raw_message = "".join(output)
        elif api_type == "local":
            print(f"[{model}] Sending request to Local API...")
            # Random delay of 0 or 1 seconds (upper bound is exclusive).
            time.sleep(np.random.randint(0, 2))
            response = LMstudio_client.chat.completions.create(
                model="someString",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt}
                ])
            raw_message = response.choices[0].message.content
        elif api_type == "anthropic":
            print(f"[{model}] Sending request to Anthropic...")
            # Random delay of 0-9 seconds before the request.
            time.sleep(np.random.randint(0, 10))
            response = clientAnthropic.messages.create(
                max_tokens=8096,
                messages=[{"role": "user", "content": prompt}],
                model=model
            )
            raw_message = response.content[0].text
        else:  # Azure OpenAI (GPT)
            print(f"[{model}] Sending request to Azure OpenAI...")
            # Random delay of 0-9 seconds before the request.
            time.sleep(np.random.randint(0, 10))
            response = openai_client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt}
                ]
            )
            raw_message = response.choices[0].message.content

    print(f"[{model}] Received response.")
    return raw_message

def process_model(
    model: str,
    df: pd.DataFrame,
    n_fewshot_samples: int,
    prompt: str,
    validation_prompt: str,
    datamodel_validationstep,
    model_gpt: str
) -> List[dict]:
    """
    Run the two-stage annotation pipeline for one model over all rows of `df`.

    Stage 1 asks `model` (through `call_generation_api_concurrent`) to annotate a
    sentence pair and extracts the output blocks from the reply. Stage 2 sends
    those blocks to `openai_client` (model `model_gpt`), requesting JSON that
    follows the schema of `datamodel_validationstep`. A row for which either
    stage raises is reported on stdout and left out of the result.

    Returns:
        One dict per successfully validated row with the keys `source`,
        `target`, `metadata`, `edits` and `_thresh_id` (DataFrame index + 1).
    """
    results = []

    for row_idx, pair in df.iterrows():
        source_text = pair["complex_sentence"]
        target_text = pair["simplified_sentence"]
        row_no = row_idx + 1

        print(f"\n[{model}] Processing row {row_no}:")
        print("Original:", source_text)
        print("Simplified:", target_text)

        # Build the generation prompt for this sentence pair.
        assembled_prompt = prompt_assembly_semicomplex(
            source_text,
            target_text,
            n_fewshot_samples=n_fewshot_samples,
            prompt=prompt
        )
        print(f"[{model}] Generated Prompt:\n{assembled_prompt}")

        # Stage 1: free-text annotation by the model under test.
        try:
            reply_text = call_generation_api_concurrent(assembled_prompt, model)
            print(f"[{model}] Raw response: {reply_text}")
            output_blocks = extract_all_output_blocks(reply_text)
            print(f"[{model}] Extracted Content:\n{output_blocks}")
        except Exception as err:
            print(f"[{model}] API request failed for row {row_no}: {err}")
            continue

        # Stage 2: convert the extracted blocks into schema-conforming JSON.
        try:
            user_content = "".join((
                validation_prompt,
                f"\n\nOriginal Sentence: {source_text}\n\nSimplified Sentence: {target_text}\n\n",
                "--- Extracted Annotations to validate and transform: ---\n\n",
                "\n\n".join(output_blocks),
            ))
            schema_request = {
                "type": "json_schema",
                "json_schema": {"name": "name1", "schema": datamodel_validationstep.model_json_schema()}
            }
            chat_messages = [
                {"role": "system", "content": "You are a helpful assistant in a text simplification research project."},
                {"role": "user", "content": user_content}
            ]
            completion = openai_client.chat.completions.create(
                model=model_gpt,
                messages=chat_messages,
                response_format=schema_request
            )
            parsed = json.loads(completion.choices[0].message.content)
            print(f"[{model}] Validated Annotations:\n", json.dumps(parsed, indent=4, ensure_ascii=False))
        except Exception as err:
            print(f"[{model}] Validation failed for row {row_no}: {err}")
        else:
            # Only reached when validation succeeded; errors here are not swallowed.
            results.append({
                "source": source_text,
                "target": target_text,
                "metadata": {"annotator": model, "system": get_api_type(model)},
                "edits": parsed.get("edits", []),
                "_thresh_id": row_no
            })

    return results

def generate_semicomplex_annotations_for_csv_multi_model(
    csv_file_path: str,
    output_dir: str,
    models: List[str],
    num_samples: Optional[int] = None,
    seed: int = 42,
    prompt: str = "YOUR_SEMI_COMPLEX_PROMPT",
    validation_prompt: str = "YOUR_VALIDATION_PROMPT",
    datamodel_validationstep=None,
    n_fewshot_samples: int = 6,
    taxName_for_filename: str = 'noTaxName',
    ID_filter = None, # allow to filter for specific IDs
    model_gpt: str = "gpt-4"  # default GPT model for validation
):
    """
    Annotate the sentence pairs of a CSV file with several models concurrently.

    Every model gets its own worker thread running `process_model` over the
    (optionally sampled / filtered) DataFrame; within a model the rows are
    processed sequentially. As each model finishes, its annotations are written
    to one JSON and one CSV file in `output_dir`.

    Args:
        csv_file_path: CSV file (read with mac_roman encoding) that contains the
            columns `complex_sentence` and `simplified_sentence`. The column
            names `Original`, `Simplified` and `original_sentence` are accepted
            and renamed.
        output_dir: Directory for the result files; created if it does not exist.
        models: Model identifiers to run, one worker thread each.
        num_samples: If set and smaller than the number of rows, a random sample
            of that size is drawn using `seed`.
        seed: Random state for the sampling step.
        prompt: Annotation prompt handed to `process_model`.
        validation_prompt: Validation prompt handed to `process_model`.
        datamodel_validationstep: Data model handed to `process_model`, which
            calls `model_json_schema()` on it.
        n_fewshot_samples: Number of few-shot samples handed to `process_model`.
        taxName_for_filename: Tag embedded in the output file names.
        ID_filter: Optional collection of 1-based IDs; only rows whose DataFrame
            index equals `ID - 1` are kept.
        model_gpt: Model name used for the validation step in `process_model`.

    Returns:
        A dict mapping each model whose worker finished without raising to its
        list of annotation entries. `{}` when `models` is empty or the required
        columns are missing; `None` when the CSV file cannot be read.
    """
    try:
        df = pd.read_csv(csv_file_path, encoding="mac_roman")
        print(f"Successfully read CSV file: {csv_file_path}")
    except Exception as e:
        print(f"Error reading CSV file: {e}")
        return

    # Accept the known alternative column names, then verify the result so that
    # a wrong CSV is reported once here instead of failing inside every worker.
    required_columns = {"complex_sentence", "simplified_sentence"}
    if not required_columns.issubset(df.columns):
        df = df.rename(columns={
            "Original": "complex_sentence",
            "Simplified": "simplified_sentence",
            "original_sentence": "complex_sentence",
        })
        missing_columns = required_columns.difference(df.columns)
        if missing_columns:
            print(f"CSV must contain the columns: {required_columns} (missing: {sorted(missing_columns)})")
            return {}

    if num_samples and num_samples < len(df):
        df = df.sample(n=num_samples, random_state=seed)
        print(f"Sampled {num_samples} rows.")

    if ID_filter:  # filter index of df
        # subtract 1 from all IDs to match 0-based index
        ID_filter = [x - 1 for x in ID_filter]
        df = df[df.index.isin(ID_filter)]
        print(f"Filtered rows with IDs: {ID_filter}")

    annotations_by_model = {}

    # ThreadPoolExecutor rejects max_workers=0, so an empty model list is a no-op.
    if not models:
        print("No models given; nothing to annotate.")
        return annotations_by_model

    # Make sure the results can be written before spending API calls.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # One shared timestamp so all files of this run carry the same suffix.
    current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    # Create one thread per model.
    with concurrent.futures.ThreadPoolExecutor(max_workers=len(models)) as executor:
        future_to_model = {
            executor.submit(
                process_model,
                model,
                df,
                n_fewshot_samples,
                prompt,
                validation_prompt,
                datamodel_validationstep,
                model_gpt
            ): model for model in models
        }
        for future in concurrent.futures.as_completed(future_to_model):
            model = future_to_model[future]
            try:
                annotations_list = future.result()
                annotations_by_model[model] = annotations_list

                # "/" in model names would otherwise be read as a path separator.
                model_name_safe = model.replace("/", "_")
                file_stem = f"LLM_annotations_{taxName_for_filename}_{model_name_safe}_{current_time}"

                # Save JSON output.
                json_output_path = os.path.join(output_dir, f"{file_stem}.json")
                with open(json_output_path, "w", encoding="utf-8") as json_file:
                    json.dump(annotations_list, json_file, ensure_ascii=False, indent=4)
                print(f"[{model}] Saved JSON annotations to {json_output_path}")

                # Save CSV output.
                csv_output_path = os.path.join(output_dir, f"{file_stem}.csv")
                pd.DataFrame(annotations_list).to_csv(csv_output_path, index=False, encoding="utf-8")
                print(f"[{model}] Saved CSV annotations to {csv_output_path}")
            except Exception as exc:
                # A failing model must not abort the other models' results.
                print(f"[{model}] generated an exception: {exc}")

    return annotations_by_model

In [46]:
# One lock per API type; call_generation_api_concurrent holds the matching
# lock for the duration of a request.
API_LOCKS = {
    api_name: threading.Lock()
    for api_name in ("azure", "anthropic", "replicate", "local")
}

### Execution - Peer Dataset

In [None]:
# # Execute (commented out after execution)
# df_res = generate_semicomplex_annotations_for_csv(
#     csv_file_path="../data/salsa_peer_annotations/salsa_peer_data_wikiDE_random.csv",
#     output_json_path="../data/salsa_annotations/LLM_annotations_output/LLM_annotations_001b.json",
#     output_csv_path="../data/salsa_annotations/LLM_annotations_output/LLM_annotations_001b.csv",
#     num_samples=None # None: run on entire set
# )
# # 
# df_res

In [47]:
# Heineman (base prompt, in semi-complex / unnested format)
prompt_heineman = open("prompts_LLM_annotations/SemiComplex_Prompt_CoT_HM.txt", "r").read()
validation_prompt_heineman = open("prompts_LLM_annotations/SemiComplex_Prompt_ValidationStep_HM.txt", "r").read()
#datamodel = DataModelSemiComplex_HM

prompt_heineman

'You are an expert text simplification analyst using the following framework for a research project. \nYour task is to analyze the differences between an original sentence and its simplified version. Utmost careful work is paramount here.\n\nYou are first given some information on the Framework to be used for Text Simplification Error Annotations:\n\n<Framework_Information>\nThe following framework is a comprehensive method for evaluating errors in LLM-generated text simplifications.\nIt provides a structured approach to annotating and analyzing erroneous changes made between an original text and its simplified version.\nThe framework govers both good and bad edit types, but you will later focus on the bad edit types.\n\nThe framework recognizes 6 primary types of edits:\nDeletion, Insertion, Substitution, Split, Reordering, and Structural Changes.\n\n- Deletion:\n       - Good Deletion: Did it remove INSIGNIFICANT information (thus improving the sentence)? Example: "Like so many hyped

In [None]:
# DISABLED AFTER SUCCESSFUL EXECUTION

# # Define list of models.
# models = [
#     #"gpt-4o-latest", 
#     #"o1-preview",
#     #"gpt-4o-mini",
#     #"claude-3-5-sonnet-20241022",
#     #"deepseek-ai/deepseek-r1",
#     #"meta/meta-llama-3-70b-instruct",
#     "localLLAMA" #via LM STUDIO
# ]

# taxID_for_filename = "PeerSet_Heineman_ZeroShot_L8bn"

# # Call the function
# annotations = generate_semicomplex_annotations_for_csv_multi_model(
#     csv_file_path="../data/salsa_peer_annotations/salsa_peer_data_wikiDE_random.csv",
#     output_dir="../data/salsa_annotations/LLM_annotations_output",
#     models=models,
#     num_samples=None,        
#     seed=42,
#     prompt = prompt_heineman,
#     validation_prompt = validation_prompt_heineman,
#     datamodel_validationstep = DataModelSemiComplex_HM,


#     #ID_filter = [6], # allow to filter for specific IDs


#     n_fewshot_samples=0, # !!! fewshot samples per sentence pair

#     taxName_for_filename = taxID_for_filename
# )

### Execution - Taxonomy Validation Dataset

#### Define Data Models for Taxonomies
##### Heineman (Semi-Complex Adaptation) 
-> also used for LLM vs Human Evaluation

In [27]:
# Label sets for the Heineman (semi-complex) annotation format; they are used
# as field types by the EditSemiComplex model.

# Allowed edit categories
EditCategory = Literal[
    'deletion',
    'substitution', 
    'insertion',
    'reorder',
    'structure',
    'split'
]

# Allowed quality labels of an edit
EditQuality = Literal['good', 'bad', 'trivial']

# Allowed significance levels (0 for trivial edits, 1-3 for good/bad edits,
# see the `significance` field description in EditSemiComplex)
SignificanceLevel = Literal[0, 1, 2, 3]

# A single edit in the Heineman semi-complex format: its category, the affected
# words on the input and output side, and a quality / significance rating.
# All five fields are required (none has a default).
class EditSemiComplex(BaseModel):
    category: EditCategory
    input_text: List[str] = Field(description="The words in the complex (input) sentence affected by the edit")
    output_text: List[str] = Field(description="The words in the simplified (output) sentence affected by the edit")
    quality: EditQuality = Field(description="Quality of edit: good, bad, or trivial")
    significance: SignificanceLevel = Field(description="Significance level: 0 for trivial, 1-3 for good/bad depending on severity (minor: 1, somewhat: 2, a lot: 3)")

# Top-level record for one sentence pair in the Heineman semi-complex format:
# the two sentences plus the list of edits between them.
# This is the `datamodel_validationstep` named in the (commented-out) Heineman
# call of generate_semicomplex_annotations_for_csv_multi_model.
class DataModelSemiComplex_HM(BaseModel):
    source: str = Field(description="The original complex sentence")
    target: str = Field(description="The simplified sentence") 
    edits: List[EditSemiComplex] = Field(description="List of simplification edits")

In [28]:
# Heineman (for comparison):
# Read inside `with` blocks so the file handles are closed immediately, and with
# an explicit encoding so the result does not depend on the platform default.
with open("prompts_LLM_annotations/SemiComplex_Prompt_CoT_HM.txt", "r", encoding="utf-8") as _prompt_fh:
    prompt_heineman = _prompt_fh.read()
with open("prompts_LLM_annotations/SemiComplex_Prompt_ValidationStep_HM.txt", "r", encoding="utf-8") as _prompt_fh:
    validation_prompt_heineman = _prompt_fh.read()
datamodel = DataModelSemiComplex_HM

# Bare expression: shows the loaded prompt as the cell output.
prompt_heineman

'You are an expert text simplification analyst using the following framework for a research project. \nYour task is to analyze the differences between an original sentence and its simplified version. Utmost careful work is paramount here.\n\nYou are first given some information on the Framework to be used for Text Simplification Error Annotations:\n\n<Framework_Information>\nThe following framework is a comprehensive method for evaluating errors in LLM-generated text simplifications.\nIt provides a structured approach to annotating and analyzing erroneous changes made between an original text and its simplified version.\nThe framework govers both good and bad edit types, but you will later focus on the bad edit types.\n\nThe framework recognizes 6 primary types of edits:\nDeletion, Insertion, Substitution, Split, Reordering, and Structural Changes.\n\n- Deletion:\n       - Good Deletion: Did it remove INSIGNIFICANT information (thus improving the sentence)? Example: "Like so many hyped

In [30]:
# function to convert heineman structure to a level1, level2, level3 structure

import json

def convert_old_to_new_format(old_json_path: str, new_json_path: str) -> None:
    """
    Rewrite an annotation file from the category/quality/significance layout
    into the level1/level2/level3 layout and save it as a new JSON file.

    Per edit: level1 is the old category, level2 is "<quality> <category>",
    level3 and `orthogonal_data.severity` are the stringified significance
    (the string "[]" when the edit has none); all other orthogonal fields
    are written as null.
    """

    def _to_level_format(old_edit: dict) -> dict:
        # Map one old-style edit onto the level-based structure.
        category = old_edit["category"]
        level2_label = f"{old_edit['quality']} {category}"
        significance_text = str(old_edit.get("significance", []))
        return {
            "edit_type_level1": category,
            "edit_type_level2": level2_label,
            "edit_type_level3": significance_text,
            "input_text": old_edit.get("input_text", []),
            "output_text": old_edit.get("output_text", []),
            "orthogonal_data": {
                "severity": significance_text,
                "scope": None,
                "domain_sensitivity": None,
                "factual_dependence": None,
                "polarity_switch": None,
                "simplification_direction": None,
            },
        }

    with open(old_json_path, 'r', encoding='utf-8') as source_file:
        legacy_records = json.load(source_file)

    converted_records = []
    for entry in legacy_records:
        rebuilt = {
            "source": entry["source"],
            "target": entry["target"],
            "metadata": entry.get("metadata", {}),
            "edits": [_to_level_format(old_edit) for old_edit in entry["edits"]],
        }
        # The id is optional and is only copied when the old record has one.
        if "_thresh_id" in entry:
            rebuilt["_thresh_id"] = entry["_thresh_id"]
        converted_records.append(rebuilt)

    # The target file is only opened after every record converted successfully.
    with open(new_json_path, 'w', encoding='utf-8') as target_file:
        json.dump(converted_records, target_file, indent=4, ensure_ascii=False)
    print(f"Converted data saved to: {new_json_path}")


# (old-format file, level-format file) pairs, one per annotator model.
files_to_convert = [
    (
        "../data/LLM_annotations/LLM_annotations_N50_Heineman_GPT4o_FULL.json",
        "../data/LLM_annotations/LLM_annotations_N50_Heineman_GPT4o_FULL_LEVELSformat.json",
    ),
    (
        "../data/LLM_annotations/LLM_annotations_N50_Heineman_ClaudeSonnet_FULL.json",
        "../data/LLM_annotations/LLM_annotations_N50_Heineman_ClaudeSonnet_FULL_LEVELSformat.json",
    ),
    (
        "../data/LLM_annotations/LLM_annotations_N50_Heineman_LLAMA8b_FULL.json",
        "../data/LLM_annotations/LLM_annotations_N50_Heineman_LLAMA8b_FULL_LEVELSformat.json",
    ),
]

# Convert each file in turn; the converter prints the path it wrote.
for old_file, new_file in files_to_convert:
    convert_old_to_new_format(old_json_path=old_file, new_json_path=new_file)

Converted data saved to: ../data/LLM_annotations/LLM_annotations_N50_Heineman_GPT4o_FULL_LEVELSformat.json
Converted data saved to: ../data/LLM_annotations/LLM_annotations_N50_Heineman_ClaudeSonnet_FULL_LEVELSformat.json
Converted data saved to: ../data/LLM_annotations/LLM_annotations_N50_Heineman_LLAMA8b_FULL_LEVELSformat.json


##### Huidrom & Belz

In [31]:
from typing import Optional, List, Literal, Union
from pydantic import BaseModel, Field

# Label sets for the Huidrom & Belz taxonomy; they are used as field types by
# the OrthogonalData and EditMaximallyMerged models defined after them.

# Define Level 1 Edit Types (Main categories)
EditTypeLevel1 = Literal["Omission", "Addition", "Substitution"]

# Define Level 2 Edit Types (Sub-categories)
EditTypeLevel2 = Literal[
    "Duplication", "Other",  # For Addition
    "Should Have Been Left Verbatim", "Should Not Have Been Left Verbatim", "Lexical Error", "Error in Input", "Reordering", "Other Wrongly Rendered Input"  # For Substitution
]

# Define Level 3 Edit Types (Sub-sub-categories, only relevant for Error in Input)
EditTypeLevel3 = Literal[
    "Disambiguation Error", "Multi-Word Expression Error", "Other Wrong Lexical Choice"
]

# Orthogonal Error Type: Deviation Types
MeaningDeviation = Literal[
    "NE Deviation", "Polarity Deviation", "Numerical Deviation", "Other Meaning Deviation"
]

# Orthogonal Error Type: Context vs. Function
ContextFunctionType = Literal["Content Words", "Function Words"]

# Orthogonal Error Type: Number of Words
NumWordsType = Literal["Single Word", "Multiple Words"]

# Orthogonal Error Type: Severity
SeverityType = Literal["Major", "Minor"]

# Orthogonal Error Type: Syntactic Category
SyntacticCategory = Literal["Subject", "Object", "Other"]

# Orthogonal (cross-cutting) attributes of one edit in the Huidrom & Belz
# taxonomy. `num_words` and `severity` are required; the other three fields
# are optional and default to None.
class OrthogonalData(BaseModel):
    meaning_deviation: Optional[MeaningDeviation] = None
    context_function: Optional[ContextFunctionType] = None
    num_words: NumWordsType
    severity: SeverityType
    syntactic_category: Optional[SyntacticCategory] = None


# A single edit in the Huidrom & Belz taxonomy: a required level-1 type,
# optional level-2 / level-3 subtypes, the affected words on both sides and
# the orthogonal attributes.
class EditMaximallyMerged(BaseModel):
    edit_type_level1: EditTypeLevel1
    edit_type_level2: Optional[EditTypeLevel2] = None
    edit_type_level3: Optional[EditTypeLevel3] = None
    input_text: List[str] = Field(description="Words in the complex (input) sentence affected by the edit")
    output_text: List[str] = Field(description="Words in the simplified (output) sentence affected by the edit")
    orthogonal_data: OrthogonalData


# Top-level record for one sentence pair in the Huidrom & Belz taxonomy:
# the two sentences plus the list of EditMaximallyMerged entries.
class DataModelMaximallyMerged_BH(BaseModel):
    source: str = Field(description="The original complex sentence")
    target: str = Field(description="The simplified sentence")
    edits: List[EditMaximallyMerged] = Field(description="List of identified content/meaning errors according to the Maximally Merged Taxonomy")

In [32]:
# Huidrom & Belz prompts. Read inside `with` blocks so the file handles are
# closed immediately, and with an explicit encoding so the result does not
# depend on the platform default.
with open("prompts_LLM_annotations/SemiComplex_Prompt_CoT_HuidromBelz.txt", "r", encoding="utf-8") as _prompt_fh:
    prompt_meta_huidrom = _prompt_fh.read()
with open("prompts_LLM_annotations/SemiComplex_Prompt_ValidationStep_HuidromBelz.txt", "r", encoding="utf-8") as _prompt_fh:
    validation_prompt_meta_huidrom = _prompt_fh.read()

##### MB2025

In [33]:
from typing import Optional, List, Literal
from pydantic import BaseModel, Field

# Label sets for the MB2025 hierarchical taxonomy; they are used as field types
# by the OrthogonalData and EditHierarchical models defined after them.
# NOTE(review): EditTypeLevel1/2/3 and SeverityType are also assigned in the
# Huidrom & Belz cell; running this cell rebinds those names. Models created
# earlier presumably keep the aliases they were built with - confirm before
# re-running cells out of order.

# === Level 1 Categories ===
EditTypeLevel1 = Literal["Content / Meaning Errors", "Form / Fluency Errors"]

# === Level 2 Categories ===
EditTypeLevel2 = Literal[
    # Content / Meaning Errors
    "Omission", "Addition", "Substitution",
    # Form / Fluency Errors
    "Coherence and Structural Issues", "Syntactic Errors", "Stylistic Errors"
]

# === Level 3 Subtypes ===
EditTypeLevel3 = Literal[
    # Omission (under Content / Meaning Errors)
    "Essential Omission", "Contextual Omission",

    # Addition (under Content / Meaning Errors)
    "Unnecessary Expansion", "Factual Hallucination", "Repetitive Addition",

    # Substitution (under Content / Meaning Errors)
    "Lexical Inaccuracy / Semantic Drift", "Factual Distortion", "Lack of Simplicity / Lexical Complexity", "Coreference / Anaphora Resolution",

    # Coherence and Structural Issues (under Form / Fluency Errors)
    "Awkward Phrasing", "Bad Structure / Split",

    # Syntactic Errors (under Form / Fluency Errors)
    "Subject-Verb Agreement Error", "Tense Inconsistency", "Punctuation Errors",

    # Stylistic Errors (under Form / Fluency Errors)
    "Genre / Tone Misalignment"
]

# === Orthogonal Dimension Types ===
SeverityType = Literal["Minor", "Major", "Critical"]
ScopeType = Literal["Word", "Phrase", "Clause", "Sentence"]
DomainSensitivityType = Literal["Generic", "Domain-Specific"]
FactualDependenceType = Literal["Requires External Knowledge", "Self-Contained"]
# The next two literals list None as an allowed value themselves.
PolaritySwitchType = Literal["Polarity Switch", None]
SimplificationDirectionType = Literal["Too Complex", "Too Simple", None]

# Orthogonal (cross-cutting) attributes of one edit in the MB2025 taxonomy.
# `severity` and `scope` are required; the remaining fields are optional and
# default to None.
# NOTE(review): this reuses the class name OrthogonalData from the
# Huidrom & Belz cell, so the name refers to this version once the cell has run.
class OrthogonalData(BaseModel):
    severity: SeverityType
    scope: ScopeType
    domain_sensitivity: Optional[DomainSensitivityType] = None
    factual_dependence: Optional[FactualDependenceType] = None
    polarity_switch: Optional[PolaritySwitchType] = None
    simplification_direction: Optional[SimplificationDirectionType] = None


# A single edit in the MB2025 hierarchical taxonomy: required level-1 and
# level-2 types, an optional level-3 subtype, the affected words on both
# sides and the orthogonal attributes.
class EditHierarchical(BaseModel):
    edit_type_level1: EditTypeLevel1
    edit_type_level2: EditTypeLevel2
    edit_type_level3: Optional[EditTypeLevel3] = None
    input_text: List[str] = Field(description="Words in the complex (input) sentence affected by the edit")
    output_text: List[str] = Field(description="Words in the simplified (output) sentence affected by the edit")
    orthogonal_data: OrthogonalData


# Top-level record for one sentence pair in the MB2025 taxonomy: the two
# sentences plus the list of EditHierarchical entries.
# This is the `datamodel_validationstep` named in the (commented-out) MB2025
# call of generate_semicomplex_annotations_for_csv_multi_model.
class DataModelHierarchicalTaxonomy_MB(BaseModel):
    source: str = Field(description="The original complex sentence")
    target: str = Field(description="The simplified sentence")
    edits: List[EditHierarchical] = Field(description="List of identified content/meaning and fluency/form errors according to the hierarchical taxonomy")

In [34]:
# MB2025 prompts. Read inside `with` blocks so the file handles are closed
# immediately, and with an explicit encoding so the result does not depend on
# the platform default.
with open("prompts_LLM_annotations/SemiComplex_Prompt_CoT_MB2025.txt", "r", encoding="utf-8") as _prompt_fh:
    prompt_mb2025 = _prompt_fh.read()
with open("prompts_LLM_annotations/SemiComplex_Prompt_ValidationStep_MB2025.txt", "r", encoding="utf-8") as _prompt_fh:
    validation_prompt_mb2025 = _prompt_fh.read()


# Bare expression: shows the loaded prompt as the cell output.
prompt_mb2025

'You are an expert text simplification analyst using the following framework for a research project. \nYour task is to analyze the differences between an original sentence and its simplified version. Utmost careful work is paramount here.\n\nYou are first given some information on the Framework to be used for Text Simplification Error Annotations:\n\n<Framework_Information>\nThe following framework is a comprehensive method for evaluating errors in LLM-generated text simplifications.\nIt provides a structured approach to annotating and analyzing erroneous changes made between an original text and its simplified version.\nThe taxonomy is designed to be domain-agnostic and captures both content/meaning errors and fluency/form errors, while allowing for adjustments to the level of detail through its hierarchical structure (3 levels).\n\nThe framework recognizes the following primary error types and sub-types:\n1. Content / Meaning Errors (Errors impacting factual content, semantics, or me

#### Actual Execution

In [None]:
import pandas as pd

# Load the taxonomy validation subset; the file is decoded as mac_roman,
# the same encoding the multi-model runner uses for its CSV input.
df = pd.read_csv(
    "../data/TS_datasets/taxonomy_validation_subset_n50.csv",
    encoding="mac_roman",
)

In [None]:
# Tag that is embedded in the output file names of this run.
taxID_for_filename = "MB2025"

# Annotator models for this run.
models = [
    "gpt-4o-latest",
    "claude-3-5-sonnet-20241022",
    "localLLAMA",  # via LM STUDIO
]

# Call the function (adjust the prompt strings and datamodel as needed)
# (DISABLED AFTER SUCCESSFUL EXECUTION)

# annotations = generate_semicomplex_annotations_for_csv_multi_model(
#     csv_file_path="../data/TS_datasets/taxonomy_validation_subset_n50.csv",
#     output_dir="../data/salsa_annotations/LLM_annotations_output",
#     models=models,
#     num_samples=None,        
#     seed=42,
#     prompt = prompt_mb2025,
#     validation_prompt = validation_prompt_mb2025,
#     datamodel_validationstep = DataModelHierarchicalTaxonomy_MB,
#     n_fewshot_samples=0, # DO NOT USE FEWSHOT when using a custom taxonomy !
#     taxName_for_filename = taxID_for_filename
# )