Approaches:

1. Generic prompting of an Ollama model (llama3.2:3b, qwen3:8b, gemma3:12b, gpt-oss:20b)
2. Use a basic ReAct agent without tools (also test this with a larger model, gemini-2.5-flash)
3. Use a ReAct agent + 1 tool (also test this with a larger model, gemini-2.5-flash)
4. Use a ReAct agent + 2 tools (also test this with a larger model, gemini-2.5-flash)

In [None]:
#All imports 
# Requires: Ollama, Transformers, Evaluate, Smolagents
# OpenRouter API Key if you want to explore non-local models. 
import json
import ollama

from sklearn.metrics import f1_score,root_mean_squared_error
from transformers import pipeline,AutoTokenizer
import evaluate
import numpy as np
import pandas as pd
import json

#for smolagents: 
from smolagents import tool, LiteLLMModel, OpenAIServerModel, ToolCallingAgent

In [None]:
#Function to write the output file after generating them:
def write_output(text_ids, simplified_texts, outputfilepath):
    """Write system outputs to a .jsonl submission file.

    Submissions must be provided as .jsonl files.
    Each line in the file contains a JSON object with the following fields:

    text_id: the identifier of the corresponding entry in the test dataset
    simplified: the system-generated simplification of the entry, targeting
        the specified target_cefr level

    Further instructions will be provided for the final submissions once the
    test set is released.

    Args:
        text_ids: identifiers, one per entry.
        simplified_texts: generated simplifications, positionally aligned
            with ``text_ids``.
        outputfilepath: destination path of the .jsonl file. Missing parent
            directories are created.

    Raises:
        ValueError: if ``text_ids`` and ``simplified_texts`` differ in length.
            Nothing is written in that case.
    """
    # Local import so this cell does not depend on the notebook's import cell
    # being extended.
    from pathlib import Path

    text_ids = list(text_ids)
    simplified_texts = list(simplified_texts)

    # zip() would silently drop the surplus items and pair the remaining ids
    # with the wrong texts, so refuse to write a misaligned submission.
    if len(text_ids) != len(simplified_texts):
        raise ValueError(
            f"Cannot write {outputfilepath}: got {len(text_ids)} text ids "
            f"but {len(simplified_texts)} simplified texts."
        )

    # Create the target directory up front so a finished run is not lost to a
    # FileNotFoundError at the very last step.
    Path(outputfilepath).parent.mkdir(parents=True, exist_ok=True)

    # Write to .jsonl file
    with open(outputfilepath, "w", encoding="utf-8") as f:
        for text_id, simplified in zip(text_ids, simplified_texts):
            record = {"text_id": text_id, "simplified": simplified}
            f.write(json.dumps(record) + "\n")
    print("Wrote output to: ", outputfilepath)

In [None]:
#EVERYTHING THAT NEEDS TO CHANGE EXISTS HERE - SEE CAREFULLY!

#Ollama Model of Choice: CHANGE AS NEEDED.
ollama_model = "qwen3:latest" #"gpt-oss:20b"
# Derived from ollama_model so that approach 1 (direct ollama.chat call) and
# the smolagents-based approaches can never point at two different local models.
litellmmodel = f"ollama_chat/{ollama_model}"
smolagentmodel = LiteLLMModel(model_id=litellmmodel, api_base="http://127.0.0.1:11434", num_ctx=8192)
#Models explored or to be explored: Gemma2:9B, Gemma3:12B, Qwen2.5:7B, Qwen3:8B, LLama3.2, Gptoss20b

#If using the openrouter models - this is only for smolagent:
"""
smolagentmodel = OpenAIServerModel(
    model_id="google/gemini-2.5-flash",
    api_base="https://openrouter.ai/api/v1", # Leave this blank to query OpenAI servers.
    api_key="", # your API key
    )
"""

#Keeping this unchanged for all the reported results in the final task description paper
system_prompt = """You are an expert text simplifier. Your task is to rewrite the provided text 
                    at a given Target CEFR English proficiency level.

The simplified text must retain the core meaning and intent of the original 
while using vocabulary, sentence structures, and grammar that are appropriate for the given CEFR level.

Provide only the simplified text, without any additional commentary, explanations, or labels.
                """

#OUTPUT FILE PATHS: follow the selected model so that a run with a new model
#does not overwrite the results of a previous one.
# Short tag used in the file names: the model name without its ":<variant>"
# suffix ("qwen3:latest" -> "qwen3"). Set it by hand when smolagentmodel is
# switched to an OpenRouter model, or when two variants of one model are compared.
model_tag = ollama_model.split(":")[0]
outputpath_basic = f"outputs/output_paper_promptonly_{model_tag}.jsonl"
outputpath_agent_notools = f"outputs/output_paper_basicagent_{model_tag}.jsonl"
outputpath_agent_toolcall = f"outputs/output_paper_withtoolcall_{model_tag}.jsonl"
outputpath_agent_twotools = f"outputs/output_paper_with2tools_{model_tag}.jsonl"

In [None]:
#Load the test data
file_path = 'tsar2025_test_withrefs.jsonl'

#Read all the data: one JSON object per line.
# The encoding is pinned to UTF-8 so that decoding does not depend on the
# platform's default locale, and blank lines (e.g. a trailing newline at the
# end of the file) are skipped instead of crashing json.loads.
with open(file_path, 'r', encoding='utf-8') as f:
    data = [json.loads(line) for line in f if line.strip()]

#store all the required fields.
original_texts = [entry['original'] for entry in data]
# CEFR levels are upper-cased (e.g. "a2" -> "A2").
target_cefr_levels = [entry['target_cefr'].upper() for entry in data]
text_ids = [entry['text_id'] for entry in data]
# References are not loaded here; enable the line below if they are needed.
reference_texts = []
#reference_texts = [entry['reference'] for entry in data]

In [None]:
#1. Generic prompting, simplify with a local Ollama model

def _strip_reasoning(text):
    """Return the answer part of a model response, without its reasoning trace.

    Reasoning models such as qwen3 may prefix their answer with a
    ``<think>...</think>`` section. Everything up to and including the last
    ``</think>`` tag is dropped. A response without the tag is returned
    unchanged (apart from whitespace stripping) instead of raising an
    IndexError, which is what ``split("</think>")[1]`` did.
    """
    _, _, answer = text.rpartition("</think>")
    return answer.strip()


simplified_texts = []

for original, target_level in zip(original_texts, target_cefr_levels):
    # Same prompt text as used for all reported results; only the way it is
    # assembled differs.
    message = (
        f"{system_prompt}Here is the original text: {original}"
        f" \n Here is the target cefr level: {target_level}"
        " \n Generate the simplified text now."
    )

    response = ollama.chat(model=ollama_model, messages=[{'role': 'user', 'content': message}])
    # The tag check is model-agnostic: it is a no-op for responses that do not
    # contain a reasoning trace.
    simplified_texts.append(_strip_reasoning(response.message.content))

#Write a output file in the given format:
write_output(text_ids, simplified_texts, outputpath_basic)

In [None]:
#2. With smolagents, without any tool calling:

"""
smolagentmodel = OpenAIServerModel(
    model_id="google/gemini-2.5-pro",
    api_base="https://openrouter.ai/api/v1", # Leave this blank to query OpenAI servers.
    api_key="", # Switch to the API key for the server you're targeting.
    )
"""
# An agent with an empty tool list: it can only answer from the prompt.
agent = ToolCallingAgent(model=smolagentmodel, tools=[])
simplified_texts_agents = []
for source_text, cefr_level in zip(original_texts, target_cefr_levels):
    # Prompt = shared system prompt + the text to simplify + its target level.
    message = (
        f"{system_prompt}Here is the original text: {source_text}"
        f" \n Here is the target cefr level: {cefr_level}"
        " \n Generate the simplified text now."
    )
    output = agent.run(message)
    simplified_texts_agents.append(output)

#Write an output file in the given format
write_output(text_ids, simplified_texts_agents, outputpath_agent_notools)

In [None]:
# Define tools: check cefr, meaningbert score
# Hugging Face model ids of the three CEFR text classifiers, in loading order.
_CEFR_CLASSIFIER_IDS = (
    "AbdullahBarayan/ModernBERT-base-doc_en-Cefr",
    "AbdullahBarayan/ModernBERT-base-doc_sent_en-Cefr",
    "AbdullahBarayan/ModernBERT-base-reference_AllLang2-Cefr2",
)
# One text-classification pipeline per classifier; `models` is the list that
# check_cefr_level() iterates over.
models = [
    pipeline(task="text-classification", model=classifier_id)
    for classifier_id in _CEFR_CLASSIFIER_IDS
]
cefr_labeler1, cefr_labeler2, cefr_labeler3 = models
# Metric object used by get_meaningbert_score().
meaning_bert = evaluate.load("davebulaval/meaningbert")

@tool
def check_cefr_level(inputtext : str) -> str:
    """
    Checks the CEFR level for the given text using Barayan's CEFR classifiers
    Args:
        inputtext: The text for which you want to know the CEFR level. 
    Returns:
        str: The CEFR level - one of A1, A2, B1, B2, C1, C2.
    """
    # NOTE: the docstring above is kept verbatim; the @tool decorator turns it
    # into the tool description shown to the agent.

    def confidence(prediction):
        # Each prediction is a dict with (at least) "label" and "score".
        return prediction["score"]

    # Take the first-ranked prediction of every classifier, then answer with
    # the label of whichever classifier is most confident.
    top_predictions = [classifier(inputtext)[0] for classifier in models]
    most_confident = max(top_predictions, key=confidence)
    return most_confident["label"]

@tool
def get_meaningbert_score(prediction: str, original: str) -> str:
    """
    Checks the meaning similarity between the generated output and the original 
    Args:
        prediction: generated simplified text from the LLM
        original: original text given to the LLM
    Returns:
        str: the meaning bert model score as a string. The closer to 1.0 the better. 
    """
    # NOTE: the docstring text above is kept as-is; the @tool decorator turns it
    # into the tool description shown to the agent.
    score = meaning_bert.compute(predictions=[prediction], references=[original])
    # Only one pair is scored, so the first entry is the score of interest.
    # Dividing by 100 rescales it so that, per the docstring, 1.0 is the best
    # value (presumably the metric reports 0-100 - TODO confirm).
    return str(round(np.mean(score["scores"][0]) / 100, 4))


In [None]:
#3. Use the CEFR classifier as a tool so that the agent can verify its generated text.

#Now the prompting begins:

# NOTE: this rebinds the notebook-wide `system_prompt`; cells executed after
# this one see the tool-aware prompt below, not the base prompt.
system_prompt = """You are an expert text simplifier. Your task is to rewrite the provided text 
                    at a given Target CEFR English proficiency level.

The simplified text must retain the core meaning and intent of the original 
while using vocabulary, sentence structures, and grammar that are appropriate for the given CEFR level.

Provide only the simplified text, without any additional commentary, explanations, or labels.

You can verify your output using the check_cefr_level() tool, which can help you verify the cefr level of your
generated text.
                """

agent = ToolCallingAgent(model=smolagentmodel, tools=[check_cefr_level])
simplified_texts_agents_tools = []
for text_id, original, target_level in zip(text_ids, original_texts, target_cefr_levels):
    message = (
        f"{system_prompt}Here is the original text: {original}"
        f" \n Here is the target cefr level: {target_level}"
        " \n Generate the simplified text now."
    )
    try:
        output = agent.run(message)
    except Exception as err:  # quick fix for generation errors if any.
        # Fall back to the unsimplified text. The fallback MUST still be
        # appended: skipping the entry would shift every later output by one
        # position and pair it with the wrong text_id in the output file.
        # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working.
        print(f"Generation failed for text_id={text_id}: {err!r}; keeping the original text.")
        output = original

    simplified_texts_agents_tools.append(output)

#Write a output file in the given format:
write_output(text_ids, simplified_texts_agents_tools, outputpath_agent_toolcall)

In [None]:
# Use both meaningbert and cefr predictor as tools:

sysprompt = """You are an expert text simplifier. Your task is to rewrite the provided text 
                    at a given Target CEFR English proficiency level.

The simplified text must retain the core meaning and intent of the original 
while using vocabulary, sentence structures, and grammar that are appropriate for the given CEFR level.

Provide only the simplified text, without any additional commentary, explanations, or labels.

You can verify your output using the check_cefr_level() tool, which can help you verify the cefr level of your
generated text, and get_meaningbert_score() tool which helps you check the meaning similarity 
between the original text and the generated simplification"""

agent = ToolCallingAgent(model=smolagentmodel, tools=[check_cefr_level, get_meaningbert_score])
simplified_texts_agents_tools = []
for text_id, original, target_level in zip(text_ids, original_texts, target_cefr_levels):
    # Build the prompt from `sysprompt` (the two-tool prompt defined above).
    # The previous code used `system_prompt` here, so the agent was never told
    # about get_meaningbert_score(). `sysprompt` has no trailing whitespace,
    # hence the explicit newline before the task text.
    message = (
        f"{sysprompt}\nHere is the original text: {original}"
        f" \n Here is the target cefr level: {target_level}"
        " \n Generate the simplified text now."
    )
    try:
        output = agent.run(message)
    except Exception as err:
        # Same fallback as the single-tool run: keep the original text so one
        # failed generation neither aborts the whole run nor misaligns
        # outputs and text ids.
        print(f"Generation failed for text_id={text_id}: {err!r}; keeping the original text.")
        output = original
    simplified_texts_agents_tools.append(output)

#Write a output file in the given format:
write_output(text_ids, simplified_texts_agents_tools, outputpath_agent_twotools)

