Uses DSPy ReAct/CoT agents, plus a ReAct agent equipped with a CEFR verification tool.
Evaluated models: gemma3:12b and gpt-oss:20b (local models), and Gemini-2.5-Flash (via OpenRouter).



In [None]:
import json
import dspy
import numpy as np
import pandas as pd
import os
from transformers import pipeline


In [None]:
#Function to write the output file after generating the simplifications.
def write_output(text_ids, simplified_texts, outputfilepath):
    """Write system outputs to a .jsonl submission file.

    Submissions must be provided as .jsonl files.
    Each line in the file should contain a JSON object with the following fields:

    text_id: the identifier of the corresponding entry in the test dataset
    simplified: the system-generated simplification of the entry, targeting
        the specified target_cefr level

    Further instructions will be provided for the final submissions once the
    test set is released.

    Args:
        text_ids: Iterable of identifiers, one per entry.
        simplified_texts: Iterable of simplified texts, aligned with ``text_ids``.
        outputfilepath: Path of the .jsonl file to create (overwritten if present).

    Raises:
        ValueError: If ``text_ids`` and ``simplified_texts`` differ in length.
    """
    ids = list(text_ids)
    texts = list(simplified_texts)
    # zip() would silently drop the unmatched tail and yield an incomplete
    # submission, so fail loudly before touching the output file.
    if len(ids) != len(texts):
        raise ValueError(
            f"text_ids has {len(ids)} entries but simplified_texts has {len(texts)}"
        )

    # Explicit encoding keeps the file independent of the platform default.
    with open(outputfilepath, "w", encoding="utf-8") as f:
        for text_id, simplified in zip(ids, texts):
            f.write(json.dumps({"text_id": text_id, "simplified": simplified}) + "\n")
    print("Wrote output to: ", outputfilepath)

In [None]:
#Read the data
file_path = 'tsar2025_test_withrefs.jsonl'

# Read all the data: one JSON object per line.
# The encoding is explicit so non-ASCII text is decoded the same way on every
# platform; blank lines (e.g. a trailing newline) are skipped instead of
# crashing json.loads.
with open(file_path, 'r', encoding='utf-8') as f:
    data = [json.loads(line) for line in f if line.strip()]

# Column-wise views over the records, all aligned by position.
text_ids = [item['text_id'] for item in data]
originals = [item['original'] for item in data]
target_cefrs = [item['target_cefr'] for item in data]
references = [item['reference'] for item in data]

In [None]:
#Define a basic simplification with DSPy

#CHANGE AS NEEDED HERE
#To use a local model served by Ollama instead:
#lm = dspy.LM("ollama_chat/gpt-oss:20b", api_base="http://localhost:11434", api_key="")


#to use one of the larger models via openrouter:
# The key is read from the OPENROUTER_API_KEY environment variable so that no
# secret has to be pasted into the notebook; it falls back to the empty string
# when the variable is not set.
lm = dspy.LM(
    model="openrouter/google/gemini-2.5-flash",
    api_base="https://openrouter.ai/api/v1",
    api_key=os.environ.get("OPENROUTER_API_KEY", ""),
)


# Register the language model as the default for all DSPy modules below.
dspy.configure(lm=lm)

#Define the actual class signature
# Declares two inputs (source_text, cefr_level) and one output (simplified_text).
# NOTE(review): DSPy is documented to use a Signature's docstring as the task
# instructions sent to the model, so the text below should be treated as part of
# the runtime behaviour - confirm before rewording it.
# The docstring is a single-quoted string joined with backslash continuations,
# so the leading indentation of each continued line ends up inside the string.
class Simplify(dspy.Signature):
    "Rewrite the given English text into a simpler language to suit the given target CEFR level. \
    The simplified text must retain the core meaning and intent of the original \
    while using vocabulary, sentence structures, and grammar that are appropriate for the given CEFR level."
    # Input: the original text to simplify.
    source_text = dspy.InputField()
    # Input: the target CEFR level for the rewritten text.
    cefr_level = dspy.InputField()
    # Output: the rewritten text produced by the model.
    simplified_text = dspy.OutputField()

#Chain-of-thought variant of the simplification task.
class Simplify_COT(dspy.Module):
    """Simplify a text to a target CEFR level with a chain-of-thought predictor.

    Wraps ``dspy.ChainOfThought`` around the ``Simplify`` signature.
    """

    def __init__(self):
        super().__init__()
        self.prog = dspy.ChainOfThought(Simplify)

    def forward(self, source, cefr):
        """Run the wrapped predictor.

        Args:
            source: Text forwarded as the signature's ``source_text`` input.
            cefr: Target level forwarded as the signature's ``cefr_level`` input.

        Returns:
            The result of calling ``self.prog``.
        """
        prediction = self.prog(source_text=source, cefr_level=cefr)
        return prediction
    
#ReAct variant of the simplification task (no tools attached).
class Simplify_React(dspy.Module):
    """Simplify a text to a target CEFR level with a tool-less ReAct agent.

    Wraps ``dspy.ReAct`` around the ``Simplify`` signature with an empty tool list.
    """

    def __init__(self):
        super().__init__()
        self.prog = dspy.ReAct(Simplify, tools=[])

    def forward(self, source, cefr):
        """Run the wrapped agent.

        Args:
            source: Text forwarded as the signature's ``source_text`` input.
            cefr: Target level forwarded as the signature's ``cefr_level`` input.

        Returns:
            The result of calling ``self.prog``.
        """
        prediction = self.prog(source_text=source, cefr_level=cefr)
        return prediction


#Defining the CEFR checker tool:
# Load the three text-classification pipelines in a fixed order; `models` is the
# list consulted by the checker, and the individual names are kept as aliases.
models = [
    pipeline(task="text-classification", model=model_id)
    for model_id in (
        "AbdullahBarayan/ModernBERT-base-doc_en-Cefr",
        "AbdullahBarayan/ModernBERT-base-doc_sent_en-Cefr",
        "AbdullahBarayan/ModernBERT-base-reference_AllLang2-Cefr2",
    )
]
cefr_labeler1, cefr_labeler2, cefr_labeler3 = models

def check_cefr_level(inputtext: str):
    """
    Checks the CEFR level for the given text using Barayan's CEFR classifiers
    Args:
        inputtext: The text for which you want to know the CEFR level. 
    Returns:
        str: The CEFR level - one of A1, A2, B1, B2, C1, C2.
    """
    # Take the first prediction of every classifier in `models`.
    top_predictions = [classifier(inputtext)[0] for classifier in models]
    # Keep the prediction with the highest score; on a tie the earliest
    # classifier in `models` wins.
    scores = [prediction["score"] for prediction in top_predictions]
    winner = top_predictions[scores.index(max(scores))]
    return winner["label"]

#ReAct variant that can call the CEFR checker as a tool.
class Simplify_React_CheckCEFR(dspy.Module):
    """Simplify a text to a target CEFR level with a ReAct agent and a CEFR tool.

    Wraps ``dspy.ReAct`` around the ``Simplify`` signature and registers
    ``check_cefr_level`` as the agent's only tool.
    """

    def __init__(self):
        super().__init__()
        self.prog = dspy.ReAct(Simplify, tools=[check_cefr_level])

    def forward(self, source, cefr):
        """Run the wrapped agent.

        Args:
            source: Text forwarded as the signature's ``source_text`` input.
            cefr: Target level forwarded as the signature's ``cefr_level`` input.

        Returns:
            The result of calling ``self.prog``.
        """
        prediction = self.prog(source_text=source, cefr_level=cefr)
        return prediction

In [None]:
# Simplify with DSPy chain-of-thought:
simplified_texts = []
c = Simplify_COT()
for i, (source, cefr) in enumerate(zip(originals, target_cefrs)):
    # Call the module itself rather than .forward(): DSPy routes invocations
    # through Module.__call__, and calling forward() directly bypasses that.
    simplified_texts.append(c(source=source, cefr=cefr)['simplified_text'])
    print(i)  # progress indicator
#Write an output file in the given format:
write_output(text_ids, simplified_texts, "output_dspybasic_paper_geminiflash.jsonl")

In [None]:
# Simplify with DSPy ReAct (no tools):
simplified_texts = []
c = Simplify_React()
for i, (source, cefr) in enumerate(zip(originals, target_cefrs)):
    # Call the module itself rather than .forward(): DSPy routes invocations
    # through Module.__call__, and calling forward() directly bypasses that.
    simplified_texts.append(c(source=source, cefr=cefr)['simplified_text'])
    print(i)  # progress indicator
#Write an output file in the given format:
write_output(text_ids, simplified_texts, "output_dspyreact_paper_geminiflash.jsonl")

In [None]:
# Disabled cell: the code below is wrapped in a string literal, so it is never
# executed.
# NOTE(review): Simplify_Predict is not defined in the visible part of this
# file; the cell would need that class before it could be re-enabled.
"""# Simplify with Dspy predict:
simplified_texts = []
c = Simplify_Predict()   
for i in range(0,len(data)):
    simplified_texts.append(c.forward(originals[i], target_cefrs[i])['simplified_text'])
    #print(i)
#Write a output file in the given format:
write_output(text_ids, simplified_texts, "output_dspypredict_paper_gemma312b.jsonl")
"""

In [None]:
#Use the CEFR classifier as a tool so that the agent can verify its generated text.
simplified_texts = []
c = Simplify_React_CheckCEFR()
for i, (source, cefr) in enumerate(zip(originals, target_cefrs)):
    # Call the module itself rather than .forward(): DSPy routes invocations
    # through Module.__call__, and calling forward() directly bypasses that.
    simplified_texts.append(c(source=source, cefr=cefr)['simplified_text'])
    print(i)  # progress indicator
#Write an output file in the given format:
write_output(text_ids, simplified_texts, "output_dspyreacttool_paper_geminiflash.jsonl")