In [5]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

model = SentenceTransformer('all-MiniLM-L6-v2')
jargon_dict = ["neural network", "backpropagation", "gradient descent"]  # Your domain list
text = "The RNN uses backpropagation for training, and use gradient desc."

# Embed
text_tokens = [w for w in text.split() if len(w) > 2]
text_embeds = model.encode(text_tokens)  # Chunk text
jargon_embeds = model.encode(jargon_dict)

# Fuzzy match
sims = cosine_similarity(text_embeds, jargon_embeds)
matches = np.where(sims > 0.70)  # Threshold for fuzzy jargon

jargon_matches = sorted({jargon_dict[i] for i in matches[1]})
print("Detected jargon:", jargon_matches)

Detected jargon: ['backpropagation', 'gradient descent']


In [None]:
import json
import os
from openai import OpenAI

api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("OPENAI_API_KEY environment variable is required.")

client = OpenAI(api_key=api_key)

def replace_jargon_with_llm(text: str, jargon_terms: list[str]) -> tuple[str, dict[str, str]]:
    """Use an OpenAI model to rewrite text with simplified jargon."""
    replacements = {
        "RNN": "Recurrent Neural Net",
        "neural network": "artificial intelligence model",
        "backpropagation": "error correction process",
        "gradient descent": "iterative optimization step",
    }    
    if not jargon_terms:
        return text, {}
    system_prompt = (
        "You simplify technical jargon. Replace only the listed jargon terms "
        "with the alternatives listed within 'replacements' while keeping "
        "without changes the original sentence that should "
        "not be modified by the jargons replacement."
    )
    user_prompt = (
        "Original text:\n"
        f"{text}\n\n"
        "Jargon terms to replace (provide alternatives for each):\n"
        f"{jargon_terms}\n\n"
        "Return valid JSON with keys 'replacements' (mapping jargon to the new phrase) "
        "and 'rewritten_text' (the updated text without modifying the text that wasn't " 
        "changed). Do not include Markdown fences."
    )
    response = client.responses.create(
        model="gpt-4o-mini",
        input=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        #response_format={"type": "json_object"},
        temperature=0.2,
    )
    raw_output = response.output[0].content[0].text
    try:
        payload = json.loads(raw_output)
    except json.JSONDecodeError as exc:
        raise ValueError(f"Model returned invalid JSON: {raw_output}") from exc
    rewritten = payload.get("rewritten_text", text)
    #replacements = payload.get("replacements", {})
    return rewritten, replacements

rewritten_text, suggested_replacements = replace_jargon_with_llm(text, jargon_matches)
print("Suggested replacements:", suggested_replacements)
print("Rewritten text:", rewritten_text)

Suggested replacements: {'RNN': 'Recurrent Neural Net', 'neural network': 'artificial intelligence model', 'backpropagation': 'error-correction process', 'gradient descent': 'iterative optimization step'}
Rewritten text: The RNN uses error correction method for training, and uses step-by-step optimization.
