In [91]:
import dspy
from dotenv import load_dotenv

# Load environment variables from a .env file before any LM is constructed.
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm.
load_dotenv()

True

In [92]:
# Parameters for simulation (IVs)
# NOTE(review): "IVs" presumably stands for independent variables — confirm.
n_abstracts = 10  # not referenced by any cell visible in this part of the notebook
length_abstracts = 500  # target word count handed to the abstract generator
typicality = 1  # NOTE(review): scale/meaning is not defined in the visible cells — TODO document
degree_jargon = .90  # NOTE(review): scale/meaning is not defined in the visible cells — TODO document
llm_temperature = .7  # sampling temperature intended for the language model
# Free-text eligibility criteria used as prompt input. Later cells read the
# 'inclusion_criteria' entry when generating relevant examples and the
# 'exclusion_criteria' entry when generating irrelevant ones.
# NOTE(review): "TTM [etrathiomolybdate]" looks like a typo for "tetrathiomolybdate";
# the text is sent to the LM verbatim, so it is left unchanged here — confirm against the source review.
stimulus = {
    'inclusion_criteria': 'We included WD  [Wilson disease] patients of any age or stage. The study drug had to be one of four established therapies, namely DPen [copper chelators D‐penicillamine], trientine, TTM [etrathiomolybdate] or Zn [zinc salts]. The control could be placebo [...]. Concomitant therapies had to be identical in the compared treatment arms (eg trientine plus Zn vs TTM plus Zn).  We included studies that reported all‐cause mortality, orthotopic liver transplantation (OLT), neurological symptoms (eg dystonia, dysarthria, cognitive decline, drooling, tremor, gait disturbance, chorea, seizure, psychosis), liver‐related symptoms (eg icterus, ascites, steatosis, fibrosis, mild hepatitis, acute liver failure, cirrhosis, serum transaminases), adverse effects (eg dermatological manifestations, nephrotoxicity, pulmonary toxicity, autoimmune disorders, anaemia, neutrophilic agranulocytosis, thrombocytopenia, hypothyroidism, liver dysfunction, colitis, status dystonicus, myasthenia gravis, arthropathy, macromastia, early neurological deterioration, gastrointestinal irritation), and frequency of treatment discontinuation (ie switching to another drug, stopping or changing the treatment). We included prospective and retrospective studies, including randomized, non‐randomized controlled trials and comparative observational studies that were written in English, German, Dutch, French, Spanish or Portuguese.  No publication date restrictions were applied.',
    'exclusion_criteria': 'The control could be placebo but no treatment or any other treatment that does not include the respective study drug (eg Zn [zinc salts] vs trientine was allowed, Zn 50 mg vs Zn 100 mg was not allowed). Comparisons between monotherapy and combination therapy regimens that included the respective monotherapy drug (eg DPen [copper chelators D‐penicillamine] plus Zn vs Zn) have been analysed elsewhere and were not considered any further here. Animal studies, case reports, case series, cross‐sectional studies, before‐after studies, reviews, letters, abstract‐only publications, editorials,  diagnostic or other testing studies and non‐controlled studies were excluded.'
}

In [93]:
### Create signature ###
# Configure the language model shared by every dspy module in this notebook.
# The temperature comes from the `llm_temperature` simulation parameter so that
# changing the parameter cell actually changes the sampling behaviour.
# cache=False is passed so repeated identical calls are not served from dspy's cache.
lm = dspy.LM("openai/gpt-4o-mini", temperature=llm_temperature, cache=False)
dspy.configure(lm=lm)


# Signature for the jargon-list task. The class docstring and every `desc`
# string below are rendered into the LM prompt (the docstring appears as the
# stated objective), so they are prompt text rather than plain documentation.
class ListOfJargonTerms(dspy.Signature):
    """Generate a list of jargon terms based on the inclusion/exclusion criteria."""
    # Input: 1/0 flag describing whether the example should be relevant to the review.
    label_relevant: int = dspy.InputField(desc="1 for an example of an abstract and title relevant to the review; 0 for an example of an abstract and title irrelevant to the review")
    # Input: the criteria text the jargon terms are derived from.
    criteria: str = dspy.InputField(desc="The inclusion or exclusion criteria of the review")
    # NOTE(review): this description talks about an abstract's word count although the
    # only output of this signature is a jargon list — looks copied from MakeAbstract; confirm intent.
    length_abstracts: int = dspy.InputField(desc="The number of words that the generated abstract should exactly contain.")
    # Output: a string the LM is asked to format as a one-line JSON object.
    jargon: str = dspy.OutputField(desc='One-line JSON object: {"jargon_terms":"term1, term2, term3, ..."}')


# Signature for the fake-abstract task. The class docstring and every `desc`
# string below are rendered into the LM prompt, so they are prompt text rather
# than plain documentation.
class MakeAbstract(dspy.Signature):
    """Generate a fake abstract based on search terms and whether it should be included or not."""
    # Input: 1/0 flag describing whether the generated example should be relevant to the review.
    label_relevant: int = dspy.InputField(desc="1 for an example of an abstract and title relevant to the review; 0 for an example of an abstract and title irrelevant to the review")
    # Input: the criteria text the abstract is generated from.
    criteria: str = dspy.InputField(desc="The inclusion or exclusion criteria of the review")
    # Input: requested word count of the generated abstract.
    length_abstracts: int = dspy.InputField(desc="The number of words that the generated abstract should exactly contain.")
    # Output: a string the LM is asked to format as a one-line JSON object.
    # NOTE(review): the requested JSON has its own "reasoning" key; when this signature is
    # wrapped in dspy.ChainOfThought the prompt also lists a separate `reasoning` output field.
    normal_abstract: str = dspy.OutputField(desc='One-line JSON object: {"doi":"None","title":"...","abstract":"...","label_included":"1/0","reasoning":"..."}')
 
    
class MyChainOfThought(dspy.Module):
    """Chain-of-thought module that returns either a fake abstract or a jargon list.

    Two chain-of-thought predictors are held side by side:

    * ``cot1`` wraps ``MakeAbstract`` and is used when ``degree_jargon == 0``.
    * ``cot2`` wraps ``ListOfJargonTerms`` and is used when ``degree_jargon == 1``.

    ``self.degree_jargon`` is the mode used when ``forward`` is called without
    an explicit ``degree_jargon``.
    """

    def __init__(self):
        # Let dspy.Module set up its own state before sub-modules are attached.
        super().__init__()
        self.cot1 = dspy.ChainOfThought(MakeAbstract)
        self.cot2 = dspy.ChainOfThought(ListOfJargonTerms)
        self.degree_jargon = 0  # Default value

    def forward(self, label_relevant, criteria, length_abstracts, degree_jargon=None):
        """Run the predictor selected by ``degree_jargon``.

        Args:
            label_relevant: 1 for a relevant example, 0 for an irrelevant one.
            criteria: Inclusion or exclusion criteria text.
            length_abstracts: Requested word count, forwarded to the signature.
            degree_jargon: 0 to generate an abstract, 1 to generate a jargon
                list. ``None`` (the default) falls back to ``self.degree_jargon``.
                The default is deliberately not bound to a notebook-level
                variable: such a binding is frozen when the class is defined and
                previously made calls without this argument return ``None``.

        Returns:
            The ``normal_abstract`` string (mode 0) or the ``jargon`` string (mode 1).

        Raises:
            ValueError: If ``degree_jargon`` is neither 0 nor 1. Failing loudly
                replaces the earlier silent ``None`` result.
        """
        if degree_jargon is None:
            degree_jargon = self.degree_jargon

        # Both predictors declare the same three input fields.
        inputs = dict(
            label_relevant=label_relevant,
            criteria=criteria,
            length_abstracts=length_abstracts,
        )

        if degree_jargon == 0:
            return self.cot1(**inputs).normal_abstract
        if degree_jargon == 1:
            return self.cot2(**inputs).jargon

        raise ValueError(
            f"Unsupported degree_jargon={degree_jargon!r}; expected 0 (abstract) or 1 (jargon list)."
        )


In [94]:
# Build the module once, then ask it for the jargon list (degree_jargon=1)
# derived from the inclusion criteria.
model = MyChainOfThought()

jargon_inputs = {
    "label_relevant": 1,
    "criteria": stimulus["inclusion_criteria"],
    "length_abstracts": 100,
    "degree_jargon": 1,
}
output = model(**jargon_inputs)

In [95]:
# Print the most recent LM exchanges recorded on `lm` (at most two, per n=2)
# to check the prompt that the module actually sent.
lm.inspect_history(n=2)





[34m[2025-10-28T18:17:06.092434][0m

[31mSystem message:[0m

Your input fields are:
1. `label_relevant` (int): 1 for an example of an abstract and title relevant to the review; 0 for an example of an abstract and title irrelevant to the review
2. `criteria` (str): The inclusion or exclusion criteria of the review
3. `length_abstracts` (int): The number of words that the generated abstract should exactly contain.
Your output fields are:
1. `reasoning` (str): 
2. `jargon` (str): One-line JSON object: {"jargon_terms":"term1, term2, term3, ..."}
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## label_relevant ## ]]
{label_relevant}

[[ ## criteria ## ]]
{criteria}

[[ ## length_abstracts ## ]]
{length_abstracts}

[[ ## reasoning ## ]]
{reasoning}

[[ ## jargon ## ]]
{jargon}

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Generate a list of jargon terms based on the inclusion/exclusion criteria.

In [96]:
# `output` is the `jargon` string returned by the module's jargon branch.
print(output)

{"jargon_terms":"Wilson disease, DPen, trientine, TTM, Zn, orthotopic liver transplantation, neurological symptoms, liver-related symptoms, adverse effects, prospective studies, retrospective studies, randomized controlled trials, comparative observational studies"}


In [97]:
# Chain-of-thought predictor over the MakeAbstract signature.
make_abstract = dspy.ChainOfThought(MakeAbstract)

# NOTE: MakeAbstract declares only `label_relevant`, `criteria` and
# `length_abstracts` as input fields. `typicality` and `degree_jargon` used to
# be passed here as extra keyword arguments, but they are not part of the
# signature and therefore never appeared in the prompt (see the input-field
# list printed by inspect_history). They are no longer passed, so the call
# reflects what the LM really receives.
# TODO: to make those parameters affect generation, add matching
# dspy.InputField entries to MakeAbstract and pass them again.

# generate relevant abstract
relevant = make_abstract(
    label_relevant=1,
    criteria=stimulus['inclusion_criteria'],
    length_abstracts=length_abstracts,
)

# generate irrelevant abstract
irrelevant = make_abstract(
    label_relevant=0,
    criteria=stimulus['exclusion_criteria'],
    length_abstracts=length_abstracts,
)
    

In [98]:
# Print the latest recorded LM exchange to verify which input and output
# fields were rendered into the prompt.
dspy.inspect_history()





[34m[2025-10-28T18:17:17.723486][0m

[31mSystem message:[0m

Your input fields are:
1. `label_relevant` (int): 1 for an example of an abstract and title relevant to the review; 0 for an example of an abstract and title irrelevant to the review
2. `criteria` (str): The inclusion or exclusion criteria of the review
3. `length_abstracts` (int): The number of words that the generated abstract should exactly contain.
Your output fields are:
1. `reasoning` (str): 
2. `normal_abstract` (str): One-line JSON object: {"doi":"None","title":"...","abstract":"...","label_included":"1/0","reasoning":"..."}
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## label_relevant ## ]]
{label_relevant}

[[ ## criteria ## ]]
{criteria}

[[ ## length_abstracts ## ]]
{length_abstracts}

[[ ## reasoning ## ]]
{reasoning}

[[ ## normal_abstract ## ]]
{normal_abstract}

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Gene

In [99]:
# Show the first completion of the relevant-abstract prediction; as the cell's
# last expression it is displayed as a Prediction with `reasoning` and `normal_abstract`.
relevant.completions[0]

Prediction(
    reasoning="The abstract provided is relevant to the review as it discusses a study involving Wilson disease patients treated with established therapies, which aligns with the inclusion criteria outlined. The focus on mortality, liver transplantation, neurological symptoms, and adverse effects also corresponds with the elements specified in the criteria, ensuring that the content is pertinent to the review's objectives.",
    normal_abstract='{"doi":"None","title":"Therapeutic Outcomes in Wilson Disease: A Comparative Study of Established Treatments","abstract":"This study evaluates the therapeutic outcomes in patients diagnosed with Wilson disease, focusing on various established treatment options including D-penicillamine, trientine, tetrathiomolybdate, and zinc salts. We conducted a comprehensive analysis involving patients of all ages and stages of the disease, reporting on critical outcomes such as all-cause mortality, rates of orthotopic liver transplantation, and 