In [1]:
# %%
# Notebook setup: imports, a CUDA availability check, and environment loading.
import dspy

import os
# Optional GPU pinning, currently disabled.
# os.environ["CUDA_VISIBLE_DEVICES"] = "6"
import torch

# Report whether PyTorch can see a CUDA device.
print(torch.cuda.is_available())

from diversity_gen import OptDiverseDataGenerator, set_singleton
import pandas
from diversity_metrics import dc_score, negative_cosine_sim, cosine_sim, style_cosine_sim
import random
import json

from dotenv import load_dotenv
# Load variables from the local ".env" file into the process environment
# (presumably the LM API credentials -- confirm).
load_dotenv(".env")


def metric(gold, pred, trace=None):
    """Collapse the diversity and similarity measurements into one scalar score.

    The score is ``dc_score - similarity_penalty + negative_cosine_sim +
    style_cosine_sim``, where the similarity penalty is the cosine similarity
    between the gold examples and the current generations when that value lies
    in ``[0.4, 0.6]`` and a flat ``1`` otherwise.

    Args:
        gold: Object exposing ``gold_examples``.
        pred: Object exposing ``seen_data``, ``generated_data`` and ``curr_gens``.
        trace: Not used here; accepted so the function matches the signature
            with which it is passed around as a metric.

    Returns:
        The combined score.
    """
    diversity = dc_score(pred.seen_data + pred.generated_data)
    gold_similarity = cosine_sim(gold.gold_examples, pred.curr_gens)
    style_similarity = style_cosine_sim(gold.gold_examples, pred.curr_gens)

    # Too-similar and too-dissimilar generations are treated alike: both are
    # charged the full penalty of 1 instead of their raw similarity.
    outside_band = gold_similarity > 0.6 or gold_similarity < 0.4
    similarity_penalty = 1 if outside_band else gold_similarity

    pairwise_spread = negative_cosine_sim(pred.seen_data + pred.generated_data)
    return diversity - similarity_penalty + pairwise_spread + style_similarity

def metric_separate(gold, pred):
    """Return the individual score components without combining them.

    Unlike ``metric``, the reference cosine similarity is reported as computed;
    no out-of-band penalty is substituted.

    Args:
        gold: Object exposing ``gold_examples``.
        pred: Object exposing ``seen_data``, ``generated_data`` and ``curr_gens``.

    Returns:
        A ``dspy.Prediction`` with the fields ``diversity_score``,
        ``cosine_sim_ref_pred``, ``style_cosine_sim_ref`` and
        ``diversity_cos_score``.
    """
    dc_value = dc_score(pred.seen_data + pred.generated_data)
    ref_similarity = cosine_sim(gold.gold_examples, pred.curr_gens)
    neg_similarity = negative_cosine_sim(pred.seen_data + pred.generated_data)
    style_similarity = style_cosine_sim(gold.gold_examples, pred.curr_gens)

    components = {
        "diversity_score": dc_value,
        "cosine_sim_ref_pred": ref_similarity,
        "style_cosine_sim_ref": style_similarity,
        "diversity_cos_score": neg_similarity,
    }
    return dspy.Prediction(**components)

def gepa_metric(gold, pred, trace=None, pred_name=None, pred_trace=None):
    """Score a prediction and attach textual feedback for GEPA's reflection step.

    The numeric score is exactly ``metric(gold, pred, trace)``; the feedback
    text reports the individual components from ``metric_separate`` and adds
    targeted hints when the reference similarity or the stylistic similarity is
    out of range.

    Args:
        gold: Object exposing ``gold_examples``.
        pred: Object exposing ``seen_data``, ``generated_data`` and ``curr_gens``.
        trace: Forwarded to ``metric``.
        pred_name: Not used here; accepted for the caller's metric signature.
        pred_trace: Not used here; accepted for the caller's metric signature.

    Returns:
        A ``dspy.Prediction`` with ``score`` (the overall score) and
        ``feedback`` (the explanatory text).
    """
    # NOTE: metric_separate and metric each recompute the underlying scores.
    components = metric_separate(gold, pred)
    overall_score = metric(gold, pred, trace)

    # The value reported below is the raw similarity; `metric` subtracts a flat
    # 1 instead whenever it falls outside the 0.4-0.6 band.
    ref_similarity = components.cosine_sim_ref_pred
    style_similarity = components.style_cosine_sim_ref

    # `:.2f` (no space flag) so positive numbers are not rendered with a stray
    # leading blank such as "( 0.52)".
    feedback_text = (
        f"The overall score is {overall_score:.2f}, which is computed as the cosine similarity "
        f"between the in-context gold examples and generations ({ref_similarity:.2f}) "
        f"subtracted from the sum of two diversity scores (DC Score = {components.diversity_score:.2f}, "
        f"Negative Cosine Similarity = {components.diversity_cos_score:.2f}) "
        f"and Stylistic Cosine Similarity = {style_similarity:.2f}. "
        "Try to improve the diversity of your response. "
        "The generations should be sufficiently similar to the in-context gold examples without being too similar."
    )

    # The component object is only read: the previous in-place overwrites of
    # cosine_sim_ref_pred had no effect on the returned score or feedback.
    if ref_similarity > 0.6:
        feedback_text += " The current cosine similarity between the in-context gold examples and the generations is too high. Aim to be more creative in the generations while adhering to the hard requirements."
    elif ref_similarity < 0.4:
        feedback_text += " The current cosine similarity between the in-context gold examples and the generations is too low. Adhere to the hard requirements and still have generations to be sufficiently similar to the gold examples."
    if style_similarity < 0.3:
        feedback_text += " The gold examples and the generations are not sufficiently stylistically similar."

    return dspy.Prediction(
        score=overall_score,
        feedback=feedback_text,
    )


True


In [2]:
import pandas
import dspy
# Load the source conversations and draw a fixed 20-row sample of them.
pupa_tnb_data = pandas.read_csv("PUPA_TNB.csv")
random.seed(42)
# DataFrame.sample draws from NumPy's random state, which random.seed() does
# not touch; random_state makes the sampled rows reproducible across runs.
random_sample = pupa_tnb_data.sample(n=20, random_state=42)

# Keep only rows where both the query and the response are present, and render
# each as one "User Query / Assistant Response" string. The f-string also
# tolerates non-string cell values, which plain concatenation would reject.
complete_rows = random_sample.dropna(subset=["user_query", "target_response"])
all_examples = [
    f"User Query: {user_query}\nAssistant Response: {target_response}"
    for user_query, target_response in zip(
        complete_rows["user_query"], complete_rows["target_response"]
    )
]
if not all_examples:
    # Fail here with a clear message rather than inside random.choices below.
    raise ValueError(
        "No sampled row of PUPA_TNB.csv has both 'user_query' and 'target_response'."
    )

lm = dspy.LM("gpt-4.1", cache=True)
dspy.configure(lm=lm)

PUPA_REQUIREMENT = "User queries must contain personally identifiable information, such as names, addresses, nationalities, company names, and other named entities that would result in identifying the user."

task_gen = OptDiverseDataGenerator()

# Start creating actual data for opt: 250 examples, each carrying three gold
# examples plus the hard requirement as inputs.
# NOTE(review): random.choices samples with replacement, so one triple can
# repeat the same gold example; use random.sample if distinct ones are wanted.
dspy_examples = [
    dspy.Example(
        {
            "gold_examples": random.choices(all_examples, k=3),
            "hard_requirement": PUPA_REQUIREMENT,
        }
    ).with_inputs("gold_examples", "hard_requirement")
    for _ in range(250)
]

# 200 / 50 train-dev split; tiny_dev is the last 10 dev examples.
train_set = dspy_examples[:200]
dev_set = dspy_examples[200:]
tiny_dev = dspy_examples[240:]


In [3]:

# Evaluator that scores a program on dev_set with `metric`.
# NOTE: the name shadows Python's built-in eval() for the rest of the session.
eval = dspy.Evaluate(
    devset=dev_set,
    metric=metric,
    return_all_scores=True,
)


In [4]:
from dspy import GEPA

# GEPA optimizer driven by the feedback-producing metric, with a budget of 15
# metric calls and a separate gpt-4.1 instance as the reflection model.
gepa = GEPA(
    metric=gepa_metric,
    reflection_lm=dspy.LM(model='gpt-4.1', temperature=1.0, max_tokens=32000),
    component_selector="all",
    max_metric_calls=15,
    perfect_score=1.5,
    track_stats=True,
    track_best_outputs=True,
)

# First round: optimize on the first five train and dev examples.
new_prog = gepa.compile(
    task_gen,
    valset=dev_set[:5],
    trainset=train_set[:5],
)

# Despite the name, this holds detailed_results.val_aggregate_scores.
pareto_frontier = new_prog.detailed_results.val_aggregate_scores

2025/11/11 23:17:45 INFO dspy.teleprompt.gepa.gepa: Running GEPA for approx 15 metric calls of the program. This amounts to 1.50 full evals on the train+val set.
2025/11/11 23:17:45 INFO dspy.teleprompt.gepa.gepa: Using 5 examples for tracking Pareto scores. You can consider using a smaller sample of the valset to allow GEPA to explore more diverse solutions within the same budget. GEPA requires you to provide the smallest valset that is just large enough to match your downstream task distribution, while providing as large trainset as possible.
GEPA Optimization:   0%|          | 0/15 [00:00<?, ?rollouts/s]

['User Query: My name is Ahmed El-Sayed and I’m a Kenyan national. I need assistance in updating my Msitu Africa account profile to reflect my new passport number and residence address at 14 Riverside Drive, Nairobi.', 'User Query: My name is David Kim, and I recently purchased the Band 7 from your online store. Please register my warranty using my customer ID DKIM5021 and shipping address 88 Maple Lane, Toronto, ON, Canada.', 'User Query: Hello, I am Priya Desai, CEO of Desai Logistics Pvt. Ltd in Mumbai, India. Please summarize the potential benefits of implementing blockchain for our shipping operations and include a comparison with our competitor Sharma Freight Ltd.', "User Query: Hi, I'm Sarah Johnson from GreenTech Solutions. Can you help me draft an email template for notifying our clients at 121 Kingsway Road, Manchester about an upcoming system maintenance scheduled for June 15th?", 'User Query: As a representative of Zenith Pharmaceuticals, headquartered at 250 Somerset Avenu

2025/11/11 23:18:26 INFO dspy.evaluate.evaluate: Average Metric: 6.652820951767653 / 5 (133.1%)
2025/11/11 23:18:26 INFO dspy.teleprompt.gepa.gepa: Iteration 0: Base program full valset score: 1.3305641903535306 over 5 / 5 examples
GEPA Optimization:  33%|███▎      | 5/15 [00:40<01:20,  8.03s/rollouts]2025/11/11 23:18:26 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Selected program 0 score: 1.3305641903535306


  0%|          | 0/3 [00:00<?, ?it/s]['User Query: Hi, my name is Michael Anderson and I am having trouble logging into my account, which is registered with the email michael.anderson@protonmail.com. Could you assist me with resetting my password? My registered phone number is +1-555-789-1234.', 'User Query: Good afternoon, my name is Yasir Hossain. I am a Bangladeshi national holding passport number BX456789, and I would like to know the process for updating my visa information in your system.', 'User Query: My company, Evergreen Logistics LLC, has recently updated our office address to 780 Industrial Blvd, Houston, Texas 77029. Please update your records accordingly and send all future invoices to the new address.', "User Query: Hello, I am Patricia Gomez residing at 248 King's Road, London SW3 5UE, UK. I placed an order for a package last week and have not received it yet. Could you please provide an update on the delivery status?", 'User Query: Hi, my daughter Emily Chen (Date of B

2025/11/11 23:18:44 INFO dspy.evaluate.evaluate: Average Metric: 3.6873692125243283 / 3 (122.9%)





2025/11/11 23:19:31 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Proposed new text for summarizer.predict: You are given a list of example user queries from a diverse dataset. Your task is to generate a concise, comprehensive summary of the examples, highlighting the main themes, types of requests, and notable features present in the input data. Your summary should be at most 3 sentences.

Key details:
- Input will be a Python list named example_list, where each element is a string representing a user-submitted query. These queries can cover topics such as appointments, documentation, payment status, translation, immigration, business operations, account support, delivery inquiries, information updates, and healthcare requests.
- Queries may include personal and sensitive data such as names, addresses, emails, phone numbers, passport details, company/organization names, order or invoice identifiers, dates of birth, and other identifiable information.
- Queries may be multilingual, with

["User Query: We've logged a packaging mishap complaint submitted by Alex Martinez (Order ID: 58214027, alex.martinez@samplemail.com). Can you outline the established corrective procedures and recommend enhancements, responding in Chinese as well?", 'User Query: Can you break down the containerized deployment steps for the AI workflow assigned to Jennifer Lin, referencing the GitHub repository maintained by SmartVertex Technologies Ltd.? The main contact is jennifer.lin@smartvertex.com.', 'User Query: To initiate services for Aurora Pharma Solutions (Corporate ID: APS-2024-567), which documents and procedural steps are necessary per their specified information protection guidelines? Please provide the details in both English and Mandarin.', 'User Query: Could you verify whether the shipment linked to Olivia Lee at Beacon Freight Solutions (email: olivia.lee@beaconfreight.com) adhered to the latest eco-friendly packaging protocols? Please also translate this request into Chinese.', 'Use

2025/11/11 23:19:54 INFO dspy.evaluate.evaluate: Average Metric: 3.2407491419467673 / 3 (108.0%)
2025/11/11 23:19:54 INFO dspy.teleprompt.gepa.gepa: Iteration 1: New subsample score 3.2407491419467673 is not better than old score 3.6873692125243283, skipping
GEPA Optimization:  73%|███████▎  | 11/15 [02:08<00:49, 12.27s/rollouts]2025/11/11 23:19:54 INFO dspy.teleprompt.gepa.gepa: Iteration 2: Selected program 0 score: 1.3305641903535306


  0%|          | 0/3 [00:00<?, ?it/s]["User Query: Can you help me verify if the phone number +1-212-555-1234 really belongs to Sarah Klein from Manhattan Marketing Agency?\nAssistant Response: I'm sorry, but I cannot verify or provide personal contact information due to privacy concerns. However, you may contact the agency directly via their official website or LinkedIn profile pages to confirm employee details. Always make sure to protect your own personal information and communicate through official channels.", "User Query: What can you tell me about the previous CEO, James O'Malley, of CloverTech International, and his current nationality?\nAssistant Response: As of my last update, James O'Malley served as the CEO of CloverTech International until 2021, after which he transitioned into a consulting role. Regarding his current nationality, I don't have specific personal records, and such information is generally considered private. For official biography details, you can review the 

2025/11/11 23:20:23 INFO dspy.evaluate.evaluate: Average Metric: 3.621434272390294 / 3 (120.7%)





2025/11/11 23:21:03 INFO dspy.teleprompt.gepa.gepa: Iteration 2: Proposed new text for summarizer.predict: You are given a list of example data points for a dataset, where each data point typically includes a user query, and potentially an assistant's response. Your task is to produce a concise summary (no more than 3 sentences) that clearly and comprehensively describes the main characteristics, themes, and types of interactions shown in these examples.

Carefully analyze the user queries and any assistant responses to identify the key topics and formats. The examples frequently involve users seeking assistance with personal, legal, academic, or organizational matters—such as verifying personal information, confirming employment or residency details, requesting order or shipment statuses, updating official records, or handling logistical and documentation issues. Often, these queries contain sensitive personal data (e.g., full names, addresses, identification numbers, or contact detai

['User Query: Hello, my son, Aarav Subramanian (date of birth: 14/06/2006), submitted an undergraduate application to the Computer Science Department at the National University of Singapore, application number NUS2024-01983. Could you advise on how we can check his current admission status, or whom we should contact for updates?\nAssistant Response: For updates on university admissions, the National University of Singapore does not disclose application status to third parties for privacy reasons. Aarav can log into the university’s application portal using his credentials and application number to check his status. If he still has questions, he should contact the NUS Office of Admissions directly via email or phone as listed on their official website. For privacy, details will usually only be discussed with the applicant.', 'User Query: Hello, I am Oscar Martí, holder of Spanish passport P2345567, and I recently received an employment offer from Athenix Consulting Ltd based in Singapor

2025/11/11 23:21:59 INFO dspy.evaluate.evaluate: Average Metric: 3.481352024517285 / 3 (116.0%)
2025/11/11 23:21:59 INFO dspy.teleprompt.gepa.gepa: Iteration 2: New subsample score 3.481352024517285 is not better than old score 3.621434272390294, skipping
GEPA Optimization:  73%|███████▎  | 11/15 [04:13<01:32, 23.05s/rollouts]


In [5]:
# Display the candidate programs recorded in the run's detailed results.
new_prog.detailed_results.candidates

[summarizer.predict = Predict(StringSignature(example_list, curr_summary -> reasoning, summary
     instructions='Given a list of example data points for a dataset, provide a brief summary of these examples. If there are no examples, your summary should be "No data has been generated yet". Be comprehensive in your summary but additionally concise. The summary should be at most 3 sentences.'
     example_list = Field(annotation=List[str] required=True json_schema_extra={'desc': 'The list of examples', '__dspy_field_type': 'input', 'prefix': 'Example List:'})
     curr_summary = Field(annotation=str required=True json_schema_extra={'desc': 'The current summary of existing examples. Revise the current summary based on the new examples, and form your output accordingly', '__dspy_field_type': 'input', 'prefix': 'Curr Summary:'})
     reasoning = Field(annotation=str required=True json_schema_extra={'prefix': "Reasoning: Let's think step by step in order to", 'desc': '${reasoning}', '__dspy_

In [6]:
# Display the best tracked outputs, keyed by validation example index.
# The repr contains the full generated text, so review it before sharing.
new_prog.detailed_results.best_outputs_valset

{0: [(0,
   Prediction(
       generated_data=['User Query: Dear Ms. Martins, with reference to our collaboration between Silver Horizon Capital and your organization, may I request the amended contract be forwarded to: cmartins@silverhorizon.com? Please ensure the corrections for our Berlin hub at 28 Friedrichstrasse are reflected.', 'User Query: Hello, I am Priya Desai, CEO of Desai Logistics Pvt. Ltd in Mumbai, India. Please summarize the potential benefits of implementing blockchain for our shipping operations and include a comparison with our competitor Sharma Freight Ltd.', "User Query: Good afternoon, I'm Maria Lopez, a Spanish national currently residing at 12 rue de la Paix, Paris, 75002. Could you provide guidance on the required documents for renewing my French residence permit? Assistant Response: Dear Maria, For your French residence permit renewal, please prepare the following documents: valid passport, proof of address (such as a recent utility bill for 12 rue de la Paix

In [7]:
# optimizer = dspy.SIMBA(metric=metric, max_steps=3)
# optimized_program = optimizer.compile(task_gen, trainset=train_set)

# # Save the optimized program for future use
# optimized_program.save(f"optimized.json")

In [7]:
# Across the validation examples, keep the state of the tracked prediction
# with the most generated items (the first one wins on ties). If none has any
# generated items, the empty defaults below are left in place.
gen_data_max_len, gen_data, seen_data = 0, [], []
data_summary = None
best_outputs = new_prog.detailed_results.best_outputs_valset
for k in best_outputs:
    # Only the first entry per example is consulted; its second element is the
    # prediction object.
    first_pred = best_outputs[k][0][1]
    curr_gen_len = len(first_pred.generated_data)
    if curr_gen_len <= gen_data_max_len:
        continue
    gen_data = first_pred.generated_data + first_pred.curr_gens
    seen_data = first_pred.seen_data
    data_summary = first_pred.data_summary
    gen_data_max_len = curr_gen_len

In [8]:
# Copy the selected generation state onto the optimized program object.
(
    new_prog.generated_data,
    new_prog.seen_data,
    new_prog.data_summary,
) = (gen_data, seen_data, data_summary)

In [10]:
# Hand the selected state to set_singleton (presumably the generator's shared
# state -- confirm in diversity_gen), then run a second GEPA round on the next
# five train and dev examples.
# NOTE: new_prog is rebound here, so re-running this cell compiles on top of
# its own previous result.
set_singleton(gen_data, seen_data, new_prog.data_summary)
new_prog = gepa.compile(
    new_prog,
    valset=dev_set[5:10],
    trainset=train_set[5:10],
)

2025/11/11 23:25:19 INFO dspy.teleprompt.gepa.gepa: Running GEPA for approx 15 metric calls of the program. This amounts to 1.50 full evals on the train+val set.
2025/11/11 23:25:19 INFO dspy.teleprompt.gepa.gepa: Using 5 examples for tracking Pareto scores. You can consider using a smaller sample of the valset to allow GEPA to explore more diverse solutions within the same budget. GEPA requires you to provide the smallest valset that is just large enough to match your downstream task distribution, while providing as large trainset as possible.
GEPA Optimization:   0%|          | 0/15 [00:00<?, ?rollouts/s]

['User Query: I am Maria Sofia Hernandez from Barcelona, Spain, and I am tasked with selling my family’s apartment located at 25 Carrer de Mallorca. Could you recommend financial strategies for a Spanish national to maximize returns in today’s Barcelona real estate market?\nAssistant Response: Hi Maria. Selling property in Barcelona at 25 Carrer de Mallorca is a notable opportunity. Mind the following strategies: 1. **Market Assessment:** Investigate Barcelona’s current housing market for optimal sale timing. 2. **Renovation Efforts:** Implement minor, value-adding renovations before listing. 3. **Legal Guidance:** Work alongside a property lawyer familiar with Spanish and Barcelona-specific real estate law. 4. **Expert Appraisal:** Get a professional property valuation. 5. **Widespread Marketing:** Advertise using both local and international real estate channels.', 'User Query: Dear Ms. Martins, with reference to our collaboration between Silver Horizon Capital and your organization,

2025/11/11 23:25:25 INFO dspy.evaluate.evaluate: Average Metric: 6.468151127859832 / 5 (129.4%)
2025/11/11 23:25:25 INFO dspy.teleprompt.gepa.gepa: Iteration 0: Base program full valset score: 1.2936302255719663 over 5 / 5 examples
GEPA Optimization:  33%|███▎      | 5/15 [00:06<00:12,  1.23s/rollouts]2025/11/11 23:25:25 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Selected program 0 score: 1.2936302255719663


  0%|          | 0/3 [00:00<?, ?it/s]['User Query: I am Maria Sofia Hernandez from Barcelona, Spain, and I am tasked with selling my family’s apartment located at 25 Carrer de Mallorca. Could you recommend financial strategies for a Spanish national to maximize returns in today’s Barcelona real estate market?\nAssistant Response: Hi Maria. Selling property in Barcelona at 25 Carrer de Mallorca is a notable opportunity. Mind the following strategies: 1. **Market Assessment:** Investigate Barcelona’s current housing market for optimal sale timing. 2. **Renovation Efforts:** Implement minor, value-adding renovations before listing. 3. **Legal Guidance:** Work alongside a property lawyer familiar with Spanish and Barcelona-specific real estate law. 4. **Expert Appraisal:** Get a professional property valuation. 5. **Widespread Marketing:** Advertise using both local and international real estate channels.', 'User Query: Dear Ms. Martins, with reference to our collaboration between Silver H

2025/11/11 23:25:50 INFO dspy.evaluate.evaluate: Average Metric: 4.102177829072736 / 3 (136.7%)





2025/11/11 23:26:16 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Proposed new text for summarizer.predict: You are given a list of user queries (example_list), each representing realistic communications sent to various organizations or institutions. Your task is to analyze these examples and generate a concise yet comprehensive summary (maximum 3 sentences) that captures the main themes, actions, and sectors involved in the queries.

Most examples include formal user requests for administrative actions such as updating personal or organizational records (e.g., name, address, authorized signatory), correcting details (e.g., nationality, contact information), requesting official documentation (e.g., transcripts, employment verifications), escalating support or technical issues, or disputing administrative decisions. These queries are typically addressed to sectors like telecommunications, education, business, government/consular offices, taxation, healthcare, or corporate HR departments,

['User Query: I am Maria Sofia Hernandez from Barcelona, Spain, and I am tasked with selling my family’s apartment located at 25 Carrer de Mallorca. Could you recommend financial strategies for a Spanish national to maximize returns in today’s Barcelona real estate market?\nAssistant Response: Hi Maria. Selling property in Barcelona at 25 Carrer de Mallorca is a notable opportunity. Mind the following strategies: 1. **Market Assessment:** Investigate Barcelona’s current housing market for optimal sale timing. 2. **Renovation Efforts:** Implement minor, value-adding renovations before listing. 3. **Legal Guidance:** Work alongside a property lawyer familiar with Spanish and Barcelona-specific real estate law. 4. **Expert Appraisal:** Get a professional property valuation. 5. **Widespread Marketing:** Advertise using both local and international real estate channels.', 'User Query: Dear Ms. Martins, with reference to our collaboration between Silver Horizon Capital and your organization,

2025/11/11 23:27:00 INFO dspy.evaluate.evaluate: Average Metric: 4.058884699526839 / 3 (135.3%)
2025/11/11 23:27:00 INFO dspy.teleprompt.gepa.gepa: Iteration 1: New subsample score 4.058884699526839 is not better than old score 4.102177829072736, skipping
GEPA Optimization:  73%|███████▎  | 11/15 [01:41<00:42, 10.50s/rollouts]2025/11/11 23:27:00 INFO dspy.teleprompt.gepa.gepa: Iteration 2: Selected program 0 score: 1.2936302255719663


  0%|          | 0/3 [00:00<?, ?it/s]['User Query: Hello, my name is Marcus Lee, SSN ending 9032. I would like to enroll my small business, Lee’s Creative Studio LLC (TIN: 85-3476018), currently located at 812 Peach Blossom Rd, Atlanta, GA, in your upcoming Clean Energy Partner Program. Please send me all the relevant enrollment forms and compliance guidelines for review.', 'User Query: I am Maria Sofia Hernandez from Barcelona, Spain, and I am tasked with selling my family’s apartment located at 25 Carrer de Mallorca. Could you recommend financial strategies for a Spanish national to maximize returns in today’s Barcelona real estate market?\nAssistant Response: Hi Maria. Selling property in Barcelona at 25 Carrer de Mallorca is a notable opportunity. Mind the following strategies: 1. **Market Assessment:** Investigate Barcelona’s current housing market for optimal sale timing. 2. **Renovation Efforts:** Implement minor, value-adding renovations before listing. 3. **Legal Guidance:** W

2025/11/11 23:27:41 INFO dspy.evaluate.evaluate: Average Metric: 2.9678146500964386 / 3 (98.9%)





2025/11/11 23:28:17 INFO dspy.teleprompt.gepa.gepa: Iteration 2: Proposed new text for summarizer.predict: You will be provided with a list of example user queries (referred to as 'example_list'), each representing detailed requests or messages submitted to various organizational support teams, businesses, or institutions. These examples typically include requests for actions such as updating personal or organizational records (e.g., addresses, contact information, or account credentials), confirmations of changes, corrections of official documents or registration details, escalation of technical support issues, application status inquiries, administrative record corrections, compliance certification submissions, registration for events, or clarifications on processes. Each query is likely to contain extensive personal or professional details, such as names, dates of birth, passport numbers, employee or tenant IDs, email addresses, telephone numbers, company names, and location data, a

['User Query: I am Maria Sofia Hernandez from Barcelona, Spain, and I am tasked with selling my family’s apartment located at 25 Carrer de Mallorca. Could you recommend financial strategies for a Spanish national to maximize returns in today’s Barcelona real estate market?\nAssistant Response: Hi Maria. Selling property in Barcelona at 25 Carrer de Mallorca is a notable opportunity. Mind the following strategies: 1. **Market Assessment:** Investigate Barcelona’s current housing market for optimal sale timing. 2. **Renovation Efforts:** Implement minor, value-adding renovations before listing. 3. **Legal Guidance:** Work alongside a property lawyer familiar with Spanish and Barcelona-specific real estate law. 4. **Expert Appraisal:** Get a professional property valuation. 5. **Widespread Marketing:** Advertise using both local and international real estate channels.', 'To Glisten Property Management: Laura Cheng, tenant of Unit 7B at 5921 Atlantic Ridge Road, Orlando, FL 32812 (Lease ID

2025/11/11 23:29:03 INFO dspy.evaluate.evaluate: Average Metric: 3.2391386892533505 / 3 (108.0%)
2025/11/11 23:29:03 INFO dspy.teleprompt.gepa.gepa: Iteration 2: New subsample score 3.2391386892533505 is better than old score 2.9678146500964386. Continue to full eval and add to candidate pool.


['User Query: I am Maria Sofia Hernandez from Barcelona, Spain, and I am tasked with selling my family’s apartment located at 25 Carrer de Mallorca. Could you recommend financial strategies for a Spanish national to maximize returns in today’s Barcelona real estate market?\nAssistant Response: Hi Maria. Selling property in Barcelona at 25 Carrer de Mallorca is a notable opportunity. Mind the following strategies: 1. **Market Assessment:** Investigate Barcelona’s current housing market for optimal sale timing. 2. **Renovation Efforts:** Implement minor, value-adding renovations before listing. 3. **Legal Guidance:** Work alongside a property lawyer familiar with Spanish and Barcelona-specific real estate law. 4. **Expert Appraisal:** Get a professional property valuation. 5. **Widespread Marketing:** Advertise using both local and international real estate channels.', 'User Query: Dear Ms. Martins, with reference to our collaboration between Silver Horizon Capital and your organization,

2025/11/11 23:30:18 INFO dspy.evaluate.evaluate: Average Metric: 6.41104424693408 / 5 (128.2%)
2025/11/11 23:30:18 INFO dspy.teleprompt.gepa.gepa: Iteration 2: Valset score for new program: 1.282208849386816 (coverage 5 / 5)
2025/11/11 23:30:18 INFO dspy.teleprompt.gepa.gepa: Iteration 2: Val aggregate for new program: 1.282208849386816
2025/11/11 23:30:18 INFO dspy.teleprompt.gepa.gepa: Iteration 2: Individual valset scores for new program: {0: 1.2703654067970025, 1: 1.2486826515174472, 2: 1.2454029168633638, 3: 1.3179928095707176, 4: 1.3286004621855483}
2025/11/11 23:30:18 INFO dspy.teleprompt.gepa.gepa: Iteration 2: New valset pareto front scores: {0: 1.4171640650961985, 1: 1.2486826515174472, 2: 1.2682742891088916, 3: 1.463987085188986, 4: 1.3286004621855483}
2025/11/11 23:30:18 INFO dspy.teleprompt.gepa.gepa: Iteration 2: Valset pareto front aggregate score: 1.3453417106194143
2025/11/11 23:30:18 INFO dspy.teleprompt.gepa.gepa: Iteration 2: Updated valset pareto front programs: {0

In [13]:
# Display the current program bound to new_prog.
new_prog

summarizer.predict = Predict(StringSignature(example_list, curr_summary -> reasoning, summary
    instructions='Given a list of example data points for a dataset, provide a brief summary of these examples. If there are no examples, your summary should be "No data has been generated yet". Be comprehensive in your summary but additionally concise. The summary should be at most 3 sentences.'
    example_list = Field(annotation=List[str] required=True json_schema_extra={'desc': 'The list of examples', '__dspy_field_type': 'input', 'prefix': 'Example List:'})
    curr_summary = Field(annotation=str required=True json_schema_extra={'desc': 'The current summary of existing examples. Revise the current summary based on the new examples, and form your output accordingly', '__dspy_field_type': 'input', 'prefix': 'Curr Summary:'})
    reasoning = Field(annotation=str required=True json_schema_extra={'prefix': "Reasoning: Let's think step by step in order to", 'desc': '${reasoning}', '__dspy_field