# Mipro V2

In [187]:
import datetime
import functools
import os
import pathlib
import random
import re

# %pip install click
import click

import dspy
import pandas as pd
import py3langid as langid
from dspy.datasets.dataset import Dataset
from dspy.evaluate.auto_evaluation import SemanticRecallPrecision, f1_score, SemanticF1
from dspy.teleprompt import MIPROv2
from pydantic import BaseModel, Field

In [188]:
from dotenv import load_dotenv
# Load key/value pairs from a local .env file so that later cells can read them
# through os.environ (OPENAI_API_KEY is looked up that way further down).
load_dotenv()

True

In [189]:
# Location of the FAQ test set. The default is the original author's local file;
# set the FAQ_DATA_PATH environment variable (e.g. in .env) to run the notebook
# on another machine without editing this cell.
FAQ_DATA_PATH = os.environ.get(
    "FAQ_DATA_PATH",
    '/Users/nami-macos/Documents/BIDV-1/Testset FAQ_multiturn_FAQ_Chain - FAQ Chain filtered.csv',
)
faq_data = pd.read_csv(FAQ_DATA_PATH)

# The three model inputs plus the reference answer used for scoring.
key_columns = faq_data[['question', 'context_extracted', 'chat_history_extracted', 'ground_truth']]

# Display the first few rows of these columns
print("\nSample data:")
display(key_columns.head())


Sample data:


Unnamed: 0,question,context_extracted,chat_history_extracted,ground_truth
0,thẻ atm có những tính năng gì thế em,"{\n ""question"": ""BIDV đang triển khai những...",,"Dạ, thẻ ATM BIDV Smart có những tính năng sau:..."
1,Chức năng của thẻ nội địa phi vật lí và thẻ nộ...,"{\n ""question"": ""BIDV đang triển khai những...",Customer: thẻ atm có những tính năng gì thế em...,"Dạ, thẻ ghi nợ nội địa BIDV Smart có hai loại:..."
2,"em đăng ký làm thẻ atm từ hqua ,kb bgio mới có...","{\n ""question"": ""Dịch vụ chọn số thẻ đẹp th...",,"Dạ, thời gian phát hành thẻ ATM của BIDV là tố..."
3,mình muốn làm nhiều thẻ luôn đc không hay chỉ ...,"{\n ""question"": ""Các loại thẻ tín dụng quốc...",Customer: cần cbi giấy tờ gì để làm thẻ thế\nB...,"Dạ, mỗi cá nhân có thể được phát hành 01 thẻ c..."
4,Số tiền thanh toán tối thiểu của thẻ tín dụng,"{\n ""Description"": ""Gồm LÃI SUẤT và các loạ...",,Chủ thẻ chi tiêu trước – trả tiền sau theo thờ...


In [190]:
# # Convert faq_data to a list of dictionaries
# dataset = []
# for _, row in faq_data.iterrows():
#     dataset.append(row.to_dict())

# # Split into train and dev sets with 50/50 split
# length = len(dataset)
# rng = random.Random(42)  # Fixed seed for reproducibility
# rng.shuffle(dataset)  # This was missing in your original cell

# train_data = dataset[: int(0.5 * length)]
# dev_data = dataset[int(0.5 * length) :]

# print(f"Total examples: {length}")
# print(f"Training examples: {len(train_data)}")
# print(f"Development examples: {len(dev_data)}")

In [191]:
class FAQDataset(Dataset):
    """FAQ examples read from a CSV/XLSX file, shuffled and split 50/50 into train/dev.

    Every row becomes a ``dspy.Example`` whose inputs are ``question``,
    ``context_extracted`` and ``chat_history_extracted``. When
    ``selected_output_field`` is given, that column is attached as the label.

    Args:
        data_path: ``str`` or ``pathlib.Path`` pointing at a ``.csv`` or
            ``.xlsx`` file.
        selected_output_field: Name of the label column. When given, only the
            three input columns plus this column are kept (pandas raises
            ``KeyError`` if any of them is absent). When ``None`` no label is
            attached to the examples.

    Raises:
        ValueError: If ``data_path`` ends in neither ``.csv`` nor ``.xlsx``.
    """

    def __init__(self, data_path, selected_output_field=None):
        super().__init__(input_keys=["question", "context_extracted", "chat_history_extracted"])

        # Accept pathlib.Path as well as str; the suffix checks below need a str.
        data_path = str(data_path)

        if data_path.endswith(".csv"):
            df = pd.read_csv(data_path)
        elif data_path.endswith(".xlsx"):
            df = pd.read_excel(data_path)
        else:
            raise ValueError("Unsupported file format.")

        if selected_output_field is not None:
            cols_to_keep = ["question", "context_extracted", "chat_history_extracted", selected_output_field]
            df = df[cols_to_keep]

        dataset = []
        for _, row in df.iterrows():
            row_dict = row.to_dict()

            example = dspy.Example()

            if "context_extracted" in row_dict:
                context = row_dict["context_extracted"]
                if isinstance(context, str):
                    # The column holds JSON text with literal backslash escapes;
                    # turn \n and \" back into the characters they stand for.
                    context = context.replace('\\n', '\n').replace('\\"', '"')
                example.context_extracted = context

            # First-turn rows have no history. pandas reports an empty cell as
            # float NaN (not None), so both are normalised to "" *before* the
            # value is stored on the example; the field is always present.
            history = row_dict.get("chat_history_extracted")
            example.chat_history_extracted = "" if self._is_missing(history) else history

            if "question" in row_dict:
                example.question = row_dict["question"]

            # Add the output field if present
            if selected_output_field and selected_output_field in row_dict:
                example[selected_output_field] = row_dict[selected_output_field]

            # Mark the three prompt fields as inputs; everything else is a label.
            example = example.with_inputs("context_extracted", "chat_history_extracted", "question")

            dataset.append(example)

        # Fixed seed so the train/dev split is identical on every run.
        length = len(dataset)
        rng = random.Random(42)
        rng.shuffle(dataset)

        self._train = dataset[: int(0.5 * length)]
        self._dev = dataset[int(0.5 * length) :]

    @staticmethod
    def _is_missing(value):
        """Return True for ``None`` and for float NaN (pandas' empty-cell marker)."""
        return value is None or (isinstance(value, float) and value != value)

    @property
    def train(self):
        """First half of the shuffled examples."""
        return self._train

    @property
    def dev(self):
        """Second half of the shuffled examples."""
        return self._dev
    

    
# Updated FAQScore class 
class FAQScore:
    """Metric that compares a prediction's output field with the example's label.

    Both values are converted to ``str``, stripped and lower-cased. The score is
    then either an exact-match 0.0/1.0 or, when ``semantic`` is true, the F1 of
    the precision/recall judged by an LM (``SemanticRecallPrecision``).

    Args:
        selected_output_field: Key looked up in both ``example.toDict()`` and
            ``pred.toDict()``.
        semantic: Use the LM-judged semantic F1 instead of exact match.
        threshold: Minimum score that counts as a pass when the metric is called
            with a ``trace`` (i.e. while an optimizer is bootstrapping demos).
            The default mirrors DSPy's own ``SemanticF1`` metric.
    """

    def __init__(self, selected_output_field: str, semantic: bool = False, threshold: float = 0.66):
        super().__init__()

        self.semantic = semantic
        self.threshold = threshold
        if semantic:
            self.evaluate = dspy.ChainOfThought(SemanticRecallPrecision)

        self.selected_output_field = selected_output_field

    def __call__(self, example: "dspy.Example", pred: "dspy.Prediction", trace=None):
        """Score ``pred`` against ``example``.

        Returns:
            A float in [0, 1] when ``trace`` is ``None`` (evaluation). When a
            ``trace`` is supplied, a bool telling whether the score reaches
            ``threshold`` -- without this, any non-zero F1 would be truthy and
            be accepted as a successful demonstration.
            Any exception raised while scoring is printed and treated as 0.0.
        """
        try:
            score = self._score(example, pred)
        except Exception as e:
            # Best-effort: one bad example must not abort a whole evaluation run.
            print(f"Error in FAQScore: {str(e)}")
            score = 0.0

        if trace is None:
            return score
        return score >= self.threshold

    @staticmethod
    def _normalise(value) -> str:
        """Coerce ``value`` to ``str`` and strip/lower-case it for comparison."""
        if not isinstance(value, str):
            value = str(value)
        return value.strip().lower()

    def _score(self, example, pred) -> float:
        """Compute the raw score; may raise if a field is missing or the judge fails."""
        target = self._normalise(example.toDict()[self.selected_output_field])
        prediction = self._normalise(pred.toDict()[self.selected_output_field])

        if not self.semantic:
            return float(target == prediction)

        # Convert all inputs to strings explicitly to avoid type errors
        context = str(example.context_extracted) if hasattr(example, 'context_extracted') else ""
        history = str(example.chat_history_extracted) if hasattr(example, 'chat_history_extracted') else ""
        question = str(example.question) if hasattr(example, 'question') else ""

        # The judge only takes a single "question", so context and history are
        # folded into it under markdown-style headings.
        question_text = "\n## Context:\n" + context + "\n## Chat History:\n" + history + "\n## Question:\n" + question + "\n## Answer:\n" + self.selected_output_field + ": "

        scores = self.evaluate(
            question=question_text,
            ground_truth=target,
            system_response=prediction,
        )
        return f1_score(scores.precision, scores.recall)
            

In [192]:
# Get the API key from environment variables
# OpenAI key comes from the environment (None if the variable is unset).
api_key = os.getenv("OPENAI_API_KEY")

# Default LM for every DSPy module in this notebook: gpt-4o with a
# practically-zero temperature, response caching and a generous timeout.
lm = dspy.LM(
    model="gpt-4o",
    api_key=api_key,
    temperature=0.0000001,
    max_tokens=12000,
    request_timeout=120,  # Add longer timeout
    num_retries=3,
    cache=True,
)

# Register it globally so modules pick it up without being passed an LM.
dspy.configure(lm=lm)

In [193]:
# Define the signature for FAQ prompt optimization
class FAQProgSig(dspy.Signature):
    """# <Role>
    An expert Vietnamese knowledge assistant from BIDV bank, named BIDV, tasked with answering customer inquiries about BIDV and its services. **ALWAYS** follow the 2 steps below:

    1. **Internal Reasoning** (in `<think>` tags):
    - Break down the conversation into atomic steps without skipping stages.
    - Assess the current workflow state of the process.
    - **Verify technological context**: Identify specific technological requirements like NFC or biometric needs related to the inquiry and ensure alignment with BIDV's offered services and apps, including verifying the app version and service status of the BIDV SmartBanking app, before proceeding.
    - **Confirm loan type**: Ensure that the specific type of loan being inquired about is confirmed, as BIDV offers various loan types with different conditions.
    - **Handle multi-part inquiries**: For inquiries involving multiple components (e.g., recovering both username and password), ensure each element is identified and addressed thoroughly.
    - **Emphasize Verification**: Prioritize the verification of every piece of information against BIDV's official documents to avoid assumptions and inaccuracies.
    **Determine card sales eligibility**  
    Set `can_suggest_card = true` only if **all** of the following conditions are met:
    - Inquiry mentions card-related topics: phí thường niên, hoàn tiền, mua sắm online, tính năng thẻ, rút tiền ATM, mở thẻ, thẻ tín dụng quốc tế, thẻ ghi nợ, thanh toán dư nợ thẻ tín dụng, loại thẻ, tham khảo, thông tin thẻ, tiện ích thẻ, tính năng thẻ, etc.
    - Tone is neutral, polite, or curious
    - Customer has **not** expressed disinterest or refused to provide info
    - Conversation is **not** about complaints or service errors

    2. **Final Answer** (after `</think>`):
    - Follow <Response rules> to provide only the final response in Vietnamese without repeating reasoning.
    - **Mandatory:** Confirm all responses are supported by provided documents to avoid unverified claims.
    - Always maintain a polite and respectful tone and make sure to give a preface ("Dạ ...") before answering, repeat the scope of info you are giving before answering, and follow up after responding.
    - End with a **Follow-up Reflective Prompt**:
    - If `can_suggest_card == true`:
        > *"Dạ, hiện BIDV đang có nhiều sản phẩm thẻ với các tính năng hoàn tiền, tích điểm mà còn được miễn phí thường niên rất hấp dẫn và thủ tục mở thẻ vô cùng đơn giản.
        Anh/chị có muốn tham khảo thêm thông tin không ạ?"*
    - If `can_suggest_card == false`:
        > Default to encourage sharing of any additional related intent. Gently suggest common missed areas if relevant (e.g., card type, app usage, service fees). based on 
        relevant chunk. Ex: *"Anh/chị có cần thêm thông tin về loại thẻ, ứng dụng hay biểu phí không ạ?"*

    # <Workflow>

    1. **Understand and respond to the customer inquiry**:
    - Carefully read the inquiry to identify the **main topic** and **customer intent**.
    - If the inquiry includes specific terms (e.g. card type, app feature, loan type, phí thường niên),  
    ➤ Proceed to generate an **expansive answer immediately** using all available information.
    - For multi-part inquiries, break them down and answer each part fully and systematically.
    - If the customer's wording is **partially vague**, try to provide general but helpful information on possible scenarios.  
    ➤ Follow this with a **polite clarifying question** to guide them to specify more details if needed.
    - If the intent or product/service is **too unclear** to provide any meaningful response:
    ➤ Ask a **short, polite clarifying question** to identify the missing info (e.g., “Anh/chị đang hỏi về loại thẻ nào ạ?”).
    - For **complex topics** (e.g. loan types or SmartBanking errors), provide available guidance first,  
    ➤ Then ask for missing inputs only if required to complete the answer.
    - **Clarification loop cap**: If 2 clarifications are ignored, politely direct the customer to **hotline 1900 9247** or the **nearest branch**.

    2. **Verify and support with specific details**:
    - When the customer specifies a card type, feature, service, loan type, or tech detail (e.g. SmartBanking, NFC),  
    ➤ Use that info to answer fully using verified documents.
    - If some details are **missing or unclear**, still provide a helpful general answer first,  
    ➤ Then ask a polite clarifying question if needed to refine the response.
    - Always check BIDVs available tools (e.g. SmartBanking capabilities, latest app version) to ensure accuracy for tech-related inquiries.
    - For issues involving technology:
    ➤ Offer troubleshooting steps (e.g. internet connection, app version, device restart).
    - **Document verification is mandatory**: Only share facts from official BIDV documents or provided context.
    ➤ If not available, politely direct the customer to hotline 1900 9247 or a nearby branch.

    3. **Explain automatic vs. manual processes clearly**:
    - If the topic involves systems like auto-renewals, automatic debits, or statement generation:
    ➤ State what is done automatically and what requires customer action.
    ➤ Use clear “nếu/thì” language to help customers understand exceptions and edge cases.

    4. Review the provided documents:
    - **Mandatory:** Thoroughly review <Additional banking knowledge> to address the inquiry using existing data before suggesting external contact.
    - Focus on features, terms, or services directly related to the inquiry and ensure that information is drawn from the most relevant sources to prevent false assumptions or misguidance.
    - Refer to the documents to understand the banking perspective.
    - If after review, no relevant document is found:
    ➤ Inform the customer politely and suggest the hotline or branch as fallback.
    ➤ Never speculate; clearly mark unavailable data.

    5. Provide an exact answer:
    - Do not make assumptions or fabricate information not present in the documents.
    - Respond clearly and concisely to all aspects of the inquiry in Vietnamese.
    - Use the Expansive-Answer Mode as described in Response Rules>

    6. **Proactively Uncover Sub-Intents**:
    - After each response, include a polite follow-up prompt to check whether the customer has additional, related needs or if their inquiry covers multiple concerns.
    - Your follow-up must be **context-aware**, ensures the customer feels fully supported and prevents missed follow-up service needs. Tailor the question to the topic just discussed.

    #### When to include a Card Sales Invitation in the Follow-up

    Include a follow-up that **invites the customer to explore card products** **only if most the following are true**:

    - The inquiry is related to **card benefits**, **fees**, **usage**, or **features** — e.g., phrases such as:
    - *“phí thường niên”*, *“hoàn tiền”*, *“mua sắm online”*, *“tính năng thẻ”*, *“rút tiền ATM”*
    - The customer has not expressed disinterest or refused to share information
    - The customers tone is polite, curious, or neutral
    - The conversation is not about complaints or error handling

    If these conditions are met, the bot should include a soft, friendly card suggestion.
    Ex: > *"Dạ BIDV đang có một số sản phẩm thẻ hoàn tiền và miễn phí thường niên rất hấp dẫn. Em có thể giới thiệu thêm nếu anh/chị quan tâm ạ?"*
        + If the user asked about card opening eligibility, ask if what card they want to open.
        + If they asked about loan eligibility, ask if they also need help with required documents or processing time.
    ---

    # <Response Rules>
    ## 1. Response:
    - Language: always Vietnamese.
    - Naming: Address the customer as "Anh/Chị" (non-gendered) and refer to yourself as "Em."
    - Style:
    + Use a conversational dialogue style with numeric and short bullet-point responses (up to 6 points).
    + List key content numerically first.
    + Follow markdown conventions.
    + **Emphasize key details using **bold text** or numbering.**
    + Ensure all instructional responses adhere to a numbered list format for clarity.
    + Adapt response structure to be conversational, using numeric bullet points where possible.
    - <Expansive-Answer Mode>:
    + Preserve full detail.
    + Merge overlapping chunks without loss.
    + Exceed token limits if needed.
    + Focus on completeness over brevity.  

    ## 2. Content:
    - Use only information from the provided documents; do not create information.
    - Highlight BIDV's offerings if asked about other banks, and advise checking the other bank’s resources.
    - Do not direct customers to contact BIDV for more information unless all document data is exhausted and no answer is available. If so, provide a standardized message directing to the BIDV helpline or website.

    # <Documents Rules>
    - Response full original detail on fees to ensure thoroughness and to cover all possible cases. Should also include VAT where applicable.
    - Check carefully for discontinued card types; always inform the customer if applicable.

    # <Additional banking knowledge>
    - An ATM card is a domestic debit card (thẻ ghi nợ nội địa), not an international debit card.
    - Visa Cashback card details can serve as reference when other cash-back cards lack specifics.
    - **Lock card** = temporary pause; **Cancel card** = permanent closure with final balance settlement.
    - **Statement date (ngày sao kê)** = the day each month when the credit-card balance is recorded and a bill is generated.
    - **Payment due date (ngày đến hạn)** = the last day to pay at least the minimum amount; usually 15 days after the statement date.
    - **Grace period** applies only when the full statement balance is paid by the due date; cash advances never enjoy a grace period.
    - **Minimum payment** = fixed percentage of the statement balance (or a floor amount) that must be paid to keep the account current.
    - **Cash advance** = any cash withdrawal from a credit line (ATM or POS); attracts interest from the transaction date and a separate fee.
    - **Purchase transaction** = retail or e-commerce payment; interest-free if settled in full within the grace period.
    - **Foreign-currency transaction** = any card charge cleared in non-VND; incurs a conversion fee (phí chuyển đổi ngoại tệ) plus network FX rate.
    - **Internal transfer (nội bộ)** = BIDV→BIDV; **Interbank transfer (liên ngân hàng)** = BIDV→other banks; fees and limits differ.
    - **Contactless (NFC/payWave/payPass)** requires (i) card with “◔” logo, (ii) NFC-enabled POS, and (iii) user phone-wallet if no plastic.
    - **Priority / Premier / KHCC** = customer segment granted higher limits and selected fee waivers, but still pays some service charges.
    - **SmartBanking** = BIDVs retail mobile app; functions include transfers, bill pay, card lock/unlock, and e-statement download.
    - **e-Statement** = PDF billing file sent by email or viewed in SmartBanking; printing at a branch is a separate paid service.
    - **Auto-debit** = automatic deduction of credit-card minimum/full payment from a linked BIDV account on due date.
    - **Card replacement** (làm lại thẻ) differs from **re-issuance/renewal** (phát hành lại khi hết hạn); fees and card numbers may change.
    - **Card lock (khóa thẻ)** = reversible security block; **Card cancel/close (hủy thẻ)** = permanent, requires balance settlement and destroys credit line.
    - **Annual fee** = recurring charge for holding a card; waiver rules depend on annual spending thresholds or promotional bundles.
    - **VAT on bank fees** in Vietnam is 10 %; unless a fee is explicitly stated “đã gồm VAT”, assume VAT is added separately.
    - **“Miễn phí” vs. “0 đồng”** in documents both mean the customer pays nothing, even though the bank still reports VAT = 0.
    - **Discontinued product** = card or service no longer issued; existing holders keep benefits until normal expiry unless informed otherwise.
    - **Complaint (khiếu nại)** = customer claim of error or unauthorised charge; triggers a regulated investigation SLA and may require dispute form.


### Initialization
As <Role>, follow <Response Rules> and <Documents Rules> STRICTLY and engage the customer according to <Workflow>. Use conditional flags like `can_suggest_card` for consistent logic and better sales behavior. Implement clear step-by-step guidelines for processes involving conditional actions to ensure logical consistency. Ensure that all responses are verified against provided documentation and use fallback recommendations to official BIDV resources when needed. 
    """

    # NOTE(review): DSPy is documented to use a Signature's docstring as the task
    # instructions, so the prompt text above is presumably runtime behaviour rather
    # than documentation -- confirm before rewording it (it still contains typos
    # such as "BIDVs" and "Response Rules>").

    # Input fields; the names match the input columns FAQDataset puts on each example.
    context_extracted: str = dspy.InputField()
    chat_history_extracted: str = dspy.InputField()
    question: str = dspy.InputField()

    # The single output field. It shares its name with the `ground_truth` column
    # of the FAQ CSV; FAQScore looks up one key on both the example and the
    # prediction, presumably "ground_truth" -- confirm against the caller.
    ground_truth: str = dspy.OutputField()

    # Ensure all inputs are strings.
    # NOTE(review): no call to this method is visible in this part of the file;
    # confirm that something actually invokes it, otherwise it is dead code.
    def preprocess_inputs(self, **kwargs):
        """Return ``kwargs`` with the three input fields coerced to ``str``.

        Only values that are present, not ``None`` and not already ``str`` are
        converted (via ``str()``); all other entries are passed through as-is.
        """
        for key in ["context_extracted", "chat_history_extracted", "question"]:
            if key in kwargs and kwargs[key] is not None and not isinstance(kwargs[key], str):
                kwargs[key] = str(kwargs[key])
        return kwargs


In [199]:
# Teacher module for FAQ optimization
from typing import Dict, List, Any, Optional
import dspy

class FAQTeacher:
    """Teacher module that provides demonstrations for the FAQ optimization."""
    
    def __init__(self, examples: List[dspy.Example], lm=None):
        """Initialize the teacher with example instances."""
        self.examples = examples
        teacher_lm = dspy.LM(
            model="gpt-4-turbo",  # Use a stronger model than the student's gpt-4o
            api_key=os.environ.get("OPENAI_API_KEY"),
            max_tokens=4096,  # Higher token limit for more detailed responses
            temperature=0.0000001,    # Lower temperature for more deterministic outputs
            cache=True,
            num_retries=5,      # Higher retry count
            request_timeout=180  # Longer timeout
        )
        self.lm = teacher_lm
        
    def generate_demonstration(self, example: dspy.Example) -> dspy.Example:
        """Generate a high-quality response for an example."""
        system_prompt = """
    # <Role>
    An expert Vietnamese knowledge assistant from BIDV bank, named BIDV, tasked with answering customer inquiries about BIDV and its services. **ALWAYS** follow the 2 steps below:

    1. **Internal Reasoning** (in `<think>` tags):
    - Break down the conversation into atomic steps without skipping stages.
    - Assess the current workflow state of the process.
    - **Verify technological context**: Identify specific technological requirements like NFC or biometric needs related to the inquiry and ensure alignment with BIDV's offered services and apps, including verifying the app version and service status of the BIDV SmartBanking app, before proceeding.
    - **Confirm loan type**: Ensure that the specific type of loan being inquired about is confirmed, as BIDV offers various loan types with different conditions.
    - **Handle multi-part inquiries**: For inquiries involving multiple components (e.g., recovering both username and password), ensure each element is identified and addressed thoroughly.
    - **Emphasize Verification**: Prioritize the verification of every piece of information against BIDV's official documents to avoid assumptions and inaccuracies.
    **Determine card sales eligibility**  
    Set `can_suggest_card = true` only if **all** of the following conditions are met:
    - Inquiry mentions card-related topics: phí thường niên, hoàn tiền, mua sắm online, tính năng thẻ, rút tiền ATM, mở thẻ, thẻ tín dụng quốc tế, thẻ ghi nợ, thanh toán dư nợ thẻ tín dụng, loại thẻ, tham khảo, thông tin thẻ, tiện ích thẻ, tính năng thẻ, etc.
    - Tone is neutral, polite, or curious
    - Customer has **not** expressed disinterest or refused to provide info
    - Conversation is **not** about complaints or service errors

    2. **Final Answer** (after `</think>`):
    - Follow <Response rules> to provide only the final response in Vietnamese without repeating reasoning.
    - **Mandatory:** Confirm all responses are supported by provided documents to avoid unverified claims.
    - Always maintain a polite and respectful tone and make sure to give a preface ("Dạ ...") before answering, repeat the scope of info you are giving before answering, and follow up after responding.
    - End with a **Follow-up Reflective Prompt**:
    - If `can_suggest_card == true`:
        > *"Dạ, hiện BIDV đang có nhiều sản phẩm thẻ với các tính năng hoàn tiền, tích điểm mà còn được miễn phí thường niên rất hấp dẫn và thủ tục mở thẻ vô cùng đơn giản.
        Anh/chị có muốn tham khảo thêm thông tin không ạ?"*
    - If `can_suggest_card == false`:
        > Default to encourage sharing of any additional related intent. Gently suggest common missed areas if relevant (e.g., card type, app usage, service fees). based on 
        relevant chunk. Ex: *"Anh/chị có cần thêm thông tin về loại thẻ, ứng dụng hay biểu phí không ạ?"*

    # <Workflow>

    1. **Understand and respond to the customer inquiry**:
    - Carefully read the inquiry to identify the **main topic** and **customer intent**.
    - If the inquiry includes specific terms (e.g. card type, app feature, loan type, phí thường niên),  
    ➤ Proceed to generate an **expansive answer immediately** using all available information.
    - For multi-part inquiries, break them down and answer each part fully and systematically.
    - If the customer's wording is **partially vague**, try to provide general but helpful information on possible scenarios.  
    ➤ Follow this with a **polite clarifying question** to guide them to specify more details if needed.
    - If the intent or product/service is **too unclear** to provide any meaningful response:
    ➤ Ask a **short, polite clarifying question** to identify the missing info (e.g., “Anh/chị đang hỏi về loại thẻ nào ạ?”).
    - For **complex topics** (e.g. loan types or SmartBanking errors), provide available guidance first,  
    ➤ Then ask for missing inputs only if required to complete the answer.
    - **Clarification loop cap**: If 2 clarifications are ignored, politely direct the customer to **hotline 1900 9247** or the **nearest branch**.

    2. **Verify and support with specific details**:
    - When the customer specifies a card type, feature, service, loan type, or tech detail (e.g. SmartBanking, NFC),  
    ➤ Use that info to answer fully using verified documents.
    - If some details are **missing or unclear**, still provide a helpful general answer first,  
    ➤ Then ask a polite clarifying question if needed to refine the response.
    - Always check BIDVs available tools (e.g. SmartBanking capabilities, latest app version) to ensure accuracy for tech-related inquiries.
    - For issues involving technology:
    ➤ Offer troubleshooting steps (e.g. internet connection, app version, device restart).
    - **Document verification is mandatory**: Only share facts from official BIDV documents or provided context.
    ➤ If not available, politely direct the customer to hotline 1900 9247 or a nearby branch.

    3. **Explain automatic vs. manual processes clearly**:
    - If the topic involves systems like auto-renewals, automatic debits, or statement generation:
    ➤ State what is done automatically and what requires customer action.
    ➤ Use clear “nếu/thì” language to help customers understand exceptions and edge cases.

    4. Review the provided documents:
    - **Mandatory:** Thoroughly review <Additional banking knowledge> to address the inquiry using existing data before suggesting external contact.
    - Focus on features, terms, or services directly related to the inquiry and ensure that information is drawn from the most relevant sources to prevent false assumptions or misguidance.
    - Refer to the documents to understand the banking perspective.
    - If after review, no relevant document is found:
    ➤ Inform the customer politely and suggest the hotline or branch as fallback.
    ➤ Never speculate; clearly mark unavailable data.

    5. Provide an exact answer:
    - Do not make assumptions or fabricate information not present in the documents.
    - Respond clearly and concisely to all aspects of the inquiry in Vietnamese.
    - Use the Expansive-Answer Mode as described in Response Rules>

    6. **Proactively Uncover Sub-Intents**:
    - After each response, include a polite follow-up prompt to check whether the customer has additional, related needs or if their inquiry covers multiple concerns.
    - Your follow-up must be **context-aware**, ensures the customer feels fully supported and prevents missed follow-up service needs. Tailor the question to the topic just discussed.

    #### When to include a Card Sales Invitation in the Follow-up

    Include a follow-up that **invites the customer to explore card products** **only if most the following are true**:

    - The inquiry is related to **card benefits**, **fees**, **usage**, or **features** — e.g., phrases such as:
    - *“phí thường niên”*, *“hoàn tiền”*, *“mua sắm online”*, *“tính năng thẻ”*, *“rút tiền ATM”*
    - The customer has not expressed disinterest or refused to share information
    - The customers tone is polite, curious, or neutral
    - The conversation is not about complaints or error handling

    If these conditions are met, the bot should include a soft, friendly card suggestion.
    Ex: > *"Dạ BIDV đang có một số sản phẩm thẻ hoàn tiền và miễn phí thường niên rất hấp dẫn. Em có thể giới thiệu thêm nếu anh/chị quan tâm ạ?"*
        + If the user asked about card opening eligibility, ask if what card they want to open.
        + If they asked about loan eligibility, ask if they also need help with required documents or processing time.
    ---

    # <Response Rules>
    ## 1. Response:
    - Language: always Vietnamese.
    - Naming: Address the customer as "Anh/Chị" (non-gendered) and refer to yourself as "Em."
    - Style:
    + Use a conversational dialogue style with numeric and short bullet-point responses (up to 6 points).
    + List key content numerically first.
    + Follow markdown conventions.
    + **Emphasize key details using **bold text** or numbering.**
    + Ensure all instructional responses adhere to a numbered list format for clarity.
    + Adapt response structure to be conversational, using numeric bullet points where possible.
    - <Expansive-Answer Mode>:
    + Preserve full detail.
    + Merge overlapping chunks without loss.
    + Focus on completeness over brevity.  

    ## 2. Content:
    - Use only information from the provided documents; do not create information.
    - Highlight BIDV's offerings if asked about other banks, and advise checking the other bank’s resources.
    - Do not direct customers to contact BIDV for more information unless all document data is exhausted and no answer is available. If so, provide a standardized message directing to the BIDV helpline or website.

    # <Documents Rules>
    - Response full original detail on fees to ensure thoroughness and to cover all possible cases. Should also include VAT where applicable.
    - Check carefully for discontinued card types; always inform the customer if applicable.

    # <Additional banking knowledge>
    - An ATM card is a domestic debit card (thẻ ghi nợ nội địa), not an international debit card.
    - Visa Cashback card details can serve as reference when other cash-back cards lack specifics.
    - **Lock card** = temporary pause; **Cancel card** = permanent closure with final balance settlement.
    - **Statement date (ngày sao kê)** = the day each month when the credit-card balance is recorded and a bill is generated.
    - **Payment due date (ngày đến hạn)** = the last day to pay at least the minimum amount; usually 15 days after the statement date.
    - **Grace period** applies only when the full statement balance is paid by the due date; cash advances never enjoy a grace period.
    - **Minimum payment** = fixed percentage of the statement balance (or a floor amount) that must be paid to keep the account current.
    - **Cash advance** = any cash withdrawal from a credit line (ATM or POS); attracts interest from the transaction date and a separate fee.
    - **Purchase transaction** = retail or e-commerce payment; interest-free if settled in full within the grace period.
    - **Foreign-currency transaction** = any card charge cleared in non-VND; incurs a conversion fee (phí chuyển đổi ngoại tệ) plus network FX rate.
    - **Internal transfer (nội bộ)** = BIDV→BIDV; **Interbank transfer (liên ngân hàng)** = BIDV→other banks; fees and limits differ.
    - **Contactless (NFC/payWave/payPass)** requires (i) card with “◔” logo, (ii) NFC-enabled POS, and (iii) user phone-wallet if no plastic.
    - **Priority / Premier / KHCC** = customer segment granted higher limits and selected fee waivers, but still pays some service charges.
    - **SmartBanking** = BIDVs retail mobile app; functions include transfers, bill pay, card lock/unlock, and e-statement download.
    - **e-Statement** = PDF billing file sent by email or viewed in SmartBanking; printing at a branch is a separate paid service.
    - **Auto-debit** = automatic deduction of credit-card minimum/full payment from a linked BIDV account on due date.
    - **Card replacement** (làm lại thẻ) differs from **re-issuance/renewal** (phát hành lại khi hết hạn); fees and card numbers may change.
    - **Card lock (khóa thẻ)** = reversible security block; **Card cancel/close (hủy thẻ)** = permanent, requires balance settlement and destroys credit line.
    - **Annual fee** = recurring charge for holding a card; waiver rules depend on annual spending thresholds or promotional bundles.
    - **VAT on bank fees** in Vietnam is 10 %; unless a fee is explicitly stated “đã gồm VAT”, assume VAT is added separately.
    - **“Miễn phí” vs. “0 đồng”** in documents both mean the customer pays nothing.
    - **Discontinued product** = card or service no longer issued; existing holders keep benefits until normal expiry unless informed otherwise.

    ### Initialization
    As <Role>, follow <Response Rules> and <Documents Rules> STRICTLY and engage the customer according to <Workflow>. Use conditional flags like `can_suggest_card` for consistent logic and better sales behavior. Implement clear step-by-step guidelines for processes involving conditional actions to ensure logical consistency. Ensure that all responses are verified against provided documentation and use fallback recommendations to official BIDV resources when needed. 

    Based on the provided information, answer the customer's question:
    Banking Information: {{context_extracted}}
    Previous Messages: {{chat_history_extracted}}
    Customer's Question: {{question}}
    Response:
        """
        
        # Extract the relevant fields
        context = example.context_extracted
        history = example.chat_history_extracted or ""
        question = example.question
        
        # Build the prompt
        prompt = f"""
        {system_prompt}
        
        BANKING INFORMATION:
        {context}
        
        PREVIOUS CONVERSATION:
        {history}
        
        CUSTOMER QUESTION:
        {question}
        
        Please respond to this customer inquiry in Vietnamese:
        """
        
        # Generate a high-quality response
        response = self.lm(prompt)
            
        # Fix: Check if response is a list and handle appropriately
        if isinstance(response, list):
            response = response[0] if response else ""
        
        # Create a new example with the generated response
        new_example = dspy.Example(
            context_extracted=context,
            chat_history_extracted=history,
            question=question,
            ground_truth=response.strip() if isinstance(response, str) else str(response).strip()
        )
        
        return new_example
    
    def get_demonstrations(self, k: int = 3) -> List[dspy.Example]:
        """Generate demonstrations for the first ``k`` stored examples.

        Args:
            k: How many leading entries of ``self.examples`` to use.

        Returns:
            One ``generate_demonstration`` result per selected example, in the
            same order as they appear in ``self.examples``.
        """
        demonstrations = []
        for source_example in self.examples[:k]:
            demonstrations.append(self.generate_demonstration(source_example))
        return demonstrations

In [200]:
# Smoke test for the teacher: build the dataset, generate one demonstration
# for the first training example and print a short preview of it.
prompt_of_field_to_optimize = "ground_truth"
dataset = FAQDataset(
    data_path="/Users/nami-macos/Documents/BIDV-1/Testset FAQ_multiturn_FAQ_Chain - FAQ Chain filtered.csv",
    selected_output_field=prompt_of_field_to_optimize,
)

try:
    print("Testing teacher module...")
    test_teacher = FAQTeacher(dataset.train[:1])
    demo = test_teacher.generate_demonstration(dataset.train[0])

    print("\nOriginal question:")
    print(dataset.train[0].question)

    print("\nGenerated response:")
    # Only the first 200 characters are shown; longer answers get an ellipsis.
    if len(demo.ground_truth) > 200:
        print(demo.ground_truth[:200] + "...")
    else:
        print(demo.ground_truth)

    print("\nTeacher module is working correctly!")
except Exception as exc:
    # Best-effort diagnostic cell: report the failure with its traceback
    # instead of aborting the notebook run.
    import traceback

    print(f"Error testing teacher: {exc!s}")
    traceback.print_exc()

Testing teacher module...

Original question:
ngân hàng căn cứ vào đâu để xác định mức cho vay

Generated response:
Dạ, để xác định mức cho vay, BIDV căn cứ vào các yếu tố sau:

1. **Phương án sử dụng vốn**: BIDV xem xét kế hoạch sử dụng vốn của khách hàng để đảm bảo mục đích vay vốn phù hợp và hiệu quả.
2. **Khả n...

Teacher module is working correctly!


In [181]:
# Smoke-test a freshly built ChainOfThought program on a single training example.
# NOTE: no try/except is active in this cell (the handler further down is
# commented out), so any exception propagates straight to the notebook.
prompt_of_field_to_optimize = "ground_truth"

dataset = FAQDataset(
    data_path="/Users/nami-macos/Documents/BIDV-1/Testset FAQ_multiturn_FAQ_Chain - FAQ Chain filtered.csv",
    selected_output_field=prompt_of_field_to_optimize,
)
program = dspy.ChainOfThought(FAQProgSig)
# Index 1 selects the second entry of the training split, not the first.
test_example = dataset.train[1]

# Print basic info about the example
print("===== EXAMPLE CONTENT =====")
print(f"Question: {test_example.question}")

# Run the program on this example's inputs; a falsy chat history
# (e.g. None or an empty string) is passed as "".
print("\n===== RUNNING TEST =====")
result = program(
    context_extracted=test_example.context_extracted,
    chat_history_extracted=test_example.chat_history_extracted or "",
    question=test_example.question
)
# Only the first 100 characters of the generated answer are shown.
print(f"\nSuccess! Output: {result.ground_truth[:100]}...")

# except Exception as e:
#     import traceback
#     error_type = type(e).__name__
    
#     if "AuthenticationError" in error_type or "API key" in str(e):
#         print(f"❌ API Key Error: {str(e)}")
#         print("\nYou need to set up a valid OpenAI API key.")
#         print("1. Go to https://platform.openai.com/api-keys to get a valid key")
#         print("2. Replace the API key in your code with the new valid key")
#         print("3. Make sure billing is set up for your OpenAI account")
#     else:
#         print(f"❌ Error testing example: {str(e)}")
#         print("\nDetailed traceback:")
#         traceback.print_exc()

===== EXAMPLE CONTENT =====
Question: nợ xấu có vay dc bên ngân hàng mình dc ko

===== RUNNING TEST =====

Success! Output: Dạ, theo quy định hiện hành của BIDV, khách hàng có nợ xấu thường không đủ điều kiện để vay vốn. Để ...


In [182]:
# Optimize the FAQ prompt with MIPROv2: choose the target output field and
# enable the semantic flag that is forwarded to FAQScore below.
prompt_of_field_to_optimize = "ground_truth"
semantic = True

# Load the FAQ dataset and a target field
dataset = FAQDataset(
    data_path="/Users/nami-macos/Documents/BIDV-1/Testset FAQ_multiturn_FAQ_Chain - FAQ Chain filtered.csv",
    selected_output_field=prompt_of_field_to_optimize,
)
# Metric used by the optimizer to score candidate prompts on the same field.
metric = FAQScore(
    selected_output_field=prompt_of_field_to_optimize, semantic=semantic
)

# Program to optimize: a ChainOfThought module built from the FAQ signature.
program = dspy.ChainOfThought(FAQProgSig)

# Configure the teleprompter for optimization with DSPy 2.6.27
# Both demo limits are 0, i.e. no bootstrapped or labeled demos are requested
# for the final program; auto='light' lets MIPROv2 choose its own run settings
# (they are printed in the log when compile() starts).
teleprompter = MIPROv2(
    metric=metric, 
    max_bootstrapped_demos=0, 
    max_labeled_demos=0, 
    auto='light',
    # num_threads=10   (disabled)
    # num_candidates=5 (disabled)
    )  # candidate counts come from auto='light'; num_candidates above is commented out
# Compile and optimize the program: trainset/valset are the dataset's train and
# dev splits, and the interactive confirmation prompt is turned off.
optimized_program = teleprompter.compile(
    program,
    trainset=dataset.train,
    valset=dataset.dev,
    requires_permission_to_run=False,
    # num_trials=7 (disabled)
)

# Save the optimized prompt to a file
optimized_program.save("faq_optimized_prompt.json")

2025/06/16 18:26:53 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 9
minibatch: True
num_fewshot_candidates: 6
num_instruct_candidates: 6
valset size: 87

2025/06/16 18:26:53 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2025/06/16 18:26:53 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used for informing instruction proposal.

2025/06/16 18:26:53 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=6 sets of demonstrations...


Bootstrapping set 1/6
Bootstrapping set 2/6


  3%|▎         | 3/87 [00:07<03:16,  2.33s/it]


Bootstrapped 2 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 3/6


  6%|▌         | 5/87 [01:15<20:38, 15.10s/it]


KeyboardInterrupt: 

# Reference

In [None]:
# Reference run (telesales call analysis): choose the single output field whose
# prompt is optimized and enable the semantic metric.
# NOTE: BonbozTelesalesCall and BonbozTelesalesScore are defined in a later
# cell; run that cell first, otherwise this cell fails with the NameError
# recorded in its output.
prompt_of_field_to_optimize = "clinic_advantages_used_by_employee_for_introduction"
semantic = True

# Load dataset and a target field (to avoid giving spurious information)
dataset = BonbozTelesalesCall(
    path="/home/anhnht/Anywhere-LLM/data/bonboz_call_analysis.csv",
    selected_output_field=prompt_of_field_to_optimize,
)
# Metric that scores predictions for the same field.
metric = BonbozTelesalesScore(
    selected_output_field=prompt_of_field_to_optimize, semantic=semantic
)


# DSPy signature for the "clinic advantages" analysis. DSPy uses the class
# docstring below as the task instructions sent to the LM, so it is prompt
# text rather than documentation: edit it only to change the prompt.
class ProgSig(dspy.Signature):
    """Act as a direct, efficient, knowledgeable and bilingual (Vietnamese and English) assistant for a cosmetic surgery clinic. Your task is to read the transcript of a skincare telesale agent with a potential customer, then deliver a meaningful output based on your reading of the conversation as instructed.

    I have the following the list of elements (I will call them: "LIST_ELEMENTS_AAA") that the skincare telesale agent usually use to persuade customers:
    - Bằng cấp giấy phép phòng khám
    - Mời trải nghiệm để xin đánh giá
    - Không bán mỹ phẩm
    - Phân biệt với spa
    - Không mất phí
    - Không mang CCCD
    - Giải thích trường hợp phát sinh phí
    - Bác sĩ trực tiếp thăm khám
    - Liệu trình chỉ 60-90p

    Your task is reading and checking if skincare telesale agent usually use elements in "LIST_ELEMENTS_AAA" to persuade customers. Afterthat, return the answer following format

    - Bằng cấp giấy phép phòng khám: <YES/NO>
    - Mời trải nghiệm để xin đánh giá: <YES/NO>
    - Không bán mỹ phẩm: <YES/NO>
    - Phân biệt với spa: <YES/NO>
    - Không mất phí: <YES/NO>
    - Không mang CCCD: <YES/NO>
    - Giải thích trường hợp phát sinh phí: <YES/NO>
    - Bác sĩ trực tiếp thăm khám: <YES/NO>
    - Liệu trình chỉ 60-90p: <YES/NO>

    If there is no explicit or the information in the conversation is lacking or too ambiguous, you should answer that the information is missing.  You MUST be aware of phonetic errors, diarization errors, and translation errors in the machine-translated transcript and make educated guesses in ambiguous cases. After the output, you will quote specific lines from the conversations that support your output. All output must be in Vietnamese only, not bold and italic."""

    # Input field: the call transcript.
    transcript = dspy.InputField(format="str")

    # Output field: its name equals the value assigned to
    # prompt_of_field_to_optimize at the top of this cell.
    clinic_advantages_used_by_employee_for_introduction = dspy.OutputField()


# Wrap the signature in a ChainOfThought module; this is the program to optimize.
program = dspy.ChainOfThought(ProgSig)

# Both demo limits are 0, i.e. no bootstrapped or labeled demos are requested;
# auto="light" lets MIPROv2 pick its own run settings.
teleprompter = MIPROv2(
    metric=metric, max_bootstrapped_demos=0, max_labeled_demos=0, auto="light"
)

# Run the optimization on the dataset's train/dev splits without the
# interactive confirmation prompt.
optimized_program = teleprompter.compile(
    program,
    trainset=dataset.train,
    valset=dataset.dev,
    requires_permission_to_run=False,
)
# Persist the optimized program to a JSON file in the working directory.
optimized_program.save("optimized_prompt.json")

NameError: name 'BonbozTelesalesCall' is not defined

In [None]:
class BonbozTelesalesCall(Dataset):
    """Call-transcript dataset read from a CSV or Excel file.

    Each row of the file becomes a plain ``dict``. The rows are shuffled with a
    fixed seed (42) and split in half: the first half is stored in ``_train``
    and the rest in ``_dev``.

    Args:
        path: Location of the data file; must end in ``.csv`` or ``.xlsx``.
        selected_output_field: When given, only the ``transcript`` column, this
            column and (if the file has it) the matching
            ``evidence_<field>`` column are kept.

    Raises:
        ValueError: If ``path`` has neither of the supported suffixes.
    """

    def __init__(self, path: str, selected_output_field: str | None = None):
        super().__init__(input_keys=["transcript"])

        # Choose the pandas reader from the file suffix.
        for suffix, reader in ((".csv", pd.read_csv), (".xlsx", pd.read_excel)):
            if path.endswith(suffix):
                frame = reader(path)
                break
        else:
            raise ValueError("Unsupported file format.")

        if selected_output_field is not None:
            evidence_column = "evidence_" + selected_output_field
            wanted = ["transcript", selected_output_field]
            if evidence_column in frame.columns:
                wanted.append(evidence_column)
            frame = frame[wanted]

        records = [row.to_dict() for _, row in frame.iterrows()]

        # Fixed seed keeps the train/dev split reproducible across runs.
        random.Random(42).shuffle(records)

        split_at = int(0.5 * len(records))
        self._train = records[:split_at]
        self._dev = records[split_at:]


class BonbozTelesalesScore:
    """Metric that scores one output field of a prediction against its label.

    Args:
        selected_output_field: Name of the field read from both the example
            and the prediction (via their ``toDict()``).
        semantic: When true, an LLM judge (``SemanticRecallPrecision``) is
            asked for precision/recall and their F1 is returned; otherwise the
            score is an exact-match 0.0/1.0.

    Calling the instance returns a float. Any exception raised while scoring
    is printed and converted into a score of 0.0, so one bad example does not
    abort an optimization run.
    """

    def __init__(self, selected_output_field: str, semantic: bool = False):
        super().__init__()

        self.semantic = semantic
        if semantic:
            # The judge module is only built when it is actually needed.
            self.evaluate = dspy.ChainOfThought(SemanticRecallPrecision)

        self.selected_output_field = selected_output_field

    def __call__(self, example: "dspy.Example", pred: "dspy.Prediction", trace=None):
        """Score ``pred`` against ``example`` for the selected field.

        Args:
            example: Labeled example; must expose ``toDict()`` and, in
                semantic mode, a ``transcript`` attribute.
            pred: Prediction exposing ``toDict()``.
            trace: Accepted for compatibility with the metric call signature;
                not used.

        Returns:
            F1 of the judge's precision/recall in semantic mode, otherwise
            1.0 for an exact match and 0.0 for a mismatch. 0.0 on any error.
        """
        try:
            target = example.toDict()[self.selected_output_field]
            prediction = pred.toDict()[self.selected_output_field]

            # Normalize strings so whitespace/case differences do not count.
            if isinstance(target, str):
                target = target.strip().lower()

            # Each value is guarded by its own type: previously this check
            # tested ``target``, which crashed on non-string predictions and
            # skipped normalization when only the prediction was a string.
            if isinstance(prediction, str):
                prediction = prediction.strip().lower()

            if self.semantic:
                scores = self.evaluate(
                    question="## Transcript\n"
                    + example.transcript
                    + "\n## Analysis\n"
                    + self.selected_output_field
                    + ": ",
                    ground_truth=str(target),
                    system_response=str(prediction),
                )
                return f1_score(scores.precision, scores.recall)
            else:
                return float(target == prediction)
        except Exception as e:
            # Deliberate best-effort: report the problem and score the
            # example as a miss instead of stopping the whole evaluation.
            print(str(e))
            return 0.0

# Create the language-model client ("azure/gpt-4o", up to 4096 output tokens,
# caching enabled, 3 retries) and register it as DSPy's configured LM.
lm = dspy.LM(model="azure/gpt-4o", max_tokens=4096, cache=True, num_retries=3)
dspy.configure(lm=lm)

In [None]:
# Names of the analysis output fields of interest. The last entry is the field
# optimized in the reference cell above.
# NOTE(review): presumably these are column names of the call-analysis CSV — confirm.
fields_of_interest = [
    "telesales_employee_introduction",
    "customer_agrees_to_schedule_appointment_at_end_of_call",
    "customer_rejects_at_start_of_call",
    "customer_rejects_at_least_once",
    "customer_reluctant_due_to_distance",
    "customer_skeptical_of_free_service_and_fears_additional_purchases",
    "customer_does_not_need_skin_care",
    "customer_receiving_skin_care_elsewhere",
    "customer_inquires_about_procedure_details",
    "clinic_advantages_used_by_employee_for_introduction",
]

In [None]:

# class FAQDataset(Dataset):
#     def __init__(self, data_path, selected_output_field=None):
#         super().__init__(input_keys=["question", "context_extracted", "chat_history_extracted"])
        
#         if data_path.endswith(".csv"):
#             df = pd.read_csv(data_path)
#         elif data_path.endswith(".xlsx"):
#             df = pd.read_excel(data_path)
#         else:
#             raise ValueError("Unsupported file format.")
            
#         if selected_output_field is not None:
#             cols_to_keep = ["question", "context_extracted", "chat_history_extracted", selected_output_field]
#             df = df[cols_to_keep]
            
#         dataset = []
#         for _, row in df.iterrows():
#             # Convert dictionary to dspy.Example
#             row_dict = row.to_dict()
#             example = dspy.Example(
#                 question=row_dict["question"],
#                 context_extracted=row_dict["context_extracted"],
#                 chat_history_extracted=row_dict["chat_history_extracted"]
#             )
            
#             # Add the output field if present
#             if selected_output_field and selected_output_field in row_dict:
#                 example[selected_output_field] = row_dict[selected_output_field]
                
#             dataset.append(example)
            
#         length = len(dataset)
#         rng = random.Random(42)
#         rng.shuffle(dataset)
        
#         self._train = dataset[: int(0.5 * length)]
#         self._dev = dataset[int(0.5 * length) :]
        
#     @property
#     def train(self):
#         return self._train
        
#     @property
#     def dev(self):
#         return self._dev
    

In [None]:
# prompt_of_field_to_optimize = "telesales_employee_introduction"
# semantic = True

# # Load dataset and a target field (to avoid giving spurious information)
# dataset = BonbozTelesalesCall(
#     path="/home/anhnht/Anywhere-LLM/data/bonboz_call_analysis.csv",
#     selected_output_field=prompt_of_field_to_optimize,
# )
# metric = BonbozTelesalesScore(
#     selected_output_field=prompt_of_field_to_optimize, semantic=semantic
# )


# class ProgSig(dspy.Signature):
#     """As a proficient Vietnamese assistant for a beauty clinic, your task is to analyze call transcripts between an agent and a potential customer. Your primary objective is to accurately extract the introductory segment where the telesales agent introduces themselves and the service offer. Be vigilant of potential diarization errors and misassigned speaker tags in the transcript, making educated guesses in ambiguous situations. Provide a detailed reasoning, including specific quotes from the transcript, to support your conclusions. Use double quotes for text and square brackets for timestamps. Ensure telesales employee introduction is in Vietnamese."""

#     transcript = dspy.InputField(format="str")

#     telesales_employee_introduction = dspy.OutputField(default="thiếu thông tin")


# program = dspy.ChainOfThought(ProgSig)

# teleprompter = MIPROv2(
#     metric=metric, max_bootstrapped_demos=0, max_labeled_demos=0, auto="light"
# )

# optimized_program = teleprompter.compile(
#     program,
#     trainset=dataset.train,
#     valset=dataset.dev,
#     requires_permission_to_run=False,
# )
# optimized_program.save("optimized_prompt_intro.json")

2024/11/14 13:32:56 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 7
minibatch: False
num_candidates: 7
valset size: 23

2024/11/14 13:32:56 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2024/11/14 13:32:56 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used for informing instruction proposal.

2024/11/14 13:32:56 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=7 sets of demonstrations...


Bootstrapping set 1/7
Bootstrapping set 2/7


 14%|█▎        | 3/22 [00:30<03:12, 10.15s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 3/7


 14%|█▎        | 3/22 [00:15<01:37,  5.11s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 4/7


 14%|█▎        | 3/22 [00:15<01:37,  5.12s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 5/7


 14%|█▎        | 3/22 [00:18<01:55,  6.08s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 6/7


  5%|▍         | 1/22 [00:00<00:00, 255.07it/s]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 7/7


  9%|▉         | 2/22 [00:00<00:00, 313.72it/s]
2024/11/14 13:34:15 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2024/11/14 13:34:15 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.


2024/11/14 13:34:37 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing instructions...

2024/11/14 13:36:30 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2024/11/14 13:36:30 INFO dspy.teleprompt.mipro_optimizer_v2: 0: As a proficient Vietnamese assistant for a beauty clinic, your task is to analyze call transcripts between an agent and a potential customer. Your primary objective is to accurately extract the introductory segment where the telesales agent introduces themselves and the service offer. Be vigilant of potential diarization errors and misassigned speaker tags in the transcript, making educated guesses in ambiguous situations. Provide a detailed reasoning, including specific quotes from the transcript, to support your conclusions. Use double quotes for text and square brackets for timestamps. Ensure telesales employee introduction is in Vietnamese.

2024/11/14 13:36:30 INFO dspy.teleprompt.mipro_optimizer_v2: 1: You are a skilled Vietnamese

litellm.RateLimitError: AzureException RateLimitError - Error code: 429 - {'error': {'code': '429', 'message': 'Requests to the ChatCompletions_Create Operation under Azure OpenAI API version 2024-02-15-preview have exceeded token rate limit of your current OpenAI S0 pricing tier. Please retry after 5 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.'}}


Average Metric: 14.398141151614496 / 20  (72.0):  87%|████████▋ | 20/23 [01:48<00:15,  5.21s/it]

litellm.RateLimitError: AzureException RateLimitError - Error code: 429 - {'error': {'code': '429', 'message': 'Requests to the ChatCompletions_Create Operation under Azure OpenAI API version 2024-02-15-preview have exceeded token rate limit of your current OpenAI S0 pricing tier. Please retry after 2 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.'}}


Average Metric: 16.678842906000458 / 23  (72.5): 100%|██████████| 23/23 [01:51<00:00,  4.84s/it]
2024/11/14 13:38:22 INFO dspy.evaluate.evaluate: Average Metric: 16.678842906000458 / 23 (72.5%)
2024/11/14 13:38:22 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 72.52

2024/11/14 13:38:22 INFO dspy.teleprompt.mipro_optimizer_v2: ==> STEP 3: FINDING OPTIMAL PROMPT PARAMETERS <==
2024/11/14 13:38:22 INFO dspy.teleprompt.mipro_optimizer_v2: We will evaluate the program over a series of trials with different combinations of instructions and few-shot examples to find the optimal combination using Bayesian Optimization.

2024/11/14 13:38:22 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 1 / 7 =====
Average Metric: 17.476871777713544 / 22  (79.4):  96%|█████████▌| 22/23 [01:14<00:02,  2.60s/it]

litellm.BadRequestError: litellm.ContentPolicyViolationError: litellm.ContentPolicyViolationError: AzureException - Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': True, 'severity': 'low'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


Average Metric: 18.476871777713544 / 23  (80.3): 100%|██████████| 23/23 [01:18<00:00,  3.42s/it]
2024/11/14 13:39:40 INFO dspy.evaluate.evaluate: Average Metric: 18.476871777713544 / 23 (80.3%)
2024/11/14 13:39:40 INFO dspy.teleprompt.mipro_optimizer_v2: [92mBest full score so far![0m Score: 80.33
2024/11/14 13:39:40 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 80.33 with parameters ['Predictor 0: Instruction 1'].
2024/11/14 13:39:40 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [72.52, 80.33]
2024/11/14 13:39:40 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 80.33


2024/11/14 13:39:40 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 2 / 7 =====
Average Metric: 17.259058588470353 / 23  (75.0): 100%|██████████| 23/23 [01:49<00:00,  4.76s/it]
2024/11/14 13:41:30 INFO dspy.evaluate.evaluate: Average Metric: 17.259058588470353 / 23 (75.0%)
2024/11/14 13:41:30 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 75.04 with parameters ['Predictor 0: Instruction 5'

litellm.BadRequestError: litellm.ContentPolicyViolationError: litellm.ContentPolicyViolationError: AzureException - Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': True, 'severity': 'low'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


Average Metric: 17.10521098236854 / 21  (81.5):  91%|█████████▏| 21/23 [01:16<00:05,  2.68s/it] 

litellm.BadRequestError: litellm.ContentPolicyViolationError: litellm.ContentPolicyViolationError: AzureException - Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': True, 'severity': 'low'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


Average Metric: 17.771877649035208 / 23  (77.3): 100%|██████████| 23/23 [01:21<00:00,  3.56s/it]
2024/11/14 13:42:52 INFO dspy.evaluate.evaluate: Average Metric: 17.771877649035208 / 23 (77.3%)
2024/11/14 13:42:52 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 77.27 with parameters ['Predictor 0: Instruction 2'].
2024/11/14 13:42:52 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [72.52, 80.33, 75.04, 77.27]
2024/11/14 13:42:52 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 80.33


2024/11/14 13:42:52 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 4 / 7 =====
Average Metric: 17.259058588470353 / 23  (75.0): 100%|██████████| 23/23 [00:18<00:00,  1.23it/s] 
2024/11/14 13:43:11 INFO dspy.evaluate.evaluate: Average Metric: 17.259058588470353 / 23 (75.0%)
2024/11/14 13:43:11 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 75.04 with parameters ['Predictor 0: Instruction 5'].
2024/11/14 13:43:11 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [72.52, 80.33

litellm.BadRequestError: litellm.ContentPolicyViolationError: litellm.ContentPolicyViolationError: AzureException - Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': True, 'severity': 'low'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


Average Metric: 12.965418318891663 / 16  (81.0):  70%|██████▉   | 16/23 [00:39<00:17,  2.57s/it]2024/11/14 13:43:50 ERROR dspy.evaluate.evaluate: Error for example in dev set: 		 litellm.RateLimitError: AzureException RateLimitError - Error code: 429 - {'error': {'code': '429', 'message': 'Requests to the ChatCompletions_Create Operation under Azure OpenAI API version 2024-02-15-preview have exceeded token rate limit of your current OpenAI S0 pricing tier. Please retry after 4 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.'}}. Set `provide_traceback=True` to see the stack trace.
Average Metric: 12.965418318891663 / 17  (76.3):  70%|██████▉   | 16/23 [00:39<00:17,  2.57s/it]

litellm.RateLimitError: AzureException RateLimitError - Error code: 429 - {'error': {'code': '429', 'message': 'Requests to the ChatCompletions_Create Operation under Azure OpenAI API version 2024-02-15-preview have exceeded token rate limit of your current OpenAI S0 pricing tier. Please retry after 4 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.'}}


Average Metric: 15.9535135569869 / 22  (72.5):  96%|█████████▌| 22/23 [01:04<00:04,  4.06s/it]  

litellm.BadRequestError: litellm.ContentPolicyViolationError: litellm.ContentPolicyViolationError: AzureException - Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': True, 'severity': 'low'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


Average Metric: 16.3535135569869 / 23  (71.1): 100%|██████████| 23/23 [01:22<00:00,  3.59s/it]
2024/11/14 13:44:33 INFO dspy.evaluate.evaluate: Average Metric: 16.3535135569869 / 23 (71.1%)
2024/11/14 13:44:34 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 71.1 with parameters ['Predictor 0: Instruction 4'].
2024/11/14 13:44:34 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [72.52, 80.33, 75.04, 77.27, 75.04, 71.1]
2024/11/14 13:44:34 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 80.33


2024/11/14 13:44:34 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 6 / 7 =====
Average Metric: 18.476871777713544 / 23  (80.3): 100%|██████████| 23/23 [00:16<00:00,  1.38it/s] 
2024/11/14 13:44:50 INFO dspy.evaluate.evaluate: Average Metric: 18.476871777713544 / 23 (80.3%)
2024/11/14 13:44:50 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 80.33 with parameters ['Predictor 0: Instruction 1'].
2024/11/14 13:44:50 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [72.5

litellm.BadRequestError: litellm.ContentPolicyViolationError: litellm.ContentPolicyViolationError: AzureException - Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': True, 'severity': 'low'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


Average Metric: 17.59966740314075 / 22  (80.0):  96%|█████████▌| 22/23 [01:10<00:04,  4.53s/it] 

litellm.BadRequestError: litellm.ContentPolicyViolationError: litellm.ContentPolicyViolationError: AzureException - Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': True, 'severity': 'low'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


Average Metric: 18.266334069807417 / 23  (79.4): 100%|██████████| 23/23 [01:12<00:00,  3.14s/it]
2024/11/14 13:46:02 INFO dspy.evaluate.evaluate: Average Metric: 18.266334069807417 / 23 (79.4%)
2024/11/14 13:46:02 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 79.42 with parameters ['Predictor 0: Instruction 6'].
2024/11/14 13:46:02 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [72.52, 80.33, 75.04, 77.27, 75.04, 71.1, 80.33, 79.42]
2024/11/14 13:46:02 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 80.33


2024/11/14 13:46:02 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 80.33!


In [None]:
# NOTE(review): every line in this cell is commented out, so executing it does nothing.
# What the disabled code would do if re-enabled:
#   1. Build a `BonbozTelesalesCall` dataset and a `BonbozTelesalesScore` metric restricted to
#      the single output field `customer_agrees_to_schedule_appointment_at_end_of_call`.
#   2. Declare `ProgSig`, a `dspy.Signature` whose docstring is the seed instruction; it takes
#      a `transcript` input and outputs one of three Vietnamese labels (a `Literal`).
#   3. Wrap it in `dspy.ChainOfThought`, run `MIPROv2` (auto="light", both demo limits set to 0)
#      with `dataset.train` / `dataset.dev`, and save the optimized program to JSON.
# Assumes `BonbozTelesalesCall` and `BonbozTelesalesScore` are defined by an earlier cell —
# TODO confirm before re-enabling.
# NOTE(review): the CSV path below is an absolute path on one machine; it needs to be made
# configurable before this can run anywhere else.
# prompt_of_field_to_optimize = "customer_agrees_to_schedule_appointment_at_end_of_call"
# semantic = False

# # Load dataset and a target field (to avoid giving spurious information)
# dataset = BonbozTelesalesCall(
#     path="/home/anhnht/Anywhere-LLM/data/bonboz_call_analysis.csv",
#     selected_output_field=prompt_of_field_to_optimize,
# )
# metric = BonbozTelesalesScore(
#     selected_output_field=prompt_of_field_to_optimize, semantic=semantic
# )

# NOTE(review): the doubly-commented `TeleSalesResult` Enum below looks like an earlier
# alternative to the `Literal` annotation used in `ProgSig` — confirm it can be dropped.
# # from enum import Enum


# # class TeleSalesResult(Enum):
# #     từ_chối = "từ chối"
# #     đồng_ý = "đồng ý"
# #     gửi_thêm_thông_tin = "gửi thêm thông tin"

# from typing import Literal


# class ProgSig(dspy.Signature):
#     """As a Vietnamese language analyst, your task is to thoroughly analyze the provided call transcript between a beauty clinic's agent and a potential client. Your objective is to determine the customer's decision at the end of the call: "từ chối" (decline), "đồng ý" (agree), or "gửi thêm thông tin" (request more information). Carefully consider the dialogue for any indications of the customer's intent, and be mindful of potential diarization errors or misattributed speaker tags. Use specific quotes from the transcript, enclosed in double quotes, and provide timestamps in square brackets to support your analysis. Conclude with a clear determination of the customer's decision regarding the appointment."""

#     transcript = dspy.InputField(format="str")

#     customer_agrees_to_schedule_appointment_at_end_of_call: Literal[
#         "từ chối", "đồng ý", "gửi thêm thông tin"
#     ] = dspy.OutputField()


# program = dspy.ChainOfThought(ProgSig)

# teleprompter = MIPROv2(
#     metric=metric, max_bootstrapped_demos=0, max_labeled_demos=0, auto="light"
# )

# optimized_program = teleprompter.compile(
#     program,
#     trainset=dataset.train,
#     valset=dataset.dev,
#     requires_permission_to_run=False,
# )
# NOTE(review): "optimized_prompt.json" is also the save target of other commented-out cells
# in this notebook; re-enabling several of them as-is would overwrite this file.
# optimized_program.save("optimized_prompt.json")

2024/11/14 13:48:27 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 7
minibatch: False
num_candidates: 7
valset size: 23

2024/11/14 13:48:27 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2024/11/14 13:48:27 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used for informing instruction proposal.

2024/11/14 13:48:27 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=7 sets of demonstrations...


Bootstrapping set 1/7
Bootstrapping set 2/7


 18%|█▊        | 4/22 [00:15<01:09,  3.87s/it]


Bootstrapped 3 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Bootstrapping set 3/7


 14%|█▎        | 3/22 [00:09<01:01,  3.24s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 4/7


 14%|█▎        | 3/22 [00:03<00:23,  1.23s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 5/7


 18%|█▊        | 4/22 [00:15<01:08,  3.80s/it]


Bootstrapped 3 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Bootstrapping set 6/7


  5%|▍         | 1/22 [00:00<00:00, 322.02it/s]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 7/7


  9%|▉         | 2/22 [00:00<00:00, 422.66it/s]
2024/11/14 13:49:12 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2024/11/14 13:49:12 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.
2024/11/14 13:49:12 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing instructions...



Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.


2024/11/14 13:51:18 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2024/11/14 13:51:18 INFO dspy.teleprompt.mipro_optimizer_v2: 0: As a Vietnamese language analyst, your task is to thoroughly analyze the provided call transcript between a beauty clinic's agent and a potential client. Your objective is to determine the customer's decision at the end of the call: "từ chối" (decline), "đồng ý" (agree), or "gửi thêm thông tin" (request more information). Carefully consider the dialogue for any indications of the customer's intent, and be mindful of potential diarization errors or misattributed speaker tags. Use specific quotes from the transcript, enclosed in double quotes, and provide timestamps in square brackets to support your analysis. Conclude with a clear determination of the customer's decision regarding the appointment.

2024/11/14 13:51:18 INFO dspy.teleprompt.mipro_optimizer_v2: 1: You are a Vietnamese language analyst specializing in customer i

In [None]:
# NOTE(review): every line in this cell is commented out, so executing it does nothing.
# If re-enabled, the code would build a `BonbozTelesalesCall` dataset and a
# `BonbozTelesalesScore` metric for the boolean output field
# `customer_rejects_at_start_of_call`, declare a `ProgSig` signature whose docstring is the
# seed instruction, and run `MIPROv2` (auto="light", both demo limits set to 0) over a
# `dspy.ChainOfThought` program using `dataset.train` / `dataset.dev`.
# Assumes `BonbozTelesalesCall` and `BonbozTelesalesScore` are defined by an earlier cell —
# TODO confirm before re-enabling.
# NOTE(review): the CSV path below is an absolute path on one machine; it needs to be made
# configurable before this can run anywhere else.
# prompt_of_field_to_optimize = "customer_rejects_at_start_of_call"
# semantic = False

# # Load dataset and a target field (to avoid giving spurious information)
# dataset = BonbozTelesalesCall(
#     path="/home/anhnht/Anywhere-LLM/data/bonboz_call_analysis.csv",
#     selected_output_field=prompt_of_field_to_optimize,
# )
# metric = BonbozTelesalesScore(
#     selected_output_field=prompt_of_field_to_optimize, semantic=semantic
# )


# class ProgSig(dspy.Signature):
#     """As a culturally aware and detail-oriented Vietnamese telesales analyst for a beauty clinic, your task is to meticulously examine call transcripts between a telesales agent and a potential client. Your primary goal is to ascertain whether the customer rejects the offer from the telesales agent at the very beginning of the call. Pay close attention to potential diarization errors and misassigned speaker tags within the transcript, employing educated guesses when faced with ambiguous situations. Construct a comprehensive reasoning that includes specific quotes from the transcript, using double quotes for text and square brackets for timestamps, to substantiate your conclusions."""

#     transcript = dspy.InputField(format="str")

#     customer_rejects_at_start_of_call: bool = dspy.OutputField()


# program = dspy.ChainOfThought(ProgSig)

# teleprompter = MIPROv2(
#     metric=metric, max_bootstrapped_demos=0, max_labeled_demos=0, auto="light"
# )

# optimized_program = teleprompter.compile(
#     program,
#     trainset=dataset.train,
#     valset=dataset.dev,
#     requires_permission_to_run=False,
# )
# This cell saves to a field-specific file name rather than "optimized_prompt.json".
# optimized_program.save("optimized_prompt_reject_at_start.json")

2024/11/14 13:10:58 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 7
minibatch: False
num_candidates: 7
valset size: 23

2024/11/14 13:10:58 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2024/11/14 13:10:58 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used for informing instruction proposal.

2024/11/14 13:10:58 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=7 sets of demonstrations...


Bootstrapping set 1/7
Bootstrapping set 2/7


 14%|█▎        | 3/22 [00:20<02:08,  6.77s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 3/7


 14%|█▎        | 3/22 [00:19<02:00,  6.35s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 4/7


 14%|█▎        | 3/22 [00:09<01:03,  3.33s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 5/7


 14%|█▎        | 3/22 [00:08<00:56,  2.98s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 6/7


  5%|▍         | 1/22 [00:00<00:00, 349.15it/s]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 7/7


  9%|▉         | 2/22 [00:00<00:00, 541.03it/s]
2024/11/14 13:11:57 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2024/11/14 13:11:57 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.
2024/11/14 13:11:57 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing instructions...



Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.


2024/11/14 13:13:39 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2024/11/14 13:13:39 INFO dspy.teleprompt.mipro_optimizer_v2: 0: As a culturally aware and detail-oriented Vietnamese telesales analyst for a beauty clinic, your task is to meticulously examine call transcripts between a telesales agent and a potential client. Your primary goal is to ascertain whether the customer rejects the offer from the telesales agent at the very beginning of the call. Pay close attention to potential diarization errors and misassigned speaker tags within the transcript, employing educated guesses when faced with ambiguous situations. Construct a comprehensive reasoning that includes specific quotes from the transcript, using double quotes for text and square brackets for timestamps, to substantiate your conclusions.

2024/11/14 13:13:39 INFO dspy.teleprompt.mipro_optimizer_v2: 1: You are a culturally aware and detail-oriented Vietnamese telesales analyst working fo

In [None]:
# NOTE(review): every line in this cell is commented out, so executing it does nothing.
# If re-enabled, the code would build a `BonbozTelesalesCall` dataset and a
# `BonbozTelesalesScore` metric for the boolean output field
# `customer_rejects_at_least_once`, declare a `ProgSig` signature whose docstring is the
# seed instruction, and run `MIPROv2` (auto="light", both demo limits set to 0) over a
# `dspy.ChainOfThought` program using `dataset.train` / `dataset.dev`.
# Assumes `BonbozTelesalesCall` and `BonbozTelesalesScore` are defined by an earlier cell —
# TODO confirm before re-enabling.
# NOTE(review): the CSV path below is an absolute path on one machine; it needs to be made
# configurable before this can run anywhere else.
# prompt_of_field_to_optimize = "customer_rejects_at_least_once"
# semantic = False

# # Load dataset and a target field (to avoid giving spurious information)
# dataset = BonbozTelesalesCall(
#     path="/home/anhnht/Anywhere-LLM/data/bonboz_call_analysis.csv",
#     selected_output_field=prompt_of_field_to_optimize,
# )
# metric = BonbozTelesalesScore(
#     selected_output_field=prompt_of_field_to_optimize, semantic=semantic
# )


# class ProgSig(dspy.Signature):
#     """As a skilled Vietnamese language analyst for a beauty clinic, your role is to meticulously examine call transcripts between the clinic's agent and potential clients. Your main goal is to assess whether the customer rejects the offer from the telesales agent at least once in the call. Remain alert to potential diarization errors and incorrectly attributed speaker tags within the transcript, and make informed assumptions when faced with unclear situations. Provide a comprehensive analysis by including specific quotes from the transcript to substantiate your findings. Ensure that text is enclosed in double quotes and timestamps in square brackets."""

#     transcript = dspy.InputField(format="str")

#     customer_rejects_at_least_once: bool = dspy.OutputField()


# program = dspy.ChainOfThought(ProgSig)

# teleprompter = MIPROv2(
#     metric=metric, max_bootstrapped_demos=0, max_labeled_demos=0, auto="light"
# )

# optimized_program = teleprompter.compile(
#     program,
#     trainset=dataset.train,
#     valset=dataset.dev,
#     requires_permission_to_run=False,
# )
# NOTE(review): "optimized_prompt.json" is also the save target of other commented-out cells
# in this notebook; re-enabling several of them as-is would overwrite this file.
# optimized_program.save("optimized_prompt.json")

2024/11/14 13:02:05 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 7
minibatch: False
num_candidates: 7
valset size: 23

2024/11/14 13:02:05 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2024/11/14 13:02:05 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used for informing instruction proposal.

2024/11/14 13:02:05 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=7 sets of demonstrations...


Bootstrapping set 1/7
Bootstrapping set 2/7


  0%|          | 0/22 [00:00<?, ?it/s]

 14%|█▎        | 3/22 [00:14<01:31,  4.79s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 3/7


 14%|█▎        | 3/22 [00:09<01:01,  3.24s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 4/7


 18%|█▊        | 4/22 [00:08<00:38,  2.16s/it]


Bootstrapped 3 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Bootstrapping set 5/7


 27%|██▋       | 6/22 [00:14<00:39,  2.46s/it]


Bootstrapped 3 full traces after 6 examples for up to 1 rounds, amounting to 6 attempts.
Bootstrapping set 6/7


  9%|▉         | 2/22 [00:06<01:02,  3.11s/it]


Bootstrapped 1 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 7/7


 14%|█▎        | 3/22 [00:00<00:00, 493.51it/s]
2024/11/14 13:02:59 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2024/11/14 13:02:59 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.
2024/11/14 13:02:59 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing instructions...



Bootstrapped 2 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.


2024/11/14 13:04:29 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2024/11/14 13:04:29 INFO dspy.teleprompt.mipro_optimizer_v2: 0: As a skilled Vietnamese language analyst for a beauty clinic, your role is to meticulously examine call transcripts between the clinic's agent and potential clients. Your main goal is to assess whether the customer rejects the offer from the telesales agent at least once in the call. Remain alert to potential diarization errors and incorrectly attributed speaker tags within the transcript, and make informed assumptions when faced with unclear situations. Provide a comprehensive analysis by including specific quotes from the transcript to substantiate your findings. Ensure that text is enclosed in double quotes and timestamps in square brackets.

2024/11/14 13:04:29 INFO dspy.teleprompt.mipro_optimizer_v2: 1: As a skilled Vietnamese language analyst for a beauty clinic, your task is to carefully review call transcripts betwe

In [None]:
# NOTE(review): every line in this cell is commented out, so executing it does nothing.
# If re-enabled, the code would build a `BonbozTelesalesCall` dataset and a
# `BonbozTelesalesScore` metric for the boolean output field
# `customer_reluctant_due_to_distance`, declare a `ProgSig` signature whose docstring is the
# seed instruction, and run `MIPROv2` (auto="light", both demo limits set to 0) over a
# `dspy.ChainOfThought` program using `dataset.train` / `dataset.dev`.
# Assumes `BonbozTelesalesCall` and `BonbozTelesalesScore` are defined by an earlier cell —
# TODO confirm before re-enabling.
# NOTE(review): the CSV path below is an absolute path on one machine; it needs to be made
# configurable before this can run anywhere else.
# prompt_of_field_to_optimize = "customer_reluctant_due_to_distance"
# semantic = False

# # Load dataset and a target field (to avoid giving spurious information)
# dataset = BonbozTelesalesCall(
#     path="/home/anhnht/Anywhere-LLM/data/bonboz_call_analysis.csv",
#     selected_output_field=prompt_of_field_to_optimize,
# )
# metric = BonbozTelesalesScore(
#     selected_output_field=prompt_of_field_to_optimize, semantic=semantic
# )


# class ProgSig(dspy.Signature):
#     """As a proficient Vietnamese assistant for a beauty clinic, your task is to analyze call transcripts between an agent and a potential customer. Your primary objective is to determine if customer is reluctant with the offer due to distance. Be vigilant of potential diarization errors and misassigned speaker tags in the transcript, making educated guesses in ambiguous situations. Provide a detailed reasoning, including specific quotes from the transcript, to support your conclusions. Use double quotes for text and square brackets for timestamps."""

#     transcript = dspy.InputField(format="str")

#     customer_reluctant_due_to_distance: bool = dspy.OutputField()


# program = dspy.ChainOfThought(ProgSig)

# teleprompter = MIPROv2(
#     metric=metric, max_bootstrapped_demos=0, max_labeled_demos=0, auto="light"
# )

# optimized_program = teleprompter.compile(
#     program,
#     trainset=dataset.train,
#     valset=dataset.dev,
#     requires_permission_to_run=False,
# )
# NOTE(review): "optimized_prompt.json" is also the save target of other commented-out cells
# in this notebook; re-enabling several of them as-is would overwrite this file.
# optimized_program.save("optimized_prompt.json")

2024/11/13 17:02:52 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 7
minibatch: False
num_candidates: 7
valset size: 23

2024/11/13 17:02:52 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2024/11/13 17:02:52 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used for informing instruction proposal.

2024/11/13 17:02:52 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=7 sets of demonstrations...


Bootstrapping set 1/7
Bootstrapping set 2/7


 18%|█▊        | 4/22 [00:25<01:53,  6.32s/it]


Bootstrapped 3 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Bootstrapping set 3/7


 14%|█▎        | 3/22 [00:10<01:04,  3.37s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 4/7


 18%|█▊        | 4/22 [00:07<00:32,  1.83s/it]


Bootstrapped 3 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Bootstrapping set 5/7


 14%|█▎        | 3/22 [00:09<00:59,  3.13s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 6/7


  5%|▍         | 1/22 [00:00<00:00, 333.89it/s]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 7/7


  9%|▉         | 2/22 [00:00<00:00, 401.87it/s]
2024/11/13 17:03:44 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2024/11/13 17:03:44 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.


2024/11/13 17:04:09 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing instructions...

2024/11/13 17:05:39 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2024/11/13 17:05:39 INFO dspy.teleprompt.mipro_optimizer_v2: 0: As a proficient Vietnamese assistant for a beauty clinic, your task is to analyze call transcripts between an agent and a potential customer. Your primary objective is to determine if customer is reluctant with the offer due to distance. Be vigilant of potential diarization errors and misassigned speaker tags in the transcript, making educated guesses in ambiguous situations. Provide a detailed reasoning, including specific quotes from the transcript, to support your conclusions. Use double quotes for text and square brackets for timestamps.

2024/11/13 17:05:39 INFO dspy.teleprompt.mipro_optimizer_v2: 1: You are a skilled Vietnamese language analyst specializing in beauty clinic marketing interactions. Your task is to review call trans

In [None]:
# NOTE(review): every line in this cell is commented out, so executing it does nothing.
# If re-enabled, the code would build a `BonbozTelesalesCall` dataset and a
# `BonbozTelesalesScore` metric for the boolean output field
# `customer_skeptical_of_free_service_or_fears_additional_purchases`, declare a `ProgSig`
# signature whose docstring is the seed instruction, and run `MIPROv2` (auto="light", both
# demo limits set to 0) over a `dspy.ChainOfThought` program using `dataset.train` /
# `dataset.dev`.
# Assumes `BonbozTelesalesCall` and `BonbozTelesalesScore` are defined by an earlier cell —
# TODO confirm before re-enabling.
# NOTE(review): the CSV path below is an absolute path on one machine; it needs to be made
# configurable before this can run anywhere else.
# prompt_of_field_to_optimize = "customer_skeptical_of_free_service_or_fears_additional_purchases"
# semantic = False

# # Load dataset and a target field (to avoid giving spurious information)
# dataset = BonbozTelesalesCall(
#     path="/home/anhnht/Anywhere-LLM/data/bonboz_call_analysis.csv",
#     selected_output_field=prompt_of_field_to_optimize,
# )
# metric = BonbozTelesalesScore(
#     selected_output_field=prompt_of_field_to_optimize, semantic=semantic
# )


# class ProgSig(dspy.Signature):
#     """As a skilled Vietnamese language analyst for a beauty clinic, your role is to meticulously examine call transcripts between the clinic's agent and potential clients. Your main goal is to assess whether the customer is skeptical of free service or fears additional purchases. Remain alert to potential diarization errors and incorrectly attributed speaker tags within the transcript, and make informed assumptions when faced with unclear situations. Provide a comprehensive analysis by including specific quotes from the transcript to substantiate your findings. Ensure that text is enclosed in double quotes and timestamps in square brackets."""

#     transcript = dspy.InputField(format="str")

#     customer_skeptical_of_free_service_or_fears_additional_purchases: bool = dspy.OutputField()


# program = dspy.ChainOfThought(ProgSig)

# teleprompter = MIPROv2(
#     metric=metric, max_bootstrapped_demos=0, max_labeled_demos=0, auto="light"
# )

# optimized_program = teleprompter.compile(
#     program,
#     trainset=dataset.train,
#     valset=dataset.dev,
#     requires_permission_to_run=False,
# )
# NOTE(review): "optimized_prompt.json" is also the save target of other commented-out cells
# in this notebook; re-enabling several of them as-is would overwrite this file.
# optimized_program.save("optimized_prompt.json")

2024/11/14 12:02:28 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 7
minibatch: False
num_candidates: 7
valset size: 23

2024/11/14 12:02:28 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2024/11/14 12:02:28 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used for informing instruction proposal.

2024/11/14 12:02:28 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=7 sets of demonstrations...


Bootstrapping set 1/7
Bootstrapping set 2/7


  0%|          | 0/22 [00:00<?, ?it/s]

 18%|█▊        | 4/22 [00:20<01:30,  5.04s/it]


Bootstrapped 3 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Bootstrapping set 3/7


 14%|█▎        | 3/22 [00:08<00:50,  2.68s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 4/7


 14%|█▎        | 3/22 [00:03<00:23,  1.22s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 5/7


 27%|██▋       | 6/22 [00:18<00:50,  3.14s/it]


Bootstrapped 3 full traces after 6 examples for up to 1 rounds, amounting to 6 attempts.
Bootstrapping set 6/7


  5%|▍         | 1/22 [00:00<00:00, 718.20it/s]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 7/7


  9%|▉         | 2/22 [00:00<00:00, 489.05it/s]
2024/11/14 12:03:19 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2024/11/14 12:03:19 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.


2024/11/14 12:03:44 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing instructions...

2024/11/14 12:05:12 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2024/11/14 12:05:12 INFO dspy.teleprompt.mipro_optimizer_v2: 0: As a skilled Vietnamese language analyst for a beauty clinic, your role is to meticulously examine call transcripts between the clinic's agent and potential clients. Your main goal is to assess whether the customer is skeptical of free service or fears additional purchases. Remain alert to potential diarization errors and incorrectly attributed speaker tags within the transcript, and make informed assumptions when faced with unclear situations. Provide a comprehensive analysis by including specific quotes from the transcript to substantiate your findings. Ensure that text is enclosed in double quotes and timestamps in square brackets.

2024/11/14 12:05:12 INFO dspy.teleprompt.mipro_optimizer_v2: 1: You are a Vietnamese language analyst 

In [None]:
# NOTE(review): every line in this cell is commented out, so executing it does nothing.
# If re-enabled, the code would build a `BonbozTelesalesCall` dataset and a
# `BonbozTelesalesScore` metric for the boolean output field
# `customer_does_not_need_skin_care`, declare a `ProgSig` signature whose docstring is the
# seed instruction, and run `MIPROv2` (auto="light", both demo limits set to 0) over a
# `dspy.ChainOfThought` program using `dataset.train` / `dataset.dev`.
# Assumes `BonbozTelesalesCall` and `BonbozTelesalesScore` are defined by an earlier cell —
# TODO confirm before re-enabling.
# NOTE(review): the CSV path below is an absolute path on one machine; it needs to be made
# configurable before this can run anywhere else.
# prompt_of_field_to_optimize = "customer_does_not_need_skin_care"
# semantic = False

# # Load dataset and a target field (to avoid giving spurious information)
# dataset = BonbozTelesalesCall(
#     path="/home/anhnht/Anywhere-LLM/data/bonboz_call_analysis.csv",
#     selected_output_field=prompt_of_field_to_optimize,
# )
# metric = BonbozTelesalesScore(
#     selected_output_field=prompt_of_field_to_optimize, semantic=semantic
# )


# class ProgSig(dspy.Signature):
#     """As a skilled Vietnamese language analyst for a beauty clinic, your role is to meticulously examine call transcripts between the clinic's agent and potential clients. Your main goal is to assess whether the customer rejects offer because they do not need skin care. Remain alert to potential diarization errors and incorrectly attributed speaker tags within the transcript, and make informed assumptions when faced with unclear situations. Provide a comprehensive analysis by including specific quotes from the transcript to substantiate your findings. Ensure that text is enclosed in double quotes and timestamps in square brackets."""

#     transcript = dspy.InputField(format="str")

#     customer_does_not_need_skin_care: bool = dspy.OutputField()


# program = dspy.ChainOfThought(ProgSig)

# teleprompter = MIPROv2(
#     metric=metric, max_bootstrapped_demos=0, max_labeled_demos=0, auto="light"
# )

# optimized_program = teleprompter.compile(
#     program,
#     trainset=dataset.train,
#     valset=dataset.dev,
#     requires_permission_to_run=False,
# )
# NOTE(review): "optimized_prompt.json" is also the save target of other commented-out cells
# in this notebook; re-enabling several of them as-is would overwrite this file.
# optimized_program.save("optimized_prompt.json")

2024/11/13 17:30:45 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 7
minibatch: False
num_candidates: 7
valset size: 23

2024/11/13 17:30:45 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2024/11/13 17:30:45 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used for informing instruction proposal.

2024/11/13 17:30:45 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=7 sets of demonstrations...


Bootstrapping set 1/7
Bootstrapping set 2/7


 18%|█▊        | 4/22 [00:14<01:03,  3.51s/it]


Bootstrapped 3 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Bootstrapping set 3/7


 14%|█▎        | 3/22 [00:09<01:02,  3.27s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 4/7


 14%|█▎        | 3/22 [00:03<00:22,  1.21s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 5/7


 18%|█▊        | 4/22 [00:14<01:03,  3.50s/it]


Bootstrapped 3 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Bootstrapping set 6/7


  5%|▍         | 1/22 [00:00<00:00, 1134.21it/s]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 7/7


  9%|▉         | 2/22 [00:00<00:00, 1159.45it/s]
2024/11/13 17:31:27 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2024/11/13 17:31:27 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.


2024/11/13 17:31:51 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing instructions...

2024/11/13 17:33:53 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2024/11/13 17:33:53 INFO dspy.teleprompt.mipro_optimizer_v2: 0: As a skilled Vietnamese language analyst for a beauty clinic, your role is to meticulously examine call transcripts between the clinic's agent and potential clients. Your main goal is to assess whether the customer rejects offer because they do not need skin care. Remain alert to potential diarization errors and incorrectly attributed speaker tags within the transcript, and make informed assumptions when faced with unclear situations. Provide a comprehensive analysis by including specific quotes from the transcript to substantiate your findings. Ensure that text is enclosed in double quotes and timestamps in square brackets.

2024/11/13 17:33:53 INFO dspy.teleprompt.mipro_optimizer_v2: 1: You are a skilled Vietnamese language analyst wo

In [None]:
# NOTE(review): every line in this cell is commented out, so executing it does nothing.
# If re-enabled, the code would build a `BonbozTelesalesCall` dataset and a
# `BonbozTelesalesScore` metric for the boolean output field
# `customer_receiving_skin_care_elsewhere`, declare a `ProgSig` signature whose docstring is
# the seed instruction, and run `MIPROv2` (auto="light", both demo limits set to 0) over a
# `dspy.ChainOfThought` program using `dataset.train` / `dataset.dev`.
# Assumes `BonbozTelesalesCall` and `BonbozTelesalesScore` are defined by an earlier cell —
# TODO confirm before re-enabling.
# NOTE(review): the CSV path below is an absolute path on one machine; it needs to be made
# configurable before this can run anywhere else.
# prompt_of_field_to_optimize = "customer_receiving_skin_care_elsewhere"
# semantic = False

# # Load dataset and a target field (to avoid giving spurious information)
# dataset = BonbozTelesalesCall(
#     path="/home/anhnht/Anywhere-LLM/data/bonboz_call_analysis.csv",
#     selected_output_field=prompt_of_field_to_optimize,
# )
# metric = BonbozTelesalesScore(
#     selected_output_field=prompt_of_field_to_optimize, semantic=semantic
# )


# class ProgSig(dspy.Signature):
#     """As a skilled Vietnamese language analyst for a beauty clinic, your role is to meticulously examine call transcripts between the clinic's agent and potential clients. Your main goal is to assess whether the customer rejects offer because they are receiving skin care elsewhere. Remain alert to potential diarization errors and incorrectly attributed speaker tags within the transcript, and make informed assumptions when faced with unclear situations. Provide a comprehensive analysis by including specific quotes from the transcript to substantiate your findings. Ensure that text is enclosed in double quotes and timestamps in square brackets."""

#     transcript = dspy.InputField(format="str")

#     customer_receiving_skin_care_elsewhere: bool = dspy.OutputField()


# program = dspy.ChainOfThought(ProgSig)

# teleprompter = MIPROv2(
#     metric=metric, max_bootstrapped_demos=0, max_labeled_demos=0, auto="light"
# )

# optimized_program = teleprompter.compile(
#     program,
#     trainset=dataset.train,
#     valset=dataset.dev,
#     requires_permission_to_run=False,
# )
# NOTE(review): "optimized_prompt.json" is also the save target of other commented-out cells
# in this notebook; re-enabling several of them as-is would overwrite this file.
# optimized_program.save("optimized_prompt.json")

2024/11/13 17:42:44 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 7
minibatch: False
num_candidates: 7
valset size: 23

2024/11/13 17:42:44 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2024/11/13 17:42:44 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used for informing instruction proposal.

2024/11/13 17:42:44 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=7 sets of demonstrations...


Bootstrapping set 1/7
Bootstrapping set 2/7


 14%|█▎        | 3/22 [00:19<02:03,  6.50s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 3/7


 14%|█▎        | 3/22 [00:09<01:02,  3.30s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 4/7


 14%|█▎        | 3/22 [00:07<00:49,  2.62s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 5/7


 14%|█▎        | 3/22 [00:19<02:05,  6.58s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 6/7


  5%|▍         | 1/22 [00:00<00:00, 394.98it/s]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 7/7


  9%|▉         | 2/22 [00:00<00:00, 433.27it/s]
2024/11/13 17:43:41 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2024/11/13 17:43:41 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.


2024/11/13 17:44:00 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing instructions...

2024/11/13 17:46:13 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2024/11/13 17:46:13 INFO dspy.teleprompt.mipro_optimizer_v2: 0: As a skilled Vietnamese language analyst for a beauty clinic, your role is to meticulously examine call transcripts between the clinic's agent and potential clients. Your main goal is to assess whether the customer rejects offer because they are receiving skin care elsewhere. Remain alert to potential diarization errors and incorrectly attributed speaker tags within the transcript, and make informed assumptions when faced with unclear situations. Provide a comprehensive analysis by including specific quotes from the transcript to substantiate your findings. Ensure that text is enclosed in double quotes and timestamps in square brackets.

2024/11/13 17:46:13 INFO dspy.teleprompt.mipro_optimizer_v2: 1: Imagine you are a Vietnamese languag

In [None]:
# DISABLED CELL (kept for reference only; every line below is commented out).
# It configures a MIPROv2 instruction-optimization run for the boolean output
# field `customer_inquires_about_procedure_details` with `semantic = False`.
# NOTE: it saves to "optimized_prompt.json", the same file name used by the
# active optimization cell further down, so re-enabling it would overwrite
# that result.
#
# prompt_of_field_to_optimize = "customer_inquires_about_procedure_details"
# semantic = False

# # Load dataset and a target field (to avoid giving spurious information)
# dataset = BonbozTelesalesCall(
#     path="/home/anhnht/Anywhere-LLM/data/bonboz_call_analysis.csv",
#     selected_output_field=prompt_of_field_to_optimize,
# )
# metric = BonbozTelesalesScore(
#     selected_output_field=prompt_of_field_to_optimize, semantic=semantic
# )


# class ProgSig(dspy.Signature):
#     """As a skilled Vietnamese language analyst for a beauty clinic, your role is to meticulously examine call transcripts between the clinic's agent and potential clients. Your main goal is to assess whether the customer inquires about procedure details. Remain alert to potential diarization errors and incorrectly attributed speaker tags within the transcript, and make informed assumptions when faced with unclear situations. Provide a comprehensive analysis by including specific quotes from the transcript to substantiate your findings. Ensure that text is enclosed in double quotes and timestamps in square brackets."""

#     transcript = dspy.InputField(format="str")

#     customer_inquires_about_procedure_details: bool = dspy.OutputField()


# program = dspy.ChainOfThought(ProgSig)

# teleprompter = MIPROv2(
#     metric=metric, max_bootstrapped_demos=0, max_labeled_demos=0, auto="light"
# )

# optimized_program = teleprompter.compile(
#     program,
#     trainset=dataset.train,
#     valset=dataset.dev,
#     requires_permission_to_run=False,
# )
# optimized_program.save("optimized_prompt.json")

2024/11/14 14:05:38 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 7
minibatch: False
num_candidates: 7
valset size: 23

2024/11/14 14:05:38 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2024/11/14 14:05:38 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used for informing instruction proposal.

2024/11/14 14:05:38 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=7 sets of demonstrations...


Bootstrapping set 1/7
Bootstrapping set 2/7


 36%|███▋      | 8/22 [00:27<00:48,  3.44s/it]


Bootstrapped 3 full traces after 8 examples for up to 1 rounds, amounting to 8 attempts.
Bootstrapping set 3/7


 14%|█▎        | 3/22 [00:06<00:42,  2.23s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 4/7


 23%|██▎       | 5/22 [00:08<00:29,  1.76s/it]


Bootstrapped 3 full traces after 5 examples for up to 1 rounds, amounting to 5 attempts.
Bootstrapping set 5/7


 32%|███▏      | 7/22 [00:14<00:30,  2.06s/it]


Bootstrapped 3 full traces after 7 examples for up to 1 rounds, amounting to 7 attempts.
Bootstrapping set 6/7


  5%|▍         | 1/22 [00:00<00:00, 151.92it/s]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 7/7


  9%|▉         | 2/22 [00:00<00:00, 688.44it/s]
2024/11/14 14:06:35 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2024/11/14 14:06:35 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.


2024/11/14 14:06:56 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing instructions...

2024/11/14 14:08:25 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2024/11/14 14:08:25 INFO dspy.teleprompt.mipro_optimizer_v2: 0: As a skilled Vietnamese language analyst for a beauty clinic, your role is to meticulously examine call transcripts between the clinic's agent and potential clients. Your main goal is to assess whether the customer inquires about procedure details. Remain alert to potential diarization errors and incorrectly attributed speaker tags within the transcript, and make informed assumptions when faced with unclear situations. Provide a comprehensive analysis by including specific quotes from the transcript to substantiate your findings. Ensure that text is enclosed in double quotes and timestamps in square brackets.

2024/11/14 14:08:25 INFO dspy.teleprompt.mipro_optimizer_v2: 1: You are an expert Vietnamese language analyst working for a beau

In [None]:
# --- Run configuration -------------------------------------------------------
# Name of the single output field whose prompt is optimized in this run. The
# same value is handed to both the dataset loader and the metric below.
prompt_of_field_to_optimize = "clinic_advantages_used_by_employee_for_introduction"
# Forwarded to BonbozTelesalesScore as `semantic`.
# NOTE(review): presumably selects a semantic-similarity score instead of an
# exact comparison -- confirm against the metric's definition.
semantic = True

# Location of the call-analysis CSV. It can be overridden with the
# BONBOZ_DATA_PATH environment variable (e.g. from the .env file) so the
# notebook is not tied to one machine; an unset or empty variable falls back
# to the original path.
BONBOZ_DATA_PATH = (
    os.environ.get("BONBOZ_DATA_PATH")
    or "/home/anhnht/Anywhere-LLM/data/bonboz_call_analysis.csv"
)

# Load dataset and a target field (to avoid giving spurious information)
dataset = BonbozTelesalesCall(
    path=BONBOZ_DATA_PATH,
    selected_output_field=prompt_of_field_to_optimize,
)
metric = BonbozTelesalesScore(
    selected_output_field=prompt_of_field_to_optimize, semantic=semantic
)


# DSPy signature for the "clinic advantages" check: one transcript input, one
# free-text output field.
# NOTE: the class docstring below is runtime prompt text, not ordinary
# documentation -- it shows up verbatim as instruction candidate 0 in the
# MIPROv2 log of this run. Any edit to it (including typo fixes) changes the
# baseline prompt being optimized, so it is deliberately left untouched.
class ProgSig(dspy.Signature):
    """Act as a direct, efficient, knowledgeable and bilingual (Vietnamese and English) assistant for a cosmetic surgery clinic. Your task is to read the transcript of a skincare telesale agent with a potential customer, then deliver a meaningful output based on your reading of the conversation as instructed.

    I have the following the list of elements (I will call them: "LIST_ELEMENTS_AAA") that the skincare telesale agent usually use to persuade customers:
    - Bằng cấp giấy phép phòng khám
    - Mời trải nghiệm để xin đánh giá
    - Không bán mỹ phẩm
    - Phân biệt với spa
    - Không mất phí
    - Không mang CCCD
    - Giải thích trường hợp phát sinh phí
    - Bác sĩ trực tiếp thăm khám
    - Liệu trình chỉ 60-90p

    Your task is reading and checking if skincare telesale agent usually use elements in "LIST_ELEMENTS_AAA" to persuade customers. Afterthat, return the answer following format

    - Bằng cấp giấy phép phòng khám: <YES/NO>
    - Mời trải nghiệm để xin đánh giá: <YES/NO>
    - Không bán mỹ phẩm: <YES/NO>
    - Phân biệt với spa: <YES/NO>
    - Không mất phí: <YES/NO>
    - Không mang CCCD: <YES/NO>
    - Giải thích trường hợp phát sinh phí: <YES/NO>
    - Bác sĩ trực tiếp thăm khám: <YES/NO>
    - Liệu trình chỉ 60-90p: <YES/NO>

    If there is no explicit or the information in the conversation is lacking or too ambiguous, you should answer that the information is missing.  You MUST be aware of phonetic errors, diarization errors, and translation errors in the machine-translated transcript and make educated guesses in ambiguous cases. After the output, you will quote specific lines from the conversations that support your output. All output must be in Vietnamese only, not bold and italic."""

    # Input: the call transcript to analyse.
    transcript = dspy.InputField(format="str")

    # Output: the per-element YES/NO answer described in the prompt above.
    # The field name is identical to the value of `prompt_of_field_to_optimize`
    # set earlier in this cell -- presumably the dataset and metric select the
    # field by that name, so keep the two in sync (TODO confirm).
    clinic_advantages_used_by_employee_for_introduction = dspy.OutputField()


# Module to optimize: the signature above wrapped in a chain-of-thought predictor.
program = dspy.ChainOfThought(ProgSig)

# Optimizer settings: both demo budgets are zero and the "light" auto preset
# is used, scored with the metric built earlier in this cell.
mipro_settings = {
    "metric": metric,
    "max_bootstrapped_demos": 0,
    "max_labeled_demos": 0,
    "auto": "light",
}
teleprompter = MIPROv2(**mipro_settings)

# Compile against the dataset's train/dev splits without the interactive
# confirmation prompt, then persist the resulting program to disk.
compile_settings = {
    "trainset": dataset.train,
    "valset": dataset.dev,
    "requires_permission_to_run": False,
}
optimized_program = teleprompter.compile(program, **compile_settings)
optimized_program.save("optimized_prompt.json")

2024/11/14 14:37:00 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 7
minibatch: False
num_candidates: 7
valset size: 23

2024/11/14 14:37:00 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2024/11/14 14:37:00 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used for informing instruction proposal.

2024/11/14 14:37:00 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=7 sets of demonstrations...


Bootstrapping set 1/7
Bootstrapping set 2/7


  0%|          | 0/22 [00:00<?, ?it/s]

 18%|█▊        | 4/22 [00:43<03:15, 10.84s/it]


Bootstrapped 3 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Bootstrapping set 3/7


 14%|█▎        | 3/22 [00:20<02:07,  6.73s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 4/7


 14%|█▎        | 3/22 [00:14<01:32,  4.87s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 5/7


 14%|█▎        | 3/22 [00:24<02:32,  8.05s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 6/7


  5%|▍         | 1/22 [00:00<00:00, 256.93it/s]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 7/7


  9%|▉         | 2/22 [00:00<00:00, 305.56it/s]
2024/11/14 14:38:43 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2024/11/14 14:38:43 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.


2024/11/14 14:39:02 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing instructions...

2024/11/14 14:41:28 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2024/11/14 14:41:28 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Act as a direct, efficient, knowledgeable and bilingual (Vietnamese and English) assistant for a cosmetic surgery clinic. Your task is to read the transcript of a skincare telesale agent with a potential customer, then deliver a meaningful output based on your reading of the conversation as instructed.

I have the following the list of elements (I will call them: "LIST_ELEMENTS_AAA") that the skincare telesale agent usually use to persuade customers:
- Bằng cấp giấy phép phòng khám
- Mời trải nghiệm để xin đánh giá
- Không bán mỹ phẩm
- Phân biệt với spa
- Không mất phí
- Không mang CCCD
- Giải thích trường hợp phát sinh phí
- Bác sĩ trực tiếp thăm khám
- Liệu trình chỉ 60-90p

Your task is reading and checking if skincare telesale agen

litellm.BadRequestError: litellm.ContentPolicyViolationError: litellm.ContentPolicyViolationError: AzureException - Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': True, 'severity': 'low'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


Average Metric: 10.209343434343433 / 23  (44.4): 100%|██████████| 23/23 [01:59<00:00,  5.20s/it]
2024/11/14 14:43:28 INFO dspy.evaluate.evaluate: Average Metric: 10.209343434343433 / 23 (44.4%)
2024/11/14 14:43:28 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 44.39

2024/11/14 14:43:28 INFO dspy.teleprompt.mipro_optimizer_v2: ==> STEP 3: FINDING OPTIMAL PROMPT PARAMETERS <==
2024/11/14 14:43:28 INFO dspy.teleprompt.mipro_optimizer_v2: We will evaluate the program over a series of trials with different combinations of instructions and few-shot examples to find the optimal combination using Bayesian Optimization.

2024/11/14 14:43:28 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 1 / 7 =====
Average Metric: 11.16065062388592 / 23  (48.5): 100%|██████████| 23/23 [01:45<00:00,  4.57s/it]
2024/11/14 14:45:13 INFO dspy.evaluate.evaluate: Average Metric: 11.16065062388592 / 23 (48.5%)
2024/11/14 14:45:13 INFO dspy.teleprompt.mipro_optimizer_v2: [92mBest full score so 

litellm.BadRequestError: litellm.ContentPolicyViolationError: litellm.ContentPolicyViolationError: AzureException - Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': True, 'severity': 'low'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


Average Metric: 8.541964516964516 / 22  (38.8):  96%|█████████▌| 22/23 [02:29<00:05,  5.04s/it] 

litellm.BadRequestError: litellm.ContentPolicyViolationError: litellm.ContentPolicyViolationError: AzureException - Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': True, 'severity': 'low'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


Average Metric: 9.311195286195286 / 23  (40.5): 100%|██████████| 23/23 [02:33<00:00,  6.68s/it]
2024/11/14 14:47:46 INFO dspy.evaluate.evaluate: Average Metric: 9.311195286195286 / 23 (40.5%)
2024/11/14 14:47:46 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 40.48 with parameters ['Predictor 0: Instruction 5'].
2024/11/14 14:47:46 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [44.39, 48.52, 40.48]
2024/11/14 14:47:46 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 48.52


2024/11/14 14:47:46 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 3 / 7 =====
Average Metric: 8.098387723387722 / 20  (40.5):  87%|████████▋ | 20/23 [02:04<00:14,  4.70s/it]

litellm.BadRequestError: litellm.ContentPolicyViolationError: litellm.ContentPolicyViolationError: AzureException - Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': True, 'severity': 'low'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


Average Metric: 10.667618492618493 / 23  (46.4): 100%|██████████| 23/23 [02:07<00:00,  5.56s/it]
2024/11/14 14:49:54 INFO dspy.evaluate.evaluate: Average Metric: 10.667618492618493 / 23 (46.4%)
2024/11/14 14:49:54 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 46.38 with parameters ['Predictor 0: Instruction 2'].
2024/11/14 14:49:54 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [44.39, 48.52, 40.48, 46.38]
2024/11/14 14:49:54 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 48.52


2024/11/14 14:49:54 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 4 / 7 =====
Average Metric: 9.311195286195286 / 23  (40.5): 100%|██████████| 23/23 [00:17<00:00,  1.31it/s] 
2024/11/14 14:50:12 INFO dspy.evaluate.evaluate: Average Metric: 9.311195286195286 / 23 (40.5%)
2024/11/14 14:50:12 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 40.48 with parameters ['Predictor 0: Instruction 5'].
2024/11/14 14:50:12 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [44.39, 48.52, 

litellm.BadRequestError: litellm.ContentPolicyViolationError: litellm.ContentPolicyViolationError: AzureException - Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': True, 'severity': 'low'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


Average Metric: 8.491851086716794 / 23  (36.9): 100%|██████████| 23/23 [02:05<00:00,  5.46s/it] 
2024/11/14 14:52:18 INFO dspy.evaluate.evaluate: Average Metric: 8.491851086716794 / 23 (36.9%)
2024/11/14 14:52:18 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 36.92 with parameters ['Predictor 0: Instruction 4'].
2024/11/14 14:52:18 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [44.39, 48.52, 40.48, 46.38, 40.48, 36.92]
2024/11/14 14:52:18 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 48.52


2024/11/14 14:52:18 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 6 / 7 =====


litellm.BadRequestError: litellm.ContentPolicyViolationError: litellm.ContentPolicyViolationError: AzureException - Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': True, 'severity': 'low'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


Average Metric: 11.160650623885918 / 23  (48.5): 100%|██████████| 23/23 [00:24<00:00,  1.04s/it] 
2024/11/14 14:52:42 INFO dspy.evaluate.evaluate: Average Metric: 11.160650623885918 / 23 (48.5%)
2024/11/14 14:52:42 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 48.52 with parameters ['Predictor 0: Instruction 1'].
2024/11/14 14:52:42 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [44.39, 48.52, 40.48, 46.38, 40.48, 36.92, 48.52]
2024/11/14 14:52:42 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 48.52


2024/11/14 14:52:42 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 7 / 7 =====


litellm.BadRequestError: litellm.ContentPolicyViolationError: litellm.ContentPolicyViolationError: AzureException - Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': True, 'severity': 'low'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}


Average Metric: 10.113525363525362 / 23  (44.0): 100%|██████████| 23/23 [01:34<00:00,  4.12s/it]
2024/11/14 14:54:17 INFO dspy.evaluate.evaluate: Average Metric: 10.113525363525362 / 23 (44.0%)
2024/11/14 14:54:17 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 43.97 with parameters ['Predictor 0: Instruction 6'].
2024/11/14 14:54:17 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [44.39, 48.52, 40.48, 46.38, 40.48, 36.92, 48.52, 43.97]
2024/11/14 14:54:17 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 48.52


2024/11/14 14:54:17 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 48.52!


litellm.BadRequestError: litellm.ContentPolicyViolationError: litellm.ContentPolicyViolationError: AzureException - Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': True, 'severity': 'low'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}
