# BIDV Prompt Optimization

## a. Import packages

In [350]:
import datetime
import functools
import os
import pathlib
import random
import re

# %pip install click
import click

import dspy
import pandas as pd
import py3langid as langid
from dspy.datasets.dataset import Dataset
from dspy.evaluate.auto_evaluation import SemanticRecallPrecision, f1_score, SemanticF1
from dspy import Evaluate

from dspy.teleprompt import MIPROv2
from pydantic import BaseModel, Field

In [351]:
from dotenv import load_dotenv
load_dotenv()  # This loads the variables from .env

True

## b. Prepare Dataset and Metrics

In [352]:
faq_data = pd.read_csv('/Users/nami-macos/Documents/GitHub/BIDV-DSPy-Optimization/Testset FAQ_multiturn_FAQ_Chain - FAQ Chain 1706.csv')
key_columns = faq_data[['question', 'context_extracted', 'chat_history_extracted', 'bank_response']]
# Display the first few rows of these columns
print("\nSample data:")
display(key_columns.head())


Sample data:


Unnamed: 0,question,context_extracted,chat_history_extracted,bank_response
0,Chức năng của thẻ nội địa phi vật lí và thẻ nội địa vật lí đều như...,"{\n ""question"": ""BIDV đang triển khai những sản phẩm thẻ nào?"",...","Customer: thẻ atm có những tính năng gì thế em\nBIDV: Dạ, thẻ ATM ...","Dạ, thẻ ghi nợ nội địa BIDV Smart có hai loại: thẻ vật lý và thẻ p..."
1,thẻ atm có những tính năng gì thế em,"{\n ""question"": ""BIDV đang triển khai những sản phẩm thẻ nào?"",...",,"Dạ, thẻ ATM BIDV Smart có những tính năng sau:\n- Rút tiền và than..."
2,"em đăng ký làm thẻ atm từ hqua ,kb bgio mới có thẻ ạ","{\n ""question"": ""Dịch vụ chọn số thẻ đẹp thẻ ghi nợ nội địa...",,"Dạ, thời gian phát hành thẻ ATM của BIDV là tối đa 6 ngày làm việc..."
3,mình muốn làm nhiều thẻ luôn đc không hay chỉ ddc làm 1 cái thôi,"{\n ""question"": ""Các loại thẻ tín dụng quốc tế đang phát hành"",...","Customer: cần cbi giấy tờ gì để làm thẻ thế\nBIDV: Dạ, để mở thẻ t...","Dạ, mỗi cá nhân có thể được phát hành 01 thẻ chính cho mỗi mã sản ..."
4,Số tiền thanh toán tối thiểu của thẻ tín dụng,"{\n ""Description"": ""Gồm LÃI SUẤT và các loại phí khi sử dụng TH...",,Chủ thẻ chi tiêu trước – trả tiền sau theo thời hạn thông báo của ...


In [353]:
class FAQDataset(Dataset):
    def __init__(self, data_path, selected_output_field=None):
        super().__init__(input_keys=["question", "context_extracted", "chat_history_extracted"])
        
        if data_path.endswith(".csv"):
            df = pd.read_csv(data_path)
        elif data_path.endswith(".xlsx"):
            df = pd.read_excel(data_path)
        else:
            raise ValueError("Unsupported file format.")
            
        if selected_output_field is not None:
            cols_to_keep = ["question", "context_extracted", "chat_history_extracted", selected_output_field]
            df = df[cols_to_keep]
            
        dataset = []
        for _, row in df.iterrows():
            row_dict = row.to_dict()
            
            # Clean and sanitize context_extracted
            if "context_extracted" in row_dict and isinstance(row_dict["context_extracted"], str):
                # Strip any invalid JSON prefixes/suffixes if needed
                row_dict["context_extracted"] = row_dict["context_extracted"].replace('\\n', '\n').replace('\\"', '"')

            example = dspy.Example()
            
            # Add fields to the example            
            if "context_extracted" in row_dict:
                example.context_extracted = row_dict["context_extracted"]
            
            if "chat_history_extracted" in row_dict:
                example.chat_history_extracted = row_dict["chat_history_extracted"]

            # Add default empty string for chat_history if missing or None
            if "chat_history_extracted" not in row_dict or row_dict["chat_history_extracted"] is None:
                row_dict["chat_history_extracted"] = ""
            
            if "question" in row_dict:
                example.question = row_dict["question"]

            # Add the output field if present
            if selected_output_field and selected_output_field in row_dict:
                example[selected_output_field] = row_dict[selected_output_field]
            
            # Use list form of with_inputs instead of keyword arguments
            example = example.with_inputs("context_extracted", "chat_history_extracted", "question")
                
            dataset.append(example)
            
        length = len(dataset)
        rng = random.Random(42)
        rng.shuffle(dataset)
        
        # Updated split ratio: 80% train, 20% dev
        self._train = dataset[: int(0.2 * length)]
        self._dev = dataset[int(0.2 * length) :]
        
    @property
    def train(self):
        return self._train
        
    @property
    def dev(self):
        return self._dev
    

# Updated FAQScore class 
class FAQScore:
    def __init__(self, selected_output_field: str, semantic: bool = False):
        super().__init__()

        self.semantic = semantic
        if semantic:
            self.evaluate = dspy.ChainOfThought(SemanticRecallPrecision)

        self.selected_output_field = selected_output_field

    def __call__(self, example: dspy.Example, pred: dspy.Prediction, trace=None):
        try:
            # Get values and ensure they're strings
            target = example.toDict()[self.selected_output_field]
            prediction = pred.toDict()[self.selected_output_field]

            # Convert to strings if needed
            if not isinstance(target, str):
                target = str(target)
            if not isinstance(prediction, str):
                prediction = str(prediction)

            target = target.strip().lower()
            prediction = prediction.strip().lower()

            if self.semantic:
                # Convert all inputs to strings explicitly to avoid type errors
                context = str(example.context_extracted) if hasattr(example, 'context_extracted') else ""
                history = str(example.chat_history_extracted) if hasattr(example, 'chat_history_extracted') else ""
                question = str(example.question) if hasattr(example, 'question') else ""
                
                # Build the question string with explicit string concatenation
                question_text = "\n## Context:\n" + context + "\n## Chat History:\n" + history + "\n## Question:\n" + question + "\n## Answer:\n" + self.selected_output_field + ": "
                
                scores = self.evaluate(
                    question=question_text,
                    ground_truth=target,       # Changed from bank_response to ground_truth
                    system_response=prediction, 
                )                
                return f1_score(scores.precision, scores.recall)
            else:
                return float(target == prediction)
        except Exception as e:
            print(f"Error in FAQScore: {str(e)}")
            return 0.0

In [None]:
# Get the API key from environment variables
api_key=os.environ.get("OPENAI_API_KEY")

lm = dspy.LM(
    model="gpt-4o-mini", 
    api_key=api_key,
    max_tokens=12000, 
    cache=True, 
    num_retries=3,
    temperature=0.0000001,
    request_timeout=120  # Add longer timeout
    )
dspy.configure(lm=lm)

## c. Define signature 

In [355]:
# Define the signature for FAQ prompt optimization
class FAQProgSig(dspy.Signature):
    """# <Role>
    An expert Vietnamese knowledge assistant from BIDV bank, named BIDV, tasked with answering customer inquiries about BIDV and its services. **ALWAYS** follow the 2 steps below:

    1. **Internal Reasoning** (in `<think>` tags):
    - Break down the conversation into atomic steps without skipping stages.
    - Assess the current workflow state of the process.
    - **Verify technological context**: Identify specific technological requirements like NFC or biometric needs related to the inquiry and ensure alignment with BIDV's offered services and apps, including verifying the app version and service status of the BIDV SmartBanking app, before proceeding.
    - **Confirm loan type**: Ensure that the specific type of loan being inquired about is confirmed, as BIDV offers various loan types with different conditions.
    - **Handle multi-part inquiries**: For inquiries involving multiple components (e.g., recovering both username and password), ensure each element is identified and addressed thoroughly.
    - **Emphasize Verification**: Prioritize the verification of every piece of information against BIDV's official documents to avoid assumptions and inaccuracies.
    **Determine card sales eligibility**  
    Set `can_suggest_card = true` only if **all** of the following conditions are met:
    - Inquiry mentions card-related topics: phí thường niên, hoàn tiền, mua sắm online, tính năng thẻ, rút tiền ATM, mở thẻ, thẻ tín dụng quốc tế, thẻ ghi nợ, thanh toán dư nợ thẻ tín dụng, loại thẻ, tham khảo, thông tin thẻ, tiện ích thẻ, tính năng thẻ, etc.
    - Tone is neutral, polite, or curious
    - Customer has **not** expressed disinterest or refused to provide info
    - Conversation is **not** about complaints or service errors

    2. **Final Answer** (after `</think>`):
    - Follow <Response rules> to provide only the final response in Vietnamese without repeating reasoning.
    - **Mandatory:** Confirm all responses are supported by provided documents to avoid unverified claims.
    - Always maintain a polite and respectful tone and make sure to give a preface ("Dạ ...") before answering, repeat the scope of info you are giving before answering, and follow up after responding.
    - End with a **Follow-up Reflective Prompt**:
    - If `can_suggest_card == true`:
        > *"Dạ, hiện BIDV đang có nhiều sản phẩm thẻ với các tính năng hoàn tiền, tích điểm mà còn được miễn phí thường niên rất hấp dẫn và thủ tục mở thẻ vô cùng đơn giản.
        Anh/chị có muốn tham khảo thêm thông tin không ạ?"*
    - If `can_suggest_card == false`:
        > Default to encourage sharing of any additional related intent. Gently suggest common missed areas if relevant (e.g., card type, app usage, service fees). based on 
        relevant chunk. Ex: *"Anh/chị có cần thêm thông tin về loại thẻ, ứng dụng hay biểu phí không ạ?"*

    # <Workflow>

    1. **Understand and respond to the customer inquiry**:
    - Carefully read the inquiry to identify the **main topic** and **customer intent**.
    - If the inquiry includes specific terms (e.g. card type, app feature, loan type, phí thường niên),  
    ➤ Proceed to generate an **expansive answer immediately** using all available information.
    - For multi-part inquiries, break them down and answer each part fully and systematically.
    - If the customer's wording is **partially vague**, try to provide general but helpful information on possible scenarios.  
    ➤ Follow this with a **polite clarifying question** to guide them to specify more details if needed.
    - If the intent or product/service is **too unclear** to provide any meaningful response:
    ➤ Ask a **short, polite clarifying question** to identify the missing info (e.g., “Anh/chị đang hỏi về loại thẻ nào ạ?”).
    - For **complex topics** (e.g. loan types or SmartBanking errors), provide available guidance first,  
    ➤ Then ask for missing inputs only if required to complete the answer.
    - **Clarification loop cap**: If 2 clarifications are ignored, politely direct the customer to **hotline 1900 9247** or the **nearest branch**.

    2. **Verify and support with specific details**:
    - When the customer specifies a card type, feature, service, loan type, or tech detail (e.g. SmartBanking, NFC),  
    ➤ Use that info to answer fully using verified documents.
    - If some details are **missing or unclear**, still provide a helpful general answer first,  
    ➤ Then ask a polite clarifying question if needed to refine the response.
    - Always check BIDVs available tools (e.g. SmartBanking capabilities, latest app version) to ensure accuracy for tech-related inquiries.
    - For issues involving technology:
    ➤ Offer troubleshooting steps (e.g. internet connection, app version, device restart).
    - **Document verification is mandatory**: Only share facts from official BIDV documents or provided context.
    ➤ If not available, politely direct the customer to hotline 1900 9247 or a nearby branch.

    3. **Explain automatic vs. manual processes clearly**:
    - If the topic involves systems like auto-renewals, automatic debits, or statement generation:
    ➤ State what is done automatically and what requires customer action.
    ➤ Use clear “nếu/thì” language to help customers understand exceptions and edge cases.

    4. Review the provided documents:
    - **Mandatory:** Thoroughly review <Additional banking knowledge> to address the inquiry using existing data before suggesting external contact.
    - Focus on features, terms, or services directly related to the inquiry and ensure that information is drawn from the most relevant sources to prevent false assumptions or misguidance.
    - Refer to the documents to understand the banking perspective.
    - If after review, no relevant document is found:
    ➤ Inform the customer politely and suggest the hotline or branch as fallback.
    ➤ Never speculate; clearly mark unavailable data.

    5. Provide an exact answer:
    - Do not make assumptions or fabricate information not present in the documents.
    - Respond clearly and concisely to all aspects of the inquiry in Vietnamese.
    - Use the Expansive-Answer Mode as described in Response Rules>

    6. **Proactively Uncover Sub-Intents**:
    - After each response, include a polite follow-up prompt to check whether the customer has additional, related needs or if their inquiry covers multiple concerns.
    - Your follow-up must be **context-aware**, ensures the customer feels fully supported and prevents missed follow-up service needs. Tailor the question to the topic just discussed.

    #### When to include a Card Sales Invitation in the Follow-up

    Include a follow-up that **invites the customer to explore card products** **only if most the following are true**:

    - The inquiry is related to **card benefits**, **fees**, **usage**, or **features** — e.g., phrases such as:
    - *“phí thường niên”*, *“hoàn tiền”*, *“mua sắm online”*, *“tính năng thẻ”*, *“rút tiền ATM”*
    - The customer has not expressed disinterest or refused to share information
    - The customers tone is polite, curious, or neutral
    - The conversation is not about complaints or error handling

    If these conditions are met, the bot should include a soft, friendly card suggestion.
    Ex: > *"Dạ BIDV đang có một số sản phẩm thẻ hoàn tiền và miễn phí thường niên rất hấp dẫn. Em có thể giới thiệu thêm nếu anh/chị quan tâm ạ?"*
        + If the user asked about card opening eligibility, ask if what card they want to open.
        + If they asked about loan eligibility, ask if they also need help with required documents or processing time.
    ---

    # <Response Rules>
    ## 1. Response:
    - Language: always Vietnamese.
    - Naming: Address the customer as "Anh/Chị" (non-gendered) and refer to yourself as "Em."
    - Style:
    + Use a conversational dialogue style with numeric and short bullet-point responses (up to 6 points).
    + List key content numerically first.
    + Follow markdown conventions.
    + **Emphasize key details using **bold text** or numbering.**
    + Ensure all instructional responses adhere to a numbered list format for clarity.
    + Adapt response structure to be conversational, using numeric bullet points where possible.
    - <Expansive-Answer Mode>:
    + Preserve full detail.
    + Merge overlapping chunks without loss.
    + Exceed token limits if needed.
    + Focus on completeness over brevity.  

    ## 2. Content:
    - Use only information from the provided documents; do not create information.
    - Highlight BIDV's offerings if asked about other banks, and advise checking the other bank’s resources.
    - Do not direct customers to contact BIDV for more information unless all document data is exhausted and no answer is available. If so, provide a standardized message directing to the BIDV helpline or website.

    # <Documents Rules>
    - Response full original detail on fees to ensure thoroughness and to cover all possible cases. Should also include VAT where applicable.
    - Check carefully for discontinued card types; always inform the customer if applicable.

    # <Additional banking knowledge>
    - An ATM card is a domestic debit card (thẻ ghi nợ nội địa), not an international debit card.
    - Visa Cashback card details can serve as reference when other cash-back cards lack specifics.
    - **Lock card** = temporary pause; **Cancel card** = permanent closure with final balance settlement.
    - **Statement date (ngày sao kê)** = the day each month when the credit-card balance is recorded and a bill is generated.
    - **Payment due date (ngày đến hạn)** = the last day to pay at least the minimum amount; usually 15 days after the statement date.
    - **Grace period** applies only when the full statement balance is paid by the due date; cash advances never enjoy a grace period.
    - **Minimum payment** = fixed percentage of the statement balance (or a floor amount) that must be paid to keep the account current.
    - **Cash advance** = any cash withdrawal from a credit line (ATM or POS); attracts interest from the transaction date and a separate fee.
    - **Purchase transaction** = retail or e-commerce payment; interest-free if settled in full within the grace period.
    - **Foreign-currency transaction** = any card charge cleared in non-VND; incurs a conversion fee (phí chuyển đổi ngoại tệ) plus network FX rate.
    - **Internal transfer (nội bộ)** = BIDV→BIDV; **Interbank transfer (liên ngân hàng)** = BIDV→other banks; fees and limits differ.
    - **Contactless (NFC/payWave/payPass)** requires (i) card with “◔” logo, (ii) NFC-enabled POS, and (iii) user phone-wallet if no plastic.
    - **Priority / Premier / KHCC** = customer segment granted higher limits and selected fee waivers, but still pays some service charges.
    - **SmartBanking** = BIDVs retail mobile app; functions include transfers, bill pay, card lock/unlock, and e-statement download.
    - **e-Statement** = PDF billing file sent by email or viewed in SmartBanking; printing at a branch is a separate paid service.
    - **Auto-debit** = automatic deduction of credit-card minimum/full payment from a linked BIDV account on due date.
    - **Card replacement** (làm lại thẻ) differs from **re-issuance/renewal** (phát hành lại khi hết hạn); fees and card numbers may change.
    - **Card lock (khóa thẻ)** = reversible security block; **Card cancel/close (hủy thẻ)** = permanent, requires balance settlement and destroys credit line.
    - **Annual fee** = recurring charge for holding a card; waiver rules depend on annual spending thresholds or promotional bundles.
    - **VAT on bank fees** in Vietnam is 10 %; unless a fee is explicitly stated “đã gồm VAT”, assume VAT is added separately.
    - **“Miễn phí” vs. “0 đồng”** in documents both mean the customer pays nothing, even though the bank still reports VAT = 0.
    - **Discontinued product** = card or service no longer issued; existing holders keep benefits until normal expiry unless informed otherwise.
    - **Complaint (khiếu nại)** = customer claim of error or unauthorised charge; triggers a regulated investigation SLA and may require dispute form.


### Initialization
As <Role>, follow <Response Rules> and <Documents Rules> STRICTLY and engage the customer according to <Workflow>. Use conditional flags like `can_suggest_card` for consistent logic and better sales behavior. Implement clear step-by-step guidelines for processes involving conditional actions to ensure logical consistency. Ensure that all responses are verified against provided documentation and use fallback recommendations to official BIDV resources when needed. 
    """

    context_extracted: str = dspy.InputField()
    chat_history_extracted: str = dspy.InputField()
    question: str = dspy.InputField()

    
    bank_response: str = dspy.OutputField()


## d. Teacher and Student Model

In [356]:
class OptimizationConfig:
    def __init__(self, 
                 student_model="gpt-4o-mini", 
                 teacher_model="gpt-4o", 
                 cache=True,
                 cache_in_memory=False,
                 num_retries=3,
                 max_tokens=12000,
                 max_bootstrapped_demos=0,
                 max_labeled_demos=0,
                 output_path="faq_optimized_prompt.json"):
        self.student_model = student_model
        self.teacher_model = teacher_model
        self.cache = cache
        self.cache_in_memory = cache_in_memory
        self.num_retries = num_retries
        self.max_tokens = max_tokens
        self.max_bootstrapped_demos = max_bootstrapped_demos
        self.max_labeled_demos = max_labeled_demos
        self.output_path = output_path

In [357]:
class FAQPromptOptimizer:
    def __init__(self, config=None):
        """Initialize the optimizer with configuration."""
        self._config = config or OptimizationConfig()
        self._module = dspy.ChainOfThought(FAQProgSig)
        self._optimized_save_path = self._config.output_path
        self._setup_models()
        
    def _setup_models(self):
        """Set up student (inference) and teacher (optimization) models."""
        self._student_lm = dspy.LM(
            model=self._config.student_model,
            api_key=os.environ.get("OPENAI_API_KEY"),
            cache=self._config.cache,
            cache_in_memory=self._config.cache_in_memory,
            num_retries=self._config.num_retries,
            max_tokens=self._config.max_tokens,
            temperature=0.0000001,
            request_timeout=120,
        )

        self._teacher_lm = dspy.LM(
            model=self._config.teacher_model,
            api_key=os.environ.get("OPENAI_API_KEY"),
            cache=self._config.cache,
            cache_in_memory=self._config.cache_in_memory,
            num_retries=self._config.num_retries + 2,  # Higher retry count for teacher
            max_tokens=self._config.max_tokens,
            temperature=0.0000001,
            request_timeout=180,  # Longer timeout for teacher
        )
    
    def optimize(self, dataset, metric):
        """Run the MIPROv2 optimization process."""
        # Configure the default LM to be the student model
        dspy.configure(lm=self._student_lm)
        
        # Set up optimizer with both models
        optimizer = MIPROv2(
            metric=metric,
            max_bootstrapped_demos=self._config.max_bootstrapped_demos,
            max_labeled_demos=self._config.max_labeled_demos,
            auto='medium',
            prompt_model=self._teacher_lm,  # Uses teacher model for prompt optimization
            task_model=self._student_lm,    # Uses student model for inference
            teacher_settings=dict(lm=self._teacher_lm),
        )
        
        print("Starting optimization...")
        optimized_program = optimizer.compile(
            self._module,
            trainset=dataset.train,
            valset=dataset.dev,
            requires_permission_to_run=False,
        )
        
        # Save the optimized prompt
        optimized_program.save(self._optimized_save_path)
        print(f"Optimization complete! Saved to {self._optimized_save_path}")
        
        return optimized_program

## Evaluate the baseline model performance


In [None]:
from dspy import Evaluate

def evaluate_model(module, dataset, metric, sample_size=None, display_progress=True, display_table=3):
    """
    Evaluate a model using dspy.Evaluate for better performance and reporting.
    
    Args:
        module: The DSPy program to evaluate
        dataset: The dataset containing examples
        metric: The scoring metric to use
        sample_size: Optional number of examples to evaluate (None for all)
        display_progress: Whether to show a progress bar
        display_table: How many example results to display in detail (False for none)
    
    Returns:
        float: Average evaluation score
    """
    # Take a subset of examples if needed
    eval_examples = dataset.dev if sample_size is None else dataset.dev[:sample_size]
    
    print(f"Evaluating model on {len(eval_examples)} examples...")
    
    # Create the evaluator
    evaluator = Evaluate(
        devset=eval_examples,
        metric=metric,
        display_progress=display_progress,
        display_table=display_table,
        return_all_scores=True,
        return_outputs=True,     
        num_threads=4
    )
    
    # Run evaluation
    evaluation_result = evaluator(module)
    
    # Handle tuple return types from Evaluate
    if hasattr(evaluation_result, 'score'):
        avg_score = evaluation_result.score
    elif isinstance(evaluation_result, tuple) and len(evaluation_result) >= 1:
        # First element of tuple is the score
        avg_score = evaluation_result[0]
    else:
        # Fall back to calculating average manually if needed
        scores = [metric(ex, pred) for ex, pred in zip(eval_examples, evaluation_result.outputs)] if hasattr(evaluation_result, 'outputs') else []
        avg_score = sum(scores) / len(scores) if scores else 0.0
    print(f"\nModel average score: {avg_score:.4f} on {len(eval_examples)} samples")
    
    return avg_score

# Use the updated evaluation function for the baseline model
base_module = dspy.ChainOfThought(FAQProgSig)

prompt_of_field_to_optimize = "bank_response"
semantic = True
dataset = FAQDataset(
    data_path="/Users/nami-macos/Documents/GitHub/BIDV-DSPy-Optimization/Testset FAQ_multiturn_FAQ_Chain - FAQ Chain 1706.csv",
    selected_output_field=prompt_of_field_to_optimize,
)
metric = FAQScore(
    selected_output_field=prompt_of_field_to_optimize, 
    semantic=semantic
)

# Evaluate the baseline model
baseline_score = evaluate_model(
    base_module, 
    dataset, 
    metric, 
    sample_size=None,  # Use all examples
    display_progress=True,
    display_table=3     # Show 3 examples in detail
)

Evaluating model on 152 examples...
Average Metric: 94.47 / 152 (62.2%): 100%|██████████| 152/152 [00:00<00:00, 890.75it/s]

2025/06/17 17:47:08 INFO dspy.evaluate.evaluate: Average Metric: 94.4731097691594 / 152 (62.2%)





Unnamed: 0,context_extracted,chat_history_extracted,question,example_bank_response,reasoning,pred_bank_response,FAQScore
0,"{ ""title"": ""Giảm 20% mua sắm tại Aeon Mall cùng thẻ BIDV JCB"", ""co...","Customer: còn ctkm nào cho thẻ JCB không BIDV: Dạ, hiện tại BIDV đ...",AEON Hải Phòng không đc áp dụng à,Chương trình ưu đãi giảm 20% khi mua sắm tại Aeon Mall áp dụng cho...,<think> 1. **Understand the Inquiry**: The customer is asking if t...,"Dạ, chương trình khuyến mãi giảm 20% khi mua sắm tại Aeon Mall áp ...",✔️ [0.500]
1,"{ ""Description"": ""Gồm các loại phí khi sử dụng thẻ ghi nợ quốc tế ...",Customer: Sao tài khoản mình bị trừ phí thường niên thẻ tháng 12. ...,Phí rút tiền thẻ Mastercard tại nước ngoài,- Phí rút tiền mặt tại ATM/POS nước ngoài cho thẻ ghi nợ quốc tế M...,<think> 1. **Identify the Inquiry**: The customer is asking about ...,"Dạ, về phí rút tiền mặt bằng thẻ Mastercard tại nước ngoài, BIDV á...",✔️ [0.400]
2,"{ ""Description"": ""Gồm LÃI SUẤT và các loại phí khi sử dụng THẺ TI...","Customer: tôi mới mở thẻ JCB, cần chi tiêu bao nhiêu thì được miễn...","tôi mới mở thẻ JCB, cần chi tiêu bao nhiêu thì được miễn phí thườn...",Doanh số chi tiêu để miễn PTN năm đầu Thẻ JCB Ultimate Từ 10.000.0...,<think> 1. **Identify the Inquiry**: The customer is asking about ...,"Dạ, hiện tại em không có thông tin cụ thể về điều kiện miễn phí th...",



Model average score: 62.1500 on 152 samples


In [371]:
eval_examples = dataset.dev

# Create the evaluator
evaluator = Evaluate(
    devset=eval_examples,
    metric=metric,
    display_progress=True,
    display_table=True,
    return_all_scores=True,
    return_outputs=True,     
    num_threads=4
)
# Run evaluation of the baseline model
evaluation_result = evaluator(base_module)


  0%|          | 0/152 [00:00<?, ?it/s]

Average Metric: 110.68 / 152 (72.8%): 100%|██████████| 152/152 [00:00<00:00, 288.36it/s]

2025/06/17 18:08:58 INFO dspy.evaluate.evaluate: Average Metric: 110.68070468585765 / 152 (72.8%)





Unnamed: 0,context_extracted,chat_history_extracted,question,example_bank_response,reasoning,pred_bank_response,FAQScore
0,"{ ""title"": ""Giảm 20% mua sắm tại Aeon Mall cùng thẻ BIDV JCB"", ""co...","Customer: còn ctkm nào cho thẻ JCB không BIDV: Dạ, hiện tại BIDV đ...",AEON Hải Phòng không đc áp dụng à,Chương trình ưu đãi giảm 20% khi mua sắm tại Aeon Mall áp dụng cho...,<think> - The customer is inquiring about the applicability of the...,"Dạ, chương trình khuyến mãi giảm 20% khi mua sắm tại Aeon Mall áp ...",✔️ [1.000]
1,"{ ""Description"": ""Gồm các loại phí khi sử dụng thẻ ghi nợ quốc tế ...",Customer: Sao tài khoản mình bị trừ phí thường niên thẻ tháng 12. ...,Phí rút tiền thẻ Mastercard tại nước ngoài,- Phí rút tiền mặt tại ATM/POS nước ngoài cho thẻ ghi nợ quốc tế M...,<think> - The inquiry is about the withdrawal fee for Mastercard a...,"Dạ, về phí rút tiền mặt tại ATM nước ngoài cho thẻ Mastercard của ...",✔️ [0.667]
2,"{ ""Description"": ""Gồm LÃI SUẤT và các loại phí khi sử dụng THẺ TI...","Customer: tôi mới mở thẻ JCB, cần chi tiêu bao nhiêu thì được miễn...","tôi mới mở thẻ JCB, cần chi tiêu bao nhiêu thì được miễn phí thườn...",Doanh số chi tiêu để miễn PTN năm đầu Thẻ JCB Ultimate Từ 10.000.0...,<think> - The customer is inquiring about the annual fee waiver co...,"Dạ, hiện tại em không có thông tin cụ thể về điều kiện miễn phí th...",
3,"{ ""title"": ""Tặng Voucher 100.000 VND khi mua sắm tại Lotte Departm...","Customer: còn ctkm nào cho thẻ JCB không BIDV: Dạ, hiện tại BIDV đ...",vậy là hết hạn rồi à,"Dạ, em xin lỗi vì hiện tại em không có thông tin cập nhật về chươn...",<think> - The customer is inquiring about the expiration of a prom...,"Dạ, chương trình ưu đãi tặng voucher mua sắm trị giá 100.000 VND k...",✔️ [0.500]
4,"{ ""question"": ""Thanh toán thẻ qua Mã phản hồi nhanh (Quick Respons...",Customer: thẻ nội địa với thẻ quốc tế thẻ nào phí thường niên thấp...,BIDV có dịch vụ thanh toán bằng QR code qua thẻ tín dụng không?,"Dạ, BIDV hiện chưa hỗ trợ dịch vụ thanh toán bằng mã QR code qua t...","<think> - Đầu tiên, tôi cần xác định xem BIDV có cung cấp dịch vụ ...","Dạ, hiện tại BIDV không hỗ trợ dịch vụ thanh toán bằng QR code cho...",✔️ [1.000]
...,...,...,...,...,...,...,...
147,"{ ""question"": ""Đơn vị chấp nhận thẻ (viết tắt là ĐVCNT), đơn vị ch...",Customer: Sao tài khoản mình bị trừ phí thường niên thẻ tháng 12. ...,alo,"Xin chào, em có thể giúp gì cho Anh/Chị ạ?","<think> - The customer has simply said ""alo,"" which does not provi...","Dạ, chào Anh/Chị! Em có thể hỗ trợ gì cho Anh/Chị hôm nay ạ? Nếu c...",✔️ [0.750]
148,"{ ""Description"": ""Gồm các loại phí khi sử dụng dịch vụ ngân hàng đ...",Customer: Sao tài khoản mình bị trừ phí thường niên thẻ tháng 12. ...,Cho mình hỏi đăng ký dịch vụ OTT của ngân hàng BIDV hàng tháng có ...,Dịch vụ OTT (nhắn tin qua Smartbanking) của BIDV là miễn phí hàng ...,"<think> - Đầu tiên, tôi cần xác định thông tin về dịch vụ OTT (nhắ...","Dạ, về dịch vụ OTT (nhắn tin qua SmartBanking) của ngân hàng BIDV,...",✔️ [1.000]
149,"{ ""question"": ""Khách hàng mục tiêu, Điều kiện mở Thẻ BIDV JC...",,Công dụng của thẻ jcb well-being,"Dạ, thẻ BIDV JCB Well-being có các công dụng và ưu đãi như sau: Kh...",<think> - The inquiry is about the functions and benefits of the B...,"Dạ, thẻ BIDV JCB Well-being có nhiều công dụng và lợi ích hấp dẫn ...",✔️ [0.900]
150,"{ ""Description"": ""Gồm các loại phí khi sử dụng thẻ ghi nợ quốc tế ...",[{'content': 'Tôi dùng thẻ JCB của MB bank rút tiền trên cây bidv ...,dạ cho mình hỏi mình muốn làm thẻ gnqt BIDV để đăng kí paypal thì ...,Thẻ ghi nợ quốc tế có phí phát hành là 50.000 đồng đối với thẻ Rea...,<think> Current state: + Discussed card name: Thẻ ghi nợ quốc tế (...,"Dạ, để mở thẻ ghi nợ quốc tế của BIDV, anh/chị sẽ không phải trả p...",✔️ [1.000]


In [None]:
# rows = evaluation_result[1]  # List of rows
# df = pd.DataFrame(rows)      # Convert list to DataFrame
# df.to_excel("evaluation_result.xlsx", index=False)


## Auto Optimization

In [None]:
# Set up the dataset and metric
prompt_of_field_to_optimize = "bank_response"
semantic = True

dataset = FAQDataset(
    data_path="/Users/nami-macos/Documents/GitHub/BIDV-DSPy-Optimization/Testset FAQ_multiturn_FAQ_Chain - FAQ Chain 1706.csv",
    selected_output_field=prompt_of_field_to_optimize,
)
metric = FAQScore(
    selected_output_field=prompt_of_field_to_optimize, 
    semantic=semantic
)

# Create and run the optimizer
config = OptimizationConfig(
    student_model="gpt-4o-mini",
    teacher_model="gpt-4o",
    max_bootstrapped_demos=0,
    max_labeled_demos=0,
    output_path="faq_optimized_prompt.json"
)

optimizer = FAQPromptOptimizer(config)
optimized_program = optimizer.optimize(dataset, metric)

In [None]:
# prompt_of_field_to_optimize = "bank_response"  # Instead of "bank_response"
# semantic = True

# dataset = FAQDataset(
#     data_path="/Users/nami-macos/Documents/BIDV-1/Testset FAQ_multiturn_FAQ_Chain - FAQ Chain filtered.csv",
#     selected_output_field=prompt_of_field_to_optimize,
# )
# metric = FAQScore(
#     selected_output_field=prompt_of_field_to_optimize, semantic=semantic
# )

# # Set up the optimization program
# program = dspy.ChainOfThought(FAQProgSig)

# teleprompter = MIPROv2(
#     metric=metric, 
#     max_bootstrapped_demos=0, 
#     max_labeled_demos=0, 
#     auto='medium',
#     # num_threads=10
#     # num_candidates=5
#     )
# optimized_program = teleprompter.compile(
#     program,
#     trainset=dataset.train,
#     valset=dataset.dev,
#     requires_permission_to_run=False,
#     # num_trials=7       
# )

# # Save the optimized prompt to a file
# optimized_program.save("faq_optimized_prompt.json")