In [1]:
import os
from dotenv import load_dotenv
load_dotenv()
from pathlib import Path
from openai import AzureOpenAI

In [13]:
import os
import json
import shutil
from openai import AzureOpenAI

def azure_openai_model_for_optimizations(intent, utterances):
    """Ask an Azure OpenAI chat deployment to consolidate the utterances of one intent.

    Args:
        intent: Identifier of the intent; interpolated into the user prompt.
        utterances: The utterances to optimize. They are JSON-encoded into the
            prompt so the model sees the same format it is asked to return.

    Returns:
        The non-empty list of strings parsed from the model reply. If the
        reply is missing, is not valid JSON, or is not a non-empty list of
        strings, a warning is printed and ``utterances`` is returned unchanged.
        Empty ``utterances`` is returned as-is without calling the service.

    Raises:
        KeyError: If ``AZURE_OPENAI_ENDPOINT`` or ``AZURE_OPENAI_API_KEY`` is
            not set in the environment.
        Exception: Errors raised by the client call are not caught here.
    """
    if not utterances:
        # Nothing to consolidate; avoid a paid request whose reply could only
        # be invented content.
        return utterances

    endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
    deployment = "gpt-4o"
    subscription_key = os.environ["AZURE_OPENAI_API_KEY"]

    client = AzureOpenAI(
        azure_endpoint=endpoint,
        api_key=subscription_key,
        api_version="2024-05-01-preview",
    )

    # A Python repr would show single-quoted strings, which nudges the model
    # towards single-quoted (invalid JSON) output; send real JSON instead.
    utterances_json = json.dumps(utterances, ensure_ascii=False, default=str)

    # Prompt
    chat_prompt = [
        {
            "role": "system",
            "content": (
                "You are an expert language model trained to optimize and consolidate user utterances for NLU systems. "
                "Your task is to analyze a list of user utterances associated with a specific intent and generate a minimized, "
                "high-quality list of optimized utterances while preserving full intent coverage. "
                "Return only the list in this format: [\"Utterance1\", \"Utterance2\", ...]"
            ),
        },
        {
            "role": "user",
            "content": f"""Optimize the following utterances for the intent: '{intent}'.

Here is the list of input utterances:
{utterances_json}

Return the optimized utterance list in plain JSON array format only: [ ... ]""",
        },
    ]

    completion = client.chat.completions.create(
        model=deployment,
        messages=chat_prompt,
        max_tokens=800,
        temperature=0.7,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        stream=False,
    )

    # The reply text can be absent (no choices, or content set to None), so
    # read it defensively instead of calling .strip() on it directly.
    choices = completion.choices
    response_text = choices[0].message.content if choices else None

    optimized = _parse_utterance_list(response_text)
    if optimized is None:
        print(f"[Warning] Could not parse GPT response for intent: {intent}")
        return utterances  # Fallback to original if parsing fails

    return optimized


def _parse_utterance_list(response_text):
    """Return the non-empty list of strings encoded in ``response_text``, else ``None``."""
    if not response_text:
        return None

    text = response_text.strip()
    if text.startswith("```"):
        # Drop a surrounding Markdown code fence; the opening line may carry a
        # language tag such as "json".
        fenced = text.splitlines()[1:]
        if fenced and fenced[-1].strip() == "```":
            fenced = fenced[:-1]
        text = "\n".join(fenced).strip()

    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        return None

    # Valid JSON is not enough: the caller stores the result as the question
    # list, so anything but a non-empty list of strings is rejected.
    if not isinstance(parsed, list) or not parsed:
        return None
    if not all(isinstance(item, str) for item in parsed):
        return None
    return parsed


def update_qna_questions_with_backup(json_file_path):
    """Optimize the question list of every QnA entry and write the result to a new file.

    The input file is first copied to ``<base>_copy<ext>``; the data is read
    from that copy, each entry under the top-level ``"qna"`` key has its ``"q"``
    list passed to ``azure_openai_model_for_optimizations`` (keyed by the
    entry's ``"qid"``), and the result is written to ``<base>_updated<ext>``.
    The input file itself is never modified.

    An entry keeps its existing questions when it has none to optimize
    (missing, ``null`` or empty ``"q"``), when the optimizer raises, or when
    the optimizer returns anything other than a non-empty list of strings.

    Args:
        json_file_path: Path of the JSON file to process.

    Returns:
        Path of the ``_updated`` file that was written.
    """
    base, ext = os.path.splitext(json_file_path)
    copy_file_path = f"{base}_copy{ext}"
    shutil.copy(json_file_path, copy_file_path)

    with open(copy_file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    for entry in data.get("qna", []):
        qid = entry.get("qid")
        # `"q": null` makes .get("q", []) return None, and len(None) would
        # abort the whole run; treat it like an empty list.
        questions = entry.get("q") or []
        print(f"\n[Original Qs for {qid}]: {len(questions)}")
        if not questions:
            # Nothing to optimize: leave the entry untouched and skip the call.
            continue

        try:
            optimized_questions = azure_openai_model_for_optimizations(qid, questions)
        except Exception as e:
            # Best effort per entry: report and keep the original questions.
            print(f"[Error optimizing QID {qid}]: {e}")
            continue

        # Never write a malformed result (dict, number, empty list, ...) into
        # the output file in place of the question list.
        is_valid = (
            isinstance(optimized_questions, list)
            and bool(optimized_questions)
            and all(isinstance(q, str) for q in optimized_questions)
        )
        if not is_valid:
            print(f"[Error optimizing QID {qid}]: optimizer did not return a non-empty list of strings")
            continue

        print(f"[Optimized Qs for {qid}]: {len(optimized_questions)}")
        entry["q"] = optimized_questions

    updated_file_path = f"{base}_updated{ext}"
    with open(updated_file_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    return updated_file_path


# Example call: processes the sample QnA file, invoking the optimizer once per
# entry under "qna". As the cell's last expression, the returned path of the
# "_updated" JSON file is displayed as the cell result.
update_qna_questions_with_backup("data/sample_qna_test.json")


[Original Qs for 001_Test_Intent_A]: 3
[Optimized Qs for 001_Test_Intent_A]: 2

[Original Qs for 002_Test_Intent_B]: 3
[Optimized Qs for 002_Test_Intent_B]: 3

[Original Qs for 003_Test_Intent_C]: 2
[Optimized Qs for 003_Test_Intent_C]: 2

[Original Qs for 004_Test_Intent_D]: 3
[Optimized Qs for 004_Test_Intent_D]: 3

[Original Qs for 005_Test_Intent_E]: 3
[Optimized Qs for 005_Test_Intent_E]: 3


'data/sample_qna_test_updated.json'