## Batch Retrieval Process Begin

In [1]:
import os
from openai import AzureOpenAI
import json
import pandas as pd
from typing import List, Any
import sys
import sqlite3
from datetime import datetime
import re

In [2]:
# Azure OpenAI client used by the retrieval helpers below.
# The API key is read from the OPENAI_API_KEY environment variable; a missing
# variable raises KeyError here rather than failing later on the first request.
client = AzureOpenAI(
    api_version="2025-01-01-preview",
    azure_endpoint="https://voicecast-gpt-france.openai.azure.com/",
    api_key=os.environ["OPENAI_API_KEY"],
)

In [3]:
def get_tracking_db_connection(name: str) -> Any:
    """Return a cursor on ``<name>.db`` if it contains a table called ``name``.

    Args:
        name: Used both as the database file stem (``<name>.db`` in the current
            working directory) and as the table name to look for.

    Returns:
        A ``sqlite3.Cursor`` whose ``.connection`` is the open connection, or
        ``None`` when the table does not exist or any error occurs (the error
        is printed).
    """
    print("*" * 8, "Check DB Connection")
    conn = None
    try:
        # sqlite3.connect creates the file when it does not exist yet, so the
        # table lookup below is what actually tells us whether the DB is usable.
        conn = sqlite3.connect(f'{name}.db')
        cursor = conn.cursor()
        # Bind the table name instead of formatting it into the SQL text, so a
        # name containing quotes cannot break (or alter) the statement.
        cursor.execute(
            "SELECT count(name) FROM sqlite_master WHERE type='table' AND name=?;",
            (name,),
        )
        if cursor.fetchone()[0] > 0:
            return cursor
    except Exception as e:
        print(e)

    # Table missing or lookup failed: do not leak the connection we opened.
    if conn is not None:
        conn.close()
    return None

In [4]:
def get_batchRequests(TRACK_DB: str, cursor) -> pd.DataFrame:
    """Load every row of the tracking table that is still flagged ``'TRACK'``.

    Args:
        TRACK_DB: Name of the tracking table to query.
        cursor: Cursor whose ``.connection`` is used to run the query.

    Returns:
        DataFrame with all columns of the matching rows.
    """
    print("*" * 8, "Get Requests to Track")
    pending_query = f"Select * from {TRACK_DB} where tracking_reference = 'TRACK'"
    pending_requests = pd.read_sql_query(pending_query, cursor.connection)
    return pending_requests

In [5]:
def retrieve_batch_completions(output_file_id: str) -> List[Any]:
    """Download a batch output file and parse it as JSON Lines.

    Args:
        output_file_id: Id of the file to fetch through ``client.files.content``.

    Returns:
        One parsed JSON object per non-blank line, in file order. An empty
        file yields an empty list.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    print("*" * 8, "Retrieve Batch Completions")
    file_response = client.files.content(output_file_id)

    # Split on '\n' only (not str.splitlines): JSON strings may legally contain
    # other Unicode line separators unescaped. Blank lines are skipped so an
    # empty file or a stray empty line does not reach json.loads('').
    return [
        json.loads(raw_response)
        for raw_response in file_response.text.split('\n')
        if raw_response.strip()
    ]

In [6]:
def json_result_decode(json_result_set: List[Any]) -> List[List[Any]]:
    """Flatten parsed batch output entries into score records.

    Each entry's first choice carries the model answer as a JSON string with
    the keys ``score``, ``reasoning`` and ``satisfaction_label``.

    Args:
        json_result_set: Parsed batch output entries (dicts with ``custom_id``
            and ``response``).

    Returns:
        One ``[custom_id, created, score, satisfaction_label, reasoning]`` list
        per entry, where ``created`` is the response timestamp rendered with
        ``str(datetime.fromtimestamp(...))`` (local time).
    """
    print("*" * 8, "Creating structured Records")

    def _as_record(entry: Any) -> List[Any]:
        sender_id = entry["custom_id"]
        raw_answer = entry['response']['body']['choices'][0]['message']['content']
        # Drop a comma that directly precedes a closing brace/bracket so that
        # answers with trailing commas still parse as JSON.
        cleaned_answer = re.sub(r',\s*([}\]])', r'\1', raw_answer)
        verdict = json.loads(cleaned_answer)
        score = verdict["score"]
        created_date = str(datetime.fromtimestamp(entry['response']['body']['created']))
        feedback_text = verdict['reasoning']
        label = verdict["satisfaction_label"]
        return [sender_id, created_date, score, label, feedback_text]

    return [_as_record(entry) for entry in json_result_set]

In [7]:
def check_resultant_db_connection(db_name: str):
    """Return a cursor on ``<db_name>.db`` if it contains a table called ``db_name``.

    Args:
        db_name: Used both as the database file stem (``<db_name>.db`` in the
            current working directory) and as the table name to look for.

    Returns:
        A ``sqlite3.Cursor`` whose ``.connection`` is the open connection, or
        ``None`` when the table does not exist or any error occurs (the error
        is printed).
    """
    print("*" * 8, "Check DB Connection")
    conn = None
    try:
        # sqlite3.connect creates the file when it does not exist yet, so the
        # table lookup below is what decides whether results can be appended.
        conn = sqlite3.connect(f'{db_name}.db')
        cursor = conn.cursor()
        # Bind the table name instead of formatting it into the SQL text.
        cursor.execute(
            "SELECT count(name) FROM sqlite_master WHERE type='table' AND name=?;",
            (db_name,),
        )
        if cursor.fetchone()[0] > 0:
            return cursor
    except Exception as e:
        # Report the failure instead of swallowing it silently; the caller
        # still receives None and can fall back to creating the table.
        print(e)

    # Table missing or lookup failed: do not leak the connection we opened.
    if conn is not None:
        conn.close()
    return None

In [8]:
def create_resultant_db(db_name: str):
    """Create the results table (if absent) in ``<db_name>.db`` and return a cursor.

    Args:
        db_name: Used both as the database file stem and as the table name.

    Returns:
        A cursor on the newly opened connection; the connection stays open and
        is reachable through ``cursor.connection``.
    """
    print("*" * 8, "Create DB")
    # All five columns are declared TEXT, matching the record layout produced
    # by json_result_decode.
    create_table_sql = f'''
        CREATE TABLE IF NOT EXISTS {db_name} (
            senderId TEXT,
            creation_time TEXT,
            score TEXT,
            label TEXT,
            feedback_text TEXT
        )
    '''
    # Connecting creates the database file when it does not exist yet.
    result_cursor = sqlite3.connect(f'{db_name}.db').cursor()
    result_cursor.execute(create_table_sql)
    return result_cursor

In [9]:
def append_records(cursor, RESULT_DB, completion_results: List[List[Any]]):
    """Append decoded completion records to the results table and commit.

    Args:
        cursor: Cursor whose ``.connection`` receives the rows.
        RESULT_DB: Name of the destination table.
        completion_results: Rows shaped as
            ``[senderId, creation_time, score, label, feedback_text]``.

    Returns:
        ``True`` once the rows are written and the transaction is committed.
    """
    print("*" * 8, "Appending Retreived Results")
    column_names = ["senderId", "creation_time", "score", "label", "feedback_text"]
    db_connection = cursor.connection

    records = pd.DataFrame(completion_results, columns=column_names)
    records.to_sql(name=RESULT_DB, con=db_connection, if_exists="append", index=False)

    db_connection.commit()
    return True

In [10]:
def update_track_status(cursor, db_name: str, batchId: str):
    """Mark one batch as completed in the tracking table and commit.

    Sets both ``tracking_reference`` and ``job_status`` to ``'Completed'`` for
    the row(s) whose ``batch_id`` equals ``batchId``.

    Args:
        cursor: Cursor on the tracking database.
        db_name: Name of the tracking table.
        batchId: Batch id to mark as completed.

    Returns:
        ``True`` once the update is committed.
    """
    print("*" * 8, "Update Track Status")
    # Values are bound as parameters: this avoids formatting the batch id into
    # the SQL text and avoids double-quoted string literals, which SQLite only
    # accepts as strings through a legacy fallback. The table name cannot be
    # bound, so it is still interpolated.
    cursor.execute(
        f"UPDATE {db_name} SET tracking_reference = ?, job_status = ? WHERE batch_id = ?",
        ("Completed", "Completed", batchId),
    )

    cursor.connection.commit()
    return True

In [11]:
# Table names; each helper also uses the name as the stem of its "<name>.db" file.
TRACK_DB, RESULT_DB = "track_status", "scores"

# Without an existing tracking table there is nothing to retrieve, so stop here.
cursor_tracking = get_tracking_db_connection(TRACK_DB)
if cursor_tracking is None:
    sys.exit("No Connection Found")

# Batches still flagged as 'TRACK'.
df = get_batchRequests(TRACK_DB, cursor_tracking)

# Reuse the results table when it exists, otherwise create it.
cursor_scores = check_resultant_db_connection(RESULT_DB) or create_resultant_db(RESULT_DB)

******** Check DB Connection
******** Get Requests to Track
******** Check DB Connection


In [12]:
# Check every tracked batch. The original loop ended with an unconditional
# `break`, so only the first tracked batch was handled per run.
for batch_id in df["batch_id"]:

    batch_response = client.batches.retrieve(batch_id)
    if batch_response.status != "completed":
        # update_track_status is not called, so the row keeps its 'TRACK' flag
        # and is selected again by get_batchRequests on the next run.
        continue

    output_file_id = batch_response.output_file_id
    if not output_file_id:
        continue

    result_set = retrieve_batch_completions(output_file_id)
    completion_results = json_result_decode(result_set)

    # Only flag the batch as completed once its records are stored.
    if append_records(cursor_scores, RESULT_DB, completion_results):
        update_track_status(cursor_tracking, TRACK_DB, batch_id)

******** Retrieve Batch Completions
******** Creating structured Records
******** Appending Retreived Results
******** Update Track Status


In [36]:
# Manual inspection: open the tracking database directly, without the
# table-existence check done by get_tracking_db_connection.
TRACK_DB = "track_status"
conn_tracking = sqlite3.connect(f'{TRACK_DB}.db')
cursor_tracking = conn_tracking.cursor()

In [32]:
# Development reset: there is no WHERE clause, so EVERY row of the tracking
# table is flagged for tracking again. Values are bound as parameters rather
# than written as double-quoted literals, which SQLite treats as identifiers
# first and only falls back to strings for legacy compatibility.
cursor_tracking.execute(
    f"UPDATE {TRACK_DB} SET tracking_reference = ?, job_status = ?",
    ("TRACK", "validating"),
)

<sqlite3.Cursor at 0x10d91dac0>

In [33]:
conn_tracking.commit()

In [37]:
pd.read_sql_query(f"Select * from {TRACK_DB}", cursor_tracking.connection)

Unnamed: 0,batch_id,job_status,creation_time,tracking_reference
0,batch_00baf5e7-c184-46aa-9e81-b79d43cccc6b,Completed,1751996729,Completed
1,batch_12bd4672-183e-4ab7-a486-7a71c186951d,Completed,1751996734,Completed
2,batch_d6c25594-6c5d-4b7a-bd25-e7de10e7ed5e,Completed,1751996740,Completed
3,batch_bbfe2fbd-6cbc-4f72-aa31-d7be2f351436,Completed,1751996750,Completed
4,batch_fbf8b159-bac6-4731-b2e1-f790902f2316,validating,1752128766,TRACK


In [35]:
conn_tracking.close()

In [29]:
# Manual inspection: open the results database directly, without the
# table-existence check done by check_resultant_db_connection.
RESULT_DB = "scores"
conn_scores = sqlite3.connect(f'{RESULT_DB}.db')
cursor_scores = conn_scores.cursor()

In [30]:
pd.read_sql_query(f"Select * from {RESULT_DB}", cursor_scores.connection)

Unnamed: 0,senderId,creation_time,score,label,feedback_text
0,Ny7i23GjoezOA_h6NjwIK,2025-07-08 23:59:46,4,Satisfied,Positive:\n• The bot clearly understood the us...
1,btLyma2P7Yq2Owe9R5O17,2025-07-08 23:59:46,4,Satisfied,Positive:\n• The chatbot guided the user throu...
2,yCCKo0asCCrhjWVvgCGQw,2025-07-09 00:00:01,2,Dissatisfied,Positive:\n• The bot provided some basic troub...
3,btLyma2P7Yq2Owe9R5O17,2025-07-08 23:59:16,4,Satisfied,Positive:\n• The chatbot provided a relevant p...
4,Ny7i23GjoezOA_h6NjwIK,2025-07-08 23:59:16,4,Satisfied,Positive:\n• The bot correctly understood the ...
5,yCCKo0asCCrhjWVvgCGQw,2025-07-08 23:59:16,2,Dissatisfied,Positive:\n• The bot provided stepwise trouble...
6,btLyma2P7Yq2Owe9R5O17,2025-07-08 23:59:15,4,Satisfied,Positive:\n• The bot understood the user's int...
7,Ny7i23GjoezOA_h6NjwIK,2025-07-08 23:59:15,4,Satisfied,Positive:\n• The bot quickly recognized the us...
8,yCCKo0asCCrhjWVvgCGQw,2025-07-08 23:59:15,2,Dissatisfied,Positive:\n• The bot attempted troubleshooting...
9,Ny7i23GjoezOA_h6NjwIK,2025-07-08 23:59:46,4,Satisfied,Positive:\n• The bot clearly understood the us...


In [25]:
cursor_scores.connection.close()

In [14]:
# NOTE(review): in top-to-bottom order this cell comes right after the cell
# that closes cursor_scores.connection, so it only works if the connection has
# been re-opened in between (the execution counts show it was run earlier).
pd.read_sql_query(f"Select * from {RESULT_DB}", cursor_scores.connection)

Unnamed: 0,senderId,creation_time,score,label,feedback_text
0,btLyma2P7Yq2Owe9R5O17,2025-07-08 23:59:15,4,Satisfied,Positive:\n• The bot understood the user's int...
1,Ny7i23GjoezOA_h6NjwIK,2025-07-08 23:59:15,4,Satisfied,Positive:\n• The bot quickly recognized the us...
2,yCCKo0asCCrhjWVvgCGQw,2025-07-08 23:59:15,2,Dissatisfied,Positive:\n• The bot attempted troubleshooting...
3,btLyma2P7Yq2Owe9R5O17,2025-07-08 23:59:15,4,Satisfied,Positive:\n• The bot understood the user's int...
4,Ny7i23GjoezOA_h6NjwIK,2025-07-08 23:59:15,4,Satisfied,Positive:\n• The bot quickly recognized the us...
5,yCCKo0asCCrhjWVvgCGQw,2025-07-08 23:59:15,2,Dissatisfied,Positive:\n• The bot attempted troubleshooting...
6,btLyma2P7Yq2Owe9R5O17,2025-07-08 23:59:15,4,Satisfied,Positive:\n• The bot understood the user's int...
7,Ny7i23GjoezOA_h6NjwIK,2025-07-08 23:59:15,4,Satisfied,Positive:\n• The bot quickly recognized the us...
8,yCCKo0asCCrhjWVvgCGQw,2025-07-08 23:59:15,2,Dissatisfied,Positive:\n• The bot attempted troubleshooting...
9,btLyma2P7Yq2Owe9R5O17,2025-07-08 23:59:15,4,Satisfied,Positive:\n• The bot understood the user's int...


In [263]:
completion_results

[['btLyma2P7Yq2Owe9R5O17', '2025-07-08 23:59:15', 4, 'Satisfied', "Positive:\n• The bot understood the user's intent quickly and routed the conversation effectively using button selections, offering a smooth experience.\n• The bot provided clear instructions and helpful links to resolve the SMIME certificate issue, including a separate resource for a special case (VS-NfD Service cancellation).\n• There were no unnecessary repetitions or confusion in the conversation flow, and next steps were logically presented.\n• Empathetic check-in at the end to confirm whether the issue was resolved shows intent to follow through.\nNegative:\n• The conversation could have benefited from a slightly warmer or more personalized touch from the bot (the tone was more informational than empathetic).\n• There was no explicit confirmation that the user's problem was solved, as the bot's last prompt expects a reply from the user.\n"], ['Ny7i23GjoezOA_h6NjwIK', '2025-07-08 23:59:15', 4, 'Satisfied', "Positiv

In [259]:
completion_results = [[]]

In [261]:
len(completion_results)

1

In [260]:
if completion_results:
    print("hi")

hi


In [224]:
sample_result = json.loads(result_set[0]['response']['body']['choices'][0]['message']['content'])#.keys()

In [251]:
sample_result

{'score': 4, 'satisfaction_label': 'Satisfied', 'reasoning': "Positive:\n• The bot understood the user's intent quickly and routed the conversation effectively using button selections, offering a smooth experience.\n• The bot provided clear instructions and helpful links to resolve the SMIME certificate issue, including a separate resource for a special case (VS-NfD Service cancellation).\n• There were no unnecessary repetitions or confusion in the conversation flow, and next steps were logically presented.\n• Empathetic check-in at the end to confirm whether the issue was resolved shows intent to follow through.\nNegative:\n• The conversation could have benefited from a slightly warmer or more personalized touch from the bot (the tone was more informational than empathetic).\n• There was no explicit confirmation that the user's problem was solved, as the bot's last prompt expects a reply from the user.\n"}

In [245]:
from datetime import datetime

In [248]:
str(datetime.fromtimestamp(completion['response']['body']['created']).date())

'2025-07-08'

In [234]:
result_set[0]['response'].keys()

dict_keys(['body', 'request_id', 'status_code'])

In [204]:
df

Unnamed: 0,batch_id,job_status,creation_time,tracking_reference
0,batch_00baf5e7-c184-46aa-9e81-b79d43cccc6b,validating,1751996729,TRACK
1,batch_12bd4672-183e-4ab7-a486-7a71c186951d,validating,1751996734,TRACK
2,batch_d6c25594-6c5d-4b7a-bd25-e7de10e7ed5e,validating,1751996740,TRACK
3,batch_bbfe2fbd-6cbc-4f72-aa31-d7be2f351436,validating,1751996750,TRACK


In [165]:
# Earlier draft of the retrieval loop.
# NOTE(review): `request_file` is not defined anywhere in the visible notebook,
# and this draft compares against "Track" while get_batchRequests filters on
# 'TRACK' — confirm which spelling the data uses before re-running.
for batchid, job_status, creationTime, tracking_status in request_file.values:
    if tracking_status == "Track":
        batch_response = client.batches.retrieve(batchid)
        if batch_response.status == "completed":
            output_file_id = batch_response.output_file_id
            if output_file_id:
                # Fixed the misspelt call `retrieve_batch_copletions`, which
                # raised NameError as soon as a completed batch was found.
                result_set = retrieve_batch_completions(output_file_id)


batch_31022594-dad3-4517-a33c-b4042f8a4463 validating 1751823184 Track


In [169]:
json_response.keys()

dict_keys(['custom_id', 'response', 'error'])

In [170]:
raw_responses

['{"custom_id": "btLyma2P7Yq2Owe9R5O17", "response": {"body": {"choices": [{"content_filter_results": {"hate": {"filtered": false, "severity": "safe"}, "protected_material_code": {"filtered": false, "detected": false}, "protected_material_text": {"filtered": false, "detected": false}, "self_harm": {"filtered": false, "severity": "safe"}, "sexual": {"filtered": false, "severity": "safe"}, "violence": {"filtered": false, "severity": "safe"}}, "finish_reason": "stop", "index": 0, "logprobs": null, "message": {"annotations": [], "content": "{\\n  \\"score\\": 4,\\n  \\"reasoning\\": \\"Positive:\\\\n• The bot correctly identified the user\'s need (Zertifikat auf dem Handy) and routed them through clear button-based choices related to SMIME-Zertifikate.\\\\n• Provided specific, relevant knowledge articles with direct links on how to request/retrieve the SMIME certificate.\\\\n• Bot checked for successful resolution at the end by asking if the user can now encrypt/decrypt emails.\\\\n• Clear

In [172]:
import sqlite3

In [174]:
conn = sqlite3.connect('track_status.db')

In [177]:
cursor = conn.cursor()

In [187]:
cursor.execute(f""" 
SELECT count(name) FROM sqlite_master WHERE type='table' AND name='track_status';
""")

<sqlite3.Cursor object at 0x11c680540>

In [188]:
cursor.fetchone()

(1,)

In [189]:
cursor.connection

<sqlite3.Connection object at 0x11c6b66b0>

In [190]:
conn

<sqlite3.Connection object at 0x11c6b66b0>

In [None]:
conn.commit()

## Batching Exploration Begin

In [44]:
import os
from utils.llm_operations_langchain import prompt_exm, client, parser
from utils.data_processing import get_data, set_slot_processing
from openai import AzureOpenAI
import json

In [23]:
# Rebinds `client`, a name that the import cell of this section also imports
# from utils.llm_operations_langchain; whichever cell runs last wins.
# The API key is read from the OPENAI_API_KEY environment variable.
client = AzureOpenAI(
    api_version="2025-01-01-preview",
    azure_endpoint="https://voicecast-gpt-france.openai.azure.com/",
    api_key=os.environ["OPENAI_API_KEY"],
)

# Full REST URL of the "gpt-4.1-batch" deployment's chat-completions route.
deployment_url = "https://voicecast-gpt-france.openai.azure.com/openai/deployments/gpt-4.1-batch/chat/completions?api-version=2024-12-01-preview"

In [24]:
def create_completion(content, model="gpt-4.1"):
    """Send one user message to the chat-completions API and return the reply text.

    Args:
        content: Text of the user message.
        model: Model/deployment name passed to the API.

    Returns:
        The ``content`` of the first choice's message.
    """
    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant who know English and German Language"},
        {"role": "user", "content": content},
    ]
    # temperature=0 keeps the answers as repeatable as the service allows.
    completion = client.chat.completions.create(
        model=model,
        messages=chat_messages,
        temperature=0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [49]:
def create_batch_yaml(prompt, sender_id):
    """Build one batch request entry (a plain dict, despite the name) for a prompt.

    Args:
        prompt: Text sent as the user message.
        sender_id: Stored as ``custom_id`` so the result can be matched back to
            the conversation it belongs to.

    Returns:
        Dict with the keys ``custom_id``, ``method``, ``url`` and ``body``, ready
        to be serialised as one JSONL line.
    """
    system_message = {"role": "system", "content": "You are a helpful assistant who know English and German Language"}
    user_message = {"role": "user", "content": prompt}
    request_body = {
        "model": "gpt-4.1-batch",
        "messages": [system_message, user_message],
    }
    return {
        "custom_id": sender_id,
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": request_body,
    }


In [50]:
def process_chats(sender_id, df):
    """Build the batch request for one conversation.

    Selects the rows of ``df`` belonging to ``sender_id``, orders them by
    ``"conversation time"``, keeps only ``user`` and ``bot`` turns and renders
    them as ``"<speaker>: <text>"`` lines. The transcript is inserted into
    ``prompt_exm`` together with the parser's format instructions and wrapped
    with ``create_batch_yaml``.

    Args:
        sender_id: Conversation identifier to select.
        df: DataFrame with the columns ``sender_id``, ``conversation time``,
            ``type_name`` and ``text``.

    Returns:
        The batch request entry produced by ``create_batch_yaml``.
    """
    sender_rows = df[df['sender_id'] == sender_id].sort_values(by=["conversation time"])
    turns = sender_rows[sender_rows["type_name"].isin(["user", "bot"])][["type_name", "text"]].values

    transcript_lines = []
    for speaker, text in turns:
        text = text.strip()
        if "/SetSlots" in text:
            # Raw slot-setting commands are rewritten by the project helper
            # before they go into the transcript.
            text = set_slot_processing(text=text)
        transcript_lines.append(f"{speaker}: {text}\n")
    # Join once instead of growing a string inside the loop.
    conversation = "".join(transcript_lines)

    prompt_final = prompt_exm.format(
        chat_transcript=conversation,
        format_instruction=parser.get_format_instructions(),
    )
    return create_batch_yaml(prompt=prompt_final, sender_id=sender_id)

In [51]:
# Write one batch request per distinct sender_id to messages.jsonl, one JSON
# object per line.
# NOTE(review): `data` is not defined in the visible notebook — presumably it
# comes from get_data; confirm. `request_list` is never filled or read here.
request_list = []
with open("messages.jsonl", "w") as f:
    for sender in data.sender_id.unique():
        request_json = process_chats(sender_id=sender, df=data)
        f.write(json.dumps(request_json) + "\n")

In [52]:
# Upload the JSONL request file for batch processing. The context manager
# closes the local file handle once the upload call returns; the original
# passed a bare open(...) and never closed it.
with open("messages.jsonl", "rb") as jsonl_file:
    file = client.files.create(
        file=jsonl_file,
        purpose="batch",
    )

In [123]:
file.model_dump()

{'id': 'file-ee1aa3ac317a44258c136e6be0b00040', 'bytes': 15264, 'created_at': 1751743680, 'filename': 'messages.jsonl', 'object': 'file', 'purpose': 'batch', 'status': 'processed', 'expires_at': None, 'status_details': None}

In [54]:
print(file.model_dump_json(indent=2))
file_id = file.id

{
  "id": "file-ee1aa3ac317a44258c136e6be0b00040",
  "bytes": 15264,
  "created_at": 1751743680,
  "filename": "messages.jsonl",
  "object": "file",
  "purpose": "batch",
  "status": "processed",
  "expires_at": null,
  "status_details": null
}


In [55]:
# Submit the uploaded JSONL file as a batch job with a 24h completion window.
# NOTE(review): the per-request "url" written by create_batch_yaml is
# "/v1/chat/completions" while the endpoint here is "/chat/completions" —
# confirm the service accepts this combination.
batch_response = client.batches.create(
    input_file_id=file_id,
    endpoint="/chat/completions",
    completion_window="24h",
)

## Batching Process ends

In [126]:
batch_response.model_dump()

{'id': 'batch_3e62a3ce-d7c4-417b-b078-e2044502734c', 'completion_window': '24h', 'created_at': 1751743797, 'endpoint': '/chat/completions', 'input_file_id': 'file-ee1aa3ac317a44258c136e6be0b00040', 'object': 'batch', 'status': 'completed', 'cancelled_at': None, 'cancelling_at': None, 'completed_at': 1751744307, 'error_file_id': None, 'errors': None, 'expired_at': None, 'expires_at': 1751830197, 'failed_at': None, 'finalizing_at': 1751744220, 'in_progress_at': 1751744049, 'metadata': None, 'output_file_id': 'file-b88d2941-a94a-4666-89d8-05f48c10b055', 'request_counts': {'completed': 2, 'failed': 0, 'total': 2}}

In [56]:
batch_id = batch_response.id

In [None]:
# NOTE(review): incomplete attribute name in a never-executed cell; the
# neighbouring cells call model_dump() / model_dump_json(). Finish or delete.
batch_response.model_

In [57]:
print(batch_response.model_dump_json(indent=2))

{
  "id": "batch_3e62a3ce-d7c4-417b-b078-e2044502734c",
  "completion_window": "24h",
  "created_at": 1751743797,
  "endpoint": "/chat/completions",
  "input_file_id": "file-ee1aa3ac317a44258c136e6be0b00040",
  "object": "batch",
  "status": "validating",
  "cancelled_at": null,
  "cancelling_at": null,
  "completed_at": null,
  "error_file_id": "",
  "errors": null,
  "expired_at": null,
  "expires_at": 1751830197,
  "failed_at": null,
  "finalizing_at": null,
  "in_progress_at": null,
  "metadata": null,
  "output_file_id": "",
  "request_counts": {
    "completed": 0,
    "failed": 0,
    "total": 0
  }
}


In [129]:
import pandas as pd

In [151]:
batch_response.to_dict()["status"]#.model_dump()

'completed'

In [146]:
batch_frame = batch_response.to_dict()

In [149]:
# Print every field of the batch as "name value". The unused `vals` assignment
# and the stray bare name `col` (a NameError on the first iteration) are removed.
for key, values in batch_frame.items():
    print(key, values)

id batch_3e62a3ce-d7c4-417b-b078-e2044502734c
completion_window 24h
created_at 1751743797
endpoint /chat/completions
input_file_id file-ee1aa3ac317a44258c136e6be0b00040
object batch
status completed
cancelled_at None
cancelling_at None
completed_at 1751744307
error_file_id None
errors None
expired_at None
expires_at 1751830197
failed_at None
finalizing_at 1751744220
in_progress_at 1751744049
metadata None
output_file_id file-b88d2941-a94a-4666-89d8-05f48c10b055
request_counts {'completed': 2, 'failed': 0, 'total': 2}


In [143]:
del batch_frame["request_counts"]

In [59]:
client.batches.retrieve(batch_id)

Batch(id='batch_3e62a3ce-d7c4-417b-b078-e2044502734c', completion_window='24h', created_at=1751743797, endpoint='/chat/completions', input_file_id='file-ee1aa3ac317a44258c136e6be0b00040', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1751744307, error_file_id=None, errors=None, expired_at=None, expires_at=1751830197, failed_at=None, finalizing_at=1751744220, in_progress_at=1751744049, metadata=None, output_file_id='file-b88d2941-a94a-4666-89d8-05f48c10b055', request_counts=BatchRequestCounts(completed=2, failed=0, total=2))

In [None]:
import time
# Imported under an alias: a plain `import datetime` would rebind the name
# `datetime`, which json_result_decode uses as the class
# (`datetime.fromtimestamp`), and break it for the rest of the session.
import datetime as dt

# States after which the batch will not change any more. "cancelled" and
# "expired" are included so the loop cannot spin forever on such batches;
# the original "canceled" spelling is kept for compatibility.
TERMINAL_STATUSES = ("completed", "failed", "expired", "cancelled", "canceled")

status = "validating"
while status not in TERMINAL_STATUSES:
    time.sleep(60)
    batch_response = client.batches.retrieve(batch_id)
    status = batch_response.status
    print(f"{dt.datetime.now()} Batch Id: {batch_id},  Status: {status}")

# `errors` (and its `data`) can be None, as the dumped batch objects in this
# notebook show, so guard before iterating.
if batch_response.status == "failed" and batch_response.errors:
    for error in batch_response.errors.data or []:
        print(f"Error code {error.code} Message {error.message}")

In [152]:
batch_id = "batch_31022594-dad3-4517-a33c-b4042f8a4463"

In [153]:
batch_response = client.batches.retrieve(batch_id)

In [154]:
batch_response

Batch(id='batch_31022594-dad3-4517-a33c-b4042f8a4463', completion_window='24h', created_at=1751823184, endpoint='/chat/completions', input_file_id='file-3f66bbc19b0d4e309ed228e10e771e8c', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1751823783, error_file_id=None, errors=None, expired_at=None, expires_at=1751909584, failed_at=None, finalizing_at=1751823640, in_progress_at=1751823495, metadata=None, output_file_id='file-144fcbae-bb9c-4d7e-a70c-a6be9416553b', request_counts=BatchRequestCounts(completed=3, failed=0, total=3))

In [156]:
import json

# Prefer the output file; fall back to the error file when no output file id
# is set.
output_file_id = batch_response.output_file_id or batch_response.error_file_id

if output_file_id:
    file_response = client.files.content(output_file_id)
    # The file is JSON Lines: one JSON document per line.
    raw_responses = file_response.text.strip().split('\n')

    for raw_response in raw_responses:
        json_response = json.loads(raw_response)
        # Pretty-print each entry for manual inspection.
        formatted_json = json.dumps(json_response, indent=2)
        print(formatted_json)

{
  "custom_id": "btLyma2P7Yq2Owe9R5O17",
  "response": {
    "body": {
      "choices": [
        {
          "content_filter_results": {
            "hate": {
              "filtered": false,
              "severity": "safe"
            },
            "protected_material_code": {
              "filtered": false,
              "detected": false
            },
            "protected_material_text": {
              "filtered": false,
              "detected": false
            },
            "self_harm": {
              "filtered": false,
              "severity": "safe"
            },
            "sexual": {
              "filtered": false,
              "severity": "safe"
            },
            "violence": {
              "filtered": false,
              "severity": "safe"
            }
          },
          "finish_reason": "stop",
          "index": 0,
          "logprobs": null,
          "message": {
            "annotations": [],
            "content": "{\n  \"score\": 4,\n 

In [74]:
json_response["response"].keys()#["message"]

dict_keys(['body', 'request_id', 'status_code'])

In [82]:
json_response["response"]['body'].keys()

dict_keys(['choices', 'created', 'id', 'model', 'object', 'prompt_filter_results', 'system_fingerprint', 'usage'])

In [110]:
json.loads(json.loads(raw_responses[1])["response"]["body"]["choices"][0]["message"]["content"])["score"]

4

In [112]:
json.loads(json.loads(raw_responses[1])["response"]["body"]["choices"][0]["message"]["content"])['satisfaction_label']

'Satisfied'

In [115]:
print(json.loads(json.loads(raw_responses[1])["response"]["body"]["choices"][0]["message"]["content"])['reasoning'])

Positive:
• The chatbot understood the user's intent (certifikat anfordern) and guided them through specific MyCard topics efficiently.
• Provided multiple helpful hyperlinks to manuals and instructions for self-service.
• Shared troubleshooting advice and clarified what to do if initial attempts fail, which demonstrated good anticipation of user needs.
• The bot's tone was friendly and provided clear next steps for problem resolution.

Negative:
• There were several lengthy messages from the bot, potentially a bit overwhelming with multiple links (especially undistinguished ***** Benutzer), which may cause slight confusion.
• No explicit offer of escalation to a human if the steps don't help (though the conversation is not finished yet), but did ask if the solution helped.
• The process was smooth but could have been more concise—some instructions appeared verbose or slightly redundant between main text and links.



In [84]:
pprint(json_response["response"])

{'body': {'choices': [{'content_filter_results': {'hate': {'filtered': False,
                                                           'severity': 'safe'},
                                                  'protected_material_code': {'detected': False,
                                                                              'filtered': False},
                                                  'protected_material_text': {'detected': False,
                                                                              'filtered': False},
                                                  'self_harm': {'filtered': False,
                                                                'severity': 'safe'},
                                                  'sexual': {'filtered': False,
                                                             'severity': 'safe'},
                                                  'violence': {'filtered': False,
                                        

In [None]:


    for entry in data:
        json.dump(entry, f)
        f.write("\n")


In [31]:
create_batch_yaml(prompt= "", sender_id = "1")

{'custom_id': '1',
 'method': 'POST',
 'url': '/v1/chat/completions',
 'body': {'model': 'gpt-4.1',
  'messages': [{'role': 'system',
    'content': 'You are a helpful assistant who know English and German Language'},
   {'role': 'user', 'content': ''}]}}

In [13]:
sample_result = create_completion(sample_prompt)

In [14]:
import json

In [15]:
json.loads(sample_result)

{'score': 4,
 'satisfaction_label': 'Satisfied',
 'reasoning': "Positive:\n• The bot understood the user's intent from the start and provided relevant information about the MyCard certificate process.\n• It offered clear self-service instructions and direct links to documentation and troubleshooting steps.\n• The bot proactively explained possible issues with the new infrastructure and gave practical advice (retrying the process, removing the card before deletion, etc.).\n• The flow was logical and the bot checked if the solution helped, showing engagement.\nNegative:\n• The bot sent multiple consecutive messages, which could feel a bit overwhelming or cluttered.\n• There was some repetition in the information about the new portal and retrying, which could have been condensed.\n• The conversation ended with a check if the solution helped, but there was no confirmation of resolution yet (pending user response)."}

In [None]:
import os

import requests
import json

# Replace with your actual values
deployment_url = "https://voicecast-gpt-france.openai.azure.com/openai/deployments/gpt-4.1/chat/completions?api-version=2024-12-01-preview"
api_key = os.environ["OPENAI_API_KEY"]

headers = {
    "Content-Type": "application/json",
    "api-key": api_key
}

payload = {
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Tell me a fun fact about space."}
    ],
    "temperature": 0.7,
    "max_tokens": 100
}

# requests applies no timeout by default, so an unresponsive endpoint would
# block this cell indefinitely; bound the wait to 60 seconds.
response = requests.post(deployment_url, headers=headers, json=payload, timeout=60)

if response.status_code == 200:
    result = response.json()
    print(result['choices'][0]['message']['content'])
else:
    print(f"Request failed: {response.status_code}")
    print(response.text)

In [11]:
print(sample_prompt)



You are evaluating a conversation between company employee and internal IT service desk chatbot. Chatbot is called CSD chatbot and it is supposed to help employees with their workplace related issues, be it HW, SW, account, password or similar issue. Chatbot can provide either self-service support with troubleshooting and resolution of the issue, navigate to proper manual via hyperlink, trigger ticket creation or involvement of human agent.

Your goal is to evaluate the interaction from the **user’s perspective**, as if you were the user. Assess how the user likely felt during the interaction, particularly how satisfied, frustrated, or neutral they were based on the responses they received.

Use the following criteria to assess the conversation quality:

---

Evaluation Criteria:
1.	User Sentiment:
	•	Look for signs of frustration, repetition, escalation, or unresolved issues.
	•	Be cautious about users who initially seem neutral but express dissatisfaction near the end.
	•	If the se

In [2]:
print(prompt_exm.format(chat_transcript = "A", 
                        format_instruction=parser.get_format_instructions()))



You are evaluating a conversation between company employee and internal IT service desk chatbot. Chatbot is called CSD chatbot and it is supposed to help employees with their workplace related issues, be it HW, SW, account, password or similar issue. Chatbot can provide either self-service support with troubleshooting and resolution of the issue, navigate to proper manual via hyperlink, trigger ticket creation or involvement of human agent.

Your goal is to evaluate the interaction from the **user’s perspective**, as if you were the user. Assess how the user likely felt during the interaction, particularly how satisfied, frustrated, or neutral they were based on the responses they received.

Use the following criteria to assess the conversation quality:

---

Evaluation Criteria:
1.	User Sentiment:
	•	Look for signs of frustration, repetition, escalation, or unresolved issues.
	•	Be cautious about users who initially seem neutral but express dissatisfaction near the end.
	•	If the se

In [2]:
os.getcwd()

'/Users/A118390615/Library/CloudStorage/OneDrive-DeutscheTelekomAG/Projects/CSD/adhocs/evaluation_framework/pipeline_architecture'