In [1]:
# Import required libraries
import os
from openai import OpenAI
import json
from collections import Counter
import time
from dotenv import load_dotenv

# Load environment variables from a .env file (if one is present) so that
# OPENAI_API_KEY can be read from os.environ below.
load_dotenv()

# Initialize the OpenAI client used by every request in this notebook.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Default model name passed to the chat completions endpoint.
MODEL = "gpt-4o-mini"

# The status emoji was stored as mis-decoded bytes; use the real character.
print("✅ Setup complete! OpenAI client initialized.")

✅ Setup complete! OpenAI client initialized.


In [2]:
def call_openai(prompt, system_message="You are a helpful assistant.", temperature=0.7, max_tokens=None, seed=None):
    """Send a single-turn chat request to the OpenAI API and return the reply text.

    Uses the module-level ``client`` and ``MODEL``.

    Args:
        prompt: Text sent as the user message.
        system_message: Text sent as the system message.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Optional completion-length cap. It is only forwarded when
            truthy, so ``None`` and ``0`` both leave the API default in place.
        seed: Optional sampling seed; forwarded whenever it is not ``None``
            (so ``0`` is a valid seed).

    Returns:
        str: The content of the first choice. An empty string is returned when
        the reply carries no text content (``message.content`` is ``None``), so
        callers can always treat the result as a string. If anything in the
        request raises, the exception is not propagated; the string
        ``"Error: <message>"`` is returned instead.
    """
    try:
        params = {
            "model": MODEL,
            "messages": [
                {"role": "system", "content": system_message},
                {"role": "user", "content": prompt}
            ],
            "temperature": temperature
        }

        if max_tokens:
            params["max_tokens"] = max_tokens
        if seed is not None:
            params["seed"] = seed

        response = client.chat.completions.create(**params)
        content = response.choices[0].message.content
        # content is optional in the API response; normalise a missing reply to
        # "" so callers that slice or lowercase the result do not fail.
        return content if content is not None else ""
    except Exception as e:
        # Deliberate best-effort: callers in this notebook expect a string back
        # even when the request fails, so report the error in-band.
        return f"Error: {str(e)}"

def display_response(title, response, params=None):
    """Print a response framed by 60-character '=' rules.

    Args:
        title: Heading printed on the line after the opening rule.
        response: Object printed as the body (passed to ``print`` as-is).
        params: Optional value printed on a ``Parameters:`` line; the line is
            skipped when ``params`` is falsy (``None``, empty dict, ...).

    Returns:
        None. All output goes to stdout.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    # The heading emoji was stored as mis-decoded bytes; use the real character.
    print(f"📝 {title}")
    if params:
        print(f"Parameters: {params}")
    print(rule)
    print(response)
    print(f"{rule}\n")

def count_tokens_approx(text):
    """Estimate how many tokens ``text`` holds, assuming about four characters per token.

    The result is the character count floor-divided by four, so any trailing
    partial group of characters is not counted.
    """
    chars_per_token = 4
    whole_tokens, _leftover = divmod(len(text), chars_per_token)
    return whole_tokens

# The status emoji was stored as mis-decoded bytes; use the real character.
print("✅ Helper functions loaded!")

✅ Helper functions loaded!


In [4]:
import pandas as pd

# --- Configuration ---
prompt = "I ordered item #12345 on March 15th. The delivery was fast but the packaging was damaged."
system_message = "Extract the following information and return as JSON: order_id, order_date, delivery_speed, packaging_condition."
temperature = 0.7
num_runs = 15
# Substrings that must all appear (case-insensitively) in a response for the
# run to count as a success. This is a loose textual check, not JSON parsing.
expected_fields = ['order_id', 'order_date', 'delivery', 'packaging']
# call_openai reports a failed request by returning a string with this prefix
# instead of raising, so it is used to tell API failures from bad extractions.
api_error_prefix = "Error: "
# Pause between consecutive requests, in seconds.
pause_seconds = 0.5
# Column order of the results table; also keeps the table well-formed when
# num_runs is 0 and no records are collected.
result_columns = ["run_id", "prompt", "response", "is_success", "failure_pattern"]

responses_data = []

# Report the configured run count rather than a hard-coded number.
print(f"🚀 Starting {num_runs}-run test for Data Extraction prompt...")
print(f"Prompt: '{prompt}'\n")
print("="*80)

# 1. Collection Loop with Auto-Evaluation
for i in range(num_runs):
    print(f"Executing Run {i+1}/{num_runs}...")
    # Coerce a missing reply to "" so the string operations below cannot fail.
    response = call_openai(prompt, system_message=system_message, temperature=temperature) or ""

    # Auto-evaluate: check if response mentions every expected field name
    response_lower = response.lower()
    is_valid = all(field in response_lower for field in expected_fields)

    # Classify the outcome; a failed request is not the same failure as a
    # reply that is missing fields, so keep them apart in the analysis.
    if is_valid:
        failure_pattern = "None (Success)"
    elif response.startswith(api_error_prefix):
        failure_pattern = "API Error"
    else:
        failure_pattern = "Missing Fields"

    responses_data.append({
        "run_id": i + 1,
        "prompt": prompt,
        "response": response,
        "is_success": is_valid,
        "failure_pattern": failure_pattern
    })

    print(f"Response: {response[:100]}...")
    print(f"Valid: {'✅' if is_valid else '❌'}")
    print("-"*40)
    time.sleep(pause_seconds)

# 2. Create DataFrame and Save
df = pd.DataFrame(responses_data, columns=result_columns)
df.to_json("failure_analysis.json", orient="records", indent=4)
df.to_csv("failure_analysis_table.csv", index=False)

print("\n" + "="*60)
print("📊 FINAL FAILURE ANALYSIS TABLE")
print("="*60)
print(df[['run_id', 'is_success', 'failure_pattern']])

# Summary
print("\n" + "="*60)
print("📈 SUMMARY STATISTICS")
print("="*60)
total_runs = len(df)
success_count = int(df['is_success'].sum())
# Guard the division so an empty run (num_runs = 0) reports 0.0% instead of failing.
success_rate = success_count / total_runs * 100 if total_runs else 0.0
print(f"Total runs: {total_runs}")
print(f"Successes: {success_count}")
print(f"Failures: {total_runs - success_count}")
print(f"Success Rate: {success_rate:.1f}%")

# Failure pattern breakdown
print("\n📋 FAILURE PATTERNS BREAKDOWN:")
print(df['failure_pattern'].value_counts().to_string())

# Display full responses table
print("\n" + "="*60)
print("📝 FULL RESPONSES TABLE")
print("="*60)
pd.set_option('display.max_colwidth', 100)
print(df[['run_id', 'response', 'is_success', 'failure_pattern']].to_string())

print(f"\n📁 Results saved to failure_analysis.json and failure_analysis_table.csv")

# Also display as formatted DataFrame (for Jupyter)
df[['run_id', 'response', 'is_success', 'failure_pattern']]

🚀 Starting 15-run test for Data Extraction prompt...
Prompt: 'I ordered item #12345 on March 15th. The delivery was fast but the packaging was damaged.'

Executing Run 1/15...
Response: ```json
{
  "order_id": "12345",
  "order_date": "March 15th",
  "delivery_speed": "fast",
  "packag...
Valid: ✅
----------------------------------------
Executing Run 2/15...
Response: ```json
{
  "order_id": "12345",
  "order_date": "2023-03-15",
  "delivery_speed": "fast",
  "packag...
Valid: ✅
----------------------------------------
Executing Run 3/15...
Response: ```json
{
  "order_id": "12345",
  "order_date": "March 15th",
  "delivery_speed": "fast",
  "packag...
Valid: ✅
----------------------------------------
Executing Run 4/15...
Response: ```json
{
  "order_id": "12345",
  "order_date": "March 15th",
  "delivery_speed": "fast",
  "packag...
Valid: ✅
----------------------------------------
Executing Run 5/15...
Response: ```json
{
  "order_id": "12345",
  "order_date": "2023-03-1

Unnamed: 0,run_id,response,is_success,failure_pattern
0,1,"```json\n{\n ""order_id"": ""12345"",\n ""order_date"": ""March 15th"",\n ""delivery_speed"": ""fast"",\n...",True,None (Success)
1,2,"```json\n{\n ""order_id"": ""12345"",\n ""order_date"": ""2023-03-15"",\n ""delivery_speed"": ""fast"",\n...",True,None (Success)
2,3,"```json\n{\n ""order_id"": ""12345"",\n ""order_date"": ""March 15th"",\n ""delivery_speed"": ""fast"",\n...",True,None (Success)
3,4,"```json\n{\n ""order_id"": ""12345"",\n ""order_date"": ""March 15th"",\n ""delivery_speed"": ""fast"",\n...",True,None (Success)
4,5,"```json\n{\n ""order_id"": ""12345"",\n ""order_date"": ""2023-03-15"",\n ""delivery_speed"": ""fast"",\n...",True,None (Success)
5,6,"```json\n{\n ""order_id"": ""12345"",\n ""order_date"": ""March 15th"",\n ""delivery_speed"": ""fast"",\n...",True,None (Success)
6,7,"```json\n{\n ""order_id"": ""12345"",\n ""order_date"": ""March 15th"",\n ""delivery_speed"": ""fast"",\n...",True,None (Success)
7,8,"```json\n{\n ""order_id"": ""12345"",\n ""order_date"": ""March 15th"",\n ""delivery_speed"": ""fast"",\n...",True,None (Success)
8,9,"```json\n{\n ""order_id"": ""12345"",\n ""order_date"": ""March 15th"",\n ""delivery_speed"": ""fast"",\n...",True,None (Success)
9,10,"```json\n{\n ""order_id"": ""12345"",\n ""order_date"": ""March 15th"",\n ""delivery_speed"": ""fast"",\n...",True,None (Success)
