In [1]:
import sys
import pandas as pd
import numpy as np
from pathlib import Path
import json

# Add shared utilities to path
sys.path.append('../shared')

from simulation_utils import SimulationConfig, run_batch_simulation
from binary_baseline_prompt import (
    generate_binary_personality_description, 
    get_binary_prompt,
    create_binary_participant_data
)

print("Binary baseline simulation utilities loaded successfully")


Binary baseline simulation utilities loaded successfully


In [2]:
# Load and examine the data structure
print("=== DEBUGGING DATA STRUCTURE ===")

# Load the original Soto dataset
data_path = Path('../../raw_data/Soto_data.xlsx')
if not data_path.exists():
    print(f"Error: Data file not found at {data_path}")
    exit()

data = pd.read_excel(data_path)
print(f"Loaded {len(data)} participants from Soto dataset")

# Check ALL available columns
print(f"\nTotal columns: {len(data.columns)}")
print(f"All columns: {list(data.columns)}")

# Check specifically for BFI columns
bfi_cols = [col for col in data.columns if 'bfi' in col.lower()]
print(f"\nBFI-related columns found: {bfi_cols}")

# Check for domain score columns specifically
domain_cols = [col for col in data.columns if any(domain in col for domain in ['_e', '_a', '_c', '_n', '_o'])]
print(f"\nDomain score columns found: {domain_cols}")

# Show sample data for BFI domain columns
if len(bfi_cols) > 0:
    print(f"\nSample BFI data:")
    print(data[bfi_cols].head())
else:
    print("\nNo BFI columns found - showing first 10 columns:")
    print(data.iloc[:5, :10])


=== DEBUGGING DATA STRUCTURE ===
Loaded 470 participants from Soto dataset

Total columns: 704
All columns: ['case_id', 'age', 'sex', 'ethnicity', 'rel_acquaintance', 'rel_friend', 'rel_roommate', 'rel_boygirlfriend', 'rel_relative', 'rel_other', 'rel_description', 'bfi1', 'bfi2', 'bfi3', 'bfi4', 'bfi5', 'bfi6', 'bfi7', 'bfi8', 'bfi9', 'bfi10', 'bfi11', 'bfi12', 'bfi13', 'bfi14', 'bfi15', 'bfi16', 'bfi17', 'bfi18', 'bfi19', 'bfi20', 'bfi21', 'bfi22', 'bfi23', 'bfi24', 'bfi25', 'bfi26', 'bfi27', 'bfi28', 'bfi29', 'bfi30', 'bfi31', 'bfi32', 'bfi33', 'bfi34', 'bfi35', 'bfi36', 'bfi37', 'bfi38', 'bfi39', 'bfi40', 'bfi41', 'bfi42', 'bfi43', 'bfi44', 'bfi45', 'bfi46', 'bfi47', 'bfi48', 'bfi49', 'bfi50', 'bfi51', 'bfi52', 'bfi53', 'bfi54', 'bfi55', 'bfi56', 'bfi57', 'bfi58', 'bfi59', 'bfi60', 'pbfi1', 'pbfi2', 'pbfi3', 'pbfi4', 'pbfi5', 'pbfi6', 'pbfi7', 'pbfi8', 'pbfi9', 'pbfi10', 'pbfi11', 'pbfi12', 'pbfi13', 'pbfi14', 'pbfi15', 'pbfi16', 'pbfi17', 'pbfi18', 'pbfi19', 'pbfi20', 'pbfi21', 'p

In [3]:
# Create participant data with correct column names
print("=== CREATING PARTICIPANT DATA ===")

# Use the actual column names found in the data
participants_data = []

# First, let's identify the correct BFI domain columns
if 'bfi2_e' in data.columns:
    # Use BFI-2 format
    domain_columns = {
        'bfi2_e': 'bfi2_e',
        'bfi2_a': 'bfi2_a', 
        'bfi2_c': 'bfi2_c',
        'bfi2_n': 'bfi2_n',
        'bfi2_o': 'bfi2_o'
    }
    print("Using BFI-2 format columns")
elif 'bfi_e' in data.columns:
    # Use alternative BFI format
    domain_columns = {
        'bfi2_e': 'bfi_e',
        'bfi2_a': 'bfi_a', 
        'bfi2_c': 'bfi_c',
        'bfi2_n': 'bfi_n',
        'bfi2_o': 'bfi_o'
    }
    print("Using alternative BFI format columns")
else:
    # Need to find the correct columns
    print("ERROR: Cannot find standard BFI domain columns")
    print("Available columns that might be domain scores:")
    potential_cols = [col for col in data.columns if any(x in col.lower() for x in ['extra', 'agree', 'consc', 'neuro', 'open'])]
    print(potential_cols)
    
    # For now, let's stop and ask user to clarify
    raise Exception("Cannot identify BFI domain columns. Please check the data structure above.")

# Create participant data
for idx, row in data.iterrows():
    participant = {'participant_id': idx}
    
    # Map the data columns to the expected binary baseline format
    for expected_col, actual_col in domain_columns.items():
        if actual_col in data.columns:
            participant[expected_col] = row[actual_col]
        else:
            print(f"Warning: Column {actual_col} not found in data")
    
    participants_data.append(participant)

print(f"Created {len(participants_data)} participant records")

# Show sample participant data
print("\nSample participant data:")
for i in range(min(3, len(participants_data))):
    print(f"Participant {i+1}: {participants_data[i]}")

# Generate binary personality descriptions
print("\n=== GENERATING BINARY DESCRIPTIONS ===")
participants_with_binary = create_binary_participant_data(participants_data)

# Display sample binary descriptions
print("\nSample binary personality descriptions:")
for i, p in enumerate(participants_with_binary[:2]):
    print(f"\nParticipant {i+1}:")
    print(f"Domain scores: E={p['bfi2_e']:.2f}, A={p['bfi2_a']:.2f}, C={p['bfi2_c']:.2f}, N={p['bfi2_n']:.2f}, O={p['bfi2_o']:.2f}")
    print(f"Binary description: {p['binary_personality'][:200]}...")


=== CREATING PARTICIPANT DATA ===
Using BFI-2 format columns
Created 470 participant records

Sample participant data:
Participant 1: {'participant_id': 0, 'bfi2_e': 4.083333333333333, 'bfi2_a': 4.583333333333333, 'bfi2_c': 3.1666666666666665, 'bfi2_n': 2.4166666666666665, 'bfi2_o': 3.1666666666666665}
Participant 2: {'participant_id': 1, 'bfi2_e': 2.9166666666666665, 'bfi2_a': 3.1666666666666665, 'bfi2_c': 3.0, 'bfi2_n': 3.0, 'bfi2_o': 3.3333333333333335}
Participant 3: {'participant_id': 2, 'bfi2_e': 2.0833333333333335, 'bfi2_a': 4.083333333333333, 'bfi2_c': 3.8333333333333335, 'bfi2_n': 3.1666666666666665, 'bfi2_o': 4.416666666666667}

=== GENERATING BINARY DESCRIPTIONS ===

Sample binary personality descriptions:

Participant 1:
Domain scores: E=4.08, A=4.58, C=3.17, N=2.42, O=3.17
Binary description: You are high in Extraversion. You are outgoing, sociable, assertive, and energetic. You are high in Agreeableness. You are compassionate, cooperative, trusting, and kind to others. Yo

In [4]:
# Configure simulation parameters
print("=== SIMULATION CONFIGURATION ===")

# Models to test - using correct model names from portal.py
models_to_test = [
    'openai-gpt-3.5-turbo-0125'
    # "gpt-4",
    # "gpt-4o", 
    # "llama",
    # "deepseek",
    # "gpt-3.5-turbo"  # Fixed model name
]

# Simulation parameters
temperature = 1.0
batch_size = 20
max_workers = 8

print(f"Models to test: {models_to_test}")
print(f"Temperature: {temperature}")
print(f"Batch size: {batch_size}")
print(f"Max workers: {max_workers}")

# Create output directory
output_dir = Path("old_result/study_2_binary_results")
output_dir.mkdir(exist_ok=True)
print(f"\nResults will be saved to: {output_dir}")

# Test the prompt generator
def binary_baseline_prompt_generator(personality_description):
    """
    Generate a binary baseline prompt for a participant.
    
    Args:
        personality_description (str): Binary personality description 
        
    Returns:
        str: Complete prompt for the LLM
    """
    return get_binary_prompt(personality_description)

# Test the prompt generator with first participant
if participants_with_binary:
    sample_personality = participants_with_binary[0]['binary_personality']
    sample_prompt = binary_baseline_prompt_generator(sample_personality)
    print(f"\nSample prompt length: {len(sample_prompt)} characters")
    print("Sample prompt preview:")
    print(sample_prompt[:300] + "...")
else:
    print("No participants available for testing")


=== SIMULATION CONFIGURATION ===
Models to test: ['openai-gpt-3.5-turbo-0125']
Temperature: 1.0
Batch size: 20
Max workers: 8

Results will be saved to: study_2_binary_results

Sample prompt length: 2197 characters
Sample prompt preview:
### Your Assigned Personality ### 
Based on your personality profile below, please rate yourself on the following traits. Consider how each trait applies to your high/low personality classification.
You are high in Extraversion. You are outgoing, sociable, assertive, and energetic. You are high in A...


In [5]:
# Run binary baseline simulation
print("=== RUNNING BINARY BASELINE SIMULATION ===")

# Run simulation for each model
all_results = {}

for model in models_to_test:
    print(f"\n{'='*50}")
    print(f"Running binary baseline simulation with {model}")
    print(f"{'='*50}")
    
    # Create simulation configuration
    config = SimulationConfig(
        model=model,
        temperature=temperature,
        batch_size=batch_size,
        max_workers=max_workers
    )
    
    # Create output filename
    output_filename = f"bfi_to_minimarker_binary_{model.replace('-', '_')}_temp{temperature:.0f}_0.json"
    
    try:
        # Run the simulation
        results = run_batch_simulation(
            participants_data=participants_with_binary,
            prompt_generator=binary_baseline_prompt_generator,
            config=config,
            personality_key='binary_personality',
            output_dir=str(output_dir),
            output_filename=output_filename
        )
        
        all_results[model] = results
        
        print(f"✓ Simulation completed for {model}")
        print(f"  - Total participants: {len(results)}")
        print(f"  - Successful responses: {sum(1 for r in results if 'error' not in r)}")
        print(f"  - Failed responses: {sum(1 for r in results if 'error' in r)}")
        print(f"  - Results saved to: {output_dir / output_filename}")
        
    except Exception as e:
        print(f"✗ Error running simulation for {model}: {str(e)}")
        all_results[model] = None


=== RUNNING BINARY BASELINE SIMULATION ===

Running binary baseline simulation with openai-gpt-3.5-turbo-0125
Starting simulation for 470 participants using openai-gpt-3.5-turbo-0125
Temperature: 1.0, Batch size: 20
Processing participants 0 to 19
Completed batch 0 to 19
Processing participants 20 to 39
Completed batch 20 to 39
Processing participants 40 to 59
Completed batch 40 to 59
Processing participants 60 to 79
Completed batch 60 to 79
Processing participants 80 to 99
Completed batch 80 to 99
Processing participants 100 to 119
Completed batch 100 to 119
Processing participants 120 to 139
Completed batch 120 to 139
Processing participants 140 to 159
Completed batch 140 to 159
Processing participants 160 to 179
Completed batch 160 to 179
Processing participants 180 to 199
Completed batch 180 to 199
Processing participants 200 to 219
Completed batch 200 to 219
Processing participants 220 to 239
Completed batch 220 to 239
Processing participants 240 to 259
Completed batch 240 to 259


In [1]:
import sys
import pandas as pd
import numpy as np
from pathlib import Path
import json

# Add shared utilities to path
sys.path.append('../shared')

from simulation_utils import SimulationConfig, run_batch_simulation
from binary_baseline_prompt import (
    generate_binary_personality_description, 
    get_binary_prompt,
    create_binary_participant_data
)

print("Binary baseline simulation utilities loaded successfully")


Binary baseline simulation utilities loaded successfully


In [2]:
# Load the original Soto dataset with BFI-2 domain scores
data_path = Path('../../raw_data/Soto_data.xlsx')
if not data_path.exists():
    print(f"Error: Data file not found at {data_path}")
    print("Please ensure the Soto dataset is available in the raw_data directory")
else:
    # Load the data
    data = pd.read_excel(data_path)
    print(f"Loaded {len(data)} participants from Soto dataset")
    
    # Check available columns for BFI domain scores
    bfi_cols = [col for col in data.columns if col.startswith('bfi') and '_' in col and len(col) <= 6]
    print(f"Available BFI domain columns: {bfi_cols}")
    
    # Display sample data
    print("\nSample BFI domain scores:")
    print(data[bfi_cols].head())


Loaded 470 participants from Soto dataset
Available BFI domain columns: ['bfi2_e', 'bfi2_a', 'bfi2_c', 'bfi2_n', 'bfi2_o']

Sample BFI domain scores:
     bfi2_e    bfi2_a    bfi2_c    bfi2_n    bfi2_o
0  4.083333  4.583333  3.166667  2.416667  3.166667
1  2.916667  3.166667  3.000000  3.000000  3.333333
2  2.083333  4.083333  3.833333  3.166667  4.416667
3  3.333333  3.833333  2.416667  3.416667  4.000000
4  3.250000  3.500000  4.500000  3.083333  3.333333


In [3]:
# Convert to participant list format for simulation
participants_data = []

for idx, row in data.iterrows():
    participant = {
        'participant_id': idx,
        'bfi2_e': row['bfi2_e'],
        'bfi2_a': row['bfi2_a'], 
        'bfi2_c': row['bfi2_c'],
        'bfi2_n': row['bfi2_n'],
        'bfi2_o': row['bfi2_o']
    }
    participants_data.append(participant)

print(f"Prepared {len(participants_data)} participants for binary baseline simulation")

# Generate binary personality descriptions
participants_with_binary = create_binary_participant_data(participants_data)

# Display sample binary descriptions
print("\nSample binary personality descriptions:")
for i, p in enumerate(participants_with_binary[:3]):
    print(f"\nParticipant {i+1}:")
    print(f"Domain scores: E={p['bfi2_e']:.2f}, A={p['bfi2_a']:.2f}, C={p['bfi2_c']:.2f}, N={p['bfi2_n']:.2f}, O={p['bfi2_o']:.2f}")
    print(f"Binary description: {p['binary_personality'][:200]}...")


Prepared 470 participants for binary baseline simulation

Sample binary personality descriptions:

Participant 1:
Domain scores: E=4.08, A=4.58, C=3.17, N=2.42, O=3.17
Binary description: You are high in Extraversion. You are outgoing, sociable, assertive, and energetic. You are high in Agreeableness. You are compassionate, cooperative, trusting, and kind to others. You are high in Con...

Participant 2:
Domain scores: E=2.92, A=3.17, C=3.00, N=3.00, O=3.33
Binary description: You are high in Extraversion. You are outgoing, sociable, assertive, and energetic. You are high in Agreeableness. You are compassionate, cooperative, trusting, and kind to others. You are high in Con...

Participant 3:
Domain scores: E=2.08, A=4.08, C=3.83, N=3.17, O=4.42
Binary description: You are low in Extraversion. You are reserved, quiet, and prefer smaller social settings. You are high in Agreeableness. You are compassionate, cooperative, trusting, and kind to others. You are high ...


In [4]:
# Models to test
models_to_test = [
    # "gpt-4",
    # "gpt-4o", 
    # "llama",
    # "deepseek",
    "openai_gpt_3.5_turbo_0125"
]

# Simulation parameters
temperature = 1.0
batch_size = 20
max_workers = 10

print(f"Models to test: {models_to_test}")
print(f"Temperature: {temperature}")
print(f"Batch size: {batch_size}")
print(f"Max workers: {max_workers}")

# Create output directory
output_dir = Path("old_result/study_2_binary_results")
output_dir.mkdir(exist_ok=True)
print(f"\nResults will be saved to: {output_dir}")


Models to test: ['openai_gpt_3.5_turbo_0125']
Temperature: 1.0
Batch size: 20
Max workers: 10

Results will be saved to: study_2_binary_results


In [5]:
def binary_baseline_prompt_generator(participant_data):
    """
    Generate a binary baseline prompt for a participant.
    
    Args:
        participant_data (dict): Participant data with binary personality description
        
    Returns:
        str: Complete prompt for the LLM
    """
    if 'binary_personality' in participant_data:
        personality_description = participant_data['binary_personality']
    else:
        # Generate on the fly if not present
        personality_description = generate_binary_personality_description(participant_data)
    
    return get_binary_prompt(personality_description)

# Test the prompt generator
sample_prompt = binary_baseline_prompt_generator(participants_with_binary[0])
print("Sample binary baseline prompt (first 500 characters):")
print(sample_prompt[:500] + "...")


Sample binary baseline prompt (first 500 characters):
### Your Assigned Personality ### 
Based on your personality profile below, please rate yourself on the following traits. Consider how each trait applies to your high/low personality classification.
You are high in Extraversion. You are outgoing, sociable, assertive, and energetic. You are high in Agreeableness. You are compassionate, cooperative, trusting, and kind to others. You are high in Conscientiousness. You are organized, responsible, hardworking, and reliable. You are low in Neuroticism...


In [7]:
# Run simulation for each model
all_results = {}

for model in models_to_test:
    print(f"\n{'='*50}")
    print(f"Running binary baseline simulation with {model}")
    print(f"{'='*50}")
    
    # Create simulation configuration
    config = SimulationConfig(
        model=model,
        temperature=temperature,
        batch_size=batch_size,
        max_workers=max_workers
    )
    
    # Create output filename
    output_filename = f"bfi_to_minimarker_binary_{model.replace('-', '_')}_temp{temperature:.0f}_0.json"
    
    try:
        # Run the simulation
        results = run_batch_simulation(
            participants_data=participants_with_binary,
            prompt_generator=binary_baseline_prompt_generator,
            config=config,
            personality_key='binary_personality',
            output_dir=str(output_dir),
            output_filename=output_filename
        )
        
        all_results[model] = results
        
        print(f"✓ Simulation completed for {model}")
        print(f"  - Total participants: {len(results)}")
        print(f"  - Successful responses: {sum(1 for r in results if 'error' not in r)}")
        print(f"  - Failed responses: {sum(1 for r in results if 'error' in r)}")
        print(f"  - Results saved to: {output_dir / output_filename}")
        
    except Exception as e:
        print(f"✗ Error running simulation for {model}: {str(e)}")
        all_results[model] = None



Running binary baseline simulation with openai_gpt_3.5_turbo_0125
Starting simulation for 470 participants using openai_gpt_3.5_turbo_0125
Temperature: 1.0, Batch size: 20
Processing participants 0 to 19
JSON parsing failed (attempt #31): Unsupported model: openai_gpt_3.5_turbo_0125
JSON parsing failed (attempt #32): Unsupported model: openai_gpt_3.5_turbo_0125
JSON parsing failed (attempt #33): Unsupported model: openai_gpt_3.5_turbo_0125
JSON parsing failed (attempt #34): Unsupported model: openai_gpt_3.5_turbo_0125
JSON parsing failed (attempt #35): Unsupported model: openai_gpt_3.5_turbo_0125
JSON parsing failed (attempt #36): Unsupported model: openai_gpt_3.5_turbo_0125
JSON parsing failed (attempt #37): Unsupported model: openai_gpt_3.5_turbo_0125
JSON parsing failed (attempt #38): Unsupported model: openai_gpt_3.5_turbo_0125
JSON parsing failed (attempt #39): Unsupported model: openai_gpt_3.5_turbo_0125
JSON parsing failed (attempt #40): Unsupported model: openai_gpt_3.5_turbo_

KeyboardInterrupt: 

In [None]:
print("\n" + "="*60)
print("BINARY BASELINE SIMULATION SUMMARY")
print("="*60)

summary_data = []

for model, results in all_results.items():
    if results is not None:
        total = len(results)
        successful = sum(1 for r in results if 'error' not in r)
        failed = sum(1 for r in results if 'error' in r)
        success_rate = (successful / total) * 100 if total > 0 else 0
        
        summary_data.append({
            'Model': model,
            'Total': total,
            'Successful': successful,
            'Failed': failed,
            'Success Rate (%)': f"{success_rate:.1f}%"
        })
        
        print(f"{model:25} | {total:5d} | {successful:5d} | {failed:5d} | {success_rate:6.1f}%")
    else:
        print(f"{model:25} | ERROR - Simulation failed")

# Save summary
if summary_data:
    summary_df = pd.DataFrame(summary_data)
    summary_df.to_csv(output_dir / 'binary_simulation_summary.csv', index=False)
    print(f"\nSummary saved to: {output_dir / 'binary_simulation_summary.csv'}")

print(f"\nAll results saved in: {output_dir}")
print("\nNext steps:")
print("1. Run convergent analysis to compare binary baseline with expanded/likert formats")
print("2. Analyze performance differences across models")
print("3. Evaluate which approach provides the most reliable personality simulation")
