In [1]:
import pandas as pd
import numpy as np
import sys
from pathlib import Path
import json
from datetime import datetime

# Add shared modules to path
sys.path.append('../shared')

from simulation_utils import (
    SimulationConfig, 
    run_bfi_to_minimarker_simulation,
    retry_failed_participants,
    run_enhanced_bfi_to_minimarker_simulation
)
from schema_bfi2 import expanded_scale
from mini_marker_prompt import get_expanded_prompt

print("Setup complete!")
print(f"Analysis started at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")


Setup complete!
Analysis started at: 2025-07-14 09:58:29


In [2]:
# Load the statistically simulated BFI-2 data
data_path = Path('facet_lvl_simulated_data.csv')
if not data_path.exists():
    print(f"Simulated data file not found at {data_path}")
    print("Please run bfi2_facet_level_parameter_extraction_and_simulation.py first to generate the data")
    raise FileNotFoundError(f"Data file not found: {data_path}")

data = pd.read_csv(data_path)
print(f"Loaded simulated data shape: {data.shape}")
print(f"Columns: {list(data.columns[:10])}...")  # Show first 10 columns
print(f"BFI columns: {[col for col in data.columns if col.startswith('bfi') and col.replace('bfi', '').replace('_', '').isdigit()][:10]}...")  # Show BFI item columns
print(f"Value range: {data[[col for col in data.columns if col.startswith('bfi') and col.replace('bfi', '').replace('_', '').isdigit()]].min().min()} to {data[[col for col in data.columns if col.startswith('bfi') and col.replace('bfi', '').replace('_', '').isdigit()]].max().max()}")
data.head()


Loaded simulated data shape: (200, 80)
Columns: ['bfi1', 'bfi16', 'bfi31', 'bfi46', 'bfi6', 'bfi21', 'bfi36', 'bfi51', 'bfi11', 'bfi26']...
BFI columns: ['bfi1', 'bfi16', 'bfi31', 'bfi46', 'bfi6', 'bfi21', 'bfi36', 'bfi51', 'bfi11', 'bfi26']...
Value range: 1.0 to 5.0


Unnamed: 0,bfi1,bfi16,bfi31,bfi46,bfi6,bfi21,bfi36,bfi51,bfi11,bfi26,...,bfi_n_depression,bfi_n_emotional_volatility,bfi_o_intellectual_curiosity,bfi_o_aesthetic_sensitivity,bfi_o_creative_imagination,bfi_e,bfi_a,bfi_c,bfi_n,bfi_o
0,3.0,2.0,4.0,3.0,2.0,2.0,3.0,5.0,3.0,2.0,...,2.0,4.5,4.0,4.5,3.25,2.666667,3.833333,2.333333,3.75,3.916667
1,4.0,2.0,3.0,3.0,3.0,3.0,3.0,2.0,3.0,4.0,...,2.75,4.5,2.5,3.25,3.0,3.25,2.833333,4.083333,3.5,2.916667
2,3.0,3.0,4.0,1.0,1.0,2.0,3.0,4.0,2.0,3.0,...,2.0,4.5,3.5,4.75,4.25,2.666667,2.75,3.833333,3.833333,4.166667
3,5.0,1.0,2.0,5.0,5.0,5.0,1.0,1.0,2.0,2.0,...,4.25,2.25,4.75,4.25,3.75,4.583333,3.166667,2.916667,3.583333,4.25
4,3.0,4.0,3.0,3.0,1.0,2.0,4.0,3.0,2.0,4.0,...,3.0,1.75,4.5,4.0,4.5,2.75,4.333333,2.5,3.0,4.333333


In [3]:
# Generate BFI column list for processing
bfi_columns = [f"bfi{i}" for i in range(1, 61)]
print(f"Expected BFI columns: {len(bfi_columns)}")
print(f"Available BFI columns: {len([col for col in data.columns if col in bfi_columns])}")

# Verify all expected columns are present
missing_cols = [col for col in bfi_columns if col not in data.columns]
if missing_cols:
    print(f"Warning: Missing columns: {missing_cols}")
else:
    print("All BFI columns are present!")

# Reverse coding map for BFI-2 items
reverse_coding_map = {
    'bfi1': 'bfi1', 'bfi2': 'bfi2', 'bfi3': 'bfi3R', 'bfi4': 'bfi4R', 'bfi5': 'bfi5R',
    'bfi6': 'bfi6', 'bfi7': 'bfi7', 'bfi8': 'bfi8R', 'bfi9': 'bfi9R', 'bfi10': 'bfi10',
    'bfi11': 'bfi11R', 'bfi12': 'bfi12R', 'bfi13': 'bfi13', 'bfi14': 'bfi14', 'bfi15': 'bfi15',
    'bfi16': 'bfi16R', 'bfi17': 'bfi17R', 'bfi18': 'bfi18', 'bfi19': 'bfi19', 'bfi20': 'bfi20',
    'bfi21': 'bfi21', 'bfi22': 'bfi22R', 'bfi23': 'bfi23R', 'bfi24': 'bfi24R', 'bfi25': 'bfi25R',
    'bfi26': 'bfi26R', 'bfi27': 'bfi27', 'bfi28': 'bfi28R', 'bfi29': 'bfi29R', 'bfi30': 'bfi30R',
    'bfi31': 'bfi31R', 'bfi32': 'bfi32', 'bfi33': 'bfi33', 'bfi34': 'bfi34', 'bfi35': 'bfi35',
    'bfi36': 'bfi36R', 'bfi37': 'bfi37R', 'bfi38': 'bfi38', 'bfi39': 'bfi39', 'bfi40': 'bfi40',
    'bfi41': 'bfi41', 'bfi42': 'bfi42R', 'bfi43': 'bfi43', 'bfi44': 'bfi44R', 'bfi45': 'bfi45R',
    'bfi46': 'bfi46', 'bfi47': 'bfi47R', 'bfi48': 'bfi48R', 'bfi49': 'bfi49R', 'bfi50': 'bfi50R',
    'bfi51': 'bfi51R', 'bfi52': 'bfi52', 'bfi53': 'bfi53', 'bfi54': 'bfi54', 'bfi55': 'bfi55R',
    'bfi56': 'bfi56', 'bfi57': 'bfi57', 'bfi58': 'bfi58R', 'bfi59': 'bfi59', 'bfi60': 'bfi60'
}

# Apply reverse coding
for key, value in reverse_coding_map.items():
    if value.endswith('R'):  # Reverse coded
        data[key] = 6 - data[key]
    # else: keep original value

print("Reverse coding applied successfully")

Expected BFI columns: 60
Available BFI columns: 60
All BFI columns are present!
Reverse coding applied successfully


In [4]:
# Map numeric values to Expanded format descriptions
def map_values_to_expanded(row):
    """Convert numeric BFI values to expanded format strings."""
    mapped_row = row.copy()
    for key in expanded_scale:
        if key in row.index and pd.notna(row[key]):  # Check if the key exists and value is not NaN
            index = int(row[key]) - 1  # Convert to 0-index (1-5 scale becomes 0-4 index)
            if 0 <= index < len(expanded_scale[key]):
                mapped_row[key] = expanded_scale[key][index]  # Replace with corresponding string
            else:
                print(f"Warning: Invalid index {index} for {key} with value {row[key]}")
    return mapped_row

print("Converting BFI values to Expanded format descriptions...")

# Apply the mapping function to BFI columns
mapped_data = data[bfi_columns].apply(map_values_to_expanded, axis=1)

# Combine all BFI descriptions into a single personality description
mapped_data['combined_bfi2'] = mapped_data[bfi_columns].apply(lambda row: ' '.join(row.astype(str)), axis=1)

# Add combined description to original data
data['combined_bfi2'] = mapped_data['combined_bfi2']

print("Expanded format personality descriptions created successfully")
print(f"Final data shape: {data.shape}")


Converting BFI values to Expanded format descriptions...
Expanded format personality descriptions created successfully
Final data shape: (200, 81)


  mapped_row[key] = expanded_scale[key][index]  # Replace with corresponding string
  mapped_row[key] = expanded_scale[key][index]  # Replace with corresponding string
  mapped_row[key] = expanded_scale[key][index]  # Replace with corresponding string
  mapped_row[key] = expanded_scale[key][index]  # Replace with corresponding string
  mapped_row[key] = expanded_scale[key][index]  # Replace with corresponding string


In [5]:
# Preview a personality description
print("Sample Expanded format personality description:")
print("=" * 80)
sample_description = data.iloc[0]['combined_bfi2']
print(sample_description + "...")
print("=" * 80)
print(f"Full description length: {len(sample_description)} characters")


Sample Expanded format personality description:
You are somewhat outgoing, sociable. You are fairly compassionate usually soft-hearted. You are very disorganized. You are very tense handle stress very poorly. You have many artistic interests. You are not particularly assertive. You are fairly respectful usually treat others with respect. You are sometimes lazy. You stay very optimistic after experiencing a setback. You are curious about a large number of things. You sometimes feel excited or eager. You rarely find fault with others. You are somewhat dependable, steady. You are very moody almost always have up and down mood swings. You are somewhat inventive sometimes find clever ways to do things. You are rarely quiet. You feel a fair amount of sympathy for others. You are not particularly systematic rarely keep things in order. You are almost always tense. You are quite fascinated by art music or literature. You are fairly submissive often act as a follower. You sometimes start argume

In [6]:
# Test prompt generation with first participant
first_participant = data.iloc[0]
sample_prompt = get_expanded_prompt(first_participant['combined_bfi2'])

print("=" * 80)
print("COMPLETE EXPANDED FORMAT PROMPT SENT TO LLM")
print("=" * 80)
print(sample_prompt)
print("=" * 80)
print(f"Prompt length: {len(sample_prompt)} characters")
print(f"Prompt word count: {len(sample_prompt.split())} words")


COMPLETE EXPANDED FORMAT PROMPT SENT TO LLM
### Your Assigned Personality ### 
Based on your detailed personality description below, please rate yourself on the following traits. Consider how each trait applies to you based on the personality profile provided.
You are somewhat outgoing, sociable. You are fairly compassionate usually soft-hearted. You are very disorganized. You are very tense handle stress very poorly. You have many artistic interests. You are not particularly assertive. You are fairly respectful usually treat others with respect. You are sometimes lazy. You stay very optimistic after experiencing a setback. You are curious about a large number of things. You sometimes feel excited or eager. You rarely find fault with others. You are somewhat dependable, steady. You are very moody almost always have up and down mood swings. You are somewhat inventive sometimes find clever ways to do things. You are rarely quiet. You feel a fair amount of sympathy for others. You are not

In [10]:
# Configuration for different models and temperatures
models_to_test = [
    # 'openai-gpt-3.5-turbo-0125',
                  'gpt-4', 
                  'gpt-4o', 
                  'llama', 
                  'deepseek'
				  ]
temperatures = [1]  # Use temperature 1 for stochastic responses
batch_size = 25  # Smaller batch size for stability across different APIs

# Create participant data list from DataFrame
participants_data = data.to_dict('records')

print(f"Simulation Configuration:")
print(f"  Models to test: {models_to_test}")
print(f"  Temperatures: {temperatures}")
print(f"  Batch size: {batch_size}")
print(f"  Total combinations: {len(models_to_test) * len(temperatures)}")
print(f"  Participants: {len(participants_data)}")

# Verify participant data structure
print(f"\nSample participant data keys: {list(participants_data[0].keys())[:10]}...")  # Show first 10 keys
print(f"Has 'combined_bfi2' key: {'combined_bfi2' in participants_data[0]}")


Simulation Configuration:
  Models to test: ['gpt-4', 'gpt-4o', 'llama', 'deepseek']
  Temperatures: [1]
  Batch size: 25
  Total combinations: 4
  Participants: 200

Sample participant data keys: ['bfi1', 'bfi16', 'bfi31', 'bfi46', 'bfi6', 'bfi21', 'bfi36', 'bfi51', 'bfi11', 'bfi26']...
Has 'combined_bfi2' key: True


In [11]:
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
import time

# Thread-safe logging
log_lock = threading.Lock()

def safe_print(message, prefix="INFO"):
    """Thread-safe printing with timestamp and prefix"""
    timestamp = datetime.now().strftime("%H:%M:%S")
    with log_lock:
        print(f"[{timestamp}] {prefix}: {message}")

def run_simulation(model, temperature):
    """Run simulation for a single model-temperature combination."""
    simulation_id = f"{model}_temp{temperature}"
    
    # Start message
    safe_print(f"Starting simulation: {model} (temp={temperature})", "START")
    
    config = SimulationConfig(
        model=model,
        temperature=temperature,
        batch_size=batch_size,
        max_workers=10,
        max_retries=5,  # Enhanced retry logic
        base_wait_time=2.0,
        max_wait_time=60.0
    )
    
    start_time = time.time()
    
    try:
        # Use enhanced simulation with format validation and auto-retry
        results = run_enhanced_bfi_to_minimarker_simulation(
            participants_data=participants_data,
            config=config,
            output_dir="study_3_expanded_results",
            use_enhanced=True,  # Enable enhanced validation
            prompt_generator=get_expanded_prompt  # Use Expanded-specific prompts
        )
        
        # Check for failures
        failed_count = sum(1 for r in results if isinstance(r, dict) and 'error' in r)
        duration = time.time() - start_time
        
        if failed_count > 0:
            safe_print(f"Completed {simulation_id} in {duration:.1f}s - WARNING: {failed_count} participants failed", "WARN")
        else:
            safe_print(f"Completed {simulation_id} in {duration:.1f}s - All participants successful", "SUCCESS")
        
        return (simulation_id, results)
        
    except Exception as e:
        duration = time.time() - start_time
        safe_print(f"Failed {simulation_id} after {duration:.1f}s - Error: {str(e)}", "ERROR")
        return (simulation_id, {"error": str(e)})

# Main execution
print("=" * 80)
print("STARTING ENHANCED PARALLEL SIMULATIONS FOR STUDY 3 - EXPANDED FORMAT")
print(f"Models: {models_to_test}")
print(f"Temperatures: {temperatures}")
print(f"Total combinations: {len(models_to_test) * len(temperatures)}")
print(f"Participants: {len(participants_data)}")
print("=" * 80)

all_results = {}
start_time = time.time()

# Use ThreadPoolExecutor for parallel execution
with ThreadPoolExecutor(max_workers=len(models_to_test)) as executor:
    # Submit all jobs
    futures = [
        executor.submit(run_simulation, model, temperature)
        for model in models_to_test
        for temperature in temperatures
    ]
    
    # Collect results as they complete
    completed_count = 0
    total_jobs = len(futures)
    
    for future in as_completed(futures):
        key, result = future.result()
        all_results[key] = result
        completed_count += 1
        
        # Progress update
        safe_print(f"Progress: {completed_count}/{total_jobs} simulations completed", "PROGRESS")

total_duration = time.time() - start_time
print("\n" + "=" * 80)
print(f"ALL SIMULATIONS COMPLETED IN {total_duration:.1f} SECONDS")
print(f"Results keys: {list(all_results.keys())}")
print("=" * 80)

STARTING ENHANCED PARALLEL SIMULATIONS FOR STUDY 3 - EXPANDED FORMAT
Models: ['gpt-4', 'gpt-4o', 'llama', 'deepseek']
Temperatures: [1]
Total combinations: 4
Participants: 200
[09:59:05] START: Starting simulation: gpt-4 (temp=1)
Starting simulation for 200 participants using gpt-4
Temperature: 1, Batch size: 25
Processing participants 0 to 24
[09:59:05] START: Starting simulation: gpt-4o (temp=1)
Starting simulation for 200 participants using gpt-4o
Temperature: 1, Batch size: 25
Processing participants 0 to 24
[09:59:05] START: Starting simulation: llama (temp=1)
Starting simulation for 200 participants using llama
Temperature: 1, Batch size: 25
Processing participants 0 to 24
[09:59:05] START: Starting simulation: deepseek (temp=1)
Starting simulation for 200 participants using deepseek
Temperature: 1, Batch size: 25
Processing participants 0 to 24
Completed batch 0 to 24
Processing participants 25 to 49
Completed batch 0 to 24
Processing participants 25 to 49
Completed batch 0 to 2

In [12]:
# Retry any failed participants
print("Checking for failed participants and retrying if necessary...")

retry_count = 0
for key, results in all_results.items():
    if isinstance(results, list):
        failed_count = sum(1 for r in results if isinstance(r, dict) and 'error' in r)
        if failed_count > 0:
            print(f"Retrying {failed_count} failed participants for {key}")
            retry_count += 1
            
            # Extract model and temperature from key
            model = key.split('_temp')[0]
            temperature = float(key.split('_temp')[1])
            
            config = SimulationConfig(
                model=model,
                temperature=temperature,
                batch_size=batch_size
            )
            
            updated_results = retry_failed_participants(
                results=results,
                participants_data=participants_data,
                prompt_generator=get_expanded_prompt,  # Use expanded-specific prompt function
                config=config,
                personality_key='combined_bfi2'
            )
            
            all_results[key] = updated_results
            
            # Save updated results
            from simulation_utils import save_simulation_results
            save_simulation_results(updated_results, "study_3_expanded_results", "bfi_to_minimarker", config)

if retry_count == 0:
    print("No failed participants found - all simulations successful!")
else:
    print(f"Retry process completed for {retry_count} model(s)")


Checking for failed participants and retrying if necessary...
Retrying 35 failed participants for llama_temp1
Retrying participant 1
Llama error: ('Cannot connect to proxy.', RemoteDisconnected('Remote end closed connection without response'))
Error in get_personality_response: ('Cannot connect to proxy.', RemoteDisconnected('Remote end closed connection without response'))
Llama error: ('Cannot connect to proxy.', RemoteDisconnected('Remote end closed connection without response'))
Error in get_personality_response: ('Cannot connect to proxy.', RemoteDisconnected('Remote end closed connection without response'))
Llama error: ('Cannot connect to proxy.', RemoteDisconnected('Remote end closed connection without response'))
Error in get_personality_response: ('Cannot connect to proxy.', RemoteDisconnected('Remote end closed connection without response'))
Successfully retried participant 1
Retrying participant 6
Llama error: ('Cannot connect to proxy.', RemoteDisconnected('Remote end clos

In [13]:
# Summary of all results
print("=" * 80)
print("STUDY 3 EXPANDED SIMULATION RESULTS SUMMARY")
print("=" * 80)

for key, results in all_results.items():
    if isinstance(results, list):
        total_participants = len(results)
        successful_participants = sum(1 for r in results if isinstance(r, dict) and 'error' not in r)
        failed_participants = total_participants - successful_participants
        
        print(f"\n{key}:")
        print(f"  Total participants: {total_participants}")
        print(f"  Successful: {successful_participants}")
        print(f"  Failed: {failed_participants}")
        print(f"  Success rate: {(successful_participants/total_participants)*100:.1f}%")
        
        # Sample a successful response for validation
        successful_responses = [r for r in results if isinstance(r, dict) and 'error' not in r]
        if successful_responses:
            sample_response = successful_responses[0]
            print(f"  Sample response keys: {list(sample_response.keys())[:10]}...")  # Show first 10 keys
            print(f"  Sample response length: {len(sample_response)} traits")
    else:
        print(f"\n{key}: FAILED - {results}")

print("\n" + "=" * 80)


STUDY 3 EXPANDED SIMULATION RESULTS SUMMARY

gpt-4o_temp1:
  Total participants: 200
  Successful: 200
  Failed: 0
  Success rate: 100.0%
  Sample response keys: ['Bashful', 'Bold', 'Careless', 'Cold', 'Complex', 'Cooperative', 'Creative', 'Deep', 'Disorganized', 'Efficient']...
  Sample response length: 40 traits

gpt-4_temp1:
  Total participants: 200
  Successful: 200
  Failed: 0
  Success rate: 100.0%
  Sample response keys: ['Bashful', 'Bold', 'Careless', 'Cold', 'Complex', 'Cooperative', 'Creative', 'Deep', 'Disorganized', 'Efficient']...
  Sample response length: 40 traits

deepseek_temp1:
  Total participants: 200
  Successful: 200
  Failed: 0
  Success rate: 100.0%
  Sample response keys: ['Bashful', 'Bold', 'Careless', 'Cold', 'Complex', 'Cooperative', 'Creative', 'Deep', 'Disorganized', 'Efficient']...
  Sample response length: 40 traits

llama_temp1:
  Total participants: 200
  Successful: 185
  Failed: 15
  Success rate: 92.5%
  Sample response keys: ['Bashful', 'Bold', 'C

In [14]:
# Save the preprocessed data for reference
output_path = Path('study_3_expanded_results')
output_path.mkdir(exist_ok=True)

# Save preprocessed data
data.to_csv(output_path / 'study3_expanded_preprocessed_data.csv', index=False)
print(f"Preprocessed data saved to {output_path / 'study3_expanded_preprocessed_data.csv'}")

# Save simulation metadata
metadata = {
    'simulation_type': 'study_3_expanded',
    'models_tested': models_to_test,
    'temperatures': temperatures,
    'batch_size': batch_size,
    'total_participants': len(participants_data),
    'simulation_date': datetime.now().isoformat(),
    'data_source': 'statistically_simulated_bfi2',
    'format': 'expanded',
    'key_differences_from_likert': [
        'No reverse coding applied - data already in correct format',
        'Uses expanded_scale mapping instead of likert_scale',
        'Uses get_expanded_prompt instead of get_likert_prompt',
        'Descriptions are narrative ("You are very...")',
    ],
    'results_summary': {
        key: {
            'total': len(results) if isinstance(results, list) else 0,
            'successful': sum(1 for r in results if isinstance(r, dict) and 'error' not in r) if isinstance(results, list) else 0
        } for key, results in all_results.items()
    }
}

with open(output_path / 'simulation_metadata.json', 'w') as f:
    json.dump(metadata, f, indent=2)

print(f"Simulation metadata saved to {output_path / 'simulation_metadata.json'}")

print("\n" + "=" * 60)
print("STUDY 3 EXPANDED FORMAT SIMULATION COMPLETE!")
print("\nNext steps:")
print("1. Run convergent validity analysis on the results")
print("2. Compare with Study 3 Likert results for format differences")
print("3. Compare with Study 2 results for study differences")
print("4. Results are saved in study_3_expanded_results/ directory")
print("5. Enhanced validation and retry logic handled format issues")
print("=" * 60)


Preprocessed data saved to study_3_expanded_results/study3_expanded_preprocessed_data.csv
Simulation metadata saved to study_3_expanded_results/simulation_metadata.json

STUDY 3 EXPANDED FORMAT SIMULATION COMPLETE!

Next steps:
1. Run convergent validity analysis on the results
2. Compare with Study 3 Likert results for format differences
3. Compare with Study 2 results for study differences
4. Results are saved in study_3_expanded_results/ directory
5. Enhanced validation and retry logic handled format issues
