In [1]:
import pandas as pd
import numpy as np
import random
import sys
from pathlib import Path

# Set random seeds for reproducibility
random.seed(1234)
np.random.seed(1234)

# Add shared modules to path
sys.path.append('../shared')

print("Setup complete")


Setup complete


In [2]:
# Load the original empirical data
empirical_data_path = Path('../../study_3/expanded_format/data.csv')
if not empirical_data_path.exists():
    raise FileNotFoundError(f'Empirical data not found: {empirical_data_path}')

data = pd.read_csv(empirical_data_path)
print(f'Loaded empirical data: {data.shape}')
print(f'Columns: {list(data.columns)[:10]}...')  # Show first 10 columns


Loaded empirical data: (470, 704)
Columns: ['case_id', 'age', 'sex', 'ethnicity', 'rel_acquaintance', 'rel_friend', 'rel_roommate', 'rel_boygirlfriend', 'rel_relative', 'rel_other']...


In [3]:
# Reverse coding mapping
reverse_coding_map = {
    'bfi1': 'reversed_bfi1', 'bfi2': 'reversed_bfi2', 'bfi3R': 'reversed_bfi3', 'bfi4R': 'reversed_bfi4',
    'bfi5R': 'reversed_bfi5', 'bfi6': 'reversed_bfi6', 'bfi7': 'reversed_bfi7', 'bfi8R': 'reversed_bfi8',
    'bfi9R': 'reversed_bfi9', 'bfi10': 'reversed_bfi10', 'bfi11R': 'reversed_bfi11', 'bfi12R': 'reversed_bfi12',
    'bfi13': 'reversed_bfi13', 'bfi14': 'reversed_bfi14', 'bfi15': 'reversed_bfi15', 'bfi16R': 'reversed_bfi16',
    'bfi17R': 'reversed_bfi17', 'bfi18': 'reversed_bfi18', 'bfi19': 'reversed_bfi19', 'bfi20': 'reversed_bfi20',
    'bfi21': 'reversed_bfi21', 'bfi22R': 'reversed_bfi22', 'bfi23R': 'reversed_bfi23', 'bfi24R': 'reversed_bfi24',
    'bfi25R': 'reversed_bfi25', 'bfi26R': 'reversed_bfi26', 'bfi27': 'reversed_bfi27', 'bfi28R': 'reversed_bfi28',
    'bfi29R': 'reversed_bfi29', 'bfi30R': 'reversed_bfi30', 'bfi31R': 'reversed_bfi31', 'bfi32': 'reversed_bfi32',
    'bfi33': 'reversed_bfi33', 'bfi34': 'reversed_bfi34', 'bfi35': 'reversed_bfi35', 'bfi36R': 'reversed_bfi36',
    'bfi37R': 'reversed_bfi37', 'bfi38': 'reversed_bfi38', 'bfi39': 'reversed_bfi39', 'bfi40': 'reversed_bfi40',
    'bfi41': 'reversed_bfi41', 'bfi42R': 'reversed_bfi42', 'bfi43': 'reversed_bfi43', 'bfi44R': 'reversed_bfi44',
    'bfi45R': 'reversed_bfi45', 'bfi46': 'reversed_bfi46', 'bfi47R': 'reversed_bfi47', 'bfi48R': 'reversed_bfi48',
    'bfi49R': 'reversed_bfi49', 'bfi50R': 'reversed_bfi50', 'bfi51R': 'reversed_bfi51', 'bfi52': 'reversed_bfi52',
    'bfi53': 'reversed_bfi53', 'bfi54': 'reversed_bfi54', 'bfi55R': 'reversed_bfi55', 'bfi56': 'reversed_bfi56',
    'bfi57': 'reversed_bfi57', 'bfi58R': 'reversed_bfi58', 'bfi59': 'reversed_bfi59', 'bfi60': 'reversed_bfi60'
}

# Apply reverse coding
for original, reversed_var in reverse_coding_map.items():
    if original.endswith('R'):
        data[reversed_var] = 6 - data[original[:-1]]
    else:
        data[reversed_var] = data[original]

print("Reverse coding applied successfully")


Reverse coding applied successfully


In [4]:
def domain_stats(df, domain_prefix):
    """Compute mean and standard deviation for domain facets."""
    filtered_columns = [col for col in df.columns if col.startswith(domain_prefix)]
    domain_df = df[filtered_columns]
    means = domain_df.mean()
    std_devs = domain_df.std()
    result_df = pd.DataFrame([means, std_devs], index=["Mean", "Standard Deviation"])
    return result_df

def domain_correlation(df, domain_prefix):
    """Compute correlation matrix for domain facets."""
    filtered_columns = [col for col in df.columns if col.startswith(domain_prefix)]
    filtered_columns = filtered_columns[:3]  # Take first 3 facets
    domain_df = df[filtered_columns]
    correlation_matrix = domain_df.corr()
    return correlation_matrix.iloc[:3, :3]

def average_correlation(items):
    """Calculate average correlation within a facet."""
    subset = data[items]
    corr_matrix = subset.corr()
    correlations = corr_matrix.values[np.triu_indices_from(corr_matrix, k=1)]
    return np.mean(correlations)

print("Helper functions defined")


Helper functions defined


In [5]:
# Define domains and facets
domains = {
    "Extraversion": [['reversed_bfi1', 'reversed_bfi16', 'reversed_bfi31', 'reversed_bfi46'], 
                      ['reversed_bfi6', 'reversed_bfi21', 'reversed_bfi36', 'reversed_bfi51'], 
                      ['reversed_bfi11', 'reversed_bfi26', 'reversed_bfi41', 'reversed_bfi56']],
    "Agreeableness": [['reversed_bfi2', 'reversed_bfi17', 'reversed_bfi32', 'reversed_bfi47'], 
                       ['reversed_bfi7', 'reversed_bfi22', 'reversed_bfi37', 'reversed_bfi52'], 
                       ['reversed_bfi12', 'reversed_bfi27', 'reversed_bfi42', 'reversed_bfi57']],
    "Conscientiousness": [['reversed_bfi3', 'reversed_bfi18', 'reversed_bfi33', 'reversed_bfi48'], 
                           ['reversed_bfi8', 'reversed_bfi23', 'reversed_bfi38', 'reversed_bfi53'], 
                           ['reversed_bfi13', 'reversed_bfi28', 'reversed_bfi43', 'reversed_bfi58']],
    "Neuroticism": [['reversed_bfi4', 'reversed_bfi19', 'reversed_bfi34', 'reversed_bfi49'], 
                     ['reversed_bfi9', 'reversed_bfi24', 'reversed_bfi39', 'reversed_bfi54'], 
                     ['reversed_bfi14', 'reversed_bfi29', 'reversed_bfi44', 'reversed_bfi59']],
    "Openness": [['reversed_bfi10', 'reversed_bfi25', 'reversed_bfi40', 'reversed_bfi55'], 
                  ['reversed_bfi5', 'reversed_bfi20', 'reversed_bfi35', 'reversed_bfi50'], 
                  ['reversed_bfi15', 'reversed_bfi30', 'reversed_bfi45', 'reversed_bfi60']]
}

# Calculate average domain correlations
domain_avg_correlations = {}
for domain, facets in domains.items():
    avg_corrs = [average_correlation(facet) for facet in facets]
    domain_avg_correlations[domain] = np.mean(avg_corrs)

average_domain_avg_correlations = np.mean(list(domain_avg_correlations.values()))
print(f'Average domain correlation: {average_domain_avg_correlations:.3f}')
print(f'Domain correlations: {domain_avg_correlations}')


Average domain correlation: 0.442
Domain correlations: {'Extraversion': 0.4610554748141877, 'Agreeableness': 0.3821996228465807, 'Conscientiousness': 0.40613896396510524, 'Neuroticism': 0.5055967450877641, 'Openness': 0.4570178582963769}


In [6]:
def simulate_item_responses(means, std_devs, corr_matrix, intra_group_corr, n_simulations):
    """
    Simulate item responses based on group characteristics and correlations.
    """
    num_groups = len(means)
    num_items_per_group = 4
    num_items = num_groups * num_items_per_group
    
    # Construct covariance matrix
    cov_matrix = np.outer(std_devs, std_devs) * corr_matrix
    
    # Generate group-level scores
    group_scores = np.random.multivariate_normal(means, cov_matrix, size=n_simulations)
    
    # Initialize item scores
    item_scores = np.zeros((n_simulations, num_items))
    
    # Calculate item-level standard deviation within groups
    item_std_dev_within_group = np.sqrt((1 - intra_group_corr) * std_devs**2)
    
    # Generate item scores
    for group_index in range(num_groups):
        start_idx = group_index * num_items_per_group
        for i in range(num_items_per_group):
            item_errors = np.random.normal(0, item_std_dev_within_group[group_index], n_simulations)
            item_scores[:, start_idx + i] = group_scores[:, group_index] + item_errors
    
    # Bound scores to [1, 5] range
    bounded_item_scores = np.clip(np.round(item_scores), 1, 5)
    
    return bounded_item_scores

print("Simulation function defined")


Simulation function defined


In [7]:
# Simulate data for each domain
n_simulations = 200

print("Simulating data for each domain...")

# Extraversion
domain_stat_e = domain_stats(data, 'bfi2_e')
means_e = np.array(domain_stat_e.loc['Mean'].values[:3])
std_devs_e = np.array(domain_stat_e.loc['Standard Deviation'].values[:3])
corr_e = domain_correlation(data, 'bfi2_e')
sim_e = simulate_item_responses(means_e, std_devs_e, corr_e, average_domain_avg_correlations, n_simulations)
extraversion_cols = ['bfi1', 'bfi16', 'bfi31', 'bfi46', 'bfi6', 'bfi21', 'bfi36', 'bfi51', 'bfi11', 'bfi26', 'bfi41', 'bfi56']
sim_e_df = pd.DataFrame(sim_e, columns=extraversion_cols)

# Agreeableness
domain_stat_a = domain_stats(data, 'bfi2_a')
means_a = np.array(domain_stat_a.loc['Mean'].values[:3])
std_devs_a = np.array(domain_stat_a.loc['Standard Deviation'].values[:3])
corr_a = domain_correlation(data, 'bfi2_a')
sim_a = simulate_item_responses(means_a, std_devs_a, corr_a, average_domain_avg_correlations, n_simulations)
agreeableness_cols = ['bfi2', 'bfi17', 'bfi32', 'bfi47', 'bfi7', 'bfi22', 'bfi37', 'bfi52', 'bfi12', 'bfi27', 'bfi42', 'bfi57']
sim_a_df = pd.DataFrame(sim_a, columns=agreeableness_cols)

print("✓ Extraversion and Agreeableness simulated")


Simulating data for each domain...
✓ Extraversion and Agreeableness simulated


In [8]:
# Conscientiousness
domain_stat_c = domain_stats(data, 'bfi2_c')
means_c = np.array(domain_stat_c.loc['Mean'].values[:3])
std_devs_c = np.array(domain_stat_c.loc['Standard Deviation'].values[:3])
corr_c = domain_correlation(data, 'bfi2_c')
sim_c = simulate_item_responses(means_c, std_devs_c, corr_c, average_domain_avg_correlations, n_simulations)
conscientiousness_cols = ['bfi3', 'bfi18', 'bfi33', 'bfi48', 'bfi8', 'bfi23', 'bfi38', 'bfi53', 'bfi13', 'bfi28', 'bfi43', 'bfi58']
sim_c_df = pd.DataFrame(sim_c, columns=conscientiousness_cols)

# Neuroticism
domain_stat_n = domain_stats(data, 'bfi2_n')
means_n = np.array(domain_stat_n.loc['Mean'].values[:3])
std_devs_n = np.array(domain_stat_n.loc['Standard Deviation'].values[:3])
corr_n = domain_correlation(data, 'bfi2_n')
sim_n = simulate_item_responses(means_n, std_devs_n, corr_n, average_domain_avg_correlations, n_simulations)
neuroticism_cols = ['bfi4', 'bfi19', 'bfi34', 'bfi49', 'bfi9', 'bfi24', 'bfi39', 'bfi54', 'bfi14', 'bfi29', 'bfi44', 'bfi59']
sim_n_df = pd.DataFrame(sim_n, columns=neuroticism_cols)

# Openness
domain_stat_o = domain_stats(data, 'bfi2_o')
means_o = np.array(domain_stat_o.loc['Mean'].values[:3])
std_devs_o = np.array(domain_stat_o.loc['Standard Deviation'].values[:3])
corr_o = domain_correlation(data, 'bfi2_o')
sim_o = simulate_item_responses(means_o, std_devs_o, corr_o, average_domain_avg_correlations, n_simulations)
openness_cols = ['bfi10', 'bfi25', 'bfi40', 'bfi55', 'bfi5', 'bfi20', 'bfi35', 'bfi50', 'bfi15', 'bfi30', 'bfi45', 'bfi60']
sim_o_df = pd.DataFrame(sim_o, columns=openness_cols)

print("✓ Conscientiousness, Neuroticism, and Openness simulated")


✓ Conscientiousness, Neuroticism, and Openness simulated


In [9]:
# Combine all domains
simulated_data = pd.concat([sim_e_df, sim_a_df, sim_c_df, sim_n_df, sim_o_df], axis=1)

# Add participant ID
simulated_data['participant_id'] = range(len(simulated_data))

print(f"Combined simulated data shape: {simulated_data.shape}")
print(f"Columns: {list(simulated_data.columns)}")


Combined simulated data shape: (200, 61)
Columns: ['bfi1', 'bfi16', 'bfi31', 'bfi46', 'bfi6', 'bfi21', 'bfi36', 'bfi51', 'bfi11', 'bfi26', 'bfi41', 'bfi56', 'bfi2', 'bfi17', 'bfi32', 'bfi47', 'bfi7', 'bfi22', 'bfi37', 'bfi52', 'bfi12', 'bfi27', 'bfi42', 'bfi57', 'bfi3', 'bfi18', 'bfi33', 'bfi48', 'bfi8', 'bfi23', 'bfi38', 'bfi53', 'bfi13', 'bfi28', 'bfi43', 'bfi58', 'bfi4', 'bfi19', 'bfi34', 'bfi49', 'bfi9', 'bfi24', 'bfi39', 'bfi54', 'bfi14', 'bfi29', 'bfi44', 'bfi59', 'bfi10', 'bfi25', 'bfi40', 'bfi55', 'bfi5', 'bfi20', 'bfi35', 'bfi50', 'bfi15', 'bfi30', 'bfi45', 'bfi60', 'participant_id']


In [10]:
# Calculate facet scores
print("Calculating facet scores...")

# Extraversion facets
simulated_data['bfi_e_sociability'] = (simulated_data['bfi1'] + simulated_data['bfi16'] + simulated_data['bfi31'] + simulated_data['bfi46'])/4
simulated_data['bfi_e_assertiveness'] = (simulated_data['bfi6'] + simulated_data['bfi21'] + simulated_data['bfi36'] + simulated_data['bfi51'])/4
simulated_data['bfi_e_energy_level'] = (simulated_data['bfi11'] + simulated_data['bfi26'] + simulated_data['bfi41'] + simulated_data['bfi56'])/4

# Agreeableness facets
simulated_data['bfi_a_compassion'] = (simulated_data['bfi2'] + simulated_data['bfi17'] + simulated_data['bfi32'] + simulated_data['bfi47'])/4
simulated_data['bfi_a_respectfulness'] = (simulated_data['bfi7'] + simulated_data['bfi22'] + simulated_data['bfi37'] + simulated_data['bfi52'])/4
simulated_data['bfi_a_trust'] = (simulated_data['bfi12'] + simulated_data['bfi27'] + simulated_data['bfi42'] + simulated_data['bfi57'])/4

# Conscientiousness facets
simulated_data['bfi_c_organization'] = (simulated_data['bfi3'] + simulated_data['bfi18'] + simulated_data['bfi33'] + simulated_data['bfi48'])/4
simulated_data['bfi_c_productiveness'] = (simulated_data['bfi8'] + simulated_data['bfi23'] + simulated_data['bfi38'] + simulated_data['bfi53'])/4
simulated_data['bfi_c_responsibility'] = (simulated_data['bfi13'] + simulated_data['bfi28'] + simulated_data['bfi43'] + simulated_data['bfi58'])/4

# Neuroticism facets
simulated_data['bfi_n_anxiety'] = (simulated_data['bfi4'] + simulated_data['bfi19'] + simulated_data['bfi34'] + simulated_data['bfi49'])/4
simulated_data['bfi_n_depression'] = (simulated_data['bfi9'] + simulated_data['bfi24'] + simulated_data['bfi39'] + simulated_data['bfi54'])/4
simulated_data['bfi_n_emotional_volatility'] = (simulated_data['bfi14'] + simulated_data['bfi29'] + simulated_data['bfi44'] + simulated_data['bfi59'])/4

# Openness facets
simulated_data['bfi_o_intellectual_curiosity'] = (simulated_data['bfi10'] + simulated_data['bfi25'] + simulated_data['bfi40'] + simulated_data['bfi55'])/4
simulated_data['bfi_o_aesthetic_sensitivity'] = (simulated_data['bfi5'] + simulated_data['bfi20'] + simulated_data['bfi35'] + simulated_data['bfi50'])/4
simulated_data['bfi_o_creative_imagination'] = (simulated_data['bfi15'] + simulated_data['bfi30'] + simulated_data['bfi45'] + simulated_data['bfi60'])/4

print("✓ Facet scores calculated")


Calculating facet scores...
✓ Facet scores calculated


In [11]:
# Calculate domain scores
print("Calculating domain scores...")

simulated_data['bfi_e'] = (simulated_data['bfi_e_sociability'] + simulated_data['bfi_e_assertiveness'] + simulated_data['bfi_e_energy_level'])/3
simulated_data['bfi_a'] = (simulated_data['bfi_a_compassion'] + simulated_data['bfi_a_respectfulness'] + simulated_data['bfi_a_trust'])/3
simulated_data['bfi_c'] = (simulated_data['bfi_c_organization'] + simulated_data['bfi_c_productiveness'] + simulated_data['bfi_c_responsibility'])/3
simulated_data['bfi_n'] = (simulated_data['bfi_n_anxiety'] + simulated_data['bfi_n_depression'] + simulated_data['bfi_n_emotional_volatility'])/3
simulated_data['bfi_o'] = (simulated_data['bfi_o_intellectual_curiosity'] + simulated_data['bfi_o_aesthetic_sensitivity'] + simulated_data['bfi_o_creative_imagination'])/3

print("✓ Domain scores calculated")

# Save the new simulated dataset
sim_data_path = Path('facet_lvl_simulated_data_NEW.csv')
simulated_data.to_csv(sim_data_path, index=False)
print(f"\n✅ New simulated data saved to: {sim_data_path.resolve()}")
print(f"Dataset shape: {simulated_data.shape}")
print(f"Sample data:")
print(simulated_data[['participant_id', 'bfi_e', 'bfi_a', 'bfi_c', 'bfi_n', 'bfi_o']].head())


Calculating domain scores...
✓ Domain scores calculated

✅ New simulated data saved to: /Users/mhhuang/Psychometrics4AI_revision/multi_model_studies/study_3/facet_lvl_simulated_data_NEW.csv
Dataset shape: (200, 81)
Sample data:
   participant_id     bfi_e     bfi_a     bfi_c     bfi_n     bfi_o
0               0  2.666667  3.833333  2.333333  3.750000  3.916667
1               1  3.250000  2.833333  4.083333  3.500000  2.916667
2               2  2.666667  2.750000  3.833333  3.833333  4.166667
3               3  4.583333  3.166667  2.916667  3.583333  4.250000
4               4  2.750000  4.333333  2.500000  3.000000  4.333333


In [12]:
# Import simulation utilities
from simulation_utils import (
    SimulationConfig, 
    run_bfi_to_minimarker_simulation,
    retry_failed_participants
)
from schema_bfi2 import expanded_scale
from mini_marker_prompt import get_expanded_prompt

# Reload the new simulated data
data = pd.read_csv('facet_lvl_simulated_data_NEW.csv')
print(f'Loaded new simulated data: {data.shape}')

# Define models to test
models = [
    'openai-gpt-3.5-turbo-0125',
    # "gpt-4",
    # "gpt-4o",
    # "llama",
    # "deepseek"
]

print(f"Models to test: {models}")

# Create output directories
output_base = Path("study_3_results")
output_base.mkdir(exist_ok=True)
print(f"Output directory: {output_base.absolute()}")


Loaded new simulated data: (200, 81)
Models to test: ['openai-gpt-3.5-turbo-0125']
Output directory: /Users/mhhuang/Psychometrics4AI_revision/multi_model_studies/study_3/study_3_results


In [13]:
# Apply reverse coding for LLM simulation (same as Study 2)
reverse_coding_map_llm = {
    'bfi1': 'bfi1', 'bfi2': 'bfi2', 'bfi3': 'bfi3R', 'bfi4': 'bfi4R', 'bfi5': 'bfi5R',
    'bfi6': 'bfi6', 'bfi7': 'bfi7', 'bfi8': 'bfi8R', 'bfi9': 'bfi9R', 'bfi10': 'bfi10',
    'bfi11': 'bfi11R', 'bfi12': 'bfi12R', 'bfi13': 'bfi13', 'bfi14': 'bfi14', 'bfi15': 'bfi15',
    'bfi16': 'bfi16R', 'bfi17': 'bfi17R', 'bfi18': 'bfi18', 'bfi19': 'bfi19', 'bfi20': 'bfi20',
    'bfi21': 'bfi21', 'bfi22': 'bfi22R', 'bfi23': 'bfi23R', 'bfi24': 'bfi24R', 'bfi25': 'bfi25R',
    'bfi26': 'bfi26R', 'bfi27': 'bfi27', 'bfi28': 'bfi28R', 'bfi29': 'bfi29R', 'bfi30': 'bfi30R',
    'bfi31': 'bfi31R', 'bfi32': 'bfi32', 'bfi33': 'bfi33', 'bfi34': 'bfi34', 'bfi35': 'bfi35',
    'bfi36': 'bfi36R', 'bfi37': 'bfi37R', 'bfi38': 'bfi38', 'bfi39': 'bfi39', 'bfi40': 'bfi40',
    'bfi41': 'bfi41', 'bfi42': 'bfi42R', 'bfi43': 'bfi43', 'bfi44': 'bfi44R', 'bfi45': 'bfi45R',
    'bfi46': 'bfi46', 'bfi47': 'bfi47R', 'bfi48': 'bfi48R', 'bfi49': 'bfi49R', 'bfi50': 'bfi50R',
    'bfi51': 'bfi51R', 'bfi52': 'bfi52', 'bfi53': 'bfi53', 'bfi54': 'bfi54', 'bfi55': 'bfi55R',
    'bfi56': 'bfi56', 'bfi57': 'bfi57', 'bfi58': 'bfi58R', 'bfi59': 'bfi59', 'bfi60': 'bfi60'
}

# Apply reverse coding to ensure proper format
for key, value in reverse_coding_map_llm.items():
    if value.endswith('R'):  # Reverse coded
        data[key] = 6 - data[key]
    # else: keep original value

print("Reverse coding applied for LLM simulation")


Reverse coding applied for LLM simulation


In [14]:
# Prepare preprocessed data for simulation
print("Preparing preprocessed data...")
preprocessed_data = data.copy()

# Select subset of participants for testing (first 50)
n_participants = 50
preprocessed_data = preprocessed_data.head(n_participants)

# Create combined_bfi2 personality descriptions for simulation
print("Creating personality descriptions...")

def create_personality_description(row):
    """Create expanded personality description from BFI items."""
    descriptions = []
    for i in range(1, 61):
        bfi_col = f'bfi{i}'
        if bfi_col in row and not pd.isna(row[bfi_col]):
            value = int(row[bfi_col])
            if bfi_col in expanded_scale:
                desc = expanded_scale[bfi_col]
                descriptions.append(f"I am {desc}")
    return " ".join(descriptions)

preprocessed_data['combined_bfi2'] = preprocessed_data.apply(create_personality_description, axis=1)

# Convert DataFrame to list of dicts for simulation function
participants_data = preprocessed_data.to_dict('records')

# Save preprocessed data
preprocess_path = output_base / "study3_preprocessed_data.csv"
preprocessed_data.to_csv(preprocess_path, index=False)
print(f"Preprocessed data saved to: {preprocess_path}")
print(f"Sample size for simulation: {len(preprocessed_data)} participants")
print(f"Converted to list of {len(participants_data)} participant dicts")
print(f"Sample personality description length: {len(participants_data[0]['combined_bfi2'])} characters")


Preparing preprocessed data...
Creating personality descriptions...
Preprocessed data saved to: study_3_results/study3_preprocessed_data.csv
Sample size for simulation: 50 participants
Converted to list of 50 participant dicts
Sample personality description length: 14542 characters


In [15]:
# Expanded format simulation for all models
expanded_results_dir = output_base / "study_3_expanded_results_i_am"
expanded_results_dir.mkdir(exist_ok=True)

print("Starting expanded format simulation...")

for model in models:
    print(f"\n{'='*50}")
    print(f"Running {model} - Expanded Format")
    print(f"{'='*50}")
    
    config = SimulationConfig(
        model=model,
        temperature=1.0,
        max_retries=10,
        batch_size=10
    )
    
    try:
        results = run_bfi_to_minimarker_simulation(
            participants_data,
            config,
            str(expanded_results_dir),
            False,
            get_expanded_prompt
        )
        print(f"✅ {model} expanded format completed. Results: {len(results)} participants.")
    except Exception as e:
        import traceback
        print(f"❌ {model} expanded format failed: {e}")
        traceback.print_exc()
        
print("\n" + "="*60)
print("EXPANDED FORMAT SIMULATION COMPLETED")
print("="*60)


Starting expanded format simulation...

Running openai-gpt-3.5-turbo-0125 - Expanded Format
Starting simulation for 50 participants using openai-gpt-3.5-turbo-0125
Temperature: 1.0, Batch size: 10
Processing participants 0 to 9
Completed batch 0 to 9
Processing participants 10 to 19
Completed batch 10 to 19
Processing participants 20 to 29
Completed batch 20 to 29
Processing participants 30 to 39
Completed batch 30 to 39
Processing participants 40 to 49
Completed batch 40 to 49
Results saved to study_3_results/study_3_expanded_results_i_am/bfi_to_minimarker_openai_gpt_3.5_turbo_0125_temp1_0.json
✅ openai-gpt-3.5-turbo-0125 expanded format completed. Results: 50 participants.

EXPANDED FORMAT SIMULATION COMPLETED


In [16]:
# Import binary format utilities
from binary_baseline_prompt import get_binary_prompt

# Binary format simulation for all models
binary_results_dir = output_base / "study_3_binary_results"
binary_results_dir.mkdir(exist_ok=True)

print("Starting binary format simulation...")

for model in models:
    print(f"\n{'='*50}")
    print(f"Running {model} - Binary Format")
    print(f"{'='*50}")
    
    config = SimulationConfig(
        model=model,
        temperature=1.0,
        max_retries=3,
        batch_size=10
    )
    
    try:
        results = run_bfi_to_minimarker_simulation(
            participants_data,
            config,
            str(binary_results_dir),
            False,
            get_binary_prompt
        )
        print(f"✅ {model} binary format completed. Results: {len(results)} participants.")
    except Exception as e:
        import traceback
        print(f"❌ {model} binary format failed: {e}")
        traceback.print_exc()
        
print("\n" + "="*60)
print("BINARY FORMAT SIMULATION COMPLETED")
print("="*60)


Starting binary format simulation...

Running openai-gpt-3.5-turbo-0125 - Binary Format
Starting simulation for 50 participants using openai-gpt-3.5-turbo-0125
Temperature: 1.0, Batch size: 10
Processing participants 0 to 9
Completed batch 0 to 9
Processing participants 10 to 19
Completed batch 10 to 19
Processing participants 20 to 29
Completed batch 20 to 29
Processing participants 30 to 39
Completed batch 30 to 39
Processing participants 40 to 49
Completed batch 40 to 49
Results saved to study_3_results/study_3_binary_results/bfi_to_minimarker_openai_gpt_3.5_turbo_0125_temp1_0.json
✅ openai-gpt-3.5-turbo-0125 binary format completed. Results: 50 participants.

BINARY FORMAT SIMULATION COMPLETED


In [19]:
# Import Likert format utilities
from schema_bfi2 import likert_scale
from mini_marker_prompt import get_likert_prompt

# Create Likert format personality descriptions
print("Creating Likert format personality descriptions...")

def create_likert_personality_description(row):
    """Create Likert-style personality description from BFI items."""
    descriptions = []
    for i in range(1, 61):
        bfi_col = f'bfi{i}'
        if bfi_col in row and not pd.isna(row[bfi_col]):
            value = int(row[bfi_col])
            if bfi_col in likert_scale:
                desc = likert_scale[bfi_col]
                descriptions.append(f"{desc} {value};")
    return " ".join(descriptions)

# Create Likert personality descriptions for all participants
preprocessed_data_likert = preprocessed_data.copy()
preprocessed_data_likert['combined_bfi2'] = preprocessed_data_likert.apply(create_likert_personality_description, axis=1)

# Convert to list of dicts for simulation
participants_data_likert = preprocessed_data_likert.to_dict('records')

print(f"Sample Likert personality description length: {len(participants_data_likert[0]['combined_bfi2'])} characters")
print(f"Sample Likert description: {participants_data_likert[0]['combined_bfi2'][:200]}...")

# Likert format simulation for all models
likert_results_dir = output_base / "study_3_likert_results"
likert_results_dir.mkdir(exist_ok=True)

print("Starting Likert format simulation...")

for model in models:
    print(f"\n{'='*50}")
    print(f"Running {model} - Likert Format")
    print(f"{'='*50}")
    
    config = SimulationConfig(
        model=model,
        temperature=1.0,
        max_retries=3,
        batch_size=10
    )
    
    try:
        results = run_bfi_to_minimarker_simulation(
            participants_data_likert,
            config,
            str(likert_results_dir),
            False,
            get_likert_prompt
        )
        print(f"✅ {model} Likert format completed. Results: {len(results)} participants.")
    except Exception as e:
        import traceback
        print(f"❌ {model} Likert format failed: {e}")
        traceback.print_exc()
        
print("\n" + "="*60)
print("LIKERT FORMAT SIMULATION COMPLETED")
print("="*60)


Creating Likert format personality descriptions...
Sample Likert personality description length: 2109 characters
Sample Likert description: Is outgoing, sociable: 3; Is compassionate, has a soft heart: 4; Tends to be disorganized: 5; Is relaxed, handles stress well: 1; Has few artistic interests: 1; Has an assertive personality: 2; Is res...
Starting Likert format simulation...

Running openai-gpt-3.5-turbo-0125 - Likert Format
Starting simulation for 50 participants using openai-gpt-3.5-turbo-0125
Temperature: 1.0, Batch size: 10
Processing participants 0 to 9
Completed batch 0 to 9
Processing participants 10 to 19
Completed batch 10 to 19
Processing participants 20 to 29
Completed batch 20 to 29
Processing participants 30 to 39
Completed batch 30 to 39
Processing participants 40 to 49
Completed batch 40 to 49
Results saved to study_3_results/study_3_likert_results/bfi_to_minimarker_openai_gpt_3.5_turbo_0125_temp1_0.json
✅ openai-gpt-3.5-turbo-0125 Likert format completed. Results: 50

In [None]:
print("="*80)
print("STUDY 3 MULTI-MODEL SIMULATION SUMMARY")
print("="*80)
print()
print("✅ Generated new simulated dataset using facet-level parameters")
print(f"   - Sample size: {len(simulated_data)} participants")
print(f"   - Dataset saved as: facet_lvl_simulated_data_NEW.csv")
print()
print("✅ Multi-model LLM simulation setup:")
print(f"   - Models tested: {', '.join(models)}")
print(f"   - Sample size for LLM simulation: {n_participants} participants")
print(f"   - Formats: Expanded ('I am...'), Binary, Likert (all implemented)")
print()
print("📁 Output directories created:")
print(f"   - {expanded_results_dir}")
print(f"   - {binary_results_dir}")
print(f"   - {likert_results_dir}")
print()
print("🔬 Next steps:")
print("   1. Run the simulation cells above")
print("   2. Check output files for successful completions")
print("   3. Run analysis scripts:")
print("      - study_3_expanded_convergent_analysis.py")
print("      - study_3_binary_convergent_analysis.py")
print("      - unified_convergent_analysis.py")
print()
print("="*80)


STUDY 3 MULTI-MODEL SIMULATION SUMMARY

✅ Generated new simulated dataset using facet-level parameters
   - Sample size: 200 participants
   - Dataset saved as: facet_lvl_simulated_data_NEW.csv

✅ Multi-model LLM simulation setup:
   - Models tested: openai-gpt-3.5-turbo-0125
   - Sample size for LLM simulation: 50 participants
   - Formats: Expanded ('I am...'), Binary, (Likert - placeholder)

📁 Output directories created:
   - study_3_results/study_3_expanded_results_i_am
   - study_3_results/study_3_binary_results
   - study_3_results/study_3_likert_results

🔬 Next steps:
   1. Run the simulation cells above
   2. Check output files for successful completions
   3. Run analysis scripts:
      - study_3_expanded_convergent_analysis.py
      - study_3_binary_convergent_analysis.py
      - unified_convergent_analysis.py

