# Multi-Model Personality Simulation - Study 2

This notebook refactors the original Study 2 BFI-2 to Mini-Marker simulation to work with multiple LLM models using the unified portal.py interface.

## Models to Test
- GPT-4
- GPT-4o  
- Llama-3.3-70B-Instruct
- DeepSeek-V3

## Data Flow
1. Load and preprocess Soto BFI-2 data
2. Apply reverse coding to personality items
3. Map numeric responses to expanded format descriptions
4. Generate personality simulation prompts
5. Run simulations across multiple models
6. Save results for analysis

## Next Steps
After running this notebook, use `study_2_analysis.ipynb` for comprehensive analysis of the results.

In [1]:
import pandas as pd
import sys
from pathlib import Path

# Add shared modules to path
sys.path.append('../shared')

from simulation_utils import (
    SimulationConfig, 
    run_bfi_to_minimarker_simulation,
    retry_failed_participants
)
from schema_bfi2 import expanded_scale
from mini_marker_prompt import get_prompt

## Data Loading and Preprocessing

In [2]:
# Load the Soto BFI-2 dataset
data_path = Path('../../raw_data/Soto_data.xlsx')
if not data_path.exists():
    print(f"Data file not found at {data_path}")
    print("Please ensure the raw_data/Soto_data.xlsx file exists in the project root")
    raise FileNotFoundError(f"Data file not found: {data_path}")

data = pd.read_excel(data_path, sheet_name='data')
print(f"Loaded data shape: {data.shape}")
data.head()

Loaded data shape: (470, 704)


Unnamed: 0,case_id,age,sex,ethnicity,rel_acquaintance,rel_friend,rel_roommate,rel_boygirlfriend,rel_relative,rel_other,...,tneo_n3_dep,tneo_n4_sel,tneo_n5_imp,tneo_n6_vul,tneo_o1_fan,tneo_o2_aes,tneo_o3_fee,tneo_o4_act,tneo_o5_ide,tneo_o6_val
0,1,27.0,M,2.0,,,,,,,...,51.25,40.181818,64.0,55.102041,46.639344,46.969697,66.7,57.065217,41.984127,58.039216
1,2,26.0,M,3.0,,,,,,,...,69.632353,60.636364,66.272727,65.306122,54.836066,56.439394,51.7,51.630435,51.904762,45.784314
2,3,24.0,F,4.0,,,,,,,...,60.441176,74.272727,54.909091,65.306122,75.327869,56.439394,56.7,40.76087,51.904762,58.039216
3,4,33.0,M,3.0,,1.0,,,,,...,67.794118,58.363636,64.0,52.55102,54.836066,50.757576,36.7,65.217391,63.809524,58.039216
4,5,23.0,F,5.0,,,,,,,...,62.279412,67.454545,41.272727,60.204082,50.737705,48.863636,49.2,46.195652,38.015873,38.431373


In [3]:
# Generate column names for TDA (Mini-Marker) and BFI-2 items
tda_columns = [f"tda{i}" for i in range(1, 41)]
sbfi_columns = [f"bfi{i}" for i in range(1, 61)]
selected_columns = tda_columns + sbfi_columns

print(f"Original data shape: {data.shape}")

# Remove rows with missing values in the selected columns
data = data.dropna(subset=selected_columns)
print(f"Data shape after removing missing values: {data.shape}")

Original data shape: (470, 704)
Data shape after removing missing values: (438, 704)


In [4]:
# Reverse coding map for BFI-2 items
reverse_coding_map = {
    'bfi1': 'bfi1', 'bfi2': 'bfi2', 'bfi3': 'bfi3R', 'bfi4': 'bfi4R', 'bfi5': 'bfi5R',
    'bfi6': 'bfi6', 'bfi7': 'bfi7', 'bfi8': 'bfi8R', 'bfi9': 'bfi9R', 'bfi10': 'bfi10',
    'bfi11': 'bfi11R', 'bfi12': 'bfi12R', 'bfi13': 'bfi13', 'bfi14': 'bfi14', 'bfi15': 'bfi15',
    'bfi16': 'bfi16R', 'bfi17': 'bfi17R', 'bfi18': 'bfi18', 'bfi19': 'bfi19', 'bfi20': 'bfi20',
    'bfi21': 'bfi21', 'bfi22': 'bfi22R', 'bfi23': 'bfi23R', 'bfi24': 'bfi24R', 'bfi25': 'bfi25R',
    'bfi26': 'bfi26R', 'bfi27': 'bfi27', 'bfi28': 'bfi28R', 'bfi29': 'bfi29R', 'bfi30': 'bfi30R',
    'bfi31': 'bfi31R', 'bfi32': 'bfi32', 'bfi33': 'bfi33', 'bfi34': 'bfi34', 'bfi35': 'bfi35',
    'bfi36': 'bfi36R', 'bfi37': 'bfi37R', 'bfi38': 'bfi38', 'bfi39': 'bfi39', 'bfi40': 'bfi40',
    'bfi41': 'bfi41', 'bfi42': 'bfi42R', 'bfi43': 'bfi43', 'bfi44': 'bfi44R', 'bfi45': 'bfi45R',
    'bfi46': 'bfi46', 'bfi47': 'bfi47R', 'bfi48': 'bfi48R', 'bfi49': 'bfi49R', 'bfi50': 'bfi50R',
    'bfi51': 'bfi51R', 'bfi52': 'bfi52', 'bfi53': 'bfi53', 'bfi54': 'bfi54', 'bfi55': 'bfi55R',
    'bfi56': 'bfi56', 'bfi57': 'bfi57', 'bfi58': 'bfi58R', 'bfi59': 'bfi59', 'bfi60': 'bfi60'
}

# Apply reverse coding
for key, value in reverse_coding_map.items():
    if value.endswith('R'):  # Reverse coded
        data[key] = 6 - data[key]
    # else: keep original value

print("Reverse coding applied successfully")

Reverse coding applied successfully


In [5]:
# Map numeric values to expanded format descriptions
def map_values(row):
    mapped_row = row.copy()
    for key in expanded_scale:
        if pd.notna(row[key]):  # Check if the value is not NaN
            index = int(row[key]) - 1  # Convert to 0-index
            mapped_row[key] = expanded_scale[key][index]  # Replace with corresponding string
    return mapped_row

# Apply mapping to BFI columns
mapped_data = data[sbfi_columns].apply(map_values, axis=1)

# Create combined BFI-2 description
mapped_data['combined_bfi2'] = mapped_data[[f'bfi{i}' for i in range(1, 61)]].apply(
    lambda row: ' '.join(row), axis=1
)

# Add combined description to original data
data['combined_bfi2'] = mapped_data['combined_bfi2']

print("Personality descriptions created successfully")
print(f"Final data shape: {data.shape}")

Personality descriptions created successfully
Final data shape: (438, 705)


  mapped_row[key] = expanded_scale[key][index]  # Replace with corresponding string
  mapped_row[key] = expanded_scale[key][index]  # Replace with corresponding string
  mapped_row[key] = expanded_scale[key][index]  # Replace with corresponding string
  mapped_row[key] = expanded_scale[key][index]  # Replace with corresponding string
  mapped_row[key] = expanded_scale[key][index]  # Replace with corresponding string


In [6]:
# Preview a personality description
print("Sample personality description:")
print(data.iloc[0]['combined_bfi2'][:500] + "...")

Sample personality description:
I am very outgoing, sociable. I am very compassionate almost always soft-hearted. I am fairly organized. I am somewhat relaxed handle stress somewhat well. I have some artistic interests. I am quite assertive. I am very respectful almost always treat others with respect. I am often lazy. I stay very optimistic after experiencing a setback. I am curious about few things. I often feel excited or eager. I rarely find fault with others. I am very dependable steady. I am fairly moody often have up an...


## Multi-Model Simulation Configuration

In [7]:
# Configuration for different models and temperatures
models_to_test = ['gpt-4', 'gpt-4o', 'llama', 'deepseek']
temperatures = [1.0]  # Test both deterministic and stochastic responses
batch_size = 25  # Smaller batch size for stability across different APIs

# Create participant data list from DataFrame
participants_data = data.to_dict('records')
print(f"Prepared {len(participants_data)} participants for simulation")

# For testing: uncomment these lines to use a subset
# models_to_test = ['deepseek']
# participants_data = participants_data[:30]

Prepared 438 participants for simulation


## Run Simulations for All Models

In [8]:
# Run simulations for all model-temperature combinations
all_results = {}

for model in models_to_test:
    for temperature in temperatures:
        print(f"\n{'='*60}")
        print(f"Starting simulation: {model} with temperature {temperature}")
        print(f"{'='*60}")
        
        config = SimulationConfig(
            model=model,
            temperature=temperature,
            batch_size=batch_size,
            max_workers=10
        )
        
        try:
            results = run_bfi_to_minimarker_simulation(
                participants_data=participants_data,
                config=config,
                output_dir="study_2_results"
            )
            
            # Store results
            key = f"{model}_temp{temperature}"
            all_results[key] = results
            
            # Check for any failed participants
            failed_count = sum(1 for r in results if isinstance(r, dict) and 'error' in r)
            if failed_count > 0:
                print(f"Warning: {failed_count} participants failed. Consider retrying.")
                
            print(f"Completed simulation: {model} with temperature {temperature}")
            
        except Exception as e:
            print(f"Error in simulation {model} temp {temperature}: {str(e)}")
            all_results[f"{model}_temp{temperature}"] = {"error": str(e)}

print(f"\nCompleted all simulations. Results keys: {list(all_results.keys())}")


Starting simulation: gpt-4 with temperature 1.0
Starting simulation for 438 participants using gpt-4
Temperature: 1.0, Batch size: 25
Processing participants 0 to 24
Completed batch 0 to 24
Processing participants 25 to 49
Completed batch 25 to 49
Processing participants 50 to 74
Completed batch 50 to 74
Processing participants 75 to 99
Completed batch 75 to 99
Processing participants 100 to 124
Completed batch 100 to 124
Processing participants 125 to 149
Completed batch 125 to 149
Processing participants 150 to 174
Completed batch 150 to 174
Processing participants 175 to 199
Completed batch 175 to 199
Processing participants 200 to 224
Completed batch 200 to 224
Processing participants 225 to 249
Completed batch 225 to 249
Processing participants 250 to 274
Completed batch 250 to 274
Processing participants 275 to 299
Completed batch 275 to 299
Processing participants 300 to 324
Completed batch 300 to 324
Processing participants 325 to 349
Completed batch 325 to 349
Processing part

## Retry Failed Participants (if any)

In [9]:
# Retry any failed participants
for key, results in all_results.items():
    if isinstance(results, list):
        failed_count = sum(1 for r in results if isinstance(r, dict) and 'error' in r)
        if failed_count > 0:
            print(f"Retrying {failed_count} failed participants for {key}")
            
            # Extract model and temperature from key
            model = key.split('_temp')[0]
            temperature = float(key.split('_temp')[1])
            
            config = SimulationConfig(
                model=model,
                temperature=temperature,
                batch_size=batch_size
            )
            
            updated_results = retry_failed_participants(
                results=results,
                participants_data=participants_data,
                prompt_generator=get_prompt,  # Use imported get_prompt function
                config=config,
                personality_key='combined_bfi2'
            )
            
            all_results[key] = updated_results
            
            # Save updated results
            from simulation_utils import save_simulation_results
            save_simulation_results(updated_results, "study_2_results", "bfi_to_minimarker_retried", config)

print("Retry process completed")

Retry process completed


## Results Summary

In [10]:
# Analyze results summary
print("Simulation Results Summary:")
print("=" * 50)

for key, results in all_results.items():
    if isinstance(results, list):
        total_participants = len(results)
        successful = sum(1 for r in results if not (isinstance(r, dict) and 'error' in r))
        failed = total_participants - successful
        success_rate = (successful / total_participants) * 100
        
        print(f"{key}:")
        print(f"  Total: {total_participants}, Successful: {successful}, Failed: {failed}")
        print(f"  Success Rate: {success_rate:.1f}%")
        print()
    else:
        print(f"{key}: FAILED - {results.get('error', 'Unknown error')}")
        print()

Simulation Results Summary:
gpt-4_temp1.0:
  Total: 438, Successful: 438, Failed: 0
  Success Rate: 100.0%

gpt-4o_temp1.0:
  Total: 438, Successful: 438, Failed: 0
  Success Rate: 100.0%

llama_temp1.0:
  Total: 438, Successful: 438, Failed: 0
  Success Rate: 100.0%

deepseek_temp1.0:
  Total: 438, Successful: 438, Failed: 0
  Success Rate: 100.0%



## Save Preprocessed Data

In [11]:
# Save the preprocessed data for reference
output_path = Path('study_2_results')
output_path.mkdir(exist_ok=True)

data.to_csv(output_path / 'study2_preprocessed_data.csv', index=False)
print(f"Preprocessed data saved to {output_path / 'study2_preprocessed_data.csv'}")

print("\n" + "="*60)
print("SIMULATION COMPLETE!")
print("\nNext steps:")
print("1. Run study_2_analysis.ipynb for comprehensive analysis")
print("2. Results are saved in study_2_results/ directory")
print("3. Preprocessed data available for validation")
print("="*60)

Preprocessed data saved to study_2_results/study2_preprocessed_data.csv

SIMULATION COMPLETE!

Next steps:
1. Run study_2_analysis.ipynb for comprehensive analysis
2. Results are saved in study_2_results/ directory
3. Preprocessed data available for validation
