In [None]:
import pandas as pd
import numpy as np

In [None]:
# Per-participant change scores (post minus pre) for trust and for the two
# behaviour-prediction measures, written out for the modelling step.
df = pd.read_csv('data_clean/data_participants.csv').assign(
    delta_trust=lambda d: d.post_trust - d.pre_trust,
    delta_unintended=lambda d: (
        d.post_predict_unintended_behaviors - d.pre_predict_unintended_behaviors
    ),
    delta_neg_unintended=lambda d: (
        d.post_predict_negative_behaviors - d.pre_predict_negative_behaviors
    ),
)

df.to_csv('model_data/delta_differences.csv', index=False)

## 6. Correlating Trait Prediction

- Take participants' trait predictions and check how well they correlate with the actual trait activations.
- Focus on the first persona-vector generation.
- Rescale each prediction by its distance from the scale midpoint (5), which corresponds to 0 activation.
- Map the rescaled predictions onto the matching pole of each trait so that predictions and activations are directly comparable.

In [None]:
# Quick look at the column names available in the cleaned prediction export.
df = pd.read_csv('data_clean/persona_prediction.csv')
df.columns

In [None]:
# Load predictions (0–10 scale)
df_pred = pd.read_csv('data_clean/persona_prediction.csv')


def pole(v):
    """Split one rating into ``(left_pole, right_pole)`` strengths.

    Ratings below the midpoint 5 load on the left pole as ``(5 - v) / 5``;
    ratings above 5 load on the right pole as ``(v - 5) / 5``. The other pole
    (and both poles at exactly 5) is 0.0, so at most one entry is non-zero.
    """
    return ((5 - v) / 5 if v < 5 else 0.0, (v - 5) / 5 if v > 5 else 0.0)


# Each entry: (source rating column, right-pole output column, left-pole output column).
# Output columns follow the pattern {trait}_{pole}_norm.
traits = [
    ('pre_empathy',       'empathy_empathetic_norm',        'empathy_unempathetic_norm'),
    ('pre_encouraging',   'encouraging_encouraging_norm',   'encouraging_discouraging_norm'),
    ('pre_formality',     'formality_casual_norm',          'formality_formal_norm'),
    ('pre_funniness',     'funniness_funny_norm',           'funniness_serious_norm'),
    ('pre_hallucination', 'hallucination_factual_norm',     'hallucination_hallucinatory_norm'),
    ('pre_honesty',       'honesty_honest_norm',            'honesty_sycophantic_norm'),
    ('pre_sociality',     'sociality_social_norm',          'sociality_antisocial_norm'),
    ('pre_toxicity',      'toxicity_respectful_norm',       'toxicity_toxic_norm'),
]

# Apply the mapping per trait. This is the column-wise equivalent of calling
# `pole` on every rating, without building one pandas Series per row.
# NOTE(review): a missing rating fails both comparisons and therefore becomes
# 0.0 on both poles (same as `pole`), i.e. it is treated like a midpoint rating.
for src, right_col, left_col in traits:
    rating = df_pred[src]
    df_pred[left_col] = np.where(rating < 5, (5 - rating) / 5, 0.0)
    df_pred[right_col] = np.where(rating > 5, (rating - 5) / 5, 0.0)

# Quick preview: for every trait, the raw rating followed by its two pole columns
preview_cols = [col for trait_cols in traits for col in trait_cols]
df_pred[preview_cols].head(5)

In [None]:
# Export a column subset of df_pred for modelling, in this order: identifiers,
# the per-pole trait columns from the raw export, the condition name, and the
# normalised predictions.
# NOTE(review): only the empathy, formality and toxicity *_norm columns are
# exported here, while later cells read pre_*/norm columns for all eight
# traits from this same file — confirm the subset is intentional.
export_id_cols = ['firebase_id', 'prolific_id', 'timestamp', 'condition', 'system_prompt']
export_pole_cols = [
    'empathy_empathetic', 'empathy_unempathetic',
    'encouraging_encouraging', 'encouraging_discouraging',
    'formality_formal', 'formality_casual',
    'funniness_funny', 'funniness_serious',
    'hallucination_factual', 'hallucination_hallucinatory',
    'sociality_social', 'sociality_antisocial',
    'sycophancy_honest', 'sycophancy_sycophantic',
    'toxicity_respectful', 'toxicity_toxic',
]
export_norm_cols = [
    'pre_empathy', 'empathy_empathetic_norm', 'empathy_unempathetic_norm',
    'pre_formality', 'formality_formal_norm', 'formality_casual_norm',
    'pre_toxicity', 'toxicity_respectful_norm', 'toxicity_toxic_norm',
]

export_cols = export_id_cols + export_pole_cols + ['condition_name'] + export_norm_cols
df_pred[export_cols].to_csv('model_data/persona_prediction.csv', index=False)

In [None]:
# Reload the exported modelling table and list its columns.
df = pd.read_csv('model_data/persona_prediction.csv')
df.columns

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the data
# NOTE(review): the *_polar columns read below are produced by the polar
# transformation cells further down the notebook; the export cell above does
# not write them, so this cell depends on those cells having been run first.
df = pd.read_csv('model_data/persona_prediction.csv')

# One frame per pole: the *_norm_polar column is the participant's prediction,
# the plain *_polar column is the trait activation.
empathetic_data = (
    df[['empathy_empathetic_norm_polar', 'empathy_empathetic_polar']]
    .rename(columns={
        'empathy_empathetic_norm_polar': 'Normalized Prediction',
        'empathy_empathetic_polar': 'Activation',
    })
    .assign(Pole='Empathetic')
)

unempathetic_data = (
    df[['empathy_unempathetic_norm_polar', 'empathy_unempathetic_polar']]
    .rename(columns={
        'empathy_unempathetic_norm_polar': 'Normalized Prediction',
        'empathy_unempathetic_polar': 'Activation',
    })
    .assign(Pole='Unempathetic')
)

# Stack both poles into one long table and persist it for the regression plot
empathy_plot_data = pd.concat([empathetic_data, unempathetic_data], ignore_index=True)
empathy_plot_data.to_csv('model_data/empathy_activations.csv', index=False)

In [None]:
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt

# Pooled empathy scatter (user prediction vs. persona activation) with a
# least-squares line, its 95% confidence band, and printed fit statistics.
fig, ax = plt.subplots(figsize=(12, 8))
df = pd.read_csv('model_data/empathy_activations.csv')

# Both poles are pooled into one x/y sample for regression and plotting
all_x = df['Normalized Prediction']
all_y = df['Activation']

# Plot all data points in grey
ax.scatter(all_x, all_y, color='grey', alpha=0.6, s=60)

# Least-squares fit. The `std_err` returned by linregress is the standard
# error of the slope only; it is not a residual standard error.
slope, intercept, r_value, p_value, std_err = stats.linregress(all_x, all_y)

# Regression line evaluated across the observed x range
line_x = np.linspace(all_x.min(), all_x.max(), 100)
line_y = slope * line_x + intercept

# Ingredients shared by the confidence band and the coefficient intervals
n = len(all_x)
x_mean = np.mean(all_x)
sxx = np.sum((all_x - x_mean) ** 2)
residuals = all_y - (slope * all_x + intercept)
mse = np.sum(residuals ** 2) / (n - 2)  # residual variance, n - 2 degrees of freedom
t_val = stats.t.ppf(0.975, n - 2)       # two-sided 95% critical value

# Standard errors of the two fitted coefficients
se_intercept = np.sqrt(mse * (1/n + x_mean**2/sxx))
se_slope = np.sqrt(mse/sxx)

# Standard error of the fitted line at each plotted x, and the resulting band
se_line = np.sqrt(mse * (1/n + (line_x - x_mean)**2 / sxx))
ci_lower = line_y - t_val * se_line
ci_upper = line_y + t_val * se_line

# Plot regression line with confidence interval
ax.plot(line_x, line_y, color='red', linewidth=2, linestyle='--', alpha=0.8)
ax.fill_between(line_x, ci_lower, ci_upper, color='red', alpha=0.2, label='95% Confidence Interval')

# Reference lines through the origin
ax.axvline(x=0, color='lightgrey', linewidth=1, alpha=0.7)
ax.axhline(y=0, color='lightgrey', linewidth=1, alpha=0.7)

# Custom legend. The p-value text is derived from the fit instead of being
# hard-coded, and the band swatch uses the same colour as the drawn band.
p_label = 'p < 0.001' if p_value < 0.001 else f'p = {p_value:.3f}'
legend_elements = [
    plt.Line2D([0], [0], color='red', linewidth=2, linestyle='--',
               label=f'Regression (r={r_value:.3f}, R²={r_value**2:.2f}, {p_label})'),
    plt.Rectangle((0, 0), 1, 1, facecolor='red', alpha=0.2, label='95% Confidence Interval')
]
ax.legend(handles=legend_elements, loc='upper left', fontsize=16, frameon=True)

# Styling to match the reference image
ax.set_xlabel('User Prediction', fontsize=20)
ax.set_ylabel('Persona Activation', fontsize=20)

# Set axis limits and ticks to match the reference
ax.set_xlim(-1.0, 1.1)
ax.set_ylim(-0.6, 1.1)
ax.set_xticks([-0.75, -0.50, -0.25, 0.00, 0.25, 0.50, 0.75, 1.00])
ax.set_yticks([-0.4, -0.2, 0.0, 0.2, 0.4, 0.6, 0.8, 1.0])

ax.tick_params(axis='x', labelsize=16)
ax.tick_params(axis='y', labelsize=16)

# Grid styling
ax.grid(True, alpha=0.3, linestyle='-', linewidth=0.5)

# Keep all spines visible but make them subtle
for spine in ax.spines.values():
    spine.set_visible(True)
    spine.set_color('black')
    spine.set_linewidth(0.8)

plt.tight_layout()
plt.show()

# Check data distribution
print("Data distribution:")
print(f"X-data mean: {x_mean:.3f}")
print(f"X-data range: [{all_x.min():.3f}, {all_x.max():.3f}]")
print(f"Negative values: {(all_x < 0).sum()} out of {len(all_x)} ({100*(all_x < 0).sum()/len(all_x):.1f}%)")
print(f"Positive values: {(all_x > 0).sum()} out of {len(all_x)} ({100*(all_x > 0).sum()/len(all_x):.1f}%)")

# Print additional statistics
print(f"Correlation coefficient: {r_value:.3f}")
print(f"R-squared: {r_value**2:.3f}")
print(f"P-value: {p_value:.3e}")
print(f"Regression equation: y = {slope:.3f}x + {intercept:.3f}")
print(f"Standard error of regression: {np.sqrt(mse):.3f}")

# Print confidence interval range (min and max of the CI band)
print("\nConfidence Interval Band Range:")
print(f"CI Lower bound range: {ci_lower.min():.3f} to {ci_lower.max():.3f}")
print(f"CI Upper bound range: {ci_upper.min():.3f} to {ci_upper.max():.3f}")

# 95% half-widths for the coefficients: critical t times each coefficient's
# own standard error (se_slope equals linregress's std_err).
print("\n95% Confidence Intervals for Coefficients:")
print(f"Intercept: {intercept:.3f} ± {t_val * se_intercept:.3f}")
print(f"Slope: {slope:.3f} ± {t_val * se_slope:.3f}")

# The same intervals expressed as explicit bounds
intercept_ci_lower = intercept - t_val * se_intercept
intercept_ci_upper = intercept + t_val * se_intercept
slope_ci_lower = slope - t_val * se_slope
slope_ci_upper = slope + t_val * se_slope

print("\nCoefficient 95% CIs (matching JASP format):")
print(f"Intercept: [{intercept_ci_lower:.3f}, {intercept_ci_upper:.3f}]")
print(f"Slope: [{slope_ci_lower:.3f}, {slope_ci_upper:.3f}]")

In [None]:
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt

# Same pooled empathy regression figure as the previous cell, but drawn with
# only the bottom and left spines visible.
fig, ax = plt.subplots(figsize=(12, 8))
df = pd.read_csv('model_data/empathy_activations.csv')

# Both poles are pooled into one x/y sample for regression and plotting
all_x = df['Normalized Prediction']
all_y = df['Activation']

# Plot all data points in grey
ax.scatter(all_x, all_y, color='grey', alpha=0.6, s=60)

# Least-squares fit. The `std_err` returned by linregress is the standard
# error of the slope only; it is not a residual standard error.
slope, intercept, r_value, p_value, std_err = stats.linregress(all_x, all_y)

# Regression line evaluated across the observed x range
line_x = np.linspace(all_x.min(), all_x.max(), 100)
line_y = slope * line_x + intercept

# Ingredients shared by the confidence band and the coefficient intervals
n = len(all_x)
x_mean = np.mean(all_x)
sxx = np.sum((all_x - x_mean) ** 2)
residuals = all_y - (slope * all_x + intercept)
mse = np.sum(residuals ** 2) / (n - 2)  # residual variance, n - 2 degrees of freedom
t_val = stats.t.ppf(0.975, n - 2)       # two-sided 95% critical value

# Standard errors of the two fitted coefficients
se_intercept = np.sqrt(mse * (1/n + x_mean**2/sxx))
se_slope = np.sqrt(mse/sxx)

# Standard error of the fitted line at each plotted x, and the resulting band
se_line = np.sqrt(mse * (1/n + (line_x - x_mean)**2 / sxx))
ci_lower = line_y - t_val * se_line
ci_upper = line_y + t_val * se_line

# Plot regression line with confidence interval
ax.plot(line_x, line_y, color='red', linewidth=2, linestyle='--', alpha=0.8)
ax.fill_between(line_x, ci_lower, ci_upper, color='red', alpha=0.2, label='95% Confidence Interval')

# Reference lines through the origin
ax.axvline(x=0, color='lightgrey', linewidth=1, alpha=0.7)
ax.axhline(y=0, color='lightgrey', linewidth=1, alpha=0.7)

# Custom legend. The p-value text is derived from the fit instead of being
# hard-coded, and the band swatch uses the same colour as the drawn band.
p_label = 'p < 0.001' if p_value < 0.001 else f'p = {p_value:.3f}'
legend_elements = [
    plt.Line2D([0], [0], color='red', linewidth=2, linestyle='--',
               label=f'Regression (r={r_value:.3f}, R²={r_value**2:.2f}, {p_label})'),
    plt.Rectangle((0, 0), 1, 1, facecolor='red', alpha=0.2, label='95% Confidence Interval')
]
ax.legend(handles=legend_elements, loc='upper left', fontsize=16, frameon=True)

# Styling to match the reference image
ax.set_xlabel('User Prediction', fontsize=20)
ax.set_ylabel('Persona Activation', fontsize=20)

# Set axis limits and ticks to match the reference
ax.set_xlim(-1.0, 1.1)
ax.set_ylim(-0.6, 1.1)
ax.set_xticks([-0.75, -0.50, -0.25, 0.00, 0.25, 0.50, 0.75, 1.00])
ax.set_yticks([-0.4, -0.2, 0.0, 0.2, 0.4, 0.6, 0.8, 1.0])

ax.tick_params(axis='x', labelsize=16)
ax.tick_params(axis='y', labelsize=16)

# Grid styling
ax.grid(True, alpha=0.3, linestyle='-', linewidth=0.5)

# Remove top and right spines, keep only bottom and left (x and y axes)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
for side in ('bottom', 'left'):
    ax.spines[side].set_visible(True)
    ax.spines[side].set_color('black')
    ax.spines[side].set_linewidth(0.8)

plt.tight_layout()
plt.show()

# Check data distribution
print("Data distribution:")
print(f"X-data mean: {x_mean:.3f}")
print(f"X-data range: [{all_x.min():.3f}, {all_x.max():.3f}]")
print(f"Negative values: {(all_x < 0).sum()} out of {len(all_x)} ({100*(all_x < 0).sum()/len(all_x):.1f}%)")
print(f"Positive values: {(all_x > 0).sum()} out of {len(all_x)} ({100*(all_x > 0).sum()/len(all_x):.1f}%)")

# Print additional statistics
print(f"Correlation coefficient: {r_value:.3f}")
print(f"R-squared: {r_value**2:.3f}")
print(f"P-value: {p_value:.3e}")
print(f"Regression equation: y = {slope:.3f}x + {intercept:.3f}")
print(f"Standard error of regression: {np.sqrt(mse):.3f}")

# Print confidence interval range (min and max of the CI band)
print("\nConfidence Interval Band Range:")
print(f"CI Lower bound range: {ci_lower.min():.3f} to {ci_lower.max():.3f}")
print(f"CI Upper bound range: {ci_upper.min():.3f} to {ci_upper.max():.3f}")

# 95% half-widths for the coefficients: critical t times each coefficient's
# own standard error (se_slope equals linregress's std_err).
print("\n95% Confidence Intervals for Coefficients:")
print(f"Intercept: {intercept:.3f} ± {t_val * se_intercept:.3f}")
print(f"Slope: {slope:.3f} ± {t_val * se_slope:.3f}")

# The same intervals expressed as explicit bounds
intercept_ci_lower = intercept - t_val * se_intercept
intercept_ci_upper = intercept + t_val * se_intercept
slope_ci_lower = slope - t_val * se_slope
slope_ci_upper = slope + t_val * se_slope

print("\nCoefficient 95% CIs (matching JASP format):")
print(f"Intercept: [{intercept_ci_lower:.3f}, {intercept_ci_upper:.3f}]")
print(f"Slope: [{slope_ci_lower:.3f}, {slope_ci_upper:.3f}]")

In [None]:
import pandas as pd
from scipy import stats

# Load the per-participant table explicitly. The regression cells above rebind
# `df` to model_data/empathy_activations.csv, which has no *_polar columns, so
# relying on the leftover `df` raises KeyError when the notebook runs in order.
df = pd.read_csv('model_data/persona_prediction.csv')

# Pearson correlation between prediction and activation, separately per pole
empathetic_corr, empathetic_p = stats.pearsonr(
    df['empathy_empathetic_norm_polar'],
    df['empathy_empathetic_polar']
)

unempathetic_corr, unempathetic_p = stats.pearsonr(
    df['empathy_unempathetic_norm_polar'],
    df['empathy_unempathetic_polar']
)

print("Empathy Prediction Accuracy Analysis")
print("=" * 50)
print("\nEmpathetic:")
print(f"  Pearson r = {empathetic_corr:.3f}")
print(f"  p-value = {empathetic_p:.4f}")
print(f"  Significant: {'Yes' if empathetic_p < 0.05 else 'No'}")

print("\nUnempathetic:")
print(f"  Pearson r = {unempathetic_corr:.3f}")
print(f"  p-value = {unempathetic_p:.4f}")
print(f"  Significant: {'Yes' if unempathetic_p < 0.05 else 'No'}")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the data
df = pd.read_csv('model_data/persona_prediction.csv')

# Create bipolar representations
# Positive = empathetic, Negative = unempathetic.
# The negative-pole *_polar columns are already multiplied by -1 when they are
# created by the polar transformation cells, so they are stacked as-is here;
# negating them again would turn the unempathetic pole positive.
empathy_prediction_bipolar = pd.concat([
    df['empathy_empathetic_norm_polar'],
    df['empathy_unempathetic_norm_polar'],
], ignore_index=True)

empathy_activation_bipolar = pd.concat([
    df['empathy_empathetic_polar'],
    df['empathy_unempathetic_polar'],
], ignore_index=True)

# Create combined dataframe (one row per participant and pole, fresh 0..2n-1 index)
empathy_bipolar_data = pd.DataFrame({
    'Prediction': empathy_prediction_bipolar,
    'Activation': empathy_activation_bipolar
})

empathy_bipolar_data.head(10)

In [None]:
from scipy import stats

# Pearson correlation between prediction and activation over the stacked
# bipolar table built in the previous cell.
corr, p_value = stats.pearsonr(
    *(empathy_bipolar_data[column] for column in ('Prediction', 'Activation'))
)

report_lines = [
    "Empathy Bipolar Trait - Prediction Accuracy",
    "=" * 50,
    f"Pearson r = {corr:.3f}",
    f"p-value = {p_value:.4f}",
    f"R² = {corr**2:.3f} (variance explained)",
    f"Significant: {'Yes' if p_value < 0.05 else 'No'}",
]
print("\n".join(report_lines))

In [None]:
import pandas as pd

# Read the data (with all norm columns now)
df = pd.read_csv('model_data/persona_prediction.csv')

# This cell overwrites the file it reads. If it is run on its own output, none
# of the source columns exist any more and the rewrite would silently strip
# every polar column, so refuse to run in that state.
if any(col.endswith('_polar') for col in df.columns):
    raise RuntimeError(
        "model_data/persona_prediction.csv already contains *_polar columns; "
        "regenerate the normalised file before re-running this transformation."
    )

# Define trait mappings: (positive_pole, negative_pole)
trait_mappings = {
    'empathy': ('empathetic', 'unempathetic'),
    'encouraging': ('encouraging', 'discouraging'),
    'formality': ('casual', 'formal'),  # Casual is positive, Formal is negative
    'funniness': ('funny', 'serious'),
    'hallucination': ('factual', 'hallucinatory'),
    'honesty': ('honest', 'sycophantic'),
    'sociality': ('social', 'antisocial'),
    'toxicity': ('respectful', 'toxic'),
}

# The activation columns for honesty carry the prefix 'sycophancy'
# (sycophancy_honest / sycophancy_sycophantic), while the prediction columns
# use 'honesty'. Without this override the honesty activations never match.
activation_prefix = {'honesty': 'sycophancy'}

# Metadata columns to keep
metadata_cols = ['firebase_id', 'prolific_id', 'timestamp', 'condition', 'system_prompt', 'condition_name',
                 'pre_empathy', 'pre_encouraging', 'pre_formality', 'pre_funniness',
                 'pre_hallucination', 'pre_honesty', 'pre_sociality', 'pre_toxicity']

# Create new dataframe with metadata
result_df = df[metadata_cols].copy()

# Process each trait: positive poles are copied unchanged, negative poles are
# multiplied by -1; every output column gets a _polar suffix.
skipped_cols = []
for trait, (pos_pole, neg_pole) in trait_mappings.items():
    act_prefix = activation_prefix.get(trait, trait)
    source_cols = [
        (f'{act_prefix}_{pos_pole}', False),      # activation, positive pole
        (f'{act_prefix}_{neg_pole}', True),       # activation, negative pole
        (f'{trait}_{pos_pole}_norm', False),      # prediction, positive pole
        (f'{trait}_{neg_pole}_norm', True),       # prediction, negative pole
    ]
    for source_col, flip_sign in source_cols:
        if source_col not in df.columns:
            skipped_cols.append(source_col)
            continue
        result_df[f'{source_col}_polar'] = df[source_col] * -1 if flip_sign else df[source_col]

# Surface anything that was not found instead of dropping it silently
if skipped_cols:
    print(f"WARNING: source columns not found and skipped: {skipped_cols}")

# Save the result
result_df.to_csv('model_data/persona_prediction.csv', index=False)

print("✓ Transformation complete!")
print(f"✓ Original columns: {len(df.columns)}")
print(f"✓ New columns: {len(result_df.columns)}")
print("\nNew column structure:")
print(f"  - Metadata columns: {len(metadata_cols)}")
print(f"  - Polar columns: {len(result_df.columns) - len(metadata_cols)}")

# Display the first few rows
result_df.head()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
from scipy.stats import linregress

# Load the polar data
df = pd.read_csv('model_data/persona_prediction.csv')

# Stack the positive-pole column on top of the (already sign-flipped)
# negative-pole column to get one signed series per measure.
empathy_pred_cols = ['empathy_empathetic_norm_polar', 'empathy_unempathetic_norm_polar']
empathy_act_cols = ['empathy_empathetic_polar', 'empathy_unempathetic_polar']

empathy_prediction_polar = pd.concat([df[col] for col in empathy_pred_cols])
empathy_activation_polar = pd.concat([df[col] for col in empathy_act_cols])

# Two-column frame used for plotting
empathy_data = pd.DataFrame(
    {'Prediction': empathy_prediction_polar, 'Activation': empathy_activation_polar}
)

empathy_data.head(10)

In [None]:
# Same stacking as for empathy, applied to the encouraging/discouraging trait:
# positive-pole column first, then the already sign-flipped negative pole.
encouraging_pred_cols = ['encouraging_encouraging_norm_polar', 'encouraging_discouraging_norm_polar']
encouraging_act_cols = ['encouraging_encouraging_polar', 'encouraging_discouraging_polar']

encouraging_prediction_polar = pd.concat([df[col] for col in encouraging_pred_cols])
encouraging_activation_polar = pd.concat([df[col] for col in encouraging_act_cols])

# Two-column frame used for plotting
encouraging_data = pd.DataFrame(
    {'Prediction': encouraging_prediction_polar, 'Activation': encouraging_activation_polar}
)

encouraging_data.head(10)

In [None]:
# List every column currently present in the dataframe
print("Available columns:", df.columns.tolist(), sep="\n")

### Trying again: rebuild the normalized and polar columns for all traits


In [None]:
import numpy as np
import pandas as pd

# Load original predictions (0–10 scale)
df_pred = pd.read_csv('data_clean/persona_prediction.csv')


def pole(v):
    """Split one rating into ``(left_pole, right_pole)`` strengths.

    Ratings below the midpoint 5 load on the left pole as ``(5 - v) / 5``;
    ratings above 5 load on the right pole as ``(v - 5) / 5``. The other pole
    (and both poles at exactly 5) is 0.0, so at most one entry is non-zero.
    """
    return ((5 - v) / 5 if v < 5 else 0.0, (v - 5) / 5 if v > 5 else 0.0)


# source_col, right_col_name, left_col_name
traits = [
    ('pre_empathy',       'empathy_empathetic_norm',        'empathy_unempathetic_norm'),
    ('pre_encouraging',   'encouraging_encouraging_norm',   'encouraging_discouraging_norm'),
    ('pre_formality',     'formality_casual_norm',          'formality_formal_norm'),
    ('pre_funniness',     'funniness_funny_norm',           'funniness_serious_norm'),
    ('pre_hallucination', 'hallucination_factual_norm',     'hallucination_hallucinatory_norm'),
    ('pre_honesty',       'honesty_honest_norm',            'honesty_sycophantic_norm'),
    ('pre_sociality',     'sociality_social_norm',          'sociality_antisocial_norm'),
    ('pre_toxicity',      'toxicity_respectful_norm',       'toxicity_toxic_norm'),
]

# Apply the mapping per trait. This is the column-wise equivalent of calling
# `pole` on every rating, without building one pandas Series per row.
# NOTE(review): a missing rating fails both comparisons and therefore becomes
# 0.0 on both poles (same as `pole`), i.e. it is treated like a midpoint rating.
for src, right_col, left_col in traits:
    rating = df_pred[src]
    df_pred[left_col] = np.where(rating < 5, (5 - rating) / 5, 0.0)
    df_pred[right_col] = np.where(rating > 5, (rating - 5) / 5, 0.0)

# Save the full table (original columns plus every *_norm column)
df_pred.to_csv('model_data/persona_prediction.csv', index=False)

print("✓ Step 1 complete: Normalization for all traits!")
print(f"Total columns: {len(df_pred.columns)}")
df_pred.head()

In [None]:
# Read the normalized data
df = pd.read_csv('model_data/persona_prediction.csv')

# This cell overwrites the file it reads. If it is run on its own output, none
# of the source columns exist any more and the rewrite would silently strip
# every polar column, so refuse to run in that state.
if any(col.endswith('_polar') for col in df.columns):
    raise RuntimeError(
        "model_data/persona_prediction.csv already contains *_polar columns; "
        "re-run the Step 1 normalization cell before re-running Step 2."
    )

# Define trait mappings: (trait_name_for_activation, trait_name_for_norm, positive_pole, negative_pole)
trait_mappings = [
    ('empathy', 'empathy', 'empathetic', 'unempathetic'),
    ('encouraging', 'encouraging', 'encouraging', 'discouraging'),
    ('formality', 'formality', 'casual', 'formal'),
    ('funniness', 'funniness', 'funny', 'serious'),
    ('hallucination', 'hallucination', 'factual', 'hallucinatory'),
    ('sycophancy', 'honesty', 'honest', 'sycophantic'),  # Different names!
    ('sociality', 'sociality', 'social', 'antisocial'),
    ('toxicity', 'toxicity', 'respectful', 'toxic'),
]

# Metadata columns to keep
metadata_cols = ['firebase_id', 'prolific_id', 'timestamp', 'condition', 'system_prompt', 'condition_name',
                 'pre_empathy', 'pre_encouraging', 'pre_formality', 'pre_funniness',
                 'pre_hallucination', 'pre_honesty', 'pre_sociality', 'pre_toxicity']

# Create new dataframe with metadata
result_df = df[metadata_cols].copy()

# Process each trait: positive poles are copied unchanged, negative poles are
# multiplied by -1; every output column gets a _polar suffix.
skipped_cols = []
for trait_act, trait_norm, pos_pole, neg_pole in trait_mappings:
    source_cols = [
        (f'{trait_act}_{pos_pole}', False),            # activation, positive pole
        (f'{trait_act}_{neg_pole}', True),             # activation, negative pole
        (f'{trait_norm}_{pos_pole}_norm', False),      # prediction, positive pole
        (f'{trait_norm}_{neg_pole}_norm', True),       # prediction, negative pole
    ]
    for source_col, flip_sign in source_cols:
        if source_col not in df.columns:
            skipped_cols.append(source_col)
            continue
        result_df[f'{source_col}_polar'] = df[source_col] * -1 if flip_sign else df[source_col]

# Surface anything that was not found instead of dropping it silently
if skipped_cols:
    print(f"WARNING: source columns not found and skipped: {skipped_cols}")

# Save the result
result_df.to_csv('model_data/persona_prediction.csv', index=False)

print("✓ Step 2 complete: Polar transformation (CORRECTED)!")
print(f"Total columns: {len(result_df.columns)}")

# Check columns
activation_polar_cols = [col for col in result_df.columns if '_polar' in col and '_norm_polar' not in col]
norm_polar_cols = [col for col in result_df.columns if '_norm_polar' in col]
print(f"\nActivation polar columns: {len(activation_polar_cols)}")
print(f"Norm polar columns: {len(norm_polar_cols)}")

result_df.head()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
from scipy.stats import linregress

# Load the polar data
df = pd.read_csv('model_data/persona_prediction.csv')

# Stack the positive-pole column on top of the (already sign-flipped)
# negative-pole column to get one signed series per measure.
encouraging_pred_cols = ['encouraging_encouraging_norm_polar', 'encouraging_discouraging_norm_polar']
encouraging_act_cols = ['encouraging_encouraging_polar', 'encouraging_discouraging_polar']

encouraging_prediction_polar = pd.concat([df[col] for col in encouraging_pred_cols])
encouraging_activation_polar = pd.concat([df[col] for col in encouraging_act_cols])

# Two-column frame used for plotting
encouraging_data = pd.DataFrame(
    {'Prediction': encouraging_prediction_polar, 'Activation': encouraging_activation_polar}
)

encouraging_data.head(10)

In [None]:
import pandas as pd

df = pd.read_csv('model_data/persona_prediction.csv')

# Spot-check a handful of rows: recompute the expected empathy pole values
# from the raw rating and print them next to the stored polar columns.
print("Checking empathy transformations:\n")
for idx in [0, 1, 2, 10, 20]:
    if idx >= len(df):
        continue

    pre = df.at[idx, 'pre_empathy']
    emp_norm = df.at[idx, 'empathy_empathetic_norm_polar']
    unemp_norm = df.at[idx, 'empathy_unempathetic_norm_polar']

    # The trailing newline plus print's own newline yields the blank separator line
    print(
        f"Participant {idx}:\n"
        f"  pre_empathy = {pre}\n"
        f"  Expected: empathetic_norm = {max(0, (pre-5)/5):.2f}, unempathetic_norm = {max(0, (5-pre)/5)*-1:.2f}\n"
        f"  Actual:   empathetic_norm = {emp_norm:.2f}, unempathetic_norm = {unemp_norm:.2f}\n"
    )

In [None]:
import pandas as pd

df = pd.read_csv('model_data/persona_prediction.csv')

# Look up the identifiers and condition stored at row label 18
firebase_id, prolific_id, condition = (
    df.at[18, column] for column in ('firebase_id', 'prolific_id', 'condition_name')
)

print(
    "Participant 18:",
    f"  Firebase ID: {firebase_id}",
    f"  Prolific ID: {prolific_id}",
    f"  Condition: {condition}",
    sep="\n",
)

In [None]:
import pandas as pd

df = pd.read_csv('model_data/persona_prediction.csv')

# Get participant 18's data
p18 = df.loc[18]

# Each entry: (row title, activation column prefix, prediction column prefix,
#              positive pole, negative pole, positive label, negative label)
traits = [
    ('Empathy', 'empathy', 'empathy', 'empathetic', 'unempathetic', 'Empathetic', 'Unempathetic'),
    ('Encouraging', 'encouraging', 'encouraging', 'encouraging', 'discouraging', 'Encouraging', 'Discouraging'),
    ('Formality', 'formality', 'formality', 'casual', 'formal', 'Casual', 'Formal'),
    ('Funniness', 'funniness', 'funniness', 'funny', 'serious', 'Funny', 'Serious'),
    ('Hallucination', 'hallucination', 'hallucination', 'factual', 'hallucinatory', 'Factual', 'Hallucinatory'),
    ('Honesty', 'sycophancy', 'honesty', 'honest', 'sycophantic', 'Honest', 'Sycophantic'),
    ('Sociality', 'sociality', 'sociality', 'social', 'antisocial', 'Social', 'Antisocial'),
    ('Toxicity', 'toxicity', 'toxicity', 'respectful', 'toxic', 'Respectful', 'Toxic'),
]

# LaTeX special characters and their escaped forms. The mapping is applied one
# character at a time so that backslashes and braces introduced by an escape
# are never escaped a second time.
latex_escapes = {
    '\\': '\\textbackslash{}',
    '&': '\\&',
    '%': '\\%',
    '$': '\\$',
    '#': '\\#',
    '_': '\\_',
    '{': '\\{',
    '}': '\\}',
    '~': '\\textasciitilde{}',
    '^': '\\textasciicircum{}',
}

print("\\begin{table}[h]")
print("\\centering")
print("\\caption{Example Participant Trait Predictions vs Actual Activations}")
print("\\label{tab:participant18}")
print("\\begin{tabular}{lcccccc}")
print("\\hline")
print("\\textbf{Trait} & \\multicolumn{2}{c}{\\textbf{Predicted}} & \\multicolumn{2}{c}{\\textbf{Actual}} & \\multicolumn{2}{c}{\\textbf{Error}} \\\\")
print("               & Positive & Negative & Positive & Negative & Positive & Negative \\\\")
print("\\hline")

# Running totals for the mean absolute error over all 16 trait poles
total_error = 0
error_count = 0

for trait_name, trait_act, trait_norm, pos_pole, neg_pole, pos_label, neg_label in traits:
    pred_pos = p18[f'{trait_norm}_{pos_pole}_norm_polar']
    pred_neg = p18[f'{trait_norm}_{neg_pole}_norm_polar']
    act_pos = p18[f'{trait_act}_{pos_pole}_polar']
    act_neg = p18[f'{trait_act}_{neg_pole}_polar']

    error_pos = abs(pred_pos - act_pos)
    error_neg = abs(pred_neg - act_neg)

    total_error += error_pos + error_neg
    error_count += 2

    print(f"{trait_name:15s} & {pred_pos:5.2f} & {pred_neg:5.2f} & {act_pos:5.2f} & {act_neg:5.2f} & {error_pos:5.2f} & {error_neg:5.2f} \\\\")

mae = total_error / error_count
print("\\hline")
print(f"\\multicolumn{{7}}{{l}}{{\\textbf{{Mean Absolute Error:}} {mae:.3f}}} \\\\")
print("\\hline")

# Add system prompt as a table note. A missing prompt is rendered as empty
# text; every LaTeX special character is escaped so the prompt cannot break
# the table markup.
raw_prompt = p18['system_prompt']
raw_prompt = '' if pd.isna(raw_prompt) else str(raw_prompt)
system_prompt = ''.join(latex_escapes.get(ch, ch) for ch in raw_prompt)
print("\\multicolumn{7}{p{0.9\\textwidth}}{\\small \\textbf{System Prompt:} " + system_prompt + "} \\\\")
print("\\hline")
print("\\end{tabular}")
print("\\end{table}")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

df = pd.read_csv('model_data/persona_prediction.csv')

# One entry per trait pole:
# (activation column prefix, prediction column prefix, pole name, display label)
trait_poles = [
    ('empathy', 'empathy', 'empathetic', 'Empathetic'),
    ('empathy', 'empathy', 'unempathetic', 'Unempathetic'),
    ('encouraging', 'encouraging', 'encouraging', 'Encouraging'),
    ('encouraging', 'encouraging', 'discouraging', 'Discouraging'),
    ('formality', 'formality', 'casual', 'Casual'),
    ('formality', 'formality', 'formal', 'Formal'),
    ('funniness', 'funniness', 'funny', 'Funny'),
    ('funniness', 'funniness', 'serious', 'Serious'),
    ('hallucination', 'hallucination', 'factual', 'Factual'),
    ('hallucination', 'hallucination', 'hallucinatory', 'Hallucinatory'),
    ('sycophancy', 'honesty', 'honest', 'Honest'),
    ('sycophancy', 'honesty', 'sycophantic', 'Sycophantic'),
    ('sociality', 'sociality', 'social', 'Social'),
    ('sociality', 'sociality', 'antisocial', 'Antisocial'),
    ('toxicity', 'toxicity', 'respectful', 'Respectful'),
    ('toxicity', 'toxicity', 'toxic', 'Toxic'),
]

# Mean magnitude (sign removed) of the prediction and of the activation for
# every trait pole, in the same order as `trait_poles`.
mean_predictions = [
    df[f'{norm_prefix}_{pole_name}_norm_polar'].abs().mean()
    for _, norm_prefix, pole_name, _ in trait_poles
]
mean_activations = [
    df[f'{act_prefix}_{pole_name}_polar'].abs().mean()
    for act_prefix, _, pole_name, _ in trait_poles
]
labels = [entry[3] for entry in trait_poles]

# Create the plot
fig, ax = plt.subplots(figsize=(12, 10))

# One point per trait pole, coloured by position in the list
scatter = ax.scatter(
    mean_predictions, mean_activations,
    s=150, alpha=0.7, c=range(len(labels)), cmap='tab20',
)

# Diagonal y = x: points on it have equal mean prediction and mean activation
max_val = max(max(mean_predictions), max(mean_activations))
ax.plot([0, max_val], [0, max_val], 'k--', linewidth=2, alpha=0.5, label='Perfect Prediction')

# Name each point
for label, x_pos, y_pos in zip(labels, mean_predictions, mean_activations):
    ax.annotate(
        label, (x_pos, y_pos),
        fontsize=9, alpha=0.8, xytext=(5, 5), textcoords='offset points',
    )

# Labels and styling
ax.set_xlabel('Mean Predicted Activation (|normalized|)', fontsize=13, fontweight='bold')
ax.set_ylabel('Mean Actual Activation (|absolute value|)', fontsize=13, fontweight='bold')
ax.set_title('Systematic Over/Under-Estimation of Trait Activations', fontsize=15, fontweight='bold', pad=20)
ax.grid(True, alpha=0.3, linestyle='--', linewidth=0.5)
ax.legend(fontsize=11)

# Interpretation boxes in the top-left and bottom-right corners
ax.text(
    0.05, 0.95, 'Above line = Underestimated\n(actual > predicted)',
    transform=ax.transAxes, fontsize=10, verticalalignment='top',
    bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.5),
)
ax.text(
    0.95, 0.05, 'Below line = Overestimated\n(predicted > actual)',
    transform=ax.transAxes, fontsize=10, verticalalignment='bottom',
    horizontalalignment='right',
    bbox=dict(boxstyle='round', facecolor='lightyellow', alpha=0.5),
)

# Clean styling
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)

plt.tight_layout()
plt.show()

# Per-pole bias: mean activation minus mean prediction, sorted from largest to smallest
overall_diff = np.array(mean_activations) - np.array(mean_predictions)
sorted_diffs = sorted(zip(labels, overall_diff), key=lambda pair: pair[1], reverse=True)

print("\nSystematic Bias Analysis:")
print(f"  Mean difference (actual - predicted): {overall_diff.mean():.4f}")
print("  Positive = Underestimation, Negative = Overestimation")
print("\nTraits most underestimated (actual >> predicted):")
for label, diff in sorted_diffs[:5]:
    print(f"    {label:15s}: {diff:+.4f}")
print("\nTraits most overestimated (predicted >> actual):")
for label, diff in sorted_diffs[-5:]:
    print(f"    {label:15s}: {diff:+.4f}")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Polar-encoded predictions and activations
df = pd.read_csv('model_data/persona_prediction.csv')

# One tuple per trait:
#   (activation column prefix, prediction column prefix,
#    first pole, second pole, panel title)
# Honesty predictions are paired with the 'sycophancy' activation columns.
traits = [
    ('empathy', 'empathy', 'empathetic', 'unempathetic', 'Empathy'),
    ('encouraging', 'encouraging', 'encouraging', 'discouraging', 'Encouraging'),
    ('formality', 'formality', 'casual', 'formal', 'Formality'),
    ('funniness', 'funniness', 'funny', 'serious', 'Funniness'),
    ('hallucination', 'hallucination', 'factual', 'hallucinatory', 'Hallucination'),
    ('sycophancy', 'honesty', 'honest', 'sycophantic', 'Honesty'),
    ('sociality', 'sociality', 'social', 'antisocial', 'Sociality'),
    ('toxicity', 'toxicity', 'respectful', 'toxic', 'Toxicity'),
]

# One panel per trait on a 4 x 2 grid, flattened so panels can be indexed linearly
fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(16, 20))
axes = axes.flatten()

for idx, (trait_act, trait_norm, pos_pole, neg_pole, title) in enumerate(traits):
    ax = axes[idx]

    # Stack the two pole columns end to end, for predictions and for activations
    prediction_polar = pd.concat(
        [df[f'{trait_norm}_{pole}_norm_polar'] for pole in (pos_pole, neg_pole)]
    )
    activation_polar = pd.concat(
        [df[f'{trait_act}_{pole}_polar'] for pole in (pos_pole, neg_pole)]
    )
    samples = [prediction_polar, activation_polar]

    # Violin layer: predicted at x=1, actual at x=2
    parts = ax.violinplot(samples, positions=[1, 2], widths=0.7,
                          showmeans=True, showmedians=True)
    for body, face in zip(parts['bodies'], ('lightblue', 'lightcoral')):
        body.set_facecolor(face)
        body.set_alpha(0.7)

    # Mean line in black, median line in dark red, both thickened
    for line_key, line_color in (('cmeans', 'black'), ('cmedians', 'darkred')):
        parts[line_key].set_color(line_color)
        parts[line_key].set_linewidth(2)

    # Narrow box-plot overlay showing the quartiles (outliers hidden)
    bp = ax.boxplot(samples, positions=[1, 2], widths=0.3,
                    patch_artist=True, showfliers=False,
                    boxprops={'facecolor': 'white', 'alpha': 0.5},
                    whiskerprops={'linewidth': 1.5},
                    capprops={'linewidth': 1.5})

    # Zero reference line
    ax.axhline(y=0, color='gray', linestyle='--', linewidth=1, alpha=0.5)

    # Axis labelling and grid
    ax.set_xticks([1, 2])
    ax.set_xticklabels(['Predicted', 'Actual'], fontsize=11)
    ax.set_ylabel('Activation (Bipolar)', fontsize=10)
    ax.set_title(title, fontsize=13, fontweight='bold')
    ax.grid(axis='y', alpha=0.3, linestyle='--', linewidth=0.5)

    # Mean / standard deviation summary shown in the panel's top-left corner
    mean_pred, std_pred = prediction_polar.mean(), prediction_polar.std()
    mean_act, std_act = activation_polar.mean(), activation_polar.std()
    textstr = '\n'.join([
        f'μ_pred={mean_pred:.3f} (σ={std_pred:.3f})',
        f'μ_act={mean_act:.3f} (σ={std_act:.3f})',
    ])
    props = {'boxstyle': 'round', 'facecolor': 'wheat', 'alpha': 0.3}
    ax.text(0.02, 0.98, textstr, transform=ax.transAxes, fontsize=8,
            verticalalignment='top', bbox=props)

plt.suptitle('Distribution of Predicted vs Actual Trait Activations',
             fontsize=16, fontweight='bold', y=0.995)
plt.tight_layout(rect=[0, 0, 1, 0.995])
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Polar-encoded predictions and activations
df = pd.read_csv('model_data/persona_prediction.csv')

# One tuple per trait:
#   (activation column prefix, prediction column prefix,
#    first pole, second pole, panel title)
traits = [
    ('empathy', 'empathy', 'empathetic', 'unempathetic', 'Empathy'),
    ('encouraging', 'encouraging', 'encouraging', 'discouraging', 'Encouraging'),
    ('formality', 'formality', 'casual', 'formal', 'Formality'),
    ('funniness', 'funniness', 'funny', 'serious', 'Funniness'),
    ('hallucination', 'hallucination', 'factual', 'hallucinatory', 'Hallucination'),
    ('sycophancy', 'honesty', 'honest', 'sycophantic', 'Honesty'),
    ('sociality', 'sociality', 'social', 'antisocial', 'Sociality'),
    ('toxicity', 'toxicity', 'respectful', 'toxic', 'Toxicity'),
]

# 4 x 2 grid of panels, flattened for linear indexing
fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(16, 20))
axes = axes.flatten()

for idx, (trait_act, trait_norm, pos_pole, neg_pole, title) in enumerate(traits):
    ax = axes[idx]

    # Stack the two pole columns end to end, for predictions and for activations
    prediction_polar = pd.concat(
        [df[f'{trait_norm}_{pole}_norm_polar'] for pole in (pos_pole, neg_pole)]
    )
    activation_polar = pd.concat(
        [df[f'{trait_act}_{pole}_polar'] for pole in (pos_pole, neg_pole)]
    )

    # Box plots: predicted at x=1, actual at x=2; red diamond marks the mean
    mean_marker = {'marker': 'D', 'markerfacecolor': 'red',
                   'markeredgecolor': 'red', 'markersize': 8}
    outlier_marker = {'marker': 'o', 'markerfacecolor': 'gray',
                      'markersize': 5, 'alpha': 0.5}
    bp = ax.boxplot(
        [prediction_polar, activation_polar],
        positions=[1, 2],
        widths=0.5,
        patch_artist=True,
        showmeans=True,
        meanprops=mean_marker,
        boxprops={'facecolor': 'lightblue', 'alpha': 0.7, 'linewidth': 1.5},
        medianprops={'color': 'darkblue', 'linewidth': 2},
        whiskerprops={'linewidth': 1.5},
        capprops={'linewidth': 1.5},
        flierprops=outlier_marker,
    )

    # Predicted box in blue, actual box in coral
    for box, face in zip(bp['boxes'][:2], ('lightblue', 'lightcoral')):
        box.set_facecolor(face)

    # Zero reference line
    ax.axhline(y=0, color='gray', linestyle='--', linewidth=1, alpha=0.5)

    # Fixed y-range of -1 to 1 on every panel
    ax.set_ylim(-1, 1)

    # Axis labelling and grid
    ax.set_xticks([1, 2])
    ax.set_xticklabels(['Predicted', 'Actual'], fontsize=11)
    ax.set_ylabel('Activation (Bipolar)', fontsize=10)
    ax.set_title(title, fontsize=13, fontweight='bold')
    ax.grid(axis='y', alpha=0.3, linestyle='--', linewidth=0.5)

    # Means shown in the panel's top-left corner
    mean_pred = prediction_polar.mean()
    mean_act = activation_polar.mean()
    textstr = '\n'.join([f'μ_pred={mean_pred:.3f}', f'μ_act={mean_act:.3f}'])
    props = {'boxstyle': 'round', 'facecolor': 'wheat', 'alpha': 0.3}
    ax.text(0.02, 0.98, textstr, transform=ax.transAxes, fontsize=9,
            verticalalignment='top', bbox=props)

    # Hide the top and right frame lines
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

plt.suptitle('Distribution of Predicted vs Actual Trait Activations',
             fontsize=16, fontweight='bold', y=0.995)
plt.tight_layout(rect=[0, 0, 1, 0.995])
plt.show()

In [None]:
import pandas as pd
import numpy as np
from scipy import stats

# Polar-encoded predictions and activations
df = pd.read_csv('model_data/persona_prediction.csv')

# One tuple per trait:
#   (activation column prefix, prediction column prefix,
#    first pole, second pole, display title)
traits = [
    ('empathy', 'empathy', 'empathetic', 'unempathetic', 'Empathy'),
    ('encouraging', 'encouraging', 'encouraging', 'discouraging', 'Encouraging'),
    ('formality', 'formality', 'casual', 'formal', 'Formality'),
    ('funniness', 'funniness', 'funny', 'serious', 'Funniness'),
    ('hallucination', 'hallucination', 'factual', 'hallucinatory', 'Hallucination'),
    ('sycophancy', 'honesty', 'honest', 'sycophantic', 'Honesty'),
    ('sociality', 'sociality', 'social', 'antisocial', 'Sociality'),
    ('toxicity', 'toxicity', 'respectful', 'toxic', 'Toxicity'),
]


def _significance_stars(p_value):
    """Return '***', '**', '*' or 'n.s.' for the .001 / .01 / .05 cut-offs."""
    if p_value < 0.001:
        return '***'
    if p_value < 0.01:
        return '**'
    if p_value < 0.05:
        return '*'
    return 'n.s.'


def _effect_size_label(d):
    """Return a verbal label for |d| using the 0.2 / 0.5 / 0.8 benchmarks.

    Values below 0.2 are reported as 'negligible' rather than being lumped
    together with 'small' effects.
    """
    magnitude = abs(d)
    if magnitude < 0.2:
        return 'negligible'
    if magnitude < 0.5:
        return 'small'
    if magnitude < 0.8:
        return 'medium'
    return 'large'


print("STATISTICAL TESTS: Predicted vs Actual Activations")
print("=" * 80)
print("\nNote: Tests are on paired differences (actual - predicted)")
print("Positive mean difference = Underestimation, Negative = Overestimation\n")

results = []

for trait_act, trait_norm, pos_pole, neg_pole, title in traits:
    print(f"\n{title.upper()}")
    print("-" * 80)

    # Stack the two pole columns end to end, for predictions and for activations
    prediction_polar = pd.concat([
        df[f'{trait_norm}_{pos_pole}_norm_polar'],
        df[f'{trait_norm}_{neg_pole}_norm_polar']
    ])
    activation_polar = pd.concat([
        df[f'{trait_act}_{pos_pole}_polar'],
        df[f'{trait_act}_{neg_pole}_polar']
    ])

    # Paired differences (actual - predicted); both series share the same
    # stacked index, so the subtraction is element by element
    differences = activation_polar - prediction_polar

    # Descriptive statistics
    mean_diff = differences.mean()
    std_diff = differences.std()
    mean_pred = prediction_polar.mean()
    mean_act = activation_polar.mean()

    # Shapiro-Wilk normality test on the differences
    shapiro_stat, shapiro_p = stats.shapiro(differences)
    is_normal = shapiro_p > 0.05

    # Paired t-test and its non-parametric counterpart
    t_stat, t_pval = stats.ttest_rel(activation_polar, prediction_polar)
    wilcoxon_stat, wilcoxon_pval = stats.wilcoxon(activation_polar, prediction_polar)

    # Cohen's d for paired samples: mean difference over SD of the differences
    cohens_d = mean_diff / std_diff

    normality_flag = '✓ Normal' if is_normal else '✗ Non-normal'

    print(f"  Mean Predicted: {mean_pred:.4f}")
    print(f"  Mean Actual:    {mean_act:.4f}")
    print(f"  Mean Difference: {mean_diff:.4f} (SD = {std_diff:.4f})")
    print(f"\n  Normality Test (Shapiro-Wilk):")
    print(f"    W = {shapiro_stat:.4f}, p = {shapiro_p:.4f} {normality_flag}")
    print(f"\n  Paired t-test:")
    print(f"    t = {t_stat:.4f}, p = {t_pval:.4f} {_significance_stars(t_pval)}")
    print(f"\n  Wilcoxon signed-rank test (non-parametric):")
    print(f"    W = {wilcoxon_stat:.4f}, p = {wilcoxon_pval:.4f} {_significance_stars(wilcoxon_pval)}")
    print(f"\n  Effect Size (Cohen's d): {cohens_d:.4f} ({_effect_size_label(cohens_d)})")

    results.append({
        'Trait': title,
        'Mean_Predicted': mean_pred,
        'Mean_Actual': mean_act,
        'Mean_Diff': mean_diff,
        'Cohens_d': cohens_d,
        't_stat': t_stat,
        't_pval': t_pval,
        'wilcoxon_pval': wilcoxon_pval,
        'normal': is_normal
    })

# Summary table
print("\n\n" + "=" * 80)
print("SUMMARY TABLE")
print("=" * 80)
results_df = pd.DataFrame(results)
print(results_df.to_string(index=False))

# Bonferroni correction: divide alpha by the number of traits tested
alpha = 0.05
n_tests = len(traits)
bonferroni_alpha = alpha / n_tests
print(f"\n\nBonferroni-corrected significance level: α = {bonferroni_alpha:.4f}")
print(f"Significant after correction (t-test):")
surviving = results_df[results_df['t_pval'] < bonferroni_alpha]
for _, row in surviving.iterrows():
    print(f"  {row['Trait']}: p = {row['t_pval']:.4f} ***")
if surviving.empty:
    # Make an empty result explicit instead of printing a bare header
    print("  (none)")

In [None]:
import pandas as pd
import numpy as np

# Polar-encoded predictions and activations
df = pd.read_csv('model_data/persona_prediction.csv')

# One tuple per trait pole:
#   (activation column prefix, prediction column prefix, pole, display label)
trait_poles = [
    ('empathy', 'empathy', 'empathetic', 'Empathetic'),
    ('empathy', 'empathy', 'unempathetic', 'Unempathetic'),
    ('encouraging', 'encouraging', 'encouraging', 'Encouraging'),
    ('encouraging', 'encouraging', 'discouraging', 'Discouraging'),
    ('formality', 'formality', 'casual', 'Casual'),
    ('formality', 'formality', 'formal', 'Formal'),
    ('funniness', 'funniness', 'funny', 'Funny'),
    ('funniness', 'funniness', 'serious', 'Serious'),
    ('hallucination', 'hallucination', 'factual', 'Factual'),
    ('hallucination', 'hallucination', 'hallucinatory', 'Hallucinatory'),
    ('sycophancy', 'honesty', 'honest', 'Honest'),
    ('sycophancy', 'honesty', 'sycophantic', 'Sycophantic'),
    ('sociality', 'sociality', 'social', 'Social'),
    ('sociality', 'sociality', 'antisocial', 'Antisocial'),
    ('toxicity', 'toxicity', 'respectful', 'Respectful'),
    ('toxicity', 'toxicity', 'toxic', 'Toxic'),
]

# Column names in pole order, then the absolute values pooled across all poles
pred_cols = [f'{trait_norm}_{pole}_norm_polar' for _, trait_norm, pole, _ in trait_poles]
act_cols = [f'{trait_act}_{pole}_polar' for trait_act, _, pole, _ in trait_poles]
predicted = [value for col in pred_cols for value in df[col].abs().tolist()]
actual = [value for col in act_cols for value in df[col].abs().tolist()]

# Table for JASP; 'difference' is predicted minus actual
jasp_data = pd.DataFrame({'predicted': predicted, 'actual': actual})
jasp_data['difference'] = np.array(predicted) - np.array(actual)

# Write the pooled table to disk
jasp_data.to_csv('model_data/predicted_vs_actual_jasp.csv', index=False)

print(f"Total observations: {len(jasp_data)}")
print(f"\nDescriptive statistics:")
print(jasp_data.describe())
pooled_means = jasp_data[['predicted', 'actual', 'difference']].mean()
print(f"\nMean predicted: {pooled_means['predicted']:.4f}")
print(f"Mean actual:    {pooled_means['actual']:.4f}")
print(f"Mean difference: {pooled_means['difference']:.4f}")
print(f"\nSaved to: model_data/predicted_vs_actual_jasp.csv")

jasp_data.head(20)

In [None]:
import pandas as pd
import numpy as np

# Polar-encoded predictions and activations
df = pd.read_csv('model_data/persona_prediction.csv')

# One tuple per trait pole:
#   (activation column prefix, prediction column prefix, pole, display label)
trait_poles = [
    ('empathy', 'empathy', 'empathetic', 'Empathetic'),
    ('empathy', 'empathy', 'unempathetic', 'Unempathetic'),
    ('encouraging', 'encouraging', 'encouraging', 'Encouraging'),
    ('encouraging', 'encouraging', 'discouraging', 'Discouraging'),
    ('formality', 'formality', 'casual', 'Casual'),
    ('formality', 'formality', 'formal', 'Formal'),
    ('funniness', 'funniness', 'funny', 'Funny'),
    ('funniness', 'funniness', 'serious', 'Serious'),
    ('hallucination', 'hallucination', 'factual', 'Factual'),
    ('hallucination', 'hallucination', 'hallucinatory', 'Hallucinatory'),
    ('sycophancy', 'honesty', 'honest', 'Honest'),
    ('sycophancy', 'honesty', 'sycophantic', 'Sycophantic'),
    ('sociality', 'sociality', 'social', 'Social'),
    ('sociality', 'sociality', 'antisocial', 'Antisocial'),
    ('toxicity', 'toxicity', 'respectful', 'Respectful'),
    ('toxicity', 'toxicity', 'toxic', 'Toxic'),
]

# Long format: one row per participant per trait pole.
# Each pole is built as a whole column block (vectorized) rather than
# row by row; blocks are stacked in trait_poles order, participants in
# file order within each block.
pole_frames = []

for trait_act, trait_norm, pole, label in trait_poles:
    predicted_abs = df[f'{trait_norm}_{pole}_norm_polar'].abs()
    actual_abs = df[f'{trait_act}_{pole}_polar'].abs()

    pole_frames.append(pd.DataFrame({
        'trait_pole': label,  # scalar, broadcast to every row of the block
        'predicted': predicted_abs.to_numpy(),
        'actual': actual_abs.to_numpy(),
        # difference is predicted minus actual (both as absolute values)
        'difference': (predicted_abs - actual_abs).to_numpy(),
    }))

jasp_data = pd.concat(pole_frames, ignore_index=True)

# Save
jasp_data.to_csv('model_data/predicted_vs_actual_by_trait.csv', index=False)

# Pole count is taken from trait_poles so the message stays correct if the list changes
print(f"Total observations: {len(jasp_data)} ({len(df)} participants × {len(trait_poles)} trait poles)")
print(f"\nOverall:")
print(f"  Mean predicted: {jasp_data['predicted'].mean():.4f}")
print(f"  Mean actual:    {jasp_data['actual'].mean():.4f}")
print(f"  Mean difference: {jasp_data['difference'].mean():.4f}")

print(f"\nBy trait pole:")
summary = jasp_data.groupby('trait_pole')[['predicted', 'actual', 'difference']].mean()
summary = summary.sort_values('difference')
print(summary)

print(f"\nSaved to: model_data/predicted_vs_actual_by_trait.csv")

jasp_data.head(20)

In [None]:
import pandas as pd
import numpy as np

# Polar-encoded predictions and activations
df = pd.read_csv('model_data/persona_prediction.csv')

# One tuple per trait pole:
#   (activation column prefix, prediction column prefix, pole)
trait_poles = [
    ('empathy', 'empathy', 'empathetic'),
    ('empathy', 'empathy', 'unempathetic'),
    ('encouraging', 'encouraging', 'encouraging'),
    ('encouraging', 'encouraging', 'discouraging'),
    ('formality', 'formality', 'casual'),
    ('formality', 'formality', 'formal'),
    ('funniness', 'funniness', 'funny'),
    ('funniness', 'funniness', 'serious'),
    ('hallucination', 'hallucination', 'factual'),
    ('hallucination', 'hallucination', 'hallucinatory'),
    ('sycophancy', 'honesty', 'honest'),
    ('sycophancy', 'honesty', 'sycophantic'),
    ('sociality', 'sociality', 'social'),
    ('sociality', 'sociality', 'antisocial'),
    ('toxicity', 'toxicity', 'respectful'),
    ('toxicity', 'toxicity', 'toxic'),
]

# Wide format: a predicted_<pole> / actual_<pole> column pair per pole,
# each holding absolute values, collected first and assembled in one go
wide_columns = {}
for trait_act, trait_norm, pole in trait_poles:
    wide_columns[f'predicted_{pole}'] = df[f'{trait_norm}_{pole}_norm_polar'].abs()
    wide_columns[f'actual_{pole}'] = df[f'{trait_act}_{pole}_polar'].abs()

jasp_data = pd.DataFrame(wide_columns)

# Write the wide table to disk
jasp_data.to_csv('model_data/predicted_vs_actual_wide.csv', index=False)

n_rows, n_cols = jasp_data.shape
print(f"Shape: {jasp_data.shape} ({n_rows} participants × {n_cols} columns)")
print(f"\nColumns: {list(jasp_data.columns)}")
print(f"\nSaved to: model_data/predicted_vs_actual_wide.csv")

jasp_data.head()

## Systematic Over/Underestimation Plot

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Global font settings for this figure
plt.rcParams['font.family'] = 'Helvetica'
# plt.rcParams['font.serif'] = ['Georgia', 'Times New Roman']
plt.rcParams['font.size'] = 14

# Hard-coded results table (per the original note: JASP output, standard
# Cohen's d with 95% CIs); the lists below are parallel, one entry per pole.
# NOTE(review): 15 poles are listed and 'Toxic' is not among them - confirm
# that the omission is intentional.
results = {
    'trait': ['Empathetic', 'Unempathetic', 'Encouraging', 'Discouraging',
              'Casual', 'Formal', 'Funny', 'Serious',
              'Factual', 'Hallucinatory', 'Honest', 'Sycophantic',
              'Social', 'Antisocial', 'Respectful'],
    't': [6.642, 0.095, 10.648, -1.512, 9.509, -3.147, 9.290, -16.286,
          8.796, 1.569, 9.477, -3.789, 0.240, 2.093, -2.615],
    'p': [0.0001, 0.924, 0.0001, 0.135, 0.0001, 0.002, 0.0001, 0.0001,
          0.0001, 0.121, 0.0001, 0.0001, 0.811, 0.040, 0.011],
    'cohens_d': [0.743, 0.011, 1.191, -0.169, 1.063, -0.352, 1.039, -1.821,
                 0.983, 0.175, 1.060, -0.424, 0.027, 0.234, -0.292],
    'ci_lower': [0.493, -0.208, 0.901, -0.389, 0.786, -0.577, 0.764, -2.177,
                 0.714, -0.046, 0.783, -0.651, -0.192, 0.011, -0.515],
    'ci_upper': [0.988, 0.230, 1.475, 0.052, 1.336, -0.125, 1.309, -1.460,
                 1.249, 0.396, 1.332, -0.194, 0.246, 0.455, -0.068]
}

# Order rows by effect size, then derive the distances from d to each CI bound
df_results = pd.DataFrame(results).sort_values('cohens_d')
df_results = df_results.assign(
    ci_error_lower=df_results['cohens_d'] - df_results['ci_lower'],
    ci_error_upper=df_results['ci_upper'] - df_results['cohens_d'],
)

# matplotlib takes asymmetric errors as [lower_errors, upper_errors]
ci_errors = [df_results[col].values for col in ('ci_error_lower', 'ci_error_upper')]

# Single compact figure
fig, ax = plt.subplots(figsize=(8, 6))

# Gray bars with dark outlines
color = '#808080'
outline = '#2d2d2d'

# Horizontal bars with the asymmetric 95% CI whiskers
bars = ax.barh(df_results['trait'], df_results['cohens_d'],
               color=color, alpha=0.85, edgecolor=outline, linewidth=0.8,
               xerr=ci_errors, capsize=3,
               error_kw={'linewidth': 1.5, 'ecolor': outline})

# Significance stars, placed just beyond the CI bound on the bar's own side
for y_pos, (_, row) in enumerate(df_results.iterrows()):
    p_value = row['p']
    marker = '***' if p_value < 0.001 else '**' if p_value < 0.01 else '*' if p_value < 0.05 else ''
    if not marker:
        continue

    if row['cohens_d'] < 0:
        x_pos = row['ci_lower'] - 0.08
    else:
        x_pos = row['ci_upper'] + 0.08
    ax.text(x_pos, y_pos, marker, va='center',
            ha='left' if row['cohens_d'] > 0 else 'right',
            fontsize=9, color=outline, fontweight='bold')

# Vertical reference line at d = 0
ax.axvline(x=0, color=outline, linewidth=1.2, linestyle='-', zorder=0)

# Axis labels
ax.set_xlabel("Cohen's d (with 95% CI)", fontsize=16, color=outline, fontweight='600')
ax.set_ylabel("")
# ax.set_title("Systematic Over/Under-Estimation of Trait Activations",
#              fontsize=12, fontweight='600', color='#2d2d2d', pad=12)

# Frame: hide top/right, thin dark left/bottom
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
for side in ('left', 'bottom'):
    ax.spines[side].set_linewidth(0.8)
    ax.spines[side].set_color(outline)

# Faint vertical grid drawn beneath the bars
ax.grid(axis='x', alpha=0.15, linestyle='-', linewidth=0.5, color=outline)
ax.set_axisbelow(True)

# Layout, export, display
plt.tight_layout()
plt.savefig('figures/overprediction_barplot_95ci.png', dpi=300, bbox_inches='tight',
            facecolor='white', edgecolor='none')
plt.show()

print("Saved to: figures/overprediction_barplot_95ci.png")

# Echo the plotted values for verification
print("\nJASP 95% Confidence Intervals:")
print("Trait\t\t\tCohen's d\tCI Lower\tCI Upper")
print("-" * 50)
for _, row in df_results.iterrows():
    print(f"{row['trait']:<15}\t{row['cohens_d']:.3f}\t\t{row['ci_lower']:.3f}\t\t{row['ci_upper']:.3f}")

In [None]:
import pandas as pd
import numpy as np
from scipy.stats import linregress, pearsonr

# Load the polar data
df = pd.read_csv('model_data/persona_prediction.csv')

# One tuple per trait:
#   (activation column prefix, prediction column prefix,
#    first pole, second pole, display title)
traits = [
    ('empathy', 'empathy', 'empathetic', 'unempathetic', 'Empathy'),
    ('encouraging', 'encouraging', 'encouraging', 'discouraging', 'Encouraging'),
    ('formality', 'formality', 'casual', 'formal', 'Formality'),
    ('funniness', 'funniness', 'funny', 'serious', 'Funniness'),
    ('hallucination', 'hallucination', 'factual', 'hallucinatory', 'Hallucination'),
    ('sycophancy', 'honesty', 'honest', 'sycophantic', 'Honesty'),
    ('sociality', 'sociality', 'social', 'antisocial', 'Sociality'),
    ('toxicity', 'toxicity', 'respectful', 'toxic', 'Toxicity'),
]


def _format_p(p_value):
    """Format a p-value for the report text: '<.001' or '= 0.123'."""
    return '<.001' if p_value < 0.001 else f"= {p_value:.3f}"


def _join_names(names):
    """Join names as 'A', 'A and B', or 'A, B and C'."""
    if len(names) == 1:
        return names[0]
    return ', '.join(names[:-1]) + ' and ' + names[-1]


# Calculate correlations for each trait
correlation_results = []

for trait_act, trait_norm, pos_pole, neg_pole, title in traits:
    # Stack the two pole columns end to end, for predictions and for activations
    prediction_polar = pd.concat([
        df[f'{trait_norm}_{pos_pole}_norm_polar'],
        df[f'{trait_norm}_{neg_pole}_norm_polar']
    ])
    activation_polar = pd.concat([
        df[f'{trait_act}_{pos_pole}_polar'],
        df[f'{trait_act}_{neg_pole}_polar']
    ])

    # Pearson correlation between predicted and actual values
    r, p = pearsonr(prediction_polar, activation_polar)

    # Directional accuracy: share of pairs whose signs do not disagree.
    # A pair counts as a mismatch only when the product is strictly negative,
    # so pairs where either value is zero are counted as correct.
    sign_mismatch = (prediction_polar * activation_polar) < 0
    directional_accuracy = (1 - sign_mismatch.sum() / len(sign_mismatch)) * 100

    correlation_results.append({
        'Trait': title,
        'r': r,
        'r²': r**2,
        'p': p,
        'Significance': '***' if p < 0.001 else '**' if p < 0.01 else '*' if p < 0.05 else 'n.s.',
        'Directional_Accuracy_%': directional_accuracy,
        'n': len(prediction_polar)
    })

# Create dataframe, strongest correlation first
corr_df = pd.DataFrame(correlation_results)
corr_df = corr_df.sort_values('r', ascending=False)

# Save to CSV
corr_df.to_csv('model_data/prediction_correlations_summary.csv', index=False)

# Denominator for the "x/N" counts, taken from the table itself
n_traits = len(corr_df)

print("=" * 90)
print("PREDICTION ACCURACY: CORRELATIONS BETWEEN PREDICTED AND ACTUAL ACTIVATIONS")
print("=" * 90)
print()
print(corr_df.to_string(index=False))
print()
print("=" * 90)
print("SUMMARY STATISTICS")
print("=" * 90)
print(f"Mean correlation (r):        {corr_df['r'].mean():.3f}")
print(f"Median correlation (r):      {corr_df['r'].median():.3f}")
print(f"Range:                       {corr_df['r'].min():.3f} to {corr_df['r'].max():.3f}")
print(f"Mean R²:                     {corr_df['r²'].mean():.3f} ({corr_df['r²'].mean()*100:.1f}% variance explained)")
print(f"Traits with r > 0.5:         {(corr_df['r'] > 0.5).sum()}/{n_traits}")
print(f"Significant correlations:    {(corr_df['p'] < 0.05).sum()}/{n_traits}")
print(f"Mean directional accuracy:   {corr_df['Directional_Accuracy_%'].mean():.1f}%")
print()

# Textual summary for reporting
print("=" * 90)
print("TEXTUAL SUMMARY FOR REPORTING")
print("=" * 90)
print()

# corr_df is sorted descending by r: first row is the strongest, last the weakest
weakest = corr_df.iloc[-1]
strongest = corr_df.iloc[0]
print("Prediction-activation correlations ranged from r = {:.3f} (p {}) for {} to".format(
    weakest['r'], _format_p(weakest['p']), weakest['Trait']
))
print("r = {:.3f} (p {}) for {}. Overall, the mean correlation was r = {:.3f},".format(
    strongest['r'], _format_p(strongest['p']), strongest['Trait'], corr_df['r'].mean()
))
print("explaining an average of {:.1f}% of the variance in actual trait activations.".format(
    corr_df['r²'].mean() * 100
))
print()

# Strong vs weak predictors
strong = corr_df[corr_df['r'] > 0.5]
weak = corr_df[corr_df['r'] < 0.3]

if len(strong) > 0:
    print("Participants showed strong prediction accuracy for {} ({}).".format(
        _join_names(strong['Trait'].tolist()),
        ', '.join([f"r = {r:.3f}" for r in strong['r'].tolist()])
    ))
    print()

if len(weak) > 0:
    print("Weaker correlations were observed for {} ({}).".format(
        _join_names(weak['Trait'].tolist()),
        ', '.join([f"r = {r:.3f}" for r in weak['r'].tolist()])
    ))
    print()

print("=" * 90)

corr_df

In [None]:
import pandas as pd
from scipy.stats import pearsonr

# Polar-encoded predictions and activations
df = pd.read_csv('model_data/persona_prediction.csv')

# One tuple per trait:
#   (activation column prefix, prediction column prefix,
#    first pole, second pole, display title)
traits = [
    ('empathy', 'empathy', 'empathetic', 'unempathetic', 'Empathy'),
    ('encouraging', 'encouraging', 'encouraging', 'discouraging', 'Encouraging'),
    ('formality', 'formality', 'casual', 'formal', 'Formality'),
    ('funniness', 'funniness', 'funny', 'serious', 'Funniness'),
    ('hallucination', 'hallucination', 'factual', 'hallucinatory', 'Hallucination'),
    ('sycophancy', 'honesty', 'honest', 'sycophantic', 'Honesty'),
    ('sociality', 'sociality', 'social', 'antisocial', 'Sociality'),
    ('toxicity', 'toxicity', 'respectful', 'toxic', 'Toxicity'),
]

# One pre-formatted table row per trait
results = []

for trait_act, trait_norm, pos_pole, neg_pole, title in traits:
    # Stack the two pole columns end to end, for predictions and for activations
    predicted = pd.concat(
        [df[f'{trait_norm}_{pole}_norm_polar'] for pole in (pos_pole, neg_pole)]
    )
    actual = pd.concat(
        [df[f'{trait_act}_{pole}_polar'] for pole in (pos_pole, neg_pole)]
    )

    # Pearson correlation between predicted and actual values
    r, p = pearsonr(predicted, actual)

    # Star marker for the .001 / .01 / .05 cut-offs (blank when not significant)
    if p < 0.001:
        stars = '***'
    elif p < 0.01:
        stars = '**'
    elif p < 0.05:
        stars = '*'
    else:
        stars = ''

    results.append({
        'Trait': title,
        'r': f"{r:.3f}",
        'p': f"{p:.3f}" if p >= 0.001 else "<.001",
        'sig': stars,
    })

# Assemble and show the table
corr_table = pd.DataFrame(results)

print("Correlations: Predicted vs Actual Activations")
print("=" * 50)
print(corr_table.to_string(index=False))

corr_table

## Survey Results

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Use Helvetica for all figure text
plt.rcParams['font.family'] = 'Helvetica'

# Participant-level survey data
df = pd.read_csv('data_clean/data_participants.csv')

# Rating columns plotted for all participants together (column -> description)
post_questions = dict(
    post_visualization_helpful='Visualization helped understand AI behavior',
    post_see_visualization_again='Would like to see visualization again',
)

# Rating columns plotted separately per condition (column -> description)
between_groups = dict(
    post_trust='Trust in the AI after interaction',
    post_arrived_desired_character='Arrived at desired character',
)

def calculate_distribution(df_subset, column):
    """Return the share (in percent) of each rating 1 through 7 in `column`.

    Percentages are relative to the number of non-missing answers in
    `df_subset[column]`. When there are no answers at all, a list of
    seven zeros is returned.
    """
    answered = df_subset[column].notna().sum()
    if answered == 0:
        return [0] * 7

    # One entry per rating value, in ascending order 1..7
    return [
        ((df_subset[column] == rating).sum() / answered) * 100
        for rating in range(1, 8)
    ]

# Participants of each condition
experimental_df = df.loc[df['condition_name'].eq('experimental')]
control_df = df.loc[df['condition_name'].eq('control')]

# One color per rating 1..7, running from red through gray to green
colors = [
    '#d73027', '#fc8d59', '#fee090',
    '#d9d9d9',
    '#91cf60', '#66bd63', '#1a9850',
]

# Two equally tall panels stacked vertically
fig, axes = plt.subplots(
    nrows=2, ncols=1, figsize=(16, 7),
    gridspec_kw=dict(height_ratios=[1, 1], hspace=0.2),
)

# Panel 1: Between groups (split by condition) - MINIMAL LABELS
def _draw_stacked_rows(ax, distributions, y_centers, bar_height):
    """Draw one 100%-stacked horizontal bar per entry of `distributions`.

    `distributions` holds one list of per-rating percentages per bar and
    `y_centers` the vertical centre of each bar. Segments are colored with
    the module-level `colors` list, one color per rating. Segments of at
    least 5% get their rounded percentage written at their centre in white.
    """
    left = np.zeros(len(distributions))
    for i, color in enumerate(colors):
        values = [d[i] for d in distributions]
        bars = ax.barh(y_centers, values, bar_height, left=left,
                       color=color, edgecolor='white', linewidth=0.5)

        for j, (bar, value) in enumerate(zip(bars, values)):
            if value >= 5:
                # Round to the nearest whole percent; int() would truncate
                # (e.g. 28.6 would be shown as 28)
                ax.text(left[j] + value / 2, bar.get_y() + bar.get_height() / 2,
                        f'{value:.0f}',
                        ha='center', va='center', fontsize=18, fontweight='bold',
                        color='white')

        # Next rating's segments start where these ones end
        left = left + np.asarray(values)


def create_grouped_stacked_bars(ax, questions_dict):
    """Draw control/experimental stacked rating bars for each question.

    `questions_dict` maps rating column names to descriptions. For every
    question two bars are drawn as a pair: the control group's distribution
    and, directly next to it, the experimental group's. Distributions come
    from `calculate_distribution` applied to the module-level `control_df`
    and `experimental_df`. Ticks and spines are removed and the y-axis is
    inverted so the first question appears at the top.
    """
    questions_list = list(questions_dict.keys())
    labels = list(questions_dict.values())

    experimental_data = [calculate_distribution(experimental_df, q) for q in questions_list]
    control_data = [calculate_distribution(control_df, q) for q in questions_list]

    bar_height = 0.35
    pair_spacing = 1.0
    n_questions = len(labels)
    y_positions = np.arange(n_questions) * pair_spacing

    # Offset of each bar's centre from the pair's centre (half a bar plus a small gap)
    pair_offset = bar_height / 2 + 0.03

    # Control group first (top of each pair once the axis is inverted),
    # experimental group second (bottom of each pair)
    _draw_stacked_rows(ax, control_data, y_positions - pair_offset, bar_height)
    _draw_stacked_rows(ax, experimental_data, y_positions + pair_offset, bar_height)

    # Minimal formatting - no labels
    ax.set_xlim(0, 100)
    ax.set_ylim(-0.5, y_positions[-1] + 0.5)
    ax.set_xticks([])
    ax.set_yticks([])
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)
    ax.invert_yaxis()

# Panel 2: Post questions (no condition split) - MINIMAL LABELS
def create_single_stacked_bars(ax, questions_dict):
    """Draw one 100%-stacked horizontal rating bar per question.

    `questions_dict` maps rating column names to descriptions. The
    distribution of each column is computed with `calculate_distribution`
    over the module-level `df` (all participants). Segments are colored
    with the module-level `colors` list; segments of at least 5% get their
    rounded percentage written at their centre in white. Ticks and spines
    are removed and the y-axis is inverted so the first question is on top.
    """
    questions_list = list(questions_dict.keys())
    labels = list(questions_dict.values())

    data = [calculate_distribution(df, q) for q in questions_list]

    bar_width = 0.65
    y_positions = np.arange(len(labels))

    # Running left edge of the next segment, one entry per bar
    left = np.zeros(len(labels))
    for i, color in enumerate(colors):
        values = [d[i] for d in data]
        bars = ax.barh(y_positions, values, bar_width, left=left,
                       color=color, edgecolor='white', linewidth=0.5)

        # Add percentage labels (only if >= 5%) - ALL WHITE
        for j, (bar, value) in enumerate(zip(bars, values)):
            if value >= 5:
                x_pos = left[j] + value / 2
                # Round to the nearest whole percent; int() would truncate
                # (e.g. 28.6 would be shown as 28)
                ax.text(x_pos, bar.get_y() + bar.get_height() / 2,
                        f'{value:.0f}',
                        ha='center', va='center', fontsize=18, fontweight='bold',
                        color='white')

        left += values

    # Minimal formatting - no labels
    ax.set_xlim(0, 100)
    ax.set_ylim(-0.5, len(labels) - 0.5)
    ax.set_xticks([])
    ax.set_yticks([])
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)
    ax.invert_yaxis()

# Top panel: per-condition bars; bottom panel: all participants together
create_grouped_stacked_bars(axes[0], between_groups)
create_single_stacked_bars(axes[1], post_questions)

# Fill the whole canvas (no outer margins) while keeping a gap between the panels
fig.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0.4)

# Export without padding on a white background, then display
fig.savefig('figures/subjective_ratings_stacked_bars.png', dpi=300, bbox_inches='tight',
            pad_inches=0, facecolor='white')
plt.show()

# Report the output path and the sample sizes behind the bars
for message in (
    "Chart saved to: figures/subjective_ratings_stacked_bars.png",
    f"All participants n={len(df)}",
    f"Experimental group n={len(experimental_df)}, Control group n={len(control_df)}",
):
    print(message)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.patches import Rectangle

# Use Helvetica for every text element
plt.rcParams['font.family'] = 'Helvetica'

# Rating questions: column name -> human-readable description.
# post_questions are pooled over all participants;
# between_groups are compared across conditions.
post_questions = dict(
    post_visualization_helpful='Visualization helped understand AI behavior',
    post_see_visualization_again='Would like to see visualization again',
)

between_groups = dict(
    post_trust='Trust in the AI after interaction',
    post_arrived_desired_character='Arrived at desired character',
)

# Participant-level data
df = pd.read_csv('data_clean/data_participants.csv')

def calculate_distribution(df_subset, column):
    """Return the percentage of answers at each rating 1..7 for ``column``.

    The denominator is the number of non-missing entries in the column.
    When there are none, seven zeros are returned instead of dividing by zero.
    """
    answered = df_subset[column].notna().sum()
    if answered == 0:
        return [0] * 7

    return [
        ((df_subset[column] == level).sum() / answered) * 100
        for level in range(1, 8)
    ]

# One participant subset per study condition
experimental_df, control_df = (
    df[df['condition_name'] == condition]
    for condition in ('experimental', 'control')
)

# Diverging palette, one color per rating from 1 (red) to 7 (green)
colors = [
    '#d73027',
    '#fc8d59',
    '#fee090',
    '#d9d9d9',
    '#91cf60',
    '#66bd63',
    '#1a9850',
]

# Two equally tall panels stacked vertically
fig, axes = plt.subplots(
    2, 1,
    figsize=(16, 8),
    gridspec_kw=dict(height_ratios=[1, 1], hspace=0.2),
)

# Panel 1: Between groups (split by condition)
def create_grouped_stacked_bars(ax, questions_dict):
    """Draw a pair of 100%-stacked horizontal bars per question, one per condition.

    Within each pair the control bar is drawn above the experimental bar
    (the y-axis is inverted at the end, so smaller y values appear higher).

    Parameters
    ----------
    ax : matplotlib axes
        Target axes; ticks and spines are removed.
    questions_dict : dict
        Keys are column names looked up in ``control_df`` and
        ``experimental_df``. The values (display labels) are not drawn.

    Notes
    -----
    Relies on the module-level ``control_df``, ``experimental_df``,
    ``colors`` and ``calculate_distribution``. Segment labels are rounded to
    the nearest whole percent (they used to be truncated, so 66.7% was
    printed as 66).
    """
    questions_list = list(questions_dict.keys())
    n_questions = len(questions_list)

    bar_height = 0.35
    pair_spacing = 1.0
    pair_gap = 0.03  # extra offset that separates the two bars of a pair
    y_positions = np.arange(n_questions) * pair_spacing

    def _draw_group(group_df, y_centers):
        """Stack the per-rating segments of one condition at ``y_centers``."""
        distributions = [calculate_distribution(group_df, q) for q in questions_list]

        # Running right edge of what has been stacked so far, one entry per row
        left = np.zeros(n_questions)
        for i, color in enumerate(colors):
            values = np.asarray([d[i] for d in distributions], dtype=float)
            bars = ax.barh(y_centers, values, bar_height, left=left,
                           color=color, edgecolor='white', linewidth=0.5)

            # Label a segment only when it covers at least 5% of the bar
            for bar, start, value in zip(bars, left, values):
                if value >= 5:
                    ax.text(start + value / 2, bar.get_y() + bar.get_height() / 2,
                            f'{value:.0f}',
                            ha='center', va='center', fontsize=18, fontweight='bold',
                            color='white')

            left += values

    # Control group (top of each pair), then experimental group (bottom)
    _draw_group(control_df, y_positions - bar_height / 2 - pair_gap)
    _draw_group(experimental_df, y_positions + bar_height / 2 + pair_gap)

    # Minimal formatting. The upper limit is computed from the count rather
    # than y_positions[-1] so an empty questions_dict does not raise.
    ax.set_xlim(0, 100)
    ax.set_ylim(-0.5, max(n_questions - 1, 0) * pair_spacing + 0.5)
    ax.set_xticks([])
    ax.set_yticks([])
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)
    ax.invert_yaxis()

# Panel 2: Post questions (no condition split)
def create_single_stacked_bars(ax, questions_dict):
    """Draw one 100%-stacked horizontal bar per question using every row of ``df``.

    Parameters
    ----------
    ax : matplotlib axes
        Target axes. Ticks and spines are removed and the y-axis is inverted,
        so the first question ends up at the top.
    questions_dict : dict
        Keys are column names looked up in the module-level ``df``. The
        values (display labels) are not drawn.

    Notes
    -----
    Relies on the module-level ``df``, ``colors`` and
    ``calculate_distribution``. Segment labels are rounded to the nearest
    whole percent (they used to be truncated, so 66.7% was printed as 66).
    """
    questions_list = list(questions_dict.keys())
    n_rows = len(questions_list)

    # One list of per-rating percentages for each question
    data = [calculate_distribution(df, q) for q in questions_list]

    bar_width = 0.65
    y_positions = np.arange(n_rows)

    # Running right edge of what has been stacked so far, one entry per row
    left = np.zeros(n_rows)
    for i, color in enumerate(colors):
        values = np.asarray([d[i] for d in data], dtype=float)
        bars = ax.barh(y_positions, values, bar_width, left=left,
                       color=color, edgecolor='white', linewidth=0.5)

        # Label a segment only when it covers at least 5% of the bar
        for bar, start, value in zip(bars, left, values):
            if value >= 5:
                ax.text(start + value / 2, bar.get_y() + bar.get_height() / 2,
                        f'{value:.0f}',
                        ha='center', va='center', fontsize=18, fontweight='bold',
                        color='white')

        left += values

    # Minimal formatting - no ticks, no frame
    ax.set_xlim(0, 100)
    ax.set_ylim(-0.5, n_rows - 0.5)
    ax.set_xticks([])
    ax.set_yticks([])
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)
    ax.invert_yaxis()

# Fill the panels: condition-split questions on top, pooled questions below
create_grouped_stacked_bars(ax=axes[0], questions_dict=between_groups)
create_single_stacked_bars(ax=axes[1], questions_dict=post_questions)

# Legend entries: one colored swatch per rating, lowest rating first
legend_labels = [
    '1 (Not at all)',
    '2',
    '3',
    '4 (Neutral)',
    '5',
    '6',
    '7 (Extremely)',
]
legend_handles = [Rectangle((0, 0), 1, 1, fc=swatch) for swatch in colors]

# Single-row, frameless legend centered under the panels
fig.legend(
    legend_handles,
    legend_labels,
    loc='lower center',
    bbox_to_anchor=(0.5, -0.02),
    ncol=7,
    frameon=False,
    fontsize=18,
)

# Axes fill the canvas except for a thin strip at the bottom
plt.subplots_adjust(
    left=0, right=1,
    bottom=0.05, top=1,
    wspace=0, hspace=0.4,
)

plt.savefig(
    'figures/subjective_ratings_stacked_bars.png',
    dpi=300,
    bbox_inches='tight',
    pad_inches=0.1,
    facecolor='white',
)
plt.show()

# Report the output path and the sample sizes behind the chart
print(
    "Chart saved to: figures/subjective_ratings_stacked_bars.png",
    f"All participants n={len(df)}",
    f"Experimental group n={len(experimental_df)}, Control group n={len(control_df)}",
    sep='\n',
)

In [None]:
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

# Use Helvetica for every text element
plt.rcParams['font.family'] = 'Helvetica'

# Red-to-green palette and the matching rating labels, lowest rating first
colors = [
    '#d73027',
    '#fc8d59',
    '#fee090',
    '#d9d9d9',
    '#91cf60',
    '#66bd63',
    '#1a9850',
]
legend_labels = [
    '1 (Not at all)',
    '2',
    '3',
    '4 (Neutral)',
    '5',
    '6',
    '7 (Extremely)',
]

# One filled swatch per color to act as the legend handle
legend_handles = [Rectangle((0, 0), 1, 1, fc=swatch) for swatch in colors]

# Short, wide figure whose only content is the legend; the axes stay hidden
fig, ax = plt.subplots(figsize=(14, 0.8))
ax.axis('off')

# Single-row, frameless legend centered in the figure
legend = ax.legend(
    legend_handles,
    legend_labels,
    loc='center',
    ncol=7,
    frameon=False,
    fontsize=12,
    handlelength=2,
    handleheight=1.5,
)

# Stretch the axes over the whole canvas to minimize whitespace
plt.subplots_adjust(left=0, right=1, top=1, bottom=0)

# Export the legend as a standalone image
plt.savefig(
    'figures/rating_scale_legend.png',
    dpi=300,
    bbox_inches='tight',
    pad_inches=0.1,
    facecolor='white',
)
plt.show()

print("Legend saved to: figures/rating_scale_legend.png")

In [None]:
import pandas as pd
import plotly.graph_objects as go

# Hard-coded t-test results, one row per measure.
# NOTE(review): these numbers are typed in, not computed in this cell —
# confirm they match the analysis they were copied from.
# NOTE(review): this cell rebinds the notebook-level name `df` to the results table.
data = {
    'Measure': ['delta_neg_unintended', 'delta_trust', 'delta_unintended', 'pre_trust',
         'pre_predict_unintended_behaviors', 'post_predict_unintended_behaviors',
         'num_user_messages', 'post_arrived_desired_character', 'post_trust',
         'pre_predict_negative_behaviors', 'post_predict_negative_behaviors'],
    't': [0.611, -0.782, -1.422, -1.314, 0.855, -0.639, 1.128, -1.011, -2.065, 0.292, 0.777],
    # Degrees-of-freedom column is currently disabled:
    # 'df': [78] * 11,
    # p values are kept as strings so they render without a leading zero
    'p': ['.543', '.437', '.159', '.193', '.395', '.525', '.263', '.315', '.042', '.771', '.440']
}

df = pd.DataFrame(data)

# Zebra striping: exactly one color per table row, white for even rows.
# This replaces a hard-coded 12-entry list, so striping follows the row count.
row_colors = ['white' if i % 2 == 0 else '#E8E8E8' for i in range(len(df))]

# Per-column layout, one entry per displayed column (Measure, t, p).
# NOTE(review): the lists previously had a fourth entry left over from the
# disabled 'df' column; the widths below are the ones the three visible
# columns were already receiving.
column_widths = [300, 100, 80]
cell_align = ['left', 'center', 'center']

# Create the table
fig = go.Figure(data=[go.Table(
    columnwidth=column_widths,
    header=dict(
        values=['<b>' + col + '</b>' for col in df.columns],
        fill_color='white',
        align=['center'] * len(df.columns),
        font=dict(family='Helvetica', size=18, color='black'),
        line=dict(color='black', width=0),
        height=30
    ),
    cells=dict(
        values=[df[col] for col in df.columns],
        fill_color=[row_colors],  # a single inner list is applied to every column
        align=cell_align,
        font=dict(family='Helvetica', size=18, color='black'),
        line=dict(color='white', width=0),
        height=28
    )
)])

# Update layout; the larger bottom margin leaves room for the note below the table
fig.update_layout(
    width=650,
    height=400,
    margin=dict(l=5, r=5, t=5, b=50),
    font=dict(family='Helvetica')
)

# Add note as annotation, left-aligned just below the plotting area
fig.add_annotation(
    text="<i>Note. Student's t-test.</i>",
    xref="paper", yref="paper",
    x=0, y=-0.1,
    showarrow=False,
    font=dict(size=11, family='Helvetica'),
    xanchor='left',
    align='left'
)

# Save the figure (static export at 3x resolution)
fig.write_image('figures/ttest_table.png', scale=3)
fig.show()

print("Table saved to: figures/ttest_table.png")