# Pictopercept Image Pair Sampling

Creating balanced image pairs from Chicago Face Database for bias measurement.

In [1]:
# load necessary libraries and data

from skimpy import skim
import pandas as pd
import numpy as np
from scipy.spatial.distance import euclidean
from itertools import combinations, product
import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', None)
np.random.seed(42)

# Define columns to keep - focused on earnings-relevant covariates
columns_to_keep = [
    'Model', 'EthnicitySelf', 'GenderSelf', 'AgeRated', 'FemaleProb', 'MaleProb', 
    'AsianProb', 'BlackProb', 'LatinoProb', 'MultiProb', 'OtherProb', 'WhiteProb', 
    'Attractive', 'Dominant', 'Trustworthy', 'LuminanceMedian'
]

df = pd.read_excel('../CFD 3.0 Norming Data and Codebook.xlsx', 
                   sheet_name='CFD U.S. Norming Data', 
                   header=7,
                   skiprows=[8],
                   usecols=columns_to_keep)

df

Unnamed: 0,Model,EthnicitySelf,GenderSelf,AgeRated,FemaleProb,MaleProb,AsianProb,BlackProb,LatinoProb,MultiProb,OtherProb,WhiteProb,Attractive,Dominant,Trustworthy,LuminanceMedian
0,AF-200,A,F,32.571429,1.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,4.111111,1.928571,3.925926,174.0
1,AF-201,A,F,23.666667,1.000000,0.000000,0.962963,0.000000,0.000000,0.037037,0.000000,0.000000,3.111111,2.111111,3.538462,172.0
2,AF-202,A,F,24.448276,0.827586,0.172414,0.310345,0.068966,0.137931,0.448276,0.034483,0.000000,3.000000,2.862069,3.379310,153.5
3,AF-203,A,F,22.758621,1.000000,0.000000,0.758621,0.000000,0.068966,0.172414,0.000000,0.000000,3.275862,1.750000,3.793103,175.5
4,AF-204,A,F,30.137931,1.000000,0.000000,0.827586,0.000000,0.068966,0.103448,0.000000,0.000000,3.172414,1.758621,3.310345,168.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
592,WM-254,W,M,30.307692,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,3.153846,3.230769,3.230769,174.0
593,WM-255,W,M,34.071429,0.035714,0.964286,0.000000,0.000000,0.111111,0.185185,0.111111,0.592593,2.259259,3.214286,2.892857,162.5
594,WM-256,W,M,30.961538,0.000000,1.000000,0.000000,0.000000,0.115385,0.076923,0.038462,0.769231,2.269231,3.230769,3.000000,166.5
595,WM-257,W,M,30.037037,0.000000,1.000000,0.000000,0.000000,0.111111,0.037037,0.000000,0.851852,3.740741,2.185185,3.923077,172.0


In [None]:
skim(df)

## Goal: 68 trials for Pictopercept study

- 64 target pairs testing gender/ethnicity bias in earnings perception
- 4 attention check pairs
- Focus on balancing age, attractiveness, dominance, trustworthiness

In [2]:
print(f"Original dataset shape: {df.shape}")
print(f"Unique ethnicities: {df['EthnicitySelf'].unique()}")
print(f"Unique genders: {df['GenderSelf'].unique()}")
print(f"\nDataset overview:")
print(df.groupby(['EthnicitySelf', 'GenderSelf']).size())

Original dataset shape: (597, 16)
Unique ethnicities: ['A' 'B' 'L' 'W']
Unique genders: ['F' 'M']

Dataset overview:
EthnicitySelf  GenderSelf
A              F              57
               M              52
B              F             104
               M              93
L              F              56
               M              52
W              F              90
               M              93
dtype: int64


## Data cleaning: keep faces where self-reports match rater perceptions

In [3]:
# Define earnings-relevant balancing covariates
balance_vars = ['AgeRated', 'Attractive', 'Trustworthy', 'Dominant']

# Check for missing data
print("Missing data check:")
missing_check = df[['Model', 'EthnicitySelf', 'GenderSelf'] + balance_vars].isnull().sum()
print(missing_check[missing_check > 0])

# Remove rows with missing values
df_clean = df.dropna(subset=['Model', 'EthnicitySelf', 'GenderSelf'] + balance_vars).copy()
print(f"\nAfter removing missing data: {df_clean.shape[0]} images")

Missing data check:
Series([], dtype: int64)

After removing missing data: 597 images


In [4]:
# Filter faces where category probabilities align with self-report
def filter_by_category_alignment(df, prob_threshold=0.60):
    # Gender alignment
    gender_aligned = (
        ((df['GenderSelf'] == 'F') & (df['FemaleProb'] > prob_threshold)) |
        ((df['GenderSelf'] == 'M') & (df['MaleProb'] > prob_threshold))
    )
    
    # Ethnicity alignment
    ethnicity_probs = df[['AsianProb', 'BlackProb', 'LatinoProb', 'WhiteProb', 'MultiProb', 'OtherProb']]
    ethnicity_argmax = ethnicity_probs.idxmax(axis=1)
    
    ethnicity_map = {'A': 'AsianProb', 'B': 'BlackProb', 'L': 'LatinoProb', 'W': 'WhiteProb'}
    expected_col = df['EthnicitySelf'].map(ethnicity_map)
    
    ethnicity_vals = []
    for idx, col in expected_col.items():
        if pd.notna(col):
            ethnicity_vals.append(df.loc[idx, col])
        else:
            ethnicity_vals.append(0)
    ethnicity_vals = pd.Series(ethnicity_vals, index=df.index)
    
    ethnicity_aligned = (
        (ethnicity_vals > prob_threshold) &
        (ethnicity_argmax == expected_col)
    )
    
    return df[gender_aligned & ethnicity_aligned].copy()

df_aligned = filter_by_category_alignment(df_clean, prob_threshold=0.60)
print(f"After category alignment filtering: {df_aligned.shape[0]} images")
print(f"Final sample by group:")
print(df_aligned.groupby(['EthnicitySelf', 'GenderSelf']).size())

After category alignment filtering: 466 images
Final sample by group:
EthnicitySelf  GenderSelf
A              F             46
               M             39
B              F             98
               M             80
L              F             22
               M             15
W              F             81
               M             85
dtype: int64


## Normalize earnings-relevant covariates and remove outliers

In [5]:
# Normalize balancing covariates to Z-scores
df_normalized = df_aligned.copy()

for var in balance_vars:
    df_normalized[f'{var}_z'] = (df_aligned[var] - df_aligned[var].mean()) / df_aligned[var].std()

balance_vars_z = [f'{var}_z' for var in balance_vars]

# Remove extreme outliers (|Z| > 3)
outlier_mask = np.abs(df_normalized[balance_vars_z]).max(axis=1) > 3
print(f"Found {outlier_mask.sum()} extreme outliers")

df_final = df_normalized[~outlier_mask].copy()
print(f"Final dataset: {df_final.shape[0]} images")
print(f"Final sample by group:")
print(df_final.groupby(['EthnicitySelf', 'GenderSelf']).size())

Found 6 extreme outliers
Final dataset: 460 images
Final sample by group:
EthnicitySelf  GenderSelf
A              F             46
               M             38
B              F             96
               M             78
L              F             22
               M             15
W              F             80
               M             85
dtype: int64


## Generate demographic contrast pairs

Three types: gender-only, ethnicity-only, intersectional contrasts

In [6]:
def calculate_covariate_distance(row1, row2, balance_vars_z):
    """Calculate Euclidean distance between two faces on balancing covariates."""
    vec1 = row1[balance_vars_z].values
    vec2 = row2[balance_vars_z].values
    return euclidean(vec1, vec2)

def generate_pairs_for_contrast(df, group1_filter, group2_filter, contrast_name, 
                               balance_vars_z, distance_threshold=1.5, max_pairs_per_face=2):
    """Generate balanced pairs for a demographic contrast."""
    group1 = df[group1_filter].copy()
    group2 = df[group2_filter].copy()
    
    if len(group1) == 0 or len(group2) == 0:
        print(f"Warning: {contrast_name} has empty groups")
        return []
    
    pairs = []
    face_usage = {}
    
    for _, face1 in group1.iterrows():
        for _, face2 in group2.iterrows():
            
            if (face_usage.get(face1['Model'], 0) >= max_pairs_per_face or 
                face_usage.get(face2['Model'], 0) >= max_pairs_per_face):
                continue
            
            distance = calculate_covariate_distance(face1, face2, balance_vars_z)
            
            if distance <= distance_threshold:
                pair_info = {
                    'contrast_type': contrast_name,
                    'left_model': face1['Model'],
                    'right_model': face2['Model'],
                    'left_ethnicity': face1['EthnicitySelf'],
                    'left_gender': face1['GenderSelf'],
                    'right_ethnicity': face2['EthnicitySelf'],
                    'right_gender': face2['GenderSelf'],
                    'covariate_distance': distance,
                    'pair_id': f"{contrast_name}_{face1['Model']}_{face2['Model']}"
                }
                
                # Add covariate values
                for var in balance_vars_z:
                    pair_info[f'left_{var}'] = face1[var]
                    pair_info[f'right_{var}'] = face2[var]
                
                pairs.append(pair_info)
                face_usage[face1['Model']] = face_usage.get(face1['Model'], 0) + 1
                face_usage[face2['Model']] = face_usage.get(face2['Model'], 0) + 1
    
    print(f"{contrast_name}: {len(pairs)} pairs")
    return pairs

## Generate all contrast pairs with reasonable distance threshold

In [7]:
# Generate all target contrasts
all_pairs = []

# 1. Gender-only contrasts (within ethnicity)
print("=== GENDER-ONLY CONTRASTS ===")
for ethnicity in ['A', 'B', 'L', 'W']:
    female_filter = (df_final['EthnicitySelf'] == ethnicity) & (df_final['GenderSelf'] == 'F')
    male_filter = (df_final['EthnicitySelf'] == ethnicity) & (df_final['GenderSelf'] == 'M')
    
    pairs = generate_pairs_for_contrast(
        df_final, female_filter, male_filter, 
        f'GenderOnly_{ethnicity}', balance_vars_z
    )
    all_pairs.extend(pairs)

# 2. Ethnicity-only contrasts (within gender)
print("\n=== ETHNICITY-ONLY CONTRASTS ===")
ethnicity_combinations = [('A', 'B'), ('A', 'L'), ('A', 'W'), ('B', 'L'), ('B', 'W'), ('L', 'W')]

for gender in ['F', 'M']:
    for eth1, eth2 in ethnicity_combinations:
        filter1 = (df_final['EthnicitySelf'] == eth1) & (df_final['GenderSelf'] == gender)
        filter2 = (df_final['EthnicitySelf'] == eth2) & (df_final['GenderSelf'] == gender)
        
        pairs = generate_pairs_for_contrast(
            df_final, filter1, filter2, 
            f'EthnicityOnly_{gender}_{eth1}vs{eth2}', balance_vars_z
        )
        all_pairs.extend(pairs)

# 3. Intersectional contrasts
print("\n=== INTERSECTIONAL CONTRASTS ===")
intersectional_contrasts = [
    (('F', 'W'), ('M', 'B')),  # White Female vs Black Male
    (('F', 'B'), ('M', 'W')),  # Black Female vs White Male
    (('F', 'A'), ('M', 'W')),  # Asian Female vs White Male
    (('F', 'W'), ('M', 'A')),  # White Female vs Asian Male
    (('F', 'L'), ('M', 'W')),  # Latino Female vs White Male
    (('F', 'W'), ('M', 'L')),  # White Female vs Latino Male
]

for (g1, e1), (g2, e2) in intersectional_contrasts:
    filter1 = (df_final['GenderSelf'] == g1) & (df_final['EthnicitySelf'] == e1)
    filter2 = (df_final['GenderSelf'] == g2) & (df_final['EthnicitySelf'] == e2)
    
    pairs = generate_pairs_for_contrast(
        df_final, filter1, filter2, 
        f'Intersectional_{g1}{e1}vs{g2}{e2}', balance_vars_z
    )
    all_pairs.extend(pairs)

print(f"\nTotal pairs generated: {len(all_pairs)}")

=== GENDER-ONLY CONTRASTS ===
GenderOnly_A: 48 pairs
GenderOnly_B: 124 pairs
GenderOnly_L: 21 pairs
GenderOnly_B: 124 pairs
GenderOnly_L: 21 pairs
GenderOnly_W: 119 pairs

=== ETHNICITY-ONLY CONTRASTS ===
GenderOnly_W: 119 pairs

=== ETHNICITY-ONLY CONTRASTS ===
EthnicityOnly_F_AvsB: 86 pairs
EthnicityOnly_F_AvsL: 39 pairs
EthnicityOnly_F_AvsB: 86 pairs
EthnicityOnly_F_AvsL: 39 pairs
EthnicityOnly_F_AvsW: 91 pairs
EthnicityOnly_F_BvsL: 44 pairs
EthnicityOnly_F_AvsW: 91 pairs
EthnicityOnly_F_BvsL: 44 pairs
EthnicityOnly_F_BvsW: 140 pairs
EthnicityOnly_F_LvsW: 43 pairs
EthnicityOnly_F_BvsW: 140 pairs
EthnicityOnly_F_LvsW: 43 pairs
EthnicityOnly_M_AvsB: 66 pairs
EthnicityOnly_M_AvsL: 27 pairs
EthnicityOnly_M_AvsB: 66 pairs
EthnicityOnly_M_AvsL: 27 pairs
EthnicityOnly_M_AvsW: 70 pairs
EthnicityOnly_M_BvsL: 29 pairs
EthnicityOnly_M_AvsW: 70 pairs
EthnicityOnly_M_BvsL: 29 pairs
EthnicityOnly_M_BvsW: 118 pairs
EthnicityOnly_M_LvsW: 30 pairs

=== INTERSECTIONAL CONTRASTS ===
EthnicityOnly_M_Bv

## Sample 64 target pairs + 4 attention checks

In [8]:
# Convert pairs to DataFrame
pairs_df = pd.DataFrame(all_pairs)

def sample_balanced_pairs(pairs_df, target_total=64, random_seed=42):
    """Sample balanced set of 64 target pairs ensuring no identity repeats."""
    np.random.seed(random_seed)
    
    selected_pairs = []
    used_models = set()
    
    # 1. Gender-only pairs (aim for 16)
    gender_pairs = pairs_df[pairs_df['contrast_type'].str.contains('GenderOnly')].copy()
    gender_selected = []
    
    for ethnicity in ['A', 'B', 'L', 'W']:
        eth_pairs = gender_pairs[gender_pairs['contrast_type'] == f'GenderOnly_{ethnicity}']
        if len(eth_pairs) > 0:
            n_sample = min(4, len(eth_pairs))
            sampled = eth_pairs.sample(n=n_sample, random_state=random_seed+ord(ethnicity))
            gender_selected.extend(sampled.to_dict('records'))
            
            for _, row in sampled.iterrows():
                used_models.add(row['left_model'])
                used_models.add(row['right_model'])
    
    selected_pairs.extend(gender_selected[:16])
    print(f"Selected {len(gender_selected[:16])} gender-only pairs")
    
    # 2. Ethnicity-only pairs (aim for 24)
    ethnicity_pairs = pairs_df[pairs_df['contrast_type'].str.contains('EthnicityOnly')].copy()
    ethnicity_pairs = ethnicity_pairs[
        ~(ethnicity_pairs['left_model'].isin(used_models) | 
          ethnicity_pairs['right_model'].isin(used_models))
    ]
    
    if len(ethnicity_pairs) > 0:
        n_sample = min(24, len(ethnicity_pairs))
        sampled = ethnicity_pairs.sample(n=n_sample, random_state=random_seed+1)
        selected_pairs.extend(sampled.to_dict('records'))
        
        for _, row in sampled.iterrows():
            used_models.add(row['left_model'])
            used_models.add(row['right_model'])
        print(f"Selected {n_sample} ethnicity-only pairs")
    
    # 3. Intersectional pairs (fill remaining slots)
    intersectional_pairs = pairs_df[pairs_df['contrast_type'].str.contains('Intersectional')].copy()
    intersectional_pairs = intersectional_pairs[
        ~(intersectional_pairs['left_model'].isin(used_models) | 
          intersectional_pairs['right_model'].isin(used_models))
    ]
    
    if len(intersectional_pairs) > 0:
        remaining_needed = target_total - len(selected_pairs)
        n_sample = min(remaining_needed, len(intersectional_pairs))
        sampled = intersectional_pairs.sample(n=n_sample, random_state=random_seed+2)
        selected_pairs.extend(sampled.to_dict('records'))
        print(f"Selected {n_sample} intersectional pairs")
    
    return pd.DataFrame(selected_pairs)

# Sample 64 target pairs
target_pairs = sample_balanced_pairs(pairs_df, target_total=64)
print(f"\nFinal target pairs: {len(target_pairs)}")
print(target_pairs['contrast_type'].value_counts())

Selected 16 gender-only pairs
Selected 24 ethnicity-only pairs
Selected 24 intersectional pairs

Final target pairs: 64
contrast_type
Intersectional_FWvsMB    9
EthnicityOnly_F_BvsW     7
Intersectional_FBvsMW    6
EthnicityOnly_M_BvsW     5
GenderOnly_A             4
GenderOnly_B             4
GenderOnly_L             4
GenderOnly_W             4
EthnicityOnly_F_AvsW     4
Intersectional_FAvsMW    4
EthnicityOnly_F_AvsB     3
Intersectional_FLvsMW    3
EthnicityOnly_F_LvsW     2
EthnicityOnly_M_AvsW     2
Intersectional_FWvsMA    2
EthnicityOnly_M_LvsW     1
Name: count, dtype: int64


In [9]:
# Create 4 attention check trials
def create_attention_checks(pairs_df, n_checks=4):
    """Create attention check trials with clear instructions."""
    attention_pairs = pairs_df.sample(n_checks, random_state=123)
    attention_checks = []
    
    for i, (_, pair) in enumerate(attention_pairs.iterrows()):
        check = {
            'trial_type': 'attention_check',
            'trial_id': f'attention_{i+1}',
            'contrast_type': 'attention_check',
            'left_model': pair['left_model'],
            'right_model': pair['right_model'],
            'left_ethnicity': pair['left_ethnicity'],
            'left_gender': pair['left_gender'],
            'right_ethnicity': pair['right_ethnicity'],
            'right_gender': pair['right_gender'],
            'instruction': f'Select the face on the {"left" if i % 2 == 0 else "right"}',
            'correct_choice': 'left' if i % 2 == 0 else 'right'
        }
        attention_checks.append(check)
    
    return attention_checks

attention_trials = create_attention_checks(pairs_df, n_checks=4)
print(f"Created {len(attention_trials)} attention check trials")

Created 4 attention check trials


## Create final trial structure with side randomization

In [10]:
def create_final_trial_dataframe(target_pairs, attention_trials, random_seed=42):
    """Create final trial dataframe with side randomization."""
    np.random.seed(random_seed)
    
    all_trials = []
    
    # Add target trials
    for _, row in target_pairs.iterrows():
        trial = {
            'trial_id': f"target_{len(all_trials)+1:02d}",
            'trial_type': 'target',
            'contrast_type': row['contrast_type'],
            'left_model': row['left_model'],
            'right_model': row['right_model'],
            'left_ethnicity': row['left_ethnicity'],
            'left_gender': row['left_gender'],
            'right_ethnicity': row['right_ethnicity'],
            'right_gender': row['right_gender'],
            'covariate_distance': row['covariate_distance'],
            'is_attention_check': False
        }
        
        # Add original covariate values
        for var in balance_vars:
            trial[f'left_{var}'] = df_final[df_final['Model'] == row['left_model']][var].iloc[0]
            trial[f'right_{var}'] = df_final[df_final['Model'] == row['right_model']][var].iloc[0]
        
        all_trials.append(trial)
    
    # Add attention checks
    for trial in attention_trials:
        trial_dict = {
            'trial_id': trial['trial_id'],
            'trial_type': 'attention_check',
            'contrast_type': 'attention_check',
            'left_model': trial['left_model'],
            'right_model': trial['right_model'],
            'left_ethnicity': trial['left_ethnicity'],
            'left_gender': trial['left_gender'],
            'right_ethnicity': trial['right_ethnicity'],
            'right_gender': trial['right_gender'],
            'covariate_distance': np.nan,
            'is_attention_check': True,
            'correct_choice': trial['correct_choice']
        }
        
        # Add covariate values for attention checks too
        for var in balance_vars:
            trial_dict[f'left_{var}'] = df_final[df_final['Model'] == trial['left_model']][var].iloc[0]
            trial_dict[f'right_{var}'] = df_final[df_final['Model'] == trial['right_model']][var].iloc[0]
            
        all_trials.append(trial_dict)
    
    trials_df = pd.DataFrame(all_trials)
    
    # Add side randomization (50/50)
    trials_df['side_randomized'] = np.random.choice(['original', 'flipped'], 
                                                   size=len(trials_df), p=[0.5, 0.5])
    
    # Create display columns (after randomization)
    trials_df['display_left_model'] = trials_df['left_model'].copy()
    trials_df['display_right_model'] = trials_df['right_model'].copy()
    
    # Flip when randomized
    flipped_mask = trials_df['side_randomized'] == 'flipped'
    trials_df.loc[flipped_mask, 'display_left_model'] = trials_df.loc[flipped_mask, 'right_model']
    trials_df.loc[flipped_mask, 'display_right_model'] = trials_df.loc[flipped_mask, 'left_model']
    
    # Add presentation order
    trials_df['presentation_order'] = np.random.permutation(len(trials_df)) + 1
    trials_df = trials_df.sort_values('presentation_order').reset_index(drop=True)
    
    return trials_df

final_trials = create_final_trial_dataframe(target_pairs, attention_trials)
print(f"Final trial structure: {len(final_trials)} trials")
print(f"Target: {sum(final_trials['trial_type'] == 'target')}")
print(f"Attention: {sum(final_trials['is_attention_check'])}")

Final trial structure: 68 trials
Target: 64
Attention: 4


## Map to image files and create balance table

In [11]:
# Map model IDs to JPG filenames
import os
import glob

def create_model_to_filename_mapping(neutral_faces_path):
    jpg_files = glob.glob(os.path.join(neutral_faces_path, "*.jpg"))
    model_to_filename = {}
    
    for filepath in jpg_files:
        filename = os.path.basename(filepath)
        if filename.startswith('CFD-') and filename.endswith('-N.jpg'):
            parts = filename.split('-')
            if len(parts) >= 4:
                ethnicity_gender = parts[1]  # e.g., 'AF'
                number = parts[2]            # e.g., '200'
                model_id = f"{ethnicity_gender}-{number}"  # e.g., 'AF-200'
                model_to_filename[model_id] = filename
    
    return model_to_filename

model_to_filename = create_model_to_filename_mapping("../neutral_faces")
print(f"Mapped {len(model_to_filename)} models to filenames")

# Add filename columns
final_trials['left_image_file'] = final_trials['display_left_model'].map(model_to_filename)
final_trials['right_image_file'] = final_trials['display_right_model'].map(model_to_filename)

missing = final_trials['left_image_file'].isnull().sum() + final_trials['right_image_file'].isnull().sum()
print(f"Missing image files: {missing}")
if missing == 0:
    print("✅ All trials mapped to image files!")

Mapped 597 models to filenames
Missing image files: 0
✅ All trials mapped to image files!


In [12]:
# Create balance table showing left vs right image characteristics
balance_data = []

# Collect data for left images
left_models = final_trials['display_left_model'].tolist()
left_data = df_final[df_final['Model'].isin(left_models)]

# Collect data for right images  
right_models = final_trials['display_right_model'].tolist()
right_data = df_final[df_final['Model'].isin(right_models)]

# Calculate means for each covariate
balance_vars_all = ['AgeRated', 'Attractive', 'Dominant', 'Trustworthy']

print("=== BALANCE TABLE: Left vs Right Images ===")
print("Should see minimal differences if randomization worked well\n")

balance_table = pd.DataFrame({
    'Covariate': balance_vars_all + ['EthnicitySelf_A', 'EthnicitySelf_B', 'EthnicitySelf_L', 'EthnicitySelf_W', 'GenderSelf_F', 'GenderSelf_M'],
    'Left_Images_Mean': [
        left_data['AgeRated'].mean(),
        left_data['Attractive'].mean(), 
        left_data['Dominant'].mean(),
        left_data['Trustworthy'].mean(),
        (left_data['EthnicitySelf'] == 'A').mean(),
        (left_data['EthnicitySelf'] == 'B').mean(),
        (left_data['EthnicitySelf'] == 'L').mean(),
        (left_data['EthnicitySelf'] == 'W').mean(),
        (left_data['GenderSelf'] == 'F').mean(),
        (left_data['GenderSelf'] == 'M').mean()
    ],
    'Right_Images_Mean': [
        right_data['AgeRated'].mean(),
        right_data['Attractive'].mean(),
        right_data['Dominant'].mean(), 
        right_data['Trustworthy'].mean(),
        (right_data['EthnicitySelf'] == 'A').mean(),
        (right_data['EthnicitySelf'] == 'B').mean(),
        (right_data['EthnicitySelf'] == 'L').mean(),
        (right_data['EthnicitySelf'] == 'W').mean(),
        (right_data['GenderSelf'] == 'F').mean(),
        (right_data['GenderSelf'] == 'M').mean()
    ]
})

balance_table['Difference'] = balance_table['Right_Images_Mean'] - balance_table['Left_Images_Mean']
balance_table['Abs_Difference'] = abs(balance_table['Difference'])

print(balance_table.round(3))

print(f"\n=== BALANCE ASSESSMENT ===")
print(f"Mean absolute difference across all covariates: {balance_table['Abs_Difference'].mean():.3f}")
print(f"Max absolute difference: {balance_table['Abs_Difference'].max():.3f}")

# Check if differences are small (good balance)
large_diffs = balance_table[balance_table['Abs_Difference'] > 0.1]
if len(large_diffs) == 0:
    print("✅ Good balance achieved - all differences < 0.1")
else:
    print(f"⚠️  Large differences found in:")
    for _, row in large_diffs.iterrows():
        print(f"   {row['Covariate']}: {row['Difference']:.3f}")
        
print(f"\nNote: Differences should be small if side randomization worked well.")
print("Large differences suggest systematic bias in left/right assignment.")

=== BALANCE TABLE: Left vs Right Images ===
Should see minimal differences if randomization worked well

         Covariate  Left_Images_Mean  Right_Images_Mean  Difference  \
0         AgeRated            27.815             28.064       0.249   
1       Attractive             3.206              3.296       0.090   
2         Dominant             2.706              2.752       0.046   
3      Trustworthy             3.488              3.477      -0.012   
4  EthnicitySelf_A             0.164              0.231       0.067   
5  EthnicitySelf_B             0.373              0.215      -0.158   
6  EthnicitySelf_L             0.134              0.062      -0.073   
7  EthnicitySelf_W             0.328              0.492       0.164   
8     GenderSelf_F             0.582              0.538      -0.044   
9     GenderSelf_M             0.418              0.462       0.044   

   Abs_Difference  
0           0.249  
1           0.090  
2           0.046  
3           0.012  
4           0

## Presentation Order Logic Explained

**Overall Presentation Order (Single Survey):**
1. Trials are combined (64 targets + 4 attention checks)
2. `np.random.permutation()` with seed=42 creates completely random order
3. All 68 trials get shuffled together with no constraints

**Block-Level Presentation Order:**
1. Start with the original random presentation order from single survey
2. Sort target trials by this order (preserves the randomization)
3. Split into sequential chunks: trials 1-16 → Block 1, trials 17-32 → Block 2, etc.
4. Add 1 attention check per block
5. **Re-shuffle within each block** using `sample(frac=1, random_state=42+block_num)`

The blocked version adds a second layer of randomization within blocks while respecting the global randomization structure.

In [14]:
# Create 4 blocks of 17 trials each (16 target + 1 attention check)
# This preserves all randomization while organizing trials into manageable blocks

def create_blocked_structure(final_trials, n_blocks=4):
    """
    Split trials into blocks while preserving randomization.
    Each block gets 16 target trials + 1 attention check.
    """
    # Separate target and attention trials
    target_trials = final_trials[final_trials['trial_type'] == 'target'].copy()
    attention_trials = final_trials[final_trials['trial_type'] == 'attention_check'].copy()
    
    # Verify we have the right numbers
    print(f"Target trials: {len(target_trials)}")
    print(f"Attention trials: {len(attention_trials)}")
    
    # Sort target trials by current presentation order (preserves existing randomization)
    target_trials = target_trials.sort_values('presentation_order').reset_index(drop=True)
    
    # Create blocks
    blocked_trials = []
    targets_per_block = 16
    
    for block_num in range(n_blocks):
        block_trials = []
        
        # Add 16 target trials to this block
        start_idx = block_num * targets_per_block
        end_idx = start_idx + targets_per_block
        block_targets = target_trials.iloc[start_idx:end_idx].copy()
        
        # Add block information
        block_targets['block_number'] = block_num + 1
        block_targets['trial_within_block'] = range(1, len(block_targets) + 1)
        
        # Add one attention check to this block
        if block_num < len(attention_trials):
            attention_trial = attention_trials.iloc[block_num:block_num+1].copy()
            attention_trial['block_number'] = block_num + 1
            attention_trial['trial_within_block'] = targets_per_block + 1  # Last trial in block
            
            # Combine block trials
            block_all = pd.concat([block_targets, attention_trial], ignore_index=True)
        else:
            block_all = block_targets
        
        # Shuffle trials within block (preserves randomization principle)
        block_all = block_all.sample(frac=1, random_state=42+block_num).reset_index(drop=True)
        
        # Update within-block trial numbers after shuffle
        block_all['trial_within_block'] = range(1, len(block_all) + 1)
        
        blocked_trials.append(block_all)
        
        print(f"Block {block_num + 1}: {len(block_targets)} targets + {1 if block_num < len(attention_trials) else 0} attention = {len(block_all)} total")
    
    # Combine all blocks
    final_blocked = pd.concat(blocked_trials, ignore_index=True)
    
    # Add overall trial numbering
    final_blocked['overall_trial_number'] = range(1, len(final_blocked) + 1)
    
    return final_blocked

# Create blocked structure
blocked_trials = create_blocked_structure(final_trials)

print(f"\n=== BLOCKED STRUCTURE SUMMARY ===")
print(f"Total trials: {len(blocked_trials)}")
print(f"Blocks created: {blocked_trials['block_number'].nunique()}")

# Show trials per block
block_summary = blocked_trials.groupby('block_number').agg({
    'trial_type': 'count',
    'trial_type': lambda x: (x == 'target').sum(),  # Count targets
    'is_attention_check': 'sum'  # Count attention checks
}).rename(columns={'trial_type': 'target_trials', 'is_attention_check': 'attention_checks'})

# Recalculate properly
block_summary = blocked_trials.groupby('block_number').agg({
    'overall_trial_number': 'count',
    'is_attention_check': 'sum'
}).rename(columns={'overall_trial_number': 'total_trials', 'is_attention_check': 'attention_checks'})
block_summary['target_trials'] = block_summary['total_trials'] - block_summary['attention_checks']

print(f"\nTrials per block:")
print(block_summary)

Target trials: 64
Attention trials: 4
Block 1: 16 targets + 1 attention = 17 total
Block 2: 16 targets + 1 attention = 17 total
Block 3: 16 targets + 1 attention = 17 total
Block 4: 16 targets + 1 attention = 17 total

=== BLOCKED STRUCTURE SUMMARY ===
Total trials: 68
Blocks created: 4

Trials per block:
              total_trials  attention_checks  target_trials
block_number                                               
1                       17                 1             16
2                       17                 1             16
3                       17                 1             16
4                       17                 1             16


In [20]:
# Demonstrate the presentation order logic with examples

print("=== PRESENTATION ORDER ANALYSIS ===")

# Show how original order was created
print("1. ORIGINAL RANDOMIZATION (Single Survey):")
print("   - np.random.permutation(68) with seed=42")
print("   - Completely random shuffle of all 68 trials")
print(f"   - First 10 presentation orders: {final_trials['presentation_order'].head(10).tolist()}")

print("\n2. BLOCKED RANDOMIZATION:")
print("   - Takes original random order and splits sequentially")
print("   - Then re-shuffles within each block")

# Show how the blocking preserves then reshuffles
target_trials = final_trials[final_trials['trial_type'] == 'target'].sort_values('presentation_order')
print(f"\n   Original order for first 16 targets: {target_trials['presentation_order'].head(16).tolist()}")

# Show the actual block assignments
print(f"\n3. BLOCK ASSIGNMENTS:")
for block_num in range(1, 5):
    block_data = blocked_trials[blocked_trials['block_number'] == block_num]
    block_targets = block_data[block_data['trial_type'] == 'target']
    original_orders = []
    
    # Get original presentation orders for trials in this block
    for _, trial in block_targets.iterrows():
        original_trial = final_trials[
            (final_trials['left_model'] == trial['left_model']) & 
            (final_trials['right_model'] == trial['right_model'])
        ]
        if len(original_trial) > 0:
            original_orders.append(original_trial['presentation_order'].iloc[0])
    
    print(f"   Block {block_num}:")
    print(f"     Original orders: {sorted(original_orders)[:8]}... (showing first 8)")
    print(f"     Final within-block order: {block_targets['trial_within_block'].tolist()[:8]}... (showing first 8)")

print(f"\n4. RANDOMIZATION SUMMARY:")
print("   ✓ Global randomization: np.random.permutation(68) with seed=42")
print("   ✓ Block assignment: Sequential chunks from global order") 
print("   ✓ Within-block shuffle: sample(frac=1) with block-specific seeds")
print("   ✓ Result: Two-level randomization preserving overall balance")

=== PRESENTATION ORDER ANALYSIS ===
1. ORIGINAL RANDOMIZATION (Single Survey):
   - np.random.permutation(68) with seed=42
   - Completely random shuffle of all 68 trials
   - First 10 presentation orders: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

2. BLOCKED RANDOMIZATION:
   - Takes original random order and splits sequentially
   - Then re-shuffles within each block

   Original order for first 16 targets: [2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

3. BLOCK ASSIGNMENTS:
   Block 1:
     Original orders: [np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(10), np.int64(11)]... (showing first 8)
     Final within-block order: [1, 2, 3, 4, 5, 6, 7, 8]... (showing first 8)
   Block 2:
     Original orders: [np.int64(20), np.int64(21), np.int64(22), np.int64(23), np.int64(24), np.int64(25), np.int64(26), np.int64(27)]... (showing first 8)
     Final within-block order: [1, 2, 3, 5, 6, 7, 8, 9]... (showing first 8)
   Block 3:
     Original order

In [16]:
# Display sample of blocked trials
print("\n=== SAMPLE BLOCKED TRIALS ===")
sample_cols = ['block_number', 'trial_within_block', 'overall_trial_number', 
               'trial_type', 'contrast_type', 'left_image_file', 'right_image_file']

# Show first few trials from each block
for block_num in range(1, 5):
    print(f"\n--- Block {block_num} (first 5 trials) ---")
    block_data = blocked_trials[blocked_trials['block_number'] == block_num]
    print(block_data[sample_cols].head(5).to_string(index=False))

print(f"\n=== EXPORT OPTIONS ===")
print("For blocked survey: blocked_trials.to_csv('pictopercept_blocked_trials.csv', index=False)")
print("Original format: final_trials.to_csv('pictopercept_68_trials.csv', index=False)")
print("\nBlocked structure preserves all randomization while reducing survey fatigue ✅")


=== SAMPLE BLOCKED TRIALS ===

--- Block 1 (first 5 trials) ---
 block_number  trial_within_block  overall_trial_number trial_type         contrast_type      left_image_file     right_image_file
            1                   1                     1     target          GenderOnly_B CFD-BF-008-001-N.jpg CFD-BM-001-014-N.jpg
            1                   2                     2     target  EthnicityOnly_F_BvsW CFD-BF-010-001-N.jpg CFD-WF-013-003-N.jpg
            1                   3                     3     target Intersectional_FWvsMB CFD-WF-215-145-N.jpg CFD-BM-253-004-N.jpg
            1                   4                     4     target  EthnicityOnly_M_AvsW CFD-WM-033-025-N.jpg CFD-AM-237-154-N.jpg
            1                   5                     5     target  EthnicityOnly_F_LvsW CFD-LF-226-174-N.jpg CFD-WF-213-031-N.jpg

--- Block 2 (first 5 trials) ---
 block_number  trial_within_block  overall_trial_number      trial_type         contrast_type      left_image_file 

In [17]:
# Verify balance is preserved across blocks
print("=== BALANCE CHECK ACROSS BLOCKS ===")
print("Ensuring randomization balance is maintained within each block\n")

for block_num in range(1, 5):
    block_data = blocked_trials[blocked_trials['block_number'] == block_num]
    target_block = block_data[block_data['trial_type'] == 'target']
    
    print(f"Block {block_num}:")
    print(f"  Target trials: {len(target_block)}")
    print(f"  Attention checks: {block_data['is_attention_check'].sum()}")
    
    # Check demographic distribution
    left_ethnicity = target_block['left_ethnicity'].value_counts()
    right_ethnicity = target_block['right_ethnicity'].value_counts()
    
    print(f"  Left ethnicity: {dict(left_ethnicity)}")
    print(f"  Right ethnicity: {dict(right_ethnicity)}")
    
    # Check contrast types
    contrast_dist = target_block['contrast_type'].value_counts()
    print(f"  Top contrasts: {dict(contrast_dist.head(3))}")
    print()

print("✅ Blocking preserves original randomization and demographic balance")

=== BALANCE CHECK ACROSS BLOCKS ===
Ensuring randomization balance is maintained within each block

Block 1:
  Target trials: 16
  Attention checks: 1
  Left ethnicity: {'B': np.int64(6), 'A': np.int64(5), 'L': np.int64(3), 'W': np.int64(2)}
  Right ethnicity: {'W': np.int64(10), 'B': np.int64(4), 'L': np.int64(2)}
  Top contrasts: {'EthnicityOnly_F_BvsW': np.int64(2), 'GenderOnly_L': np.int64(2), 'Intersectional_FBvsMW': np.int64(2)}

Block 2:
  Target trials: 16
  Attention checks: 1
  Left ethnicity: {'W': np.int64(5), 'A': np.int64(5), 'B': np.int64(4), 'L': np.int64(2)}
  Right ethnicity: {'W': np.int64(7), 'B': np.int64(6), 'A': np.int64(3)}
  Top contrasts: {'Intersectional_FWvsMB': np.int64(4), 'GenderOnly_A': np.int64(3), 'EthnicityOnly_M_BvsW': np.int64(2)}

Block 3:
  Target trials: 16
  Attention checks: 1
  Left ethnicity: {'B': np.int64(6), 'A': np.int64(4), 'L': np.int64(4), 'W': np.int64(2)}
  Right ethnicity: {'W': np.int64(10), 'L': np.int64(2), 'B': np.int64(2), 'A':

In [18]:
# Display final trial structure options
print("=== FINAL TRIAL STRUCTURE OPTIONS ===")
print(f"Total trials: {len(final_trials)}")
print(f"Target trials: {sum(final_trials['trial_type'] == 'target')}")
print(f"Attention checks: {sum(final_trials['is_attention_check'])}")

print(f"\n=== OPTION 1: SINGLE 68-TRIAL SURVEY ===")
target_only = final_trials[final_trials['trial_type'] == 'target']
contrast_counts = target_only['contrast_type'].value_counts()
print(f"All 68 trials in one session (≈ 8-10 minutes)")

print(f"\n=== OPTION 2: BLOCKED SURVEY (RECOMMENDED) ===")
print(f"4 blocks of 17 trials each (≈ 2-3 minutes per block)")
print(f"Reduces survey fatigue while preserving randomization")

for block_num in range(1, 5):
    block_data = blocked_trials[blocked_trials['block_number'] == block_num]
    target_count = len(block_data[block_data['trial_type'] == 'target'])
    attention_count = block_data['is_attention_check'].sum()
    print(f"  Block {block_num}: {target_count} targets + {attention_count} attention = {len(block_data)} total")

print(f"\n=== EXPORT INSTRUCTIONS ===")
print("Single survey: final_trials.to_csv('pictopercept_68_trials.csv', index=False)")
print("Blocked survey: blocked_trials.to_csv('pictopercept_blocked_trials.csv', index=False)")
print("\n✅ Both formats preserve all randomization and balance properties")

=== FINAL TRIAL STRUCTURE OPTIONS ===
Total trials: 68
Target trials: 64
Attention checks: 4

=== OPTION 1: SINGLE 68-TRIAL SURVEY ===
All 68 trials in one session (≈ 8-10 minutes)

=== OPTION 2: BLOCKED SURVEY (RECOMMENDED) ===
4 blocks of 17 trials each (≈ 2-3 minutes per block)
Reduces survey fatigue while preserving randomization
  Block 1: 16 targets + 1 attention = 17 total
  Block 2: 16 targets + 1 attention = 17 total
  Block 3: 16 targets + 1 attention = 17 total
  Block 4: 16 targets + 1 attention = 17 total

=== EXPORT INSTRUCTIONS ===
Single survey: final_trials.to_csv('pictopercept_68_trials.csv', index=False)
Blocked survey: blocked_trials.to_csv('pictopercept_blocked_trials.csv', index=False)

✅ Both formats preserve all randomization and balance properties


In [21]:
final_trials

Unnamed: 0,trial_id,trial_type,contrast_type,left_model,right_model,left_ethnicity,left_gender,right_ethnicity,right_gender,covariate_distance,is_attention_check,left_AgeRated,right_AgeRated,left_Attractive,right_Attractive,left_Trustworthy,right_Trustworthy,left_Dominant,right_Dominant,correct_choice,side_randomized,display_left_model,display_right_model,presentation_order,left_image_file,right_image_file
0,attention_2,attention_check,attention_check,BF-003,BM-013,B,F,B,M,,True,25.808989,22.977528,3.191011,2.707865,3.797753,3.550562,2.185185,2.920000,right,flipped,BM-013,BF-003,1,CFD-BM-013-002-N.jpg,CFD-BF-003-003-N.jpg
1,target_07,target,GenderOnly_B,BF-008,BM-001,B,F,B,M,1.426188,False,24.574713,26.085106,3.068966,2.849462,3.321839,3.063830,2.400000,3.230769,,original,BF-008,BM-001,2,CFD-BF-008-001-N.jpg,CFD-BM-001-014-N.jpg
2,target_33,target,EthnicityOnly_F_BvsW,BF-010,WF-013,B,F,W,F,1.227515,False,29.200000,26.423913,2.722222,3.565217,3.900000,4.010870,2.178571,2.222222,,original,BF-010,WF-013,3,CFD-BF-010-001-N.jpg,CFD-WF-013-003-N.jpg
3,target_18,target,EthnicityOnly_M_BvsW,BM-242,WM-254,B,M,W,M,1.021112,False,31.520000,30.307692,3.307692,3.153846,3.307692,3.230769,3.884615,3.230769,,flipped,WM-254,BM-242,4,CFD-WM-254-152-N.jpg,CFD-BM-242-233-N.jpg
4,target_55,target,Intersectional_FAvsMW,AF-215,WM-013,A,F,W,M,0.521593,False,30.444444,32.379310,2.814815,2.755814,3.370370,3.209302,2.555556,2.640000,,flipped,WM-013,AF-215,5,CFD-WM-013-001-N.jpg,CFD-AF-215-70-N.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63,target_43,target,Intersectional_FBvsMW,BF-039,WM-213,B,F,W,M,1.476641,False,24.876404,26.360000,3.322222,3.708333,3.719101,3.560000,2.111111,3.000000,,original,BF-039,WM-213,64,CFD-BF-039-031-N.jpg,CFD-WM-213-076-N.jpg
64,attention_4,attention_check,attention_check,AF-224,AM-221,A,F,A,M,,True,28.760000,24.115385,3.240000,3.230769,3.291667,2.846154,2.800000,3.043478,right,flipped,AM-221,AF-224,65,CFD-AM-221-184-N.jpg,CFD-AF-224-026-N.jpg
65,target_36,target,EthnicityOnly_F_AvsW,AF-235,WF-207,A,F,W,F,0.920524,False,23.500000,24.875000,4.192308,4.458333,4.269231,3.958333,2.153846,2.375000,,flipped,WF-207,AF-235,66,CFD-WF-207-014-N.jpg,CFD-AF-235-170-N.jpg
66,target_49,target,Intersectional_FAvsMW,AF-206,WM-011,A,F,W,M,1.476255,False,26.523810,30.988636,2.714286,3.170455,3.428571,3.056818,2.857143,2.384615,,flipped,WM-011,AF-206,67,CFD-WM-011-002-N.jpg,CFD-AF-206-079-N.jpg


In [22]:
blocked_trials

Unnamed: 0,trial_id,trial_type,contrast_type,left_model,right_model,left_ethnicity,left_gender,right_ethnicity,right_gender,covariate_distance,is_attention_check,left_AgeRated,right_AgeRated,left_Attractive,right_Attractive,left_Trustworthy,right_Trustworthy,left_Dominant,right_Dominant,correct_choice,side_randomized,display_left_model,display_right_model,presentation_order,left_image_file,right_image_file,block_number,trial_within_block,overall_trial_number
0,target_07,target,GenderOnly_B,BF-008,BM-001,B,F,B,M,1.426188,False,24.574713,26.085106,3.068966,2.849462,3.321839,3.063830,2.400000,3.230769,,original,BF-008,BM-001,2,CFD-BF-008-001-N.jpg,CFD-BM-001-014-N.jpg,1,1,1
1,target_33,target,EthnicityOnly_F_BvsW,BF-010,WF-013,B,F,W,F,1.227515,False,29.200000,26.423913,2.722222,3.565217,3.900000,4.010870,2.178571,2.222222,,original,BF-010,WF-013,3,CFD-BF-010-001-N.jpg,CFD-WF-013-003-N.jpg,1,2,2
2,target_50,target,Intersectional_FWvsMB,WF-215,BM-253,W,F,B,M,1.114797,False,30.640000,31.875000,3.280000,2.958333,2.960000,3.000000,2.360000,3.043478,,original,WF-215,BM-253,7,CFD-WF-215-145-N.jpg,CFD-BM-253-004-N.jpg,1,3,3
3,target_21,target,EthnicityOnly_M_AvsW,AM-237,WM-033,A,M,W,M,1.011496,False,29.440000,26.593407,3.269231,3.846154,3.423077,3.582418,2.615385,2.814815,,flipped,WM-033,AM-237,19,CFD-WM-033-025-N.jpg,CFD-AM-237-154-N.jpg,1,4,4
4,target_25,target,EthnicityOnly_F_LvsW,LF-226,WF-213,L,F,W,F,0.635771,False,25.384615,25.655172,3.500000,3.241379,2.807692,3.000000,3.040000,2.862069,,original,LF-226,WF-213,15,CFD-LF-226-174-N.jpg,CFD-WF-213-031-N.jpg,1,5,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63,target_37,target,EthnicityOnly_M_BvsW,BM-216,WM-208,B,M,W,M,1.227523,False,29.538462,26.137931,2.148148,2.655172,3.851852,3.586207,2.576923,2.178571,,original,BM-216,WM-208,52,CFD-BM-216-088-N.jpg,CFD-WM-208-068-N.jpg,4,13,64
64,target_43,target,Intersectional_FBvsMW,BF-039,WM-213,B,F,W,M,1.476641,False,24.876404,26.360000,3.322222,3.708333,3.719101,3.560000,2.111111,3.000000,,original,BF-039,WM-213,64,CFD-BF-039-031-N.jpg,CFD-WM-213-076-N.jpg,4,14,65
65,target_31,target,EthnicityOnly_F_BvsW,BF-237,WF-244,B,F,W,F,1.450639,False,33.250000,26.636364,4.370370,3.818182,3.964286,3.909091,2.892857,2.454545,,flipped,WF-244,BF-237,55,CFD-WF-244-163-N.jpg,CFD-BF-237-172-N.jpg,4,15,66
66,target_49,target,Intersectional_FAvsMW,AF-206,WM-011,A,F,W,M,1.476255,False,26.523810,30.988636,2.714286,3.170455,3.428571,3.056818,2.857143,2.384615,,flipped,WM-011,AF-206,67,CFD-WM-011-002-N.jpg,CFD-AF-206-079-N.jpg,4,16,67


In [19]:
# Export both formats
final_trials.to_csv("../sampled_cfd_images.csv", encoding='utf-8', index=False)
blocked_trials.to_csv("../sampled_cfd_images_blocked.csv", encoding='utf-8', index=False)

print("✅ Exported both formats:")
print("  - sampled_cfd_images.csv (single 68-trial survey)")
print("  - sampled_cfd_images_blocked.csv (4 blocks of 17 trials)")

✅ Exported both formats:
  - sampled_cfd_images.csv (single 68-trial survey)
  - sampled_cfd_images_blocked.csv (4 blocks of 17 trials)
