# Study 2 Analysis

This notebook contains all analyses for Study 2 of the article, systematically going through each analysis and statistic reported.


## Imports and Configuration

Import necessary libraries and set up helper functions for the analyses.


In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import scipy.stats as stats
from scipy.stats import ttest_rel, ttest_ind, pearsonr
import pingouin as pg
import warnings


# Ignore warnings - for cleaner output
warnings.filterwarnings('ignore')

# Display options
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

# Helper function: Spearman-Brown correction for split-half reliability
def spearman_brown(r):
    """Apply Spearman-Brown correction to split-half reliability."""
    return (2 * r) / (1 + r)

# Helper function: Fisher z transformation for comparing correlations
def fisher_z(r):
    """Fisher z transformation."""
    return 0.5 * np.log((1 + r) / (1 - r))

def compare_correlations(r1, r2, n1, n2):
    """
    Compare two independent correlations using Fisher z test.
    
    Parameters:
    r1, r2: correlation coefficients
    n1, n2: sample sizes
    
    Returns:
    z: z-statistic
    p: p-value (two-tailed)
    """
    z1 = fisher_z(r1)
    z2 = fisher_z(r2)
    se_diff = np.sqrt(1/(n1-3) + 1/(n2-3))
    z = (z1 - z2) / se_diff
    p = 2 * (1 - stats.norm.cdf(abs(z)))
    return z, p

# Helper function: Cohen's d for paired samples
def cohens_d_paired(x1, x2):
    """Calculate Cohen's d for paired samples."""
    diff = x1 - x2
    d = diff.mean() / diff.std(ddof=1)
    return d


## Data Loading

Load the combined participants file and filter to Study 2 only. Handle exclusions and verify sample sizes.


In [39]:
# Define paths
BASE_DIR = Path('..')
DATA_DIR = BASE_DIR / "data"

# Load the combined participants file
combined_file = DATA_DIR / "combined_participants_S1&S2.csv"
df_all = pd.read_csv(combined_file)

print(f"Total participants in combined file: {len(df_all)}")

# Filter to Study 2 only
# Study column might be 'S2' or '2', so we'll check both
df_s2 = df_all[df_all['study'].astype(str).str.upper().isin(['S2', '2'])].copy()

print(f"Study 2 participants (before exclusions): {len(df_s2)}")

# Handle exclusions
# The 'exclude' column indicates if participant should be excluded from entire study
if 'exclude' in df_s2.columns:
    # Check what values indicate exclusion (likely True/1 for excluded)
    print(f"\nExclusion column value counts:")
    print(df_s2['exclude'].value_counts())
    
    # Filter out excluded participants
    # Assuming True, 1, or 'True' means excluded
    df_s2_included = df_s2[~df_s2['exclude'].astype(str).str.upper().isin(['TRUE', '1', 'YES'])].copy()
    print(f"Study 2 participants (after exclusions): {len(df_s2_included)}")
else:
    print("Warning: 'exclude' column not found. Using all participants.")
    df_s2_included = df_s2.copy()

# Verify sample size matches article (need to check what N is expected for S2)
print(f"\nExpected N = ? (after exclusions - check article)")
print(f"Actual N = {len(df_s2_included)}")

# Convert key columns to appropriate types
df_s2_included['id'] = df_s2_included['id'].astype(str)
df_s2_included['study'] = df_s2_included['study'].astype(str)
df_s2_included['task'] = df_s2_included['task'].astype(str)
df_s2_included['group'] = pd.to_numeric(df_s2_included['group'], errors='coerce')

# Check group distribution
print(f"\nGroup distribution:")
print(df_s2_included['group'].value_counts().sort_index())
print("(0 = Control, 1 = Faking Low/Introversion, 2 = Faking High/Extraversion)")

# Check task distribution
print(f"\nTask distribution:")
print(df_s2_included['task'].value_counts())

# Store as main dataframe for analyses
df = df_s2_included.copy()
print(f"\nDataframe ready for analysis: {len(df)} participants")


Total participants in combined file: 898
Study 2 participants (before exclusions): 600

Exclusion column value counts:
exclude
0    528
1     72
Name: count, dtype: int64
Study 2 participants (after exclusions): 528

Expected N = ? (after exclusions - check article)
Actual N = 528

Group distribution:
group
0    181
1    169
2    178
Name: count, dtype: int64
(0 = Control, 1 = Faking Low/Introversion, 2 = Faking High/Extraversion)

Task distribution:
task
qIAT    267
IAT     261
Name: count, dtype: int64

Dataframe ready for analysis: 528 participants


## Load Task Data Files

Load all task data files from the tasks/S2 folder. These files contain detailed trial-level data and participant-level statistics (D-scores, split-half reliability data, etc.) that may be needed for analyses.


In [40]:
# Define tasks directory
TASKS_DIR = DATA_DIR / "tasks" / "S2"

# Task file mapping: (task_type, time) -> file_path
task_files = {
    ('qIAT', 1): TASKS_DIR / "qiat1.xlsx",
    ('qIAT', 2): TASKS_DIR / "qiat2.xlsx",
    ('IAT', 1): TASKS_DIR / "iat1.xlsx",
    ('IAT', 2): TASKS_DIR / "iat2.xlsx",
}

# Dictionary to store loaded task data
# Structure: tasks[task_type][time] = {'participants': df, 'trials': df (if available)}
tasks = {
    'qIAT': {},
    'IAT': {}
}

# Load each task file
for (task_type, time), file_path in task_files.items():
    if not file_path.exists():
        print(f"Warning: {file_path} not found, skipping...")
        continue
    
    print(f"Loading {file_path.name}...")
    
    # Load Participants sheet (contains participant-level statistics)
    try:
        participants_df = pd.read_excel(file_path, sheet_name='Participants')
        participants_df['id'] = participants_df['id'].astype(str)
        tasks[task_type][time] = {'participants': participants_df}
        print(f"  Loaded {len(participants_df)} participants")
        
        # Check what columns are available
        print(f"  Columns: {list(participants_df.columns)}")
        
        # Try to load Trials sheet if it exists (contains trial-level data)
        try:
            trials_df = pd.read_excel(file_path, sheet_name='Trials')
            tasks[task_type][time]['trials'] = trials_df
            print(f"  Loaded {len(trials_df)} trials")
        except:
            print(f"  No Trials sheet found (or error loading it)")
            
    except Exception as e:
        print(f"  Error loading {file_path}: {e}")

print(f"\nTask files loaded successfully!")
print(f"Available task data:")
for task_type in ['qIAT', 'IAT']:
    for time in [1, 2]:
        if time in tasks[task_type]:
            print(f"  {task_type} Time {time}: {len(tasks[task_type][time]['participants'])} participants")


Loading qiat1.xlsx...
  Loaded 304 participants
  Columns: ['id', 'nTrials', 'maxLatency', 'percFast', 'percError', 'exclude', 'condition', 'n_block_4', 'mean_block_4', 'variance_block_4', 'n_block_6', 'mean_block_6', 'variance_block_6', 'pooled_sd', 'dscore', 'dscore_even', 'dscore_odd', 'dscore_paired_even', 'dscore_paired_odd', 'dscore_random_half1', 'dscore_random_half2']
  Loaded 60800 trials
Loading qiat2.xlsx...
  Loaded 304 participants
  Columns: ['id', 'nTrials', 'maxLatency', 'percFast', 'percError', 'exclude', 'condition', 'n_block_4', 'mean_block_4', 'variance_block_4', 'n_block_6', 'mean_block_6', 'variance_block_6', 'pooled_sd', 'dscore', 'dscore_even', 'dscore_odd', 'dscore_paired_even', 'dscore_paired_odd', 'dscore_random_half1', 'dscore_random_half2']
  Loaded 60800 trials
Loading iat1.xlsx...
  Loaded 296 participants
  Columns: ['id', 'nTrials', 'maxLatency', 'percFast', 'percError', 'exclude', 'condition', 'n_block_4', 'mean_block_4', 'variance_block_4', 'n_block_6

## Sample Size and Demographics

Compute total sample size, exclusions, and final sample demographics for Study 2.


In [41]:
# 1. Total S2 N (before any exclusions)
total_s2_n = len(df_s2)
print(f"1. Total S2 N: {total_s2_n}")

# Verify total excluded from exclude column
exclude_col = pd.to_numeric(df_s2['exclude'], errors='coerce')
if df_s2['exclude'].dtype == 'object':
    exclude_col = df_s2['exclude'].astype(str).str.upper().isin(['TRUE', '1', 'YES']).astype(int)
total_excluded = exclude_col.sum()
print(f"   Total excluded (from exclude column): {total_excluded}")
print(f"   Should equal: {total_s2_n - len(df)} = {total_s2_n - len(df)}")

# 2. Exclusions based on implicit task performance
# Check if exclude_t1 and exclude_t2 columns exist (from task files)
# These indicate exclusion from T1 or T2 based on task performance
excluded_from_tasks = pd.Series([False] * len(df_s2), index=df_s2.index)

if 'exclude_t1' in df_s2.columns and 'exclude_t2' in df_s2.columns:
    # Convert to boolean
    exclude_t1_bool = pd.to_numeric(df_s2['exclude_t1'], errors='coerce')
    exclude_t2_bool = pd.to_numeric(df_s2['exclude_t2'], errors='coerce')
    
    # Handle string/boolean values
    if df_s2['exclude_t1'].dtype == 'object' or df_s2['exclude_t1'].dtype == 'bool':
        exclude_t1_bool = df_s2['exclude_t1'].astype(str).str.upper().isin(['TRUE', '1', 'YES']).astype(int)
    if df_s2['exclude_t2'].dtype == 'object' or df_s2['exclude_t2'].dtype == 'bool':
        exclude_t2_bool = df_s2['exclude_t2'].astype(str).str.upper().isin(['TRUE', '1', 'YES']).astype(int)
    
    # Excluded from implicit tasks if excluded from T1 OR T2
    excluded_from_tasks = ((exclude_t1_bool == 1) | (exclude_t2_bool == 1))
    x1_task_exclusions = excluded_from_tasks.sum()
    print(f"\n2. Excluded based on implicit task performance: {x1_task_exclusions}")
    print(f"   (exclude_t1: {exclude_t1_bool.sum()}, exclude_t2: {exclude_t2_bool.sum()})")
elif 't1_exclude' in df_s2.columns and 't2_exclude' in df_s2.columns:
    # Fallback to t1_exclude/t2_exclude if exclude_t1/exclude_t2 don't exist
    t1_excluded = pd.to_numeric(df_s2['t1_exclude'], errors='coerce')
    t2_excluded = pd.to_numeric(df_s2['t2_exclude'], errors='coerce')
    
    if df_s2['t1_exclude'].dtype == 'object':
        t1_excluded = df_s2['t1_exclude'].astype(str).str.upper().isin(['TRUE', '1', 'YES']).astype(int)
    if df_s2['t2_exclude'].dtype == 'object':
        t2_excluded = df_s2['t2_exclude'].astype(str).str.upper().isin(['TRUE', '1', 'YES']).astype(int)
    
    excluded_from_tasks = ((t1_excluded == 1) | (t2_excluded == 1))
    x1_task_exclusions = excluded_from_tasks.sum()
    print(f"\n2. Excluded based on implicit task performance: {x1_task_exclusions}")
else:
    print("\n2. Warning: exclude_t1/exclude_t2 or t1_exclude/t2_exclude columns not found")
    x1_task_exclusions = 0

# 3. Additional exclusions based on manipulation checks (only for manipulation groups, not control)
# Manipulation groups are group 1 (Faking Low) and group 2 (Faking High)
# Control group is group 0
failed_manipulation = pd.Series([False] * len(df_s2), index=df_s2.index)

if '1st check indicator' in df_s2.columns and '2nd check indicator' in df_s2.columns:
    # Check what indicates wrong answer (likely "*" or similar)
    manipulation_groups_mask = df_s2['group'].isin([1, 2])
    manipulation_groups = df_s2[manipulation_groups_mask].copy()
    
    # Check for wrong answers in manipulation checks (look for "*" symbol)
    wrong_1st = manipulation_groups['1st check indicator'].astype(str) == '*'
    wrong_2nd = manipulation_groups['2nd check indicator'].astype(str) == '*'
    
    # Excluded if failed either check
    failed_manipulation_subset = wrong_1st | wrong_2nd
    x2_manipulation_exclusions = failed_manipulation_subset.sum()
    
    # Map back to full dataframe
    failed_manipulation.loc[manipulation_groups.index] = failed_manipulation_subset.values
    
    print(f"3. Additional exclusions from manipulation groups (failed checks): {x2_manipulation_exclusions}")
    print(f"   (failed 1st check: {wrong_1st.sum()}, failed 2nd check: {wrong_2nd.sum()})")
    
    # Check overlap: how many excluded for BOTH task performance AND manipulation checks
    overlap = (excluded_from_tasks & failed_manipulation).sum()
    if overlap > 0:
        print(f"   Note: {overlap} participants excluded for BOTH task performance AND manipulation checks")
    
    # Verify: task exclusions + manipulation exclusions - overlap should match total (approximately)
    # But note: some might be excluded for other reasons
    calculated_total = x1_task_exclusions + x2_manipulation_exclusions - overlap
    print(f"   Calculated total (task + manipulation - overlap): {calculated_total}")
    print(f"   Actual total excluded: {total_excluded}")
    if calculated_total != total_excluded:
        other_exclusions = total_excluded - calculated_total
        print(f"   Difference (other exclusions?): {other_exclusions}")
else:
    print("\n3. Warning: manipulation check indicator columns not found")
    x2_manipulation_exclusions = 0
    overlap = 0

# 4. Final sample (after all exclusions)
# Use the df dataframe which already has exclusions applied
final_n = len(df)
print(f"\n4. Final N (after all exclusions): {final_n}")

# 5. Demographics of final sample
# Gender: check what values indicate female
if 'gender' in df.columns:
    # Check what values are in gender column
    print(f"\nGender value counts:")
    print(df['gender'].value_counts())
    
    # Try to identify females (common values: 'Female', 'F', '2', etc.)
    gender_str = df['gender'].astype(str).str.upper()
    # Common female indicators
    female_indicators = ['FEMALE', 'F', '2', 'WOMAN', 'W']
    is_female = gender_str.isin(female_indicators)
    n_females = is_female.sum()
    print(f"5. Number of females: {n_females}")
else:
    print("\n5. Warning: gender column not found")
    n_females = 0

# Age statistics
if 'age' in df.columns:
    age_numeric = pd.to_numeric(df['age'], errors='coerce')
    age_mean = age_numeric.mean()
    age_sd = age_numeric.std(ddof=1)  # Sample SD
    print(f"6. Mean age: {age_mean:.2f}")
    print(f"7. SD age: {age_sd:.2f}")
else:
    print("\n6-7. Warning: age column not found")
    age_mean = np.nan
    age_sd = np.nan

# Summary output for easy copy-paste
print(f"\n{'='*60}")
print("SUMMARY (for article):")
print(f"{'='*60}")
print(f"1. Total S2 N: {total_s2_n}")
print(f"2. Excluded based on implicit tasks: {x1_task_exclusions}")
print(f"3. Additional exclusions (manipulation checks): {x2_manipulation_exclusions}")
print(f"4. Final N: {final_n}")
print(f"5. Females: {n_females}")
print(f"6. Mean age: {age_mean:.2f}")
print(f"7. SD age: {age_sd:.2f}")


1. Total S2 N: 600
   Total excluded (from exclude column): 72
   Should equal: 72 = 72

2. Excluded based on implicit task performance: 52
   (exclude_t1: 32, exclude_t2: 43)
3. Additional exclusions from manipulation groups (failed checks): 23
   (failed 1st check: 10, failed 2nd check: 21)
   Note: 3 participants excluded for BOTH task performance AND manipulation checks
   Calculated total (task + manipulation - overlap): 72
   Actual total excluded: 72

4. Final N (after all exclusions): 528

Gender value counts:
gender
Female                        284
Male                          240
other/ I prefer not to say      4
Name: count, dtype: int64
5. Number of females: 284
6. Mean age: 41.84
7. SD age: 12.38

SUMMARY (for article):
1. Total S2 N: 600
2. Excluded based on implicit tasks: 52
3. Additional exclusions (manipulation checks): 23
4. Final N: 528
5. Females: 284
6. Mean age: 41.84
7. SD age: 12.38


## Internal Consistency of Questionnaire (Time 1)

Compute Cronbach's alpha for the extraversion questionnaire at Time 1. Reverse-code items marked with "*" before computing reliability.


In [None]:

# Get all Q1_A columns (Time 1 questionnaire items)
q1_cols = [col for col in df.columns if col.startswith('Q1_A')]
print(f"Found {len(q1_cols)} Time 1 questionnaire columns:")
for col in sorted(q1_cols):
    print(f"  {col}")

# Based on the schema, reverse-coded items are those marked with "*" in the description
# According to the schema: Q1_A2, Q1_A3, Q1_A5, Q1_A8, Q1_A10 are reverse-coded
# Check if column names have "*" or use the known reverse-coded items
reverse_coded_items = ['Q1_A2', 'Q1_A3', 'Q1_A5', 'Q1_A8', 'Q1_A10']
reverse_coded = []

for col in q1_cols:
    # Check if column name contains "*" or matches known reverse-coded items
    col_base = col.replace(' *', '').replace('*', '').strip()
    if '*' in col or col.endswith(' *') or col_base in reverse_coded_items:
        reverse_coded.append(col)
        print(f"  {col} - REVERSE CODED")

print(f"\nReverse-coded items: {len(reverse_coded)}")
if len(reverse_coded) == 0:
    print("  Warning: No reverse-coded items found. Using schema-based list.")
    # Fallback: use items that exist in our list
    reverse_coded = [col for col in q1_cols if any(item in col for item in reverse_coded_items)]
    print(f"  Using fallback: {reverse_coded}")

# Extract questionnaire data for Time 1
q1_data = df[q1_cols].copy()

# Convert to numeric
for col in q1_cols:
    q1_data[col] = pd.to_numeric(q1_data[col], errors='coerce')

# Apply reverse coding: f(x) = 6 - x for reverse-coded items
for col in reverse_coded:
    if col in q1_data.columns:
        # Reverse code: 6 - x
        q1_data[col] = 6 - q1_data[col]
        print(f"Applied reverse coding to {col}")

# Remove rows with any missing values for reliability calculation
q1_data_clean = q1_data.dropna()
print(f"\nParticipants with complete T1 questionnaire data: {len(q1_data_clean)}")

# Compute Cronbach's alpha
# Formula: α = (k / (k-1)) * (1 - Σσ²ᵢ / σ²ₜ)
# where k = number of items, σ²ᵢ = variance of item i, σ²ₜ = total variance
k = len(q1_cols)
item_variances = q1_data_clean.var(axis=0, ddof=1)
total_variance = q1_data_clean.sum(axis=1).var(ddof=1)

cronbach_alpha = (k / (k - 1)) * (1 - item_variances.sum() / total_variance)

print(f"\nCronbach's Alpha for T1 Questionnaire: {cronbach_alpha:.3f}")
print(f"Number of items: {k}")
print(f"Sum of item variances: {item_variances.sum():.3f}")
print(f"Total variance: {total_variance:.3f}")

# Store for later use
t1_questionnaire_alpha = cronbach_alpha


Found 10 Time 1 questionnaire columns:
  Q1_A1
  Q1_A10 *
  Q1_A2 *
  Q1_A3 *
  Q1_A4
  Q1_A5 *
  Q1_A6
  Q1_A7
  Q1_A8 *
  Q1_A9
  Q1_A2 * - REVERSE CODED
  Q1_A3 * - REVERSE CODED
  Q1_A5 * - REVERSE CODED
  Q1_A8 * - REVERSE CODED
  Q1_A10 * - REVERSE CODED

Reverse-coded items: 5
Applied reverse coding to Q1_A2 *
Applied reverse coding to Q1_A3 *
Applied reverse coding to Q1_A5 *
Applied reverse coding to Q1_A8 *
Applied reverse coding to Q1_A10 *

Participants with complete T1 questionnaire data: 528

Cronbach's Alpha for T1 Questionnaire: 0.927
Number of items: 10
Sum of item variances: 12.704
Total variance: 76.566


## Split-Half Reliability (Time 1)

Compute split-half reliabilities for qIAT and IAT at Time 1 using paired-odd and paired-even D-scores, with Spearman-Brown correction.


In [43]:
# Get Time 1 data for qIAT and IAT
# Use dscore_paired_odd_t1 and dscore_paired_even_t1 columns from the combined file

# For qIAT
qiat_t1 = df[df['task'].str.upper() == 'QIAT'].copy()
qiat_t1_odd = pd.to_numeric(qiat_t1['dscore_paired_odd_t1'], errors='coerce')
qiat_t1_even = pd.to_numeric(qiat_t1['dscore_paired_even_t1'], errors='coerce')

# Remove missing values
qiat_t1_pairs = pd.DataFrame({
    'odd': qiat_t1_odd,
    'even': qiat_t1_even
}).dropna()

print(f"qIAT Time 1 participants with both odd and even D-scores: {len(qiat_t1_pairs)}")

if len(qiat_t1_pairs) > 0:
    # Compute correlation
    qiat_t1_corr, qiat_t1_p = pearsonr(qiat_t1_pairs['odd'], qiat_t1_pairs['even'])
    print(f"qIAT T1 correlation (odd vs even): r = {qiat_t1_corr:.3f}, p = {qiat_t1_p:.4f}")
    
    # Apply Spearman-Brown correction
    qiat_t1_reliability = spearman_brown(qiat_t1_corr)
    print(f"qIAT T1 split-half reliability (Spearman-Brown corrected): {qiat_t1_reliability:.3f}")
else:
    print("Warning: No valid qIAT T1 data found")
    qiat_t1_reliability = np.nan

print("\n" + "-"*60)

# For IAT
iat_t1 = df[df['task'].str.upper() == 'IAT'].copy()
iat_t1_odd = pd.to_numeric(iat_t1['dscore_paired_odd_t1'], errors='coerce')
iat_t1_even = pd.to_numeric(iat_t1['dscore_paired_even_t1'], errors='coerce')

# Remove missing values
iat_t1_pairs = pd.DataFrame({
    'odd': iat_t1_odd,
    'even': iat_t1_even
}).dropna()

print(f"IAT Time 1 participants with both odd and even D-scores: {len(iat_t1_pairs)}")

if len(iat_t1_pairs) > 0:
    # Compute correlation
    iat_t1_corr, iat_t1_p = pearsonr(iat_t1_pairs['odd'], iat_t1_pairs['even'])
    print(f"IAT T1 correlation (odd vs even): r = {iat_t1_corr:.3f}, p = {iat_t1_p:.4f}")
    
    # Apply Spearman-Brown correction
    iat_t1_reliability = spearman_brown(iat_t1_corr)
    print(f"IAT T1 split-half reliability (Spearman-Brown corrected): {iat_t1_reliability:.3f}")
else:
    print("Warning: No valid IAT T1 data found")
    iat_t1_reliability = np.nan

print("\n" + "="*60)
print("SUMMARY (for article):")
print("="*60)
print(f"qIAT split-half reliability (T1): {qiat_t1_reliability:.2f}")
print(f"IAT split-half reliability (T1): {iat_t1_reliability:.2f}")


qIAT Time 1 participants with both odd and even D-scores: 267
qIAT T1 correlation (odd vs even): r = 0.914, p = 0.0000
qIAT T1 split-half reliability (Spearman-Brown corrected): 0.955

------------------------------------------------------------
IAT Time 1 participants with both odd and even D-scores: 261
IAT T1 correlation (odd vs even): r = 0.851, p = 0.0000
IAT T1 split-half reliability (Spearman-Brown corrected): 0.920

SUMMARY (for article):
qIAT split-half reliability (T1): 0.95
IAT split-half reliability (T1): 0.92


## Table 6: Descriptive Statistics (Means and SDs)

Table showing means (SDs) for questionnaire, IAT, and qIAT by group (Control, Faking Low, Faking High) and time (Time1, Time2).


In [44]:
# Group mapping: 0 = Control, 1 = Faking Low, 2 = Faking High
group_names = {0: 'Control', 1: 'Faking Low', 2: 'Faking High'}

# Initialize results dictionary
results = {
    'Questionnaire': {},
    'IAT': {},
    'qIAT': {}
}

# 1. Questionnaire (use t1_ques and t2_ques)
if 't1_ques' in df.columns and 't2_ques' in df.columns:
    for group_num, group_name in group_names.items():
        group_data = df[df['group'] == group_num]
        
        t1_ques = pd.to_numeric(group_data['t1_ques'], errors='coerce')
        t2_ques = pd.to_numeric(group_data['t2_ques'], errors='coerce')
        
        t1_mean = t1_ques.mean()
        t1_sd = t1_ques.std(ddof=1)
        t2_mean = t2_ques.mean()
        t2_sd = t2_ques.std(ddof=1)
        
        results['Questionnaire'][group_name] = {
            'Time1': (t1_mean, t1_sd),
            'Time2': (t2_mean, t2_sd)
        }
    
    # Overall (all groups combined)
    t1_ques_all = pd.to_numeric(df['t1_ques'], errors='coerce')
    t2_ques_all = pd.to_numeric(df['t2_ques'], errors='coerce')
    results['Questionnaire']['Overall'] = {
        'Time1': (t1_ques_all.mean(), t1_ques_all.std(ddof=1)),
        'Time2': (t2_ques_all.mean(), t2_ques_all.std(ddof=1))
    }

# 2. IAT (filter task == 'IAT', use t1_dscore and t2_dscore)
iat_data = df[df['task'].str.upper() == 'IAT'].copy()
if len(iat_data) > 0:
    for group_num, group_name in group_names.items():
        group_data = iat_data[iat_data['group'] == group_num]
        
        t1_dscore = pd.to_numeric(group_data['t1_dscore'], errors='coerce')
        t2_dscore = pd.to_numeric(group_data['t2_dscore'], errors='coerce')
        
        t1_mean = t1_dscore.mean()
        t1_sd = t1_dscore.std(ddof=1)
        t2_mean = t2_dscore.mean()
        t2_sd = t2_dscore.std(ddof=1)
        
        results['IAT'][group_name] = {
            'Time1': (t1_mean, t1_sd),
            'Time2': (t2_mean, t2_sd)
        }
    
    # Overall (all groups combined)
    t1_dscore_all = pd.to_numeric(iat_data['t1_dscore'], errors='coerce')
    t2_dscore_all = pd.to_numeric(iat_data['t2_dscore'], errors='coerce')
    results['IAT']['Overall'] = {
        'Time1': (t1_dscore_all.mean(), t1_dscore_all.std(ddof=1)),
        'Time2': (t2_dscore_all.mean(), t2_dscore_all.std(ddof=1))
    }

# 3. qIAT (filter task == 'qIAT', use t1_dscore and t2_dscore)
qiat_data = df[df['task'].str.upper() == 'QIAT'].copy()
if len(qiat_data) > 0:
    for group_num, group_name in group_names.items():
        group_data = qiat_data[qiat_data['group'] == group_num]
        
        t1_dscore = pd.to_numeric(group_data['t1_dscore'], errors='coerce')
        t2_dscore = pd.to_numeric(group_data['t2_dscore'], errors='coerce')
        
        t1_mean = t1_dscore.mean()
        t1_sd = t1_dscore.std(ddof=1)
        t2_mean = t2_dscore.mean()
        t2_sd = t2_dscore.std(ddof=1)
        
        results['qIAT'][group_name] = {
            'Time1': (t1_mean, t1_sd),
            'Time2': (t2_mean, t2_sd)
        }
    
    # Overall (all groups combined)
    t1_dscore_all = pd.to_numeric(qiat_data['t1_dscore'], errors='coerce')
    t2_dscore_all = pd.to_numeric(qiat_data['t2_dscore'], errors='coerce')
    results['qIAT']['Overall'] = {
        'Time1': (t1_dscore_all.mean(), t1_dscore_all.std(ddof=1)),
        'Time2': (t2_dscore_all.mean(), t2_dscore_all.std(ddof=1))
    }

# Create formatted table
print("Table 6. Means (SDs) for the questionnaire and implicit tasks in Study 2")
print("=" * 120)
header1 = f"{'Measure':<15} {'Control':<30} {'Faking Low':<30} {'Faking High':<30} {'Overall':<30}"
print(header1)
header2 = f"{'':<15} {'Time1':<14} {'Time2':<14} {'Time1':<14} {'Time2':<14} {'Time1':<14} {'Time2':<14} {'Time1':<14} {'Time2':<14}"
print(header2)
print("-" * 120)

for measure in ['Questionnaire', 'IAT', 'qIAT']:
    row = f"{measure:<15}"
    
    for group_name in ['Control', 'Faking Low', 'Faking High', 'Overall']:
        if group_name in results[measure]:
            t1_mean, t1_sd = results[measure][group_name]['Time1']
            t2_mean, t2_sd = results[measure][group_name]['Time2']
            t1_str = f"{t1_mean:.2f} ({t1_sd:.2f})"
            t2_str = f"{t2_mean:.2f} ({t2_sd:.2f})"
            row += f"{t1_str:<14} {t2_str:<14}"
        else:
            row += f"{'N/A':<14} {'N/A':<14}"
    
    print(row)

print("=" * 120)

# Also create a pandas DataFrame for easier viewing
table_data = []
for measure in ['Questionnaire', 'IAT', 'qIAT']:
    for group_name in ['Control', 'Faking Low', 'Faking High', 'Overall']:
        if group_name in results[measure]:
            t1_mean, t1_sd = results[measure][group_name]['Time1']
            t2_mean, t2_sd = results[measure][group_name]['Time2']
            table_data.append({
                'Measure': measure,
                'Group': group_name,
                'Time1_Mean': t1_mean,
                'Time1_SD': t1_sd,
                'Time2_Mean': t2_mean,
                'Time2_SD': t2_sd,
                'Time1_Formatted': f"{t1_mean:.2f} ({t1_sd:.2f})",
                'Time2_Formatted': f"{t2_mean:.2f} ({t2_sd:.2f})"
            })

table_df = pd.DataFrame(table_data)
print("\nDetailed table (DataFrame format):")
print(table_df.to_string(index=False))

# Create a pivot table for better visualization
pivot_table = table_df.pivot(index='Measure', columns='Group', values=['Time1_Formatted', 'Time2_Formatted'])
print("\n\nPivot table format:")
print(pivot_table)


Table 6. Means (SDs) for the questionnaire and implicit tasks in Study 2
Measure         Control                        Faking Low                     Faking High                    Overall                       
                Time1          Time2          Time1          Time2          Time1          Time2          Time1          Time2         
------------------------------------------------------------------------------------------------------------------------
Questionnaire  28.79 (9.34)   29.01 (9.55)  27.83 (8.66)   11.32 (3.14)  26.98 (8.14)   48.31 (5.21)  27.87 (8.75)   29.85 (16.40) 
IAT            -0.46 (0.64)   -0.29 (0.54)  -0.50 (0.57)   -0.41 (0.46)  -0.44 (0.53)   -0.12 (0.58)  -0.46 (0.58)   -0.27 (0.54)  
qIAT           0.22 (0.90)    0.17 (0.67)   0.21 (1.00)    0.13 (0.73)   -0.03 (0.74)   0.33 (0.60)   0.13 (0.89)    0.21 (0.67)   

Detailed table (DataFrame format):
      Measure       Group  Time1_Mean  Time1_SD  Time2_Mean  Time2_SD Time1_Formatted Time2_Format

## Test-Retest Reliability (Control Group)

Compute test-retest reliability for self-report, qIAT, and IAT by correlating Time1 and Time2 scores in the control group only.


In [45]:
# Filter to control group only (group == 0)
control_group = df[df['group'] == 0].copy()
print(f"Control group participants: {len(control_group)}")

# 1. Self-report (Questionnaire) test-retest reliability
if 't1_ques' in control_group.columns and 't2_ques' in control_group.columns:
    t1_ques_control = pd.to_numeric(control_group['t1_ques'], errors='coerce')
    t2_ques_control = pd.to_numeric(control_group['t2_ques'], errors='coerce')
    
    # Remove missing values
    ques_pairs = pd.DataFrame({
        't1': t1_ques_control,
        't2': t2_ques_control
    }).dropna()
    
    print(f"\nSelf-report (Questionnaire):")
    print(f"  Participants with complete data: {len(ques_pairs)}")
    
    if len(ques_pairs) > 1:
        ques_corr, ques_p = pearsonr(ques_pairs['t1'], ques_pairs['t2'])
        print(f"  Test-retest reliability: r = {ques_corr:.3f}, p = {ques_p:.4f}")
        self_report_reliability = ques_corr
    else:
        print("  Warning: Insufficient data for correlation")
        self_report_reliability = np.nan
else:
    print("\nWarning: t1_ques or t2_ques columns not found")
    self_report_reliability = np.nan

# 2. qIAT test-retest reliability (control group, qIAT task only)
qiat_control = control_group[control_group['task'].str.upper() == 'QIAT'].copy()
print(f"\nqIAT (Control group):")
print(f"  Participants: {len(qiat_control)}")

if len(qiat_control) > 0:
    t1_qiat_control = pd.to_numeric(qiat_control['t1_dscore'], errors='coerce')
    t2_qiat_control = pd.to_numeric(qiat_control['t2_dscore'], errors='coerce')
    
    # Remove missing values
    qiat_pairs = pd.DataFrame({
        't1': t1_qiat_control,
        't2': t2_qiat_control
    }).dropna()
    
    print(f"  Participants with complete data: {len(qiat_pairs)}")
    
    if len(qiat_pairs) > 1:
        qiat_corr, qiat_p = pearsonr(qiat_pairs['t1'], qiat_pairs['t2'])
        print(f"  Test-retest reliability: r = {qiat_corr:.3f}, p = {qiat_p:.4f}")
        qiat_reliability = qiat_corr
    else:
        print("  Warning: Insufficient data for correlation")
        qiat_reliability = np.nan
else:
    print("  Warning: No qIAT data found for control group")
    qiat_reliability = np.nan

# 3. IAT test-retest reliability (control group, IAT task only)
iat_control = control_group[control_group['task'].str.upper() == 'IAT'].copy()
print(f"\nIAT (Control group):")
print(f"  Participants: {len(iat_control)}")

if len(iat_control) > 0:
    t1_iat_control = pd.to_numeric(iat_control['t1_dscore'], errors='coerce')
    t2_iat_control = pd.to_numeric(iat_control['t2_dscore'], errors='coerce')
    
    # Remove missing values
    iat_pairs = pd.DataFrame({
        't1': t1_iat_control,
        't2': t2_iat_control
    }).dropna()
    
    print(f"  Participants with complete data: {len(iat_pairs)}")
    
    if len(iat_pairs) > 1:
        iat_corr, iat_p = pearsonr(iat_pairs['t1'], iat_pairs['t2'])
        print(f"  Test-retest reliability: r = {iat_corr:.3f}, p = {iat_p:.4f}")
        iat_reliability = iat_corr
    else:
        print("  Warning: Insufficient data for correlation")
        iat_reliability = np.nan
else:
    print("  Warning: No IAT data found for control group")
    iat_reliability = np.nan

# Summary
print("\n" + "="*60)
print("SUMMARY (for article):")
print("="*60)
print(f"Test-retest reliability (Control group):")
print(f"  Self-report: {self_report_reliability:.2f}")
print(f"  qIAT: {qiat_reliability:.2f}")
print(f"  IAT: {iat_reliability:.2f}")


Control group participants: 181

Self-report (Questionnaire):
  Participants with complete data: 181
  Test-retest reliability: r = 0.962, p = 0.0000

qIAT (Control group):
  Participants: 89
  Participants with complete data: 89
  Test-retest reliability: r = 0.670, p = 0.0000

IAT (Control group):
  Participants: 92
  Participants with complete data: 92
  Test-retest reliability: r = 0.689, p = 0.0000

SUMMARY (for article):
Test-retest reliability (Control group):
  Self-report: 0.96
  qIAT: 0.67
  IAT: 0.69


## Table 4: Split-Half Reliabilities

Table showing split-half reliabilities for IAT and qIAT across groups (Control, Faking Low, Faking High, Overall) and time points (Time1, Time2). Uses paired-odd and paired-even D-scores with Spearman-Brown correction.


In [46]:
# Helper function to add significance asterisks
def add_significance(p_value):
    """Add significance asterisks based on p-value."""
    if p_value < 0.001:
        return "***"
    elif p_value < 0.01:
        return "**"
    elif p_value < 0.05:
        return "*"
    else:
        return ""

# Group mapping: 0 = Control, 1 = Faking Low, 2 = Faking High
group_names = {0: 'Control', 1: 'Faking Low', 2: 'Faking High'}

# Initialize results dictionary
# Structure: results[task][group][time] = (reliability, p_value, formatted_string)
results = {
    'IAT': {},
    'qIAT': {}
}

# Function to compute split-half reliability for a subset of data
def compute_split_half_reliability(data, odd_col, even_col):
    """Compute split-half reliability from odd and even D-scores."""
    odd_scores = pd.to_numeric(data[odd_col], errors='coerce')
    even_scores = pd.to_numeric(data[even_col], errors='coerce')
    
    # Remove missing values
    pairs = pd.DataFrame({
        'odd': odd_scores,
        'even': even_scores
    }).dropna()
    
    if len(pairs) < 2:
        return np.nan, np.nan, "N/A"
    
    # Compute correlation
    corr, p_value = pearsonr(pairs['odd'], pairs['even'])
    
    # Apply Spearman-Brown correction
    reliability = spearman_brown(corr)
    
    # Format with significance
    sig = add_significance(p_value)
    formatted = f"{reliability:.2f}{sig}"
    
    return reliability, p_value, formatted

# Process IAT
iat_data = df[df['task'].str.upper() == 'IAT'].copy()

for group_num, group_name in group_names.items():
    group_iat = iat_data[iat_data['group'] == group_num].copy()
    
    # Time 1
    rel_t1, p_t1, fmt_t1 = compute_split_half_reliability(
        group_iat, 'dscore_paired_odd_t1', 'dscore_paired_even_t1'
    )
    
    # Time 2
    rel_t2, p_t2, fmt_t2 = compute_split_half_reliability(
        group_iat, 'dscore_paired_odd_t2', 'dscore_paired_even_t2'
    )
    
    results['IAT'][group_name] = {
        'Time1': (rel_t1, p_t1, fmt_t1),
        'Time2': (rel_t2, p_t2, fmt_t2)
    }

# IAT Overall
rel_t1_all, p_t1_all, fmt_t1_all = compute_split_half_reliability(
    iat_data, 'dscore_paired_odd_t1', 'dscore_paired_even_t1'
)
rel_t2_all, p_t2_all, fmt_t2_all = compute_split_half_reliability(
    iat_data, 'dscore_paired_odd_t2', 'dscore_paired_even_t2'
)
results['IAT']['Overall'] = {
    'Time1': (rel_t1_all, p_t1_all, fmt_t1_all),
    'Time2': (rel_t2_all, p_t2_all, fmt_t2_all)
}

# Process qIAT
qiat_data = df[df['task'].str.upper() == 'QIAT'].copy()

for group_num, group_name in group_names.items():
    group_qiat = qiat_data[qiat_data['group'] == group_num].copy()
    
    # Time 1
    rel_t1, p_t1, fmt_t1 = compute_split_half_reliability(
        group_qiat, 'dscore_paired_odd_t1', 'dscore_paired_even_t1'
    )
    
    # Time 2
    rel_t2, p_t2, fmt_t2 = compute_split_half_reliability(
        group_qiat, 'dscore_paired_odd_t2', 'dscore_paired_even_t2'
    )
    
    results['qIAT'][group_name] = {
        'Time1': (rel_t1, p_t1, fmt_t1),
        'Time2': (rel_t2, p_t2, fmt_t2)
    }

# qIAT Overall
rel_t1_all, p_t1_all, fmt_t1_all = compute_split_half_reliability(
    qiat_data, 'dscore_paired_odd_t1', 'dscore_paired_even_t1'
)
rel_t2_all, p_t2_all, fmt_t2_all = compute_split_half_reliability(
    qiat_data, 'dscore_paired_odd_t2', 'dscore_paired_even_t2'
)
results['qIAT']['Overall'] = {
    'Time1': (rel_t1_all, p_t1_all, fmt_t1_all),
    'Time2': (rel_t2_all, p_t2_all, fmt_t2_all)
}

# Create formatted table
print("Table 4. Split half reliabilities of implicit tasks in Study 2")
print("=" * 120)
header1 = f"{'Measure':<10} {'Control':<30} {'Faking Low':<30} {'Faking High':<30} {'Overall':<30}"
print(header1)
header2 = f"{'':<10} {'Time1':<14} {'Time2':<14} {'Time1':<14} {'Time2':<14} {'Time1':<14} {'Time2':<14} {'Time1':<14} {'Time2':<14}"
print(header2)
print("-" * 120)

for task in ['IAT', 'qIAT']:
    row = f"{task:<10}"
    
    for group_name in ['Control', 'Faking Low', 'Faking High', 'Overall']:
        if group_name in results[task]:
            fmt_t1 = results[task][group_name]['Time1'][2]
            fmt_t2 = results[task][group_name]['Time2'][2]
            row += f"{fmt_t1:<14} {fmt_t2:<14}"
        else:
            row += f"{'N/A':<14} {'N/A':<14}"
    
    print(row)

print("=" * 120)
print("Note: * p < .05, ** p < .01, *** p < .001")

# Create detailed DataFrame with all information
table_data = []
for task in ['IAT', 'qIAT']:
    for group_name in ['Control', 'Faking Low', 'Faking High', 'Overall']:
        if group_name in results[task]:
            rel_t1, p_t1, fmt_t1 = results[task][group_name]['Time1']
            rel_t2, p_t2, fmt_t2 = results[task][group_name]['Time2']
            table_data.append({
                'Task': task,
                'Group': group_name,
                'Time1_Reliability': rel_t1,
                'Time1_p': p_t1,
                'Time1_Formatted': fmt_t1,
                'Time2_Reliability': rel_t2,
                'Time2_p': p_t2,
                'Time2_Formatted': fmt_t2
            })

table_df = pd.DataFrame(table_data)
print("\n\nDetailed table (DataFrame format):")
print(table_df.to_string(index=False))


Table 4. Split half reliabilities of implicit tasks in Study 2
Measure    Control                        Faking Low                     Faking High                    Overall                       
           Time1          Time2          Time1          Time2          Time1          Time2          Time1          Time2         
------------------------------------------------------------------------------------------------------------------------
IAT       0.93***        0.88***       0.93***        0.84***       0.90***        0.90***       0.92***        0.88***       
qIAT      0.96***        0.91***       0.97***        0.92***       0.94***        0.91***       0.95***        0.91***       
Note: * p < .05, ** p < .01, *** p < .001


Detailed table (DataFrame format):
Task       Group  Time1_Reliability       Time1_p Time1_Formatted  Time2_Reliability      Time2_p Time2_Formatted
 IAT     Control           0.930090  2.745030e-29         0.93***           0.880119 1.723079e-20      

## Table 5: Implicit-Explicit Correlations

Compute correlations between implicit tasks (IAT and qIAT) and the explicit questionnaire measure at Time1 and Time2, separately for each group (Control, Faking Low, Faking High, Overall).


In [47]:
# Helper function to compute correlation with significance
def compute_correlation_with_test(x, y):
    """Compute Pearson correlation and test significance."""
    # Remove missing values
    pairs = pd.DataFrame({
        'x': pd.to_numeric(x, errors='coerce'),
        'y': pd.to_numeric(y, errors='coerce')
    }).dropna()
    
    if len(pairs) < 2:
        return np.nan, np.nan, "N/A"
    
    # Compute correlation
    corr, p_value = pearsonr(pairs['x'], pairs['y'])
    
    # Format with significance
    sig = add_significance(p_value)
    formatted = f"{corr:.2f}{sig}"
    
    return corr, p_value, formatted

# Group mapping: 0 = Control, 1 = Faking Low, 2 = Faking High
group_names = {0: 'Control', 1: 'Faking Low', 2: 'Faking High'}

# Initialize results dictionary
# Structure: results[task][group][time] = (correlation, p_value, formatted_string)
results = {
    'IAT': {},
    'qIAT': {}
}

# Process IAT
iat_data = df[df['task'].str.upper() == 'IAT'].copy()

for group_num, group_name in group_names.items():
    group_iat = iat_data[iat_data['group'] == group_num].copy()
    
    # Time 1: IAT D-score with Questionnaire score
    t1_dscore = pd.to_numeric(group_iat['t1_dscore'], errors='coerce')
    t1_ques = pd.to_numeric(group_iat['t1_ques'], errors='coerce')
    corr_t1, p_t1, fmt_t1 = compute_correlation_with_test(t1_dscore, t1_ques)
    
    # Time 2: IAT D-score with Questionnaire score
    t2_dscore = pd.to_numeric(group_iat['t2_dscore'], errors='coerce')
    t2_ques = pd.to_numeric(group_iat['t2_ques'], errors='coerce')
    corr_t2, p_t2, fmt_t2 = compute_correlation_with_test(t2_dscore, t2_ques)
    
    results['IAT'][group_name] = {
        'Time1': (corr_t1, p_t1, fmt_t1),
        'Time2': (corr_t2, p_t2, fmt_t2)
    }

# IAT Overall
t1_dscore_all = pd.to_numeric(iat_data['t1_dscore'], errors='coerce')
t1_ques_all = pd.to_numeric(iat_data['t1_ques'], errors='coerce')
corr_t1_all, p_t1_all, fmt_t1_all = compute_correlation_with_test(t1_dscore_all, t1_ques_all)

t2_dscore_all = pd.to_numeric(iat_data['t2_dscore'], errors='coerce')
t2_ques_all = pd.to_numeric(iat_data['t2_ques'], errors='coerce')
corr_t2_all, p_t2_all, fmt_t2_all = compute_correlation_with_test(t2_dscore_all, t2_ques_all)

results['IAT']['Overall'] = {
    'Time1': (corr_t1_all, p_t1_all, fmt_t1_all),
    'Time2': (corr_t2_all, p_t2_all, fmt_t2_all)
}

# Process qIAT
qiat_data = df[df['task'].str.upper() == 'QIAT'].copy()

for group_num, group_name in group_names.items():
    group_qiat = qiat_data[qiat_data['group'] == group_num].copy()
    
    # Time 1: qIAT D-score with Questionnaire score
    t1_dscore = pd.to_numeric(group_qiat['t1_dscore'], errors='coerce')
    t1_ques = pd.to_numeric(group_qiat['t1_ques'], errors='coerce')
    corr_t1, p_t1, fmt_t1 = compute_correlation_with_test(t1_dscore, t1_ques)
    
    # Time 2: qIAT D-score with Questionnaire score
    t2_dscore = pd.to_numeric(group_qiat['t2_dscore'], errors='coerce')
    t2_ques = pd.to_numeric(group_qiat['t2_ques'], errors='coerce')
    corr_t2, p_t2, fmt_t2 = compute_correlation_with_test(t2_dscore, t2_ques)
    
    results['qIAT'][group_name] = {
        'Time1': (corr_t1, p_t1, fmt_t1),
        'Time2': (corr_t2, p_t2, fmt_t2)
    }

# qIAT Overall
t1_dscore_all = pd.to_numeric(qiat_data['t1_dscore'], errors='coerce')
t1_ques_all = pd.to_numeric(qiat_data['t1_ques'], errors='coerce')
corr_t1_all, p_t1_all, fmt_t1_all = compute_correlation_with_test(t1_dscore_all, t1_ques_all)

t2_dscore_all = pd.to_numeric(qiat_data['t2_dscore'], errors='coerce')
t2_ques_all = pd.to_numeric(qiat_data['t2_ques'], errors='coerce')
corr_t2_all, p_t2_all, fmt_t2_all = compute_correlation_with_test(t2_dscore_all, t2_ques_all)

results['qIAT']['Overall'] = {
    'Time1': (corr_t1_all, p_t1_all, fmt_t1_all),
    'Time2': (corr_t2_all, p_t2_all, fmt_t2_all)
}

# Create formatted table
print("Table 5. Correlations between implicit tasks and the explicit measure in Study 2")
print("=" * 120)
header1 = f"{'Measure':<10} {'Control':<30} {'Faking Low':<30} {'Faking High':<30} {'Overall':<30}"
print(header1)
header2 = f"{'':<10} {'Time1':<14} {'Time2':<14} {'Time1':<14} {'Time2':<14} {'Time1':<14} {'Time2':<14} {'Time1':<14} {'Time2':<14}"
print(header2)
print("-" * 120)

for task in ['IAT', 'qIAT']:
    row = f"{task:<10}"
    
    for group_name in ['Control', 'Faking Low', 'Faking High', 'Overall']:
        if group_name in results[task]:
            fmt_t1 = results[task][group_name]['Time1'][2]
            fmt_t2 = results[task][group_name]['Time2'][2]
            row += f"{fmt_t1:<14} {fmt_t2:<14}"
        else:
            row += f"{'N/A':<14} {'N/A':<14}"
    
    print(row)

print("=" * 120)
print("Note: * p < .05, ** p < .01, *** p < .001")

# Create detailed DataFrame with all information
table_data = []
for task in ['IAT', 'qIAT']:
    for group_name in ['Control', 'Faking Low', 'Faking High', 'Overall']:
        if group_name in results[task]:
            corr_t1, p_t1, fmt_t1 = results[task][group_name]['Time1']
            corr_t2, p_t2, fmt_t2 = results[task][group_name]['Time2']
            table_data.append({
                'Task': task,
                'Group': group_name,
                'Time1_Correlation': corr_t1,
                'Time1_p': p_t1,
                'Time1_Formatted': fmt_t1,
                'Time2_Correlation': corr_t2,
                'Time2_p': p_t2,
                'Time2_Formatted': fmt_t2
            })

table_df = pd.DataFrame(table_data)
print("\n\nDetailed table (DataFrame format):")
print(table_df.to_string(index=False))


Table 5. Correlations between implicit tasks and the explicit measure in Study 2
Measure    Control                        Faking Low                     Faking High                    Overall                       
           Time1          Time2          Time1          Time2          Time1          Time2          Time1          Time2         
------------------------------------------------------------------------------------------------------------------------
IAT       0.28**         0.41***       0.35**         0.18          0.30**         -0.03         0.31***        0.28***       
qIAT      0.52***        0.58***       0.56***        0.20          0.46***        -0.01         0.52***        0.24***       
Note: * p < .05, ** p < .01, *** p < .001


Detailed table (DataFrame format):
Task       Group  Time1_Correlation      Time1_p Time1_Formatted  Time2_Correlation      Time2_p Time2_Formatted
 IAT     Control           0.279475 6.976458e-03          0.28**           0.414384 4.

## Implicit-Explicit Convergent Validity Comparison (Time1)

Compare qIAT and IAT correlations with explicit measure at Time1 using Fisher z test to determine if qIAT shows stronger implicit-explicit relationship than IAT.


In [48]:
# Get overall Time1 correlations for qIAT and IAT
# These are the correlations from the "Overall" group in Table 5

# qIAT Time1 correlation with explicit measure
qiat_t1_data = df[df['task'].str.upper() == 'QIAT'].copy()
qiat_t1_dscore = pd.to_numeric(qiat_t1_data['t1_dscore'], errors='coerce')
qiat_t1_ques = pd.to_numeric(qiat_t1_data['t1_ques'], errors='coerce')

qiat_t1_pairs = pd.DataFrame({
    'dscore': qiat_t1_dscore,
    'ques': qiat_t1_ques
}).dropna()

qiat_t1_corr, qiat_t1_p = pearsonr(qiat_t1_pairs['dscore'], qiat_t1_pairs['ques'])
qiat_t1_n = len(qiat_t1_pairs)

print(f"qIAT Time1 correlation with explicit measure:")
print(f"  r = {qiat_t1_corr:.2f}, p = {qiat_t1_p:.4f}, n = {qiat_t1_n}")

# IAT Time1 correlation with explicit measure
iat_t1_data = df[df['task'].str.upper() == 'IAT'].copy()
iat_t1_dscore = pd.to_numeric(iat_t1_data['t1_dscore'], errors='coerce')
iat_t1_ques = pd.to_numeric(iat_t1_data['t1_ques'], errors='coerce')

iat_t1_pairs = pd.DataFrame({
    'dscore': iat_t1_dscore,
    'ques': iat_t1_ques
}).dropna()

iat_t1_corr, iat_t1_p = pearsonr(iat_t1_pairs['dscore'], iat_t1_pairs['ques'])
iat_t1_n = len(iat_t1_pairs)

print(f"\nIAT Time1 correlation with explicit measure:")
print(f"  r = {iat_t1_corr:.2f}, p = {iat_t1_p:.4f}, n = {iat_t1_n}")

# Fisher z test to compare the two correlations
# Note: These are independent correlations (different participants in qIAT vs IAT groups)
z_stat, p_value = compare_correlations(qiat_t1_corr, iat_t1_corr, qiat_t1_n, iat_t1_n)

print(f"\n{'='*60}")
print("Fisher z test comparing qIAT vs IAT correlations:")
print(f"{'='*60}")
print(f"qIAT correlation: r = {qiat_t1_corr:.2f}, n = {qiat_t1_n}")
print(f"IAT correlation: r = {iat_t1_corr:.2f}, n = {iat_t1_n}")
print(f"\nFisher z statistic: z = {z_stat:.2f}")
print(f"p-value: p = {p_value:.4f}")

if p_value < 0.05:
    print(f"\nConclusion: The difference is significant (p = {p_value:.4f})")
    if qiat_t1_corr > iat_t1_corr:
        print("qIAT exhibits a stronger implicit-explicit relationship than IAT.")
    else:
        print("IAT exhibits a stronger implicit-explicit relationship than qIAT.")
else:
    print(f"\nConclusion: The difference is not significant (p = {p_value:.4f})")

print(f"\n{'='*60}")
print("SUMMARY (for article):")
print(f"{'='*60}")
print(f"qIAT-explicit correlation (Time1): r = {qiat_t1_corr:.2f}, p < .001")
print(f"IAT-explicit correlation (Time1): r = {iat_t1_corr:.2f}, p < .001")
print(f"Fisher z test: z = {z_stat:.2f}, p = {p_value:.4f}")


qIAT Time1 correlation with explicit measure:
  r = 0.52, p = 0.0000, n = 267

IAT Time1 correlation with explicit measure:
  r = 0.31, p = 0.0000, n = 261

Fisher z test comparing qIAT vs IAT correlations:
qIAT correlation: r = 0.52, n = 267
IAT correlation: r = 0.31, n = 261

Fisher z statistic: z = 2.99
p-value: p = 0.0028

Conclusion: The difference is significant (p = 0.0028)
qIAT exhibits a stronger implicit-explicit relationship than IAT.

SUMMARY (for article):
qIAT-explicit correlation (Time1): r = 0.52, p < .001
IAT-explicit correlation (Time1): r = 0.31, p < .001
Fisher z test: z = 2.99, p = 0.0028


## Fakeability Analysis: Self-Report (Questionnaire)

Conduct mixed ANOVA for self-report measure with Time (within-subject) and Group (between-subject) factors, followed by planned contrasts (paired t-tests) for Faking High and Faking Low groups.


In [49]:
# Prepare data for mixed ANOVA
# Need long format: participant, group, time, score
ques_data = df[['id', 'group', 't1_ques', 't2_ques']].copy()

# Convert to numeric
ques_data['group'] = pd.to_numeric(ques_data['group'], errors='coerce')
ques_data['t1_ques'] = pd.to_numeric(ques_data['t1_ques'], errors='coerce')
ques_data['t2_ques'] = pd.to_numeric(ques_data['t2_ques'], errors='coerce')

# Remove rows with missing data
ques_data = ques_data.dropna()

print(f"Participants with complete questionnaire data: {len(ques_data)}")
print(f"Group distribution:")
print(ques_data['group'].value_counts().sort_index())

# Reshape to long format for ANOVA
ques_long = pd.melt(
    ques_data,
    id_vars=['id', 'group'],
    value_vars=['t1_ques', 't2_ques'],
    var_name='time',
    value_name='score'
)

# Recode time: t1_ques -> 1, t2_ques -> 2
ques_long['time'] = ques_long['time'].map({'t1_ques': 1, 't2_ques': 2})

# Recode group labels for clarity (0=Control, 1=Faking Low, 2=Faking High)
ques_long['group_label'] = ques_long['group'].map({0: 'Control', 1: 'Faking Low', 2: 'Faking High'})

print(f"\nData prepared for ANOVA: {len(ques_long)} observations")

# Mixed ANOVA using pingouin
# Run mixed ANOVA using pingouin
aov = pg.mixed_anova(
    data=ques_long,
    dv='score',
    within='time',
    between='group',
    subject='id'
)

print("\n" + "="*60)
print("Mixed ANOVA Results for Self-Report (Questionnaire)")
print("="*60)
print(aov)

# Extract and format results
print("\n" + "-"*60)
print("ANOVA Summary:")
print("-"*60)
for idx, row in aov.iterrows():
    effect = row['Source']
    if 'time' in effect.lower() and 'group' in effect.lower() and '*' in effect:
        effect_name = "Group × Time"
    elif 'time' in effect.lower():
        effect_name = "Time"
    elif 'group' in effect.lower():
        effect_name = "Group"
    else:
        effect_name = effect
    
    f_val = row['F']
    p_val = row['p-unc']
    eta_sq = row.get('np2', np.nan)  # partial eta squared
    
    # Get degrees of freedom
    df1 = row.get('DF', '?')
    df2 = row.get('DF2', '?')
    
    print(f"{effect_name}: F({df1}, {df2}) = {f_val:.2f}, p = {p_val:.4f}, ηp² = {eta_sq:.2f}")

# Compute descriptive statistics for planned contrasts
print("\n" + "="*60)
print("Descriptive Statistics for Planned Contrasts")
print("="*60)

# Faking Low group (group == 1)
faking_low = ques_data[ques_data['group'] == 1].copy()
fl_t1_mean = faking_low['t1_ques'].mean()
fl_t1_sd = faking_low['t1_ques'].std(ddof=1)
fl_t2_mean = faking_low['t2_ques'].mean()
fl_t2_sd = faking_low['t2_ques'].std(ddof=1)
fl_n = len(faking_low)

print(f"\nFaking Low group (n = {fl_n}):")
print(f"  Time1: M = {fl_t1_mean:.2f}, SD = {fl_t1_sd:.2f}")
print(f"  Time2: M = {fl_t2_mean:.2f}, SD = {fl_t2_sd:.2f}")

# Faking High group (group == 2)
faking_high = ques_data[ques_data['group'] == 2].copy()
fh_t1_mean = faking_high['t1_ques'].mean()
fh_t1_sd = faking_high['t1_ques'].std(ddof=1)
fh_t2_mean = faking_high['t2_ques'].mean()
fh_t2_sd = faking_high['t2_ques'].std(ddof=1)
fh_n = len(faking_high)

print(f"\nFaking High group (n = {fh_n}):")
print(f"  Time1: M = {fh_t1_mean:.2f}, SD = {fh_t1_sd:.2f}")
print(f"  Time2: M = {fh_t2_mean:.2f}, SD = {fh_t2_sd:.2f}")

# Planned contrasts: Paired t-tests
print("\n" + "="*60)
print("Planned Contrasts (Paired t-tests)")
print("="*60)

# Faking Low: Time1 vs Time2
fl_t1 = faking_low['t1_ques'].values
fl_t2 = faking_low['t2_ques'].values
fl_t_stat, fl_t_p = ttest_rel(fl_t1, fl_t2)

# Cohen's d for paired samples: mean difference / SD of differences
fl_diff = fl_t1 - fl_t2
fl_cohens_d = fl_diff.mean() / fl_diff.std(ddof=1)

print(f"\nFaking Low group:")
print(f"  Paired t-test: t({fl_n-1}) = {fl_t_stat:.2f}, p = {fl_t_p:.4f}")
print(f"  Cohen's d = {fl_cohens_d:.2f}")
print(f"  Mean difference = {fl_diff.mean():.2f}, SD of differences = {fl_diff.std(ddof=1):.2f}")

# Faking High: Time1 vs Time2
fh_t1 = faking_high['t1_ques'].values
fh_t2 = faking_high['t2_ques'].values
fh_t_stat, fh_t_p = ttest_rel(fh_t1, fh_t2)

# Cohen's d for paired samples: mean difference / SD of differences
fh_diff = fh_t1 - fh_t2
fh_cohens_d = fh_diff.mean() / fh_diff.std(ddof=1)

print(f"\nFaking High group:")
print(f"  Paired t-test: t({fh_n-1}) = {abs(fh_t_stat):.2f}, p = {fh_t_p:.4f}")
print(f"  Cohen's d = {abs(fh_cohens_d):.2f}")
print(f"  Mean difference = {fh_diff.mean():.2f}, SD of differences = {fh_diff.std(ddof=1):.2f}")

print("\n" + "="*60)
print("SUMMARY (for article):")
print("="*60)
print("Mixed ANOVA for Self-Report:")
print("  (Note: Full ANOVA results require specialized output)")
print("\nPlanned Contrasts:")
print(f"  Faking Low: t({fl_n-1}) = {fl_t_stat:.2f}, p < .001, d = {fl_cohens_d:.2f}")
print(f"  Faking High: t({fh_n-1}) = {fh_t_stat:.2f}, p < .001, d = {fh_cohens_d:.2f}")


Participants with complete questionnaire data: 528
Group distribution:
group
0    181
1    169
2    178
Name: count, dtype: int64

Data prepared for ANOVA: 1056 observations

Mixed ANOVA Results for Self-Report (Questionnaire)
        Source            SS  DF1  DF2            MS            F  \
0        group  56609.240910    2  525  28304.620455   309.548750   
1         time   1036.094697    1  525   1036.094697    36.432623   
2  Interaction  62533.612811    2  525  31266.806406  1099.447541   

           p-unc       np2  eps  
0   1.567725e-89  0.541123  NaN  
1   2.989402e-09  0.064892  1.0  
2  2.015318e-188  0.807261  NaN  

------------------------------------------------------------
ANOVA Summary:
------------------------------------------------------------
Group: F(?, 525) = 309.55, p = 0.0000, ηp² = 0.54
Time: F(?, 525) = 36.43, p = 0.0000, ηp² = 0.06
Interaction: F(?, 525) = 1099.45, p = 0.0000, ηp² = 0.81

Descriptive Statistics for Planned Contrasts

Faking Low group (n 

## Fakeability Analysis: IAT

Conduct mixed ANOVA for IAT measure with Time (within-subject) and Group (between-subject) factors, followed by planned contrasts (paired t-tests) for Faking High and Faking Low groups.


In [50]:
# Prepare data for mixed ANOVA - IAT only
# Filter to IAT task
iat_data = df[df['task'].str.upper() == 'IAT'].copy()

# Prepare data: participant, group, time, score
iat_ques_data = iat_data[['id', 'group', 't1_dscore', 't2_dscore']].copy()

# Convert to numeric
iat_ques_data['group'] = pd.to_numeric(iat_ques_data['group'], errors='coerce')
iat_ques_data['t1_dscore'] = pd.to_numeric(iat_ques_data['t1_dscore'], errors='coerce')
iat_ques_data['t2_dscore'] = pd.to_numeric(iat_ques_data['t2_dscore'], errors='coerce')

# Remove rows with missing data
iat_ques_data = iat_ques_data.dropna()

print(f"IAT participants with complete data: {len(iat_ques_data)}")
print(f"Group distribution:")
print(iat_ques_data['group'].value_counts().sort_index())

# Reshape to long format for ANOVA
iat_long = pd.melt(
    iat_ques_data,
    id_vars=['id', 'group'],
    value_vars=['t1_dscore', 't2_dscore'],
    var_name='time',
    value_name='score'
)

# Recode time: t1_dscore -> 1, t2_dscore -> 2
iat_long['time'] = iat_long['time'].map({'t1_dscore': 1, 't2_dscore': 2})

print(f"\nData prepared for ANOVA: {len(iat_long)} observations")

# Mixed ANOVA using pingouin
aov = pg.mixed_anova(
    data=iat_long,
    dv='score',
    within='time',
    between='group',
    subject='id'
)

print("\n" + "="*60)
print("Mixed ANOVA Results for IAT")
print("="*60)
print(aov)

# Extract and format results
print("\n" + "-"*60)
print("ANOVA Summary:")
print("-"*60)
for idx, row in aov.iterrows():
    effect = row['Source']
    if 'time' in effect.lower() and 'group' in effect.lower() and '*' in effect:
        effect_name = "Group × Time"
    elif 'time' in effect.lower():
        effect_name = "Time"
    elif 'group' in effect.lower():
        effect_name = "Group"
    else:
        effect_name = effect
    
    f_val = row['F']
    p_val = row['p-unc']
    eta_sq = row.get('np2', np.nan)  # partial eta squared
    
    # Get degrees of freedom
    df1 = row.get('DF1', row.get('DF', '?'))
    df2 = row.get('DF2', '?')
    
    print(f"{effect_name}: F({df1}, {df2}) = {f_val:.2f}, p = {p_val:.4f}, ηp² = {eta_sq:.2f}")

# Compute descriptive statistics for planned contrasts
print("\n" + "="*60)
print("Descriptive Statistics for Planned Contrasts")
print("="*60)

# Faking Low group (group == 1)
iat_faking_low = iat_ques_data[iat_ques_data['group'] == 1].copy()
iat_fl_t1_mean = iat_faking_low['t1_dscore'].mean()
iat_fl_t1_sd = iat_faking_low['t1_dscore'].std(ddof=1)
iat_fl_t2_mean = iat_faking_low['t2_dscore'].mean()
iat_fl_t2_sd = iat_faking_low['t2_dscore'].std(ddof=1)
iat_fl_n = len(iat_faking_low)

print(f"\nFaking Low group (n = {iat_fl_n}):")
print(f"  Time1: M = {iat_fl_t1_mean:.2f}, SD = {iat_fl_t1_sd:.2f}")
print(f"  Time2: M = {iat_fl_t2_mean:.2f}, SD = {iat_fl_t2_sd:.2f}")

# Faking High group (group == 2)
iat_faking_high = iat_ques_data[iat_ques_data['group'] == 2].copy()
iat_fh_t1_mean = iat_faking_high['t1_dscore'].mean()
iat_fh_t1_sd = iat_faking_high['t1_dscore'].std(ddof=1)
iat_fh_t2_mean = iat_faking_high['t2_dscore'].mean()
iat_fh_t2_sd = iat_faking_high['t2_dscore'].std(ddof=1)
iat_fh_n = len(iat_faking_high)

print(f"\nFaking High group (n = {iat_fh_n}):")
print(f"  Time1: M = {iat_fh_t1_mean:.2f}, SD = {iat_fh_t1_sd:.2f}")
print(f"  Time2: M = {iat_fh_t2_mean:.2f}, SD = {iat_fh_t2_sd:.2f}")

# Planned contrasts: Paired t-tests
print("\n" + "="*60)
print("Planned Contrasts (Paired t-tests)")
print("="*60)

# Faking Low: Time1 vs Time2
iat_fl_t1 = iat_faking_low['t1_dscore'].values
iat_fl_t2 = iat_faking_low['t2_dscore'].values
iat_fl_t_stat, iat_fl_t_p = ttest_rel(iat_fl_t1, iat_fl_t2)

# Cohen's d for paired samples: mean difference / SD of differences
iat_fl_diff = iat_fl_t1 - iat_fl_t2
iat_fl_cohens_d = iat_fl_diff.mean() / iat_fl_diff.std(ddof=1)

print(f"\nFaking Low group:")
print(f"  Paired t-test: t({iat_fl_n-1}) = {iat_fl_t_stat:.2f}, p = {iat_fl_t_p:.4f}")
print(f"  Cohen's d = {iat_fl_cohens_d:.2f}")
print(f"  Mean difference = {iat_fl_diff.mean():.2f}, SD of differences = {iat_fl_diff.std(ddof=1):.2f}")

# Faking High: Time1 vs Time2
iat_fh_t1 = iat_faking_high['t1_dscore'].values
iat_fh_t2 = iat_faking_high['t2_dscore'].values
iat_fh_t_stat, iat_fh_t_p = ttest_rel(iat_fh_t1, iat_fh_t2)

# Cohen's d for paired samples: mean difference / SD of differences
iat_fh_diff = iat_fh_t1 - iat_fh_t2
iat_fh_cohens_d = iat_fh_diff.mean() / iat_fh_diff.std(ddof=1)

print(f"\nFaking High group:")
print(f"  Paired t-test: t({iat_fh_n-1}) = {abs(iat_fh_t_stat):.2f}, p = {iat_fh_t_p:.4f}")
print(f"  Cohen's d = {abs(iat_fh_cohens_d):.2f}")
print(f"  Mean difference = {iat_fh_diff.mean():.2f}, SD of differences = {iat_fh_diff.std(ddof=1):.2f}")

print("\n" + "="*60)
print("SUMMARY (for article):")
print("="*60)
print("Mixed ANOVA for IAT:")
print("  (See ANOVA Summary above for F-statistics, p-values, and ηp²)")
print("\nPlanned Contrasts:")
print(f"  Faking Low: t({iat_fl_n-1}) = {iat_fl_t_stat:.2f}, p = {iat_fl_t_p:.4f}, d = {iat_fl_cohens_d:.2f}")
print(f"  Faking High: t({iat_fh_n-1}) = {abs(iat_fh_t_stat):.2f}, p = {iat_fh_t_p:.4f}, d = {abs(iat_fh_cohens_d):.2f}")


IAT participants with complete data: 261
Group distribution:
group
0    92
1    84
2    85
Name: count, dtype: int64

Data prepared for ANOVA: 522 observations

Mixed ANOVA Results for IAT
        Source        SS  DF1  DF2        MS          F         p-unc  \
0        group  2.690619    2  258  1.345309   2.799876  6.266677e-02   
1         time  4.744147    1  258  4.744147  33.775834  1.817898e-08   
2  Interaction  1.167246    2  258  0.583623   4.155090  1.674634e-02   

        np2  eps  
0  0.021243  NaN  
1  0.115760  1.0  
2  0.031205  NaN  

------------------------------------------------------------
ANOVA Summary:
------------------------------------------------------------
Group: F(2, 258) = 2.80, p = 0.0627, ηp² = 0.02
Time: F(1, 258) = 33.78, p = 0.0000, ηp² = 0.12
Interaction: F(2, 258) = 4.16, p = 0.0167, ηp² = 0.03

Descriptive Statistics for Planned Contrasts

Faking Low group (n = 84):
  Time1: M = -0.50, SD = 0.57
  Time2: M = -0.41, SD = 0.46

Faking High group (

## Fakeability Analysis: qIAT

Conduct mixed ANOVA for qIAT measure with Time (within-subject) and Group (between-subject) factors, followed by planned contrasts (paired t-tests) for Faking High and Faking Low groups.


In [51]:
# Prepare data for mixed ANOVA - qIAT only
# Filter to qIAT task
qiat_data = df[df['task'].str.upper() == 'QIAT'].copy()

# Prepare data: participant, group, time, score
qiat_ques_data = qiat_data[['id', 'group', 't1_dscore', 't2_dscore']].copy()

# Convert to numeric
qiat_ques_data['group'] = pd.to_numeric(qiat_ques_data['group'], errors='coerce')
qiat_ques_data['t1_dscore'] = pd.to_numeric(qiat_ques_data['t1_dscore'], errors='coerce')
qiat_ques_data['t2_dscore'] = pd.to_numeric(qiat_ques_data['t2_dscore'], errors='coerce')

# Remove rows with missing data
qiat_ques_data = qiat_ques_data.dropna()

print(f"qIAT participants with complete data: {len(qiat_ques_data)}")
print(f"Group distribution:")
print(qiat_ques_data['group'].value_counts().sort_index())

# Reshape to long format for ANOVA
qiat_long = pd.melt(
    qiat_ques_data,
    id_vars=['id', 'group'],
    value_vars=['t1_dscore', 't2_dscore'],
    var_name='time',
    value_name='score'
)

# Recode time: t1_dscore -> 1, t2_dscore -> 2
qiat_long['time'] = qiat_long['time'].map({'t1_dscore': 1, 't2_dscore': 2})

print(f"\nData prepared for ANOVA: {len(qiat_long)} observations")

# Mixed ANOVA using pingouin
aov = pg.mixed_anova(
    data=qiat_long,
    dv='score',
    within='time',
    between='group',
    subject='id'
)

print("\n" + "="*60)
print("Mixed ANOVA Results for qIAT")
print("="*60)
print(aov)

# Extract and format results
print("\n" + "-"*60)
print("ANOVA Summary:")
print("-"*60)
for idx, row in aov.iterrows():
    effect = row['Source']
    if 'time' in effect.lower() and 'group' in effect.lower() and '*' in effect:
        effect_name = "Group × Time"
    elif 'time' in effect.lower():
        effect_name = "Time"
    elif 'group' in effect.lower():
        effect_name = "Group"
    else:
        effect_name = effect
    
    f_val = row['F']
    p_val = row['p-unc']
    eta_sq = row.get('np2', np.nan)  # partial eta squared
    
    # Get degrees of freedom
    df1 = row.get('DF1', row.get('DF', '?'))
    df2 = row.get('DF2', '?')
    
    print(f"{effect_name}: F({df1}, {df2}) = {f_val:.2f}, p = {p_val:.4f}, ηp² = {eta_sq:.2f}")

# Compute descriptive statistics for planned contrasts
print("\n" + "="*60)
print("Descriptive Statistics for Planned Contrasts")
print("="*60)

# Faking Low group (group == 1)
qiat_faking_low = qiat_ques_data[qiat_ques_data['group'] == 1].copy()
qiat_fl_t1_mean = qiat_faking_low['t1_dscore'].mean()
qiat_fl_t1_sd = qiat_faking_low['t1_dscore'].std(ddof=1)
qiat_fl_t2_mean = qiat_faking_low['t2_dscore'].mean()
qiat_fl_t2_sd = qiat_faking_low['t2_dscore'].std(ddof=1)
qiat_fl_n = len(qiat_faking_low)

print(f"\nFaking Low group (n = {qiat_fl_n}):")
print(f"  Time1: M = {qiat_fl_t1_mean:.2f}, SD = {qiat_fl_t1_sd:.2f}")
print(f"  Time2: M = {qiat_fl_t2_mean:.2f}, SD = {qiat_fl_t2_sd:.2f}")

# Faking High group (group == 2)
qiat_faking_high = qiat_ques_data[qiat_ques_data['group'] == 2].copy()
qiat_fh_t1_mean = qiat_faking_high['t1_dscore'].mean()
qiat_fh_t1_sd = qiat_faking_high['t1_dscore'].std(ddof=1)
qiat_fh_t2_mean = qiat_faking_high['t2_dscore'].mean()
qiat_fh_t2_sd = qiat_faking_high['t2_dscore'].std(ddof=1)
qiat_fh_n = len(qiat_faking_high)

print(f"\nFaking High group (n = {qiat_fh_n}):")
print(f"  Time1: M = {qiat_fh_t1_mean:.2f}, SD = {qiat_fh_t1_sd:.2f}")
print(f"  Time2: M = {qiat_fh_t2_mean:.2f}, SD = {qiat_fh_t2_sd:.2f}")

# Planned contrasts: Paired t-tests
print("\n" + "="*60)
print("Planned Contrasts (Paired t-tests)")
print("="*60)

# Faking Low: Time1 vs Time2
qiat_fl_t1 = qiat_faking_low['t1_dscore'].values
qiat_fl_t2 = qiat_faking_low['t2_dscore'].values
qiat_fl_t_stat, qiat_fl_t_p = ttest_rel(qiat_fl_t1, qiat_fl_t2)

# Cohen's d for paired samples: mean difference / SD of differences
qiat_fl_diff = qiat_fl_t1 - qiat_fl_t2
qiat_fl_cohens_d = qiat_fl_diff.mean() / qiat_fl_diff.std(ddof=1)

print(f"\nFaking Low group:")
print(f"  Paired t-test: t({qiat_fl_n-1}) = {qiat_fl_t_stat:.2f}, p = {qiat_fl_t_p:.4f}")
print(f"  Cohen's d = {qiat_fl_cohens_d:.2f}")
print(f"  Mean difference = {qiat_fl_diff.mean():.2f}, SD of differences = {qiat_fl_diff.std(ddof=1):.2f}")

# Faking High: Time1 vs Time2
qiat_fh_t1 = qiat_faking_high['t1_dscore'].values
qiat_fh_t2 = qiat_faking_high['t2_dscore'].values
qiat_fh_t_stat, qiat_fh_t_p = ttest_rel(qiat_fh_t1, qiat_fh_t2)

# Cohen's d for paired samples: mean difference / SD of differences
qiat_fh_diff = qiat_fh_t1 - qiat_fh_t2
qiat_fh_cohens_d = qiat_fh_diff.mean() / qiat_fh_diff.std(ddof=1)

print(f"\nFaking High group:")
print(f"  Paired t-test: t({qiat_fh_n-1}) = {abs(qiat_fh_t_stat):.2f}, p = {qiat_fh_t_p:.4f}")
print(f"  Cohen's d = {abs(qiat_fh_cohens_d):.2f}")
print(f"  Mean difference = {qiat_fh_diff.mean():.2f}, SD of differences = {qiat_fh_diff.std(ddof=1):.2f}")

print("\n" + "="*60)
print("SUMMARY (for article):")
print("="*60)
print("Mixed ANOVA for qIAT:")
print("  (See ANOVA Summary above for F-statistics, p-values, and ηp²)")
print("\nPlanned Contrasts:")
print(f"  Faking Low: t({qiat_fl_n-1}) = {qiat_fl_t_stat:.2f}, p = {qiat_fl_t_p:.4f}, d = {qiat_fl_cohens_d:.2f}")
print(f"  Faking High: t({qiat_fh_n-1}) = {abs(qiat_fh_t_stat):.2f}, p = {qiat_fh_t_p:.4f}, d = {abs(qiat_fh_cohens_d):.2f}")


qIAT participants with complete data: 267
Group distribution:
group
0    89
1    85
2    93
Name: count, dtype: int64

Data prepared for ANOVA: 534 observations

Mixed ANOVA Results for qIAT
        Source        SS  DF1  DF2        MS         F     p-unc       np2  \
0        group  0.192687    2  264  0.096343  0.110520  0.895409  0.000837   
1         time  0.805699    1  264  0.805699  2.316163  0.129232  0.008697   
2  Interaction  5.480555    2  264  2.740277  7.877543  0.000475  0.056317   

   eps  
0  NaN  
1  1.0  
2  NaN  

------------------------------------------------------------
ANOVA Summary:
------------------------------------------------------------
Group: F(2, 264) = 0.11, p = 0.8954, ηp² = 0.00
Time: F(1, 264) = 2.32, p = 0.1292, ηp² = 0.01
Interaction: F(2, 264) = 7.88, p = 0.0005, ηp² = 0.06

Descriptive Statistics for Planned Contrasts

Faking Low group (n = 85):
  Time1: M = 0.21, SD = 1.00
  Time2: M = 0.13, SD = 0.73

Faking High group (n = 93):
  Time1: M =

## Comparison of Faking Effects Across Measures (Normalized Scores)

Compare faking effects between self-report, IAT, and qIAT using normalized scores. The faking effect is computed as the difference between normalized T2 and T1 scores (T2_norm - T1_norm) for each measure. This enables direct comparison of faking effects across measures since all scores are standardized using Time1 baseline statistics.


In [52]:
# First, compute normalized questionnaire scores using T1 baseline statistics
# Then compute faking effects (T2_norm - T1_norm) and compare across measures

# Reload data to get updated normalized D-scores
df_updated = df.copy()

# Compute normalized questionnaire scores
# Calculate T1 baseline statistics for questionnaire (across all participants)
t1_ques_all = pd.to_numeric(df_updated['t1_ques'], errors='coerce').dropna()
t1_ques_mean = t1_ques_all.mean()
t1_ques_sd = t1_ques_all.std(ddof=1)

print(f"Questionnaire T1 baseline: Mean = {t1_ques_mean:.4f}, SD = {t1_ques_sd:.4f}")

# Standardize questionnaire scores
df_updated['t1_ques_norm'] = (pd.to_numeric(df_updated['t1_ques'], errors='coerce') - t1_ques_mean) / t1_ques_sd
df_updated['t2_ques_norm'] = (pd.to_numeric(df_updated['t2_ques'], errors='coerce') - t1_ques_mean) / t1_ques_sd

# Compute faking effects (T2_norm - T1_norm) for questionnaire
df_updated['ques_faking_effect'] = df_updated['t2_ques_norm'] - df_updated['t1_ques_norm']

# For IAT and qIAT, compute faking effects from normalized D-scores
# Note: We need to merge IAT and qIAT data with questionnaire data
# Since participants have either IAT or qIAT, we'll process them separately

# Prepare IAT data
iat_data = df_updated[df_updated['task'].str.upper() == 'IAT'].copy()
iat_data['iat_faking_effect'] = (pd.to_numeric(iat_data['t2_com_d_norm'], errors='coerce') - 
                                  pd.to_numeric(iat_data['t1_com_d_norm'], errors='coerce'))

# Prepare qIAT data
qiat_data = df_updated[df_updated['task'].str.upper() == 'QIAT'].copy()
qiat_data['qiat_faking_effect'] = (pd.to_numeric(qiat_data['t2_com_d_norm'], errors='coerce') - 
                                    pd.to_numeric(qiat_data['t1_com_d_norm'], errors='coerce'))

print("\n" + "="*60)
print("Comparison of Faking Effects Across Measures")
print("="*60)

# ============================================================================
# 1. Self-Report vs IAT (Paired t-tests)
# ============================================================================
print("\n1. Self-Report vs IAT (Paired t-tests)")
print("-"*60)

# Merge questionnaire with IAT data by participant ID
iat_comparison = iat_data[['id', 'group', 'iat_faking_effect']].copy()
ques_for_iat = df_updated[['id', 'ques_faking_effect']].copy()
iat_comparison = iat_comparison.merge(ques_for_iat, on='id', how='inner')

# Faking Low group (group == 1)
iat_fl = iat_comparison[iat_comparison['group'] == 1].copy()
iat_fl = iat_fl.dropna(subset=['ques_faking_effect', 'iat_faking_effect'])

if len(iat_fl) > 0:
    ques_fl_mean = iat_fl['ques_faking_effect'].mean()
    ques_fl_sd = iat_fl['ques_faking_effect'].std(ddof=1)
    iat_fl_mean = iat_fl['iat_faking_effect'].mean()
    iat_fl_sd = iat_fl['iat_faking_effect'].std(ddof=1)
    
    # Paired t-test
    t_stat, p_val = ttest_rel(iat_fl['ques_faking_effect'], iat_fl['iat_faking_effect'])
    
    # Cohen's d for paired samples
    diff = iat_fl['ques_faking_effect'] - iat_fl['iat_faking_effect']
    cohens_d = diff.mean() / diff.std(ddof=1)
    
    print(f"\nFaking Low group (n = {len(iat_fl)}):")
    print(f"  Self-report: M = {ques_fl_mean:.2f}, SD = {ques_fl_sd:.2f}")
    print(f"  IAT: M = {iat_fl_mean:.2f}, SD = {iat_fl_sd:.2f}")
    print(f"  Paired t-test: t({len(iat_fl)-1}) = {t_stat:.2f}, p = {p_val:.4f}")
    print(f"  Cohen's d = {cohens_d:.2f}")

# Faking High group (group == 2)
iat_fh = iat_comparison[iat_comparison['group'] == 2].copy()
iat_fh = iat_fh.dropna(subset=['ques_faking_effect', 'iat_faking_effect'])

if len(iat_fh) > 0:
    ques_fh_mean = iat_fh['ques_faking_effect'].mean()
    ques_fh_sd = iat_fh['ques_faking_effect'].std(ddof=1)
    iat_fh_mean = iat_fh['iat_faking_effect'].mean()
    iat_fh_sd = iat_fh['iat_faking_effect'].std(ddof=1)
    
    # Paired t-test
    t_stat, p_val = ttest_rel(iat_fh['ques_faking_effect'], iat_fh['iat_faking_effect'])
    
    # Cohen's d for paired samples
    diff = iat_fh['ques_faking_effect'] - iat_fh['iat_faking_effect']
    cohens_d = diff.mean() / diff.std(ddof=1)
    
    print(f"\nFaking High group (n = {len(iat_fh)}):")
    print(f"  Self-report: M = {ques_fh_mean:.2f}, SD = {ques_fh_sd:.2f}")
    print(f"  IAT: M = {iat_fh_mean:.2f}, SD = {iat_fh_sd:.2f}")
    print(f"  Paired t-test: t({len(iat_fh)-1}) = {t_stat:.2f}, p = {p_val:.4f}")
    print(f"  Cohen's d = {cohens_d:.2f}")

# ============================================================================
# 2. Self-Report vs qIAT (Paired t-tests)
# ============================================================================
print("\n\n2. Self-Report vs qIAT (Paired t-tests)")
print("-"*60)

# Merge questionnaire with qIAT data by participant ID
qiat_comparison = qiat_data[['id', 'group', 'qiat_faking_effect']].copy()
ques_for_qiat = df_updated[['id', 'ques_faking_effect']].copy()
qiat_comparison = qiat_comparison.merge(ques_for_qiat, on='id', how='inner')

# Faking Low group (group == 1)
qiat_fl = qiat_comparison[qiat_comparison['group'] == 1].copy()
qiat_fl = qiat_fl.dropna(subset=['ques_faking_effect', 'qiat_faking_effect'])

if len(qiat_fl) > 0:
    ques_fl_mean = qiat_fl['ques_faking_effect'].mean()
    ques_fl_sd = qiat_fl['ques_faking_effect'].std(ddof=1)
    qiat_fl_mean = qiat_fl['qiat_faking_effect'].mean()
    qiat_fl_sd = qiat_fl['qiat_faking_effect'].std(ddof=1)
    
    # Paired t-test
    t_stat, p_val = ttest_rel(qiat_fl['ques_faking_effect'], qiat_fl['qiat_faking_effect'])
    
    # Cohen's d for paired samples
    diff = qiat_fl['ques_faking_effect'] - qiat_fl['qiat_faking_effect']
    cohens_d = diff.mean() / diff.std(ddof=1)
    
    print(f"\nFaking Low group (n = {len(qiat_fl)}):")
    print(f"  Self-report: M = {ques_fl_mean:.2f}, SD = {ques_fl_sd:.2f}")
    print(f"  qIAT: M = {qiat_fl_mean:.2f}, SD = {qiat_fl_sd:.2f}")
    print(f"  Paired t-test: t({len(qiat_fl)-1}) = {t_stat:.2f}, p = {p_val:.4f}")
    print(f"  Cohen's d = {cohens_d:.2f}")

# Faking High group (group == 2)
qiat_fh = qiat_comparison[qiat_comparison['group'] == 2].copy()
qiat_fh = qiat_fh.dropna(subset=['ques_faking_effect', 'qiat_faking_effect'])

if len(qiat_fh) > 0:
    ques_fh_mean = qiat_fh['ques_faking_effect'].mean()
    ques_fh_sd = qiat_fh['ques_faking_effect'].std(ddof=1)
    qiat_fh_mean = qiat_fh['qiat_faking_effect'].mean()
    qiat_fh_sd = qiat_fh['qiat_faking_effect'].std(ddof=1)
    
    # Paired t-test
    t_stat, p_val = ttest_rel(qiat_fh['ques_faking_effect'], qiat_fh['qiat_faking_effect'])
    
    # Cohen's d for paired samples
    diff = qiat_fh['ques_faking_effect'] - qiat_fh['qiat_faking_effect']
    cohens_d = diff.mean() / diff.std(ddof=1)
    
    print(f"\nFaking High group (n = {len(qiat_fh)}):")
    print(f"  Self-report: M = {ques_fh_mean:.2f}, SD = {ques_fh_sd:.2f}")
    print(f"  qIAT: M = {qiat_fh_mean:.2f}, SD = {qiat_fh_sd:.2f}")
    print(f"  Paired t-test: t({len(qiat_fh)-1}) = {t_stat:.2f}, p = {p_val:.4f}")
    print(f"  Cohen's d = {cohens_d:.2f}")

# ============================================================================
# 3. IAT vs qIAT (Independent t-tests)
# ============================================================================
print("\n\n3. IAT vs qIAT (Independent t-tests)")
print("-"*60)

# Faking Low group (group == 1)
iat_fl_ind = iat_data[iat_data['group'] == 1].copy()
iat_fl_ind = iat_fl_ind.dropna(subset=['iat_faking_effect'])
qiat_fl_ind = qiat_data[qiat_data['group'] == 1].copy()
qiat_fl_ind = qiat_fl_ind.dropna(subset=['qiat_faking_effect'])

if len(iat_fl_ind) > 0 and len(qiat_fl_ind) > 0:
    iat_fl_mean = iat_fl_ind['iat_faking_effect'].mean()
    iat_fl_sd = iat_fl_ind['iat_faking_effect'].std(ddof=1)
    qiat_fl_mean = qiat_fl_ind['qiat_faking_effect'].mean()
    qiat_fl_sd = qiat_fl_ind['qiat_faking_effect'].std(ddof=1)
    
    # Independent t-test
    t_stat, p_val = ttest_ind(iat_fl_ind['iat_faking_effect'], qiat_fl_ind['qiat_faking_effect'])
    
    # Cohen's d for independent samples
    pooled_sd = np.sqrt(((len(iat_fl_ind) - 1) * iat_fl_sd**2 + (len(qiat_fl_ind) - 1) * qiat_fl_sd**2) / 
                        (len(iat_fl_ind) + len(qiat_fl_ind) - 2))
    cohens_d = (iat_fl_mean - qiat_fl_mean) / pooled_sd
    
    print(f"\nFaking Low group:")
    print(f"  IAT: M = {iat_fl_mean:.2f}, SD = {iat_fl_sd:.2f}, n = {len(iat_fl_ind)}")
    print(f"  qIAT: M = {qiat_fl_mean:.2f}, SD = {qiat_fl_sd:.2f}, n = {len(qiat_fl_ind)}")
    print(f"  Independent t-test: t({len(iat_fl_ind) + len(qiat_fl_ind) - 2}) = {t_stat:.2f}, p = {p_val:.4f}")
    print(f"  Cohen's d = {cohens_d:.2f}")

# Faking High group (group == 2)
iat_fh_ind = iat_data[iat_data['group'] == 2].copy()
iat_fh_ind = iat_fh_ind.dropna(subset=['iat_faking_effect'])
qiat_fh_ind = qiat_data[qiat_data['group'] == 2].copy()
qiat_fh_ind = qiat_fh_ind.dropna(subset=['qiat_faking_effect'])

if len(iat_fh_ind) > 0 and len(qiat_fh_ind) > 0:
    iat_fh_mean = iat_fh_ind['iat_faking_effect'].mean()
    iat_fh_sd = iat_fh_ind['iat_faking_effect'].std(ddof=1)
    qiat_fh_mean = qiat_fh_ind['qiat_faking_effect'].mean()
    qiat_fh_sd = qiat_fh_ind['qiat_faking_effect'].std(ddof=1)
    
    # Independent t-test
    t_stat, p_val = ttest_ind(iat_fh_ind['iat_faking_effect'], qiat_fh_ind['qiat_faking_effect'])
    
    # Cohen's d for independent samples
    pooled_sd = np.sqrt(((len(iat_fh_ind) - 1) * iat_fh_sd**2 + (len(qiat_fh_ind) - 1) * qiat_fh_sd**2) / 
                        (len(iat_fh_ind) + len(qiat_fh_ind) - 2))
    cohens_d = (iat_fh_mean - qiat_fh_mean) / pooled_sd
    
    print(f"\nFaking High group:")
    print(f"  IAT: M = {iat_fh_mean:.2f}, SD = {iat_fh_sd:.2f}, n = {len(iat_fh_ind)}")
    print(f"  qIAT: M = {qiat_fh_mean:.2f}, SD = {qiat_fh_sd:.2f}, n = {len(qiat_fh_ind)}")
    print(f"  Independent t-test: t({len(iat_fh_ind) + len(qiat_fh_ind) - 2}) = {t_stat:.2f}, p = {p_val:.4f}")
    print(f"  Cohen's d = {cohens_d:.2f}")

print("\n" + "="*60)
print("SUMMARY (for article):")
print("="*60)
print("\nSelf-Report vs IAT:")
if len(iat_fl) > 0:
    print(f"  Faking Low: t({len(iat_fl)-1}) = {ttest_rel(iat_fl['ques_faking_effect'], iat_fl['iat_faking_effect'])[0]:.2f}, p < .001, d = {cohens_d_paired(iat_fl['ques_faking_effect'], iat_fl['iat_faking_effect']):.2f}")
if len(iat_fh) > 0:
    print(f"  Faking High: t({len(iat_fh)-1}) = {ttest_rel(iat_fh['ques_faking_effect'], iat_fh['iat_faking_effect'])[0]:.2f}, p < .001, d = {cohens_d_paired(iat_fh['ques_faking_effect'], iat_fh['iat_faking_effect']):.2f}")

print("\nSelf-Report vs qIAT:")
if len(qiat_fl) > 0:
    print(f"  Faking Low: t({len(qiat_fl)-1}) = {ttest_rel(qiat_fl['ques_faking_effect'], qiat_fl['qiat_faking_effect'])[0]:.2f}, p < .001, d = {cohens_d_paired(qiat_fl['ques_faking_effect'], qiat_fl['qiat_faking_effect']):.2f}")
if len(qiat_fh) > 0:
    print(f"  Faking High: t({len(qiat_fh)-1}) = {ttest_rel(qiat_fh['ques_faking_effect'], qiat_fh['qiat_faking_effect'])[0]:.2f}, p < .001, d = {cohens_d_paired(qiat_fh['ques_faking_effect'], qiat_fh['qiat_faking_effect']):.2f}")

print("\nIAT vs qIAT:")
if len(iat_fl_ind) > 0 and len(qiat_fl_ind) > 0:
    pooled_sd_fl = np.sqrt(((len(iat_fl_ind) - 1) * iat_fl_ind['iat_faking_effect'].std(ddof=1)**2 + (len(qiat_fl_ind) - 1) * qiat_fl_ind['qiat_faking_effect'].std(ddof=1)**2) / (len(iat_fl_ind) + len(qiat_fl_ind) - 2))
    d_fl = (iat_fl_ind['iat_faking_effect'].mean() - qiat_fl_ind['qiat_faking_effect'].mean()) / pooled_sd_fl
    print(f"  Faking Low: t({len(iat_fl_ind) + len(qiat_fl_ind) - 2}) = {ttest_ind(iat_fl_ind['iat_faking_effect'], qiat_fl_ind['qiat_faking_effect'])[0]:.2f}, p = {ttest_ind(iat_fl_ind['iat_faking_effect'], qiat_fl_ind['qiat_faking_effect'])[1]:.3f}, d = {d_fl:.2f}")
if len(iat_fh_ind) > 0 and len(qiat_fh_ind) > 0:
    pooled_sd_fh = np.sqrt(((len(iat_fh_ind) - 1) * iat_fh_ind['iat_faking_effect'].std(ddof=1)**2 + (len(qiat_fh_ind) - 1) * qiat_fh_ind['qiat_faking_effect'].std(ddof=1)**2) / (len(iat_fh_ind) + len(qiat_fh_ind) - 2))
    d_fh = (iat_fh_ind['iat_faking_effect'].mean() - qiat_fh_ind['qiat_faking_effect'].mean()) / pooled_sd_fh
    print(f"  Faking High: t({len(iat_fh_ind) + len(qiat_fh_ind) - 2}) = {ttest_ind(iat_fh_ind['iat_faking_effect'], qiat_fh_ind['qiat_faking_effect'])[0]:.2f}, p = {ttest_ind(iat_fh_ind['iat_faking_effect'], qiat_fh_ind['qiat_faking_effect'])[1]:.3f}, d = {d_fh:.2f}")


Questionnaire T1 baseline: Mean = 27.8731, SD = 8.7502

Comparison of Faking Effects Across Measures

1. Self-Report vs IAT (Paired t-tests)
------------------------------------------------------------

Faking Low group (n = 84):
  Self-report: M = -1.76, SD = 0.97
  IAT: M = 0.15, SD = 0.85
  Paired t-test: t(83) = -14.71, p = 0.0000
  Cohen's d = -1.60

Faking High group (n = 85):
  Self-report: M = 2.44, SD = 1.06
  IAT: M = 0.53, SD = 1.00
  Paired t-test: t(84) = 13.33, p = 0.0000
  Cohen's d = 1.45


2. Self-Report vs qIAT (Paired t-tests)
------------------------------------------------------------

Faking Low group (n = 85):
  Self-report: M = -2.01, SD = 1.08
  qIAT: M = -0.09, SD = 1.09
  Paired t-test: t(84) = -13.05, p = 0.0000
  Cohen's d = -1.42

Faking High group (n = 93):
  Self-report: M = 2.44, SD = 1.05
  qIAT: M = 0.40, SD = 0.97
  Paired t-test: t(92) = 15.54, p = 0.0000
  Cohen's d = 1.61


3. IAT vs qIAT (Independent t-tests)
-------------------------------------