In [1]:
# =============================================================================
# RQ3: How do concept-based interventions influence cheating behavior, 
# performance, and experience through underlying psychological mechanisms?
# =============================================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.patches as mpatches
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import sys, os, warnings

# Setup
os.chdir('game-behavior-analytics/data_analysis_notebook/')
sys.path.append(os.path.abspath('utils'))
warnings.filterwarnings('ignore')

from data_utils import load_and_prepare_data

# Load data
df, concepts = load_and_prepare_data("../data/final_dataset.csv")

print("="*80)
print("RQ3: MECHANISM-BASED PREDICTION ANALYSIS")
print("="*80)
print("RQ3.1: Can we predict cheating group changes via mechanism changes?")
print("RQ3.1.1: Is prediction accuracy better than using concepts alone?")
print("RQ3.2: Can we predict performance changes via mechanisms by cheating groups?")
print("RQ3.2.1: Is prediction accuracy better than using concepts alone?")
print("RQ3.3: Can we predict experience changes via mechanisms by cheating groups?")
print("RQ3.3.1: Is prediction accuracy better than using concepts alone?")
print("="*80)

RQ3: MECHANISM-BASED PREDICTION ANALYSIS
RQ3.1: Can we predict cheating group changes via mechanism changes?
RQ3.1.1: Is prediction accuracy better than using concepts alone?
RQ3.2: Can we predict performance changes via mechanisms by cheating groups?
RQ3.2.1: Is prediction accuracy better than using concepts alone?
RQ3.3: Can we predict experience changes via mechanisms by cheating groups?
RQ3.3.1: Is prediction accuracy better than using concepts alone?


In [2]:
# =============================================================================
# SETUP: Define mechanisms and display names
# =============================================================================

# Original mechanism column names
mechanisms = [
    'autonomy_need_satisfaction', 'autonomy_need_frustration',
    'competence_need_satisfaction', 'competence_need_frustration',
    'relatedness_need_satisfaction', 'relatedness_need_frustration',
    'cognitive_discomfort', 'moral_disengagement',
    'injunctive_norms', 'descriptive_norms', 'reference_group_identification', 'social_sanctions',
    'performance_accomplishments', 'vicarious_experience', 'verbal_persuasion', 'emotional_arousal'
]

# Display names for presentation
mechanism_display_names = {
    'autonomy_need_satisfaction': 'Autonomy Need Satisfaction',
    'autonomy_need_frustration': 'Autonomy Need Frustration',
    'competence_need_satisfaction': 'Competence Need Satisfaction',
    'competence_need_frustration': 'Competence Need Frustration',
    'relatedness_need_satisfaction': 'Relatedness Need Satisfaction',
    'relatedness_need_frustration': 'Relatedness Need Frustration',
    'cognitive_discomfort': 'Cognitive Discomfort',
    'moral_disengagement': 'Moral Disengagement',
    'descriptive_norms': 'Perceived Descriptive Norms',
    'injunctive_norms': 'Perceived Injunctive Norms',
    'reference_group_identification': 'Perceived Reference Group Identification',
    'social_sanctions': 'Perceived Social Sanctions',
    'performance_accomplishments': 'Perceived Performance Accomplishments',
    'vicarious_experience': 'Perceived Vicarious Experience',
    'verbal_persuasion': 'Perceived Verbal Persuasion',
    'emotional_arousal': 'Perceived Emotional Arousal'
}

# Mechanism order for visualizations
mechanism_order = mechanisms[::-1]

# Calculate control group baseline
control_data = df[df['concept'] == 'control']
control_means = control_data[mechanisms].mean()
control_performance = control_data['performance'].mean()
control_experience = control_data['experience'].mean()

# Create intervention dataset (exclude control)
intervention_data = df[df['concept'] != 'control'].copy()

print(f"✓ Control baseline calculated (N={len(control_data)})")
print(f"✓ Intervention participants: {len(intervention_data)}")
print(f"✓ Analyzing {len(mechanisms)} mechanisms")


✓ Control baseline calculated (N=73)
✓ Intervention participants: 1159
✓ Analyzing 16 mechanisms


In [3]:
# ===================================================
# CORE FUNCTION: Calculate R² for all predictions
# ===================================================

def calculate_prediction_accuracy(intervention_data, mechanisms, control_means):
    """
    Calculate R² values using RQ3_old.ipynb logic.
    Returns cheating behavior and performance/experience predictions.
    """
    groups = [(0, 'Non-Cheaters'), (1, 'Partial-Cheaters'), (2, 'Full-Cheaters')]
    cheating_results = {}
    performance_experience_results = {}
    
    print("\nCalculating R² values for all prediction tasks...")
    
    # ========== PART 1: CHEATING BEHAVIOR PREDICTION ==========
    for mechanism in mechanisms:
        if mechanism not in intervention_data.columns:
            continue
            
        mechanism_delta = intervention_data[mechanism] - control_means[mechanism]
        
        valid_cheating_data = pd.DataFrame({
            'delta_mechanism': mechanism_delta,
            'cheating_behavior': intervention_data['cheating_behavior']
        }).dropna()
        
        if len(valid_cheating_data) < 50:
            cheating_results[mechanism] = {
                'Non-Cheaters': {'r2': 0, 'direction': 'none'},
                'Partial-Cheaters': {'r2': 0, 'direction': 'none'},
                'Full-Cheaters': {'r2': 0, 'direction': 'none'},
                'n': 0
            }
            continue
            
        r2_values = {}
        for target_group, group_name in groups:
            y_binary = (valid_cheating_data['cheating_behavior'] == target_group).astype(float)
            X = valid_cheating_data[['delta_mechanism']].values
            
            if len(np.unique(y_binary)) > 1:
                try:
                    lr = LinearRegression().fit(X, y_binary)
                    r2_val = max(0, r2_score(y_binary, lr.predict(X)))
                    coefficient = lr.coef_[0]
                    r2_values[group_name] = {
                        'r2': r2_val,
                        'coef': coefficient,
                        'direction': 'positive' if coefficient > 0 else 'negative'
                    }
                except:
                    r2_values[group_name] = {'r2': 0, 'coef': 0, 'direction': 'none'}
            else:
                r2_values[group_name] = {'r2': 0, 'coef': 0, 'direction': 'none'}
        
        r2_values['n'] = len(valid_cheating_data)
        cheating_results[mechanism] = r2_values
    
    # ========== PART 2: PERFORMANCE & EXPERIENCE PREDICTION ==========
    for mechanism in mechanisms:
        performance_experience_results[mechanism] = {}
        
        for group_val, group_name in groups:
            group_data = intervention_data[intervention_data['cheating_behavior'] == group_val].copy()
            
            if len(group_data) < 30:
                continue
            
            mechanism_delta = group_data[mechanism] - control_means[mechanism]
            performance_delta = group_data['performance'] - control_performance
            experience_delta = group_data['experience'] - control_experience
            
            valid_data = pd.DataFrame({
                'delta_mechanism': mechanism_delta,
                'delta_performance': performance_delta,
                'delta_experience': experience_delta
            }).dropna()
            
            if len(valid_data) < 20:
                continue
                
            X = valid_data[['delta_mechanism']].values
            
            # Performance
            y_perf = valid_data['delta_performance'].values
            if len(np.unique(y_perf)) > 1:
                try:
                    lr_perf = LinearRegression().fit(X, y_perf)
                    r2_perf = max(0, r2_score(y_perf, lr_perf.predict(X)))
                    coef_perf = lr_perf.coef_[0]
                    performance_experience_results[mechanism][f'{group_name}_performance'] = {
                        'r2': r2_perf, 'direction': 'positive' if coef_perf > 0 else 'negative'
                    }
                except:
                    performance_experience_results[mechanism][f'{group_name}_performance'] = {'r2': 0, 'direction': 'none'}
                
            # Experience
            y_exp = valid_data['delta_experience'].values
            if len(np.unique(y_exp)) > 1:
                try:
                    lr_exp = LinearRegression().fit(X, y_exp)
                    r2_exp = max(0, r2_score(y_exp, lr_exp.predict(X)))
                    coef_exp = lr_exp.coef_[0]
                    performance_experience_results[mechanism][f'{group_name}_experience'] = {
                        'r2': r2_exp, 'direction': 'positive' if coef_exp > 0 else 'negative'
                    }
                except:
                    performance_experience_results[mechanism][f'{group_name}_experience'] = {'r2': 0, 'direction': 'none'}
            
            performance_experience_results[mechanism][f'{group_name}_n'] = len(valid_data)
    
    return cheating_results, performance_experience_results

# Execute calculation
cheating_r2_results, pe_r2_results = calculate_prediction_accuracy(
    intervention_data, mechanisms, control_means
)

print("✓ R² calculations complete")


Calculating R² values for all prediction tasks...
✓ R² calculations complete


In [4]:
# =============================================================================
# RQ3.1: CHEATING BEHAVIOR PREDICTION - Enhanced Table
# =============================================================================

print("\n" + "="*90)
print("RQ3.1: MECHANISM CHANGES → CHEATING BEHAVIOR PREDICTION")
print("="*90)
print(f"{'Mechanism':<45} {'Non-Cheaters':<15} {'Partial-Cheaters':<15} {'Full-Cheaters':<15}")
print("-" * 90)

for mechanism in mechanisms:
    if mechanism in cheating_r2_results:
        display_name = mechanism_display_names.get(mechanism, mechanism.replace('_', ' ').title())
        
        nc = cheating_r2_results[mechanism]['Non-Cheaters']
        pc = cheating_r2_results[mechanism]['Partial-Cheaters']
        fc = cheating_r2_results[mechanism]['Full-Cheaters']
        
        nc_symbol = "↑" if nc.get('direction') == 'positive' else "↓" if nc.get('direction') == 'negative' else ""
        pc_symbol = "↑" if pc.get('direction') == 'positive' else "↓" if pc.get('direction') == 'negative' else ""
        fc_symbol = "↑" if fc.get('direction') == 'positive' else "↓" if fc.get('direction') == 'negative' else ""
        
        print(f"{display_name:<45} {nc['r2']:.4f}{nc_symbol:<11} {pc['r2']:.4f}{pc_symbol:<11} {fc['r2']:.4f}{fc_symbol:<11}")

print(f"\n✓ RQ3.1 Complete")



RQ3.1: MECHANISM CHANGES → CHEATING BEHAVIOR PREDICTION
Mechanism                                     Non-Cheaters    Partial-Cheaters Full-Cheaters  
------------------------------------------------------------------------------------------
Autonomy Need Satisfaction                    0.0076↓           0.0032↑           0.0027↑          
Autonomy Need Frustration                     0.0001↑           0.0018↓           0.0008↑          
Competence Need Satisfaction                  0.0148↓           0.0006↑           0.0157↑          
Competence Need Frustration                   0.0026↑           0.0000↑           0.0047↓          
Relatedness Need Satisfaction                 0.0069↓           0.0003↓           0.0143↑          
Relatedness Need Frustration                  0.0000↑           0.0001↓           0.0000↑          
Cognitive Discomfort                          0.0001↓           0.0010↓           0.0021↑          
Moral Disengagement                           0.0000↑    

In [5]:
# =============================================================================
# RQ3.1.1: CONCEPT vs MECHANISM COMPARISON
# =============================================================================

print("\n" + "="*80)
print("RQ3.1.1: CONCEPT-LEVEL vs MECHANISM-LEVEL PREDICTION ACCURACY")
print("="*80)

concept_mechanism_mapping = {
    'autonomy': ['autonomy_need_satisfaction', 'autonomy_need_frustration'],
    'competence': ['competence_need_satisfaction', 'competence_need_frustration'],
    'relatedness': ['relatedness_need_satisfaction', 'relatedness_need_frustration'],
    'cognitive_dissonance': ['cognitive_discomfort', 'moral_disengagement'],
    'social_norms': ['injunctive_norms', 'descriptive_norms', 'reference_group_identification', 'social_sanctions'],
    'self_efficacy': ['performance_accomplishments', 'vicarious_experience', 'verbal_persuasion', 'emotional_arousal']
}

def compare_concept_vs_mechanisms():
    """Compare prediction accuracy: concepts alone vs their underlying mechanisms"""
    comparison_results = {}
    groups = [(0, 'Non-Cheaters'), (1, 'Partial-Cheaters'), (2, 'Full-Cheaters')]
    
    for concept, related_mechanisms in concept_mechanism_mapping.items():
        comparison_results[concept] = {}
        concept_data = intervention_data[intervention_data['concept'] == concept].copy()
        
        if len(concept_data) < 30:
            continue
        
        for target_group, group_name in groups:
            # Concept-level prediction
            other_data = intervention_data[
                (intervention_data['concept'] != concept) & 
                (intervention_data['cheating_behavior'] == target_group)
            ]
            
            all_data = pd.concat([
                concept_data.assign(concept_received=1),
                other_data.assign(concept_received=0)
            ])
            
            y_concept = (all_data['cheating_behavior'] == target_group).astype(float).values
            X_concept = all_data[['concept_received']].values
            
            valid_idx = ~np.isnan(y_concept)
            if valid_idx.sum() > 10 and len(np.unique(y_concept[valid_idx])) > 1:
                try:
                    lr = LinearRegression().fit(X_concept[valid_idx], y_concept[valid_idx])
                    concept_r2 = max(0, r2_score(y_concept[valid_idx], lr.predict(X_concept[valid_idx])))
                except:
                    concept_r2 = 0
            else:
                concept_r2 = 0
            
            # Mechanism-level prediction
            mechanism_cols = [mech for mech in related_mechanisms if mech in concept_data.columns]
            
            if mechanism_cols:
                mechanism_deltas = []
                for mech in mechanism_cols:
                    mechanism_deltas.append(concept_data[mech] - control_means[mech])
                
                X_mechanisms = pd.concat(mechanism_deltas, axis=1).fillna(0).values
                y_mechanisms = (concept_data['cheating_behavior'] == target_group).astype(float).values
                
                valid_mech_idx = ~np.isnan(y_mechanisms)
                y_mechanisms = y_mechanisms[valid_mech_idx]
                X_mechanisms = X_mechanisms[valid_mech_idx]
                
                if len(np.unique(y_mechanisms)) > 1 and len(y_mechanisms) >= 10:
                    try:
                        lr_mech = LinearRegression().fit(X_mechanisms, y_mechanisms)
                        mechanism_r2 = max(0, r2_score(y_mechanisms, lr_mech.predict(X_mechanisms)))
                    except:
                        mechanism_r2 = 0
                else:
                    mechanism_r2 = 0
            else:
                mechanism_r2 = 0
            
            comparison_results[concept][group_name] = {
                'concept_r2': concept_r2,
                'mechanism_r2': mechanism_r2,
                'improvement': mechanism_r2 - concept_r2
            }
    
    return comparison_results

concept_vs_mechanism_results = compare_concept_vs_mechanisms()

print(f"{'Concept':<20} {'Group':<18} {'Concept R²':<12} {'Mechanism R²':<14} {'Improvement':<12}")
print("-" * 76)

for concept, results in concept_vs_mechanism_results.items():
    for group_name, values in results.items():
        concept_display = concept.replace('_', ' ').title()
        print(f"{concept_display:<20} {group_name:<18} "
              f"{values['concept_r2']:.4f}      {values['mechanism_r2']:.4f}        "
              f"{values['improvement']:+.4f}")

print(f"\n✓ RQ3.1.1 Complete")


RQ3.1.1: CONCEPT-LEVEL vs MECHANISM-LEVEL PREDICTION ACCURACY
Concept              Group              Concept R²   Mechanism R²   Improvement 
----------------------------------------------------------------------------
Autonomy             Non-Cheaters       0.3188      0.0282        -0.2906
Autonomy             Partial-Cheaters   0.7433      0.0441        -0.6993
Autonomy             Full-Cheaters      0.8129      0.0222        -0.7907
Competence           Non-Cheaters       0.3142      0.0271        -0.2871
Competence           Partial-Cheaters   0.8273      0.0112        -0.8161
Competence           Full-Cheaters      0.7351      0.0346        -0.7005
Relatedness          Non-Cheaters       0.3345      0.0038        -0.3307
Relatedness          Partial-Cheaters   0.8288      0.0933        -0.7355
Relatedness          Full-Cheaters      0.7069      0.0296        -0.6772

✓ RQ3.1.1 Complete


In [None]:
# =============================================================================
# RQ3: How do concept-based interventions influence cheating behavior, 
# performance, and experience through underlying psychological mechanisms?
# =============================================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score
import sys, os, warnings

# Setup
os.chdir('game-behavior-analytics/data_analysis_notebook/')
sys.path.append(os.path.abspath('utils'))
warnings.filterwarnings('ignore')

from data_utils import load_and_prepare_data

# Load data
df, concepts = load_and_prepare_data("../data/final_dataset.csv")

print("="*80)
print("RQ3: MECHANISM-BASED PREDICTION ANALYSIS")
print("="*80)
print("RQ3.1: Can we predict cheating group changes via mechanism changes?")
print("RQ3.1.1: Is prediction accuracy better than using concepts alone?")
print("RQ3.2: Can we predict performance changes via mechanisms by cheating groups?")
print("RQ3.3: Can we predict experience changes via mechanisms by cheating groups?")
print("="*80)

In [None]:

# =============================================================================
# SETUP: Define mechanisms and calculate baseline changes
# =============================================================================

# Define psychological mechanisms from four theoretical frameworks
mechanisms = [
    # Self-Determination Theory
    'autonomy_need_satisfaction', 'autonomy_need_frustration',
    'competence_need_satisfaction', 'competence_need_frustration',
    'relatedness_need_satisfaction', 'relatedness_need_frustration',
    # Cognitive Dissonance Theory
    'cognitive_discomfort', 'moral_disengagement',
    # Social Norms Theory
    'injunctive_norms', 'descriptive_norms', 'reference_group_identification', 'social_sanctions',
    # Self-Efficacy Theory
    'performance_accomplishments', 'vicarious_experience', 'verbal_persuasion', 'emotional_arousal'
]

# Calculate control group baseline
control_data = df[df['concept'] == 'control']
control_means = control_data[mechanisms].mean()
control_performance = control_data['performance'].mean()
control_experience = control_data['experience'].mean()

print(f"✓ Control baseline calculated (N={len(control_data)})")

# Calculate mechanism changes for all participants (delta from control)
for mech in mechanisms:
    df[f'delta_{mech}'] = df[mech] - control_means[mech]

# Calculate outcome changes
df['delta_performance'] = df['performance'] - control_performance
df['delta_experience'] = df['experience'] - control_experience

# Create intervention dataset (exclude control)
intervention_data = df[df['concept'] != 'control'].copy()
mechanism_delta_cols = [f'delta_{mech}' for mech in mechanisms]

print(f"✓ Intervention participants: {len(intervention_data)}")
print(f"✓ Mechanism changes calculated for {len(mechanisms)} mechanisms")
# =============================================================================
# RQ3.1: Can we predict cheating group changes via mechanism changes?
# =============================================================================

print("\n" + "="*80)
print("RQ3.1: PREDICTING CHEATING GROUP CHANGES VIA MECHANISM CHANGES")
print("="*80)

def predict_cheating_from_mechanisms():
    """Calculate R² for predicting cheating behavior from individual mechanism changes"""
    
    groups = [(0, 'Non-Cheaters'), (1, 'Partial-Cheaters'), (2, 'Full-Cheaters')]
    cheating_r2_results = {}
    
    print("Calculating R² values for cheating behavior prediction...")
    
    for mechanism in mechanisms:
        if mechanism not in intervention_data.columns:
            continue
            
        # Calculate mechanism change from control
        mechanism_delta = intervention_data[mechanism] - control_means[mechanism]
        
        # Clean data
        valid_data = pd.DataFrame({
            'delta_mechanism': mechanism_delta,
            'cheating_behavior': intervention_data['cheating_behavior']
        }).dropna()
        
        if len(valid_data) < 50:
            cheating_r2_results[mechanism] = {group_name: {'r2': 0, 'direction': 'none'} 
                                           for _, group_name in groups}
            continue
            
        # Calculate R² for each cheating group (binary: group vs others)
        r2_values = {}
        for target_group, group_name in groups:
            y_binary = (valid_data['cheating_behavior'] == target_group).astype(float)
            X = valid_data[['delta_mechanism']].values
            
            if len(np.unique(y_binary)) > 1:
                try:
                    lr = LinearRegression().fit(X, y_binary)
                    r2_val = max(0, r2_score(y_binary, lr.predict(X)))
                    coefficient = lr.coef_[0]
                    r2_values[group_name] = {
                        'r2': r2_val,
                        'direction': 'positive' if coefficient > 0 else 'negative'
                    }
                except:
                    r2_values[group_name] = {'r2': 0, 'direction': 'none'}
            else:
                r2_values[group_name] = {'r2': 0, 'direction': 'none'}
        
        cheating_r2_results[mechanism] = r2_values
    
    return cheating_r2_results

# Execute RQ3.1
cheating_prediction_results = predict_cheating_from_mechanisms()

# Display complete R² table for cheating behavior
print("\nCOMPLETE R² TABLE: MECHANISM CHANGES → CHEATING BEHAVIOR")
print("="*90)
print(f"{'Mechanism':<40} {'Non-Cheaters':<15} {'Partial-Cheaters':<15} {'Full-Cheaters':<15}")
print("-" * 90)

for mechanism in mechanisms:
    if mechanism in cheating_prediction_results:
        display_name = mechanism.replace('_', ' ').title()
        
        nc_result = cheating_prediction_results[mechanism]['Non-Cheaters']
        pc_result = cheating_prediction_results[mechanism]['Partial-Cheaters'] 
        fc_result = cheating_prediction_results[mechanism]['Full-Cheaters']
        
        nc_text = f"{nc_result['r2']:.4f}"
        pc_text = f"{pc_result['r2']:.4f}"
        fc_text = f"{fc_result['r2']:.4f}"
        
        print(f"{display_name:<40} {nc_text:<15} {pc_text:<15} {fc_text:<15}")

print(f"\n✓ RQ3.1 Complete: All {len(mechanisms)} mechanisms analyzed")
# =============================================================================
# RQ3.1.1: Is prediction accuracy better than using concepts alone?
# =============================================================================

print("\n" + "="*80)
print("RQ3.1.1: COMPARISON - MECHANISMS vs CONCEPTS FOR CHEATING PREDICTION")
print("="*80)

# Define concept-to-mechanism mapping based on theory
concept_mechanism_mapping = {
    # Self-Determination Theory
    'autonomy': ['autonomy_need_satisfaction', 'autonomy_need_frustration'],
    'competence': ['competence_need_satisfaction', 'competence_need_frustration'], 
    'relatedness': ['relatedness_need_satisfaction', 'relatedness_need_frustration'],
    
    # Cognitive Dissonance Theory
    'self_concept': ['cognitive_discomfort', 'moral_disengagement'],
    'cognitive_inconsistency': ['cognitive_discomfort', 'moral_disengagement'],
    'dissonance_arousal': ['cognitive_discomfort', 'moral_disengagement'], 
    'dissonance_reduction': ['cognitive_discomfort', 'moral_disengagement'],
    
    # Self-Efficacy Theory
    'performance_accomplishments': ['performance_accomplishments'],
    'vicarious_experience': ['vicarious_experience'],
    'verbal_persuasion': ['verbal_persuasion'],
    'emotional_arousal': ['emotional_arousal'],
    
    # Social Norms Theory
    'descriptive_norms': ['descriptive_norms'],
    'injunctive_norms': ['injunctive_norms'],
    'social_sanctions': ['social_sanctions'],
    'reference_group_identification': ['reference_group_identification']
}

def compare_concept_vs_mechanisms():
    """Compare prediction accuracy: concept-level vs mechanism-level for each concept"""
    
    comparison_results = {}
    groups = [(0, 'Non-Cheaters'), (1, 'Partial-Cheaters'), (2, 'Full-Cheaters')]
    
    print("Calculating concept vs mechanism comparison...")
    
    for concept, related_mechanisms in concept_mechanism_mapping.items():
        comparison_results[concept] = {}
        
        # Get data for this specific concept
        concept_data = intervention_data[intervention_data['concept'] == concept].copy()
        
        if len(concept_data) < 30:
            for _, group_name in groups:
                comparison_results[concept][group_name] = {
                    'concept_r2': 0,
                    'mechanism_r2': 0,
                    'improvement': 0
                }
            continue
        
        for target_group, group_name in groups:
            y_binary = (concept_data['cheating_behavior'] == target_group).astype(float)
            
            if len(np.unique(y_binary)) <= 1:
                comparison_results[concept][group_name] = {
                    'concept_r2': 0,
                    'mechanism_r2': 0,
                    'improvement': 0
                }
                continue
            
            # Method 1: Concept-level prediction (binary: received this concept or not)
            # For within-concept analysis, we compare to all other concepts
            other_concepts_data = intervention_data[intervention_data['concept'] != concept]
            combined_data = pd.concat([concept_data.assign(concept_received=1), 
                                     other_concepts_data.assign(concept_received=0)])
            
            y_combined = (combined_data['cheating_behavior'] == target_group).astype(float)
            X_concept = combined_data[['concept_received']].values
            
            valid_concept_idx = y_combined.notna()
            y_concept = y_combined[valid_concept_idx]
            X_concept = X_concept[valid_concept_idx]
            
            if len(np.unique(y_concept)) > 1:
                try:
                    lr_concept = LinearRegression().fit(X_concept, y_concept)
                    concept_r2 = max(0, r2_score(y_concept, lr_concept.predict(X_concept)))
                except:
                    concept_r2 = 0
            else:
                concept_r2 = 0
            
            # Method 2: Mechanism-level prediction (use related mechanisms)
            mechanism_cols = [f'delta_{mech}' for mech in related_mechanisms if f'delta_{mech}' in concept_data.columns]
            
            if mechanism_cols:
                X_mechanisms = concept_data[mechanism_cols].fillna(0).values
                y_mechanisms = (concept_data['cheating_behavior'] == target_group).astype(float).values
                
                valid_mech_idx = ~np.isnan(y_mechanisms)
                y_mechanisms = y_mechanisms[valid_mech_idx]
                X_mechanisms = X_mechanisms[valid_mech_idx]
                
                if len(np.unique(y_mechanisms)) > 1 and len(y_mechanisms) >= 10:
                    try:
                        if len(mechanism_cols) == 1:
                            lr_mech = LinearRegression().fit(X_mechanisms.reshape(-1, 1), y_mechanisms)
                            mechanism_r2 = max(0, r2_score(y_mechanisms, lr_mech.predict(X_mechanisms.reshape(-1, 1))))
                        else:
                            lr_mech = LinearRegression().fit(X_mechanisms, y_mechanisms)
                            mechanism_r2 = max(0, r2_score(y_mechanisms, lr_mech.predict(X_mechanisms)))
                    except:
                        mechanism_r2 = 0
                else:
                    mechanism_r2 = 0
            else:
                mechanism_r2 = 0
            
            improvement = mechanism_r2 - concept_r2
            
            comparison_results[concept][group_name] = {
                'concept_r2': concept_r2,
                'mechanism_r2': mechanism_r2,
                'improvement': improvement
            }
    
    return comparison_results

# Execute RQ3.1.1
concept_vs_mechanism_results = compare_concept_vs_mechanisms()

# Display complete comparison table
print("\nCOMPLETE COMPARISON TABLE: CONCEPT vs MECHANISM PREDICTION")
print("="*130)
print(f"{'Concept':<25} {'Group':<15} {'Concept R²':<12} {'Mechanism R²':<15} {'Improvement':<15} {'Theory':<20}")
print("-" * 130)

theory_mapping = {
    'autonomy': 'Self-Determination',
    'competence': 'Self-Determination', 
    'relatedness': 'Self-Determination',
    'self_concept': 'Cognitive Dissonance',
    'cognitive_inconsistency': 'Cognitive Dissonance',
    'dissonance_arousal': 'Cognitive Dissonance',
    'dissonance_reduction': 'Cognitive Dissonance',
    'performance_accomplishments': 'Self-Efficacy',
    'vicarious_experience': 'Self-Efficacy',
    'verbal_persuasion': 'Self-Efficacy',
    'emotional_arousal': 'Self-Efficacy',
    'descriptive_norms': 'Social Norms',
    'injunctive_norms': 'Social Norms',
    'social_sanctions': 'Social Norms',
    'reference_group_identification': 'Social Norms'
}

groups = ['Non-Cheaters', 'Partial-Cheaters', 'Full-Cheaters']

for concept, group_results in concept_vs_mechanism_results.items():
    theory = theory_mapping.get(concept, 'Unknown')
    concept_display = concept.replace('_', ' ').title()
    
    for group in groups:
        if group in group_results:
            result = group_results[group]
            concept_r2_text = f"{result['concept_r2']:.4f}"
            mechanism_r2_text = f"{result['mechanism_r2']:.4f}"
            improvement_text = f"{result['improvement']:+.4f}"
            
            print(f"{concept_display:<25} {group:<15} {concept_r2_text:<12} {mechanism_r2_text:<15} {improvement_text:<15} {theory:<20}")

print(f"\n✓ RQ3.1.1 Complete: All {len(concept_mechanism_mapping)} concepts compared")

# =============================================================================
# RQ3.2: Can we predict performance changes via mechanisms by cheating groups?
# =============================================================================

print("\n" + "="*80)
print("RQ3.2: PREDICTING PERFORMANCE CHANGES VIA MECHANISMS BY CHEATING GROUPS")
print("="*80)

def predict_performance_by_groups():
    """Calculate R² for predicting performance changes by cheating groups"""
    
    groups = [(0, 'Non-Cheaters'), (1, 'Partial-Cheaters'), (2, 'Full-Cheaters')]
    performance_results = {}
    
    print("Calculating performance prediction by cheating groups...")
    
    for mechanism in mechanisms:
        performance_results[mechanism] = {}
        
        for group_val, group_name in groups:
            group_data = intervention_data[intervention_data['cheating_behavior'] == group_val].copy()
            
            if len(group_data) < 30:
                performance_results[mechanism][f'{group_name}_performance'] = {'r2': 0, 'direction': 'none'}
                continue
            
            # Prepare data for this group
            mechanism_delta = group_data[mechanism] - control_means[mechanism]
            performance_delta = group_data['delta_performance']
            
            valid_data = pd.DataFrame({
                'delta_mechanism': mechanism_delta,
                'delta_performance': performance_delta
            }).dropna()
            
            if len(valid_data) < 20:
                performance_results[mechanism][f'{group_name}_performance'] = {'r2': 0, 'direction': 'none'}
                continue
            
            # Calculate R² for performance prediction
            X = valid_data[['delta_mechanism']].values
            y = valid_data['delta_performance'].values
            
            if len(np.unique(y)) > 1:
                try:
                    lr = LinearRegression().fit(X, y)
                    r2_val = max(0, r2_score(y, lr.predict(X)))
                    coefficient = lr.coef_[0]
                    performance_results[mechanism][f'{group_name}_performance'] = {
                        'r2': r2_val,
                        'direction': 'positive' if coefficient > 0 else 'negative'
                    }
                except:
                    performance_results[mechanism][f'{group_name}_performance'] = {'r2': 0, 'direction': 'none'}
            else:
                performance_results[mechanism][f'{group_name}_performance'] = {'r2': 0, 'direction': 'none'}
    
    return performance_results

# Execute RQ3.2
performance_prediction_results = predict_performance_by_groups()

# Display complete R² table for performance changes
print("\nCOMPLETE R² TABLE: MECHANISM CHANGES → PERFORMANCE CHANGES BY GROUPS")
print("="*90)
print(f"{'Mechanism':<40} {'Non-Cheaters':<15} {'Partial-Cheaters':<15} {'Full-Cheaters':<15}")
print("-" * 90)

for mechanism in mechanisms:
    if mechanism in performance_prediction_results:
        display_name = mechanism.replace('_', ' ').title()
        
        nc_result = performance_prediction_results[mechanism].get('Non-Cheaters_performance', {'r2': 0})
        pc_result = performance_prediction_results[mechanism].get('Partial-Cheaters_performance', {'r2': 0})
        fc_result = performance_prediction_results[mechanism].get('Full-Cheaters_performance', {'r2': 0})
        
        nc_text = f"{nc_result['r2']:.4f}"
        pc_text = f"{pc_result['r2']:.4f}"
        fc_text = f"{fc_result['r2']:.4f}"
        
        print(f"{display_name:<40} {nc_text:<15} {pc_text:<15} {fc_text:<15}")

print(f"\n✓ RQ3.2 Complete: All {len(mechanisms)} mechanisms analyzed for performance prediction")

# =============================================================================
# RQ3.3: Can we predict experience changes via mechanisms by cheating groups?
# =============================================================================

print("\n" + "="*80)
print("RQ3.3: PREDICTING EXPERIENCE CHANGES VIA MECHANISMS BY CHEATING GROUPS")
print("="*80)

def predict_experience_by_groups():
    """Calculate R² for predicting experience changes by cheating groups"""
    
    groups = [(0, 'Non-Cheaters'), (1, 'Partial-Cheaters'), (2, 'Full-Cheaters')]
    experience_results = {}
    
    print("Calculating experience prediction by cheating groups...")
    
    for mechanism in mechanisms:
        experience_results[mechanism] = {}
        
        for group_val, group_name in groups:
            group_data = intervention_data[intervention_data['cheating_behavior'] == group_val].copy()
            
            if len(group_data) < 30:
                experience_results[mechanism][f'{group_name}_experience'] = {'r2': 0, 'direction': 'none'}
                continue
            
            # Prepare data for this group
            mechanism_delta = group_data[mechanism] - control_means[mechanism]
            experience_delta = group_data['delta_experience']
            
            valid_data = pd.DataFrame({
                'delta_mechanism': mechanism_delta,
                'delta_experience': experience_delta
            }).dropna()
            
            if len(valid_data) < 20:
                experience_results[mechanism][f'{group_name}_experience'] = {'r2': 0, 'direction': 'none'}
                continue
            
            # Calculate R² for experience prediction
            X = valid_data[['delta_mechanism']].values
            y = valid_data['delta_experience'].values
            
            if len(np.unique(y)) > 1:
                try:
                    lr = LinearRegression().fit(X, y)
                    r2_val = max(0, r2_score(y, lr.predict(X)))
                    coefficient = lr.coef_[0]
                    experience_results[mechanism][f'{group_name}_experience'] = {
                        'r2': r2_val,
                        'direction': 'positive' if coefficient > 0 else 'negative'
                    }
                except:
                    experience_results[mechanism][f'{group_name}_experience'] = {'r2': 0, 'direction': 'none'}
            else:
                experience_results[mechanism][f'{group_name}_experience'] = {'r2': 0, 'direction': 'none'}
    
    return experience_results

# Execute RQ3.3
experience_prediction_results = predict_experience_by_groups()

# Display complete R² table for experience changes
print("\nCOMPLETE R² TABLE: MECHANISM CHANGES → EXPERIENCE CHANGES BY GROUPS")
print("="*90)
print(f"{'Mechanism':<40} {'Non-Cheaters':<15} {'Partial-Cheaters':<15} {'Full-Cheaters':<15}")
print("-" * 90)

for mechanism in mechanisms:
    if mechanism in experience_prediction_results:
        display_name = mechanism.replace('_', ' ').title()
        
        nc_result = experience_prediction_results[mechanism].get('Non-Cheaters_experience', {'r2': 0})
        pc_result = experience_prediction_results[mechanism].get('Partial-Cheaters_experience', {'r2': 0})
        fc_result = experience_prediction_results[mechanism].get('Full-Cheaters_experience', {'r2': 0})
        
        nc_text = f"{nc_result['r2']:.4f}"
        pc_text = f"{pc_result['r2']:.4f}"
        fc_text = f"{fc_result['r2']:.4f}"
        
        print(f"{display_name:<40} {nc_text:<15} {pc_text:<15} {fc_text:<15}")

print(f"\n✓ RQ3.3 Complete: All {len(mechanisms)} mechanisms analyzed for experience prediction")




In [None]:
# =============================================================================
# SIMPLIFIED VISUALIZATION: MECHANISM R² PREDICTION ACCURACY
# =============================================================================

def calculate_prediction_accuracy(intervention_data, mechanisms, control_means):
    """
    Calculate R² values for mechanism predictions.
    Returns both cheating behavior (regular R²) and performance/experience predictions.
    """
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import r2_score
    
    # Shared variables
    groups = [(0, 'Non-Cheaters'), (1, 'Partial-Cheaters'), (2, 'Full-Cheaters')]
    cheating_results = {}
    performance_experience_results = {}
    
    print("Calculating R² values for all prediction tasks...")
    
    # ========== PART 1: CHEATING BEHAVIOR PREDICTION (Regular R²) ==========
    for mechanism in mechanisms:
        if mechanism not in intervention_data.columns:
            continue
            
        # Calculate mechanism change from control (reuse for both predictions)
        mechanism_delta = intervention_data[mechanism] - control_means[mechanism]
        
        # Clean data for cheating prediction
        valid_cheating_data = pd.DataFrame({
            'delta_mechanism': mechanism_delta,
            'cheating_behavior': intervention_data['cheating_behavior']
        }).dropna()
        
        if len(valid_cheating_data) < 50:
            cheating_results[mechanism] = {'Non-Cheaters': 0, 'Partial-Cheaters': 0, 'Full-Cheaters': 0, 'n': 0}
            continue
            
        # Calculate R² for each cheating group vs others (binary classification as regression)
        r2_values = {}
        for target_group, group_name in groups:
            y_binary = (valid_cheating_data['cheating_behavior'] == target_group).astype(float)
            X = valid_cheating_data[['delta_mechanism']].values
            
            if len(np.unique(y_binary)) > 1:  # Check for variance
                try:
                    lr = LinearRegression().fit(X, y_binary)
                    r2_val = max(0, r2_score(y_binary, lr.predict(X)))
                    coefficient = lr.coef_[0]  # GET DIRECTION
                    r2_values[group_name] = {
                        'r2': r2_val,
                        'coef': coefficient,
                        'direction': 'positive' if coefficient > 0 else 'negative'
                    }
                except:
                    r2_values[group_name] = {'r2': 0, 'coef': 0, 'direction': 'none'}
            else:
                r2_values[group_name] = {'r2': 0, 'coef': 0, 'direction': 'none'}
        
        r2_values['n'] = len(valid_cheating_data)
        cheating_results[mechanism] = r2_values
    
    # ========== PART 2: PERFORMANCE & EXPERIENCE PREDICTION ==========
    for mechanism in mechanisms:
        performance_experience_results[mechanism] = {}
        
        for group_val, group_name in groups:
            group_data = intervention_data[intervention_data['cheating_behavior'] == group_val].copy()
            
            if len(group_data) < 30:  # Skip if insufficient data
                continue
            
            # Reuse mechanism delta calculation
            group_data[f'delta_{mechanism}'] = group_data[mechanism] - control_means[mechanism]
            group_data['delta_performance'] = group_data['performance'] - control_performance
            group_data['delta_experience'] = group_data['experience'] - control_experience
            
            # Clean data
            valid_data = group_data[[f'delta_{mechanism}', 'delta_performance', 'delta_experience']].dropna()
            
            if len(valid_data) < 20:
                continue
                
            X = valid_data[[f'delta_{mechanism}']].values
            
            # Performance prediction
            y_perf = valid_data['delta_performance'].values
            if len(np.unique(y_perf)) > 1:
                lr_perf = LinearRegression().fit(X, y_perf)
                r2_perf = max(0, r2_score(y_perf, lr_perf.predict(X)))
                coef_perf = lr_perf.coef_[0]
            else:
                r2_perf = 0
                coef_perf = 0
                
            # Experience prediction with direction
            y_exp = valid_data['delta_experience'].values
            if len(np.unique(y_exp)) > 1:
                lr_exp = LinearRegression().fit(X, y_exp)
                r2_exp = max(0, r2_score(y_exp, lr_exp.predict(X)))
                coef_exp = lr_exp.coef_[0]
            else:
                r2_exp = 0
                coef_exp = 0

            # Store results with direction
            performance_experience_results[mechanism][f'{group_name}_performance'] = {
                'r2': r2_perf, 'coef': coef_perf, 'direction': 'positive' if coef_perf > 0 else 'negative'
            }
            performance_experience_results[mechanism][f'{group_name}_experience'] = {
                'r2': r2_exp, 'coef': coef_exp, 'direction': 'positive' if coef_exp > 0 else 'negative'
            }
            performance_experience_results[mechanism][f'{group_name}_n'] = len(valid_data)
    
    return cheating_results, performance_experience_results

def create_horizontal_bar_plots(cheating_results, performance_experience_results, mechanisms):
    import matplotlib.patches as mpatches
    import matplotlib.patheffects as pe

    # Display names and order (same as your original)
    mechanism_display_names = {
        'descriptive_norms': 'Perceived Descriptive Norms',
        'injunctive_norms': 'Perceived Injunctive Norms',
        'reference_group_identification': 'Perceived Reference Group Identification',
        'social_sanctions': 'Perceived Social Sanctions',
        'performance_accomplishments': 'Perceived Performance Accomplishments',
        'vicarious_experience': 'Perceived Vicarious Experience',
        'verbal_persuasion': 'Perceived Verbal Persuasion',
        'emotional_arousal': 'Perceived Emotional Arousal'
    }

    mechanism_order = [
        'autonomy_need_satisfaction', 'autonomy_need_frustration',
        'competence_need_satisfaction', 'competence_need_frustration',
        'relatedness_need_satisfaction', 'relatedness_need_frustration',
        'cognitive_discomfort', 'moral_disengagement',
        'descriptive_norms', 'injunctive_norms',
        'social_sanctions', 'reference_group_identification',
        'performance_accomplishments', 'vicarious_experience',
        'verbal_persuasion', 'emotional_arousal'
    ][::-1]

    def prepare_df(results, groups_or_outcomes, display_names, mech_order):
        data = []
        direction_data = {}  # Store direction info separately
        
        for mech in mech_order:
            if mech not in results:
                continue
            name = display_names.get(mech, mech.replace('_', ' ').title())
            for key in groups_or_outcomes:
                result = results[mech].get(key, {'r2': 0, 'coef': 0, 'direction': 'none'})
                if isinstance(result, dict):
                    val = result['r2']
                    direction = result['direction']
                else:
                    val = result  # backward compatibility
                    direction = 'unknown'
                
                data.append({'mechanism': name, 'key': key, 'r2': val, 'direction': direction})
                # Store direction info for later lookup
                direction_data[(name, key)] = direction
        
        df = pd.DataFrame(data)
        pivot = df.pivot_table(index='mechanism', columns='key', values='r2', fill_value=0)
        pivot = pivot.reindex([display_names.get(m, m.replace('_',' ').title()) 
                               for m in mech_order if m in results])
        
        # Attach direction data to pivot table
        pivot._direction_data = direction_data
        
        return pivot
    
    def plot_horizontal_bar(pivot, keys, colors, title, show_values=True, border_keys=None, r2_threshold=0.1):
        """
        r2_threshold: only show numeric labels for bars with r2 >= r2_threshold
        border_keys: list of keys (column names) that should get inward borders
        """
        fig, ax = plt.subplots(figsize=(12, 10))
        y_pos = np.arange(len(pivot.index))
        left_positions = np.zeros(len(pivot.index))
        bar_height = 0.6
        border_keys = border_keys or []

        for key in keys:
            if key not in pivot.columns:
                continue
            vals = pivot[key].values
            bars = ax.barh(y_pos, vals, left=left_positions, height=bar_height,
                           color=colors[key], label=key.replace('_', ' ').replace('Cheaters','Cheater'),
                           alpha=0.8, edgecolor='none', linewidth=2)

            # Add inward border effect only if key is in border_keys
            if key in border_keys:
                for bar in bars:
                    # shrink inner bar slightly to create a visible border inside
                    ax.barh(
                        bar.get_y() + 0.3, bar.get_width(), height=bar.get_height(),
                        left=bar.get_x(), facecolor="none",
                        edgecolor="black", linewidth=2, alpha=1,
                        zorder=2
                    )

            # Show values with direction indicators only if above threshold
            if show_values:
                for i, (bar, val) in enumerate(zip(bars, vals)):
                    if val >= r2_threshold:
                        # Get direction from pivot data - FIXED
                        direction_symbol = ""
                        mechanism_name = pivot.index[i]
                        if hasattr(pivot, '_direction_data') and pivot._direction_data:
                            direction = pivot._direction_data.get((mechanism_name, key), 'unknown')
                            if direction == 'positive':
                                direction_symbol = "↑"
                            elif direction == 'negative':
                                direction_symbol = "↓"
                            
                        # Always use consistent formatting with direction symbol
                        text = f'{val:.2f}{direction_symbol}'
                        ax.text(left_positions[i] + val/2,
                                bar.get_y() + bar.get_height()/2,
                                text,
                                ha='center', va='center', fontweight='bold', fontsize=12)

            left_positions += vals

        handles = []
        labels = []

        for key in keys:
            if key not in colors:
                continue
            facecolor = colors[key]
            if key in border_keys:
                # Add border in legend patch
                patch = mpatches.Patch(facecolor=facecolor, edgecolor="black", linewidth=2, label=key.replace('_',' '))
            else:
                patch = mpatches.Patch(facecolor=facecolor, edgecolor="none", label=key.replace('_',' '))
            handles.append(patch)
            labels.append(key.replace('_',' ').replace('Cheaters','Cheater'))

        ax.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.5, 1.05), ncol=3, fontsize=13)
        ax.set_yticks(y_pos)
        ax.set_yticklabels(pivot.index, fontsize=16)
        ax.set_ylabel('Psychological Mechanisms', fontsize=14, fontweight='bold')
        ax.set_title(title, fontsize=18, fontweight='bold', pad=40)
        ax.grid(True, axis='x', alpha=0.3)
        ax.set_xlim(0, max(left_positions)*1.1 if max(left_positions) > 0 else 0.1)
        ax.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
        plt.tight_layout()
        plt.show()
        
    # ===== CHEATING PLOT =====
    groups = ['Non-Cheaters', 'Partial-Cheaters', 'Full-Cheaters']
    group_colors = {'Non-Cheaters':'#1f77b4','Partial-Cheaters':'#fdbf7a','Full-Cheaters':'#d6604d'}
    cheating_pivot = prepare_df(cheating_results, groups, mechanism_display_names, mechanism_order)
    plot_horizontal_bar(cheating_pivot, groups, group_colors,
                        'Psychological Mechanisms Show Weak Predictive Power for Cheating Behavior:\n'
                        'Most mechanisms explain <2% of variance (R² < 0.02) in cheating decisions',
                        r2_threshold=0.01)  # keep the 0.01 label cutoff for cheating

    # ===== PERFORMANCE & EXPERIENCE PLOT =====
    outcomes = ['Non-Cheaters_performance', 'Non-Cheaters_experience',
                'Partial-Cheaters_performance', 'Partial-Cheaters_experience',
                'Full-Cheaters_performance', 'Full-Cheaters_experience']
    pe_colors = {
        'Non-Cheaters_performance':'#1f77b4','Non-Cheaters_experience':'#6baed6',
        'Partial-Cheaters_performance':'#e6550d','Partial-Cheaters_experience':'#fdae6b',
        'Full-Cheaters_performance':'#c34444','Full-Cheaters_experience':'#fe845f'
    }
    pe_pivot = prepare_df(performance_experience_results, outcomes, mechanism_display_names, mechanism_order)

    # Keep only mechanisms with any non-zero predictive power
    pe_pivot_filtered = pe_pivot[(pe_pivot.sum(axis=1) > 0)]

    plot_horizontal_bar(pe_pivot_filtered, outcomes, pe_colors,
                        'Mechanisms Better Predict Performance/Experience than Cheating Behavior:\n'
                        'Several mechanisms show meaningful predictive power (R² > 0.1) for outcomes by group',
                        border_keys=[k for k in outcomes if 'performance' in k],
                        r2_threshold=0.1)  # show labels only for R² >= 0.1
      
def print_summary_results(cheating_results, performance_experience_results):
    """
    Print comprehensive summary of prediction results with detailed R² tables
    """
    mechanism_order = [
        'emotional_arousal', 'verbal_persuasion', 'vicarious_experience', 'performance_accomplishments',
        'social_sanctions', 'reference_group_identification', 'descriptive_norms', 'injunctive_norms',
        'moral_disengagement', 'cognitive_discomfort',
        'relatedness_need_frustration', 'relatedness_need_satisfaction',
        'competence_need_frustration', 'competence_need_satisfaction', 
        'autonomy_need_frustration', 'autonomy_need_satisfaction'
    ][::-1]
    
    # Helper function to safely extract values from dict structure
    def get_values(result):
        if isinstance(result, dict):
            return result.get('r2', 0), result.get('direction', 'unknown')
        return result, 'unknown'
    
    # Display names mapping
    mechanism_display_names = {
        'autonomy_need_satisfaction': 'Autonomy Need Satisfaction',
        'autonomy_need_frustration': 'Autonomy Need Frustration',
        'competence_need_satisfaction': 'Competence Need Satisfaction',
        'competence_need_frustration': 'Competence Need Frustration',
        'relatedness_need_satisfaction': 'Relatedness Need Satisfaction',
        'relatedness_need_frustration': 'Relatedness Need Frustration',
        'cognitive_discomfort': 'Cognitive Discomfort',
        'moral_disengagement': 'Moral Disengagement',
        'descriptive_norms': 'Perceived Descriptive Norms',
        'injunctive_norms': 'Perceived Injunctive Norms',
        'social_sanctions': 'Perceived Social Sanctions',
        'reference_group_identification': 'Perceived Reference Group Identification',
        'performance_accomplishments': 'Perceived Performance Accomplishments',
        'vicarious_experience': 'Perceived Vicarious Experience',
        'verbal_persuasion': 'Perceived Verbal Persuasion',
        'emotional_arousal': 'Perceived Emotional Arousal'
    }
    
    # =============================================================================
    # PLOT 1: CHEATING BEHAVIOR PREDICTION - COMPLETE R² TABLE
    # =============================================================================
    
    print("\n" + "="*100)
    print("PLOT 1: CHEATING BEHAVIOR PREDICTION - ALL R² VALUES")
    print("="*100)
    
    print(f"{'Mechanism':<40} {'Non-Cheaters':<15} {'Partial-Cheaters':<18} {'Full-Cheaters':<15} {'Best':<12}")
    print("-" * 100)
    
    cheating_table_data = []
    
    for mechanism in mechanism_order:
        if mechanism not in cheating_results:
            continue
            
        display_name = mechanism_display_names.get(mechanism, mechanism.replace('_', ' ').title())
        
        # Extract R² and direction for each group
        nc_r2, nc_dir = get_values(cheating_results[mechanism].get('Non-Cheaters', 0))
        pc_r2, pc_dir = get_values(cheating_results[mechanism].get('Partial-Cheaters', 0))
        fc_r2, fc_dir = get_values(cheating_results[mechanism].get('Full-Cheaters', 0))
        
        # Direction symbols
        nc_symbol = "↑" if nc_dir == 'positive' else "↓" if nc_dir == 'negative' else ""
        pc_symbol = "↑" if pc_dir == 'positive' else "↓" if pc_dir == 'negative' else ""
        fc_symbol = "↑" if fc_dir == 'positive' else "↓" if fc_dir == 'negative' else ""
        
        # Find best group
        max_r2 = max(nc_r2, pc_r2, fc_r2)
        if max_r2 > 0:
            if nc_r2 == max_r2:
                best_group = "Non-Cheaters"
            elif pc_r2 == max_r2:
                best_group = "Partial"
            else:
                best_group = "Full"
        else:
            best_group = "None"
            
        # Format the row
        nc_text = f"{nc_r2:.3f}{nc_symbol}" if nc_r2 >= 0.001 else f"{nc_r2:.3f}"
        pc_text = f"{pc_r2:.3f}{pc_symbol}" if pc_r2 >= 0.001 else f"{pc_r2:.3f}"
        fc_text = f"{fc_r2:.3f}{fc_symbol}" if fc_r2 >= 0.001 else f"{fc_r2:.3f}"
        
        print(f"{display_name[:39]:<40} {nc_text:<15} {pc_text:<18} {fc_text:<15} {best_group:<12}")
        
        cheating_table_data.append({
            'mechanism': display_name,
            'non_cheaters': nc_r2,
            'partial_cheaters': pc_r2, 
            'full_cheaters': fc_r2,
            'max_r2': max_r2
        })
    
    # Summary statistics for Plot 1
    if cheating_table_data:
        all_r2_values = []
        for row in cheating_table_data:
            all_r2_values.extend([row['non_cheaters'], row['partial_cheaters'], row['full_cheaters']])
        
        print(f"\nPLOT 1 SUMMARY:")
        print(f"  • Total R² values: {len(all_r2_values)}")
        print(f"  • Mean R²: {np.mean(all_r2_values):.4f}")
        print(f"  • Max R²: {np.max(all_r2_values):.4f}")
        print(f"  • Values ≥ 0.01: {sum(1 for x in all_r2_values if x >= 0.01)} ({100*sum(1 for x in all_r2_values if x >= 0.01)/len(all_r2_values):.1f}%)")
        print(f"  • Values ≥ 0.02: {sum(1 for x in all_r2_values if x >= 0.02)} ({100*sum(1 for x in all_r2_values if x >= 0.02)/len(all_r2_values):.1f}%)")
    
    # =============================================================================
    # PLOT 2: PERFORMANCE & EXPERIENCE PREDICTION - COMPLETE R² TABLE  
    # =============================================================================
    
    print("\n" + "="*120)
    print("PLOT 2: PERFORMANCE & EXPERIENCE PREDICTION - ALL R² VALUES")
    print("="*120)
    
    print(f"{'Mechanism':<40} {'NC-Perf':<10} {'NC-Exp':<10} {'PC-Perf':<10} {'PC-Exp':<10} {'FC-Perf':<10} {'FC-Exp':<10} {'Best':<15}")
    print("-" * 120)
    
    pe_table_data = []
    
    for mechanism in mechanism_order:
        if mechanism not in performance_experience_results:
            continue
            
        display_name = mechanism_display_names.get(mechanism, mechanism.replace('_', ' ').title())
        
        # Extract R² and direction for all combinations
        outcomes_data = {}
        for group in ['Non-Cheaters', 'Partial-Cheaters', 'Full-Cheaters']:
            for outcome in ['performance', 'experience']:
                key = f'{group}_{outcome}'
                result = performance_experience_results[mechanism].get(key, 0)
                r2, direction = get_values(result)
                symbol = "↑" if direction == 'positive' else "↓" if direction == 'negative' else ""
                outcomes_data[key] = {'r2': r2, 'symbol': symbol}
        
        # Find best combination
        max_r2 = 0
        best_combo = "None"
        for key, data in outcomes_data.items():
            if data['r2'] > max_r2:
                max_r2 = data['r2']
                # Format best combo name
                parts = key.split('_')
                group_short = parts[0][:2] if parts[0] == 'Non-Cheaters' else parts[0][:1]
                outcome_short = parts[1][:4].title()
                best_combo = f"{group_short}-{outcome_short}"
        
        # Format each column
        nc_perf_text = f"{outcomes_data['Non-Cheaters_performance']['r2']:.3f}{outcomes_data['Non-Cheaters_performance']['symbol']}"
        nc_exp_text = f"{outcomes_data['Non-Cheaters_experience']['r2']:.3f}{outcomes_data['Non-Cheaters_experience']['symbol']}"
        pc_perf_text = f"{outcomes_data['Partial-Cheaters_performance']['r2']:.3f}{outcomes_data['Partial-Cheaters_performance']['symbol']}"
        pc_exp_text = f"{outcomes_data['Partial-Cheaters_experience']['r2']:.3f}{outcomes_data['Partial-Cheaters_experience']['symbol']}"
        fc_perf_text = f"{outcomes_data['Full-Cheaters_performance']['r2']:.3f}{outcomes_data['Full-Cheaters_performance']['symbol']}"
        fc_exp_text = f"{outcomes_data['Full-Cheaters_experience']['r2']:.3f}{outcomes_data['Full-Cheaters_experience']['symbol']}"
        
        print(f"{display_name[:39]:<40} {nc_perf_text:<10} {nc_exp_text:<10} {pc_perf_text:<10} {pc_exp_text:<10} {fc_perf_text:<10} {fc_exp_text:<10} {best_combo:<15}")
        
        pe_table_data.append({
            'mechanism': display_name,
            'max_r2': max_r2,
            'all_values': [data['r2'] for data in outcomes_data.values()]
        })
    
    # Summary statistics for Plot 2
    if pe_table_data:
        all_pe_r2_values = []
        for row in pe_table_data:
            all_pe_r2_values.extend(row['all_values'])
        
        print(f"\nPLOT 2 SUMMARY:")
        print(f"  • Total R² values: {len(all_pe_r2_values)}")
        print(f"  • Mean R²: {np.mean(all_pe_r2_values):.4f}")
        print(f"  • Max R²: {np.max(all_pe_r2_values):.4f}")
        print(f"  • Values ≥ 0.05: {sum(1 for x in all_pe_r2_values if x >= 0.05)} ({100*sum(1 for x in all_pe_r2_values if x >= 0.05)/len(all_pe_r2_values):.1f}%)")
        print(f"  • Values ≥ 0.10: {sum(1 for x in all_pe_r2_values if x >= 0.10)} ({100*sum(1 for x in all_pe_r2_values if x >= 0.10)/len(all_pe_r2_values):.1f}%)")
        print(f"  • Values ≥ 0.20: {sum(1 for x in all_pe_r2_values if x >= 0.20)} ({100*sum(1 for x in all_pe_r2_values if x >= 0.20)/len(all_pe_r2_values):.1f}%)")
    
    # =============================================================================
    # COMPARATIVE SUMMARY
    # =============================================================================
    
    print("\n" + "="*80)
    print("COMPARATIVE SUMMARY: TOP PREDICTORS")
    print("="*80)
    
    # Top cheating predictors
    if cheating_table_data:
        cheating_predictors = [(row['mechanism'], row['max_r2']) for row in cheating_table_data if row['max_r2'] > 0.01]
        cheating_predictors.sort(key=lambda x: x[1], reverse=True)
        
        print("\nTOP CHEATING BEHAVIOR PREDICTORS (R² > 0.01):")
        print("-" * 50)
        if cheating_predictors:
            for i, (mech, r2_val) in enumerate(cheating_predictors[:5], 1):
                print(f"{i:2d}. {mech[:35]:<35} R² = {r2_val:.3f}")
        else:
            print("No mechanisms show meaningful prediction accuracy for cheating behavior")
    
    # Top performance/experience predictors
    if pe_table_data:
        pe_predictors = [(row['mechanism'], row['max_r2']) for row in pe_table_data if row['max_r2'] > 0.10]
        pe_predictors.sort(key=lambda x: x[1], reverse=True)
        
        print("\nTOP PERFORMANCE/EXPERIENCE PREDICTORS (R² > 0.10):")
        print("-" * 50)
        if pe_predictors:
            for i, (mech, r2_val) in enumerate(pe_predictors[:10], 1):
                print(f"{i:2d}. {mech[:35]:<35} R² = {r2_val:.3f}")
        else:
            print("No mechanisms show strong prediction accuracy (R² > 0.10)")
    
    print("\n" + "="*80)
    print("Note: ↑ = positive relationship, ↓ = negative relationship")
    print("NC=Non-Cheaters, PC=Partial-Cheaters, FC=Full-Cheaters")
    print("Perf=Performance, Exp=Experience")
    print("="*80)

def calculate_overall_mechanism_effects(intervention_data, mechanisms, control_means):
    """Calculate R² when ALL mechanisms are used together as predictors"""
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.metrics import r2_score
    
    mechanism_delta_cols = [f'delta_{mech}' for mech in mechanisms]
    overall_cheating = {}
    overall_pe = {}
    
    # CHEATING PREDICTION using all mechanisms together
    for group_val, group_name in [(0, 'Non-Cheaters'), (1, 'Partial-Cheaters'), (2, 'Full-Cheaters')]:
        group_data = intervention_data[intervention_data['cheating_behavior'] == group_val].copy()
        if len(group_data) < 30: continue
            
        valid_data = group_data[mechanism_delta_cols + ['cheating_rate_main_round']].dropna()
        if len(valid_data) < 20: continue
            
        X = valid_data[mechanism_delta_cols].values
        y = valid_data['cheating_rate_main_round'].values
        
        if len(np.unique(y)) > 1:
            model = RandomForestRegressor(n_estimators=100, random_state=42, max_depth=3)
            model.fit(X, y)
            r2 = max(0, r2_score(y, model.predict(X)))
            overall_cheating[group_name] = {'r2': r2, 'direction': 'combined'}
        
    # PERFORMANCE & EXPERIENCE PREDICTION using all mechanisms together
    for group_val, group_name in [(0, 'Non-Cheaters'), (1, 'Partial-Cheaters'), (2, 'Full-Cheaters')]:
        group_data = intervention_data[intervention_data['cheating_behavior'] == group_val].copy()
        if len(group_data) < 30: continue
            
        valid_data = group_data[mechanism_delta_cols + ['delta_performance', 'delta_experience']].dropna()
        if len(valid_data) < 20: continue
            
        X = valid_data[mechanism_delta_cols].values
        
        # Performance
        y_perf = valid_data['delta_performance'].values
        if len(np.unique(y_perf)) > 1:
            model = RandomForestRegressor(n_estimators=100, random_state=42, max_depth=3)
            model.fit(X, y_perf)
            r2_perf = max(0, r2_score(y_perf, model.predict(X)))
            overall_pe[f'{group_name}_performance'] = {'r2': r2_perf, 'direction': 'combined'}
        
        # Experience  
        y_exp = valid_data['delta_experience'].values
        if len(np.unique(y_exp)) > 1:
            model = RandomForestRegressor(n_estimators=100, random_state=42, max_depth=3)
            model.fit(X, y_exp)
            r2_exp = max(0, r2_score(y_exp, model.predict(X)))
            overall_pe[f'{group_name}_experience'] = {'r2': r2_exp, 'direction': 'combined'}
    
    return overall_cheating, overall_pe
    
# ========== MAIN EXECUTION ==========
print("\n" + "="*80)
print("MECHANISM R² PREDICTION ACCURACY ANALYSIS")
print("="*80)

# Calculate individual mechanism effects (existing code)
cheating_r2_results, pe_r2_results = calculate_prediction_accuracy(
    intervention_data, mechanisms, control_means)

# Calculate and add overall effects as new row
overall_cheating, overall_pe = calculate_overall_mechanism_effects(intervention_data, mechanisms, control_means)
cheating_r2_results['OVERALL_ALL_MECHANISMS'] = overall_cheating
pe_r2_results['OVERALL_ALL_MECHANISMS'] = overall_pe

# Update mechanism order and display names
mechanisms_with_overall = ['OVERALL_ALL_MECHANISMS'] + mechanisms

# Create plots (this will automatically include the new row)
create_horizontal_bar_plots(cheating_r2_results, pe_r2_results, mechanisms_with_overall)

# Print summaries
print_summary_results(cheating_r2_results, pe_r2_results)


In [None]:
# =============================================================================
# RQ3: How do concept-based interventions influence cheating behavior, 
# performance, and experience through underlying psychological mechanisms?
# =============================================================================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import sys, os, warnings

# Setup
os.chdir('game-behavior-analytics/data_analysis_notebook/')
sys.path.append(os.path.abspath('utils'))
warnings.filterwarnings('ignore')

from data_utils import load_and_prepare_data


In [None]:
# =============================================================================
# 1. DATA LOADING AND BASELINE SETUP
# =============================================================================

df, concepts = load_and_prepare_data("../data/final_dataset.csv")

print("="*80)
print("RQ3: MECHANISM-BASED PREDICTION ANALYSIS")
print("="*80)

# Define mechanisms (original names before renaming)
MECHANISMS = [
    'autonomy_need_satisfaction', 'autonomy_need_frustration',
    'competence_need_satisfaction', 'competence_need_frustration',
    'relatedness_need_satisfaction', 'relatedness_need_frustration',
    'cognitive_discomfort', 'moral_disengagement',
    'injunctive_norms', 'descriptive_norms', 'reference_group_identification', 'social_sanctions',
    'performance_accomplishments', 'vicarious_experience', 'verbal_persuasion', 'emotional_arousal'
]

# Rename mechanisms in dataframe (one time only)
MECHANISM_RENAMES = {
    'descriptive_norms': 'perceived_descriptive_norms',
    'injunctive_norms': 'perceived_injunctive_norms', 
    'reference_group_identification': 'perceived_group_identification',
    'social_sanctions': 'perceived_social_sanctions',
    'performance_accomplishments': 'perceived_performance_accomplishments',
    'vicarious_experience': 'perceived_vicarious_experience',
    'verbal_persuasion': 'perceived_verbal_persuasion',
    'emotional_arousal': 'perceived_emotional_arousal'
}

df.rename(columns=MECHANISM_RENAMES, inplace=True)

# Update mechanism list with new names
MECHANISMS = [MECHANISM_RENAMES.get(m, m) for m in MECHANISMS]

# Calculate control baseline (one time only)
control_data = df[df['concept'] == 'control']
CONTROL_MEANS = {
    'mechanisms': control_data[MECHANISMS].mean(),
    'performance': control_data['performance'].mean(),
    'experience': control_data['experience'].mean()
}

print(f"✓ Control baseline (N={len(control_data)})")

# Calculate deltas (one time only)
for mech in MECHANISMS:
    df[f'delta_{mech}'] = df[mech] - CONTROL_MEANS['mechanisms'][mech]

df['delta_performance'] = df['performance'] - CONTROL_MEANS['performance']
df['delta_experience'] = df['experience'] - CONTROL_MEANS['experience']

# Create intervention dataset
intervention_data = df[df['concept'] != 'control'].copy()
print(f"✓ Intervention participants: {len(intervention_data)}")

In [None]:
# =============================================================================
# 2. MECHANISM-CONCEPT MAPPING (CONFIGURATION)
# =============================================================================

CONCEPT_MECHANISM_MAPPING = {
    'autonomy': ['autonomy_need_satisfaction', 'autonomy_need_frustration'],
    'competence': ['competence_need_satisfaction', 'competence_need_frustration'],
    'relatedness': ['relatedness_need_satisfaction', 'relatedness_need_frustration'],
    'self_concept': ['cognitive_discomfort', 'moral_disengagement'],
    'cognitive_inconsistency': ['cognitive_discomfort', 'moral_disengagement'],
    'dissonance_arousal': ['cognitive_discomfort', 'moral_disengagement'],
    'dissonance_reduction': ['cognitive_discomfort', 'moral_disengagement'],
    'performance_accomplishments': ['perceived_performance_accomplishments'],
    'vicarious_experience': ['perceived_vicarious_experience'],
    'verbal_persuasion': ['perceived_verbal_persuasion'],
    'emotional_arousal': ['perceived_emotional_arousal'],
    'descriptive_norms': ['perceived_descriptive_norms'],
    'injunctive_norms': ['perceived_injunctive_norms'],
    'social_sanctions': ['perceived_social_sanctions'],
    'reference_group_identification': ['perceived_group_identification']
}

# Create reverse mapping
MECHANISM_TO_CONCEPTS = {}
for concept, mechs in CONCEPT_MECHANISM_MAPPING.items():
    for mech in mechs:
        MECHANISM_TO_CONCEPTS.setdefault(mech, []).append(concept)

# Display names for mechanisms
MECHANISM_DISPLAY_NAMES = {
    'autonomy_need_satisfaction': 'Autonomy Need Satisfaction',
    'autonomy_need_frustration': 'Autonomy Need Frustration',
    'competence_need_satisfaction': 'Competence Need Satisfaction',
    'competence_need_frustration': 'Competence Need Frustration',
    'relatedness_need_satisfaction': 'Relatedness Need Satisfaction',
    'relatedness_need_frustration': 'Relatedness Need Frustration',
    'cognitive_discomfort': 'Cognitive Discomfort',
    'moral_disengagement': 'Moral Disengagement',
    'perceived_descriptive_norms': 'Perceived Descriptive Norms',
    'perceived_injunctive_norms': 'Perceived Injunctive Norms',
    'perceived_group_identification': 'Perceived Group Identification',
    'perceived_social_sanctions': 'Perceived Social Sanctions',
    'perceived_performance_accomplishments': 'Perceived Performance Accomplishments',
    'perceived_vicarious_experience': 'Perceived Vicarious Experience',
    'perceived_verbal_persuasion': 'Perceived Verbal Persuasion',
    'perceived_emotional_arousal': 'Perceived Emotional Arousal'
}

# Mechanism ordering for plots
MECHANISM_ORDER = [
    'autonomy_need_satisfaction', 'autonomy_need_frustration',
    'competence_need_satisfaction', 'competence_need_frustration',
    'relatedness_need_satisfaction', 'relatedness_need_frustration',
    'cognitive_discomfort', 'moral_disengagement',
    'perceived_performance_accomplishments', 'perceived_vicarious_experience',
    'perceived_verbal_persuasion', 'perceived_emotional_arousal',
    'perceived_descriptive_norms', 'perceived_injunctive_norms',
    'perceived_group_identification', 'perceived_social_sanctions'
]


In [None]:
# =============================================================================
# 3. CORE COMPARISON FUNCTION
# =============================================================================

def compare_mechanisms_vs_concepts(outcome_col):
    """
    Compare mechanism vs concept prediction for any outcome
    Returns: {(mechanism, concept): {group: {mechanism_r2, concept_r2}}}
    """
    results = {}
    groups = [(0, 'Non-Cheaters'), (1, 'Partial-Cheaters'), (2, 'Full-Cheaters')]
    
    print(f"Calculating R² for {outcome_col}...")
    
    for mechanism in MECHANISMS:
        related_concepts = MECHANISM_TO_CONCEPTS.get(mechanism, [])
        if not related_concepts:
            continue
        
        # Determine concept name(s) for display
        if mechanism in ['cognitive_discomfort', 'moral_disengagement']:
            concept_display = 'Cognitive Dissonance'
        else:
            concept_display = related_concepts[0].replace('_', ' ').title()
        
        key = (mechanism, concept_display)
        results[key] = {}
        
        for target_group, group_name in groups:
            # Get data for this mechanism's related concepts
            if outcome_col == 'cheating_behavior':
                concept_data = intervention_data[intervention_data['concept'].isin(related_concepts)]
                other_data = intervention_data[~intervention_data['concept'].isin(related_concepts)]
            else:
                concept_data = intervention_data[
                    (intervention_data['concept'].isin(related_concepts)) & 
                    (intervention_data['cheating_behavior'] == target_group)
                ]
                other_data = intervention_data[
                    (~intervention_data['concept'].isin(related_concepts)) & 
                    (intervention_data['cheating_behavior'] == target_group)
                ]
            
            if len(concept_data) < 20:
                results[key][group_name] = {'concept_r2': 0, 'mechanism_r2': 0}
                continue
            
            # Method 1: Concept prediction (binary: received concept or not)
            all_data = pd.concat([
                concept_data.assign(concept_received=1),
                other_data.assign(concept_received=0)
            ])
            
            if outcome_col == 'cheating_behavior':
                y_concept = (all_data['cheating_behavior'] == target_group).astype(float).values
            else:
                y_concept = all_data[outcome_col].values
            
            X_concept = all_data[['concept_received']].values
            
            # Calculate concept R²
            valid = ~np.isnan(y_concept)
            if valid.sum() > 10 and len(np.unique(y_concept[valid])) > 1:
                try:
                    model = LinearRegression().fit(X_concept[valid], y_concept[valid])
                    y_pred = model.predict(X_concept[valid])
                    concept_r2 = max(0, r2_score(y_concept[valid], y_pred))
                except:
                    concept_r2 = 0
            else:
                concept_r2 = 0
            
            # Method 2: Mechanism prediction (continuous delta value)
            mech_col = f'delta_{mechanism}'
            if mech_col in concept_data.columns:
                X_mech = concept_data[[mech_col]].fillna(0).values
                
                if outcome_col == 'cheating_behavior':
                    y_mech = (concept_data['cheating_behavior'] == target_group).astype(float).values
                else:
                    y_mech = concept_data[outcome_col].values
                
                valid = ~np.isnan(y_mech)
                if valid.sum() > 10 and len(np.unique(y_mech[valid])) > 1:
                    try:
                        model = LinearRegression().fit(X_mech[valid], y_mech[valid])
                        y_pred = model.predict(X_mech[valid])
                        mechanism_r2 = max(0, r2_score(y_mech[valid], y_pred))
                    except:
                        mechanism_r2 = 0
                else:
                    mechanism_r2 = 0
            else:
                mechanism_r2 = 0
            
            results[key][group_name] = {
                'concept_r2': concept_r2,
                'mechanism_r2': mechanism_r2
            }
    
    return results


In [None]:
# =============================================================================
# 4. DISPLAY FUNCTION
# =============================================================================

def display_comparison_table(results, outcome_name):
    """Display comparison table"""
    print(f"\n{'='*150}")
    print(f"COMPARISON: MECHANISMS vs CONCEPTS FOR {outcome_name} PREDICTION")
    print(f"{'='*150}")
    
    header = (f"{'Mechanism':<50} {'Concept':<35} "
             f"{'Non-Cheaters':^20} {'Partial-Cheaters':^20} {'Full-Cheaters':^20}")
    subheader = (f"{'':<85} "
                f"{'Mech R²':>9} {'Conc R²':>9}  "
                f"{'Mech R²':>9} {'Conc R²':>9}  "
                f"{'Mech R²':>9} {'Conc R²':>9}")
    
    print(header)
    print(subheader)
    print("-" * 150)
    
    for (mechanism, concept), group_results in sorted(results.items()):
        mech_display = mechanism.replace('_', ' ').title()
        
        nc = group_results.get('Non-Cheaters', {'mechanism_r2': 0, 'concept_r2': 0})
        pc = group_results.get('Partial-Cheaters', {'mechanism_r2': 0, 'concept_r2': 0})
        fc = group_results.get('Full-Cheaters', {'mechanism_r2': 0, 'concept_r2': 0})
        
        print(f"{mech_display:<50} {concept:<35} "
              f"{nc['mechanism_r2']*100:>8.2f}% {nc['concept_r2']*100:>8.2f}%  "
              f"{pc['mechanism_r2']*100:>8.2f}% {pc['concept_r2']*100:>8.2f}%  "
              f"{fc['mechanism_r2']*100:>8.2f}% {fc['concept_r2']*100:>8.2f}% ")
    
    print(f"\n✓ Complete: {len(results)} mechanism-concept pairs")
    print("=" * 150)


In [None]:
# =============================================================================
# 5. RUN ANALYSES
# =============================================================================

print("\n" + "="*80)
print("RQ3.1: CHEATING BEHAVIOR PREDICTION")
print("="*80)
cheating_results = compare_mechanisms_vs_concepts('cheating_behavior')
display_comparison_table(cheating_results, "CHEATING")

print("\n" + "="*80)
print("RQ3.2: PERFORMANCE PREDICTION")
print("="*80)
performance_results = compare_mechanisms_vs_concepts('delta_performance')
display_comparison_table(performance_results, "PERFORMANCE")

print("\n" + "="*80)
print("RQ3.3: EXPERIENCE PREDICTION")
print("="*80)
experience_results = compare_mechanisms_vs_concepts('delta_experience')
display_comparison_table(experience_results, "EXPERIENCE")

In [None]:
# =============================================================================
# 6. VISUALIZATION FUNCTIONS
# =============================================================================

def restructure_for_plotting(results):
    """Convert results to {mechanism: {group: metrics}} format"""
    restructured = {}
    for (mechanism, concept), group_data in results.items():
        if mechanism not in restructured:
            restructured[mechanism] = {}
        for group, metrics in group_data.items():
            restructured[mechanism][group] = metrics
    return restructured

def prepare_plot_dataframe(results_dict, groups, display_names, mechanism_order):
    """Prepare dataframe for plotting"""
    data = []
    for mech in mechanism_order:
        if mech not in results_dict:
            continue
        row = {'mechanism': display_names.get(mech, mech.replace('_', ' ').title())}
        for group in groups:
            if group in results_dict[mech]:
                row[group] = results_dict[mech][group]['mechanism_r2']
            else:
                row[group] = 0
        data.append(row)
    
    if not data:
        return pd.DataFrame()
    
    df_plot = pd.DataFrame(data).set_index('mechanism')
    display_order = [display_names.get(m, m.replace('_', ' ').title()) 
                    for m in mechanism_order if m in results_dict]
    return df_plot.reindex(display_order)

def plot_horizontal_bars(pivot, groups, colors, title, border_groups=None, threshold=0.1):
    """Create horizontal stacked bar plot"""
    if len(pivot) == 0:
        print("No data to plot")
        return
    
    fig, ax = plt.subplots(figsize=(12, 10))
    y_pos = np.arange(len(pivot))
    left = np.zeros(len(pivot))
    border_groups = border_groups or []
    
    for group in groups:
        if group not in pivot.columns:
            continue
        vals = pivot[group].values
        bars = ax.barh(y_pos, vals, left=left, height=0.6,
                      color=colors[group], label=group.replace('-', ' '),
                      alpha=0.8)
        
        # Add border for specified groups
        if group in border_groups:
            for bar in bars:
                ax.barh(bar.get_y(), bar.get_width(), height=bar.get_height(),
                       left=bar.get_x(), facecolor="none", edgecolor="black",
                       linewidth=2, zorder=3)
        
        # Show values above threshold
        for i, (bar, val) in enumerate(zip(bars, vals)):
            if val >= threshold:
                ax.text(left[i] + val/2, bar.get_y() + bar.get_height()/2,
                       f'{val:.2f}', ha='center', va='center',
                       fontweight='bold', fontsize=11)
        
        left += vals
    
    ax.set_yticks(y_pos)
    ax.set_yticklabels(pivot.index, fontsize=14)
    ax.set_ylabel('Psychological Mechanisms', fontsize=14, fontweight='bold')
    ax.set_title(title, fontsize=16, fontweight='bold', pad=40)
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05), ncol=3, fontsize=12)
    ax.grid(True, axis='x', alpha=0.3)
    ax.set_xlim(0, max(left)*1.1 if max(left) > 0 else 0.1)
    plt.tight_layout()
    plt.show()


In [None]:
# =============================================================================
# 7. CREATE VISUALIZATIONS
# =============================================================================

print("\n" + "="*80)
print("CREATING VISUALIZATIONS")
print("="*80)

# Restructure results
cheating_plot = restructure_for_plotting(cheating_results)
performance_plot = restructure_for_plotting(performance_results)
experience_plot = restructure_for_plotting(experience_results)

# Plot 1: Cheating behavior
groups = ['Non-Cheaters', 'Partial-Cheaters', 'Full-Cheaters']
colors = {'Non-Cheaters': '#1f77b4', 'Partial-Cheaters': '#fdbf7a', 'Full-Cheaters': '#d6604d'}

cheating_df = prepare_plot_dataframe(cheating_plot, groups, MECHANISM_DISPLAY_NAMES, MECHANISM_ORDER)
if len(cheating_df) > 0:
    cheating_filtered = cheating_df[cheating_df.sum(axis=1) > 0]
    plot_horizontal_bars(cheating_filtered, groups, colors,
                        'Psychological Mechanisms Show Weak Predictive Power for Cheating',
                        threshold=0.01)

# Plot 2: Performance & Experience
perf_exp_results = {}
for mech in MECHANISM_ORDER:
    perf_exp_results[mech] = {}
    for group in groups:
        if mech in performance_plot and group in performance_plot[mech]:
            perf_exp_results[mech][f'{group}_performance'] = performance_plot[mech][group]
        if mech in experience_plot and group in experience_plot[mech]:
            perf_exp_results[mech][f'{group}_experience'] = experience_plot[mech][group]

pe_groups = ['Non-Cheaters_performance', 'Non-Cheaters_experience',
             'Partial-Cheaters_performance', 'Partial-Cheaters_experience',
             'Full-Cheaters_performance', 'Full-Cheaters_experience']

pe_colors = {
    'Non-Cheaters_performance': '#1f77b4', 'Non-Cheaters_experience': '#6baed6',
    'Partial-Cheaters_performance': '#e6550d', 'Partial-Cheaters_experience': '#fdae6b',
    'Full-Cheaters_performance': '#c34444', 'Full-Cheaters_experience': '#fe845f'
}

pe_df = prepare_plot_dataframe(perf_exp_results, pe_groups, MECHANISM_DISPLAY_NAMES, MECHANISM_ORDER)
if len(pe_df) > 0:
    pe_filtered = pe_df[pe_df.sum(axis=1) > 0]
    border_groups = [g for g in pe_groups if 'performance' in g]
    plot_horizontal_bars(pe_filtered, pe_groups, pe_colors,
                        'Mechanisms Better Predict Performance/Experience than Cheating',
                        border_groups=border_groups, threshold=0.1)

print("\n✓ Analysis complete!")

In [None]:
# =============================================================================
# SETUP: Define mechanisms and calculate baseline changes
# =============================================================================

# Define mechanisms
mechanisms = [
    'autonomy_need_satisfaction', 'autonomy_need_frustration',
    'competence_need_satisfaction', 'competence_need_frustration',
    'relatedness_need_satisfaction', 'relatedness_need_frustration',
    'cognitive_discomfort', 'moral_disengagement',
    'injunctive_norms', 'descriptive_norms', 
    'reference_group_identification', 'social_sanctions',
    'performance_accomplishments', 'vicarious_experience', 
    'verbal_persuasion', 'emotional_arousal'
]

# Rename mechanisms
mechanism_renames = {
    'descriptive_norms': 'perceived_descriptive_norms',
    'injunctive_norms': 'perceived_injunctive_norms', 
    'reference_group_identification': 'perceived_group_identification',
    'social_sanctions': 'perceived_social_sanctions',
    'performance_accomplishments': 'perceived_performance_accomplishments',
    'vicarious_experience': 'perceived_vicarious_experience',
    'verbal_persuasion': 'perceived_verbal_persuasion',
    'emotional_arousal': 'perceived_emotional_arousal'
}

pme_renames = {
    'PME_on_honest_task_completion': 'perceived_honesty',
    'PME_on_task_performance': 'perceived_performance_effect', 
    'PME_on_task_experience': 'perceived_experience_effect'
}

# Apply renames
df.rename(columns={**mechanism_renames, **pme_renames}, inplace=True)

# Update mechanisms list with renamed variables
mechanisms = [mechanism_renames.get(m, m) for m in mechanisms]

# Calculate control baseline
control_data = df[df['concept'] == 'control']
control_means = control_data[mechanisms].mean()
control_performance = control_data['performance'].mean()
control_experience = control_data['experience'].mean()

print(f"✓ Control baseline calculated (N={len(control_data)})")

# Calculate deltas
for mech in mechanisms:
    df[f'delta_{mech}'] = df[mech] - control_means[mech]

df['delta_performance'] = df['performance'] - control_performance
df['delta_experience'] = df['experience'] - control_experience

# Create intervention dataset
intervention_data = df[df['concept'] != 'control'].copy()
mechanism_delta_cols = [f'delta_{mech}' for mech in mechanisms]

print(f"✓ Intervention participants: {len(intervention_data)}")
print(f"✓ Mechanism changes calculated for {len(mechanisms)} mechanisms")

# Define concept-mechanism mapping
concept_mechanism_mapping = {
    'autonomy': ['autonomy_need_satisfaction', 'autonomy_need_frustration'],
    'competence': ['competence_need_satisfaction', 'competence_need_frustration'],
    'relatedness': ['relatedness_need_satisfaction', 'relatedness_need_frustration'],
    'cognitive_dissonance': ['cognitive_discomfort'],
    'moral_licensing': ['moral_disengagement'],
    'performance_accomplishments': ['perceived_performance_accomplishments'],
    'vicarious_experience': ['perceived_vicarious_experience'],
    'verbal_persuasion': ['perceived_verbal_persuasion'],
    'emotional_arousal': ['perceived_emotional_arousal'],
    'descriptive_norms': ['perceived_descriptive_norms'],
    'injunctive_norms': ['perceived_injunctive_norms'],
    'social_sanctions': ['perceived_social_sanctions'],
    'reference_group_identification': ['perceived_group_identification']
}

# Mechanism display mapping
mechanism_to_concept_map = {
    'autonomy_need_satisfaction': 'autonomy',
    'autonomy_need_frustration': 'autonomy',
    'competence_need_satisfaction': 'competence',
    'competence_need_frustration': 'competence',
    'relatedness_need_satisfaction': 'relatedness',
    'relatedness_need_frustration': 'relatedness',
    'cognitive_discomfort': 'cognitive_dissonance',
    'moral_disengagement': 'moral_licensing',
    'perceived_descriptive_norms': 'descriptive_norms',
    'perceived_injunctive_norms': 'injunctive_norms',
    'perceived_group_identification': 'reference_group_identification',
    'perceived_social_sanctions': 'social_sanctions',
    'perceived_performance_accomplishments': 'performance_accomplishments',
    'perceived_vicarious_experience': 'vicarious_experience',
    'perceived_verbal_persuasion': 'verbal_persuasion',
    'perceived_emotional_arousal': 'emotional_arousal'
}


In [None]:
# =============================================================================
# REUSABLE FUNCTION FOR COMPARISON TABLES
# =============================================================================



    """Display comparison table with proper mechanism-concept mapping"""
    
    print(f"\nCOMPARISON TABLE: MECHANISMS vs CONCEPTS FOR {outcome_name} PREDICTION")
    print("="*150)
    
    # Create header
    header = (f"{'Mechanism':<50} {'Concept':<35} "
             f"{'Non-Cheaters':^20} {'Partial-Cheaters':^20} {'Full-Cheaters':^20}")
    
    subheader = (f"{'':<85} "
                f"{'Mech R²':>9} {'Conc R²':>9}  "
                f"{'Mech R²':>9} {'Conc R²':>9}  "
                f"{'Mech R²':>9} {'Conc R²':>9}")
    
    print(header)
    print(subheader)
    print("-" * 150)
    
    # Sort and display results
    for mech_concept_pair, group_results in sorted(comparison_results.items()):
        mechanism, concept = mech_concept_pair
        mech_display = mechanism.replace('_', ' ').title()
        
        # Extract R² values and convert to percentage
        nc_mech = group_results.get('Non-Cheaters', {}).get('mechanism_r2', 0) * 100
        nc_conc = group_results.get('Non-Cheaters', {}).get('concept_r2', 0) * 100
        
        pc_mech = group_results.get('Partial-Cheaters', {}).get('mechanism_r2', 0) * 100
        pc_conc = group_results.get('Partial-Cheaters', {}).get('concept_r2', 0) * 100
        
        fc_mech = group_results.get('Full-Cheaters', {}).get('mechanism_r2', 0) * 100
        fc_conc = group_results.get('Full-Cheaters', {}).get('concept_r2', 0) * 100
        
        print(f"{mech_display:<50} {concept:<35} "
              f"{nc_mech:>8.2f}% {nc_conc:>8.2f}%  "
              f"{pc_mech:>8.2f}% {pc_conc:>8.2f}%  "
              f"{fc_mech:>8.2f}% {fc_conc:>8.2f}% ")
    
    print(f"\n✓ Complete: All {len(comparison_results)} mechanism-concept pairs compared")
    print("=" * 150 + "\n")

In [None]:
# =============================================================================
# GENERIC COMPARISON FUNCTION
# =============================================================================

def display_comparison_table(comparison_results, outcome_name):
    """Display comparison table - SINGLE DEFINITION"""
    print(f"\nCOMPARISON TABLE: MECHANISMS vs CONCEPTS FOR {outcome_name} PREDICTION")
    print("="*150)
    
    header = (f"{'Mechanism':<50} {'Concept':<35} "
             f"{'Non-Cheaters':^20} {'Partial-Cheaters':^20} {'Full-Cheaters':^20}")
    
    subheader = (f"{'':<85} "
                f"{'Mech R²':>9} {'Conc R²':>9}  "
                f"{'Mech R²':>9} {'Conc R²':>9}  "
                f"{'Mech R²':>9} {'Conc R²':>9}")
    
    print(header)
    print(subheader)
    print("-" * 150)
    
    for mech_concept_pair, group_results in sorted(comparison_results.items()):
        mechanism, concept = mech_concept_pair
        mech_display = mechanism.replace('_', ' ').title()
        
        nc_mech = group_results.get('Non-Cheaters', {}).get('mechanism_r2', 0) * 100
        nc_conc = group_results.get('Non-Cheaters', {}).get('concept_r2', 0) * 100
        pc_mech = group_results.get('Partial-Cheaters', {}).get('mechanism_r2', 0) * 100
        pc_conc = group_results.get('Partial-Cheaters', {}).get('concept_r2', 0) * 100
        fc_mech = group_results.get('Full-Cheaters', {}).get('mechanism_r2', 0) * 100
        fc_conc = group_results.get('Full-Cheaters', {}).get('concept_r2', 0) * 100
        
        print(f"{mech_display:<50} {concept:<35} "
              f"{nc_mech:>8.2f}% {nc_conc:>8.2f}%  "
              f"{pc_mech:>8.2f}% {pc_conc:>8.2f}%  "
              f"{fc_mech:>8.2f}% {fc_conc:>8.2f}% ")
    
    print(f"\n✓ Complete: All {len(comparison_results)} pairs compared")
    print("=" * 150 + "\n")

# Include the FIXED compare_mechanisms_vs_concepts() from FIX 1 above
# Include all visualization functions ONCE


In [None]:
# =============================================================================
# RQ3.1.1: CHEATING PREDICTION
# =============================================================================

print("\n" + "="*80)
print("RQ3.1.1: COMPARISON - MECHANISMS vs CONCEPTS FOR CHEATING PREDICTION")
print("="*80)

cheating_results = compare_mechanisms_vs_concepts('cheating_behavior')
display_comparison_table(cheating_results, "CHEATING")

In [None]:
# =============================================================================
# RQ3.2.1: PERFORMANCE PREDICTION
# =============================================================================

print("\n" + "="*80)
print("RQ3.2.1: COMPARISON - MECHANISMS vs CONCEPTS FOR PERFORMANCE PREDICTION")
print("="*80)

performance_results = compare_mechanisms_vs_concepts('delta_performance')
display_comparison_table(performance_results, "PERFORMANCE")


In [None]:
# =============================================================================
# RQ3.3.1: EXPERIENCE PREDICTION
# =============================================================================

print("\n" + "="*80)
print("RQ3.3.1: COMPARISON - MECHANISMS vs CONCEPTS FOR EXPERIENCE PREDICTION")
print("="*80)

experience_results = compare_mechanisms_vs_concepts('delta_experience')
display_comparison_table(experience_results, "EXPERIENCE")

In [None]:
# =============================================================================
# RQ3 VISUALIZATION: HORIZONTAL BAR PLOTS (FIXED FOR CORRECT DATA STRUCTURE)
# =============================================================================

import matplotlib.patches as mpatches
import matplotlib.patheffects as pe
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

print("\n" + "="*80)
print("RQ3 VISUALIZATION: CREATING HORIZONTAL BAR PLOTS")
print("="*80)

def create_horizontal_bar_plots_for_rq3(cheating_results, performance_results, experience_results, mechanisms):
    """Create horizontal stacked bar plots with updated mechanism names
    
    Note: Results structure is {(mechanism, concept): {group: {mechanism_r2, concept_r2}}}
    """
    
    # Define mechanism display names
    mechanism_display_names = {
        'autonomy_need_satisfaction': 'Autonomy Need Satisfaction',
        'autonomy_need_frustration': 'Autonomy Need Frustration',
        'competence_need_satisfaction': 'Competence Need Satisfaction',
        'competence_need_frustration': 'Competence Need Frustration',
        'relatedness_need_satisfaction': 'Relatedness Need Satisfaction',
        'relatedness_need_frustration': 'Relatedness Need Frustration',
        'cognitive_discomfort': 'Cognitive Discomfort',
        'moral_disengagement': 'Moral Disengagement',
        'perceived_descriptive_norms': 'Perceived Descriptive Norms',
        'perceived_injunctive_norms': 'Perceived Injunctive Norms',
        'perceived_group_identification': 'Perceived Group Identification',
        'perceived_social_sanctions': 'Perceived Social Sanctions',
        'perceived_performance_accomplishments': 'Perceived Performance Accomplishments',
        'perceived_vicarious_experience': 'Perceived Vicarious Experience',
        'perceived_verbal_persuasion': 'Perceived Verbal Persuasion',
        'perceived_emotional_arousal': 'Perceived Emotional Arousal'
    }
    
    # Define mechanism order (ALL 16 mechanisms)
    mechanism_order = [
        'autonomy_need_satisfaction', 'autonomy_need_frustration',
        'competence_need_satisfaction', 'competence_need_frustration',
        'relatedness_need_satisfaction', 'relatedness_need_frustration',
        'cognitive_discomfort', 'moral_disengagement',
        'perceived_descriptive_norms', 'perceived_injunctive_norms',
        'perceived_group_identification', 'perceived_social_sanctions',
        'perceived_performance_accomplishments', 'perceived_vicarious_experience',
        'perceived_verbal_persuasion', 'perceived_emotional_arousal'
    ]
    
    def restructure_results(results):
        """Convert {(mechanism, concept): {group: data}} to {mechanism: {group: data}}
        by averaging across concepts for the same mechanism"""
        restructured = {}
        
        for (mechanism, concept), group_data in results.items():
            if mechanism not in restructured:
                restructured[mechanism] = {}
            
            for group, metrics in group_data.items():
                if group not in restructured[mechanism]:
                    restructured[mechanism][group] = []
                restructured[mechanism][group].append(metrics)
        
        # Average the metrics across concepts
        for mechanism in restructured:
            for group in restructured[mechanism]:
                metrics_list = restructured[mechanism][group]
                avg_mechanism_r2 = np.mean([m['mechanism_r2'] for m in metrics_list])
                avg_concept_r2 = np.mean([m['concept_r2'] for m in metrics_list])
                restructured[mechanism][group] = {
                    'mechanism_r2': avg_mechanism_r2,
                    'concept_r2': avg_concept_r2
                }
        
        return restructured
    
    def prepare_df(results, keys, display_names, mech_order):
        """Prepare dataframe for plotting with direction tracking"""
        data = []
        direction_data = {}
        
        for mech in mech_order:
            if mech not in results:
                continue
            row = {'mechanism': display_names.get(mech, mech.replace('_', ' ').title())}
            for key in keys:
                if key in results[mech]:
                    r2_val = results[mech][key]['mechanism_r2']
                    row[key] = r2_val
                    
                    # Determine direction (positive or negative)
                    # For now, we'll mark as unknown since direction isn't in the data
                    direction = 'unknown'
                    name = display_names.get(mech, mech.replace('_', ' ').title())
                    direction_data[(name, key)] = direction
                else:
                    row[key] = 0
            data.append(row)
        
        if len(data) == 0:
            print(f"Warning: No data found for keys {keys}")
            return pd.DataFrame()
        
        df = pd.DataFrame(data)
        
        # Set mechanism as index
        pivot = df.set_index('mechanism')
        
        # Reindex to maintain order
        display_order = [display_names.get(m, m.replace('_', ' ').title()) 
                        for m in mech_order if m in results]
        pivot = pivot.reindex(display_order)
        
        # Attach direction data to pivot table
        pivot._direction_data = direction_data
        
        return pivot
    
    def plot_horizontal_bar(pivot, keys, colors, title, show_values=True, border_keys=None, r2_threshold=0.1):
        """Create horizontal stacked bar plot"""
        if len(pivot) == 0:
            print(f"Warning: Empty pivot table for plot")
            return
            
        fig, ax = plt.subplots(figsize=(12, 10))
        y_pos = np.arange(len(pivot.index))
        left_positions = np.zeros(len(pivot.index))
        bar_height = 0.6
        border_keys = border_keys or []

        for key in keys:
            if key not in pivot.columns:
                continue
            vals = pivot[key].values
            bars = ax.barh(y_pos, vals, left=left_positions, height=bar_height,
                           color=colors[key], label=key.replace('_', ' ').replace('Cheaters', 'Cheater'),
                           alpha=0.8, edgecolor='none', linewidth=2)

            # Add inward border effect only if key is in border_keys
            if key in border_keys:
                for bar in bars:
                    ax.barh(
                        bar.get_y()+0.3, bar.get_width(), height=bar.get_height(),
                        left=bar.get_x(), facecolor="none",
                        edgecolor="black", 
                        linewidth=2, alpha=1,
                        zorder=2
                    )

            # Show values only if above threshold
            if show_values:
                for i, (bar, val) in enumerate(zip(bars, vals)):
                    if val >= r2_threshold:
                        text = f'{val:.2f}'
                        ax.text(left_positions[i] + val/2,
                                bar.get_y() + bar.get_height()/2,
                                text,
                                ha='center', va='center', fontweight='bold', fontsize=12)

            left_positions += vals

        # Create legend
        handles = []
        labels = []

        for key in keys:
            if key not in colors:
                continue
            facecolor = colors[key]
            if key in border_keys:
                patch = mpatches.Patch(facecolor=facecolor, edgecolor="black", linewidth=2, 
                                      label=key.replace('_', ' '))
            else:
                patch = mpatches.Patch(facecolor=facecolor, edgecolor="none", 
                                      label=key.replace('_', ' '))
            handles.append(patch)
            labels.append(key.replace('_', ' ').replace('Cheaters', 'Cheater'))

        ax.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.5, 1.05), 
                 ncol=3, fontsize=13)
        ax.set_yticks(y_pos)
        ax.set_yticklabels(pivot.index, fontsize=16)
        ax.set_ylabel('Psychological Mechanisms', fontsize=14, fontweight='bold')
        ax.set_title(title, fontsize=18, fontweight='bold', pad=40)
        ax.grid(True, axis='x', alpha=0.3)
        ax.set_xlim(0, max(left_positions)*1.1 if max(left_positions) > 0 else 0.1)
        ax.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
        plt.tight_layout()
        plt.show()
    
    # Restructure results to have {mechanism: {group: metrics}} format
    cheating_restructured = restructure_results(cheating_results)
    performance_restructured = restructure_results(performance_results)
    experience_restructured = restructure_results(experience_results)
    
    # ===== PLOT 1: CHEATING BEHAVIOR PREDICTION =====
    print("Creating Plot 1: Cheating Behavior Prediction...")
    groups = ['Non-Cheaters', 'Partial-Cheaters', 'Full-Cheaters']
    group_colors = {'Non-Cheaters': '#1f77b4', 'Partial-Cheaters': '#fdbf7a', 'Full-Cheaters': '#d6604d'}
    cheating_pivot = prepare_df(cheating_restructured, groups, mechanism_display_names, mechanism_order)
    
    # Filter to show only mechanisms with any predictive power
    if len(cheating_pivot) > 0:
        cheating_pivot_filtered = cheating_pivot[(cheating_pivot.sum(axis=1) > 0)]
        
        plot_horizontal_bar(cheating_pivot_filtered, groups, group_colors,
                            'Psychological Mechanisms Show Weak Predictive Power for Cheating Behavior:\n'
                            'Most mechanisms explain <2% of variance (R² < 0.02) in cheating decisions',
                            r2_threshold=0.01)

    # ===== PLOT 2: PERFORMANCE & EXPERIENCE PREDICTION =====
    print("Creating Plot 2: Performance & Experience Prediction...")
    
    # Combine performance and experience results with suffixed keys
    performance_experience_results = {}
    
    for mechanism in mechanism_order:
        performance_experience_results[mechanism] = {}
        
        # Add performance results with _performance suffix
        if mechanism in performance_restructured:
            for group in ['Non-Cheaters', 'Partial-Cheaters', 'Full-Cheaters']:
                if group in performance_restructured[mechanism]:
                    performance_experience_results[mechanism][f'{group}_performance'] = \
                        performance_restructured[mechanism][group]
        
        # Add experience results with _experience suffix
        if mechanism in experience_restructured:
            for group in ['Non-Cheaters', 'Partial-Cheaters', 'Full-Cheaters']:
                if group in experience_restructured[mechanism]:
                    performance_experience_results[mechanism][f'{group}_experience'] = \
                        experience_restructured[mechanism][group]

    outcomes = ['Non-Cheaters_performance', 'Non-Cheaters_experience',
                'Partial-Cheaters_performance', 'Partial-Cheaters_experience',
                'Full-Cheaters_performance', 'Full-Cheaters_experience']
    
    pe_colors = {
        'Non-Cheaters_performance': '#1f77b4', 'Non-Cheaters_experience': '#6baed6',
        'Partial-Cheaters_performance': '#e6550d', 'Partial-Cheaters_experience': '#fdae6b',
        'Full-Cheaters_performance': '#c34444', 'Full-Cheaters_experience': '#fe845f'
    }
    
    pe_pivot = prepare_df(performance_experience_results, outcomes, mechanism_display_names, mechanism_order)

    # Keep only mechanisms with any non-zero predictive power
    if len(pe_pivot) > 0:
        pe_pivot_filtered = pe_pivot[(pe_pivot.sum(axis=1) > 0)]

        plot_horizontal_bar(pe_pivot_filtered, outcomes, pe_colors,
                            'Mechanisms Better Predict Performance/Experience than Cheating Behavior:\n'
                            'Several mechanisms show meaningful predictive power (R² > 0.1) for outcomes by group',
                            border_keys=[k for k in outcomes if 'performance' in k],
                            r2_threshold=0.1)

    print("✓ All visualization plots completed!")

# Execute the visualization with CORRECTED variable names
create_horizontal_bar_plots_for_rq3(
    cheating_results,  # Correct variable name
    performance_results,  # Correct variable name
    experience_results,  # Correct variable name
    mechanisms
)