In [1]:
# ============================================================================
# 0. SETUP AND DATA LOADING
# ============================================================================

import sys, os, warnings
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from itertools import combinations, product
import matplotlib.patches as mpatches

os.chdir('game-behavior-analytics/data_analysis_notebook/')
sys.path.append(os.path.abspath('utils'))
warnings.filterwarnings('ignore')
os.getcwd()
from data_utils import load_and_prepare_data
from metadata import theory_order, theory_map

# Load and preprocess data
df, concepts = load_and_prepare_data("../data/final_dataset.csv")

print("="*80)
print("RQ3: HOW DO INTERVENTIONS REDUCE CHEATING WITHOUT HARMING PERFORMANCE AND EXPERIENCE?")
print("SYSTEMATIC ANALYSIS OF PSYCHOLOGICAL MECHANISMS")
print("="*80)
print(f"Sample size: {len(df)}")
print()


RQ3: HOW DO INTERVENTIONS REDUCE CHEATING WITHOUT HARMING PERFORMANCE AND EXPERIENCE?
SYSTEMATIC ANALYSIS OF PSYCHOLOGICAL MECHANISMS
Sample size: 1232



In [9]:
# ============================================================================
# 1. DATA TYPE CLASSIFICATION AND PREPARATION
# ============================================================================

print("1. DATA TYPE CLASSIFICATION")
print("-" * 40)

# Define data types and variables
data_types = {
    'ordinal': ['cheating_behavior'],  # 0=non, 1=partial, 2=full
    'categorical': ['concept'],  # intervention concepts + control
    'continuous': []  # will be populated below
}

mechanism_renames = {
    'descriptive_norms': 'perceived_descriptive_norms',
    'injunctive_norms': 'perceived_injunctive_norms', 
    'reference_group_identification': 'perceived_group_identification',
    'social_sanctions': 'perceived_social_sanctions',
    'performance_accomplishments': 'perceived_performance_accomplishments',
    'vicarious_experience': 'perceived_vicarious_experience',
    'verbal_persuasion': 'perceived_verbal_persuasion',
    'emotional_arousal': 'perceived_emotional_arousal'
}

# Rename PME variables for clarity
pme_renames = {
    'PME_on_honest_task_completion': 'perceived_honesty',
    'PME_on_task_performance': 'perceived_performance_effect', 
    'PME_on_task_experience': 'perceived_experience_effect'
}

# Apply all renames to the dataframe
all_renames = {**mechanism_renames, **pme_renames}
df = df.rename(columns=all_renames)

print("Renamed variables:")
for old_name, new_name in all_renames.items():
    if old_name in df.columns or new_name in df.columns:
        print(f"  {old_name} → {new_name}")


# Updated mechanism list with new names
mechanisms = [
    'autonomy_need_satisfaction', 'autonomy_need_frustration',
    'competence_need_satisfaction', 'competence_need_frustration', 
    'relatedness_need_satisfaction', 'relatedness_need_frustration',
    'cognitive_discomfort', 'moral_disengagement',
    'perceived_descriptive_norms', 'perceived_injunctive_norms', 
    'perceived_group_identification', 'perceived_social_sanctions',
    'perceived_performance_accomplishments', 'perceived_vicarious_experience',
    'perceived_verbal_persuasion', 'perceived_emotional_arousal',
    'perceived_honesty', 'perceived_performance_effect',
    'perceived_experience_effect', 'word_creation_skill_level'
]

perceived_outcomes = [
    'PME_on_honest_task_completion', 'PME_on_task_performance', 'PME_on_task_experience'
]

actual_outcomes = ['cheating_behavior', 'performance', 'experience']

# All continuous variables
continuous_vars = [var for var in mechanisms if var in df.columns] + ['performance', 'experience']
data_types['continuous'] = continuous_vars

# Handle perceived_ability mapping - word_creation_skill_level IS perceived_ability
if 'word_creation_skill_level' in df.columns:
    # Rename the column to avoid confusion
    df['perceived_ability'] = df['word_creation_skill_level']
    # Remove word_creation_skill_level from mechanisms list to avoid duplication
    if 'word_creation_skill_level' in mechanisms:
        mechanisms.remove('word_creation_skill_level')
    mechanisms.append('perceived_ability')
    continuous_vars.append('perceived_ability')

print(f"Ordinal variables: {data_types['ordinal']}")
print(f"Categorical variables: {data_types['categorical']}")
print(f"Continuous variables: {len(data_types['continuous'])} variables")
print()

1. DATA TYPE CLASSIFICATION
----------------------------------------
Renamed variables:
  descriptive_norms → perceived_descriptive_norms
  injunctive_norms → perceived_injunctive_norms
  reference_group_identification → perceived_group_identification
  social_sanctions → perceived_social_sanctions
  performance_accomplishments → perceived_performance_accomplishments
  vicarious_experience → perceived_vicarious_experience
  verbal_persuasion → perceived_verbal_persuasion
  emotional_arousal → perceived_emotional_arousal
  PME_on_honest_task_completion → perceived_honesty
  PME_on_task_performance → perceived_performance_effect
  PME_on_task_experience → perceived_experience_effect
Ordinal variables: ['cheating_behavior']
Categorical variables: ['concept']
Continuous variables: 23 variables



In [10]:
# ============================================================================
# 2. THEORETICAL FRAMEWORK MAPPING
# ============================================================================

print("2. THEORETICAL FRAMEWORK MAPPING")
print("-" * 40)

# Define theoretical frameworks and their mechanisms
theoretical_frameworks = {
    'Self-Determination': {
        'concepts': ['autonomy', 'competence', 'relatedness'],
        'mechanisms': ['autonomy_need_satisfaction', 'autonomy_need_frustration',
                      'competence_need_satisfaction', 'competence_need_frustration',
                      'relatedness_need_satisfaction', 'relatedness_need_frustration']
    },
    'Cognitive-Dissonance': {
        'concepts': ['self_concept', 'cognitive_inconsistency', 'dissonance_arousal', 'dissonance_reduction'],
        'mechanisms': ['cognitive_discomfort', 'moral_disengagement']
    },
    'Self-Efficacy': {
        'concepts': ['performance_accomplishments', 'vicarious_experience', 'verbal_persuasion', 'emotional_arousal'],
        'mechanisms': ['perceived_performance_accomplishments', 'perceived_vicarious_experience', 
                      'perceived_verbal_persuasion', 'perceived_emotional_arousal']
    },
    'Social-Norms': {
        'concepts': ['descriptive_norms', 'injunctive_norms', 'social_sanctions', 'reference_group_identification'],
        'mechanisms': ['perceived_descriptive_norms', 'perceived_injunctive_norms', 
                      'perceived_group_identification', 'perceived_social_sanctions']
    },
    'Perceived-Effectiveness': {
        'concepts': [],  # No direct concepts
        'mechanisms': ['perceived_honesty', 'perceived_performance_effect', 'perceived_experience_effect']
    },
    'Individual-Differences': {
        'concepts': [],
        'mechanisms': ['perceived_ability']
    }
}

# Create concept-to-theory and mechanism-to-theory mappings
concept_to_theory = {}
mechanism_to_theory = {}

for theory, items in theoretical_frameworks.items():
    for concept in items['concepts']:
        concept_to_theory[concept] = theory
    for mechanism in items['mechanisms']:
        mechanism_to_theory[mechanism] = theory

print("Theoretical frameworks defined:")
for theory, items in theoretical_frameworks.items():
    print(f"  {theory}: {len(items['concepts'])} concepts, {len(items['mechanisms'])} mechanisms")
print()

2. THEORETICAL FRAMEWORK MAPPING
----------------------------------------
Theoretical frameworks defined:
  Self-Determination: 3 concepts, 6 mechanisms
  Cognitive-Dissonance: 4 concepts, 2 mechanisms
  Self-Efficacy: 4 concepts, 4 mechanisms
  Social-Norms: 4 concepts, 4 mechanisms
  Perceived-Effectiveness: 0 concepts, 3 mechanisms
  Individual-Differences: 0 concepts, 1 mechanisms



In [11]:
# ============================================================================
# 3. PARTIAL CORRELATION CALCULATION FUNCTION
# ============================================================================

def calculate_partial_correlation(data, var1, var2, control_vars):
    """
    Calculate partial correlation between var1 and var2, controlling for control_vars
    """
    try:
        # Prepare data
        vars_needed = [var1, var2] + control_vars
        clean_data = data[vars_needed].dropna()
        
        if len(clean_data) < 10:  # Minimum sample size
            return np.nan
        
        # Standardize all variables
        scaler = StandardScaler()
        scaled_data = pd.DataFrame(
            scaler.fit_transform(clean_data),
            columns=clean_data.columns
        )
        
        # Residualize var1 and var2 against control variables
        if len(control_vars) > 0:
            X_controls = scaled_data[control_vars].values
            
            # Residualize var1
            reg1 = LinearRegression().fit(X_controls, scaled_data[var1])
            resid1 = scaled_data[var1] - reg1.predict(X_controls)
            
            # Residualize var2
            reg2 = LinearRegression().fit(X_controls, scaled_data[var2])
            resid2 = scaled_data[var2] - reg2.predict(X_controls)
            
            # Calculate correlation between residuals
            if len(np.unique(resid1)) > 1 and len(np.unique(resid2)) > 1:
                return np.corrcoef(resid1, resid2)[0, 1]
            else:
                return np.nan
        else:
            # No control variables - simple correlation
            return scaled_data[var1].corr(scaled_data[var2])
    
    except Exception as e:
        return np.nan

In [12]:
# ============================================================================
# 4. CREATE CONCEPT DUMMY VARIABLES
# ============================================================================

print("3. CREATING CONCEPT DUMMY VARIABLES")
print("-" * 40)

# Create dummy variables for each concept
available_concepts = [c for c in theoretical_frameworks['Self-Determination']['concepts'] + 
                     theoretical_frameworks['Cognitive-Dissonance']['concepts'] + 
                     theoretical_frameworks['Self-Efficacy']['concepts'] + 
                     theoretical_frameworks['Social-Norms']['concepts'] 
                     if c in df['concept'].unique()]

for concept in available_concepts:
    df[f'concept_{concept}'] = (df['concept'] == concept).astype(int)

concept_dummies = [f'concept_{c}' for c in available_concepts]
print(f"Created {len(concept_dummies)} concept dummy variables")
print()

3. CREATING CONCEPT DUMMY VARIABLES
----------------------------------------
Created 15 concept dummy variables



In [13]:
# ============================================================================
# 5. COMPREHENSIVE PARTIAL CORRELATION ANALYSIS
# ============================================================================

def comprehensive_partial_correlation_analysis(data, group_name="All"):
    """
    Calculate partial correlations for all relevant variable combinations
    """
    print(f"4. COMPREHENSIVE PARTIAL CORRELATION ANALYSIS - {group_name.upper()}")
    print("-" * 60)
    
    # Available variables
    available_mechanisms = [m for m in mechanisms if m in data.columns and not pd.isna(data[m]).all()]
    available_outcomes = [o for o in actual_outcomes if o in data.columns and not pd.isna(data[o]).all()]
    available_concepts = [c for c in concept_dummies if c in data.columns]
    
    all_edges = []
    
    # 1. CONCEPT-TO-MECHANISM EDGES
    print(f"Calculating Concept → Mechanism edges...")
    for concept_dummy in available_concepts:
        concept_name = concept_dummy.replace('concept_', '')
        for mechanism in available_mechanisms:
            # Control for other concepts and outcomes (but not other mechanisms)
            control_vars = [c for c in available_concepts if c != concept_dummy] + available_outcomes
            control_vars = [v for v in control_vars if v in data.columns]
            
            pcorr = calculate_partial_correlation(data, concept_dummy, mechanism, control_vars)
            
            if not np.isnan(pcorr):
                all_edges.append({
                    'source': concept_name,
                    'target': mechanism,
                    'partial_correlation': pcorr,
                    'edge_type': 'concept_to_mechanism',
                    'group': group_name
                })
    
    # 2. MECHANISM-TO-MECHANISM EDGES
    print(f"Calculating Mechanism ↔ Mechanism edges...")
    for mech1, mech2 in combinations(available_mechanisms, 2):
        # Control for concepts and outcomes
        control_vars = available_concepts + available_outcomes
        control_vars = [v for v in control_vars if v in data.columns]
        
        pcorr = calculate_partial_correlation(data, mech1, mech2, control_vars)
        
        if not np.isnan(pcorr):
            all_edges.append({
                'source': mech1,
                'target': mech2,
                'partial_correlation': pcorr,
                'edge_type': 'mechanism_to_mechanism',
                'group': group_name
            })
    
    # 3. MECHANISM-TO-OUTCOME EDGES
    print(f"Calculating Mechanism → Outcome edges...")
    for mechanism in available_mechanisms:
        for outcome in available_outcomes:
            if mechanism == outcome:  # Skip self-loops
                continue
                
            # Control for concepts and other outcomes
            control_vars = available_concepts + [o for o in available_outcomes if o != outcome]
            control_vars = [v for v in control_vars if v in data.columns]
            
            pcorr = calculate_partial_correlation(data, mechanism, outcome, control_vars)
            
            if not np.isnan(pcorr):
                all_edges.append({
                    'source': mechanism,
                    'target': outcome,
                    'partial_correlation': pcorr,
                    'edge_type': 'mechanism_to_outcome',
                    'group': group_name
                })
    
    # 4. OUTCOME-TO-OUTCOME EDGES
    print(f"Calculating Outcome ↔ Outcome edges...")
    for outcome1, outcome2 in combinations(available_outcomes, 2):
        # Control for concepts and mechanisms
        control_vars = available_concepts + available_mechanisms
        control_vars = [v for v in control_vars if v in data.columns]
        
        pcorr = calculate_partial_correlation(data, outcome1, outcome2, control_vars)
        
        if not np.isnan(pcorr):
            all_edges.append({
                'source': outcome1,
                'target': outcome2,
                'partial_correlation': pcorr,
                'edge_type': 'outcome_to_outcome',
                'group': group_name
            })
    
    print(f"Total edges calculated: {len(all_edges)}")
    return pd.DataFrame(all_edges)

In [14]:
# ============================================================================
# 6. CALCULATE PARTIAL CORRELATIONS FOR ALL GROUPS
# ============================================================================

# Calculate for overall sample
all_edges_df = comprehensive_partial_correlation_analysis(df, "All")

# Calculate for each cheating group
cheating_groups = {
    'non_cheaters': df[df['cheating_behavior'] == 0],
    'partial_cheaters': df[df['cheating_behavior'] == 1], 
    'full_cheaters': df[df['cheating_behavior'] == 2]
}

group_edges = []
for group_name, group_data in cheating_groups.items():
    if len(group_data) > 20:  # Minimum sample size
        group_df = comprehensive_partial_correlation_analysis(group_data, group_name)
        group_edges.append(group_df)

# Combine all group analyses
if group_edges:
    combined_edges = pd.concat([all_edges_df] + group_edges, ignore_index=True)
else:
    combined_edges = all_edges_df

print(f"\nCOMBINED RESULTS SUMMARY:")
print(f"Total edges across all groups: {len(combined_edges)}")
print("Edges by type:")
print(combined_edges.groupby(['edge_type', 'group']).size())

# Save to CSV
combined_edges.to_csv('comprehensive_partial_correlations.csv', index=False)
print(f"\nSaved to: comprehensive_partial_correlations.csv")
print()


4. COMPREHENSIVE PARTIAL CORRELATION ANALYSIS - ALL
------------------------------------------------------------
Calculating Concept → Mechanism edges...
Calculating Mechanism ↔ Mechanism edges...
Calculating Mechanism → Outcome edges...
Calculating Outcome ↔ Outcome edges...
Total edges calculated: 553
4. COMPREHENSIVE PARTIAL CORRELATION ANALYSIS - NON_CHEATERS
------------------------------------------------------------
Calculating Concept → Mechanism edges...
Calculating Mechanism ↔ Mechanism edges...
Calculating Mechanism → Outcome edges...
Calculating Outcome ↔ Outcome edges...
Total edges calculated: 531
4. COMPREHENSIVE PARTIAL CORRELATION ANALYSIS - PARTIAL_CHEATERS
------------------------------------------------------------
Calculating Concept → Mechanism edges...
Calculating Mechanism ↔ Mechanism edges...
Calculating Mechanism → Outcome edges...
Calculating Outcome ↔ Outcome edges...
Total edges calculated: 531
4. COMPREHENSIVE PARTIAL CORRELATION ANALYSIS - FULL_CHEATERS
-

In [22]:
# ============================================================================
# 7. RQ3.1: MECHANISM ACTIVATION ANALYSIS
# ============================================================================

def analyze_activation(edges_df, expected_map, group="All"):
    """Analyze mechanism activation for given group"""
    concept_edges = edges_df[(edges_df['edge_type']=='concept_to_mechanism') & (edges_df['group']==group)]
    results = []
    
    for concept, expected_mechs in expected_map.items():
        c_edges = concept_edges[concept_edges['source']==concept]
        if len(c_edges)==0: continue
        
        print(f"\n{concept.upper().replace('_',' ')} CONCEPT:")
        print("-"*40)
        
        for mech in expected_mechs:
            match = c_edges[c_edges['target']==mech]
            if len(match)>0:
                pcorr = match.iloc[0]['partial_correlation']
                activated = abs(pcorr)>0.1
                print(f"  → {mech}: r={pcorr:.3f} {'✓ ACTIVATED' if activated else '✗ weak'}")
                results.append({'concept':concept,'mechanism':mech,'correlation':pcorr,'activated':activated,'group':group})
        
        # Unexpected activations
        unexpected = c_edges[(~c_edges['target'].isin(expected_mechs)) & (c_edges['partial_correlation'].abs()>0.1)]
        if len(unexpected)>0:
            print("  Unexpected strong activations:")
            for _,e in unexpected.head(3).iterrows():
                print(f"    → {e['target']}: r={e['partial_correlation']:.3f}")
    return results

print("7a. RQ3.1: DO INTERVENTIONS ACTIVATE THEIR INTENDED MECHANISMS?")
print("="*70)

# Expected relationships (reuse from previous analysis)
expected_relationships = {
    'autonomy':['autonomy_need_satisfaction','autonomy_need_frustration'],
    'competence':['competence_need_satisfaction','competence_need_frustration'],
    'relatedness':['relatedness_need_satisfaction','relatedness_need_frustration'],
    'self_concept':['cognitive_discomfort'],'cognitive_inconsistency':['cognitive_discomfort'],
    'dissonance_arousal':['cognitive_discomfort'],'dissonance_reduction':['cognitive_discomfort'],
    'performance_accomplishments':['performance_accomplishments'],'vicarious_experience':['vicarious_experience'],
    'verbal_persuasion':['verbal_persuasion'],'emotional_arousal':['emotional_arousal'],
    'descriptive_norms':['descriptive_norms'],'injunctive_norms':['injunctive_norms'],
    'social_sanctions':['social_sanctions'],'reference_group_identification':['reference_group_identification']
}

# Overall activation analysis
overall_results = analyze_activation(combined_edges, expected_relationships, "All")

# Group-wise activation analysis
print("\n7b. RQ3.1 BY CHEATER GROUP: DIFFERENTIAL MECHANISM ACTIVATION")
print("="*70)

group_results = {}
for group in ['non_cheaters','partial_cheaters','full_cheaters']:
    print(f"\n{group.replace('_',' ').upper()} GROUP:")
    group_results[group] = analyze_activation(combined_edges, expected_relationships, group)

# Activation rate summary with average correlations
print(f"\nACTIVATION RATE COMPARISON:")
print("-"*40)
for group, results in group_results.items():
    if results:
        activated = sum(1 for r in results if r['activated'])
        total = len(results)
        avg_corr = np.mean([abs(r['correlation']) for r in results])
        avg_activated_corr = np.mean([abs(r['correlation']) for r in results if r['activated']])
        print(f"{group.replace('_',' ').title()}: {activated}/{total} ({activated/total:.1%}) activation rate")
        print(f"  Average |correlation|: {avg_corr:.3f} (activated only: {avg_activated_corr:.3f})")

print()


7a. RQ3.1: DO INTERVENTIONS ACTIVATE THEIR INTENDED MECHANISMS?

AUTONOMY CONCEPT:
----------------------------------------
  → autonomy_need_satisfaction: r=0.025 ✗ weak
  → autonomy_need_frustration: r=-0.014 ✗ weak

COMPETENCE CONCEPT:
----------------------------------------
  → competence_need_satisfaction: r=0.059 ✗ weak
  → competence_need_frustration: r=-0.091 ✗ weak
  Unexpected strong activations:
    → perceived_emotional_arousal: r=0.108

RELATEDNESS CONCEPT:
----------------------------------------
  → relatedness_need_satisfaction: r=0.020 ✗ weak
  → relatedness_need_frustration: r=-0.010 ✗ weak

SELF CONCEPT CONCEPT:
----------------------------------------
  → cognitive_discomfort: r=-0.055 ✗ weak

COGNITIVE INCONSISTENCY CONCEPT:
----------------------------------------
  → cognitive_discomfort: r=0.001 ✗ weak

DISSONANCE AROUSAL CONCEPT:
----------------------------------------
  → cognitive_discomfort: r=0.011 ✗ weak

DISSONANCE REDUCTION CONCEPT:
-------------------

In [None]:
# ============================================================================
# 8. RQ3.2: MECHANISM INTERCONNECTIONS
# ============================================================================

def analyze_interconnections(edges_df, group="All", show_top=20):
    """Analyze mechanism interconnections"""
    mech_edges = edges_df[(edges_df['edge_type']=='mechanism_to_mechanism') & (edges_df['group']==group)]
    
    # Filter to theoretical mechanisms only
    theory_mechs = sum([v['mechanisms'] for k,v in theoretical_frameworks.items() 
                       if k not in ['Perceived-Effectiveness','Individual-Differences']],[])
    mech_edges = mech_edges[(mech_edges['source'].isin(theory_mechs)) & 
                           (mech_edges['target'].isin(theory_mechs))]
    
    # Classify connections
    mech_edges['source_theory'] = mech_edges['source'].map(mechanism_to_theory)
    mech_edges['target_theory'] = mech_edges['target'].map(mechanism_to_theory)
    mech_edges['conn_type'] = mech_edges.apply(
        lambda r: 'within' if r['source_theory']==r['target_theory'] else 'cross', axis=1)
    
    # Add absolute correlation column for sorting
    mech_edges['abs_corr'] = mech_edges['partial_correlation'].abs()
    
    within = mech_edges[mech_edges['conn_type']=='within'].nlargest(show_top,'abs_corr')
    cross = mech_edges[mech_edges['conn_type']=='cross'].nlargest(show_top,'abs_corr')
    
    return {'within':len(mech_edges[mech_edges['conn_type']=='within']),
            'cross':len(mech_edges[mech_edges['conn_type']=='cross']),
            'within_top':within,'cross_top':cross}

print("8a. RQ3.2: HOW DO THEORETICAL MECHANISMS INTERCONNECT?")
print("="*70)

# Overall interconnection analysis
overall_interconn = analyze_interconnections(combined_edges,"All")
print(f"Theoretical mechanism interconnections:")
print(f"  Within-theory: {overall_interconn['within']} (avg |r|: {overall_interconn['within_top']['partial_correlation'].abs().mean():.3f})")
print(f"  Cross-theory: {overall_interconn['cross']} (avg |r|: {overall_interconn['cross_top']['partial_correlation'].abs().mean():.3f})")

print(f"\nSTRONGEST WITHIN-THEORY CONNECTIONS:")
for _,e in overall_interconn['within_top'].iterrows():
    print(f"  {e['source']} ↔ {e['target']}: r={e['partial_correlation']:+.3f} ({e['source_theory']})")

print(f"\nSTRONGEST CROSS-THEORY CONNECTIONS:")
for _,e in overall_interconn['cross_top'].iterrows():
    print(f"  {e['source']} ({e['source_theory']}) ↔ {e['target']} ({e['target_theory']}): r={e['partial_correlation']:+.3f}")

print("\n\n8b. RQ3.2 BY CHEATER GROUP: DIFFERENTIAL MECHANISM INTERCONNECTIONS")
print("=" * 70)

def analyze_mechanism_interconnections_by_group():
    """Analyze how mechanism interconnections differ across cheater groups"""
    
    try:
        combined_edges = pd.read_csv('comprehensive_partial_correlations.csv')
    except:
        print("Error: comprehensive_partial_correlations.csv not found")
        return
    
    # Define theoretical mechanisms (excluding perceived effectiveness)
    theoretical_mechanisms = [
        'autonomy_need_satisfaction', 'autonomy_need_frustration',
        'competence_need_satisfaction', 'competence_need_frustration',
        'relatedness_need_satisfaction', 'relatedness_need_frustration',
        'cognitive_discomfort', 'moral_disengagement',
        'performance_accomplishments', 'vicarious_experience', 'verbal_persuasion', 'emotional_arousal',
        'descriptive_norms', 'injunctive_norms', 'reference_group_identification', 'social_sanctions'
    ]
    
    # Theory mapping for mechanisms (update based on actual mechanism names)
    mechanism_to_theory = {}
    
    # Map mechanisms to theories based on naming patterns
    for mech in theoretical_mechanisms:
        if any(term in mech for term in ['autonomy', 'competence', 'relatedness']):
            mechanism_to_theory[mech] = 'Self-Determination'
        elif any(term in mech for term in ['cognitive_discomfort', 'moral_disengagement', 'dissonance']):
            mechanism_to_theory[mech] = 'Cognitive-Dissonance'
        elif any(term in mech for term in ['performance_accomplishments', 'vicarious_experience', 
                                         'verbal_persuasion', 'emotional_arousal']):
            mechanism_to_theory[mech] = 'Self-Efficacy'
        elif any(term in mech for term in ['descriptive_norms', 'injunctive_norms', 
                                         'reference_group', 'social_sanctions']):
            mechanism_to_theory[mech] = 'Social-Norms'
        else:
            mechanism_to_theory[mech] = 'Other'
    
    print(f"\nMechanism-to-theory mapping:")
    for theory in ['Self-Determination', 'Cognitive-Dissonance', 'Self-Efficacy', 'Social-Norms', 'Other']:
        theory_mechs = [m for m, t in mechanism_to_theory.items() if t == theory]
        if theory_mechs:
            print(f"  {theory}: {len(theory_mechs)} mechanisms")
    
    groups = ['non_cheaters', 'partial_cheaters', 'full_cheaters']
    group_interconnection_summary = {}
    
    for group in groups:
        print(f"\n{group.replace('_', ' ').upper()} GROUP INTERCONNECTIONS:")
        print("-" * 50)
        
        # Filter mechanism-to-mechanism edges for this group
        group_edges = combined_edges[
            (combined_edges['group'] == group) & 
            (combined_edges['edge_type'] == 'mechanism_to_mechanism') &
            (combined_edges['source'].isin(theoretical_mechanisms)) &
            (combined_edges['target'].isin(theoretical_mechanisms))
        ].copy()
        
        if len(group_edges) == 0:
            print("No mechanism-to-mechanism edges found for this group")
            continue
        
        # Classify connections
        group_edges['source_theory'] = group_edges['source'].map(mechanism_to_theory)
        group_edges['target_theory'] = group_edges['target'].map(mechanism_to_theory)
        group_edges['connection_type'] = group_edges.apply(
            lambda row: 'within_theory' if row['source_theory'] == row['target_theory'] else 'cross_theory', 
            axis=1
        )
        
        within_theory = group_edges[group_edges['connection_type'] == 'within_theory']
        cross_theory = group_edges[group_edges['connection_type'] == 'cross_theory']
        
        print(f"Within-theory connections: {len(within_theory)}")
        print(f"Cross-theory connections: {len(cross_theory)}")
        
        # Show strongest within-theory connections
        if len(within_theory) > 0:
            print(f"\nSTRONGEST WITHIN-THEORY CONNECTIONS:")
            within_theory['abs_corr'] = within_theory['partial_correlation'].abs()
            within_theory_top = within_theory.nlargest(5, 'abs_corr')
            for _, edge in within_theory_top.iterrows():
                print(f"  {edge['source']} ↔ {edge['target']}: r = {edge['partial_correlation']:+.3f} ({edge['source_theory']})")
        
        # Show strongest cross-theory connections
        if len(cross_theory) > 0:
            print(f"\nSTRONGEST CROSS-THEORY CONNECTIONS:")
            cross_theory['abs_corr'] = cross_theory['partial_correlation'].abs()
            cross_theory_top = cross_theory.nlargest(5, 'abs_corr')
            for _, edge in cross_theory_top.iterrows():
                print(f"  {edge['source']} ({edge['source_theory']}) ↔ {edge['target']} ({edge['target_theory']}): r = {edge['partial_correlation']:+.3f}")
        
        # Store summary
        group_interconnection_summary[group] = {
            'within_theory': len(within_theory),
            'cross_theory': len(cross_theory),
            'total': len(group_edges),
            'cross_theory_ratio': len(cross_theory) / len(group_edges) if len(group_edges) > 0 else 0
        }
    
    # Compare interconnection patterns
    print(f"\nINTERCONNECTION PATTERN COMPARISON:")
    print("-" * 40)
    for group, summary in group_interconnection_summary.items():
        if summary['total'] > 0:
            print(f"{group.replace('_', ' ').title()}: {summary['cross_theory']}/{summary['total']} "
                  f"({summary['cross_theory_ratio']:.1%}) cross-theory connections")
        else:
            print(f"{group.replace('_', ' ').title()}: No mechanism connections found")
    
    return group_interconnection_summary

group_interconnection_results = analyze_mechanism_interconnections_by_group()

8a. RQ3.2: HOW DO THEORETICAL MECHANISMS INTERCONNECT?
Theoretical mechanism interconnections:
  Within-theory: 28 (avg |r|: 0.557)
  Cross-theory: 92 (avg |r|: 0.512)

STRONGEST WITHIN-THEORY CONNECTIONS:
  perceived_vicarious_experience ↔ perceived_verbal_persuasion: r=+0.810 (Self-Efficacy)
  perceived_verbal_persuasion ↔ perceived_emotional_arousal: r=+0.785 (Self-Efficacy)
  perceived_group_identification ↔ perceived_social_sanctions: r=+0.763 (Social-Norms)
  perceived_performance_accomplishments ↔ perceived_verbal_persuasion: r=+0.719 (Self-Efficacy)
  perceived_vicarious_experience ↔ perceived_emotional_arousal: r=+0.699 (Self-Efficacy)
  perceived_descriptive_norms ↔ perceived_group_identification: r=+0.681 (Social-Norms)
  perceived_descriptive_norms ↔ perceived_injunctive_norms: r=+0.661 (Social-Norms)
  perceived_descriptive_norms ↔ perceived_social_sanctions: r=+0.640 (Social-Norms)
  perceived_performance_accomplishments ↔ perceived_emotional_arousal: r=+0.624 (Self-Effic

In [30]:
# ============================================================================
# 9. RQ3.3: OUTCOME PREDICTORS
# ============================================================================

def analyze_predictors(edges_df, group="All", show_top=8):
    """Analyze mechanism-outcome predictors"""
    mech_out = edges_df[(edges_df['edge_type']=='mechanism_to_outcome') & 
                       (edges_df['group']==group) & 
                       (edges_df['target'].isin(['cheating_behavior','performance','experience']))]
    
    predictors = {}
    for outcome in ['cheating_behavior','performance','experience']:
        out_edges = mech_out[mech_out['target']==outcome]
        if len(out_edges)>0:
            out_edges['abs_corr'] = out_edges['partial_correlation'].abs()
            top_pred = out_edges.nlargest(show_top,'abs_corr')
            predictors[outcome] = top_pred
    return predictors

print("9a. RQ3.3: WHICH MECHANISMS MOST STRONGLY PREDICT OUTCOMES?")
print("="*70)

# Overall predictor analysis
overall_pred = analyze_predictors(combined_edges,"All")
print("Mechanism → Actual Outcome relationships:")
for outcome,preds in overall_pred.items():
    if len(preds)>0:
        avg_pred_strength = preds['partial_correlation'].abs().mean()
        print(f"\nSTRONGEST PREDICTORS OF {outcome.upper()} (avg |r|: {avg_pred_strength:.3f}):")
        for _,e in preds.iterrows():
            theory = mechanism_to_theory.get(e['source'],'Unknown')
            print(f"  {e['source']}: r={e['partial_correlation']:+.3f} ({theory})")

print("\n\n9b. RQ3.3 BY CHEATER GROUP: DIFFERENTIAL OUTCOME PREDICTORS")
print("=" * 70)

def analyze_outcome_predictors_by_group():
    """Analyze how mechanism-outcome relationships vary across cheater groups"""
    
    try:
        combined_edges = pd.read_csv('comprehensive_partial_correlations.csv')
    except:
        print("Error: comprehensive_partial_correlations.csv not found")
        return
    
    # Theory mapping for mechanisms
    mechanism_to_theory = {
        'autonomy_need_satisfaction': 'Self-Determination', 'autonomy_need_frustration': 'Self-Determination',
        'competence_need_satisfaction': 'Self-Determination', 'competence_need_frustration': 'Self-Determination',
        'relatedness_need_satisfaction': 'Self-Determination', 'relatedness_need_frustration': 'Self-Determination',
        'cognitive_discomfort': 'Cognitive-Dissonance', 'moral_disengagement': 'Cognitive-Dissonance',
        'performance_accomplishments': 'Self-Efficacy', 'vicarious_experience': 'Self-Efficacy',
        'verbal_persuasion': 'Self-Efficacy', 'emotional_arousal': 'Self-Efficacy',
        'descriptive_norms': 'Social-Norms', 'injunctive_norms': 'Social-Norms',
        'reference_group_identification': 'Social-Norms', 'social_sanctions': 'Social-Norms',
        'PME_on_honest_task_completion': 'Perceived-Effectiveness',
        'PME_on_task_performance': 'Perceived-Effectiveness',
        'PME_on_task_experience': 'Perceived-Effectiveness'
    }
    
    groups = ['non_cheaters', 'partial_cheaters', 'full_cheaters']
    outcomes = ['cheating_behavior', 'performance', 'experience']
    group_predictor_summary = {}
    
    for group in groups:
        print(f"\n{group.replace('_', ' ').upper()} GROUP PREDICTORS:")
        print("-" * 50)
        
        group_predictors = {}
        
        for outcome in outcomes:
            # Filter mechanism-to-outcome edges for this group and outcome
            outcome_edges = combined_edges[
                (combined_edges['group'] == group) & 
                (combined_edges['edge_type'] == 'mechanism_to_outcome') &
                (combined_edges['target'] == outcome)
            ].copy()
            
            if len(outcome_edges) > 0:
                # Sort by absolute correlation and show top predictors
                outcome_edges['abs_correlation'] = outcome_edges['partial_correlation'].abs()
                top_predictors = outcome_edges.nlargest(8, 'abs_correlation')
                
                print(f"\nSTRONGEST PREDICTORS OF {outcome.upper()}:")
                for _, edge in top_predictors.iterrows():
                    theory = mechanism_to_theory.get(edge['source'], 'Unknown')
                    print(f"  {edge['source']}: r = {edge['partial_correlation']:+.3f} ({theory})")
                
                group_predictors[outcome] = top_predictors.to_dict('records')
        
        group_predictor_summary[group] = group_predictors
    
    # Compare strongest predictors across groups for each outcome
    print(f"\nCOMPARATIVE ANALYSIS:")
    print("=" * 50)
    
    for outcome in outcomes:
        print(f"\n{outcome.upper()} - TOP PREDICTOR BY GROUP:")
        print("-" * 40)
        
        for group in groups:
            if outcome in group_predictor_summary[group] and group_predictor_summary[group][outcome]:
                top_predictor = group_predictor_summary[group][outcome][0]
                theory = mechanism_to_theory.get(top_predictor['source'], 'Unknown')
                print(f"{group.replace('_', ' ').title()}: {top_predictor['source']} "
                      f"(r = {top_predictor['partial_correlation']:+.3f}) [{theory}]")
    
    return group_predictor_summary

group_predictor_results = analyze_outcome_predictors_by_group()

9a. RQ3.3: WHICH MECHANISMS MOST STRONGLY PREDICT OUTCOMES?
Mechanism → Actual Outcome relationships:

STRONGEST PREDICTORS OF CHEATING_BEHAVIOR (avg |r|: 0.164):
  perceived_group_identification: r=+0.187 (Social-Norms)
  perceived_vicarious_experience: r=+0.183 (Self-Efficacy)
  perceived_descriptive_norms: r=+0.183 (Social-Norms)
  perceived_social_sanctions: r=+0.180 (Social-Norms)
  relatedness_need_satisfaction: r=+0.160 (Self-Determination)
  perceived_verbal_persuasion: r=+0.158 (Self-Efficacy)
  perceived_emotional_arousal: r=+0.130 (Self-Efficacy)
  perceived_injunctive_norms: r=+0.125 (Social-Norms)

STRONGEST PREDICTORS OF PERFORMANCE (avg |r|: 0.221):
  competence_need_frustration: r=-0.331 (Self-Determination)
  perceived_honesty: r=-0.226 (Perceived-Effectiveness)
  competence_need_satisfaction: r=+0.226 (Self-Determination)
  perceived_group_identification: r=-0.205 (Social-Norms)
  perceived_social_sanctions: r=-0.202 (Social-Norms)
  perceived_performance_effect: r=-0

In [31]:
# ============================================================================
# 10. RQ3.4: PERCEIVED-ACTUAL ALIGNMENT
# ============================================================================

def analyze_alignment(edges_df, group="All"):
    """Analyze perceived-actual outcome alignment"""
    perceived_vars = ['perceived_honesty','perceived_performance_effect','perceived_experience_effect']
    actual_vars = ['cheating_behavior','performance','experience']
    
    alignments = []
    for perc in perceived_vars:
        for act in actual_vars:
            # Look for direct edges between perceived and actual
            align_edges = edges_df[(edges_df['group']==group) & 
                                  (edges_df['source']==perc) & 
                                  (edges_df['target']==act)]
            if len(align_edges)>0:
                corr = align_edges.iloc[0]['partial_correlation']
                alignments.append({'perceived':perc,'actual':act,'correlation':corr})
                print(f"  {perc} → {act}: r={corr:.3f}")
    
    return alignments

print("10a. RQ3.4: DO PERCEIVED OUTCOMES ALIGN WITH ACTUAL OUTCOMES?")
print("="*70)

# Overall alignment analysis
print("Perceived vs Actual Outcome Correlations:")
overall_align = analyze_alignment(combined_edges,"All")
if overall_align:
    overall_avg_align = np.mean([abs(a['correlation']) for a in overall_align])
    print(f"Overall average perceived-actual alignment: {overall_avg_align:.3f}")
else:
    print("No perceived-actual alignment edges found")

print("\n\n10b. RQ3.4 BY CHEATER GROUP: PERCEIVED-ACTUAL ALIGNMENT DIFFERENCES")
print("=" * 70)

def analyze_perceived_actual_alignment_by_group():
    """Analyze how perceived-actual outcome alignment varies across cheater groups"""
    
    try:
        combined_edges = pd.read_csv('comprehensive_partial_correlations.csv')
    except:
        print("Error: comprehensive_partial_correlations.csv not found")
        return
    
    groups = ['non_cheaters', 'partial_cheaters', 'full_cheaters']
    perceived_outcomes = ['PME_on_honest_task_completion', 'PME_on_task_performance', 'PME_on_task_experience']
    actual_outcomes = ['cheating_behavior', 'performance', 'experience']
    
    group_alignment_summary = {}
    
    for group in groups:
        print(f"\n{group.replace('_', ' ').upper()} GROUP ALIGNMENT:")
        print("-" * 45)
        
        group_alignments = []
        
        # Look for edges between perceived and actual outcomes
        for perceived in perceived_outcomes:
            for actual in actual_outcomes:
                # Find edge from perceived to actual outcome
                alignment_edge = combined_edges[
                    (combined_edges['group'] == group) & 
                    (combined_edges['source'] == perceived) &
                    (combined_edges['target'] == actual)
                ]
                
                if len(alignment_edge) > 0:
                    correlation = alignment_edge.iloc[0]['partial_correlation']
                    print(f"  {perceived} → {actual}: r = {correlation:.3f}")
                    
                    group_alignments.append({
                        'perceived': perceived,
                        'actual': actual,
                        'correlation': correlation
                    })
        
        # Calculate average alignment for this group
        if group_alignments:
            avg_alignment = np.mean([abs(align['correlation']) for align in group_alignments])
            print(f"\nAverage perceived-actual alignment: {avg_alignment:.3f}")
        else:
            avg_alignment = 0
            print(f"\nNo perceived-actual alignment edges found")
        
        group_alignment_summary[group] = {
            'alignments': group_alignments,
            'avg_alignment': avg_alignment
        }
    
    # Compare alignment across groups
    print(f"\nALIGNMENT COMPARISON ACROSS GROUPS:")
    print("-" * 40)
    for group, summary in group_alignment_summary.items():
        print(f"{group.replace('_', ' ').title()}: Average alignment = {summary['avg_alignment']:.3f}")
    
    return group_alignment_summary

group_alignment_results = analyze_perceived_actual_alignment_by_group()


10a. RQ3.4: DO PERCEIVED OUTCOMES ALIGN WITH ACTUAL OUTCOMES?
Perceived vs Actual Outcome Correlations:
  perceived_honesty → cheating_behavior: r=0.115
  perceived_honesty → performance: r=-0.226
  perceived_honesty → experience: r=0.252
  perceived_performance_effect → cheating_behavior: r=0.107
  perceived_performance_effect → performance: r=-0.201
  perceived_performance_effect → experience: r=0.386
  perceived_experience_effect → cheating_behavior: r=0.094
  perceived_experience_effect → performance: r=-0.156
  perceived_experience_effect → experience: r=0.290
Overall average perceived-actual alignment: 0.203


10b. RQ3.4 BY CHEATER GROUP: PERCEIVED-ACTUAL ALIGNMENT DIFFERENCES

NON CHEATERS GROUP ALIGNMENT:
---------------------------------------------

No perceived-actual alignment edges found

PARTIAL CHEATERS GROUP ALIGNMENT:
---------------------------------------------

No perceived-actual alignment edges found

FULL CHEATERS GROUP ALIGNMENT:
--------------------------------