In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statistics
from scipy import stats

### Muller-Lyer Replication and Color Study Final Report - Analysis and Figures

This notebook contains the final data analysis that accompanies our report. For complete details on the data collection and data cleaning steps, see the project repository: https://github.com/m6urns/muller-lyer-replication

In [None]:
# Load both datasets used throughout the notebook: the class (intermittent)
# sessions and the external (intensive) sessions.
data_class, data_intensive = (
    pd.read_csv('data/data_class.csv'),
    pd.read_csv('data/data_intensive.csv'),
)

#### Class (Intermittent Sessions)

In [None]:
def analyze_group_performance(df):
    """Compare first-day and last-day accuracy and response time per group.

    The input frame is left untouched; all derived columns are added to an
    internal copy.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``is_correct``, ``speed_group`` (strings of
        the form ``"<main group> - <speed>"``), ``day`` and ``response_time``.

    Returns
    -------
    dict
        ``'metrics'``: DataFrame with one row per (main_group,
        speed_condition, day_type) combination that has at least one row,
        holding mean/std of accuracy and response time plus the row count.
        ``'stats'``: dict mapping ``"<group>_accuracy"`` and ``"<group>_rt"``
        to the ``scipy.stats.ttest_ind`` result comparing first-day rows with
        last-day rows (speeds pooled). Every row is treated as an independent
        observation.

    Notes
    -----
    Also prints the row count per (main_group, speed_condition) as a sanity
    check. Only 'Group 1'/'Group 2' and 'Fast'/'Slow' are reported.
    """
    groups = ('Group 1', 'Group 2')
    speeds = ('Fast', 'Slow')
    day_types = ('First', 'Last')
    metric_columns = [
        'main_group', 'speed_condition', 'day_type', 'accuracy',
        'response_time', 'n_trials', 'std_accuracy', 'std_rt',
    ]

    # Work on a copy so the caller's DataFrame does not silently gain columns.
    trials = df.copy()

    # Convert is_correct to numeric (1 for True, 0 for False)
    trials['is_correct_numeric'] = trials['is_correct'].astype(int)

    # Split "<main group> - <speed>" once and reuse both halves.
    group_parts = trials['speed_group'].str.split(' - ')
    trials['main_group'] = group_parts.str[0]
    trials['speed_condition'] = group_parts.str[1]

    # Print data validation
    print("Data distribution across groups:")
    print(trials.groupby(['main_group', 'speed_condition']).size())

    # First and last day are taken over the whole frame, not per group.
    first_day = trials['day'].min()
    last_day = trials['day'].max()

    # Keep only those two days. If the data spans a single day the mapping
    # collapses to {day: 'Last'}, so no 'First' rows (and no t-tests) result.
    first_last_data = trials[trials['day'].isin([first_day, last_day])].copy()
    first_last_data['day_type'] = first_last_data['day'].map(
        {first_day: 'First', last_day: 'Last'}
    )

    # Descriptive metrics for each group / speed / day-type cell.
    metrics = []
    for group in groups:
        for speed in speeds:
            for day_type in day_types:
                mask = ((first_last_data['main_group'] == group) &
                        (first_last_data['speed_condition'] == speed) &
                        (first_last_data['day_type'] == day_type))
                group_data = first_last_data[mask]
                if group_data.empty:
                    continue

                metrics.append({
                    'main_group': group,
                    'speed_condition': speed,
                    'day_type': day_type,
                    'accuracy': group_data['is_correct_numeric'].mean(),
                    'response_time': group_data['response_time'].mean(),
                    'n_trials': len(group_data),
                    'std_accuracy': group_data['is_correct_numeric'].std(),
                    'std_rt': group_data['response_time'].std()
                })

    # Explicit columns keep the schema stable even when nothing matched.
    metrics_df = pd.DataFrame(metrics, columns=metric_columns)

    # Independent-samples t-tests, first day vs last day, per group.
    stats_results = {}
    measures = (('accuracy', 'is_correct_numeric'), ('rt', 'response_time'))
    for group in groups:
        group_rows = first_last_data[first_last_data['main_group'] == group]
        first_rows = group_rows[group_rows['day_type'] == 'First']
        last_rows = group_rows[group_rows['day_type'] == 'Last']

        for label, column in measures:
            first_values = first_rows[column]
            last_values = last_rows[column]
            # Skip the test when either side has no data.
            if len(first_values) > 0 and len(last_values) > 0:
                stats_results[f'{group}_{label}'] = stats.ttest_ind(first_values, last_values)

    return {
        'metrics': metrics_df,
        'stats': stats_results,
    }

In [None]:
# First-day vs last-day comparison for the class (intermittent) sessions.
results = analyze_group_performance(data_class)

# Summary table first, then one short report per t-test.
print("\nMetrics Summary:", results['metrics'], sep="\n")

print("\nStatistical Tests:")
for test_name, test_result in results['stats'].items():
    print(
        f"\n{test_name}:",
        f"t-statistic: {test_result.statistic:.3f}",
        f"p-value: {test_result.pvalue:.3f}",
        sep="\n",
    )

In [None]:
def analyze_arrow_length(df, line_plot_title="Accuracy by Length Difference",
                        bar_plot_title="Accuracy by Length Difference Category"):
    """Relate the absolute length difference of each row to accuracy.

    Builds a per-bin accuracy table, draws a two-panel figure and computes the
    point-biserial correlation between absolute length difference and
    correctness. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``is_correct`` and ``actual_difference``.
    line_plot_title : str
        Title of the left panel (accuracy vs absolute difference).
    bar_plot_title : str
        Title of the right panel (mean accuracy per difference bin).

    Returns
    -------
    dict
        ``'length_stats'``: DataFrame indexed by bin label with the columns
        ``mean``, ``std`` and ``count`` (rounded to 3 decimals).
        ``'length_correlation'``: result of ``scipy.stats.pointbiserialr``.
        ``'plot'``: the matplotlib Figure.
    """
    # Work on a copy so the caller's DataFrame does not silently gain columns.
    trials = df.copy()

    # Convert boolean to numeric
    trials['is_correct_numeric'] = trials['is_correct'].astype(int)

    # Only the magnitude of the difference matters here, not its sign.
    trials['length_diff_abs'] = trials['actual_difference'].abs()

    # Bins are [0, 5], (5, 15], (15, 25], (25, 35], (35, inf).
    # include_lowest=True keeps rows with a difference of exactly 0 in the
    # '0-5' bin; without it they fall outside every bin and are dropped.
    trials['diff_category'] = pd.cut(
        trials['length_diff_abs'],
        bins=[0, 5, 15, 25, 35, float('inf')],
        labels=['0-5', '6-15', '16-25', '26-35', '35+'],
        include_lowest=True,
    )

    # observed=False keeps every bin in the table, even an empty one, and
    # pins the behaviour regardless of the pandas default.
    length_accuracy = (
        trials.groupby('diff_category', observed=False)['is_correct_numeric']
        .agg(['mean', 'std', 'count'])
        .round(3)
    )

    # Create visualizations
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

    # 1. Line plot for accuracy by length difference. The 95% confidence band
    #    is seaborn's default, so the deprecated `ci=95` argument is omitted.
    sns.lineplot(data=trials, x='length_diff_abs', y='is_correct_numeric', ax=ax1)
    ax1.set_title(line_plot_title)
    ax1.set_xlabel('Absolute Length Difference')
    ax1.set_ylabel('Accuracy')

    # 2. Bar plot for categorized length differences
    sns.barplot(x=length_accuracy.index, y='mean',
                data=length_accuracy, ax=ax2)
    ax2.set_title(bar_plot_title)
    ax2.set_xlabel('Length Difference (pixels)')
    ax2.set_ylabel('Accuracy')

    plt.tight_layout()

    # Correlation between length difference and (binary) correctness
    length_corr = stats.pointbiserialr(trials['length_diff_abs'], trials['is_correct_numeric'])

    return {
        'length_stats': length_accuracy,
        'length_correlation': length_corr,
        'plot': fig
    }

In [None]:
# Length-difference analysis for the class (intermittent) sessions.
results = analyze_arrow_length(
    data_class,
    line_plot_title="Accuracy by Length Difference - Intermittent",
    bar_plot_title="Accuracy by Length Difference Category - Intermittent"
)

# Per-bin accuracy table, then the correlation summary.
print("\nLength Effect Statistics:", results['length_stats'], sep="\n")
print(
    "\nLength-Accuracy Correlation:",
    f"Correlation: {results['length_correlation'].correlation:.3f}",
    f"p-value: {results['length_correlation'].pvalue:.3f}",
    sep="\n",
)

plt.show()

In [None]:

def analyze_arrow_color(df):
    """Compare accuracy and response time between arrow colours.

    The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``is_correct``, ``arrow_color`` and
        ``response_time``.

    Returns
    -------
    dict
        ``'color_stats'``: DataFrame indexed by arrow colour with ``mean``,
        ``std`` and ``count`` of correctness (rounded to 3 decimals) and
        ``ci``, the normal-approximation 95% half-width
        ``1.96 * sqrt(p * (1 - p) / n)``.
        ``'color_ttest'``: ``scipy.stats.ttest_ind`` result comparing the
        correctness of rows with ``arrow_color == 'red'`` against
        ``arrow_color == 'black'``.
        ``'plot'``: the matplotlib Figure.
    """
    # Work on a copy so the caller's DataFrame does not silently gain columns.
    trials = df.copy()

    # Convert boolean to numeric
    trials['is_correct_numeric'] = trials['is_correct'].astype(int)

    # Per-colour accuracy statistics, kept unrounded for the CI computation.
    raw_stats = trials.groupby('arrow_color')['is_correct_numeric'].agg([
        'mean', 'std', 'count'
    ])

    # Compute the half-width from the unrounded proportion; rounding the mean
    # to 3 decimals first would leak rounding error into the interval.
    half_width = 1.96 * np.sqrt(
        (raw_stats['mean'] * (1 - raw_stats['mean'])) / raw_stats['count']
    )

    color_accuracy = raw_stats.round(3)
    color_accuracy['ci'] = half_width

    # T-test for color effect (only the 'red' and 'black' rows take part)
    red_trials = trials[trials['arrow_color'] == 'red']['is_correct_numeric']
    black_trials = trials[trials['arrow_color'] == 'black']['is_correct_numeric']
    color_ttest = stats.ttest_ind(red_trials, black_trials)

    # Create visualizations
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

    # 1. Bar plot for color accuracy. The 95% confidence interval is
    #    seaborn's default, so the deprecated `ci=95` argument is omitted.
    sns.barplot(data=trials, x='arrow_color', y='is_correct_numeric', ax=ax1)
    ax1.set_title('Accuracy by Arrow Color')
    ax1.set_ylabel('Accuracy')

    # 2. Boxplot for response times by color
    sns.boxplot(x='arrow_color', y='response_time', data=trials, ax=ax2)
    ax2.set_title('Response Times by Arrow Color')
    ax2.set_ylabel('Response Time (seconds)')

    plt.tight_layout()

    return {
        'color_stats': color_accuracy,
        'color_ttest': color_ttest,
        'plot': fig
    }

In [None]:
# Arrow-colour analysis for the class (intermittent) sessions.
results = analyze_arrow_color(data_class)

# Per-colour accuracy table, then the red-vs-black t-test summary.
print("\nColor Effect Statistics:", results['color_stats'], sep="\n")
print(
    "\nColor T-test:",
    f"t-statistic: {results['color_ttest'].statistic:.3f}",
    f"p-value: {results['color_ttest'].pvalue:.3f}",
    sep="\n",
)

plt.show()

In [None]:
def analyze_control_comparison(df):
    """Summarise and plot daily accuracy for control vs non-control rows.

    The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``is_correct``, ``is_control`` and ``day``.

    Returns
    -------
    dict
        ``'summary_stats'``: dict with ``control_mean`` and
        ``non_control_mean`` (mean of the daily percentages, NaN when that arm
        has no rows), ``days`` (the days present for the control arm),
        ``control_percentages`` and ``non_control_percentages`` (percent
        correct per day, as lists).
        ``'plot'``: the matplotlib Figure.

    Notes
    -----
    ``days`` comes from the control arm only. If the two arms do not cover the
    same days, the two percentage lists are not aligned element-by-element.
    """
    # Coerce to bool on a derived frame; assigning back into `df` would change
    # the dtype of the caller's column.
    trials = df.assign(is_correct=df['is_correct'].astype(bool))

    def _daily_percentage(is_control, value_label):
        """Return a ['Day', value_label] frame of per-day percent correct."""
        subset = trials[trials['is_control'] == is_control]
        daily = (subset.groupby('day')['is_correct'].mean() * 100).reset_index()
        daily.columns = ['Day', value_label]
        return daily

    def _mean_or_nan(values):
        """Mean of a list, or NaN for an empty list instead of raising."""
        return statistics.mean(values) if values else float('nan')

    correct_percentage_control = _daily_percentage(True, 'Correct Percentage (Control)')
    correct_percentage_non_control = _daily_percentage(False, 'Correct Percentage (Non-Control)')

    # Extract values for statistics
    control_percentages = correct_percentage_control['Correct Percentage (Control)'].tolist()
    non_control_percentages = correct_percentage_non_control['Correct Percentage (Non-Control)'].tolist()

    # Calculate summary statistics
    summary_stats = {
        'control_mean': _mean_or_nan(control_percentages),
        'non_control_mean': _mean_or_nan(non_control_percentages),
        'days': correct_percentage_control['Day'].tolist(),
        'control_percentages': control_percentages,
        'non_control_percentages': non_control_percentages
    }

    # Create visualization
    fig, ax = plt.subplots(figsize=(10, 6))

    # Plot control group
    ax.plot(correct_percentage_control['Day'],
            correct_percentage_control['Correct Percentage (Control)'],
            marker='o', linestyle='-', color='b', label='Control')

    # Plot non-control group
    ax.plot(correct_percentage_non_control['Day'],
            correct_percentage_non_control['Correct Percentage (Non-Control)'],
            marker='o', linestyle='--', color='r', label='Non-Control')

    # Tick every day that appears in either arm, not just the control days.
    all_days = sorted(
        set(correct_percentage_control['Day']) | set(correct_percentage_non_control['Day'])
    )

    # Customize plot
    ax.set_title('Correct Answer Percentage Over Days', fontsize=16)
    ax.set_xlabel('Day', fontsize=14)
    ax.set_ylabel('Correct Percentage (%)', fontsize=14)
    ax.set_xticks(all_days)
    ax.tick_params(axis='both', labelsize=12)
    ax.set_ylim(0, 100)
    ax.grid(alpha=0.3)
    ax.legend(fontsize=12)

    plt.tight_layout()

    return {
        'summary_stats': summary_stats,
        'plot': fig
    }

In [None]:
# Control vs non-control accuracy by day for the class (intermittent) sessions.
results = analyze_control_comparison(data_class)

# Overall means first, then the day-by-day breakdown.
print(
    "\nControl vs Non-Control Analysis:",
    f"Control group mean accuracy: {results['summary_stats']['control_mean']:.2f}%",
    f"Non-control group mean accuracy: {results['summary_stats']['non_control_mean']:.2f}%",
    "\nDaily percentages:",
    sep="\n",
)
for day, control, non_control in zip(
    *(results['summary_stats'][key]
      for key in ('days', 'control_percentages', 'non_control_percentages'))
):
    print(f"Day {day}: Control = {control:.2f}%, Non-Control = {non_control:.2f}%")

plt.show()

#### External (Intensive)

In [None]:
# First-day vs last-day comparison for the external (intensive) sessions.
results = analyze_group_performance(data_intensive)

# Summary table first, then one short report per t-test.
print("\nMetrics Summary:", results['metrics'], sep="\n")

print("\nStatistical Tests:")
for test_name, test_result in results['stats'].items():
    print(
        f"\n{test_name}:",
        f"t-statistic: {test_result.statistic:.3f}",
        f"p-value: {test_result.pvalue:.3f}",
        sep="\n",
    )

In [None]:
# Length-difference analysis for the external (intensive) sessions.
results = analyze_arrow_length(
    data_intensive,
    line_plot_title="Accuracy by Length Difference - Intensive",
    bar_plot_title="Accuracy by Length Difference Category - Intensive"
)

# Per-bin accuracy table, then the correlation summary.
print("\nLength Effect Statistics:", results['length_stats'], sep="\n")
print(
    "\nLength-Accuracy Correlation:",
    f"Correlation: {results['length_correlation'].correlation:.3f}",
    f"p-value: {results['length_correlation'].pvalue:.3f}",
    sep="\n",
)

plt.show()

In [None]:
# Arrow-colour analysis for the external (intensive) sessions.
results = analyze_arrow_color(data_intensive)

# Per-colour accuracy table, then the red-vs-black t-test summary.
print("\nColor Effect Statistics:", results['color_stats'], sep="\n")
print(
    "\nColor T-test:",
    f"t-statistic: {results['color_ttest'].statistic:.3f}",
    f"p-value: {results['color_ttest'].pvalue:.3f}",
    sep="\n",
)

plt.show()

In [None]:
# Control vs non-control accuracy by day for the external (intensive) sessions.
results = analyze_control_comparison(data_intensive)

# Overall means first, then the day-by-day breakdown.
print(
    "\nControl vs Non-Control Analysis:",
    f"Control group mean accuracy: {results['summary_stats']['control_mean']:.2f}%",
    f"Non-control group mean accuracy: {results['summary_stats']['non_control_mean']:.2f}%",
    "\nDaily percentages:",
    sep="\n",
)
for day, control, non_control in zip(
    *(results['summary_stats'][key]
      for key in ('days', 'control_percentages', 'non_control_percentages'))
):
    print(f"Day {day}: Control = {control:.2f}%, Non-Control = {non_control:.2f}%")

plt.show()

#### Compare Groups

In [None]:

# Summary table for the cross-group comparison: one row per combination of
# group, condition, quiz speed and start/end day (2 x 2 x 2 x 2 = 16 rows).
# NOTE(review): the numeric values look hand-transcribed from the metric
# tables printed earlier in the notebook; confirm they are still in sync
# whenever the underlying data changes.
data = {
    'Group': ['Intermittent'] * 8 + ['Intensive'] * 8,
    'Condition': (['Experimental'] * 4 + ['Control'] * 4) * 2,
    'Quiz Speed': ['Fast', 'Fast', 'Slow', 'Slow'] * 4,
    'Day': ['Start', 'End'] * 8,
    'Accuracy (Mean)': [
        0.194, 0.453, 0.109, 0.484,  # Intermittent / Experimental
        0.313, 0.469, 0.354, 0.406,  # Intermittent / Control
        0.521, 0.375, 0.563, 0.375,  # Intensive / Experimental
        0.547, 0.62, 0.46, 0.78,     # Intensive / Control
    ],
    'Response Time (Mean)': [
        2.57, 1.83, 3.48, 3.06,      # Intermittent / Experimental
        2.74, 2.86, 4.67, 3.28,      # Intermittent / Control
        2.34, 1.46, 3.6, 1.93,       # Intensive / Experimental
        2.23, 1.69, 3.26, 2.2,       # Intensive / Control
    ],
    'N Trials': [
        160, 128, 128, 128,          # Intermittent / Experimental
        48, 32, 48, 32,              # Intermittent / Control
        144, 48, 144, 48,            # Intensive / Experimental
        48, 50, 50, 50,              # Intensive / Control
    ],
}

df = pd.DataFrame(data)

In [None]:

# Mean response time at the start and end of the study for each cohort,
# with one panel per quiz speed.
# Only the subplots figure is created: a separate plt.figure() call before it
# would leave an empty extra figure that plt.show() renders as a blank output.
# NOTE(review): if `df` holds both an Experimental and a Control row for each
# Group/Day/Speed, every bar is the unweighted mean of the two (seaborn's
# default estimator) with an error bar over them. Confirm that is intended.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# Plot Fast speeds
fast_data = df[df['Quiz Speed'] == 'Fast']
sns.barplot(data=fast_data, x='Group', y='Response Time (Mean)',
            hue='Day', ax=ax1, palette='Set2')
ax1.set_title('Fast Quiz Speed')
ax1.set_ylabel('Response Time (seconds)')

# Plot Slow speeds
slow_data = df[df['Quiz Speed'] == 'Slow']
sns.barplot(data=slow_data, x='Group', y='Response Time (Mean)',
            hue='Day', ax=ax2, palette='Set2')
ax2.set_title('Slow Quiz Speed')
ax2.set_ylabel('Response Time (seconds)')

for ax in [ax1, ax2]:
    ax.grid(True, alpha=0.3)
    # Rotate the existing tick labels in place instead of re-assigning the
    # label text through set_xticklabels().
    ax.tick_params(axis='x', labelrotation=45)

fig.suptitle('Response Times by Group, Speed, and Day', y=1.05)
fig.tight_layout()
plt.show()