# T-Maze Group Statistics Analysis

This notebook demonstrates group-level statistical analysis for T-maze classification results:
- Group-level t-tests (one-sample and paired) and permutation testing
- Repeated-measures ANOVA
- Linear Mixed Effects models
- Effect size calculations and Bayesian estimation
- Multiple comparison correction (FDR) across ROIs

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# T-Maze analysis imports
# The project package imported below is named `statistics`, the same name as a
# standard-library module. The parent directory therefore has to come FIRST on
# sys.path: appended at the end, the standard-library module would be found
# before the project package and `from statistics import group_ttest, ...`
# would raise ImportError.
# NOTE(review): if the stdlib `statistics` module is already cached in
# sys.modules (imported earlier by some other library), path order alone does
# not help -- restart the kernel, or better, rename the project package.
import sys
if sys.path[:1] != ['..']:  # guard keeps the cell idempotent when re-run
    sys.path.insert(0, '..')

from statistics import (
    group_ttest,
    paired_ttest,
    repeated_measures_anova,
    group_roi_analysis,
    fdr_correction
)
from statistics.mixed_effects import (
    LinearMixedEffects,
    lme_roi_analysis,
    lme_temporal_analysis
)
from statistics.effect_sizes import (
    cohens_d,
    hedges_g,
    bootstrap_ci,
    bayesian_estimation
)
from statistics.permutation import (
    GroupPermutationTest,
    cluster_permutation_group
)

# Set plotting style: select the 'seaborn-v0_8-whitegrid' matplotlib style sheet
# for every figure drawn below.
# NOTE(review): the 'seaborn-v0_8-*' style names presumably require a fairly
# recent matplotlib release -- confirm against the environment's pinned version.
plt.style.use('seaborn-v0_8-whitegrid')
# IPython magic (not Python syntax): selects the inline matplotlib backend.
%matplotlib inline

## 1. Simulate Group Data

For demonstration, we'll simulate classification accuracy data from multiple subjects.

In [None]:
# Simulate one classification accuracy per subject for 20 subjects.
np.random.seed(42)  # fixed seed so the simulated sample is reproducible
n_subjects = 20

# Classification accuracies (above chance)
# Generating distribution: mean = 0.62, std = 0.08
accuracies = np.random.normal(0.62, 0.08, n_subjects)
accuracies = np.clip(accuracies, 0.4, 0.85)  # Realistic bounds

# Descriptive statistics of the simulated sample.
# ddof=1 yields the sample standard deviation (n - 1 denominator), the usual
# estimator when the subjects are a sample from a population; NumPy's default
# ddof=0 would report the smaller population formula instead.
accuracy_mean = np.mean(accuracies)
accuracy_std = np.std(accuracies, ddof=1)

print(f"Mean accuracy: {accuracy_mean:.3f}")
print(f"Std accuracy: {accuracy_std:.3f}")
print(f"Range: [{np.min(accuracies):.3f}, {np.max(accuracies):.3f}]")

## 2. Group-Level T-Test

In [None]:
# One-sided, one-sample t-test: are the group accuracies above chance (0.5)?
# `result` is kept as the variable name because the summary table reads it.
result = group_ttest(accuracies, alternative='greater', chance_level=0.5)

rule = "=" * 50
ttest_report = [
    rule,
    "ONE-SAMPLE T-TEST (vs chance = 0.5)",
    rule,
    f"Mean accuracy: {result.mean:.3f} ± {result.std:.3f}",
    f"95% CI: [{result.ci_lower:.3f}, {result.ci_upper:.3f}]",
    f"t({result.df:.0f}) = {result.statistic:.3f}",
    f"p-value = {result.p_value:.4f}",
    f"Cohen's d = {result.effect_size:.3f}",
    f"Significant: {result.is_significant()}",
]
print(*ttest_report, sep="\n")

## 3. Permutation Test

In [None]:
# Non-parametric check of the same hypothesis: one-sided permutation test
# of the group accuracies against chance, using 10000 permutations.
perm_test = GroupPermutationTest(tail='greater', n_permutations=10000)
perm_result = perm_test.test(accuracies, chance=0.5)

rule = "=" * 50
perm_report = [
    "\n" + rule,
    "PERMUTATION TEST",
    rule,
    f"Observed mean: {perm_result.observed:.3f}",
    f"p-value: {perm_result.p_value:.4f}",
    f"Significant: {perm_result.significant}",
]
print(*perm_report, sep="\n")

# Histogram of the null distribution, with the observed value and the chance
# level drawn as vertical reference lines.
fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(
    perm_result.null_distribution,
    bins=50,
    density=True,
    alpha=0.7,
    label='Null distribution',
)
ax.axvline(
    perm_result.observed,
    color='red',
    linewidth=2,
    label=f'Observed ({perm_result.observed:.3f})',
)
ax.axvline(0.5, color='gray', linestyle='--', label='Chance')
ax.set(
    xlabel='Accuracy',
    ylabel='Density',
    title='Permutation Test: Null Distribution',
)
ax.legend()
fig.tight_layout()
plt.show()

## 4. Effect Size Analysis

In [None]:
rule = "=" * 50

# Cohen's d of the accuracies relative to chance, with a bootstrap CI requested
d_result = cohens_d(accuracies, mu=0.5, bootstrap_ci=True)
print("\n" + rule, "EFFECT SIZE ANALYSIS", rule, sep="\n")
print(d_result)

# Hedges' g (bias-corrected) relative to the same reference value
g_result = hedges_g(accuracies, mu=0.5)
print(f"\nHedges' g: {g_result.effect_size:.3f} (95% CI: [{g_result.ci_lower:.3f}, {g_result.ci_upper:.3f}])")

# BCa bootstrap interval on the mean accuracy; the third return value is kept
# under its original name although this cell does not read it.
boot_out = bootstrap_ci(accuracies, n_bootstrap=10000, method='bca')
ci_lower, ci_upper, boot_dist = boot_out
print(f"\nBootstrap 95% CI (BCa): [{ci_lower:.3f}, {ci_upper:.3f}]")

## 5. Bayesian Estimation

In [None]:
# Bayesian estimate of the group accuracy with a prior centred on chance
# (prior_mu=0.5, prior_sigma=0.1). The returned mapping is read by key below.
bayes = bayesian_estimation(accuracies, prior_sigma=0.1, prior_mu=0.5)

rule = "=" * 50
bayes_report = [
    "\n" + rule,
    "BAYESIAN ESTIMATION",
    rule,
    f"Posterior mean: {bayes['posterior_mean']:.3f}",
    f"Posterior SD: {bayes['posterior_sd']:.3f}",
    f"95% Credible interval: [{bayes['ci_lower']:.3f}, {bayes['ci_upper']:.3f}]",
    f"P(accuracy > 0.5): {bayes['prob_above_chance']:.3f}",
    f"Bayes Factor (BF10): {bayes['bayes_factor_10']:.2f}",
]
print(*bayes_report, sep="\n")

# Posterior histogram with chance level, posterior mean and the interval band
fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(
    bayes['posterior_samples'],
    bins=50,
    density=True,
    alpha=0.7,
    label='Posterior',
)
ax.axvline(0.5, color='gray', linestyle='--', label='Chance')
ax.axvline(
    bayes['posterior_mean'],
    color='red',
    linewidth=2,
    label=f"Mean = {bayes['posterior_mean']:.3f}",
)
ax.axvspan(
    bayes['ci_lower'],
    bayes['ci_upper'],
    alpha=0.2,
    color='red',
    label='95% CI',
)
ax.set(
    xlabel='Accuracy',
    ylabel='Density',
    title='Bayesian Posterior Distribution',
)
ax.legend()
fig.tight_layout()
plt.show()

## 6. Condition Comparison (Paired T-Test)

In [None]:
# Two simulated accuracy vectors per subject, drawn one after the other from
# a freshly re-seeded generator (condition 1 first, then condition 2).
np.random.seed(42)
acc_condition1 = np.random.normal(0.62, 0.08, n_subjects)  # Maze vs No-Maze
acc_condition2 = np.random.normal(0.58, 0.09, n_subjects)  # Reward vs No-Reward

# Two-sided paired comparison of the two conditions within subjects
paired_result = paired_ttest(
    acc_condition1,
    acc_condition2,
    alternative='two-sided',
)

rule = "=" * 50
paired_report = [
    "\n" + rule,
    "PAIRED T-TEST (Maze vs Reward classification)",
    rule,
    f"Condition 1 mean: {paired_result.metadata['mean_a']:.3f}",
    f"Condition 2 mean: {paired_result.metadata['mean_b']:.3f}",
    f"Difference: {paired_result.mean:.3f} ± {paired_result.std:.3f}",
    f"t({paired_result.df:.0f}) = {paired_result.statistic:.3f}",
    f"p-value = {paired_result.p_value:.4f}",
    f"Cohen's d = {paired_result.effect_size:.3f}",
]
print(*paired_report, sep="\n")

## 7. Repeated Measures ANOVA

In [None]:
# Simulate 4 within-subject conditions (the cells of a 2x2 Maze x Reward layout).
# data_anova has one row per subject and one column per condition.
np.random.seed(42)
data_anova = np.column_stack([
    np.random.normal(0.65, 0.08, n_subjects),  # Maze + Reward
    np.random.normal(0.58, 0.09, n_subjects),  # Maze + No Reward
    np.random.normal(0.55, 0.08, n_subjects),  # No Maze + Reward
    np.random.normal(0.52, 0.07, n_subjects)   # No Maze + No Reward
])

conditions = ['MazeReward', 'MazeNoReward', 'NoMazeReward', 'NoMazeNoReward']

# Run ANOVA. The four cells are passed as the levels of a single factor, so
# the reported F is the overall condition effect.
anova_result = repeated_measures_anova(data_anova, factor_levels=conditions)

print("\n" + "=" * 50)
print("REPEATED MEASURES ANOVA")
print("=" * 50)
print(f"F({anova_result['df_factor']}, {anova_result['df_error']}) = {anova_result['F']:.3f}")
print(f"p-value = {anova_result['p_value']:.4f}")
print(f"Partial eta-squared = {anova_result['eta_squared']:.3f}")
# Test for presence, not truthiness: a corrected p-value of exactly 0.0 is a
# legitimate result and must still be printed.
p_value_gg = anova_result.get('p_value_gg')
if p_value_gg is not None:
    print(f"p-value (Greenhouse-Geisser) = {p_value_gg:.4f}")

# Plot condition means with standard-error bars.
fig, ax = plt.subplots(figsize=(10, 6))
means = data_anova.mean(axis=0)
# SEM = sample SD (ddof=1) / sqrt(number of subjects); the subject count is
# taken from the array itself so the bars always match the plotted data.
sems = data_anova.std(axis=0, ddof=1) / np.sqrt(data_anova.shape[0])
ax.bar(conditions, means, yerr=sems, capsize=5, color='steelblue', edgecolor='black')
ax.axhline(0.5, color='red', linestyle='--', label='Chance')
ax.set_ylabel('Accuracy')
ax.set_title('Classification Accuracy by Condition')
ax.legend()
# Rotate the condition labels through the Axes API rather than pyplot's
# implicit "current axes" state.
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()

## 8. Linear Mixed Effects Model

In [None]:
# Prepare data for LME
# The whole analysis is best-effort: a failure is reported, not raised, so the
# remaining cells of the notebook can still run.
try:
    # Flatten to long format. data_anova is (subjects x conditions) and
    # flatten() walks it row by row, so each subject's conditions are
    # contiguous; the index vectors below are built to match that order.
    n_rows, n_conditions = data_anova.shape
    y = data_anova.flatten() - 0.5  # Center on chance
    subjects = np.repeat(np.arange(n_rows), n_conditions)
    conditions_arr = np.tile(np.arange(n_conditions), n_rows)

    # Fit LME
    lme = LinearMixedEffects()
    lme.fit(y, subjects, conditions_arr)

    # Get results
    lme_result = lme.get_result('intercept')

    print("\n" + "=" * 50)
    print("LINEAR MIXED EFFECTS MODEL")
    print("=" * 50)
    print(lme_result)
    print(f"\nICC (Intraclass Correlation): {lme_result.icc:.3f}")
    print(f"Random effect variance: {lme_result.random_effect_var:.4f}")
    print(f"Residual variance: {lme_result.residual_var:.4f}")
except ImportError as e:
    # Only a failed import is evidence of a missing optional dependency.
    print(f"LME requires statsmodels: {e}")
except Exception as e:
    # Any other failure (bad shapes, convergence, attribute errors, ...) is
    # reported as what it is instead of being blamed on statsmodels.
    print(f"LME analysis failed ({type(e).__name__}): {e}")

## 9. Multiple ROI Analysis with FDR Correction

In [None]:
# Simulate one accuracy per (subject, ROI) pair.
np.random.seed(42)
n_rois = 100
roi_names = [f'ROI_{i:03d}' for i in range(n_rois)]

# The first 10 ROIs are drawn around 0.58 (true effect); the remaining ROIs
# are drawn around chance (0.50). Every draw is clipped to [0.3, 0.9].
# Draws happen subject by subject and, within a subject, ROI by ROI, so the
# random stream is consumed in a fixed order.
subject_roi_accuracies = {
    f'sub-{subj_idx+1:02d}': {
        roi: np.clip(
            np.random.normal(0.58, 0.06)
            if roi_idx < 10
            else np.random.normal(0.50, 0.05),
            0.3,
            0.9,
        )
        for roi_idx, roi in enumerate(roi_names)
    }
    for subj_idx in range(n_subjects)
}

# Group analysis with FDR correction
# One threshold shared by the corrected analysis and the uncorrected count.
alpha = 0.05
roi_results = group_roi_analysis(
    subject_roi_accuracies,
    chance_level=0.5,
    correction='fdr',
    alpha=alpha
)

# Count significant ROIs
n_sig_uncorrected = sum(1 for r in roi_results.values() if r.p_value < alpha)
n_sig_corrected = sum(1 for r in roi_results.values() if r.metadata.get('significant_corrected', False))

print("\n" + "=" * 50)
print("ROI-LEVEL ANALYSIS WITH FDR CORRECTION")
print("=" * 50)
print(f"Total ROIs: {n_rois}")
print(f"Significant (uncorrected): {n_sig_uncorrected}")
print(f"Significant (FDR-corrected): {n_sig_corrected}")

# Show top ROIs (highest accuracy first)
sorted_rois = sorted(roi_results.items(), key=lambda x: x[1].accuracy, reverse=True)[:10]
print("\nTop 10 ROIs:")
# The loop variable is deliberately NOT called `result`: that name holds the
# one-sample t-test result which the summary table reads, and a notebook loop
# variable would overwrite it in the shared namespace.
for roi_name, roi_result in sorted_rois:
    sig = '*' if roi_result.metadata.get('significant_corrected', False) else ''
    # A missing corrected p-value is shown as nan; defaulting to 0 would print
    # it as the most significant value possible.
    p_fdr = roi_result.metadata.get('p_corrected', float('nan'))
    print(f"  {roi_name}: {roi_result.accuracy:.3f} (p={roi_result.p_value:.4f}, p_fdr={p_fdr:.4f}){sig}")

## 10. Summary Statistics Table

In [None]:
# Create summary table
import pandas as pd

# The table is written row by row (one tuple per analysis) and then transposed
# into the column-oriented mapping handed to pandas.
# `result` is read here as the one-sample t-test result, so no cell between
# that test and this one may rebind the name.
# NOTE(review): the paired-test statistic, p-value and effect size sit in the
# 'Maze vs No-Maze' row although they describe the comparison between the two
# conditions -- confirm this is the intended presentation.
summary_columns = ['Analysis', 'Mean', 'Statistic', 'p-value', 'Effect Size']
summary_rows = [
    (
        'Overall Accuracy',
        f"{result.mean:.3f}",
        f"t={result.statistic:.2f}",
        f"{result.p_value:.4f}" + ('*' if result.p_value < 0.05 else ''),
        f"d={result.effect_size:.2f}",
    ),
    (
        'Maze vs No-Maze',
        f"{np.mean(acc_condition1):.3f}",
        f"t={paired_result.statistic:.2f}",
        f"{paired_result.p_value:.4f}" + ('*' if paired_result.p_value < 0.05 else ''),
        f"d={paired_result.effect_size:.2f}",
    ),
    (
        'Reward vs No-Reward',
        f"{np.mean(acc_condition2):.3f}",
        "-",
        "-",
        "-",
    ),
    (
        'Condition Effect (ANOVA)',
        f"{np.mean(data_anova):.3f}",
        f"F={anova_result['F']:.2f}",
        f"{anova_result['p_value']:.4f}" + ('*' if anova_result['p_value'] < 0.05 else ''),
        f"η²={anova_result['eta_squared']:.2f}",
    ),
    (
        'Significant ROIs (FDR)',
        f"{n_sig_corrected}/{n_rois}",
        "-",
        "-",
        "-",
    ),
]

# Transpose rows -> {column name: list of cell strings}
summary_data = {
    column: list(cells)
    for column, cells in zip(summary_columns, zip(*summary_rows))
}

summary_df = pd.DataFrame(summary_data)
rule = "=" * 70
print("\n" + rule, "SUMMARY TABLE", rule, sep="\n")
print(summary_df.to_string(index=False))