# Merged Analysis Pipeline

## 1. Setup and Configuration

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
import statsmodels.api as sm
from statsmodels.formula.api import ols
import os
import re

# Configure plotting style
sns.set_theme(style="whitegrid")
plt.rcParams['figure.figsize'] = [12, 7]

### CRM Condition Mapping
Edit the dictionary below to assign a condition (e.g., 'BM', 'CI', 'HA') to each CRM file number. This mapping will be used to automatically assign conditions during data loading.

In [None]:
# Maps a CRM run number (the integer <n> in a "*_crm_<n>.txt" filename) to the
# listening-condition label attached to that run when the data is loaded.
# Run numbers that are missing from this mapping are labelled 'Unknown' by
# load_all_data (note the different capitalisation from 'UNKNOWN' below).
CRM_CONDITION_MAP = {
    0: 'BM',
    1: 'BM',
    2: 'BM',
    3: 'CI',
    4: 'CI',
    5: 'CI',
    6: 'HA',
    7: 'HA',
    8: 'HA',
    9: 'UNKNOWN',
    10: 'UNKNOWN'
}

## 2. Data Loading and Preprocessing
This section defines the functions to load and preprocess the data from the subject's directory. It prompts for a single directory path and then loads all relevant Vowel, Consonant, and CRM files. The CRM condition for each run is assigned based on the `CRM_CONDITION_MAP` dictionary defined above, streamlining the process.

In [None]:
# --- Helper functions from v2.11.19 needed for loading ---
def parse_crm_header(filepath):
    """Extract the talker and masker IDs from the first line of a CRM file.

    The first line is searched for text of the form
    ``Talker <n>, Maskers <n> and <n>``.

    Args:
        filepath: Path of the CRM result file to inspect.

    Returns:
        A ``(talker, masker1, masker2)`` tuple of ints, or
        ``(None, None, None)`` when the file cannot be read or its first line
        does not contain the expected pattern.
    """
    try:
        with open(filepath, 'r') as f:
            header = f.readline()
    except (OSError, ValueError):
        # Best-effort lookup: a missing, unreadable or undecodable file
        # (UnicodeDecodeError is a ValueError) only means the talker/masker
        # information is unavailable for this run.
        return None, None, None

    match = re.search(r'Talker (\d+), Maskers (\d+) and (\d+)', header)
    if match is None:
        return None, None, None
    talker, masker1, masker2 = (int(group) for group in match.groups())
    return talker, masker1, masker2

def get_gender(talker_id):
    """Translate a CRM talker ID into a gender label.

    IDs up to and including 3 map to 'M', larger IDs to 'F'; a missing ID
    (None) maps to 'Unknown'.
    """
    if talker_id is None:
        return 'Unknown'
    if talker_id <= 3:
        return 'M'
    return 'F'

def get_masker_type(talker, masker1, masker2):
    """Classify a CRM run by how the maskers' genders relate to the talker's.

    Returns:
        'unknown'   if any of the three IDs is None,
        'mixed'     if the two maskers have different genders,
        'same'      if both maskers share the talker's gender,
        'different' if both maskers share a gender that is not the talker's.
    """
    if any(voice is None for voice in (talker, masker1, masker2)):
        return 'unknown'

    talker_gender = get_gender(talker)
    masker_genders = {get_gender(masker1), get_gender(masker2)}

    # Two distinct masker genders can never be all-same or all-different.
    if len(masker_genders) > 1:
        return 'mixed'
    if talker_gender in masker_genders:
        return 'same'
    return 'different'

def calculate_srt(df_run):
    """Estimate the speech reception threshold (SRT) of one CRM run.

    A trial is correct when both the colour and the number responses match
    their targets. A reversal is recorded at every trial whose correctness
    differs from that of the preceding trial, using that trial's SNR.

    Args:
        df_run: Trials of a single run with the columns 'snr',
            'target_color', 'response_color', 'target_number' and
            'response_number'.

    Returns:
        ``(srt, sd, n_reversals)``. ``srt`` and ``sd`` are the mean and the
        (population) standard deviation of the reversal SNRs from the 5th up
        to the 14th reversal; both are NaN when the run has fewer than five
        reversals.
    """
    snr_values = df_run['snr'].values
    color_hit = df_run['target_color'] == df_run['response_color']
    number_hit = df_run['target_number'] == df_run['response_number']
    is_correct = (color_hit & number_hit).values

    # Trial i is a reversal when its outcome differs from trial i - 1.
    outcome_flipped = is_correct[1:] != is_correct[:-1]
    reversal_snrs = snr_values[1:][outcome_flipped]
    n_reversals = len(reversal_snrs)

    if n_reversals < 5:
        return np.nan, np.nan, n_reversals

    # Discard the first four reversals and use at most the next ten.
    used = reversal_snrs[4:14]
    return np.mean(used), np.std(used), n_reversals

In [None]:
# --- Main Data Loading Function ---

def load_all_data(base_path, crm_condition_map):
    """Load the vowel, consonant and CRM result files of one subject.

    The subject ID is the last component of ``base_path`` and is used as the
    filename prefix of the vowel and consonant files. Every group of files is
    loaded on a best-effort basis: a failure is printed and leaves the
    corresponding DataFrame empty instead of aborting the whole load.

    Args:
        base_path: Directory holding the subject's result files. A trailing
            path separator is tolerated.
        crm_condition_map: Mapping of CRM run number (the integer in
            ``*_crm_<n>.txt``) to a condition label. Runs that are absent
            from the mapping are labelled ``'Unknown'``.

    Returns:
        ``(df_vowel, df_consonant, df_crm, df_crm_summary)``:
        ``df_vowel`` and ``df_consonant`` hold one row per trial with added
        label columns (``df_vowel`` also carries ``condition``); ``df_crm``
        holds one row per CRM trial tagged with ``filename``, ``condition``
        and ``masker_type``; ``df_crm_summary`` holds one row per CRM run
        with its SRT, SD and reversal count. Any of them may be empty.
    """
    # normpath drops a trailing separator; without it basename() returns ''
    # for "path/to/S01/" and no subject-prefixed file would ever be found.
    subject_id = os.path.basename(os.path.normpath(base_path))
    print(f"Loading data for {subject_id}...")

    # Raw string: '\s' inside a plain literal is an invalid escape sequence.
    whitespace = r'\s+'

    # Start from empty frames so callers can test `.empty` after a failure.
    df_vowel = pd.DataFrame()
    df_consonant = pd.DataFrame()
    df_crm = pd.DataFrame()
    df_crm_summary = pd.DataFrame()

    # 1. Load Vowels (both conditions must load, otherwise df_vowel stays empty)
    vowel_cols = ['talker_id', 'vowel_id', 'response_id', 'score', 'rt']
    vowel_map = {1:'AE', 2:'AH', 3:'AW', 4:'EH', 5:'IH', 6:'IY', 7:'OO', 8:'UH', 9:'UW'}
    try:
        df_v_bm = pd.read_csv(os.path.join(base_path, f'{subject_id}_vow9_BM_0.txt'), sep=whitespace, header=None, names=vowel_cols)
        df_v_bm['condition'] = 'BM'
        df_v_ci = pd.read_csv(os.path.join(base_path, f'{subject_id}_vow9_CI_0.txt'), sep=whitespace, header=None, names=vowel_cols)
        df_v_ci['condition'] = 'CI'
        df_vowel = pd.concat([df_v_bm, df_v_ci], ignore_index=True)
        df_vowel['vowel_label'] = df_vowel['vowel_id'].map(vowel_map)
        df_vowel['response_label'] = df_vowel['response_id'].map(vowel_map)
        print(f"Vowels loaded: {len(df_vowel)} trials")
    except Exception as e:
        print(f"Vowel load error: {e}")

    # 2. Load Consonants
    cons_cols = ['talker_id', 'consonant_id', 'response_id', 'score', 'rt']
    cons_map = {1:'#', 2:'_', 3:'b', 4:'d', 5:'f', 6:'g', 7:'k', 8:'m', 9:'n', 10:'%', 11:'p', 12:'s', 13:'t', 14:'v', 15:'z', 16:'$'}
    try:
        df_consonant = pd.read_csv(os.path.join(base_path, f'{subject_id}_cons_BM_n_0.out'), sep=whitespace, header=None, names=cons_cols)
        df_consonant['consonant_label'] = df_consonant['consonant_id'].map(cons_map)
        df_consonant['response_label'] = df_consonant['response_id'].map(cons_map)
        print(f"Consonants loaded: {len(df_consonant)} trials")
    except Exception as e:
        print(f"Consonant load error: {e}")

    # 3. Load CRM
    print("Processing CRM files...")
    crm_cols = ['run', 'target_color', 'response_color', 'target_number', 'response_number', 'snr', 'rt']
    crm_files = sorted([f for f in os.listdir(base_path) if '_crm_' in f and f.endswith('.txt')])
    crm_data_frames = []
    crm_summary_list = []

    for f in crm_files:
        # Each run is processed independently so one bad file cannot hide the rest.
        try:
            run_number = int(re.search(r'_crm_(\d+)\.txt', f).group(1))
            condition = crm_condition_map.get(run_number, 'Unknown')

            fpath = os.path.join(base_path, f)
            talker, m1, m2 = parse_crm_header(fpath)
            masker_type = get_masker_type(talker, m1, m2)

            df_temp = pd.read_csv(fpath, sep=whitespace, header=None, skiprows=2, names=crm_cols, on_bad_lines='skip')
            # Keep only rows whose first field is numeric, then make every column float.
            df_temp = df_temp[pd.to_numeric(df_temp['run'], errors='coerce').notna()].astype(float)

            # The SRT is computed before the string columns are attached below.
            srt, sd, revs = calculate_srt(df_temp)

            df_temp['filename'] = f
            df_temp['condition'] = condition
            df_temp['masker_type'] = masker_type
            crm_data_frames.append(df_temp)

            crm_summary_list.append({
                'filename': f, 'condition': condition, 'masker_type': masker_type,
                'talker_gender': get_gender(talker), 'srt': srt, 'sd': sd, 'reversals': revs
            })
        except Exception as e:
            print(f"    - Could not process {f}: {e}")

    if crm_data_frames:
        df_crm = pd.concat(crm_data_frames, ignore_index=True)
        df_crm_summary = pd.DataFrame(crm_summary_list)
        print(f"CRM loaded: {len(crm_data_frames)} runs processed")

    return df_vowel, df_consonant, df_crm, df_crm_summary

In [None]:
# --- Execute Data Loading ---
# Ask for the subject directory; the four data frames are only created when
# the path points at an existing directory.
data_path = input("Enter the directory path for the subject's data: ").strip()
if not os.path.isdir(data_path):
    print(f"Error: Directory not found at {data_path}")
else:
    df_vowel, df_consonant, df_crm, df_crm_summary = load_all_data(data_path, CRM_CONDITION_MAP)

## 3. Vowel Recognition Analysis

### Methodology: Vowel Confusion Matrix
A confusion matrix is a powerful tool for analyzing classification performance. In this context:
- **Rows** represent the **actual vowel** (the target phoneme).
- **Columns** represent the **vowel perceived** by the listener (the response).
- The **diagonal elements** (from top-left to bottom-right) show the number or percentage of **correctly identified vowels**.
- **Off-diagonal elements** reveal specific error patterns. For example, a high value in the 'AH' row and 'AW' column indicates that the listener frequently mistakes 'AH' for 'AW'.
We will generate two matrices: one with the raw counts of responses and another normalized to show probabilities, which helps in identifying systematic confusion patterns independent of the number of trials.

In [None]:
if 'df_vowel' not in locals() or df_vowel.empty:
    print("Vowel data not loaded. Skipping analysis.")
else:
    # --- Overall Accuracy ---
    overall_vowel_accuracy = df_vowel['score'].mean() * 100
    print(f"Overall Vowel Accuracy: {overall_vowel_accuracy:.2f}%\n\n\n")

    # --- Confusion Matrix Generation ---
    # Rows and columns are both restricted to the target labels that occur,
    # so the matrix is square; absent target/response pairs count as 0.
    vowel_labels_sorted = sorted(df_vowel['vowel_label'].dropna().unique())
    cm_counts = pd.crosstab(
        df_vowel['vowel_label'],
        df_vowel['response_label'],
        rownames=['Target'],
        colnames=['Response'],
    )
    cm_counts = cm_counts.reindex(index=vowel_labels_sorted, columns=vowel_labels_sorted, fill_value=0)
    # Row-normalise: each row becomes the response distribution of one target.
    cm_prob = cm_counts.div(cm_counts.sum(axis=1), axis=0).fillna(0)

    # --- Visualization ---
    fig, axes = plt.subplots(1, 2, figsize=(18, 7))

    # (matrix, annotation format, colour map, title) for the left and right panel
    _vowel_panels = [
        (cm_counts, 'd', 'viridis', 'Vowel Confusion Matrix (Raw Counts)'),
        (cm_prob, '.2f', 'rocket_r', 'Vowel Confusion Matrix (Probabilities)'),
    ]
    for _ax, (_matrix, _fmt, _cmap, _title) in zip(axes, _vowel_panels):
        sns.heatmap(_matrix, annot=True, fmt=_fmt, cmap=_cmap, ax=_ax, linewidths=.5)
        _ax.set_title(_title, fontsize=14)
        _ax.set_xlabel('Response', fontsize=12)
        _ax.set_ylabel('Target', fontsize=12)

    plt.tight_layout()
    plt.show()

### Statistical Comparison: Vowel Accuracy by Condition
To determine if there is a statistically significant difference in vowel recognition performance between the Bimodal (BM) and Cochlear Implant (CI) conditions, we use an independent-samples t-test in Welch's form, which does not assume equal variances in the two groups. This test is appropriate for comparing the means of two independent groups.

**Rationale:**
- **Null Hypothesis (H0):** There is no difference in the mean accuracy scores between the BM and CI groups.
- **Alternative Hypothesis (H1):** There is a difference in the mean accuracy scores.
- **Significance Level (α):** We will use a standard alpha of 0.05. If the p-value is less than 0.05, we reject the null hypothesis and conclude that the difference in performance is statistically significant.

In [None]:
if 'df_vowel' in locals() and not df_vowel.empty:
    # Per-trial scores of each listening condition.
    bm_scores = df_vowel.loc[df_vowel['condition'] == 'BM', 'score']
    ci_scores = df_vowel.loc[df_vowel['condition'] == 'CI', 'score']

    if bm_scores.empty or ci_scores.empty:
        # The comparison needs both groups; say so instead of printing nothing.
        print("Cannot perform statistical test: insufficient vowel data.")
    else:
        # --- Statistical Test ---
        ttest_res = stats.ttest_ind(bm_scores, ci_scores, equal_var=False)  # Welch's t-test for unequal variances
        print("--- T-test Results: Vowel Accuracy (BM vs CI) ---")
        print(f"BM Mean Accuracy: {bm_scores.mean() * 100:.2f}% (SD={bm_scores.std():.2f})")
        print(f"CI Mean Accuracy: {ci_scores.mean() * 100:.2f}% (SD={ci_scores.std():.2f})")
        print(f"T-statistic: {ttest_res.statistic:.3f}")
        print(f"P-value: {ttest_res.pvalue:.3f}")

        if ttest_res.pvalue < 0.05:
            print("Conclusion: The difference in vowel accuracy between BM and CI conditions is statistically significant.")
        else:
            print("Conclusion: There is no statistically significant difference in vowel accuracy between the conditions.")
else:
    print("Cannot perform statistical test: insufficient vowel data.")

## 4. Consonant Recognition Analysis

### Methodology: Phonetic Feature Analysis
Beyond simple accuracy, analyzing consonant confusions by phonetic features provides deeper insight into a listener's perceptual difficulties. This approach, similar to the principles outlined by Miller and Nicely (1955), categorizes phonemes based on core articulatory features:
- **Voicing:** Whether the vocal cords vibrate during articulation (e.g., /b/ is voiced, /p/ is voiceless).
- **Place of Articulation:** Where in the vocal tract the constriction occurs (e.g., bilabial for /p/, alveolar for /t/).
- **Manner of Articulation:** How the sound is produced (e.g., stop/plosive for /t/, fricative for /s/).

By mapping both the target and response phonemes to these features, we can calculate a **feature transmission rate**: the percentage of trials on which the response shares the target's value for a given feature. This is a simpler percent-correct proxy for Miller and Nicely's **Information Transfer** measure; it quantifies how well a listener preserves a specific feature, even if the phoneme itself is misidentified. For example, a high voicing score, despite low overall accuracy, suggests the listener can correctly distinguish voiced from voiceless sounds, but may be struggling with place or manner cues.

In [None]:
if 'df_consonant' not in locals() or df_consonant.empty:
    print("Consonant data not loaded. Skipping feature analysis.")
else:
    # --- Phonetic Feature Map ---
    # Every consonant label maps to a (voicing, place, manner) tuple.
    # 1=Voiced, 0=Voiceless; Place: 1=Bilabial, 0=Alveolar/Other; Manner: 0=Stop, 1=Nasal, 2=Fricative, 3=Affricate
    feature_map = {
        'b': (1, 1, 0), 'd': (1, 0, 0), 'g': (1, 0, 0),
        'p': (0, 1, 0), 't': (0, 0, 0), 'k': (0, 0, 0),
        'm': (1, 1, 1), 'n': (1, 0, 1),
        'f': (0, 1, 2), 'v': (1, 1, 2), 's': (0, 0, 2), 'z': (1, 0, 2),
        '#': (0, 0, 2), '_': (1, 0, 2),  # Sh, Zh
        '%': (0, 0, 3), '$': (1, 0, 3)   # Ch, J
    }

    def calculate_information_transfer(df, label_col, resp_col, feat_map):
        """Return the percentage of trials on which each feature was preserved.

        Only rows whose target and response labels both appear in
        ``feat_map`` are used. The result is a Series indexed by 'Voicing',
        'Place' and 'Manner', or None when no such row exists.
        """
        target_known = df[label_col].isin(feat_map.keys())
        response_known = df[resp_col].isin(feat_map.keys())
        valid_df = df[target_known & response_known].copy()
        if valid_df.empty:
            return None

        results = {}
        for position, feature in enumerate(('Voicing', 'Place', 'Manner')):
            # Label -> value of this one feature.
            feature_of = {label: values[position] for label, values in feat_map.items()}
            target_feature = valid_df[label_col].map(feature_of)
            response_feature = valid_df[resp_col].map(feature_of)
            results[feature] = (target_feature == response_feature).mean() * 100
        return pd.Series(results)

    # --- Analysis ---
    feature_transfer_results = calculate_information_transfer(df_consonant, 'consonant_label', 'response_label', feature_map)

    print("--- Phonetic Feature Transmission Rates ---")
    if feature_transfer_results is None:
        print("Could not calculate feature transfer (no valid consonant pairs found).")
    else:
        print(feature_transfer_results.round(2))
        # --- Visualization ---
        plt.figure(figsize=(8, 5))
        sns.barplot(x=feature_transfer_results.index, y=feature_transfer_results.values, palette='colorblind')
        plt.title('Consonant Feature Transmission', fontsize=14)
        plt.ylabel('% Correct Transmission', fontsize=12)
        plt.ylim(0, 100)
        plt.show()

In [None]:
if 'df_consonant' not in locals() or df_consonant.empty:
    print("Consonant data not loaded. Skipping analysis.")
else:
    # --- Overall Accuracy ---
    overall_consonant_accuracy = df_consonant['score'].mean() * 100
    print("\n--- Consonant Confusion Matrix Analysis ---")
    print(f"Overall Consonant Accuracy: {overall_consonant_accuracy:.2f}%\n\n\n")

    # --- Confusion Matrix Generation ---
    # Rows and columns are both restricted to the target labels that occur,
    # so the matrix is square; absent target/response pairs count as 0.
    consonant_labels_sorted = sorted(df_consonant['consonant_label'].dropna().unique())
    cm_counts_cons = pd.crosstab(
        df_consonant['consonant_label'],
        df_consonant['response_label'],
        rownames=['Target'],
        colnames=['Response'],
    )
    cm_counts_cons = cm_counts_cons.reindex(index=consonant_labels_sorted, columns=consonant_labels_sorted, fill_value=0)
    # Row-normalise: each row becomes the response distribution of one target.
    cm_prob_cons = cm_counts_cons.div(cm_counts_cons.sum(axis=1), axis=0).fillna(0)

    # --- Visualization ---
    fig, axes = plt.subplots(1, 2, figsize=(20, 8))

    # (matrix, annotation format, colour map, title) for the left and right panel
    _consonant_panels = [
        (cm_counts_cons, 'd', 'viridis', 'Consonant Confusion Matrix (Raw Counts)'),
        (cm_prob_cons, '.2f', 'rocket_r', 'Consonant Confusion Matrix (Probabilities)'),
    ]
    for _ax, (_matrix, _fmt, _cmap, _title) in zip(axes, _consonant_panels):
        sns.heatmap(_matrix, annot=True, fmt=_fmt, cmap=_cmap, ax=_ax, linewidths=.5)
        _ax.set_title(_title, fontsize=14)
        _ax.set_xlabel('Response', fontsize=12)
        _ax.set_ylabel('Target', fontsize=12)

    plt.tight_layout()
    plt.show()

## 5. CRM Speech Reception Threshold (SRT) Analysis

### Methodology: Speech Reception Threshold (SRT)
The SRT represents the Signal-to-Noise Ratio (SNR) at which a listener can correctly identify 50% of the speech material. In this adaptive staircase procedure:
- The SNR is adjusted based on the listener's response: it decreases after a correct response (making the task harder) and increases after an incorrect one (making it easier).
- A **reversal** occurs when the direction of this change flips (e.g., from decreasing to increasing).
- To calculate the SRT for a given run, we average the SNR values of the reversals, typically excluding the first few to allow the staircase to converge. Following the MATLAB script's logic, we will average reversals 5 through 14.

Lower SRT values indicate better performance, as the listener can understand speech at more adverse SNR levels.

In [None]:
# Draws the per-run SRT distributions from df_crm_summary: a 2x2 grid of
# violin plots with the individual runs overlaid as strip plots, followed by
# one faceted figure split by condition. Skipped when no CRM summary exists.
if 'df_crm_summary' in locals() and not df_crm_summary.empty:
    # --- Granular Violin Plots ---
    print("--- Granular SRT Analysis ---")
    fig, axes = plt.subplots(2, 2, figsize=(16, 12), dpi=100)
    # Flatten the 2x2 grid so the panels can be addressed as axes[0]..axes[3].
    axes = axes.flatten()

    # Plot 1: Global SRT Distribution
    sns.violinplot(y=df_crm_summary['srt'], ax=axes[0], color='skyblue', inner='quartile')
    sns.stripplot(y=df_crm_summary['srt'], ax=axes[0], color='darkblue', jitter=0.1, size=8, edgecolor='w', linewidth=1)
    axes[0].set_title('Global SRT Distribution', fontsize=14)
    axes[0].set_ylabel('SRT (dB)', fontsize=12)

    # Plot 2: SRT by Condition
    sns.violinplot(x='condition', y='srt', data=df_crm_summary, ax=axes[1], inner='quartile', palette='pastel')
    sns.stripplot(x='condition', y='srt', data=df_crm_summary, ax=axes[1], color='black', jitter=0.1, size=6)
    axes[1].set_title('SRT by Condition', fontsize=14)
    axes[1].set_xlabel('Condition', fontsize=12)
    axes[1].set_ylabel('SRT (dB)', fontsize=12)

    # Plot 3: SRT by Masker Type and Talker Gender
    # NOTE(review): split=True is combined with hue='talker_gender', which can
    # take three values ('M', 'F', 'Unknown'); some seaborn releases require
    # exactly two hue levels for split violins -- confirm against the
    # installed version.
    sns.violinplot(x='masker_type', y='srt', hue='talker_gender', data=df_crm_summary, ax=axes[2], inner='quartile', split=True, palette='muted')
    sns.stripplot(x='masker_type', y='srt', hue='talker_gender', data=df_crm_summary, ax=axes[2], dodge=True, jitter=0.1, size=6, edgecolor='w', linewidth=1)
    axes[2].set_title('SRT by Masker Type & Talker Gender', fontsize=14)
    axes[2].set_xlabel('Masker Type', fontsize=12)
    axes[2].set_ylabel('SRT (dB)', fontsize=12)

    # Plot 4: SRT by Condition and Masker Type
    sns.violinplot(x='condition', y='srt', hue='masker_type', data=df_crm_summary, ax=axes[3], inner='quartile', palette='coolwarm')
    sns.stripplot(x='condition', y='srt', hue='masker_type', data=df_crm_summary, ax=axes[3], dodge=True, jitter=0.1, size=6, edgecolor='w', linewidth=1)
    axes[3].set_title('SRT by Condition & Masker Type', fontsize=14)
    axes[3].set_xlabel('Condition', fontsize=12)
    axes[3].set_ylabel('SRT (dB)', fontsize=12)

    plt.tight_layout()
    plt.show()
    
    # Faceted plot for the most granular view
    # One violin panel per condition; the strip plot is then mapped onto every
    # facet so the individual runs are drawn on top of the violins.
    g = sns.catplot(x='masker_type', y='srt', hue='talker_gender', col='condition', data=df_crm_summary, kind='violin', inner='quartile', split=True, palette='husl', height=5, aspect=0.9, legend_out=True)
    # NOTE(review): no order/hue_order is passed to the mapped stripplot, so
    # the category order of each facet presumably depends on that facet's own
    # data -- verify that points line up with the violins.
    g.map(sns.stripplot, 'masker_type', 'srt', 'talker_gender', dodge=True, jitter=0.1, size=5, edgecolor='black', linewidth=1)
    g.fig.suptitle('SRT Stratified by All Groups', y=1.03, fontsize=16)
    g.set_axis_labels('Masker Type', 'SRT (dB)')
    g.add_legend(title='Talker Gender')
    plt.show()
else:
    print("CRM data not available for SRT analysis.")

### Statistical Comparison: ANOVA on SRTs
To assess the impact of different factors on speech recognition, we perform a two-way ANOVA (Analysis of Variance). This test allows us to examine the main effects of `condition` and `masker_type` on SRT, as well as their interaction.

**Rationale:**
- **Main Effect of Condition:** Does the listening condition (BM, CI, HA) significantly affect SRT?
- **Main Effect of Masker Type:** Does the masker gender relative to the talker (same, different, or mixed) significantly affect SRT?
- **Interaction Effect:** Does the effect of masker gender depend on the listening condition? (e.g., is the benefit of different-gender maskers larger in one condition than another?).

A significant p-value (p < 0.05) for any of these factors suggests it has a meaningful impact on performance.

In [None]:
# Two-way ANOVA of SRT on condition and masker type, run only on the runs
# that actually produced an SRT (runs with too few reversals carry NaN).
if 'df_crm_summary' in locals() and 'srt' in df_crm_summary.columns:
    anova_data = df_crm_summary.dropna(subset=['srt', 'condition', 'masker_type'])
else:
    # No CRM summary (or an empty one without columns): nothing to analyse.
    anova_data = pd.DataFrame(columns=['condition'])

# Count the conditions AFTER dropping invalid runs, so the check matches the
# data that is handed to the model.
if anova_data['condition'].nunique() > 1:
    print("--- Two-Way ANOVA Results: SRT ~ Condition * Masker Type ---")
    model = ols('srt ~ C(condition) * C(masker_type)', data=anova_data).fit()
    anova_table = sm.stats.anova_lm(model, typ=2)
    print(anova_table)

    # Interpretation
    # Look the p-values up by term name; a term missing from the table
    # (e.g. no interaction row) yields NaN instead of a wrong row's value.
    p_values = anova_table['PR(>F)']
    p_condition = p_values.get('C(condition)', np.nan)
    p_masker = p_values.get('C(masker_type)', np.nan)
    p_interaction = p_values.get('C(condition):C(masker_type)', np.nan)

    print("\n--- ANOVA Interpretation ---")
    # A NaN p-value compares False and is therefore reported as not significant.
    if p_condition < 0.05:
        print(f"- The main effect of 'condition' is significant (p={p_condition:.3f}). SRT performance differs across conditions.")
    else:
        print(f"- The main effect of 'condition' is not significant (p={p_condition:.3f}).")

    if p_masker < 0.05:
        print(f"- The main effect of 'masker_type' is significant (p={p_masker:.3f}). Same vs. different-gender maskers yield different SRTs.")
    else:
        print(f"- The main effect of 'masker_type' is not significant (p={p_masker:.3f}).")

    if p_interaction < 0.05:
        print(f"- The interaction effect is significant (p={p_interaction:.3f}). The benefit of masker gender depends on the listening condition.")
    else:
        print(f"- The interaction effect is not significant (p={p_interaction:.3f}).")
else:
    print("ANOVA not performed: requires at least two conditions with valid SRT data.")

## 6. Advanced CRM and Temporal Analysis

### Advanced CRM Error Analysis
This analysis breaks down CRM trial outcomes into four categories to understand the nature of listener errors:
- **Correct:** Both color and number were identified correctly.
- **Color Error:** The number was correct, but the color was wrong.
- **Number Error:** The color was correct, but the number was wrong.
- **Both Error:** Both color and number were incorrect.

By stratifying these error types by condition, masker type, and talker gender, we can identify if specific listening situations lead to particular kinds of perceptual errors.

In [None]:
# Classifies every CRM trial by error type and plots the counts per
# condition, faceted by masker type (columns) and talker gender (rows).
if 'df_crm' in locals() and not df_crm.empty:
    # --- Error Classification ---
    def classify_crm_error(row):
        """Label one trial as 'Correct', 'Color Error', 'Number Error' or 'Both Error'."""
        color_ok = row['target_color'] == row['response_color']
        number_ok = row['target_number'] == row['response_number']
        if color_ok and number_ok: return 'Correct'
        if not color_ok and number_ok: return 'Color Error'
        if color_ok and not number_ok: return 'Number Error'
        return 'Both Error'

    df_crm['error_type'] = df_crm.apply(classify_crm_error, axis=1)

    # The talker gender is a property of the run file, so read each file's
    # header once and map the result onto the trials instead of re-opening
    # the file for every trial row.
    gender_by_file = {
        fname: get_gender(parse_crm_header(os.path.join(data_path, fname))[0])
        for fname in df_crm['filename'].unique()
    }
    df_crm['talker_gender'] = df_crm['filename'].map(gender_by_file)

    # --- Visualization Panels ---
    print("--- CRM Error Analysis by Group ---")
    error_order = ['Correct', 'Color Error', 'Number Error', 'Both Error']
    palette = {'Correct': 'green', 'Color Error': 'blue', 'Number Error': 'orange', 'Both Error': 'red'}

    # Faceted bar charts
    g = sns.catplot(x='condition', hue='error_type', col='masker_type', row='talker_gender',
                    data=df_crm, kind='count', hue_order=error_order, palette=palette,
                    height=5, aspect=1.2, legend=False)

    g.fig.suptitle('CRM Error Type Distribution by All Groups', y=1.03, fontsize=16)
    g.set_axis_labels('Condition', 'Number of Trials')
    g.set_titles('Masker: {col_name} | Talker: {row_name}')
    g.add_legend(title='Error Type')
    # Leave room at the top of the figure for the suptitle.
    plt.tight_layout(rect=[0, 0, 1, 0.97])
    plt.show()
else:
    print("CRM data not available for error analysis.")

### Temporal Trend Analysis
This analysis investigates whether the listener's performance changes over the course of the experiment. An upward trend in accuracy (or downward trend in SRT) might suggest a learning effect, while a downward trend could indicate fatigue.

We will plot the score for each trial in sequential order and overlay a rolling average to smooth out noise and make the underlying trend more visible. A linear regression line is also fitted to provide a simple quantitative measure of the trend's direction and magnitude.

In [None]:
def plot_temporal_trend(df, metric_col, title, ax):
    """Plot a metric against trial order with a rolling mean and a linear fit.

    Args:
        df: Trials in presentation order; the row position is used as the
            trial sequence number.
        metric_col: Name of the column to plot.
        title: Title of the axes.
        ax: Matplotlib axes to draw on.

    The rolling window is 20 trials, or half the number of trials when fewer
    than 40 are available. With fewer than two trials no rolling average can
    be formed and only the scatter/regression layer is drawn.
    """
    df_seq = df.reset_index(drop=True).reset_index().rename(columns={'index': 'trial_sequence'})
    window_size = min(20, len(df_seq) // 2)

    # Plotting
    if window_size > 0:
        # The 'rolling_avg' column only exists in this branch, so the line
        # that plots it must live here as well.
        df_seq['rolling_avg'] = df_seq[metric_col].rolling(window=window_size).mean()
        sns.lineplot(x='trial_sequence', y='rolling_avg', data=df_seq, ax=ax, color='red', label=f'{window_size}-Trial Rolling Avg')
    sns.regplot(x='trial_sequence', y=metric_col, data=df_seq, ax=ax, scatter_kws={'alpha':0.2, 's':15}, line_kws={'color':'blue', 'linestyle':'--'}, label='Linear Trend')
    ax.set_title(title, fontsize=12)
    ax.set_xlabel('Trial Sequence', fontsize=10)
    ax.set_ylabel(metric_col.replace('_', ' ').title(), fontsize=10)
    ax.legend()

if 'df_vowel' not in locals() or df_vowel.empty:
    print("Vowel data not available for temporal analysis.")
else:
    print("\n--- Temporal Trend Analysis: Vowel Accuracy ---")
    fig, axes = plt.subplots(1, 1, figsize=(14, 5))
    plot_temporal_trend(df_vowel, 'score', 'Overall Vowel Accuracy Trend', axes)
    plt.show()

    # Stratified by condition
    # The sequence number is the row position in the combined vowel frame.
    _vowel_sequence = (
        df_vowel.reset_index(drop=True)
        .reset_index()
        .rename(columns={'index': 'trial_sequence'})
    )
    g = sns.lmplot(x='trial_sequence', y='score', data=_vowel_sequence,
                   col='condition', height=5, aspect=1.2, scatter_kws={'alpha':0.2, 's':15})
    g.fig.suptitle('Vowel Accuracy Trend by Condition', y=1.03, fontsize=16)
    g.set_axis_labels('Trial Sequence', 'Score')
    plt.show()