In [2]:
import numpy as np 
import pandas as pd

In [6]:
# Participant IDs used throughout the notebook. The numbering has gaps
# (01, 10 and 12 are not part of this list).
subject_numbers = (2, 3, 4, 5, 6, 7, 8, 9, 11, 13, 14, 15, 16, 17)
subjects = [f"sub-{number:02d}" for number in subject_numbers]

In [None]:
# PRETRAINED MODEL
# Summarise the 'ee' column of the pretrained-model files for every
# subject x task combination, pooling the samples of all runs.

# Root of the preprocessed data; edit this single line if the share is mounted elsewhere.
pp_data_dir = "/Users/sinakling/disks/meso_shared/deepmreye/derivatives/pp_data"

# Define tasks and the runs that are pooled for each of them
tasks = ['fixation', 'pursuit', 'freeview', 'all']
runs = ['01', '02', '03']

# Initialize list to store data rows
results = []

for subject in subjects:
    for task in tasks:
        # Load the 'ee' time series of every run (one gzipped TSV per run)
        run_errors = [
            pd.read_csv(
                f"{pp_data_dir}/{subject}/eyetracking/timeseries/"
                f"{subject}_task-DeepMReyeCalib_subtask-{task}_run_{run}_ee_pretrained.tsv.gz",
                compression='gzip', delimiter='\t')['ee'].to_numpy()
            for run in runs
        ]

        # Combine runs and compute statistics over all pooled samples
        all_ee = np.concatenate(run_errors)
        mean_ee = np.mean(all_ee)
        perc_ee = np.percentile(all_ee, 75)

        # Append result to the list
        results.append({
            'subject': subject,
            'task': task,
            'model': 'pretrained',
            'mean': mean_ee,
            '75_perc': perc_ee
        })

# Convert to DataFrame (this cell creates df_ee from scratch, so it is safe to re-run)
df_ee = pd.DataFrame(results)

# Optional: display or save
print(df_ee.head())
# df_ee.to_csv('ee_pretrained_summary.csv', index=False)


  subject      task       model      mean   75_perc
0  sub-02  fixation  pretrained  5.256241  6.885061
1  sub-02   pursuit  pretrained  4.288809  5.481960
2  sub-02  freeview  pretrained  4.147764  5.291535
3  sub-02       all  pretrained  4.436713  5.585948
4  sub-03  fixation  pretrained  5.427978  7.019215


In [10]:
# SCALED MODEL
# Summarise the 'ee' column of the scaled-model files for every
# subject x task combination and add the rows to df_ee (which must already exist).

# Root of the preprocessed data; edit this single line if the share is mounted elsewhere.
pp_data_dir = "/Users/sinakling/disks/meso_shared/deepmreye/derivatives/pp_data"

# Define tasks and the runs that are pooled for each of them
tasks = ['fixation', 'pursuit', 'freeview', 'all']
runs = ['01', '02', '03']

# Initialize list to store data rows
scaled_results = []

for subject in subjects:
    for task in tasks:
        # Load the 'ee' time series of every run (one gzipped TSV per run)
        run_errors = [
            pd.read_csv(
                f"{pp_data_dir}/{subject}/eyetracking/timeseries/"
                f"{subject}_task-DeepMReyeCalib_subtask-{task}_run_{run}_ee_scaled.tsv.gz",
                compression='gzip', delimiter='\t')['ee'].to_numpy()
            for run in runs
        ]

        # Combine runs and compute statistics over all pooled samples
        all_ee = np.concatenate(run_errors)
        mean_ee = np.mean(all_ee)
        perc_ee = np.percentile(all_ee, 75)

        # Append result to the list
        scaled_results.append({
            'subject': subject,
            'task': task,
            'model': 'scaled',
            'mean': mean_ee,
            '75_perc': perc_ee
        })

# Convert to DataFrame
df_trained = pd.DataFrame(scaled_results)

# Concatenate. Rows of this model that are already in df_ee (e.g. from an
# earlier run of this cell) are dropped first: duplicated subject/task/model
# rows would make the later pivot() calls fail.
df_ee = pd.concat([df_ee[df_ee['model'] != 'scaled'], df_trained], ignore_index=True)


In [11]:
# FINE TUNED MODEL
# Summarise the 'ee' column of the '*_ee_no_interpol' files for every
# subject x task combination and add the rows to df_ee (which must already
# exist) under the model label 'pt_calib'.

# Root of the preprocessed data; edit this single line if the share is mounted elsewhere.
pp_data_dir = "/Users/sinakling/disks/meso_shared/deepmreye/derivatives/pp_data"

# Define tasks and the runs that are pooled for each of them
tasks = ['fixation', 'pursuit', 'freeview', 'all']
runs = ['01', '02', '03']

# Initialize list to store data rows
calib_results = []

for subject in subjects:
    for task in tasks:
        # Load the 'ee' time series of every run (one gzipped TSV per run)
        run_errors = [
            pd.read_csv(
                f"{pp_data_dir}/{subject}/eyetracking/timeseries/"
                f"{subject}_task-DeepMReyeCalib_subtask-{task}_run_{run}_ee_no_interpol.tsv.gz",
                compression='gzip', delimiter='\t')['ee'].to_numpy()
            for run in runs
        ]

        # Combine runs and compute statistics over all pooled samples
        all_ee = np.concatenate(run_errors)
        mean_ee = np.mean(all_ee)
        perc_ee = np.percentile(all_ee, 75)

        # Append result to the list
        calib_results.append({
            'subject': subject,
            'task': task,
            'model': 'pt_calib',
            'mean': mean_ee,
            '75_perc': perc_ee
        })

# Convert to DataFrame
df_calib = pd.DataFrame(calib_results)

# Concatenate. Rows of this model that are already in df_ee (e.g. from an
# earlier run of this cell) are dropped first: duplicated subject/task/model
# rows would make the later pivot() calls fail.
df_ee = pd.concat([df_ee[df_ee['model'] != 'pt_calib'], df_calib], ignore_index=True)


In [28]:
from scipy.stats import permutation_test
import pandas as pd

def paired_permutation_tests(df, model_a, model_b, n_resamples=10000, random_state=None):
    """
    Run paired permutation tests between two models for each task.

    For every task, the per-subject 'mean' values of the two models are paired
    by subject and the mean paired difference (model_a - model_b) is tested
    two-sided with scipy.stats.permutation_test (permutation_type='samples').

    Parameters:
        df (pd.DataFrame): DataFrame with columns ['subject', 'task', 'model', 'mean'],
            holding at most one row per subject/task/model combination.
        model_a (str): Name of the first model
        model_b (str): Name of the second model
        n_resamples (int): Number of resamples passed to permutation_test.
        random_state: Optional seed / random generator forwarded to
            permutation_test so that p-values are reproducible. It is only
            forwarded when it is not None, so the default call is unchanged.

    Returns:
        pd.DataFrame: One row per tested task with the columns
            ['task', 'model_a', 'model_b', 'mean_diff', 'p_value', 'n_subjects'].
            Tasks that lack one of the models, or that have fewer than two
            subjects with values for both models, are skipped. The columns are
            present even when no task could be tested.
    """
    columns = ['task', 'model_a', 'model_b', 'mean_diff', 'p_value', 'n_subjects']
    results = []

    # Only forward the seed when the caller supplied one
    extra_kwargs = {} if random_state is None else {'random_state': random_state}

    # Group only by task
    for task, group in df.groupby('task'):
        # Filter to the two models of interest
        group_filtered = group[group['model'].isin([model_a, model_b])]
        if group_filtered.empty:
            continue  # Neither model has data for this task

        # Pivot: subjects as rows, models as columns
        pivoted = group_filtered.pivot(index='subject', columns='model', values='mean')

        # A model without any row for this task has no column after the pivot;
        # skip the task instead of failing with a KeyError below.
        if model_a not in pivoted.columns or model_b not in pivoted.columns:
            continue

        # Keep only subjects that have a value for both models
        pivoted = pivoted.dropna()
        if pivoted.shape[0] < 2:
            continue  # Not enough subjects with data for both models

        a_values = pivoted[model_a].to_numpy()
        b_values = pivoted[model_b].to_numpy()

        # Paired permutation test on mean difference
        result = permutation_test(
            (a_values, b_values),
            statistic=lambda x, y: (x - y).mean(),
            permutation_type='samples',
            vectorized=False,
            alternative='two-sided',
            n_resamples=n_resamples,
            **extra_kwargs
        )

        results.append({
            'task': task,
            'model_a': model_a,
            'model_b': model_b,
            'mean_diff': (a_values - b_values).mean(),
            'p_value': result.pvalue,
            'n_subjects': len(pivoted)
        })

    # Explicit columns keep the schema stable when `results` is empty
    return pd.DataFrame(results, columns=columns)


In [37]:

# Paired permutation tests for the three model pairings, evaluated in this order:
# pretrained vs scaled, pretrained vs pt_calib, scaled vs pt_calib.
model_pairs = [
    ('pretrained', 'scaled'),
    ('pretrained', 'pt_calib'),
    ('scaled', 'pt_calib'),
]

sig_df_pt_vs_scaled, sig_df_pt_vs_calib, sig_df_scaled_vs_calib = (
    paired_permutation_tests(df_ee, model_a=first, model_b=second)
    for first, second in model_pairs
)

# Stack the per-pairing tables into one table with a fresh 0..n-1 index
sig_df = pd.concat(
    [sig_df_pt_vs_scaled, sig_df_pt_vs_calib, sig_df_scaled_vs_calib],
    ignore_index=True,
)

# Show the combined significance results for all pairings
print(sig_df)




        task     model_a   model_b  mean_diff   p_value  n_subjects
0        all  pretrained    scaled   0.248655  0.002000          14
1   fixation  pretrained    scaled   0.656068  0.000200          14
2   freeview  pretrained    scaled  -0.123212  0.187181          14
3    pursuit  pretrained    scaled   0.283770  0.001800          14
4        all  pretrained  pt_calib   0.507080  0.000200          14
5   fixation  pretrained  pt_calib   1.151713  0.000200          14
6   freeview  pretrained  pt_calib   0.147515  0.131787          14
7    pursuit  pretrained  pt_calib   0.404555  0.000200          14
8        all      scaled  pt_calib   0.258425  0.001800          14
9   fixation      scaled  pt_calib   0.495645  0.000400          14
10  freeview      scaled  pt_calib   0.270727  0.018398          14
11   pursuit      scaled  pt_calib   0.120785  0.073793          14


In [48]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# Paired bar/line figure: one row of subplots per task, one column per model
# comparison. Bars show the across-subject mean (+/- SEM) of the per-subject
# 'mean' values in df_ee, thin lines connect each subject's two values, and a
# bracket with '*' marks comparisons whose p-value in sig_df is below 0.05.

# Define color and name mappings
model_name_map = {
    "pretrained": "DeepMreye",
    "scaled": "DeepMreye Scaled",
    "pt_calib": "DeepMreye + Calib"
}

bar_color = 'rgba(66, 129, 164, 0.25)'

colormap_subject_dict = {
    'sub-01': '#AA0DFE', 'sub-02': '#3283FE', 'sub-03': '#85660D', 'sub-04': '#782AB6',
    'sub-05': '#565656', 'sub-06': '#1C8356', 'sub-07': '#16FF32', 'sub-08': '#F7E1A0',
    'sub-09': '#E2E2E2', 'sub-11': '#1CBE4F', 'sub-13': '#DEA0FD', 'sub-14': '#FBE426',
    'sub-15': '#325A9B', 'sub-16': '#FEAF16', 'sub-17': '#F8A19F'
}

# Bold titles: the opening <b> tag is paired with a closing </b> tag
task_labels = ["<b>Guided Fixation</b>", "<b>Smooth Pursuit</b>", "<b>Freeviewing</b>"]
task_names = ["fixation", "pursuit", "freeview", "all"]

# (model_a, model_b) pairs, one per subplot column
comparisons = [
    ("pretrained", "scaled"),
    ("pretrained", "pt_calib"),
    ("scaled", "pt_calib")
]

n_rows = len(task_names)
n_cols = len(comparisons)

# Titles are listed row by row; every subplot of a row repeats the task label
fig = make_subplots(
    rows=n_rows, cols=n_cols,
    subplot_titles=[label for label in task_labels for _ in range(n_cols)] + ["<b>All Tasks</b>"] * n_cols
)

# Keep track of subjects already shown in legend
shown_subjects = set()

for i, task in enumerate(task_names):
    for j, (model_a, model_b) in enumerate(comparisons):
        row = i + 1
        col = j + 1

        df_plot = df_ee[
            (df_ee["task"] == task) &
            (df_ee["model"].isin([model_a, model_b]))
        ]

        # Subjects as rows, the two models as columns (in comparison order)
        df_pivot = df_plot.pivot(index="subject", columns="model", values="mean")[[model_a, model_b]].dropna()

        means = df_pivot.mean()
        stderrs = df_pivot.sem()
        x_vals = [0, 1]

        fig.add_trace(go.Bar(
            x=x_vals,
            y=means.values,
            error_y=dict(type="data", array=stderrs.values),
            marker_color=bar_color,
            width=0.25,
            showlegend=False
        ), row=row, col=col)

        # Add connecting lines per subject
        for subject, row_data in df_pivot.iterrows():
            color = colormap_subject_dict.get(subject, 'gray')
            show_legend = subject not in shown_subjects
            shown_subjects.add(subject)

            fig.add_trace(go.Scatter(
                x=x_vals,
                y=row_data.values,
                mode='lines+markers',
                line=dict(color=color, width=1.5),
                marker=dict(size=4),
                name=subject,
                # One legend entry per subject controls that subject's traces
                # in every subplot, not only the first one.
                legendgroup=subject,
                opacity=0.9,
                showlegend=show_legend
            ), row=row, col=col)

        # Add significance marker if p < 0.05
        sig_row = sig_df[
            (sig_df.task == task) &
            (sig_df.model_a == model_a) &
            (sig_df.model_b == model_b)
        ]

        if not sig_row.empty and sig_row.iloc[0].p_value < 0.05:
            # NOTE(review): the bracket height is derived from the bar means,
            # not from the individual subject values, so it can cross subject
            # lines that lie above the bars.
            y_vals = means.values
            max_y = max(y_vals)
            line_y = max_y + 0.5

            fig.add_shape(
                type="line",
                x0=0,
                x1=1,
                y0=line_y,
                y1=line_y,
                line=dict(color="black", width=1),
                row=row, col=col
            )
            fig.add_trace(go.Scatter(
                x=[0.5],
                y=[line_y + 1],
                text=["*"],
                mode="text",
                showlegend=False
            ), row=row, col=col)

        # Axis formatting: tick labels come straight from the comparison that
        # is drawn in this subplot, so they cannot drift out of sync with it.
        fig.update_yaxes(range=[0, 8], title_text="Euclidean Error (dva)", row=row, col=col)
        fig.update_xaxes(
            tickvals=x_vals,
            ticktext=[model_name_map[model_a], model_name_map[model_b]],
            row=row, col=col
        )

# Layout
fig.update_layout(
    height=1800,
    width=1300,
    title_text="Is Calib better than just scaling?",
    template="simple_white",
    showlegend=True,
    font=dict(size=16, family="Arial"),
    margin=dict(t=150)
)

fig.show()
