In [2]:
import numpy as np 
import pandas as pd

In [3]:
# Participant labels included in the analysis (number 12 is not part of the list).
subjects = [
    f"sub-{number:02d}"
    for number in (2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17)
]

In [4]:
# Empty placeholder for the error summary table; the pretrained-model cell below
# assigns df_ee from its own results, and the later model cells append to it.
df_ee = pd.DataFrame()

In [5]:
# PRETRAINED MODEL
# For every subject and subtask, pool the 'ee' column of runs 01-03 and summarise
# it by its mean and 75th percentile. The resulting table becomes df_ee.

# Define tasks
tasks = ['fixation', 'pursuit', 'freeview', 'all']

# One summary row (dict) per subject/task combination
results = []

for subject in subjects:
    for task in tasks:
        # The three run files share one naming pattern; only the run id changes
        ee_run_01, ee_run_02, ee_run_03 = [
            pd.read_csv(
                f"/Users/sinakling/disks/meso_shared/deepmreye/derivatives/pp_data/{subject}/eyetracking/timeseries/{subject}_task-DeepMReyeCalib_subtask-{task}_run_{run}_ee_pretrained.tsv.gz",
                compression='gzip', delimiter='\t')[['ee']].to_numpy()
            for run in ('01', '02', '03')
        ]

        # Statistics are taken over the samples of all runs together
        all_ee = np.concatenate([ee_run_01, ee_run_02, ee_run_03])
        mean_ee = all_ee.mean()
        perc_ee = np.percentile(all_ee, 75)

        results.append({
            'subject': subject,
            'task': task,
            'model': 'pretrained',
            'mean': mean_ee,
            '75_perc': perc_ee,
        })

# Convert to DataFrame
df_ee = pd.DataFrame(results)

# Optional: display or save
print(df_ee.head())
# df_ee.to_csv('ee_pretrained_summary.csv', index=False)


  subject      task       model      mean   75_perc
0  sub-02  fixation  pretrained  5.256241  6.885061
1  sub-02   pursuit  pretrained  4.288809  5.481960
2  sub-02  freeview  pretrained  4.147764  5.291535
3  sub-02       all  pretrained  4.436713  5.585948
4  sub-03  fixation  pretrained  5.427978  7.019215


In [6]:
# SCALED MODEL
# For every subject and subtask, pool the 'ee' column of runs 01-03 of the
# "scaled" files, summarise it by mean and 75th percentile, and add the rows
# to df_ee under model == 'scaled'.

# Define tasks
tasks = ['fixation', 'pursuit', 'freeview', 'all']

# Initialize list to store data rows
scaled_results = []

for subject in subjects:
    for task in tasks:
        # Load data for each run (same file pattern, only the run id changes)
        ee_run_01, ee_run_02, ee_run_03 = [
            pd.read_csv(
                f"/Users/sinakling/disks/meso_shared/deepmreye/derivatives/pp_data/{subject}/eyetracking/timeseries/{subject}_task-DeepMReyeCalib_subtask-{task}_run_{run}_ee_scaled.tsv.gz",
                compression='gzip', delimiter='\t')[['ee']].to_numpy()
            for run in ('01', '02', '03')
        ]

        # Combine runs and compute statistics
        all_ee = np.concatenate([ee_run_01, ee_run_02, ee_run_03])
        mean_ee = np.mean(all_ee)
        perc_ee = np.percentile(all_ee, 75)

        # Append result to the list
        scaled_results.append({
            'subject': subject,
            'task': task,
            'model': 'scaled',
            'mean': mean_ee,
            '75_perc': perc_ee
        })

# Convert to DataFrame
df_trained = pd.DataFrame(scaled_results)

# Concatenate. Rows of this model left over from an earlier execution of this
# cell are dropped first: otherwise re-running the cell would duplicate
# (subject, task, model) entries and the later pivots on df_ee would fail.
if 'model' in df_ee.columns:
    df_ee = df_ee[df_ee['model'] != 'scaled']
df_ee = pd.concat([df_ee, df_trained], ignore_index=True)


In [7]:
# FINE TUNED MODEL
# For every subject and subtask, pool the 'ee' column of runs 01-03 of the
# "no_interpol" files, summarise it by mean and 75th percentile, and add the
# rows to df_ee. Note the label: these rows are stored as model == 'pt_calib'.

# Define tasks
tasks = ['fixation', 'pursuit', 'freeview', 'all']

# Initialize list to store data rows
calib_results = []

for subject in subjects:
    for task in tasks:
        # Load data for each run (same file pattern, only the run id changes)
        ee_run_01, ee_run_02, ee_run_03 = [
            pd.read_csv(
                f"/Users/sinakling/disks/meso_shared/deepmreye/derivatives/pp_data/{subject}/eyetracking/timeseries/{subject}_task-DeepMReyeCalib_subtask-{task}_run_{run}_ee_no_interpol.tsv.gz",
                compression='gzip', delimiter='\t')[['ee']].to_numpy()
            for run in ('01', '02', '03')
        ]

        # Combine runs and compute statistics
        all_ee = np.concatenate([ee_run_01, ee_run_02, ee_run_03])
        mean_ee = np.mean(all_ee)
        perc_ee = np.percentile(all_ee, 75)

        # Append result to the list
        calib_results.append({
            'subject': subject,
            'task': task,
            'model': 'pt_calib',
            'mean': mean_ee,
            '75_perc': perc_ee
        })

# Convert to DataFrame
df_calib = pd.DataFrame(calib_results)

# Concatenate. Rows of this model left over from an earlier execution of this
# cell are dropped first: otherwise re-running the cell would duplicate
# (subject, task, model) entries and the later pivots on df_ee would fail.
if 'model' in df_ee.columns:
    df_ee = df_ee[df_ee['model'] != 'pt_calib']
df_ee = pd.concat([df_ee, df_calib], ignore_index=True)


In [8]:
# PRETRAINED MODEL 5 DEG
# For every subject and subtask, pool the 'ee' column of runs 01-03 of the
# "pt_fivedegree" files, summarise it by mean and 75th percentile, and add the
# rows to df_ee under model == 'pt_fivedegree'.

# Define tasks
tasks = ['fixation', 'pursuit', 'freeview', 'all']

# Initialize list to store data rows
calib_results = []

for subject in subjects:
    for task in tasks:
        # Load data for each run (same file pattern, only the run id changes)
        ee_run_01, ee_run_02, ee_run_03 = [
            pd.read_csv(
                f"/Users/sinakling/disks/meso_shared/deepmreye/derivatives/pp_data/{subject}/eyetracking/timeseries/{subject}_task-DeepMReyeCalib_subtask-{task}_run_{run}_ee_pt_fivedegree.tsv.gz",
                compression='gzip', delimiter='\t')[['ee']].to_numpy()
            for run in ('01', '02', '03')
        ]

        # Combine runs and compute statistics
        all_ee = np.concatenate([ee_run_01, ee_run_02, ee_run_03])
        mean_ee = np.mean(all_ee)
        perc_ee = np.percentile(all_ee, 75)

        # Append result to the list
        calib_results.append({
            'subject': subject,
            'task': task,
            'model': 'pt_fivedegree',
            'mean': mean_ee,
            '75_perc': perc_ee
        })

# Convert to DataFrame
df_pt_5 = pd.DataFrame(calib_results)

# Concatenate. Rows of this model left over from an earlier execution of this
# cell are dropped first: otherwise re-running the cell would duplicate
# (subject, task, model) entries and the later pivots on df_ee would fail.
if 'model' in df_ee.columns:
    df_ee = df_ee[df_ee['model'] != 'pt_fivedegree']
df_ee = pd.concat([df_ee, df_pt_5], ignore_index=True)


In [9]:
# FINE TUNED MODEL 5 deg
# For every subject and subtask, pool the 'ee' column of runs 01-03 of the
# "ft_fivedegree" files, summarise it by mean and 75th percentile, and add the
# rows to df_ee under model == 'ft_fivedegree'.

# Define tasks
tasks = ['fixation', 'pursuit', 'freeview', 'all']

# Initialize list to store data rows
calib_results = []

for subject in subjects:
    for task in tasks:
        # Load data for each run (same file pattern, only the run id changes)
        ee_run_01, ee_run_02, ee_run_03 = [
            pd.read_csv(
                f"/Users/sinakling/disks/meso_shared/deepmreye/derivatives/pp_data/{subject}/eyetracking/timeseries/{subject}_task-DeepMReyeCalib_subtask-{task}_run_{run}_ee_ft_fivedegree.tsv.gz",
                compression='gzip', delimiter='\t')[['ee']].to_numpy()
            for run in ('01', '02', '03')
        ]

        # Combine runs and compute statistics
        all_ee = np.concatenate([ee_run_01, ee_run_02, ee_run_03])
        mean_ee = np.mean(all_ee)
        perc_ee = np.percentile(all_ee, 75)

        # Append result to the list
        calib_results.append({
            'subject': subject,
            'task': task,
            'model': 'ft_fivedegree',
            'mean': mean_ee,
            '75_perc': perc_ee
        })

# Convert to DataFrame
df_ft_5 = pd.DataFrame(calib_results)

# Concatenate. Rows of this model left over from an earlier execution of this
# cell are dropped first: otherwise re-running the cell would duplicate
# (subject, task, model) entries and the later pivots on df_ee would fail.
if 'model' in df_ee.columns:
    df_ee = df_ee[df_ee['model'] != 'ft_fivedegree']
df_ee = pd.concat([df_ee, df_ft_5], ignore_index=True)


In [10]:
# FINETUNED MODEL SIMULATED LABELS
# For every subject and subtask, pool the 'ee' column of runs 01-03 of the
# "ft_sim" files, summarise it by mean and 75th percentile, and add the rows
# to df_ee under model == 'ft_sim'.

# Define tasks
tasks = ['fixation', 'pursuit', 'freeview', 'all']

# Initialize list to store data rows
calib_results = []

for subject in subjects:
    for task in tasks:
        # Load data for each run (same file pattern, only the run id changes)
        ee_run_01, ee_run_02, ee_run_03 = [
            pd.read_csv(
                f"/Users/sinakling/disks/meso_shared/deepmreye/derivatives/pp_data/{subject}/eyetracking/timeseries/{subject}_task-DeepMReyeCalib_subtask-{task}_run_{run}_ee_ft_sim.tsv.gz",
                compression='gzip', delimiter='\t')[['ee']].to_numpy()
            for run in ('01', '02', '03')
        ]

        # Combine runs and compute statistics
        all_ee = np.concatenate([ee_run_01, ee_run_02, ee_run_03])
        mean_ee = np.mean(all_ee)
        perc_ee = np.percentile(all_ee, 75)

        # Append result to the list
        calib_results.append({
            'subject': subject,
            'task': task,
            'model': 'ft_sim',
            'mean': mean_ee,
            '75_perc': perc_ee
        })

# Convert to DataFrame
df_ft_sim = pd.DataFrame(calib_results)

# Concatenate. Rows of this model left over from an earlier execution of this
# cell are dropped first: otherwise re-running the cell would duplicate
# (subject, task, model) entries and the later pivots on df_ee would fail.
if 'model' in df_ee.columns:
    df_ee = df_ee[df_ee['model'] != 'ft_sim']
df_ee = pd.concat([df_ee, df_ft_sim], ignore_index=True)


In [11]:
# Across-subject mean, standard deviation and row count of the per-subject
# mean error, for every task/model combination.
(
    df_ee
    .groupby(['task', 'model'])['mean']
    .agg(['mean', 'std', 'count'])
    .reset_index()
)


Unnamed: 0,task,model,mean,std,count
0,all,ft_fivedegree,2.547014,0.498806,15
1,all,ft_sim,3.140851,0.652825,15
2,all,pretrained,3.616241,0.602887,15
3,all,pt_calib,3.118574,0.643608,15
4,all,pt_fivedegree,2.327419,0.421354,15
5,all,scaled,3.379836,0.65328,15
6,fixation,ft_fivedegree,3.417341,1.09004,15
7,fixation,ft_sim,3.886668,1.106222,15
8,fixation,pretrained,5.176508,1.027671,15
9,fixation,pt_calib,4.041402,1.113978,15


In [26]:
# CORRELATION (all models)
# Two CSV exports are read separately and then stacked into one table, df_corr.
df_corr_first = pd.read_csv('/Users/sinakling/Desktop/correlation_calib.csv')
df_corr_sim = pd.read_csv('/Users/sinakling/Desktop/correlation_sim_calib.csv')

df_corr = pd.concat(
    [df_corr_first, df_corr_sim],
    ignore_index=True,
)

In [27]:
# Across-subject summary of mean_pearson for the first correlation export
(
    df_corr_first
    .groupby(['task', 'model'])['mean_pearson']
    .agg(['mean', 'std', 'count'])
    .reset_index()
)

Unnamed: 0,task,model,mean,std,count
0,all,no_interpol,0.85201,0.051691,15
1,all,pretrained,0.814437,0.066147,15
2,all,scaled,0.814437,0.066147,15
3,fixation,no_interpol,0.874157,0.065567,15
4,fixation,pretrained,0.836007,0.087033,15
5,fixation,scaled,0.836007,0.087033,15
6,freeview,no_interpol,0.728674,0.140093,15
7,freeview,pretrained,0.697816,0.129343,15
8,freeview,scaled,0.697816,0.129343,15
9,pursuit,no_interpol,0.925135,0.027136,15


In [28]:
# Across-subject summary of mean_pearson for the simulated-labels export
(
    df_corr_sim
    .groupby(['task', 'model'])['mean_pearson']
    .agg(['mean', 'std', 'count'])
    .reset_index()
)

Unnamed: 0,task,model,mean,std,count
0,all,sim,0.848858,0.053383,15
1,fixation,sim,0.878872,0.064404,15
2,freeview,sim,0.710842,0.149329,15
3,pursuit,sim,0.923188,0.027088,15


In [29]:
# CORRELATION (five-degree models)
# Kept as its own table; it is not merged into df_corr.
df_corr_5deg = pd.read_csv(
    '/Users/sinakling/Desktop/correlation_5deg_calib.csv'
)
print(df_corr_5deg)


    subject      task          model  mean_pearson
0    sub-02       all  ft_fivedegree      0.632938
1    sub-02       all  pt_fivedegree      0.508749
2    sub-02  fixation  ft_fivedegree      0.788768
3    sub-02  fixation  pt_fivedegree      0.673887
4    sub-02  freeview  ft_fivedegree      0.610110
..      ...       ...            ...           ...
115  sub-17  fixation  pt_fivedegree      0.761205
116  sub-17  freeview  ft_fivedegree      0.726433
117  sub-17  freeview  pt_fivedegree      0.682034
118  sub-17   pursuit  ft_fivedegree      0.845607
119  sub-17   pursuit  pt_fivedegree      0.844276

[120 rows x 4 columns]


In [30]:
# Across-subject summary of mean_pearson for the five-degree models
(
    df_corr_5deg
    .groupby(['task', 'model'])['mean_pearson']
    .agg(['mean', 'std', 'count'])
    .reset_index()
)

Unnamed: 0,task,model,mean,std,count
0,all,ft_fivedegree,0.71699,0.08348,15
1,all,pt_fivedegree,0.681204,0.098661,15
2,fixation,ft_fivedegree,0.751951,0.135461,15
3,fixation,pt_fivedegree,0.68988,0.169762,15
4,freeview,ft_fivedegree,0.608777,0.134957,15
5,freeview,pt_fivedegree,0.5855,0.119337,15
6,pursuit,ft_fivedegree,0.875777,0.041191,15
7,pursuit,pt_fivedegree,0.854861,0.08643,15


In [31]:
from scipy.stats import permutation_test
import pandas as pd

def paired_permutation_tests(df, model_a, model_b, value_col=None,
                             n_resamples=10000, random_state=None):
    """
    Run paired permutation tests between two models for each task.

    Within every task the two models are paired by subject; subjects lacking a
    value for either model are dropped, and tasks with fewer than two complete
    pairs are skipped.

    Parameters:
        df (pd.DataFrame): Long-format table with columns
            ['subject', 'task', 'model', <value_col>].
        model_a (str): Name of the first model
        model_b (str): Name of the second model
        value_col (str, optional): Column holding the per-subject metric.
            When omitted, 'mean_pearson' is used if the table has it,
            otherwise 'mean'. This lets the same function serve both the
            correlation tables and the error table (df_ee), which previously
            failed because 'mean_pearson' was hardcoded.
        n_resamples (int): Number of resamples handed to
            scipy.stats.permutation_test.
        random_state (optional): Seed / generator forwarded to
            scipy.stats.permutation_test for reproducible p-values. It is only
            forwarded when given, so the default call is unchanged.

    Returns:
        pd.DataFrame: One row per tested task with columns
            ['task', 'model_a', 'model_b', 'mean_diff', 'p_value', 'n_subjects'],
            where mean_diff is mean(model_a - model_b). The columns are present
            even when no task could be tested.

    Raises:
        KeyError: If value_col is omitted and the table has neither a
            'mean_pearson' nor a 'mean' column.
    """
    if value_col is None:
        for candidate in ('mean_pearson', 'mean'):
            if candidate in df.columns:
                value_col = candidate
                break
        else:
            raise KeyError(
                "value_col not given and df has neither a 'mean_pearson' nor a 'mean' column"
            )

    # Only forward the seed when the caller asked for one
    extra_kwargs = {} if random_state is None else {'random_state': random_state}

    results = []

    # Group only by task
    for task, group in df.groupby('task'):
        # Filter to the two models of interest
        group_filtered = group[group['model'].isin([model_a, model_b])]

        # Pivot: subjects as rows, models as columns; keep complete pairs only
        pivoted = group_filtered.pivot(index='subject', columns='model', values=value_col).dropna()

        if pivoted.shape[0] < 2:
            continue  # Not enough subjects with data for both models

        a_values = pivoted[model_a].values
        b_values = pivoted[model_b].values

        # Paired permutation test on mean difference
        # ('samples' permutes the model labels within each subject)
        result = permutation_test(
            (a_values, b_values),
            statistic=lambda x, y: (x - y).mean(),
            permutation_type='samples',
            vectorized=False,
            alternative='two-sided',
            n_resamples=n_resamples,
            **extra_kwargs
        )

        results.append({
            'task': task,
            'model_a': model_a,
            'model_b': model_b,
            'mean_diff': (a_values - b_values).mean(),
            'p_value': result.pvalue,
            'n_subjects': len(pivoted)
        })

    return pd.DataFrame(
        results,
        columns=['task', 'model_a', 'model_b', 'mean_diff', 'p_value', 'n_subjects']
    )


In [14]:

# Run paired permutation tests for ee

# Model pairs to contrast on df_ee, as (model_a, model_b)
ee_model_pairs = [
    ('pretrained', 'scaled'),
    ('pretrained', 'pt_calib'),
    ('scaled', 'pt_calib'),
    ('pt_fivedegree', 'ft_fivedegree'),
    ('pt_calib', 'ft_sim'),
]

# One result table per pair, in the order listed above
(
    sig_df_pt_vs_scaled,
    sig_df_pt_vs_calib,
    sig_df_scaled_vs_calib,
    sig_df_pt_vs_ft_5,
    sig_df_calib_vs_sim,
) = [
    paired_permutation_tests(df_ee, model_a=first_model, model_b=second_model)
    for first_model, second_model in ee_model_pairs
]

# Stack the per-pair tables into a single significance table
sig_df = pd.concat(
    [
        sig_df_pt_vs_scaled,
        sig_df_pt_vs_calib,
        sig_df_scaled_vs_calib,
        sig_df_pt_vs_ft_5,
        sig_df_calib_vs_sim,
    ],
    ignore_index=True,
)

# Show the combined dataframe with the significance results
print(sig_df)




        task        model_a        model_b  mean_diff   p_value  n_subjects
0        all     pretrained         scaled   0.236405  0.000800          15
1   fixation     pretrained         scaled   0.603500  0.000400          15
2   freeview     pretrained         scaled  -0.161991  0.048995          15
3    pursuit     pretrained         scaled   0.306344  0.000800          15
4        all     pretrained       pt_calib   0.497667  0.000200          15
5   fixation     pretrained       pt_calib   1.135105  0.000200          15
6   freeview     pretrained       pt_calib   0.128995  0.158984          15
7    pursuit     pretrained       pt_calib   0.403891  0.000200          15
8        all         scaled       pt_calib   0.261261  0.000200          15
9   fixation         scaled       pt_calib   0.531605  0.000400          15
10  freeview         scaled       pt_calib   0.290986  0.007399          15
11   pursuit         scaled       pt_calib   0.097547  0.078592          15
12       all

In [32]:

# Run paired permutation tests for correlation

# (table, model_a, model_b) for every contrast; the five-degree models live in
# their own table, every other contrast reads from df_corr
corr_contrasts = [
    (df_corr, 'pretrained', 'scaled'),
    (df_corr, 'pretrained', 'no_interpol'),
    (df_corr, 'scaled', 'no_interpol'),
    (df_corr_5deg, 'pt_fivedegree', 'ft_fivedegree'),
    (df_corr, 'no_interpol', 'sim'),
]

# One result table per contrast, in the order listed above
(
    sig_df_pt_vs_scaled,
    sig_df_pt_vs_calib,
    sig_df_scaled_vs_calib,
    sig_df_pt_vs_ft_5,
    sig_df_calib_vs_sim,
) = [
    paired_permutation_tests(table, model_a=first_model, model_b=second_model)
    for table, first_model, second_model in corr_contrasts
]

# Stack the per-contrast tables into a single significance table
sig_df = pd.concat(
    [
        sig_df_pt_vs_scaled,
        sig_df_pt_vs_calib,
        sig_df_scaled_vs_calib,
        sig_df_pt_vs_ft_5,
        sig_df_calib_vs_sim,
    ],
    ignore_index=True,
)

# Show the combined dataframe with the significance results
print(sig_df)

        task        model_a        model_b     mean_diff   p_value  n_subjects
0        all     pretrained         scaled  1.136159e-10  0.625137          15
1   fixation     pretrained         scaled  1.522174e-10  0.574943          15
2   freeview     pretrained         scaled  5.698260e-10  0.203380          15
3    pursuit     pretrained         scaled  3.320437e-11  0.898310          15
4        all     pretrained    no_interpol -3.757277e-02  0.000400          15
5   fixation     pretrained    no_interpol -3.814987e-02  0.000400          15
6   freeview     pretrained    no_interpol -3.085863e-02  0.002200          15
7    pursuit     pretrained    no_interpol -1.167386e-02  0.021998          15
8        all         scaled    no_interpol -3.757277e-02  0.000400          15
9   fixation         scaled    no_interpol -3.814987e-02  0.000200          15
10  freeview         scaled    no_interpol -3.085863e-02  0.005199          15
11   pursuit         scaled    no_interpol -1.167386

In [16]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
from scipy.stats import permutation_test

# Figure: one row per task and one column per model pair. Each panel shows the
# across-subject mean error of the two models (bars with SEM), one connecting
# line per subject, and a star when the paired difference is significant.

# Define color and name mappings
model_name_map = {
    "pretrained": "DeepMreye",
    "scaled": "DeepMreye Scaled",
    "pt_calib": "DeepMreye + Calib"
}

bar_color = 'rgba(66, 129, 164, 0.25)'

colormap_subject_dict = {
    'sub-02': '#3283FE', 'sub-03': '#85660D', 'sub-04': '#782AB6',
    'sub-05': '#565656', 'sub-06': '#1C8356', 'sub-07': '#16FF32', 'sub-08': '#F7E1A0',
    'sub-09': '#E2E2E2', 'sub-10': '#AA0DFE', 'sub-11': '#1CBE4F', 'sub-13': '#DEA0FD', 'sub-14': '#FBE426',
    'sub-15': '#325A9B', 'sub-16': '#FEAF16', 'sub-17': '#F8A19F'
}

# Bold tags are closed properly (the original "<b>...<b>" never closed them)
task_labels = ["<b>Guided Fixation</b>", "<b>Smooth Pursuit</b>", "<b>Freeviewing</b>"]
task_names = ["fixation", "pursuit", "freeview", "all"]


comparisons = [
    ("pretrained", "scaled"),
    ("pretrained", "pt_calib"),
    ("scaled", "pt_calib")
]


def _paired_p_value(a_values, b_values, n_resamples=10000):
    """Two-sided paired permutation p-value for the mean difference a - b."""
    return permutation_test(
        (a_values, b_values),
        statistic=lambda x, y: (x - y).mean(),
        permutation_type='samples',
        vectorized=False,
        alternative='two-sided',
        n_resamples=n_resamples
    ).pvalue


fig = make_subplots(
    rows=len(task_names), cols=len(comparisons),
    subplot_titles=[label for label in task_labels for _ in comparisons]
                   + ["<b>All Tasks</b>"] * len(comparisons)
)


# Keep track of subjects already shown in legend
shown_subjects = set()

for i, task in enumerate(task_names):
    for j, (model_a, model_b) in enumerate(comparisons):
        row = i + 1
        col = j + 1

        df_plot = df_ee[
            (df_ee["task"] == task) &
            (df_ee["model"].isin([model_a, model_b]))
        ]

        # Subjects as rows, the two models as columns; complete pairs only
        df_pivot = df_plot.pivot(index="subject", columns="model", values="mean")[[model_a, model_b]].dropna()

        means = df_pivot.mean()
        stderrs = df_pivot.sem()
        x_vals = [0, 1]

        fig.add_trace(go.Bar(
            x=x_vals,
            y=means.values,
            error_y=dict(type="data", array=stderrs.values),
            marker_color=bar_color,
            width=0.25,
            showlegend=False
        ), row=row, col=col)

        # Add connecting lines per subject
        for subject, row_data in df_pivot.iterrows():
            color = colormap_subject_dict.get(subject, 'gray')
            show_legend = subject not in shown_subjects
            shown_subjects.add(subject)

            fig.add_trace(go.Scatter(
                x=x_vals,
                y=row_data.values,
                mode='lines+markers',
                line=dict(color=color, width=1.5),
                marker=dict(size=4),
                name=subject,
                opacity=0.9,
                showlegend=show_legend
            ), row=row, col=col)

        # Add significance marker if p < 0.05.
        # The test is computed here from the plotted pairs instead of being
        # looked up in the global sig_df: in notebook order sig_df has already
        # been overwritten by the correlation tests when this cell runs, so the
        # lookup would return correlation p-values (or nothing for pt_calib).
        is_significant = (
            len(df_pivot) >= 2
            and _paired_p_value(df_pivot[model_a].values, df_pivot[model_b].values) < 0.05
        )

        if is_significant:
            y_vals = means.values
            max_y = max(y_vals)
            line_y = max_y + 0.5

            fig.add_shape(
                type="line",
                x0=0,
                x1=1,
                y0=line_y,
                y1=line_y,
                line=dict(color="black", width=1),
                row=row, col=col
            )
            fig.add_trace(go.Scatter(
                x=[0.5],
                y=[line_y + 1],
                text=["*"],
                mode="text",
                showlegend=False
            ), row=row, col=col)

# Layout
fig.update_layout(
    height=1800,
    width=1300,
    title_text="Is Calib better than just scaling?",
    template="simple_white",
    showlegend=True,
    font=dict(size=16, family="Arial"),
    margin=dict(t=150)
)

# Axis formatting: tick labels come straight from `comparisons`, so they cannot
# drift out of sync with the data drawn in each column
for i in range(1, len(task_names) + 1):
    for j, (model_a, model_b) in enumerate(comparisons, start=1):
        fig.update_yaxes(range=[0, 8], title_text="Euclidean Error (dva)", row=i, col=j)
        fig.update_xaxes(
            tickvals=[0, 1],
            ticktext=[model_name_map[model_a], model_name_map[model_b]],
            row=i, col=j
        )


fig.show()
#fig.write_image("/Users/sinakling/disks/meso_shared/deepmreye/derivatives/int_deepmreye/deepmreye_calib/figures/group/euclidean_error_barplots.pdf")


In [41]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Figure: one panel per task with horizontal bars of the across-subject mean
# correlation per model (SEM error bars) and brackets with a star for model
# pairs whose row in sig_df has p < 0.05.

# Setup
task_names = ["fixation", "pursuit", "freeview"]
task_labels = ["<b>Guided Fixation</b>", "<b>Smooth Pursuit</b>", "<b>Freeviewing</b>"]
model_order = ["no_interpol", "scaled", "pretrained"]
bar_color = 'rgba(66, 129, 164, 0.25)'

# Define color and name mappings
model_name_map = {
    "pretrained": "DeepMreye",
    "scaled": "DeepMreye Scaled",
    "no_interpol": "DeepMreye + Calib"
}

# Not used by this figure; kept because the name is (re)assigned at notebook level
colormap_subject_dict = {
    'sub-01': '#AA0DFE', 'sub-02': '#3283FE', 'sub-03': '#85660D', 'sub-04': '#782AB6',
    'sub-05': '#565656', 'sub-06': '#1C8356', 'sub-07': '#16FF32', 'sub-08': '#F7E1A0',
    'sub-09': '#E2E2E2', 'sub-11': '#1CBE4F', 'sub-13': '#DEA0FD', 'sub-14': '#FBE426',
    'sub-15': '#325A9B', 'sub-16': '#FEAF16', 'sub-17': '#F8A19F'
}

# Bracket geometry in correlation units. The previous values (0.3 between
# brackets, 0.1 caps) pushed every bracket after the first outside the [0, 1]
# x range, so it was never visible.
bracket_step = 0.04
cap_len = 0.015
# Right edge of the x axis; grows when brackets need the room
x_axis_max = 1.0

fig = make_subplots(
    rows=3, cols=1,
    shared_xaxes=False,
    vertical_spacing=0.15,
    subplot_titles=task_labels
)

shown_subjects = set()

for i, task in enumerate(task_names):
    row = i + 1

    df_plot = df_corr[
        (df_corr["task"] == task) &
        (df_corr["model"].isin(model_order))
    ]

    # Subjects as rows, models as columns (in model_order); complete rows only
    df_pivot = df_plot.pivot(index="subject", columns="model", values="mean_pearson")[model_order].dropna()

    means = df_pivot.mean()
    stderrs = df_pivot.sem()
    y_vals = [model_name_map[m] for m in model_order]

    fig.add_trace(go.Bar(
        y=y_vals,
        x=means.values,
        error_x=dict(type="data", array=stderrs.values),
        orientation='h',
        marker_color=bar_color,
        width=0.4,
        showlegend=False
    ), row=row, col=1)

    # Brackets start just beyond the longest bar of the panel (error bar included)
    bracket_base = float((means + stderrs).max())

    offset_counter = 0
    for j in range(len(model_order)):
        for k in range(j + 1, len(model_order)):
            model_a = model_order[j]
            model_b = model_order[k]

            # The pair may be stored in either order in sig_df
            sig_row = sig_df[
                ((sig_df.task == task) &
                 (((sig_df.model_a == model_a) & (sig_df.model_b == model_b)) |
                  ((sig_df.model_a == model_b) & (sig_df.model_b == model_a))))
            ]

            if not sig_row.empty and sig_row.iloc[0].p_value < 0.05:
                # Get y positions (as strings for categorical y-axis)
                y0 = model_name_map[model_a]
                y1 = model_name_map[model_b]

                # Category that carries the star. Floor division is used because
                # round() rounds halves to the nearest even integer, which put
                # the star on the lower bar for one adjacent pair and on the
                # upper bar for the other.
                y_mid = y_vals[(j + k) // 2]

                # Horizontal position of this bracket; each further significant
                # pair in the panel is drawn one step further right
                line_x = bracket_base + (offset_counter + 1) * bracket_step
                x_axis_max = max(x_axis_max, line_x + bracket_step)

                # Add vertical connector line (between y0 and y1 at x=line_x)
                fig.add_shape(
                    type="line",
                    x0=line_x, x1=line_x,
                    y0=y0, y1=y1,
                    xref='x'+str(row) if row > 1 else 'x',
                    yref='y'+str(row) if row > 1 else 'y',
                    line=dict(color="black", width=1),
                    row=row, col=1
                )

                # Horizontal caps
                for y in [y0, y1]:
                    fig.add_shape(
                        type="line",
                        x0=line_x - cap_len, x1=line_x,
                        y0=y, y1=y,
                        xref='x'+str(row) if row > 1 else 'x',
                        yref='y'+str(row) if row > 1 else 'y',
                        line=dict(color="black", width=1),
                        row=row, col=1
                    )

                # Add significance star next to vertical line
                fig.add_trace(go.Scatter(
                    x=[line_x],
                    y=[y_mid],
                    text=["*"],
                    mode="text",
                    textposition="middle left",
                    showlegend=False
                ), row=row, col=1)

                offset_counter += 1



# Layout
fig.update_layout(
    height=1400,
    width=900,
    title_text="Correlation Across Tasks and Models",
    template="simple_white",
    showlegend=True,
    font=dict(size=16, family="Arial"),
    margin=dict(t=120)
)

# Axis formatting: all panels share one x range, wide enough for every bracket
for i in range(1, 4):
    fig.update_xaxes(range=[0, x_axis_max], title_text="Correlation (r)", row=i, col=1)
    fig.update_yaxes(title_text="", row=i, col=1)

fig.show()
fig.write_image("/Users/sinakling/Desktop/correlation_hor_barplots.pdf")


In [38]:
import plotly.graph_objects as go
import numpy as np

# Figure: grouped bars of the across-subject mean correlation per task and
# model (SEM error bars), with a bracket and star for every model pair whose
# row in sig_df has p < 0.05.

# Task and model info
task_order = ["fixation", "pursuit", "freeview"]
task_labels = ["Fixation", "Pursuit", "Freeviewing"]
models = ["pretrained", "scaled", "no_interpol"]

model_name_map = {
    "pretrained": "DeepMreye",
    "scaled": "DeepMreye Scaled",
    "no_interpol": "DeepMreye + Calib"
}
model_colors = {
    "pretrained": "rgba(66, 129, 164, 0.8)",
    "scaled": "rgba(243, 156, 18, 0.8)",
    "no_interpol": "rgba(39, 174, 96, 0.8)"
}

bar_width = 0.2  # numeric width of each bar
bracket_gap = 0.1    # distance between the tallest bar of a task and its first bracket
bracket_step = 0.05  # vertical distance between stacked brackets of one task

# Compute mean and SEM
means = df_corr.groupby(["task", "model"])["mean_pearson"].mean().reindex(task_order, level=0).unstack()
stderrs = df_corr.groupby(["task", "model"])["mean_pearson"].sem().reindex(task_order, level=0).unstack()

# Compute numeric x positions for each bar
x_pos = {}
for t_idx, task in enumerate(task_order):
    for m_idx, model in enumerate(models):
        # Center the bars around the integer task index
        x_pos[(task, model)] = t_idx - bar_width + m_idx * bar_width

# Create figure
fig = go.Figure()

# Add bars
for model in models:
    x_vals = [x_pos[(task, model)] for task in task_order]
    y_vals = [means.loc[task, model] for task in task_order]
    y_err = [stderrs.loc[task, model] for task in task_order]

    fig.add_trace(go.Bar(
        x=x_vals,
        y=y_vals,
        name=model_name_map[model],
        error_y=dict(type='data', array=y_err),
        width=bar_width,
        marker_color=model_colors[model]
    ))

# Add significance
comparisons = [
    ("pretrained", "scaled"),
    ("pretrained", "no_interpol"),
    ("scaled", "no_interpol")
]

for task in task_order:
    # All brackets of a task are stacked above its tallest bar. Previously each
    # bracket sat 0.1 above the taller bar of its own pair, so two significant
    # pairs sharing that bar were drawn on top of each other.
    tallest_bar = max(means.loc[task, name] for name in models)
    n_brackets = 0

    for model_a, model_b in comparisons:
        sig_row = sig_df[
            (sig_df.task == task) &
            (sig_df.model_a == model_a) &
            (sig_df.model_b == model_b)
        ]
        if not sig_row.empty and sig_row.iloc[0].p_value < 0.05:
            x0 = x_pos[(task, model_a)]
            x1 = x_pos[(task, model_b)]
            y_max = tallest_bar + bracket_gap + n_brackets * bracket_step

            # Draw line
            fig.add_shape(type="line", x0=x0, x1=x1, y0=y_max, y1=y_max,
                          line=dict(color="black", width=1))
            # Add star
            fig.add_trace(go.Scatter(
                x=[(x0 + x1)/2],
                y=[y_max],
                text=["*"],
                mode="text",
                showlegend=False
            ))
            n_brackets += 1

# Fix x-axis ticks
tick_vals = [t_idx for t_idx in range(len(task_order))]
tick_text = task_labels
fig.update_layout(
    xaxis=dict(tickmode='array', tickvals=tick_vals, ticktext=tick_text),
    yaxis_title="Pearson Correlation (r)",
    title="Correlation Across Tasks for Different Models",
    barmode='overlay',  # bars already positioned manually
    template="simple_white",
    font=dict(size=14),
    # Size is set before showing so the displayed figure and the exported PDF match
    width=900,   # adjust to your preferred display width
    height=500   # adjust to your preferred display height
)

fig.show()

# Save as PDF
fig.write_image("/Users/sinakling/Desktop/calib_corr_grouped_new.pdf")


In [9]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# Paired bar + per-subject line plots of mean Pearson correlation:
# one subplot row per task, one column per pairwise model comparison.
# Relies on df_corr (columns: subject, task, model, mean_pearson) and
# sig_df (columns: task, model_a, model_b, p_value) from earlier cells.

# Define color and name mappings
model_name_map = {
    "pretrained": "DeepMreye",
    "scaled": "DeepMreye Scaled",
    "no_interpol": "DeepMreye + Calib"
}

bar_color = 'rgba(66, 129, 164, 0.25)' 

# Subjects without an entry fall back to gray (see .get(...) below).
# 'sub-10' is part of the cohort list, so it needs its own entry; it uses the
# same color the Euclidean-error figure assigns to it.
colormap_subject_dict = {
    'sub-01': '#AA0DFE', 'sub-02': '#3283FE', 'sub-03': '#85660D', 'sub-04': '#782AB6',
    'sub-05': '#565656', 'sub-06': '#1C8356', 'sub-07': '#16FF32', 'sub-08': '#F7E1A0',
    'sub-09': '#E2E2E2', 'sub-10': '#AA0DFE', 'sub-11': '#1CBE4F', 'sub-13': '#DEA0FD',
    'sub-14': '#FBE426', 'sub-15': '#325A9B', 'sub-16': '#FEAF16', 'sub-17': '#F8A19F'
}

task_labels = ["<b>Guided Fixation<b>", "<b>Smooth Pursuit<b>", "<b>Freeviewing<b>"]
task_names = ["fixation", "pursuit", "freeview", "all"]


comparisons = [
    ("pretrained", "scaled"),
    ("pretrained", "no_interpol"),
    ("scaled", "no_interpol")
]

# Grid size follows the task and comparison lists; each task title is repeated
# once per comparison column, followed by the titles for the "all" row.
fig = make_subplots(
    rows=len(task_names), cols=len(comparisons),
    subplot_titles=[f"{label}" for label in task_labels for _ in range(len(comparisons))]
                   + ["<b>All Tasks<b>"] * len(comparisons)
)


# Keep track of subjects already shown in legend
shown_subjects = set()

# Both models of a pair sit at fixed x positions 0 and 1 in every panel
x_vals = [0, 1]

for i, task in enumerate(task_names):
    for j, (model_a, model_b) in enumerate(comparisons):
        row = i + 1
        col = j + 1

        df_plot = df_corr[
            (df_corr["task"] == task) &
            (df_corr["model"].isin([model_a, model_b]))
        ]

        # Wide table: one row per subject, one column per model; subjects
        # lacking either model are dropped so the comparison stays paired.
        df_pivot = df_plot.pivot(index="subject", columns="model", values="mean_pearson")[[model_a, model_b]].dropna()

        means = df_pivot.mean()
        stderrs = df_pivot.sem()

        # Group mean +/- SEM as translucent bars
        fig.add_trace(go.Bar(
            x=x_vals,
            y=means.values,
            error_y=dict(type="data", array=stderrs.values),
            marker_color=bar_color,
            width=0.25,
            showlegend=False
        ), row=row, col=col)

        # Add connecting lines per subject
        for subject, row_data in df_pivot.iterrows():
            color = colormap_subject_dict.get(subject, 'gray')
            # Each subject gets exactly one legend entry across all panels
            show_legend = subject not in shown_subjects
            shown_subjects.add(subject)

            fig.add_trace(go.Scatter(
                x=x_vals,
                y=row_data.values,
                mode='lines+markers',
                line=dict(color=color, width=1.5),
                marker=dict(size=4),
                name=subject,
                opacity=0.9,
                showlegend=show_legend
            ), row=row, col=col)

        # Add significance marker if p < 0.05
        sig_row = sig_df[
            (sig_df.task == task) &
            (sig_df.model_a == model_a) &
            (sig_df.model_b == model_b)
        ]

        if not sig_row.empty and sig_row.iloc[0].p_value < 0.05:
            # Bracket sits 0.1 above the taller bar, star slightly above it
            line_y = means.max() + 0.1

            fig.add_shape(
                type="line",
                x0=0,
                x1=1,
                y0=line_y,
                y1=line_y,
                line=dict(color="black", width=1),
                row=row, col=col
            )
            fig.add_trace(go.Scatter(
                x=[0.5],
                y=[line_y + 0.05],
                text=["*"],
                mode="text",
                showlegend=False
            ), row=row, col=col)

# Layout
fig.update_layout(
    height=1800,
    width=1300,
    title_text="Is Calib better than just scaling?",
    template="simple_white",
    showlegend=True,
    font=dict(size=16, family="Arial"),
    margin=dict(t=150)
)

# Axis formatting: tick labels come straight from the comparison pairs so they
# cannot drift out of sync with the data plotted in each column.
for i in range(1, len(task_names) + 1):
    for j, (model_a, model_b) in enumerate(comparisons, start=1):
        model_a_label = model_name_map[model_a]
        model_b_label = model_name_map[model_b]

        fig.update_yaxes(range=[0, 1.115], title_text="Pearson Correlation", row=i, col=j)
        fig.update_xaxes(
            tickvals=[0, 1],
            ticktext=[model_a_label, model_b_label],
            row=i, col=j
        )


fig.show()
#fig.write_image("/Users/sinakling/disks/meso_shared/deepmreye/derivatives/int_deepmreye/deepmreye_calib/figures/group/correlation_barplots.pdf")

In [17]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# Euclidean-error comparison of the two 5-degree models, one subplot per task.
# Reads df_ee (columns: subject, task, model, mean) and sig_df
# (columns: task, model_a, model_b, p_value) from earlier cells.
# NOTE(review): sig_df is defined outside this cell - confirm it holds the
# tests for Euclidean error and for this model pair.

# Model key -> display name
model_name_map = {
    "pretrained": "DeepMreye",
    "scaled": "DeepMreye Scaled",
    "pt_calib": "DeepMreye + Calib", 
    "pt_fivedegree": "DeepMReye 5 degree", 
    "ft_fivedegree": "DeepMReye + Calib 5 degree"
}

bar_color = 'rgba(66, 129, 164, 0.25)' 

# Subject -> line color; subjects missing here are drawn in gray
colormap_subject_dict = {
    'sub-02': '#3283FE', 'sub-03': '#85660D', 'sub-04': '#782AB6',
    'sub-05': '#565656', 'sub-06': '#1C8356', 'sub-07': '#16FF32', 'sub-08': '#F7E1A0',
    'sub-09': '#E2E2E2', 'sub-10': '#AA0DFE', 'sub-11': '#1CBE4F', 'sub-13': '#DEA0FD', 'sub-14': '#FBE426', 
    'sub-15': '#325A9B', 'sub-16': '#FEAF16', 'sub-17': '#F8A19F'
}

task_labels = ["<b>Guided Fixation<b>", "<b>Smooth Pursuit<b>", "<b>Freeviewing<b>"]
task_names = ["fixation", "pursuit", "freeview", "all"]

comparisons = [("pt_fivedegree", "ft_fivedegree")]

fig = make_subplots(
    rows=4, cols=1,
    subplot_titles=[f"{label}" for label in task_labels] + ["<b>All Tasks<b>"]
)

shown_subjects = set()

# Single comparison and single column: both are the same for every task
model_a, model_b = comparisons[0]
col = 1
x_vals = [0, 1]

for row, task in enumerate(task_names, start=1):
    is_task = df_ee["task"] == task
    is_compared_model = df_ee["model"].isin([model_a, model_b])
    df_plot = df_ee[is_task & is_compared_model]

    # One row per subject, one column per model; incomplete pairs are dropped
    df_pivot = (
        df_plot
        .pivot(index="subject", columns="model", values="mean")
        [[model_a, model_b]]
        .dropna()
    )

    means = df_pivot.mean()
    stderrs = df_pivot.sem()

    # Group mean with SEM error bars
    group_bars = go.Bar(
        x=x_vals,
        y=means.values,
        error_y=dict(type="data", array=stderrs.values),
        marker_color=bar_color,
        width=0.25,
        showlegend=False
    )
    fig.add_trace(group_bars, row=row, col=col)

    # One connecting line per subject; legend entry only on first appearance
    for subject, row_data in df_pivot.iterrows():
        show_legend = subject not in shown_subjects
        shown_subjects.add(subject)

        subject_line = go.Scatter(
            x=x_vals,
            y=row_data.values,
            mode='lines+markers',
            line=dict(color=colormap_subject_dict.get(subject, 'gray'), width=1.5),
            marker=dict(size=4),
            name=subject,
            opacity=0.9,
            showlegend=show_legend
        )
        fig.add_trace(subject_line, row=row, col=col)

    # Significance marker
    sig_row = sig_df[
        (sig_df.task == task) &
        (sig_df.model_a == model_a) &
        (sig_df.model_b == model_b)
    ]

    is_significant = (not sig_row.empty) and sig_row.iloc[0].p_value < 0.05
    if is_significant:
        max_y = max(means.values)
        line_y = max_y + 0.5

        # Horizontal bracket spanning both bars
        fig.add_shape(
            type="line",
            x0=0,
            x1=1,
            y0=line_y,
            y1=line_y,
            line=dict(color="black", width=1),
            row=row, col=col
        )
        # Star centred above the bracket
        star = go.Scatter(
            x=[0.5],
            y=[line_y + 1],
            text=["*"],
            mode="text",
            showlegend=False
        )
        fig.add_trace(star, row=row, col=col)

# Layout
fig.update_layout(
    height=1800,
    width=800,
    title_text="Effect of fovea stimulus",
    template="simple_white",
    showlegend=True,
    font=dict(size=16, family="Arial"),
    margin=dict(t=150)
)

# Axis formatting: identical y-range and model tick labels on every row
tick_labels = [model_name_map["pt_fivedegree"], model_name_map["ft_fivedegree"]]
for i in range(1, 5):
    fig.update_yaxes(range=[0, 8], title_text="Euclidean Error (dva)", row=i, col=1)
    fig.update_xaxes(tickvals=[0, 1], ticktext=tick_labels, row=i, col=1)

fig.show()


In [30]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# Pearson-correlation comparison of the two 5-degree models, one subplot per task.
# Reads df_corr_5deg (columns: subject, task, model, mean_pearson) and sig_df
# (columns: task, model_a, model_b, p_value) from earlier cells.

# Define color and name mappings
model_name_map = {
    "pretrained": "DeepMreye",
    "scaled": "DeepMreye Scaled",
    "pt_calib": "DeepMreye + Calib", 
    "pt_fivedegree": "DeepMReye 5 degree", 
    "ft_fivedegree": "DeepMReye + Calib 5 degree"
}

bar_color = 'rgba(66, 129, 164, 0.25)' 

# Subjects without an entry fall back to gray (see .get(...) below).
# 'sub-10' is part of the cohort list, so it needs its own entry; it uses the
# same color the 5-degree Euclidean-error figure assigns to it.
colormap_subject_dict = {
    'sub-01': '#AA0DFE', 'sub-02': '#3283FE', 'sub-03': '#85660D', 'sub-04': '#782AB6',
    'sub-05': '#565656', 'sub-06': '#1C8356', 'sub-07': '#16FF32', 'sub-08': '#F7E1A0',
    'sub-09': '#E2E2E2', 'sub-10': '#AA0DFE', 'sub-11': '#1CBE4F', 'sub-13': '#DEA0FD',
    'sub-14': '#FBE426', 'sub-15': '#325A9B', 'sub-16': '#FEAF16', 'sub-17': '#F8A19F'
}

task_labels = ["<b>Guided Fixation<b>", "<b>Smooth Pursuit<b>", "<b>Freeviewing<b>"]
task_names = ["fixation", "pursuit", "freeview", "all"]

comparisons = [("pt_fivedegree", "ft_fivedegree")]

fig = make_subplots(
    rows=len(task_names), cols=1,
    subplot_titles=[f"{label}" for label in task_labels] + ["<b>All Tasks<b>"]
)

shown_subjects = set()

# Only one comparison is plotted, so the pair is the same for every task
model_a, model_b = comparisons[0]
x_vals = [0, 1]

for i, task in enumerate(task_names):
    row = i + 1
    col = 1

    df_plot = df_corr_5deg[
        (df_corr_5deg["task"] == task) &
        (df_corr_5deg["model"].isin([model_a, model_b]))
    ]

    # Wide table: one row per subject, one column per model; subjects lacking
    # either model are dropped so the comparison stays paired.
    df_pivot = df_plot.pivot(index="subject", columns="model", values="mean_pearson")[[model_a, model_b]].dropna()

    means = df_pivot.mean()
    stderrs = df_pivot.sem()

    # Group mean +/- SEM as translucent bars
    fig.add_trace(go.Bar(
        x=x_vals,
        y=means.values,
        error_y=dict(type="data", array=stderrs.values),
        marker_color=bar_color,
        width=0.25,
        showlegend=False
    ), row=row, col=col)

    for subject, row_data in df_pivot.iterrows():
        color = colormap_subject_dict.get(subject, 'gray')
        # Each subject gets exactly one legend entry across all panels
        show_legend = subject not in shown_subjects
        shown_subjects.add(subject)

        fig.add_trace(go.Scatter(
            x=x_vals,
            y=row_data.values,
            mode='lines+markers',
            line=dict(color=color, width=1.5),
            marker=dict(size=4),
            name=subject,
            opacity=0.9,
            showlegend=show_legend
        ), row=row, col=col)

    # Significance marker
    sig_row = sig_df[
        (sig_df.task == task) &
        (sig_df.model_a == model_a) &
        (sig_df.model_b == model_b)
    ]

    if not sig_row.empty and sig_row.iloc[0].p_value < 0.05:
        # Offsets are on the correlation scale. The previous +0.5 / +1 offsets
        # suit the 0-8 Euclidean-error axis and put the marker outside this
        # plot's visible range.
        line_y = means.max() + 0.1

        fig.add_shape(
            type="line",
            x0=0,
            x1=1,
            y0=line_y,
            y1=line_y,
            line=dict(color="black", width=1),
            row=row, col=col
        )
        fig.add_trace(go.Scatter(
            x=[0.5],
            y=[line_y + 0.05],
            text=["*"],
            mode="text",
            showlegend=False
        ), row=row, col=col)

# Layout
fig.update_layout(
    height=1800,
    width=800,
    title_text="Effect of fovea stimulus",
    template="simple_white",
    showlegend=True,
    font=dict(size=16, family="Arial"),
    margin=dict(t=150)
)

# Axis formatting. The upper limit leaves headroom above r = 1 for the
# significance marker, matching the pairwise correlation figure.
for i in range(1, len(task_names) + 1):
    fig.update_yaxes(range=[0, 1.115], title_text="Correlation", row=i, col=1)
    fig.update_xaxes(
        tickvals=[0, 1],
        ticktext=[model_name_map[model_a], model_name_map[model_b]],
        row=i, col=1
    )

fig.show()


In [37]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# Euclidean-error comparison of "pt_calib" vs "ft_sim", one subplot per task.
# Reads df_ee (columns: subject, task, model, mean) and sig_df
# (columns: task, model_a, model_b, p_value) from earlier cells.

# Define color and name mappings
model_name_map = {
    "pretrained": "DeepMreye",
    "scaled": "DeepMreye Scaled",
    "pt_calib": "DeepMreye + Calib", 
    "pt_fivedegree": "DeepMReye 5 degree", 
    "ft_fivedegree": "DeepMReye + Calib 5 degree", 
    "ft_sim": "DeepMreye + Calib simulated labels"
}

bar_color = 'rgba(66, 129, 164, 0.25)' 

# Subjects without an entry fall back to gray (see .get(...) below).
# 'sub-10' is part of the cohort list, so it needs its own entry; it uses the
# same color the 5-degree Euclidean-error figure assigns to it.
colormap_subject_dict = {
    'sub-01': '#AA0DFE', 'sub-02': '#3283FE', 'sub-03': '#85660D', 'sub-04': '#782AB6',
    'sub-05': '#565656', 'sub-06': '#1C8356', 'sub-07': '#16FF32', 'sub-08': '#F7E1A0',
    'sub-09': '#E2E2E2', 'sub-10': '#AA0DFE', 'sub-11': '#1CBE4F', 'sub-13': '#DEA0FD',
    'sub-14': '#FBE426', 'sub-15': '#325A9B', 'sub-16': '#FEAF16', 'sub-17': '#F8A19F'
}

task_labels = ["<b>Guided Fixation<b>", "<b>Smooth Pursuit<b>", "<b>Freeviewing<b>"]
task_names = ["fixation", "pursuit", "freeview", "all"]

comparisons = [("pt_calib", "ft_sim")]

fig = make_subplots(
    rows=len(task_names), cols=1,
    subplot_titles=[f"{label}" for label in task_labels] + ["<b>All Tasks<b>"]
)

shown_subjects = set()

# Only one comparison is plotted, so the pair is the same for every task
model_a, model_b = comparisons[0]
x_vals = [0, 1]

for i, task in enumerate(task_names):
    row = i + 1
    col = 1

    df_plot = df_ee[
        (df_ee["task"] == task) &
        (df_ee["model"].isin([model_a, model_b]))
    ]

    # Wide table: one row per subject, one column per model; subjects lacking
    # either model are dropped so the comparison stays paired.
    df_pivot = df_plot.pivot(index="subject", columns="model", values="mean")[[model_a, model_b]].dropna()

    means = df_pivot.mean()
    stderrs = df_pivot.sem()

    # Group mean +/- SEM as translucent bars
    fig.add_trace(go.Bar(
        x=x_vals,
        y=means.values,
        error_y=dict(type="data", array=stderrs.values),
        marker_color=bar_color,
        width=0.25,
        showlegend=False
    ), row=row, col=col)

    for subject, row_data in df_pivot.iterrows():
        color = colormap_subject_dict.get(subject, 'gray')
        # Each subject gets exactly one legend entry across all panels
        show_legend = subject not in shown_subjects
        shown_subjects.add(subject)

        fig.add_trace(go.Scatter(
            x=x_vals,
            y=row_data.values,
            mode='lines+markers',
            line=dict(color=color, width=1.5),
            marker=dict(size=4),
            name=subject,
            opacity=0.9,
            showlegend=show_legend
        ), row=row, col=col)

    # Significance marker
    sig_row = sig_df[
        (sig_df.task == task) &
        (sig_df.model_a == model_a) &
        (sig_df.model_b == model_b)
    ]

    if not sig_row.empty and sig_row.iloc[0].p_value < 0.05:
        # Bracket 0.5 above the taller bar, star 1 above the bracket
        # (offsets are in the units of the 0-8 error axis)
        line_y = means.max() + 0.5

        fig.add_shape(
            type="line",
            x0=0,
            x1=1,
            y0=line_y,
            y1=line_y,
            line=dict(color="black", width=1),
            row=row, col=col
        )
        fig.add_trace(go.Scatter(
            x=[0.5],
            y=[line_y + 1],
            text=["*"],
            mode="text",
            showlegend=False
        ), row=row, col=col)

# Layout
# NOTE(review): the title is the same as in the 5-degree figures, but this cell
# compares pt_calib with ft_sim - confirm the intended title.
fig.update_layout(
    height=1800,
    width=800,
    title_text="Effect of fovea stimulus",
    template="simple_white",
    showlegend=True,
    font=dict(size=16, family="Arial"),
    margin=dict(t=150)
)

# Axis formatting: tick labels are derived from the compared pair so they stay
# in sync with `comparisons`.
for i in range(1, len(task_names) + 1):
    fig.update_yaxes(range=[0, 8], title_text="Euclidean Error (dva)", row=i, col=1)
    fig.update_xaxes(
        tickvals=[0, 1],
        ticktext=[model_name_map[model_a], model_name_map[model_b]],
        row=i, col=1
    )

fig.show()
