# Evaluate ALL tasks, ALL experiment types, for CVT/SWIN/VIT and print MLAE summaries.

In [1]:
# Evaluate ALL tasks, ALL experiment types, for CVT/SWIN/VIT and print MLAE summaries.

import os, sys, json
import warnings, contextlib, io, numpy as np, sys
import numpy as np
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
import pandas as pd

# Make the project importable: PROJECT_ROOT is two directories above the
# current working directory. Point it elsewhere if the notebook lives outside
# the repo root layout assumed here.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), "../../"))
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Fix the NumPy and PyTorch random seeds.
np.random.seed(0)
torch.manual_seed(0)

# Run on the GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# Mute RuntimeWarning globally (mean of empty slice, invalid divide, etc.) and
# tell NumPy to ignore every floating-point error category.
# NOTE(review): this also hides the warnings that would accompany NaN metrics.
warnings.filterwarnings("ignore", category=RuntimeWarning)
np.seterr(divide="ignore", over="ignore", under="ignore", invalid="ignore")

@contextlib.contextmanager
def silence_output():
    """
    Context manager that swallows everything written to stdout/stderr.

    While the managed block runs, both streams are redirected into a single
    in-memory buffer that is never read back, and all warnings are ignored.
    The previous streams and warning filters are restored on exit, including
    when the block raises.
    """
    sink = io.StringIO()
    with contextlib.ExitStack() as stack:
        # Entered in the same order as the original nested `with` statements:
        # warnings first, then stdout, then stderr.
        stack.enter_context(warnings.catch_warnings())
        warnings.simplefilter("ignore")
        stack.enter_context(contextlib.redirect_stdout(sink))
        stack.enter_context(contextlib.redirect_stderr(sink))
        yield


# --------------------
# Import project code:
# --------------------
from src.ClevelandMcGill.figure1 import Figure1
from src.ClevelandMcGill.figure12 import Figure12
from src.ClevelandMcGill.figure3 import Figure3
from src.ClevelandMcGill.figure4 import Figure4
from src.ClevelandMcGill.weber import Weber

from src.Datasets.testdataset import (
    TestDataset, test_normalization_data,
    test_pl_data_generation, test_pa_data_generation,
    test_bfr_data_generation, test_wb_data_generation, test_reg_data_generation
)

from src.Models.cvt import CvTRegression
from src.Models.swin import SwinRegression
from src.Models.vit import ViTRegression
from src.Models.one_epoch_run import testingEpoch, testingEpochOne

# -----------------
# Task definitions:
# -----------------
# The five task families, in the order they are evaluated.
TASKS = [
    'Elementary Perceptual Task',
    'Position-Length',
    'Position-Angle',
    'Bar and Framed Rectangle',
    'Webers Law',
]

# Experiment types per task. Each name is looked up as an attribute on the
# matching project class in data_gen and is part of the checkpoint file name.
EL_DATATYPE_LIST = [
    'position_common_scale',
    'position_non_aligned_scale',
    'length',
    'direction',
    'angle',
    'area',
    'volume',
    'curvature',
    'shading',
]

# data_to_type1 ... data_to_type5
PL_DATATYPE_LIST = ['data_to_type%d' % k for k in range(1, 6)]

PA_DATATYPE_LIST = [
    'data_to_barchart',
    'data_to_piechart',
    'data_to_piechart_aa',
]

BFR_DATATYPE_LIST = [
    'data_to_bars',
    'data_to_framed_rectangles',
]

# base10, base100, base1000
WB_DATATYPE_LIST = ['base' + str(10 ** k) for k in (1, 2, 3)]

# Task name -> list of experiment types, paired positionally with TASKS.
DATATYPE_LIST = dict(zip(TASKS, (EL_DATATYPE_LIST,
                                 PL_DATATYPE_LIST,
                                 PA_DATATYPE_LIST,
                                 BFR_DATATYPE_LIST,
                                 WB_DATATYPE_LIST)))

# -------------
# Data helpers:
# -------------
def data_gen(task_name, transform, experiment_type):
    """
    Build the test dataset for one (task, experiment type) pair.

    Args:
        task_name: One of the task names listed in TASKS.
        transform: Transform forwarded unchanged to TestDataset.
        experiment_type: Name of an attribute on the task's project class
            (Figure1, Figure4, Figure3, Figure12 or Weber); that attribute is
            handed to the task's generation helper as the data type.

    Returns:
        A TestDataset built from the generated inputs and targets with
        channels=True.

    Raises:
        ValueError: If task_name is not a known task.
        AttributeError: If experiment_type is not an attribute of the task's
            class.
    """
    # Pick the class that owns the experiment, the generation helper, and the
    # test_target value for this task (presumably the number of generated test
    # samples -- confirm against the generators).
    if task_name == "Elementary Perceptual Task":
        figure, generate, test_target = Figure1, test_reg_data_generation, 200
    elif task_name == "Position-Length":
        figure, generate, test_target = Figure4, test_pl_data_generation, 200
    elif task_name == "Position-Angle":
        figure, generate, test_target = Figure3, test_pa_data_generation, 200
    elif task_name == "Bar and Framed Rectangle":
        figure, generate, test_target = Figure12, test_bfr_data_generation, 200
    elif task_name == "Webers Law":
        figure, generate, test_target = Weber, test_wb_data_generation, 100
    else:
        raise ValueError("Unknown task_name: %r" % task_name)

    # Plain attribute lookup instead of eval(): same result for valid names,
    # and an arbitrary string can no longer be executed as code.
    DATATYPE = getattr(figure, experiment_type)
    X_test, y_test = generate(DATATYPE, NOISE=True, test_target=test_target)

    X_test = test_normalization_data(X_test)
    y_test = test_normalization_data(y_test)
    # Shift the normalized inputs by 0.5 in place; targets are left as-is.
    X_test -= 0.5
    return TestDataset(X_test, y_test, transform=transform, channels=True)

def data_loader(task_name, experiment_type, batch_size=64, shuffle=False):
    """
    Create the evaluation DataLoader for one (task, experiment type) pair.

    Args:
        task_name: Task name forwarded to data_gen.
        experiment_type: Experiment type forwarded to data_gen.
        batch_size: Number of samples per batch.
        shuffle: Whether to reshuffle the samples on every pass. Defaults to
            False so batches are visited in a fixed order during evaluation;
            pass True to restore the previous behaviour.

    Returns:
        A DataLoader over the dataset produced by data_gen, with every image
        resized to 224x224.
    """
    transform = transforms.Compose([transforms.Resize((224, 224))])
    test_dataset = data_gen(task_name, transform, experiment_type)
    return DataLoader(test_dataset, batch_size=batch_size, shuffle=shuffle)

# --------------
# Model helpers:
# --------------
# Model families evaluated for every (task, experiment) pair. Each entry is a
# key recognised by _make_model and is lower-cased by test_model to build the
# checkpoint file name.
MODELS = ["CVT", "SWIN", "VIT"]

def _num_outputs_for(task_name):
    """
    Return how many regression outputs a model needs for the given task.

    Raises:
        ValueError: If task_name is not one of the known tasks.
    """
    outputs_by_task = (
        (("Elementary Perceptual Task",), 1),
        (("Position-Length", "Position-Angle"), 5),
        (("Bar and Framed Rectangle",), 2),
        (("Webers Law",), 1),
    )
    for task_names, n_outputs in outputs_by_task:
        if task_name in task_names:
            return n_outputs
    raise ValueError("Unknown task_name: %r" % task_name)

def _make_model(model_name, task_name):
    """
    Instantiate an untrained 3-channel regression model for a task.

    The output width comes from _num_outputs_for(task_name), which is resolved
    before the model name is inspected, so an unknown task is reported first.

    Raises:
        ValueError: If task_name or model_name is not recognised.
    """
    width = _num_outputs_for(task_name)
    if model_name == "SWIN":
        # SwinRegression names its output-size argument differently.
        net = SwinRegression(num_outputs=width, channels=3)
    elif model_name == "VIT":
        net = ViTRegression(num_classes=width, channels=3)
    elif model_name == "CVT":
        net = CvTRegression(num_classes=width, channels=3)
    else:
        raise ValueError("Unknown model: %r" % model_name)
    return net

# ----------------
# Eval per model:
# ----------------
def test_model(model, model_name, task_name, exp_type, test_loader):
    """
    Load a trained checkpoint into `model`, evaluate it, and persist the score.

    Args:
        model: Freshly constructed model matching the checkpoint architecture.
        model_name: Model family name; lower-cased to build the checkpoint
            path and used verbatim in the result file name.
        task_name: Task name; selects the multi-output or single-output
            evaluation routine.
        exp_type: Experiment type; part of the checkpoint and result file
            names.
        test_loader: DataLoader yielding the evaluation data.

    Returns:
        The error returned by the evaluation routine, converted to float
        (stored under the 'MLAE' key).

    Side effects:
        Moves the model to the global `device`, switches it to eval mode, and
        writes results/<model_name>trained_<exp_type>.json.
    """
    model.to(device)
    ckpt = f"chkpt/chkpts_fromCluster/channels3/{model_name.lower()}3channels_{exp_type}.pth"
    state = torch.load(ckpt, map_location=device)
    model.load_state_dict(state)
    # Put the network in inference mode explicitly. The evaluation helpers may
    # already do this (not visible from here); calling it again is harmless and
    # guarantees train-only layers such as dropout are disabled.
    model.eval()

    # Correct multi-output vs single-output handling:
    with silence_output():
        if task_name in ("Position-Length", "Position-Angle", "Bar and Framed Rectangle"):
            m_error = testingEpoch(model, test_loader, device)
        else:
            m_error = testingEpochOne(model, test_loader, device)

    # Convert once so the JSON payload and the return value always agree.
    mlae = float(m_error)

    os.makedirs("results", exist_ok=True)
    payload = {'Model': model_name, 'Task_name': task_name, 'Experiment_type': exp_type, 'MLAE': mlae}
    with open(os.path.join("results", f"{model_name}trained_{exp_type}.json"), "w") as f:
        json.dump(payload, f, indent=2)
    return mlae

def evaluate_all_models(task_name, exp_type):
    """
    Evaluate every model in MODELS on one (task, experiment type) pair.

    A single test loader is built and shared by all models. Models are built
    and evaluated one at a time, in MODELS order.

    Returns:
        Dict mapping model name to the float score returned by test_model.
    """
    loader = data_loader(task_name, exp_type)
    return {
        name: test_model(_make_model(name, task_name), name, task_name, exp_type, loader)
        for name in MODELS
    }

# ---------------
# Run EVERYTHING:
# ---------------

# One row per (task, experiment, model) with its MLAE.
rows = []
for task in TASKS:
    print(f"\n=== TASK: {task} ===")
    for exp in DATATYPE_LIST[task]:
        print(f" -> Experiment: {exp}")
        results = evaluate_all_models(task, exp)
        rows.extend(
            {"Task": task, "Experiment": exp, "Model": name, "MLAE": score}
            for name, score in results.items()
        )

df = pd.DataFrame(rows)

# Save full table
results_dir = "results"
os.makedirs(results_dir, exist_ok=True)
csv_all = os.path.join(results_dir, "mlae_summary_all_tasks.csv")
df.to_csv(csv_all, index=False)
print("\nSaved full MLAE table to:", csv_all)

# --------------------------
# summaries & prints:
# --------------------------
def _display(df_like, title=None):
    """
    Show a table, using IPython's rich display when it is available.

    Args:
        df_like: Object to show; the plain-text fallback calls its
            to_string(index=False) method.
        title: Optional heading printed once before the table.

    The title is printed exactly once. If IPython is missing, or its display
    call fails, the table is printed as plain text instead.
    """
    if title:
        print(title)

    # Only the import is expected to fail outside a notebook environment.
    try:
        from IPython.display import display
    except ImportError:
        display = None

    if display is not None:
        try:
            display(df_like)
            return
        except Exception:
            # Best-effort: any rendering failure falls through to plain text.
            pass

    print(df_like.to_string(index=False))

# One table per (Task, Experiment), rows ordered from lowest to highest MLAE.
print("\nPer-experiment MLAE (sorted ascending) — you will see one table per (Task, Experiment):")
experiment_keys = ["Task", "Experiment"]
for (task, exp), g in df.groupby(experiment_keys):
    ranked = g.sort_values("MLAE")
    _display(ranked, title=f"\n{task} / {exp}")

# Best model per experiment: sort by MLAE within each group, keep the first row.
by_score = df.sort_values(experiment_keys + ["MLAE"])
best_per_exp = by_score.groupby(experiment_keys, as_index=False).first()
_display(best_per_exp, title="\nBest model per (Task, Experiment):")

# Mean MLAE of each model within each task, best model first inside a task.
task_model_means = df.groupby(["Task", "Model"], as_index=False)["MLAE"].mean()
avg_per_task_model = task_model_means.sort_values(["Task", "MLAE"])
_display(avg_per_task_model, title="\nAverage MLAE per Task × Model:")

# Mean MLAE of each model over all experiments, best model first.
# NOTE(review): pandas' mean skips NaN by default, so a model with missing
# scores is averaged over fewer experiments than the others.
model_means = df.groupby("Model", as_index=False)["MLAE"].mean()
overall_avg = model_means.sort_values("MLAE")
_display(overall_avg, title="\nOverall average MLAE per Model (lower is better):")

print("\nDone.")


Using device: cuda

=== TASK: Elementary Perceptual Task ===
 -> Experiment: position_common_scale
Done 0.015848636627197266 seconds ( 200 iterations)
 -> Experiment: position_non_aligned_scale
Done 0.014545202255249023 seconds ( 200 iterations)
 -> Experiment: length
Done 0.014526605606079102 seconds ( 200 iterations)
 -> Experiment: direction
Done 0.020931482315063477 seconds ( 200 iterations)
 -> Experiment: angle
Done 0.021263837814331055 seconds ( 200 iterations)
 -> Experiment: area
Done 0.01786017417907715 seconds ( 200 iterations)
 -> Experiment: volume
Done 0.027119159698486328 seconds ( 200 iterations)
 -> Experiment: curvature
Done 0.02428722381591797 seconds ( 200 iterations)
 -> Experiment: shading
Done 0.11912345886230469 seconds ( 200 iterations)

=== TASK: Position-Length ===
 -> Experiment: data_to_type1
Done 0.0539395809173584 seconds ( 200 iterations)
 -> Experiment: data_to_type2
Done 0.04839468002319336 seconds ( 200 iterations)
 -> Experiment: data_to_type3
Done 0

Unnamed: 0,Task,Experiment,Model,MLAE
51,Bar and Framed Rectangle,data_to_bars,CVT,4.652828
52,Bar and Framed Rectangle,data_to_bars,SWIN,4.739925
53,Bar and Framed Rectangle,data_to_bars,VIT,5.333613



Bar and Framed Rectangle / data_to_framed_rectangles


Unnamed: 0,Task,Experiment,Model,MLAE
54,Bar and Framed Rectangle,data_to_framed_rectangles,CVT,4.789211
55,Bar and Framed Rectangle,data_to_framed_rectangles,SWIN,4.796795
56,Bar and Framed Rectangle,data_to_framed_rectangles,VIT,5.410265



Elementary Perceptual Task / angle


Unnamed: 0,Task,Experiment,Model,MLAE
13,Elementary Perceptual Task,angle,SWIN,1.786924
12,Elementary Perceptual Task,angle,CVT,4.282675
14,Elementary Perceptual Task,angle,VIT,4.713343



Elementary Perceptual Task / area


Unnamed: 0,Task,Experiment,Model,MLAE
16,Elementary Perceptual Task,area,SWIN,1.473262
17,Elementary Perceptual Task,area,VIT,4.184677
15,Elementary Perceptual Task,area,CVT,5.248511



Elementary Perceptual Task / curvature


Unnamed: 0,Task,Experiment,Model,MLAE
23,Elementary Perceptual Task,curvature,VIT,1.373911
22,Elementary Perceptual Task,curvature,SWIN,1.555559
21,Elementary Perceptual Task,curvature,CVT,4.048512



Elementary Perceptual Task / direction


Unnamed: 0,Task,Experiment,Model,MLAE
10,Elementary Perceptual Task,direction,SWIN,0.660959
11,Elementary Perceptual Task,direction,VIT,1.947473
9,Elementary Perceptual Task,direction,CVT,4.455319



Elementary Perceptual Task / length


Unnamed: 0,Task,Experiment,Model,MLAE
8,Elementary Perceptual Task,length,VIT,0.603872
7,Elementary Perceptual Task,length,SWIN,2.393634
6,Elementary Perceptual Task,length,CVT,3.599109



Elementary Perceptual Task / position_common_scale


Unnamed: 0,Task,Experiment,Model,MLAE
1,Elementary Perceptual Task,position_common_scale,SWIN,2.963425
2,Elementary Perceptual Task,position_common_scale,VIT,3.351088
0,Elementary Perceptual Task,position_common_scale,CVT,4.591968



Elementary Perceptual Task / position_non_aligned_scale


Unnamed: 0,Task,Experiment,Model,MLAE
5,Elementary Perceptual Task,position_non_aligned_scale,VIT,2.512811
4,Elementary Perceptual Task,position_non_aligned_scale,SWIN,4.244434
3,Elementary Perceptual Task,position_non_aligned_scale,CVT,4.821629



Elementary Perceptual Task / shading


Unnamed: 0,Task,Experiment,Model,MLAE
25,Elementary Perceptual Task,shading,SWIN,0.649289
26,Elementary Perceptual Task,shading,VIT,2.877321
24,Elementary Perceptual Task,shading,CVT,4.656249



Elementary Perceptual Task / volume


Unnamed: 0,Task,Experiment,Model,MLAE
19,Elementary Perceptual Task,volume,SWIN,1.353128
20,Elementary Perceptual Task,volume,VIT,2.428358
18,Elementary Perceptual Task,volume,CVT,4.267354



Position-Angle / data_to_barchart


Unnamed: 0,Task,Experiment,Model,MLAE
43,Position-Angle,data_to_barchart,SWIN,4.20557
42,Position-Angle,data_to_barchart,CVT,4.780117
44,Position-Angle,data_to_barchart,VIT,5.566006



Position-Angle / data_to_piechart


Unnamed: 0,Task,Experiment,Model,MLAE
46,Position-Angle,data_to_piechart,SWIN,4.238802
45,Position-Angle,data_to_piechart,CVT,4.475542
47,Position-Angle,data_to_piechart,VIT,5.381906



Position-Angle / data_to_piechart_aa


Unnamed: 0,Task,Experiment,Model,MLAE
49,Position-Angle,data_to_piechart_aa,SWIN,4.239131
48,Position-Angle,data_to_piechart_aa,CVT,4.666682
50,Position-Angle,data_to_piechart_aa,VIT,5.442793



Position-Length / data_to_type1


Unnamed: 0,Task,Experiment,Model,MLAE
28,Position-Length,data_to_type1,SWIN,4.809277
27,Position-Length,data_to_type1,CVT,4.907714
29,Position-Length,data_to_type1,VIT,4.987942



Position-Length / data_to_type2


Unnamed: 0,Task,Experiment,Model,MLAE
31,Position-Length,data_to_type2,SWIN,4.732032
30,Position-Length,data_to_type2,CVT,4.902887
32,Position-Length,data_to_type2,VIT,4.92666



Position-Length / data_to_type3


Unnamed: 0,Task,Experiment,Model,MLAE
34,Position-Length,data_to_type3,SWIN,4.640997
33,Position-Length,data_to_type3,CVT,4.759915
35,Position-Length,data_to_type3,VIT,4.860928



Position-Length / data_to_type4


Unnamed: 0,Task,Experiment,Model,MLAE
38,Position-Length,data_to_type4,VIT,4.694645
37,Position-Length,data_to_type4,SWIN,4.758009
36,Position-Length,data_to_type4,CVT,4.875063



Position-Length / data_to_type5


Unnamed: 0,Task,Experiment,Model,MLAE
40,Position-Length,data_to_type5,SWIN,4.687857
39,Position-Length,data_to_type5,CVT,4.819587
41,Position-Length,data_to_type5,VIT,4.905119



Webers Law / base10


Unnamed: 0,Task,Experiment,Model,MLAE
58,Webers Law,base10,SWIN,5.267455
59,Webers Law,base10,VIT,7.849272
57,Webers Law,base10,CVT,



Webers Law / base100


Unnamed: 0,Task,Experiment,Model,MLAE
62,Webers Law,base100,VIT,7.635453
61,Webers Law,base100,SWIN,8.449876
60,Webers Law,base100,CVT,



Webers Law / base1000


Unnamed: 0,Task,Experiment,Model,MLAE
65,Webers Law,base1000,VIT,4.842514
64,Webers Law,base1000,SWIN,5.527855
63,Webers Law,base1000,CVT,6.165205



Best model per (Task, Experiment):


Unnamed: 0,Task,Experiment,Model,MLAE
0,Bar and Framed Rectangle,data_to_bars,CVT,4.652828
1,Bar and Framed Rectangle,data_to_framed_rectangles,CVT,4.789211
2,Elementary Perceptual Task,angle,SWIN,1.786924
3,Elementary Perceptual Task,area,SWIN,1.473262
4,Elementary Perceptual Task,curvature,VIT,1.373911
5,Elementary Perceptual Task,direction,SWIN,0.660959
6,Elementary Perceptual Task,length,VIT,0.603872
7,Elementary Perceptual Task,position_common_scale,SWIN,2.963425
8,Elementary Perceptual Task,position_non_aligned_scale,VIT,2.512811
9,Elementary Perceptual Task,shading,SWIN,0.649289



Average MLAE per Task × Model:


Unnamed: 0,Task,Model,MLAE
0,Bar and Framed Rectangle,CVT,4.72102
1,Bar and Framed Rectangle,SWIN,4.76836
2,Bar and Framed Rectangle,VIT,5.371939
4,Elementary Perceptual Task,SWIN,1.897846
5,Elementary Perceptual Task,VIT,2.665873
3,Elementary Perceptual Task,CVT,4.441258
7,Position-Angle,SWIN,4.227834
6,Position-Angle,CVT,4.64078
8,Position-Angle,VIT,5.463568
10,Position-Length,SWIN,4.725634



Overall average MLAE per Model (lower is better):


Unnamed: 0,Model,MLAE
1,SWIN,3.735191
2,VIT,4.355908
0,CVT,4.688304



Done.
