In [1]:
import importlib

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow import keras

import anchor_estimator
from anchor_estimator import (
    estimate_T_for_dataset
)
from cnn_model import cnn_model
from helper import (
    load_dataset,
    split_data,
    run_single_experiment,
    run_all_experiments,
    tune_hyperparams
)
from loss_functions import (
    symmetric_cross_entropy,
    forward_correction_loss,
    CoTeachingProxyLoss,
    RememberRateScheduler,
    _infer_noise_rate_from_name,
)


In [8]:
# Re-execute anchor_estimator so on-disk edits to the module are picked up
# without restarting the kernel. The module itself is imported once, in the
# notebook's top import cell, so every dependency is declared in one place.
importlib.reload(anchor_estimator)

# Run estimate_T on the FashionMNIST0.3 archive.
# The third positional argument is 10; the recorded log prints "Epoch 1/10",
# so it is presumably the epoch count -- TODO confirm against estimate_T's signature.
T_estimated = anchor_estimator.estimate_T(
    "./datasets/FashionMNIST0.3.npz",
    "FashionMNIST0.3",
    10,
)

Processing: FashionMNIST0.3
Epoch 1/10
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 30ms/step - accuracy: 0.6303 - loss: 0.7947 - val_accuracy: 0.6703 - val_loss: 0.6909
Epoch 2/10
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 29ms/step - accuracy: 0.6785 - loss: 0.6859 - val_accuracy: 0.6797 - val_loss: 0.6713
Epoch 3/10
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 28ms/step - accuracy: 0.6870 - loss: 0.6646 - val_accuracy: 0.6825 - val_loss: 0.6615
Epoch 4/10
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - accuracy: 0.6890 - loss: 0.6555 - val_accuracy: 0.6833 - val_loss: 0.6573
Epoch 5/10
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - accuracy: 0.6903 - loss: 0.6512 - val_accuracy: 0.6839 - val_loss: 0.6544
Epoch 6/10
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - accuracy: 0.6909 - loss: 0.6434 - val_accuracy: 0.6856 - val_loss: 0

In [9]:
# Run estimate_T on the FashionMNIST0.6 archive.
# NOTE(review): this rebinds T_estimated, so a value previously stored under
# that name (e.g. the FashionMNIST0.3 estimate) is no longer reachable.
# The trailing 10 is presumably the epoch count (log prints "Epoch 1/10") -- TODO confirm.
T_estimated = anchor_estimator.estimate_T(
    "./datasets/FashionMNIST0.6.npz",
    "FashionMNIST0.6",
    10,
)

Processing: FashionMNIST0.6
Epoch 1/10
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 32ms/step - accuracy: 0.3447 - loss: 1.0999 - val_accuracy: 0.3547 - val_loss: 1.0972
Epoch 2/10
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 30ms/step - accuracy: 0.3623 - loss: 1.0964 - val_accuracy: 0.3731 - val_loss: 1.0967
Epoch 3/10
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 30ms/step - accuracy: 0.3722 - loss: 1.0954 - val_accuracy: 0.3775 - val_loss: 1.0964
Epoch 4/10
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 27ms/step - accuracy: 0.3840 - loss: 1.0941 - val_accuracy: 0.3833 - val_loss: 1.0954
Epoch 5/10
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 29ms/step - accuracy: 0.3823 - loss: 1.0937 - val_accuracy: 0.3847 - val_loss: 1.0941
Epoch 6/10
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 27ms/step - accuracy: 0.3819 - loss: 1.0937 - val_accuracy: 0.3822 - val_loss: 1

In [2]:
# Best hyperparameters found so far, keyed by the (dataset, method) pair.
tuned = {}

datasets = ['FashionMNIST0.3', 'FashionMNIST0.6', 'CIFAR']
methods  = ['sce']

# Tune once for every dataset x method combination.
for dataset in datasets:
    # Each dataset is loaded a single time and shared by all methods below.
    data_path = f'datasets/{dataset}.npz'
    Xtr, Str, Xts, Yts, T = load_dataset(data_path, dataset)

    # Everything after the leading axis of Xtr is handed to the tuner as the input shape.
    input_shape = Xtr.shape[1:]

    for method in methods:
        print(f"\n=== Tuning {method.upper()} on {dataset} ===")

        # Lightweight dev budget: 3 dev runs of 30 epochs each.
        best_params = tune_hyperparams(
            Xtr,
            Str,
            dataset,
            method,
            input_shape,
            n_dev_runs=3,
            epochs=30,
        )
        tuned[(dataset, method)] = best_params


=== Tuning SCE on FashionMNIST0.3 ===


2025-11-04 18:26:54.228285: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M4 Max
2025-11-04 18:26:54.228309: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 36.00 GB
2025-11-04 18:26:54.228313: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 13.50 GB
I0000 00:00:1762241214.228328 16171413 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1762241214.228351 16171413 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2025-11-04 18:26:54.594201: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[TUNE] dataset=FashionMNIST0.3 method=sce params={'alpha': 0.01, 'beta': 0.5, 'A': -1.0, 'lr': 0.001} -> val_acc=0.6811
[TUNE] dataset=FashionMNIST0.3 method=sce params={'alpha': 0.01, 'beta': 0.5, 'A': -2.0, 'lr': 0.001} -> val_acc=0.6807
[TUNE] dataset=FashionMNIST0.3 method=sce params={'alpha': 0.01, 'beta': 0.5, 'A': -4.0, 'lr': 0.001} -> val_acc=0.6808
[TUNE] dataset=FashionMNIST0.3 method=sce params={'alpha': 0.01, 'beta': 1.0, 'A': -1.0, 'lr': 0.001} -> val_acc=0.6819
[TUNE] dataset=FashionMNIST0.3 method=sce params={'alpha': 0.01, 'beta': 1.0, 'A': -2.0, 'lr': 0.001} -> val_acc=0.6805
[TUNE] dataset=FashionMNIST0.3 method=sce params={'alpha': 0.01, 'beta': 1.0, 'A': -4.0, 'lr': 0.001} -> val_acc=0.6738
[TUNE] dataset=FashionMNIST0.3 method=sce params={'alpha': 0.05, 'beta': 0.5, 'A': -1.0, 'lr': 0.001} -> val_acc=0.6776
[TUNE] dataset=FashionMNIST0.3 method=sce params={'alpha': 0.05, 'beta': 0.5, 'A': -2.0, 'lr': 0.001} -> val_acc=0.6806
[TUNE] dataset=FashionMNIST0.3 method=sc

In [3]:
# Show the tuned-parameter mapping; the keys are (dataset, method) tuples.
tuned

{('FashionMNIST0.3', 'sce'): {'alpha': 0.01,
  'beta': 1.0,
  'A': -1.0,
  'lr': 0.001},
 ('FashionMNIST0.6', 'sce'): {'alpha': 0.01,
  'beta': 0.5,
  'A': -4.0,
  'lr': 0.001},
 ('CIFAR', 'sce'): {'alpha': 0.05, 'beta': 1.0, 'A': -4.0, 'lr': 0.001}}

In [3]:
# Datasets and methods to evaluate in the full experiment sweep.
datasets = [
    'FashionMNIST0.3',
    'FashionMNIST0.6',
    'CIFAR',
]
methods = [
    'coteaching',  # add more methods here
]

# Positional arguments 10 and 50: the recorded log prints "Run 1/10", so the
# first is presumably the number of repeated runs; the second is presumably a
# per-run training budget -- TODO confirm against run_all_experiments' signature.
result = run_all_experiments(
    datasets,
    methods,
    10,
    50,
)

Running COTEACHING on FashionMNIST0.3...


2025-11-04 14:21:45.040511: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M4 Max
2025-11-04 14:21:45.040543: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 36.00 GB
2025-11-04 14:21:45.040548: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 13.50 GB
I0000 00:00:1762226505.040563 15883597 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1762226505.040585 15883597 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2025-11-04 14:21:45.425595: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Run 1/10: Test Accuracy = 94.13%
Run 2/10: Test Accuracy = 95.37%
Run 3/10: Test Accuracy = 96.80%
Run 4/10: Test Accuracy = 94.33%
Run 5/10: Test Accuracy = 94.10%
Run 6/10: Test Accuracy = 96.27%
Run 7/10: Test Accuracy = 92.80%
Run 8/10: Test Accuracy = 97.37%
Run 9/10: Test Accuracy = 94.80%
Run 10/10: Test Accuracy = 95.87%
Result: 95.18 ± 1.34%
Running COTEACHING on FashionMNIST0.6...
Run 1/10: Test Accuracy = 95.50%
Run 2/10: Test Accuracy = 92.43%
Run 3/10: Test Accuracy = 95.23%
Run 4/10: Test Accuracy = 89.90%
Run 5/10: Test Accuracy = 92.63%
Run 6/10: Test Accuracy = 94.80%
Run 7/10: Test Accuracy = 92.87%
Run 8/10: Test Accuracy = 94.33%
Run 9/10: Test Accuracy = 94.50%
Run 10/10: Test Accuracy = 96.50%
Result: 93.87 ± 1.82%
Running COTEACHING on CIFAR...
Run 1/10: Test Accuracy = 63.30%
Run 2/10: Test Accuracy = 63.87%
Run 3/10: Test Accuracy = 58.07%
Run 4/10: Test Accuracy = 61.03%
Run 5/10: Test Accuracy = 63.33%
Run 6/10: Test Accuracy = 58.83%
Run 7/10: Test Accuracy 

In [26]:
# Reshape `result` (columns 'Dataset', 'Method', 'Result') into a table with
# one row per dataset and one column per method.
# NOTE(review): the recorded output below shows an SCE column, while the
# visible run cell sets methods = ['coteaching'] -- `result` presumably came
# from a different execution. Re-run the notebook top to bottom to confirm.
pivot_df = result.pivot(index='Dataset', columns='Method', values='Result')

# A bare last expression uses the notebook's rich DataFrame rendering instead
# of print()'s plain-text dump, matching how `tuned` is displayed.
pivot_df

Method                    SCE
Dataset                      
CIFAR            65.57 ± 3.22
FashionMNIST0.3  98.58 ± 0.16
FashionMNIST0.6  95.83 ± 0.63
