In [1]:
import numpy as np
import pandas as pd
from helper import (
    load_dataset,
    split_data,
    run_single_experiment,
    run_all_experiments,
    tune_hyperparams
)

from cnn_model import cnn_model
from loss_functions import (
    symmetric_cross_entropy,
    forward_correction_loss,
    CoTeachingProxyLoss,
    RememberRateScheduler,
    _infer_noise_rate_from_name, 
)
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split




In [None]:
# anchor_estimator and forward correction (FLC) — both still awaiting review

In [3]:
# anchor_estimator
import numpy as np
import tensorflow as tf
from tensorflow import keras

from helper import load_dataset, split_data
from cnn_model import cnn_model
from anchor_estimator import temperature_scale_probs, estimate_T_anchor_from_probs
from loss_functions import symmetric_cross_entropy
from flc_loss import forward_correction_loss


def ensure_column_stochastic(T: np.ndarray, eps: float = 1e-12) -> np.ndarray:
    """Project a matrix onto the set of column-stochastic matrices.

    Negative entries are clipped to zero and every column is rescaled so
    that it sums to (approximately) one.

    A column with no positive mass (sum <= ``eps`` after clipping) cannot be
    rescaled; it is replaced by the uniform distribution over rows so the
    result is still a valid column-stochastic matrix instead of containing
    an all-zero column.

    Args:
        T: Array whose axis 0 indexes the entries of each column
            distribution (typically a square class-transition matrix).
        eps: Small constant added to each column sum to avoid division by
            zero; also the threshold below which a column is treated as
            having no mass.

    Returns:
        A new array of the same shape as ``T`` with non-negative entries and
        columns summing to one (up to ``eps``). ``T`` itself is not modified.
    """
    T = np.clip(T, 0, None)
    colsum = T.sum(axis=0, keepdims=True)

    # Columns with no mass would otherwise be returned as all zeros.
    degenerate = colsum <= eps
    if T.size and np.any(degenerate):
        # Broadcast the (1, k) mask over rows: fill whole columns uniformly.
        T = np.where(degenerate, 1.0 / T.shape[0], T)
        colsum = T.sum(axis=0, keepdims=True)

    # Same normalisation as before for columns that carry mass.
    return T / (colsum + eps)


# 1. Load the CIFAR data set and prepare a train/validation split.
# NOTE(review): the dataset name is passed as "CIFAR.npz" here, while the tuning
# cell passes 'CIFAR' (no extension) — confirm load_dataset treats both the same.
Xtr, Str, Xts, Yts, T_true = load_dataset("./datasets/CIFAR.npz", "CIFAR.npz")

# Cast labels to int64 so the loss does not fail on a one-hot/float label mismatch.
Str = Str.astype("int64")
Yts = Yts.astype("int64")

# Scale pixel values by 1/255.
# NOTE(review): assumes load_dataset returns raw 0-255 pixels — confirm it does not
# already normalise, since other cells pass its output on without rescaling.
Xtr = Xtr.astype("float32") / 255.0
Xts = Xts.astype("float32") / 255.0

# Hold out part of the training data (train_ratio=0.8) with a fixed split seed.
X_tr, y_tr, X_val, y_val = split_data(Xtr, Str, train_ratio=0.8, random_seed=7)

# Number of classes is inferred from the largest label id; input shape drops the sample axis.
num_classes = int(np.max(Str)) + 1
input_shape = Xtr.shape[1:]

# 2. Warm-up: train briefly with symmetric cross-entropy before estimating T.
# Author's note: after estimation, CIFAR.npz appears to have roughly the same
# noise level as the 0.6 setting.
alpha, beta, A = 0.05, 4.0, -4.0
sce_loss = symmetric_cross_entropy(alpha=alpha, beta=beta, A=A, num_classes=num_classes)

m = cnn_model(input_shape=input_shape, num_classes=num_classes)
m.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3),
          loss=sce_loss, metrics=["accuracy"])

# NOTE(review): no global random seed is set in this cell, so the warm-up
# (and therefore T_hat) may differ between runs.
m.fit(X_tr, y_tr,
      validation_data=(X_val, y_val),
      epochs=5, batch_size=128, verbose=1)

# 3. Use the validation split to estimate the transition matrix:
#    predict -> temperature-scale the probabilities -> anchor-based estimate.
p_val = m.predict(X_val, batch_size=512, verbose=0)
# bestT is returned alongside the calibrated probabilities but is not used below.
p_val_cal, bestT = temperature_scale_probs(p_val, y_val)
T_hat = estimate_T_anchor_from_probs(p_val_cal, top_quantile=0.99)
# Clip negatives / renormalise columns, then cast to float32 for use in the loss.
# NOTE(review): presumably estimate_T_anchor_from_probs and the downstream loss both
# use the column-stochastic convention — confirm.
T_hat = ensure_column_stochastic(T_hat).astype(np.float32)

# Sanity check: every column of T_hat should sum to 1.
print("T_hat col-sums:", T_hat.sum(axis=0))

2025-11-04 13:49:04.844746: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M4 Max
2025-11-04 13:49:04.844785: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 36.00 GB
2025-11-04 13:49:04.844791: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 13.50 GB
I0000 00:00:1762224544.844826 15808406 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1762224544.844868 15808406 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/5


2025-11-04 13:49:05.827795: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 21ms/step - accuracy: 0.3324 - loss: 10.7983 - val_accuracy: 0.3370 - val_loss: 10.7187
Epoch 2/5
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.3377 - loss: 10.7017 - val_accuracy: 0.3483 - val_loss: 10.5849
Epoch 3/5
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.3427 - loss: 10.5812 - val_accuracy: 0.3547 - val_loss: 10.5176
Epoch 4/5
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.3470 - loss: 10.6110 - val_accuracy: 0.3557 - val_loss: 10.4575
Epoch 5/5
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.3584 - loss: 10.4031 - val_accuracy: 0.3533 - val_loss: 10.4823
T_hat col-sums: [1. 1. 1.]


In [4]:
# Forward Correction fine-tuning
# Builds a forward-correction loss from the estimated matrix T_hat, copies the
# warm-up model's weights into a fresh network and continues training with that loss.
# Relies on m, T_hat, num_classes, input_shape, X_tr/y_tr, X_val/y_val, Xts and Yts
# defined by the anchor-estimation cell.
# NOTE(review): forward_correction_loss is imported from both loss_functions and
# flc_loss in this notebook; whichever import ran last is the one used here — confirm.
flc_loss = forward_correction_loss(T_hat, num_classes=num_classes)
m_flc = cnn_model(input_shape=input_shape, num_classes=num_classes)
# Start from the warm-up weights instead of a fresh initialisation.
m_flc.set_weights(m.get_weights())
m_flc.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3),
              loss=flc_loss, metrics=["accuracy"])

# y_val comes from the same split of Str as y_tr, so validation metrics are
# measured against those labels (presumably noisy — confirm), not clean ones.
# NOTE(review): in the recorded run the loss settles near 1.09, which is about ln(3);
# if num_classes is 3 that equals the cross-entropy of a uniform prediction — check
# that T_hat's orientation matches what forward_correction_loss expects.
history_flc = m_flc.fit(
    X_tr, y_tr,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=128,
    verbose=1
)

# Final evaluation on the test split returned by load_dataset.
test_loss, test_acc = m_flc.evaluate(Xts, Yts, verbose=0)
print(f"[FLC] Test Accuracy: {test_acc:.4f}")

Epoch 1/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 19ms/step - accuracy: 0.3565 - loss: 1.1619 - val_accuracy: 0.3483 - val_loss: 1.0968
Epoch 2/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.3453 - loss: 1.0975 - val_accuracy: 0.3607 - val_loss: 1.0950
Epoch 3/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.3714 - loss: 1.0937 - val_accuracy: 0.3607 - val_loss: 1.0945
Epoch 4/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.3754 - loss: 1.0934 - val_accuracy: 0.3633 - val_loss: 1.0933
Epoch 5/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - accuracy: 0.3817 - loss: 1.0921 - val_accuracy: 0.3627 - val_loss: 1.0939
Epoch 6/10
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.3723 - loss: 1.0925 - val_accuracy: 0.3550 - val_loss: 1.0947
Epoch 7/10
[1m94/94[0m [32m━━━━

In [2]:
# Data sets and methods to tune; each dataset name maps to datasets/<name>.npz.
datasets = ['FashionMNIST0.3', 'FashionMNIST0.6', 'CIFAR']
methods  = ['sce']

# One-time tuning per dataset × method
tuned = {}  # key: (dataset, method) -> param dict
for dataset in datasets:
    data_path = f'datasets/{dataset}.npz'
    # NOTE(review): this rebinds the notebook-level Xtr, Str, Xts, Yts and input_shape
    # on every iteration, so cells that rely on the values set by the
    # anchor-estimation cell must be run before this one (or re-run afterwards).
    # Xts, Yts and T are loaded but not used in this cell.
    Xtr, Str, Xts, Yts, T = load_dataset(data_path, dataset)
    input_shape = Xtr.shape[1:]

    for method in methods:
        print(f"\n=== Tuning {method.upper()} on {dataset} ===")
        # The data is passed on exactly as load_dataset returned it (no rescaling here).
        best_params = tune_hyperparams(
            Xtr, Str, dataset, method, input_shape,
            n_dev_runs=3, epochs=30  # lightweight dev budget
        )
        # Keep the selected parameters for later lookup by (dataset, method).
        tuned[(dataset, method)] = best_params


=== Tuning SCE on FashionMNIST0.3 ===


2025-11-04 18:26:54.228285: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M4 Max
2025-11-04 18:26:54.228309: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 36.00 GB
2025-11-04 18:26:54.228313: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 13.50 GB
I0000 00:00:1762241214.228328 16171413 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1762241214.228351 16171413 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2025-11-04 18:26:54.594201: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[TUNE] dataset=FashionMNIST0.3 method=sce params={'alpha': 0.01, 'beta': 0.5, 'A': -1.0, 'lr': 0.001} -> val_acc=0.6811
[TUNE] dataset=FashionMNIST0.3 method=sce params={'alpha': 0.01, 'beta': 0.5, 'A': -2.0, 'lr': 0.001} -> val_acc=0.6807
[TUNE] dataset=FashionMNIST0.3 method=sce params={'alpha': 0.01, 'beta': 0.5, 'A': -4.0, 'lr': 0.001} -> val_acc=0.6808
[TUNE] dataset=FashionMNIST0.3 method=sce params={'alpha': 0.01, 'beta': 1.0, 'A': -1.0, 'lr': 0.001} -> val_acc=0.6819
[TUNE] dataset=FashionMNIST0.3 method=sce params={'alpha': 0.01, 'beta': 1.0, 'A': -2.0, 'lr': 0.001} -> val_acc=0.6805
[TUNE] dataset=FashionMNIST0.3 method=sce params={'alpha': 0.01, 'beta': 1.0, 'A': -4.0, 'lr': 0.001} -> val_acc=0.6738
[TUNE] dataset=FashionMNIST0.3 method=sce params={'alpha': 0.05, 'beta': 0.5, 'A': -1.0, 'lr': 0.001} -> val_acc=0.6776
[TUNE] dataset=FashionMNIST0.3 method=sce params={'alpha': 0.05, 'beta': 0.5, 'A': -2.0, 'lr': 0.001} -> val_acc=0.6806
[TUNE] dataset=FashionMNIST0.3 method=sc

In [3]:
# Display the tuned parameters; the dict is keyed by (dataset, method).
tuned

{('FashionMNIST0.3', 'sce'): {'alpha': 0.01,
  'beta': 1.0,
  'A': -1.0,
  'lr': 0.001},
 ('FashionMNIST0.6', 'sce'): {'alpha': 0.01,
  'beta': 0.5,
  'A': -4.0,
  'lr': 0.001},
 ('CIFAR', 'sce'): {'alpha': 0.05, 'beta': 1.0, 'A': -4.0, 'lr': 0.001}}

In [3]:
# Full experiment run. Rebinds the notebook-level `datasets` and `methods`.
datasets = ['FashionMNIST0.3', 'FashionMNIST0.6', 'CIFAR']
methods = ['coteaching']  # add more methods here

# NOTE(review): the positional arguments 10 and 50 are presumably the number of
# runs (the log prints "Run k/10") and the number of epochs — confirm against
# run_all_experiments and consider passing them by keyword.
# The `tuned` dict produced by the tuning cell is not passed to this call.
result = run_all_experiments(datasets, methods, 10, 50)

Running COTEACHING on FashionMNIST0.3...


2025-11-04 14:21:45.040511: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M4 Max
2025-11-04 14:21:45.040543: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 36.00 GB
2025-11-04 14:21:45.040548: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 13.50 GB
I0000 00:00:1762226505.040563 15883597 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1762226505.040585 15883597 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2025-11-04 14:21:45.425595: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Run 1/10: Test Accuracy = 94.13%
Run 2/10: Test Accuracy = 95.37%
Run 3/10: Test Accuracy = 96.80%
Run 4/10: Test Accuracy = 94.33%
Run 5/10: Test Accuracy = 94.10%
Run 6/10: Test Accuracy = 96.27%
Run 7/10: Test Accuracy = 92.80%
Run 8/10: Test Accuracy = 97.37%
Run 9/10: Test Accuracy = 94.80%
Run 10/10: Test Accuracy = 95.87%
Result: 95.18 ± 1.34%
Running COTEACHING on FashionMNIST0.6...
Run 1/10: Test Accuracy = 95.50%
Run 2/10: Test Accuracy = 92.43%
Run 3/10: Test Accuracy = 95.23%
Run 4/10: Test Accuracy = 89.90%
Run 5/10: Test Accuracy = 92.63%
Run 6/10: Test Accuracy = 94.80%
Run 7/10: Test Accuracy = 92.87%
Run 8/10: Test Accuracy = 94.33%
Run 9/10: Test Accuracy = 94.50%
Run 10/10: Test Accuracy = 96.50%
Result: 93.87 ± 1.82%
Running COTEACHING on CIFAR...
Run 1/10: Test Accuracy = 63.30%
Run 2/10: Test Accuracy = 63.87%
Run 3/10: Test Accuracy = 58.07%
Run 4/10: Test Accuracy = 61.03%
Run 5/10: Test Accuracy = 63.33%
Run 6/10: Test Accuracy = 58.83%
Run 7/10: Test Accuracy 

In [26]:
# Reshape the results into a Dataset x Method table of the 'Result' values.
# NOTE(review): the output recorded for this cell shows an SCE column, while the
# visible run_all_experiments call uses methods=['coteaching']; the cell appears to
# have been executed against an earlier `result` — re-run after Restart & Run All.
pivot_df = result.pivot(index='Dataset', columns='Method', values='Result')

print(pivot_df)

Method                    SCE
Dataset                      
CIFAR            65.57 ± 3.22
FashionMNIST0.3  98.58 ± 0.16
FashionMNIST0.6  95.83 ± 0.63
