In [1]:
import numpy as np
import pandas as pd
import helper

from cnn_model import cnn_model
from loss_functions import symmetric_cross_entropy
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split


In [23]:
# Base seed: run_single_experiment adds the run index to it, so every
# repetition gets its own train/validation split seed.
RANDOM_SEED = 42

# (alpha, beta) weights passed to symmetric_cross_entropy, keyed by dataset name.
_SCE_WEIGHTS = {
    "FashionMNIST0.3": (0.01, 1),
    "FashionMNIST0.6": (0.01, 1),
    "CIFAR": (0.1, 1),
}

# Forwarded as ``A`` to symmetric_cross_entropy.
# NOTE(review): presumably the constant that stands in for log(0) in the SCE
# loss -- confirm against loss_functions.symmetric_cross_entropy.
_SCE_A = -4.0


def _select_loss(method, dataset, num_classes):
    """Return the loss for ``method``/``dataset`` or raise ValueError if unsupported."""
    if method != "sce":
        # TODO: add the "forward" and "coteaching" losses here once implemented.
        raise ValueError(
            f"Unsupported method {method!r}; only 'sce' is implemented."
        )
    if dataset not in _SCE_WEIGHTS:
        raise ValueError(
            f"No SCE hyperparameters configured for dataset {dataset!r}; "
            f"known datasets: {sorted(_SCE_WEIGHTS)}."
        )
    alpha, beta = _SCE_WEIGHTS[dataset]
    return symmetric_cross_entropy(
        alpha=alpha, beta=beta, A=_SCE_A, num_classes=num_classes
    )


def train_model(X_train, y_train, X_val, y_val, dataset, method="fc", transition_matrix=None, epochs=50, input_shape=(28, 28, 1), num_classes=3):
    """Build, compile and fit a CNN on the given train/validation split.

    The network comes from ``cnn_model`` and is trained with Adam
    (learning rate 0.001), batch size 128 and early stopping on ``val_loss``
    (patience 10, best weights restored).

    Args:
        X_train, y_train: Training inputs and labels passed to ``model.fit``.
        X_val, y_val: Validation inputs and labels used for early stopping.
        dataset: Dataset name; selects the SCE ``alpha``/``beta`` weights.
        method: Training method. Only ``"sce"`` is implemented; the default
            ``"fc"`` is kept for interface compatibility but is rejected.
        transition_matrix: Accepted for interface compatibility; currently unused.
        epochs: Maximum number of training epochs.
        input_shape: Input shape forwarded to ``cnn_model``.
        num_classes: Number of classes forwarded to ``cnn_model`` and the loss.

    Returns:
        The fitted model.

    Raises:
        ValueError: If ``method`` is not implemented, or ``dataset`` has no
            configured hyperparameters for the chosen method.
    """
    # Resolve the loss first so an unsupported configuration fails fast with a
    # clear message, before any model is built.
    loss_function = _select_loss(method, dataset, num_classes)

    model = cnn_model(input_shape=input_shape, num_classes=num_classes)

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss=loss_function,
        metrics=['accuracy']
    )

    early_stopping = keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True
    )

    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=128,
        callbacks=[early_stopping],
        verbose=0
    )

    return model

def evaluate_model(model, X_test, y_test):
    """Return the top-1 accuracy of ``model`` on the test set, in percent.

    Args:
        model: Object exposing ``predict(X, verbose=0)`` that returns one row
            of class scores per sample.
        X_test: Test inputs forwarded to ``model.predict``.
        y_test: Integer class labels, either 1-D or an ``(n, 1)`` column vector.

    Returns:
        Percentage (0-100) of samples whose arg-max prediction equals the label.

    Raises:
        ValueError: If the number of labels differs from the number of predictions.
    """
    predictions = model.predict(X_test, verbose=0)
    predicted_classes = np.argmax(predictions, axis=1)

    # Flatten the labels: comparing an (n,) prediction vector with an (n, 1)
    # label column would broadcast to an (n, n) matrix and silently produce a
    # meaningless accuracy.
    labels = np.asarray(y_test).reshape(-1)
    if labels.shape != predicted_classes.shape:
        raise ValueError(
            f"Got {labels.shape[0]} labels for {predicted_classes.shape[0]} predictions."
        )

    accuracy = np.mean(predicted_classes == labels) * 100
    return accuracy

def run_single_experiment(Xtr, Str, Xts, Yts, T, dataset, method, num_runs=10, epochs=50, num_classes=3):
    """Train and evaluate one method on one dataset over several seeded runs.

    Each run re-splits the training data 80/20 with ``helper.split_data``
    using seed ``RANDOM_SEED + run``, trains a fresh model with
    ``train_model`` and scores it on the test set with ``evaluate_model``.

    Args:
        Xtr: Training inputs; cast to float32 and divided by 255
            (assumes raw pixel intensities in 0-255 -- TODO confirm).
        Str: Training labels, passed to ``helper.split_data``.
        Xts: Test inputs; scaled the same way as ``Xtr``.
        Yts: Test labels used for evaluation.
        T: Transition matrix, or ``None``. Only used when ``method == 'fc'``.
        dataset: Dataset name forwarded to ``train_model``.
        method: Training method name forwarded to ``train_model``.
        num_runs: Number of repetitions.
        epochs: Maximum epochs per repetition.
        num_classes: Number of classes forwarded to ``train_model``.

    Returns:
        List with one test accuracy (percent) per run.

    Raises:
        NotImplementedError: If ``method == 'fc'`` and no ``T`` is supplied,
            because transition-matrix estimation is not implemented yet.
    """
    if method == 'fc':
        if T is None:
            # Previously this path left `transition_matrix` unbound and crashed
            # later with an UnboundLocalError inside the run loop.
            raise NotImplementedError(
                "Transition-matrix estimation is not implemented; "
                "method 'fc' requires a transition matrix T."
            )
        transition_matrix = T
    else:
        transition_matrix = None

    Xtr = Xtr.astype('float32') / 255.0
    Xts = Xts.astype('float32') / 255.0
    input_shape = Xtr.shape[1:]

    accuracies = []

    for run in range(num_runs):
        seed = RANDOM_SEED + run
        # Seed Python, NumPy and TensorFlow so weight initialisation and batch
        # shuffling are tied to the run seed, not just the data split.
        tf.keras.utils.set_random_seed(seed)

        X_train, y_train, X_val, y_val = helper.split_data(
            Xtr, Str, train_ratio=0.8, random_seed=seed
        )

        model = train_model(
            X_train, y_train, X_val, y_val,
            dataset=dataset,
            method=method,
            transition_matrix=transition_matrix,
            epochs=epochs,
            input_shape=input_shape,
            num_classes=num_classes,
        )

        accuracy = evaluate_model(model, Xts, Yts)
        accuracies.append(accuracy)

        print(f"Run {run+1}/{num_runs}: Test Accuracy = {accuracy:.2f}%")

        # Drop the model and reset Keras' global state between runs.
        del model
        tf.keras.backend.clear_session()

    return accuracies
    
def run_all_experiments(datasets, methods, num_runs=10, epochs=50, data_dir='datasets'):
    """Run every method on every dataset and summarise the test accuracies.

    Args:
        datasets: Iterable of dataset names; each is loaded from
            ``<data_dir>/<name>.npz`` via ``helper.load_dataset``.
        methods: Iterable of method names passed to ``run_single_experiment``.
        num_runs: Repetitions per (dataset, method) pair.
        epochs: Maximum epochs per repetition.
        data_dir: Directory containing the ``.npz`` files.

    Returns:
        DataFrame with one row per (dataset, method) pair and the columns
        ``Dataset``, ``Method``, ``Mean``, ``Std`` and ``Result``
        (``Result`` is the formatted "mean ± std" string). The columns are
        present even when no experiment was run.
    """
    columns = ['Dataset', 'Method', 'Mean', 'Std', 'Result']
    results = []

    for dataset in datasets:
        # Load once per dataset rather than once per (dataset, method) pair;
        # the loaded arrays do not depend on the method.
        data_path = f'{data_dir}/{dataset}.npz'
        Xtr, Str, Xts, Yts, T = helper.load_dataset(data_path, dataset)

        for method in methods:
            print(f"Running {method.upper()} on {dataset}...")

            accuracies = run_single_experiment(
                Xtr, Str, Xts, Yts, T, dataset, method, num_runs, epochs
            )
            mean_acc = np.mean(accuracies)
            std_acc = np.std(accuracies)

            results.append({
                'Dataset': dataset,
                'Method': method.upper(),
                'Mean': mean_acc,
                'Std': std_acc,
                'Result': f"{mean_acc:.2f} ± {std_acc:.2f}"
            })

            print(f"Result: {mean_acc:.2f} ± {std_acc:.2f}%")

    # Fixing the columns keeps downstream code (e.g. pivot on Dataset/Method)
    # working even when the result list is empty.
    results_df = pd.DataFrame(results, columns=columns)

    return results_df

In [24]:
# Noisy-label datasets to benchmark; each name is loaded from datasets/<name>.npz.
datasets = [
    'FashionMNIST0.3',
    'FashionMNIST0.6',
    'CIFAR',
]
# Training methods to compare (extend this list as more are implemented).
methods = ['sce']

# 10 repetitions per (dataset, method) pair, at most 50 epochs each.
result = run_all_experiments(datasets, methods, num_runs=10, epochs=50)

Running SCE on FashionMNIST0.3...
Run 1/10: Test Accuracy = 98.53%
Run 2/10: Test Accuracy = 98.67%
Run 3/10: Test Accuracy = 98.37%
Run 4/10: Test Accuracy = 98.77%
Run 5/10: Test Accuracy = 98.80%
Run 6/10: Test Accuracy = 98.53%
Run 7/10: Test Accuracy = 98.37%
Run 8/10: Test Accuracy = 98.63%
Run 9/10: Test Accuracy = 98.73%
Run 10/10: Test Accuracy = 98.40%
Result: 98.58 ± 0.16%
Running SCE on FashionMNIST0.6...
Run 1/10: Test Accuracy = 96.13%
Run 2/10: Test Accuracy = 96.27%
Run 3/10: Test Accuracy = 96.03%
Run 4/10: Test Accuracy = 96.13%
Run 5/10: Test Accuracy = 96.43%
Run 6/10: Test Accuracy = 94.97%
Run 7/10: Test Accuracy = 96.83%
Run 8/10: Test Accuracy = 95.50%
Run 9/10: Test Accuracy = 95.17%
Run 10/10: Test Accuracy = 94.87%
Result: 95.83 ± 0.63%
Running SCE on CIFAR...
Run 1/10: Test Accuracy = 67.87%
Run 2/10: Test Accuracy = 67.60%
Run 3/10: Test Accuracy = 64.07%
Run 4/10: Test Accuracy = 62.67%
Run 5/10: Test Accuracy = 64.67%
Run 6/10: Test Accuracy = 68.50%
Run 

In [26]:
# Reshape the long-format results into a Dataset x Method table whose cells
# hold the formatted "mean ± std" strings.
pivot_df = result.pivot(values='Result', index='Dataset', columns='Method')

print(pivot_df)

Method                    SCE
Dataset                      
CIFAR            65.57 ± 3.22
FashionMNIST0.3  98.58 ± 0.16
FashionMNIST0.6  95.83 ± 0.63
