In [1]:


import tensorflow as tf
from tensorflow import keras
import argparse


In [2]:
import os
from google.colab import drive

# Attach Google Drive to the Colab VM, then switch the working directory to
# the project folder (presumably so project-local modules kept there can be
# imported by later cells -- confirm).
drive.mount('/content/drive')
os.chdir('/content/drive/My Drive/Colab Notebooks/noisyloss/')




Mounted at /content/drive


In [3]:
import loss_functions

In [4]:
# crentr_loss_fn = loss_functions.crossentropy_reed_wrap(0.3)
# # Example usage:
# alpha = 0.1
# beta = 1.0
# symloss_fn = loss_functions.symmetric_cross_entropy(alpha, beta)
# lq_loss_fn = loss_functions.lq_loss_wrap(0.3)

In [5]:
# from tensorflow.keras.utils import plot_model
# from IPython.display import Image, display

# # Assuming you have created your model using the create_model function
# model = create_model()

# # Render the architecture diagram (with layer shapes) and save it to
# # ./model_arch.jpg via the to_file argument
# plot_model(model, show_shapes=True,to_file="./model_arch.jpg")


In [6]:
# Name of the baseline Keras loss; not referenced again in the visible cells.
loss_param = 'sparse_categorical_crossentropy'

# MNIST train/test split as bundled with Keras.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Convert to float32, divide by 255 and add a trailing channel axis in a
# single expression, giving arrays of shape (N, 28, 28, 1).
x_train = (x_train.astype('float32') / 255.0).reshape(-1, 28, 28, 1)
x_test = (x_test.astype('float32') / 255.0).reshape(-1, 28, 28, 1)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [7]:

def create_model():
    """Build the small CNN classifier used by every training cell.

    Architecture: Conv2D(32, 3x3, ReLU) -> MaxPool(2x2) -> Conv2D(64, 3x3,
    ReLU) -> MaxPool(2x2) -> Flatten -> Dropout(0.5) -> Dense(10, softmax).

    Returns:
        An uncompiled ``keras.Sequential`` model that takes 28x28x1 inputs
        and outputs 10 class probabilities.
    """
    model = keras.Sequential([
        # Declare the input shape with an explicit Input object instead of
        # passing `input_shape` to the first layer; Keras 3 warns about the
        # latter for Sequential models.
        keras.Input(shape=(28, 28, 1)),
        keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Conv2D(64, (3, 3), activation='relu'),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Flatten(),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(10, activation='softmax')
    ])
    return model


## noisy labels

In [8]:
import numpy as np
# Fraction of the training labels that will be swapped for their permuted
# (noisy) counterpart.
NOISE_LEVEL = 0.6

# Class-to-class relabelling table (from Reed): clean label i becomes perm[i].
perm = np.asarray((7, 9, 0, 4, 2, 1, 3, 5, 6, 8))

In [9]:
# Look up the permuted label for every training sample: noise[i] is the
# label sample i receives if it is selected for corruption.
noise = np.take(perm, y_train)

In [None]:
# Replace a NOISE_LEVEL fraction of the training labels with their permuted
# (noisy) counterparts, drawing the same fraction from every class, then carve
# a stratified 10% validation split out of the (noisy) training set.

from sklearn.model_selection import StratifiedShuffleSplit

# `seed` was read by this cell without ever being assigned earlier in the
# notebook, so a fresh kernel failed with NameError. 42 matches the seed that
# `objective` uses for its trials.
seed = 42

# Start from the clean labels; with NOISE_LEVEL == 0 they stay untouched, so
# one code path serves both the noisy and the clean case.
y_train_noise = y_train.copy()
if NOISE_LEVEL > 0:
    # The "test" side of a stratified split is a per-class NOISE_LEVEL sample;
    # those indices are the ones that get corrupted.
    _, noise_idx = next(
        StratifiedShuffleSplit(n_splits=1,
                               test_size=NOISE_LEVEL,
                               random_state=seed).split(x_train, y_train))
    y_train_noise[noise_idx] = noise[noise_idx]

# The train/validation split is stratified on the labels the model trains on.
train_idx, val_idx = next(
    StratifiedShuffleSplit(n_splits=1,
                           test_size=0.1,
                           random_state=seed).split(x_train, y_train_noise))

X_train_train = x_train[train_idx]
y_train_train = y_train_noise[train_idx]    # labels used for fitting (noisy)
y_train_correct = y_train[train_idx]        # clean labels of the same rows
X_train_val = x_train[val_idx]
y_train_val = y_train_noise[val_idx]        # noisy validation labels
y_train_val_correct = y_train[val_idx]      # clean validation labels

In [11]:
from hyperopt import hp, fmin, tpe, Trials
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn import metrics

# Hyperopt search space for the AGCE runs. Every entry except `epochs` is a
# hyperopt expression that `fmin` samples from.
space = dict(
    # Log-uniform between 1e-4 and 1e-2.
    learning_rate=hp.loguniform('learning_rate', np.log(0.0001), np.log(0.01)),
    batch_size=hp.choice('batch_size', [128, 256, 512]),
    agce_loss_param_a=hp.choice('agce_loss_param_a', [3, 4, 5]),
    agce_loss_param_q=hp.uniform('agce_loss_param_q', 0.1, 0.5),
    # Fixed number of epochs for each trial.
    epochs=10,
)

In [14]:
def objective(params):
    """Hyperopt objective: train one model and return its validation loss.

    Args:
        params: One sample from ``space``; reads the keys ``learning_rate``,
            ``batch_size``, ``agce_loss_param_a``, ``agce_loss_param_q`` and
            ``epochs``.

    Returns:
        The ``val_loss`` recorded for the last epoch that actually ran, which
        can be earlier than ``params['epochs']`` when early stopping fires.
    """
    # Seed Python's `random`, NumPy and TensorFlow together so every trial
    # starts from the same state; seeding only TF and NumPy leaves Python's
    # RNG untouched.
    seed = 42
    keras.utils.set_random_seed(seed)

    model = create_model()

    my_agce_loss = loss_functions.agce_loss_fn(
        num_classes=10,
        a=params['agce_loss_param_a'],
        q=params['agce_loss_param_q'],
        scale=1.0)
    optimizer = tf.keras.optimizers.Adam(learning_rate=params['learning_rate'])
    model.compile(optimizer=optimizer,
                  loss=my_agce_loss,
                  metrics=['accuracy'])

    # Fit on the (possibly noisy) training labels; validation is scored
    # against the clean labels of the held-out split.
    history = model.fit(X_train_train,
                        y_train_train,
                        batch_size=int(params['batch_size']),
                        epochs=params['epochs'],
                        verbose=0,  # keep tuning output quiet
                        validation_data=(X_train_val, y_train_val_correct),
                        callbacks=[keras.callbacks.EarlyStopping(patience=4, mode='min', verbose=0)]
                        )

    # fmin minimises this value.
    return history.history['val_loss'][-1]

In [None]:
# Run the TPE search: 20 evaluations of `objective` over `space`, with every
# trial recorded in `trials`.
trials = Trials()
best = fmin(
    objective,
    space,
    algo=tpe.suggest,
    max_evals=20,
    trials=trials,
)




In [None]:

from hyperopt import space_eval

print("Best hyperparameters found:")
print(best)

# For hp.choice dimensions `best` holds the *index* of the chosen option, not
# the option itself. Indexing `space['batch_size']` does not undo that: the
# entry is a hyperopt expression, so subscripting it yields another
# expression rather than 128/256/512. space_eval substitutes `best` back into
# the search space and returns the concrete values for every key, including
# the fixed `epochs`.
best_params = dict(space_eval(space, best))
# Make sure the values handed to Keras and the loss are plain Python ints.
best_params['batch_size'] = int(best_params['batch_size'])
best_params['agce_loss_param_a'] = int(best_params['agce_loss_param_a'])



In [None]:


from keras.callbacks import EarlyStopping
from sklearn import metrics

# Retrain once with the tuned hyperparameters and score on the MNIST test set.
# Seed Python, NumPy and TensorFlow first so the reported numbers can be
# reproduced; 42 is the same seed the tuning trials use.
keras.utils.set_random_seed(42)

model = create_model()
optimizer = tf.keras.optimizers.Adam(learning_rate=best_params['learning_rate'])
my_agce_loss = loss_functions.agce_loss_fn(
    num_classes=10,
    a=best_params['agce_loss_param_a'],
    q=best_params['agce_loss_param_q'],
    scale=1.0)
model.compile(optimizer=optimizer,
              loss=my_agce_loss,
              metrics=['accuracy'])

# Train on the (possibly noisy) labels; validation and early stopping are
# driven by the clean labels of the held-out split.
history = model.fit(X_train_train,
                    y_train_train,
                    batch_size=best_params["batch_size"],
                    epochs=best_params["epochs"],
                    verbose=True,
                    validation_data=(X_train_val, y_train_val_correct),
                    callbacks=[EarlyStopping(patience=4, mode='min', verbose=True)]
                    )

# Evaluate on the untouched test set: class probabilities -> arg-max labels.
y_pred_probs = model.predict(x_test)
y_pred = np.argmax(y_pred_probs, axis=1)

acc = metrics.accuracy_score(y_test, y_pred)
macro_averaged_f1 = metrics.f1_score(y_test, y_pred, average='macro')
print("accuracy",acc)
print("macro_f1",macro_averaged_f1)


In [None]:

import matplotlib.pyplot as plt

plt.figure(figsize=(12, 5))

# One row per panel:
# (subplot position, training key, validation key, title, y-axis label)
panels = [
    (1, 'accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
    (2, 'loss', 'val_loss', 'Model Loss', 'Loss'),
]

# Draw the training and validation curves of each metric side by side.
for position, train_key, val_key, title, y_label in panels:
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')

plt.tight_layout()
plt.show()

In [None]:
from sklearn.model_selection import StratifiedShuffleSplit

from keras.callbacks import EarlyStopping
from sklearn import metrics
import numpy as np

# Repeat the whole pipeline (label corruption, split, training, test-set
# evaluation) for several seeds and report the spread of the test accuracy.
seeds = [42, 123, 456]
all_results = []  # test accuracy of each run

for seed in seeds:
    print(f"Running with seed: {seed}")
    # Seed Python's `random`, NumPy and TensorFlow together; seeding only TF
    # and NumPy leaves Python's RNG untouched.
    keras.utils.set_random_seed(seed)

    # Start from the clean labels; with NOISE_LEVEL == 0 they stay untouched,
    # so one code path serves both the noisy and the clean case.
    y_train_noise = y_train.copy()
    if NOISE_LEVEL > 0:
        # The "test" side of a stratified split is a per-class NOISE_LEVEL
        # sample; those indices are the ones that get corrupted.
        _, noise_idx = next(
            StratifiedShuffleSplit(n_splits=1,
                                   test_size=NOISE_LEVEL,
                                   random_state=seed).split(x_train, y_train))
        y_train_noise[noise_idx] = noise[noise_idx]

    # The 10% validation split is stratified on the labels used for training.
    train_idx, val_idx = next(
        StratifiedShuffleSplit(n_splits=1,
                               test_size=0.1,
                               random_state=seed).split(x_train, y_train_noise))

    X_train_train = x_train[train_idx]
    y_train_train = y_train_noise[train_idx]    # labels used for fitting
    y_train_correct = y_train[train_idx]        # clean labels, same rows
    X_train_val = x_train[val_idx]
    y_train_val = y_train_noise[val_idx]        # noisy validation labels
    y_train_val_correct = y_train[val_idx]      # clean validation labels

    model = create_model()
    optimizer = tf.keras.optimizers.Adam(learning_rate=best_params['learning_rate'])
    my_agce_loss = loss_functions.agce_loss_fn(
        num_classes=10,
        a=best_params['agce_loss_param_a'],
        q=best_params['agce_loss_param_q'],
        scale=1.0)
    model.compile(optimizer=optimizer,
                  loss=my_agce_loss,
                  metrics=['accuracy'])

    # Validation and early stopping are driven by the clean labels.
    history = model.fit(X_train_train,
                        y_train_train,
                        batch_size=best_params["batch_size"],
                        epochs=best_params["epochs"],
                        verbose=True,
                        validation_data=(X_train_val, y_train_val_correct),
                        callbacks=[EarlyStopping(patience=4, mode='min', verbose=True)]
                        )

    # Evaluate on the untouched test set: class probabilities -> arg-max labels.
    y_pred_probs = model.predict(x_test)
    y_pred = np.argmax(y_pred_probs, axis=1)

    acc = metrics.accuracy_score(y_test, y_pred)
    macro_averaged_f1 = metrics.f1_score(y_test, y_pred, average='macro')
    all_results.append(acc)
    print("accuracy",acc)
    print("macro_f1",macro_averaged_f1)


# Mean and (population) standard deviation of the test accuracy across seeds.
mean_accuracy = np.mean(all_results)
std_accuracy = np.std(all_results)

print(f"Mean accuracy: {mean_accuracy:.4f} +/- {std_accuracy:.4f}")