In [1]:
# We'll start with our library imports...
from __future__ import print_function

import numpy as np                 # to use numpy arrays
import tensorflow as tf            # to specify and run computation graphs
import tensorflow_datasets as tfds # to load training data
import matplotlib.pyplot as plt    # to visualize data and draw plots
from tqdm import tqdm              # to track progress of loops

import random

In [2]:
# Prepares the CIFAR-10 test dataset for testing.
def prepare_test_cifar10(): 
    test_ds = tfds.load('cifar10', split='test')
    
    prep_test_ds = []
    for data in test_ds: 
        prep_test_ds.append(data)
        
    return prep_test_ds

In [3]:
# Prepares the balanced CIFAR-10 train dataset for training. 
# NOTE: Probably did it in the most inefficient way possible. 
def prepare_train_cifar10(): 
    # Load CIFAR-10 dataset
    train_ds, info = tfds.load('cifar10', split='train', with_info=True)

    # Get class names
    class_names = info.features['label'].names

    # Create lists to hold images and labels in order from class 0 to class 9
    ordered_images = [[] for _ in range(10)]
    ordered_labels = [[] for _ in range(10)]

    # Iterate through the dataset and sort images and labels
    for example in train_ds:
        image, label = example['image'], example['label']
        ordered_images[label].append(image)
        ordered_labels[label].append(example['label'])

    # Concatenate lists
    images = []
    labels = []
    for i in range(10):
        images.extend(ordered_images[i])
        labels.extend(ordered_labels[i])

    # Convert lists to TensorFlow tensors
    images = tf.convert_to_tensor(images)
    labels = tf.convert_to_tensor(labels)
    
    prep_train_ds = []
    for i in range(50000): 
        prep_train_ds.append({
            'id': i,
            'image': images[i],
            'label': labels[i]
        })
        
    return prep_train_ds

In [4]:
#Takes the balanced CIFAR-10 train dataset as a parameter and imbalances it 
def prepare_imb_train_cifar10(bal_dataset):
    prep_imb_train_ds = []
    balanced_dataset = bal_dataset
    
    #Remove a certain amount of data for each class in CIFAR-10. The amount of data removed is incremated by a rate of 0.1.
    for i in range(10): 
        prep_imb_train_ds.append(balanced_dataset[(5000*i):(5000 *(i+1))])

        number_remove = int(5000 * (0.1*i))
        del prep_imb_train_ds[i][:number_remove]
    
    prep_imb_train_ds = sum(prep_imb_train_ds, [])
    
    return prep_imb_train_ds

In [5]:
def shuffle_list(lst):
    random.shuffle(lst)
    return lst

In [26]:
test_cifar10_ds = prepare_test_cifar10()
train_cifar10_ds = prepare_train_cifar10()
imb_train_cifar10_ds = prepare_imb_train_cifar10(train_cifar10_ds)

In [27]:
def add_gaussian_noise(image_tensor, mean=0.0, stddev=1.0):
    """
    Add Gaussian noise to the input image tensor.

    Args:
    - image_tensor: TensorFlow image tensor of shape (height, width, channels)
    - mean: Mean of the Gaussian noise
    - stddev: Standard deviation of the Gaussian noise

    Returns:
    - Noisy image tensor
    """
    noise = tf.random.normal(shape=tf.shape(image_tensor), mean=mean, stddev=stddev, dtype=tf.float32)
    noisy_image = tf.cast(image_tensor, dtype=tf.float32) + noise
    noisy_image = tf.clip_by_value(noisy_image, 0.0, 255.0)  # Clip values to [0, 255]
    noisy_image = tf.cast(noisy_image, dtype=tf.uint8)  # Convert back to uint8
    return noisy_image

In [28]:
def prepare_open_cifar10(imb_dataset): 
    increments = [0, 5000, 9500, 13500, 17000, 20000, 22500, 24500, 26000, 27000, 27500]
    classes_dataset = []
    
    #Separate the dataset into ten datasets based on their class labels
    for i in range(len(increments)): 
        init = increments[i]
        
        # Get the next increment if it exists
        if i < len(increments) - 1:
            last = increments[i + 1]
        else:
            # If we're at the last increment, break the loop
            break
        
        classes_dataset.append(imb_dataset[init:last])
    
    #Add the oversampled images and pure noise images
    for class_dataset in classes_dataset: 
        num_of_oversampled = 5000 - len(class_dataset)
        num_of_pure_noise = int(num_of_oversampled * 0.1)
        i = 0
        
        while(i < num_of_oversampled):
            if(i < (num_of_oversampled - num_of_pure_noise)): 
                class_dataset.append(class_dataset[0])
            else: 
                # Append pure noise image into class_dataset
                noisy_image = add_gaussian_noise(class_dataset[0]['image'])
                class_dataset.append({
                    'id': class_dataset[0]['id'],
                    'image': noisy_image,
                    'label': class_dataset[0]['label']
                })
                
            i+=1
    
    open_train_cifar10_ds = sum(classes_dataset, [])
    
    return open_train_cifar10_ds

In [29]:
######### MODEL ARCH #########
hidden_1 = tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding='same', activation=tf.nn.relu)
hidden_2 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', activation=tf.nn.relu)
pool_1 = tf.keras.layers.MaxPool2D(padding='same')
hidden_3 = tf.keras.layers.Conv2D(filters=128, kernel_size=3, padding='same', activation=tf.nn.relu)
hidden_4 = tf.keras.layers.Conv2D(filters=256, kernel_size=3, padding='same', activation=tf.nn.relu)
pool_2 = tf.keras.layers.MaxPool2D(padding='same')
flatten = tf.keras.layers.Flatten()
output = tf.keras.layers.Dense(10)
conv_classifier = tf.keras.Sequential([hidden_1, hidden_2, pool_1, hidden_3, hidden_4, pool_2, flatten, output])


optimizer = tf.keras.optimizers.Adam()

In [30]:
open_train_cifar10_ds = prepare_open_cifar10(imb_train_cifar10_ds)

def shuffle_test_train(): 
    shuffle_list(test_cifar10_ds)
    shuffle_list(train_cifar10_ds)
    shuffle_list(imb_train_cifar10_ds)
    shuffle_list(open_train_cifar10_ds)

In [31]:
def conv_model_train(dataset):
    loss_values = []
    accuracy_values = []
    # Early Stopping Parameters
    early_stopping_rounds = 200
    best_loss = float('inf')
    counter = 0
    best_accuracy = float('inf')

    # shuffle_test_train()
    shuffle_list(dataset)

    for batch in tqdm(dataset):
        with tf.GradientTape() as tape:
            # run network
            image = tf.cast(tf.expand_dims(batch['image'], axis=0), tf.float32)
            labels = batch['label']
            logits = conv_classifier(image)

            # calculate loss
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=tf.squeeze(logits, axis=0), labels=labels)
        loss_values.append(loss)

        # gradient update
        grads = tape.gradient(loss, conv_classifier.trainable_variables)
        optimizer.apply_gradients(zip(grads, conv_classifier.trainable_variables))

        # calculate accuracy
        predictions = tf.argmax(logits, axis=1)
        accuracy = tf.reduce_mean(tf.cast(tf.equal(predictions, labels), tf.float32))
        accuracy_values.append(accuracy)

    # accuracy
    print("Accuracy During Training:", np.mean(accuracy_values))

In [32]:
def conv_model_test(dataset): 
    loss_values_t = []
    accuracy_values_t = []
    # Early Stopping Parameters
    early_stopping_rounds = 5
    best_loss = float('inf')
    counter = 0
    best_accuracy = float('inf')
    
    shuffle_list(dataset)

    for batch in tqdm(dataset):
        with tf.GradientTape() as tape:
            # run network
            image = tf.cast(tf.expand_dims(batch['image'], axis=0), tf.float32)
            labels = batch['label']
            logits = conv_classifier(image)

            # print(tf.squeeze(logits, axis=0))
            # print(labels)

            # calculate loss
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=tf.squeeze(logits, axis=0), labels=labels)
        loss_values_t.append(loss)

        # calculate accuracy
        predictions = tf.argmax(logits, axis=1)
        accuracy = tf.reduce_mean(tf.cast(tf.equal(predictions, labels), tf.float32))
        accuracy_values_t.append(accuracy)

    # accuracy
    # print(accuracy_values_t)
    print("Accuracy During Training:", np.mean(accuracy_values_t))

In [33]:
'''
With train_cifar10_ds 
'''

'\nWith train_cifar10_ds \n'

In [42]:
conv_model_train(train_cifar10_ds)

100%|██████████| 50000/50000 [04:12<00:00, 198.07it/s]


Accuracy During Training: 0.28924


In [43]:
conv_model_test(test_cifar10_ds)

100%|██████████| 10000/10000 [00:21<00:00, 462.95it/s]


Accuracy During Training: 0.3365


In [44]:
'''
With imb_train_cifar10_ds 
'''

'\nWith imb_train_cifar10_ds \n'

In [45]:
conv_model_train(imb_train_cifar10_ds)

100%|██████████| 27500/27500 [02:18<00:00, 199.06it/s]


Accuracy During Training: 0.37832728


In [46]:
conv_model_test(test_cifar10_ds)

100%|██████████| 10000/10000 [00:21<00:00, 470.91it/s]


Accuracy During Training: 0.2775


In [47]:
'''
With open_train_cifar10_ds 
'''

'\nWith open_train_cifar10_ds \n'

In [48]:
conv_model_train(open_train_cifar10_ds)

100%|██████████| 50000/50000 [04:06<00:00, 203.09it/s]


Accuracy During Training: 0.64746


In [51]:
conv_model_test(test_cifar10_ds)

100%|██████████| 10000/10000 [00:21<00:00, 474.70it/s]


Accuracy During Training: 0.2612
