In [24]:
# We'll start with our library imports...
from __future__ import print_function

import numpy as np                 # to use numpy arrays
import tensorflow as tf            # to specify and run computation graphs
import tensorflow_datasets as tfds # to load training data
import matplotlib.pyplot as plt    # to visualize data and draw plots
from tqdm import tqdm              # to track progress of loops

import random

In [25]:
def prepare_test_cifar100():
    """Load the CIFAR-100 test split and materialise it as a Python list.

    Returns:
        list: every element produced by iterating the `tfds` 'cifar100'
        test split, in iteration order.
    """
    return [example for example in tfds.load('cifar100', split='test')]

In [26]:
def prepare_train_cifar100():
    """Load the CIFAR-100 train split and return it grouped by class label.

    The examples are bucketed by their integer label and then emitted class
    by class (label 0 first), keeping the dataset's iteration order inside
    each class. Every returned record gets a fresh positional 'id'.

    The number of classes is read from the dataset metadata and the number of
    records follows the data, so nothing is hard-coded to 100 classes or
    50000 examples.

    Returns:
        list[dict]: one dict per training example with the keys
            'id'    -- position of the record in the returned list,
            'image' -- the example's image tensor as yielded by the dataset,
            'label' -- the example's label tensor as yielded by the dataset.
    """
    train_ds, info = tfds.load('cifar100', split='train', with_info=True)
    num_classes = info.features['label'].num_classes

    # One bucket per class; appending while iterating keeps the original
    # within-class order (equivalent to a stable sort by label).
    examples_by_class = [[] for _ in range(num_classes)]
    for example in train_ds:
        examples_by_class[int(example['label'])].append(example)

    # Flatten class by class. The per-example tensors are reused directly:
    # stacking everything into one big tensor only to slice it apart again
    # yields the same values at a much higher cost.
    prep_train_ds = []
    for class_examples in examples_by_class:
        for example in class_examples:
            prep_train_ds.append({
                'id': len(prep_train_ds),
                'image': example['image'],
                'label': example['label']
            })

    return prep_train_ds

In [68]:
def prepare_imb_train_cifar100(bal_dataset, num_classes=100, samples_per_class=500,
                               removal_step=0.01):
    """Build an imbalanced copy of a class-ordered, balanced CIFAR-100 train set.

    `bal_dataset` is read as consecutive blocks of `samples_per_class`
    records, one block per class. From block ``i`` the first
    ``int(samples_per_class * (removal_step * i))`` records are dropped, so
    with the defaults class 0 keeps 500 records, class 1 keeps 495, ...,
    class 99 keeps 5 (25250 records in total).

    Args:
        bal_dataset: sliceable sequence of records ordered by class.
        num_classes: number of class blocks to process.
        samples_per_class: size of each class block in `bal_dataset`.
        removal_step: fraction of a block removed per class index.

    Returns:
        list: the kept records, class by class. `bal_dataset` itself is not
        modified.
    """
    prep_imb_train_ds = []

    for class_idx in range(num_classes):
        start = samples_per_class * class_idx
        stop = start + samples_per_class

        # Same arithmetic as the removal schedule described above; the head of
        # each class block is what gets discarded.
        number_remove = int(samples_per_class * (removal_step * class_idx))

        # extend() keeps the flattening linear, unlike sum(list_of_lists, []).
        prep_imb_train_ds.extend(bal_dataset[start:stop][number_remove:])

    return prep_imb_train_ds

In [69]:
def shuffle_list(lst):
    """Shuffle `lst` in place with `random.shuffle` and return that same list.

    No copy is made: the caller's list is reordered, and the return value is
    the identical object, handed back for convenience.
    """
    random.shuffle(lst)
    return lst

In [70]:
# Build the three dataset variants: the test split, the class-ordered balanced
# train split, and the imbalanced train split derived from it.
# The test loader defined in this notebook is prepare_test_cifar100; the
# previous call to prepare_test_cifar10 raised NameError on a fresh kernel.
test_cifar100_ds = prepare_test_cifar100()
train_cifar100_ds = prepare_train_cifar100()
imb_train_cifar100_ds = prepare_imb_train_cifar100(train_cifar100_ds)

In [170]:
def add_gaussian_noise(image_tensor, mean=0.0, stddev=1.0):
    """
    Return a uint8 copy of an image perturbed by additive Gaussian noise.

    The image is promoted to float32, one noise value per element is drawn
    with `tf.random.normal`, the sum is clamped to [0, 255] and the result is
    cast to `tf.uint8`.

    Args:
    - image_tensor: TensorFlow image tensor; the noise takes its shape from
      `tf.shape(image_tensor)` (typically (height, width, channels))
    - mean: Mean of the Gaussian noise
    - stddev: Standard deviation of the Gaussian noise

    Returns:
    - Noisy image tensor of dtype uint8
    """
    float_image = tf.cast(image_tensor, dtype=tf.float32)
    perturbation = tf.random.normal(
        shape=tf.shape(image_tensor),
        mean=mean,
        stddev=stddev,
        dtype=tf.float32,
    )
    # Keep pixel values in the valid 8-bit range before converting back.
    clamped = tf.clip_by_value(float_image + perturbation, 0.0, 255.0)
    return tf.cast(clamped, dtype=tf.uint8)

In [279]:
def prepare_open_cifar100(imb_dataset, num_classes=100, samples_per_class=500,
                          removal_step=0.01, noise_fraction=0.1):
    """Re-balance an imbalanced, class-ordered dataset by oversampling.

    `imb_dataset` is expected to hold the classes back to back, where class
    ``i`` has lost ``int(samples_per_class * (removal_step * i))`` records
    (the schedule used by `prepare_imb_train_cifar100`). The class boundaries
    are computed from that schedule instead of being listed by hand; with the
    defaults they are 0, 500, 995, 1485, ..., 25245, 25250.

    Every class shorter than `samples_per_class` is padded back to that size:
    first with repeated references to the class's first record, then with
    noisy copies of that record's image produced by `add_gaussian_noise`.
    The noisy part is ``max(1, int(missing * noise_fraction))`` records,
    capped at the number of missing records.

    NOTE(review): padding always reuses the first record of the class, and the
    "noise" records are that image passed through `add_gaussian_noise` with
    its default arguments -- confirm this is the intended oversampling scheme.

    Args:
        imb_dataset: sliceable sequence of dict records with the keys
            'id', 'image' and 'label', ordered by class.
        num_classes: number of consecutive class blocks.
        samples_per_class: target size of every class after padding.
        removal_step: per-class-index removal fraction used to build
            `imb_dataset`.
        noise_fraction: share of the padding that is made of noisy copies.

    Returns:
        list: the padded records, class by class. `imb_dataset` itself is not
        modified.

    Raises:
        IndexError: if a class that needs padding has no records at all.
    """
    # Cumulative class boundaries derived from the removal schedule.
    boundaries = [0]
    for class_idx in range(num_classes):
        removed = int(samples_per_class * (removal_step * class_idx))
        kept = max(samples_per_class - removed, 0)
        boundaries.append(boundaries[-1] + kept)

    open_train_cifar100_ds = []

    # Walk the classes as (start, stop) pairs of consecutive boundaries.
    for start, stop in zip(boundaries, boundaries[1:]):
        class_dataset = list(imb_dataset[start:stop])
        num_of_oversampled = samples_per_class - len(class_dataset)

        if num_of_oversampled > 0:
            first_record = class_dataset[0]
            num_of_pure_noise = min(
                num_of_oversampled,
                max(1, int(num_of_oversampled * noise_fraction)),
            )
            num_of_duplicates = num_of_oversampled - num_of_pure_noise

            # Plain duplicates come first ...
            class_dataset.extend([first_record] * num_of_duplicates)

            # ... followed by independently generated noisy copies that keep
            # the first record's id and label.
            for _ in range(num_of_pure_noise):
                class_dataset.append({
                    'id': first_record['id'],
                    'image': add_gaussian_noise(first_record['image']),
                    'label': first_record['label']
                })

        open_train_cifar100_ds.extend(class_dataset)

    return open_train_cifar100_ds