In [None]:
# Colab-only setup: the google.colab helper exposes Google Drive to this runtime.
from google.colab import drive

# Mount Drive under /gdrive.
drive.mount('/gdrive')
# Switch the working directory to the course folder so that the relative paths
# used by later cells (e.g. the .npz archive) resolve against it.
%cd /gdrive/My Drive/[2024-2025]AN2DL/

In [None]:
# Single seed shared by every random number generator configured in this cell
seed = 42

# Standard-library modules needed before anything else is configured
import os
import json

# Environment variables are set before TensorFlow / matplotlib are imported,
# since those imports happen further down in this same cell.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so setting it
# here presumably only affects child processes — confirm if hash determinism matters.
os.environ['PYTHONHASHSEED'] = str(seed)
# Matplotlib configuration directory: a 'configs' folder under the current working directory
os.environ['MPLCONFIGDIR'] = os.getcwd() + '/configs/'

# Suppress warnings.
# The second filter uses the base class Warning, so it already covers FutureWarning as well.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)

# Logging, Python RNG and NumPy
import logging
import random
import numpy as np

# Seed the global NumPy and Python random number generators
np.random.seed(seed)
random.seed(seed)


# Deep-learning stack. The standalone `keras` package is bound to the names
# tfk / tfkl; the tensorflow.keras variants are kept below as commented-out alternatives.
import tensorflow as tf
#from tensorflow import keras as tfk
import keras as tfk       # standalone keras on purpose, not tensorflow.keras
from keras.layers import Input, Dense, Dropout, Lambda
#from tensorflow.keras.layers import Input, Dense, Dropout, Lambda
from keras import layers as tfkl


# Report the library versions in use
print(f"Tensorflow version -> {tf.__version__}")
print(f"Keras version -> {tfk.__version__}")
# Seed TensorFlow through both the current and the compat.v1 API
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)

# Reduce TensorFlow verbosity (autograph, Python logger, compat.v1 logger)
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Print TensorFlow version (same value as the formatted line printed above)
print(tf.__version__)

# Remaining third-party libraries: I/O, imaging, datasets, plotting, metrics
import requests
from io import BytesIO
import cv2
from PIL import Image
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
# NOTE(review): this one comes from tensorflow.keras while the layers above come from
# standalone keras — confirm the two are meant to be mixed.
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import seaborn as sns

# Configure plot display settings: larger fonts, white seaborn style, inline figures
sns.set(font_scale=1.4)
sns.set_style('white')
plt.rc('font', size=14)
%matplotlib inline

In [None]:
import numpy as np

# Open the cleaned training archive (path is relative to the working directory)
data = np.load('training_set_cleaned.npz')

# Summarise the archive: list the stored arrays, then name / shape / dtype of each
print(data.files)
for key in data.files:
    array = data[key]
    print(
        f"Array '{key}':",
        f"  Shape: {array.shape}",
        f"  Data Type: {array.dtype}",
        sep="\n",
    )

# Images and labels used by the rest of the notebook
X, y = data['images'], data['labels']

print(X.shape, y.shape, sep="\n")

# Class id -> cell type name; the id is the position of the name in this list
labels = dict(enumerate([
    'Basophil',
    'Eosinophil',
    'Erythroblast',
    'Immature granulocytes',
    'Lymphocyte',
    'Monocyte',
    'Neutrophil',
    'Platelet',
]))
# Class names ordered by class id
unique_labels = [labels[class_id] for class_id in sorted(labels)]


In [None]:
# Hold out 10% of the data for validation; stratifying on y keeps the class
# proportions the same in both parts, and the shared seed makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=seed, stratify=y)

# Report the shapes of the four resulting arrays
print(f"Training Data Shape: {X_train.shape}")
print(f"Training Label Shape: {y_train.shape}")
print(f"Validation Data Shape: {X_val.shape}")
print(f"Validation Label Shape: {y_val.shape}")


In [None]:
import matplotlib.pyplot as plt
import numpy as np

def display_random_samples(X, y, grid_size=(10, 10), title="Random Samples"):
    """
    Displays random samples from the dataset in a grid.

    Up to rows * cols distinct images are drawn without replacement. When the
    dataset holds fewer images than the grid has cells, every image is shown
    once and the remaining cells are left blank. An empty dataset prints a
    message and returns without plotting.

    Parameters:
        X (numpy array): Array of images (num_samples, height, width, channels).
        y (numpy array): Corresponding labels, (num_samples,) or (num_samples, 1).
        grid_size (tuple): Grid dimensions (rows, cols). Default is (10, 10).
        title (str): Title for the entire grid.

    Returns:
        None
    """
    rows, cols = grid_size
    available = X.shape[0]
    if available == 0:
        print("No samples available to display.")
        return

    # Sampling without replacement cannot return more items than exist
    num_samples = min(rows * cols, available)

    # A (num_samples, 1) label column would render as "Label: [3]"; use the scalar instead
    label_array = np.asarray(y)
    if label_array.ndim == 2 and label_array.shape[1] == 1:
        label_array = label_array[:, 0]

    # Randomly select indices for the samples
    random_indices = np.random.choice(available, num_samples, replace=False)
    selected_images = X[random_indices]
    selected_labels = label_array[random_indices]

    # squeeze=False keeps `axes` two-dimensional even for a 1x1 or single-row grid
    fig, axes = plt.subplots(rows, cols, figsize=(cols * 2, rows * 2), squeeze=False)
    fig.suptitle(title, fontsize=16)

    for i, ax in enumerate(axes.flat):
        ax.axis("off")
        if i >= num_samples:
            # No image left for this cell: leave it blank
            continue
        ax.imshow(selected_images[i].astype('uint8'))
        ax.set_title(f"Label: {selected_labels[i]}")

    plt.tight_layout()
    plt.subplots_adjust(top=0.92)  # Adjust space for the title
    plt.show()

# Example usage: preview 100 random training images in a 10x10 grid.
# Assumes X_train and y_train are NumPy arrays with matching first dimension:
# X_train: (num_samples, height, width, channels)
# y_train: (num_samples,)
display_random_samples(X_train, y_train, grid_size=(10, 10), title="Random Training Samples")


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from collections import Counter

# Class id -> cell type name, numbered in the order the names are listed
labels = {
    class_id: cell_type
    for class_id, cell_type in enumerate((
        'Basophil',
        'Eosinophil',
        'Erythroblast',
        'Immature granulocytes',
        'Lymphocyte',
        'Monocyte',
        'Neutrophil',
        'Platelet',
    ))
}
# Class names in class-id order
unique_labels = [*labels.values()]

def show_class_distribution(X, y, labels_dict=None, title="Class Distribution in Training Dataset"):
    """
    Prints and plots how many samples each class has.

    Parameters:
        X (numpy array): Array of images (num_samples, height, width, channels).
            Not used by the computation; kept so existing calls keep working.
        y (numpy array): Array of labels; it is flattened before counting.
        labels_dict (dict or sequence, optional): Mapping of class indices to class names.
            Classes without an entry are shown by their raw class id. Default is None.
        title (str): Title of the bar chart.

    Returns:
        None
    """
    def _display_name(cls):
        # Raw class id when no mapping is given or the mapping has no entry for cls
        if not labels_dict:
            return cls
        try:
            return labels_dict[cls]
        except (KeyError, IndexError, TypeError):
            return cls

    # np.unique returns the distinct classes already sorted, with their counts
    sorted_classes, sorted_counts = np.unique(np.asarray(y).flatten(), return_counts=True)
    display_names = [_display_name(cls) for cls in sorted_classes]

    # Display as a table
    print("Class Distribution:")
    print("-------------------")
    for label_name, count in zip(display_names, sorted_counts):
        print(f"Class {label_name}: {count} samples")

    # Plot bar chart
    plt.figure(figsize=(10, 6))
    plt.bar(sorted_classes, sorted_counts, tick_label=display_names)
    plt.xlabel("Class")
    plt.ylabel("Number of Samples")
    plt.title(title)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# Pass the id -> name dict itself: the function documents labels_dict as a mapping, and the
# list of names only worked as a stand-in because the class ids happen to be 0..7.
show_class_distribution(X_train, y_train, labels_dict=labels)

In [None]:
import os
import numpy as np

def save_processed_data(X_train, y_train, X_val, y_val, folder_path="finalDataset_partial"):
    """
    Writes the training and validation arrays to .npy files in one folder.

    The folder is created when missing. Each array is stored under a fixed
    file name: X_train.npy, y_train.npy, X_val.npy, y_val.npy.

    Parameters:
        X_train (numpy array): Training data.
        y_train (numpy array): Training labels.
        X_val (numpy array): Validation data.
        y_val (numpy array): Validation labels.
        folder_path (str): Folder path to save the data.

    Returns:
        None
    """
    os.makedirs(folder_path, exist_ok=True)

    # File name -> array, written in this order
    arrays_by_filename = {
        "X_train.npy": X_train,
        "y_train.npy": y_train,
        "X_val.npy": X_val,
        "y_val.npy": y_val,
    }
    for filename, array in arrays_by_filename.items():
        np.save(os.path.join(folder_path, filename), array)

    print(f"Processed data saved in folder: {folder_path}")

# Persist the current split; no folder_path is passed, so the function's default folder is used.
save_processed_data(X_train, y_train, X_val, y_val)

In [None]:
def load_processed_data(folder_path="processed_dataMax"):
    """
    Reads the training and validation arrays back from .npy files in one folder.

    Expects the files X_train.npy, y_train.npy, X_val.npy and y_val.npy to
    exist inside folder_path.

    Parameters:
        folder_path (str): Folder path from which to load the data.

    Returns:
        X_train, y_train, X_val, y_val (numpy arrays): Loaded datasets.
    """
    filenames = ("X_train.npy", "y_train.npy", "X_val.npy", "y_val.npy")
    # Files are read in the order listed above, which is also the return order
    X_train, y_train, X_val, y_val = (
        np.load(os.path.join(folder_path, filename)) for filename in filenames
    )

    print(f"Processed data loaded from folder: {folder_path}")
    return X_train, y_train, X_val, y_val


In [None]:
# Replace the in-memory split with the arrays stored on disk.
# NOTE(review): no folder_path is passed, so this reads load_processed_data's default
# folder ("processed_dataMax"), while save_processed_data defaults to
# "finalDataset_partial" — confirm this is the dataset that is meant to be loaded.
X_train, y_train, X_val, y_val = load_processed_data()

# Report the shapes of the loaded arrays
print(f"Training Data Shape: {X_train.shape}")
print(f"Training Label Shape: {y_train.shape}")
print(f"Validation Data Shape: {X_val.shape}")
print(f"Validation Label Shape: {y_val.shape}")


In [None]:
import matplotlib.pyplot as plt
import numpy as np

def show_grid_of_class_samples(X, y, target_class):
    """
    Plots the first 100 images of one class as a 10x10 grid without axes.

    When the class has fewer than 100 images, a message is printed and
    nothing is plotted.

    Parameters:
        X (numpy array): Array of images (num_samples, height, width, channels).
        y (numpy array): Array of labels (num_samples,).
        target_class (int): The class to display in the grid.

    Returns:
        None
    """
    grid_side = 10
    needed = grid_side * grid_side

    # Keep only the images whose label equals the requested class
    class_images = X[np.where(y == target_class)[0]]
    available = len(class_images)

    # Guard clause: the grid is only drawn when it can be filled completely
    if available < needed:
        print(f"Not enough samples for class {target_class}. Only {available} available.")
        return

    # One axes per grid cell; zip stops after the 100 axes are used up
    fig, axes = plt.subplots(grid_side, grid_side, figsize=(15, 15))
    for ax, image in zip(axes.flat, class_images):
        ax.imshow(image.astype('uint8'))
        ax.axis("off")

    # Tighten the gaps between the images
    fig.subplots_adjust(wspace=0.1, hspace=0.1)
    plt.show()

# Example usage: show the first 100 training images of a single class.
# Assumes X_train and y_train are the image and label arrays of the training set.
target_class = 0  # Class id to display; 0 is 'Basophil' in the labels mapping
show_grid_of_class_samples(X_train, y_train, target_class)


In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import layers as tfkl

def perform_cutmix_augmix(X, y, class_label, beta=0.5, augmix_alpha=1.0):
    """
    Builds one augmented image from two random images of the same class.

    A rectangular patch of the second image is pasted onto the first (CutMix),
    the result is randomly rotated, and the rotated and unrotated versions are
    blended with a Beta-distributed weight (the "AugMix" step of this notebook).

    Parameters:
        X (numpy array): Array of images (num_samples, height, width, channels).
        y (numpy array): Array of labels (num_samples,).
        class_label (int): The label of the class to apply CutMix and AugMix on.
        beta (float): Parameter of the Beta(beta, beta) distribution from which the
            CutMix ratio is sampled. Controls the size of the CutMix patch.
        augmix_alpha (float): Parameter of the Beta(augmix_alpha, augmix_alpha)
            distribution used to blend the unrotated and rotated images.

    Returns:
        final_image (numpy array): The resulting uint8 image after CutMix and AugMix.
        class_label (int): The label of the selected class (single class).

    Raises:
        ValueError: If the class has fewer than two samples.
    """
    # Filter indices of images that belong to the specified class
    class_indices = np.where(y == class_label)[0]

    if len(class_indices) < 2:
        raise ValueError(f"Not enough samples for class {class_label} to apply CutMix.")

    # Randomly select two different images from the same class for CutMix
    indices = np.random.choice(class_indices, 2, replace=False)
    img1, img2 = X[indices[0]], X[indices[1]]

    # Sample the CutMix ratio from Beta(beta, beta). It was previously the constant
    # max(0.1, 0.4), which silently ignored `beta`. The 0.1 floor is kept so that the
    # pasted patch never covers more than about 90% of the image.
    lam = max(0.1, float(np.random.beta(beta, beta)))
    H, W, _ = img1.shape

    # Patch sides scale with sqrt(1 - lam), so the patch area is about (1 - lam) of the image
    cut_w = int(W * np.sqrt(1 - lam))
    cut_h = int(H * np.sqrt(1 - lam))
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    # Bounding box of the patch, clipped to the image borders
    x1 = np.clip(cx - cut_w // 2, 0, W)
    x2 = np.clip(cx + cut_w // 2, 0, W)
    y1 = np.clip(cy - cut_h // 2, 0, H)
    y2 = np.clip(cy + cut_h // 2, 0, H)

    # Create the CutMix image: img1 with the patch region taken from img2
    cutmix_image = img1.copy()
    cutmix_image[y1:y2, x1:x2, :] = img2[y1:y2, x1:x2, :]

    # Random rotation applied to the whole CutMix image; the layer is called directly,
    # a Sequential wrapper around a single layer adds nothing
    rotation = tfkl.RandomRotation((0.3, 0.9), fill_mode='nearest')
    aug_image = rotation(tf.expand_dims(cutmix_image, axis=0), training=True)
    # Drop only the batch axis; a bare squeeze would also drop a size-1 channel axis
    aug_image = tf.squeeze(aug_image, axis=0).numpy()

    # Blend the rotated image with the unrotated CutMix image
    lam_aug = np.random.beta(augmix_alpha, augmix_alpha)
    final_image = lam_aug * cutmix_image + (1 - lam_aug) * aug_image

    # Clip values to valid range and convert to uint8
    final_image = np.clip(final_image, 0, 255).astype(np.uint8)

    return final_image, class_label



In [None]:
def augment_and_add_to_training(X_train, y_train, samples_per_class, beta=0.5, augmix_alpha=1.0):
    """
    Generates augmented samples for each class and adds them to the existing training dataset.

    Augmented images are always built from the images passed in, never from
    images generated during the same call. A class for which
    perform_cutmix_augmix raises ValueError is reported and skipped.

    Parameters:
        X_train (numpy array): Array of training images (num_samples, height, width, channels).
        y_train (numpy array): Array of training labels (num_samples,) or (num_samples, 1).
        samples_per_class (dict): Dictionary specifying the number of augmented samples to generate for each class.
                                  Example: {0: 100, 1: 50, ...}
        beta (float): Parameter for the Beta distribution. Controls the size of the CutMix patch.
        augmix_alpha (float): Parameter for blending the original and augmented images.

    Returns:
        X_train (numpy array): Updated training images including augmented samples.
        y_train (numpy array): Updated, flattened training labels including augmented samples.
    """
    # Labels are returned flattened in every case, including when nothing is generated
    y_flat = np.asarray(y_train).flatten()

    new_images = []
    new_labels = []
    for class_label, num_samples in samples_per_class.items():
        print(f"Generating {num_samples} augmented samples for class {class_label}...")
        for _ in range(num_samples):
            try:
                aug_image, aug_label = perform_cutmix_augmix(X_train, y_flat, class_label, beta, augmix_alpha)
            except ValueError as e:
                print(f"Skipping class {class_label}: {e}")
                break
            new_images.append(aug_image)
            new_labels.append(aug_label)

    # Concatenate once, and only when something was generated: an empty batch has no
    # image dimensions and cannot be concatenated with the 4-D training array
    if new_images:
        X_train = np.concatenate([X_train, np.stack(new_images)], axis=0)
        y_flat = np.concatenate([y_flat, np.asarray(new_labels)], axis=0)

    print(f"New training set size: {X_train.shape[0]} samples.")
    return X_train, y_flat


In [None]:
# Number of augmented samples to generate for each class.
#
# Class counts recorded for the training set before augmentation:
#   Class Basophil: 765 samples
#   Class Eosinophil: 1961 samples
#   Class Erythroblast: 976 samples
#   Class Immature granulocytes: 1820 samples
#   Class Lymphocyte: 764 samples
#   Class Monocyte: 893 samples
#   Class Neutrophil: 2097 samples
#   Class Platelet: 1479 samples
#
# Every class is topped up to 3147 samples: extra = 3147 - recorded count.
samples_per_class = {
    class_id: 3147 - recorded_count
    for class_id, recorded_count in enumerate((765, 1961, 976, 1820, 764, 893, 2097, 1479))
}

# NOTE(review): X_train / y_train are overwritten with the enlarged arrays, so running
# this cell a second time appends another full batch of augmented samples.
X_train, y_train = augment_and_add_to_training(X_train, y_train, samples_per_class, beta=0.5, augmix_alpha=0.5)

# Check the shapes of the updated dataset
print(f"Updated X_train shape: {X_train.shape}")
print(f"Updated y_train shape: {y_train.shape}")


In [None]:
# keras_cv is imported here because no earlier cell brings it into scope;
# without this the cell fails with a NameError on a fresh kernel.
import keras_cv

# Define the AugMix layer
augmix_layer = keras_cv.layers.AugMix(
    value_range=(0, 255),  # Value range of the input images
    severity=1.0,          # Severity of the augmentations
    num_chains=3,          # Number of augmentation chains to combine
    chain_depth=(1, 3),    # Range for the number of augmentations per chain
    alpha=1.0,             # Strength of mixing the chains
    seed=seed,             # Notebook-wide seed, same as the RandAugment layer below
)

# Define the RandAugment layer
aug = keras_cv.layers.RandAugment(
    value_range=(0, 255),
    augmentations_per_image=3,
    magnitude=0.5,
    magnitude_stddev=0.15,
    rate=0.9090909090909091,
    geometric=True,
    seed=seed,
)

In [None]:
def visualize_cutmix_augmix(X, y, class_label, beta=0.5, augmix_alpha=1.0):
    """
    Applies CutMix and AugMix on two images of the specified class and plots
    those two source images next to the resulting image.

    Parameters:
        X (numpy array): Array of images (num_samples, height, width, channels).
        y (numpy array): Array of labels (num_samples,).
        class_label (int): The label of the class to apply CutMix and AugMix on.
        beta (float): Parameter for the Beta distribution. Controls the size of the CutMix patch.
        augmix_alpha (float): Parameter for blending the original and augmented images.

    Returns:
        None

    Raises:
        ValueError: If the class has fewer than two samples.
    """
    # Filter indices of images that belong to the specified class
    class_indices = np.where(y == class_label)[0]
    if len(class_indices) < 2:
        raise ValueError(f"Not enough samples for class {class_label} to apply CutMix.")

    # Pick the source pair here, so the images plotted are the ones that get mixed
    indices = np.random.choice(class_indices, 2, replace=False)
    img1, img2 = X[indices[0]], X[indices[1]]

    # perform_cutmix_augmix draws its own random pair from whatever it is given; handing
    # it only these two images forces it to use exactly this pair (in either order)
    final_image, _ = perform_cutmix_augmix(X[indices], y[indices], class_label, beta, augmix_alpha)

    # Plot the source images and the final result
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    axes[0].imshow(img1.astype(np.uint8))
    axes[0].set_title(f"Image 1 (Class {class_label})")
    axes[0].axis("off")

    axes[1].imshow(img2.astype(np.uint8))
    axes[1].set_title(f"Image 2 (Class {class_label})")
    axes[1].axis("off")

    axes[2].imshow(final_image.astype(np.uint8))
    axes[2].set_title("CutMix + AugMix Result")
    axes[2].axis("off")

    plt.tight_layout()
    plt.show()
# Demo: visualize one CutMix + AugMix result for class 7 ('Platelet' in the labels mapping).
visualize_cutmix_augmix(X_train, y_train, 7, beta=0.5, augmix_alpha=1.0)