## Connect Colab to Google Drive

In [None]:
# Colab-only helper used to attach Google Drive to this runtime
from google.colab import drive

# Mount Drive at /gdrive, then switch the working directory to the homework
# folder so the relative paths used by later cells resolve inside it
drive.mount('/gdrive')
%cd /gdrive/My Drive/Homework 1

Mounted at /gdrive
/gdrive/My Drive/Homework 1


In [None]:
%%writefile requirements.txt
tensorflow==2.17.0
keras==3.4.1
keras_cv

Overwriting requirements.txt


In [None]:
!pip install -r requirements.txt

Collecting tensorflow==2.17.0 (from -r requirements.txt (line 1))
  Downloading tensorflow-2.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)
Collecting keras==3.4.1 (from -r requirements.txt (line 2))
  Downloading keras-3.4.1-py3-none-any.whl.metadata (5.8 kB)
Collecting keras_cv (from -r requirements.txt (line 3))
  Downloading keras_cv-0.9.0-py3-none-any.whl.metadata (12 kB)
Collecting keras-core (from keras_cv->-r requirements.txt (line 3))
  Downloading keras_core-0.1.7-py3-none-any.whl.metadata (4.3 kB)
Downloading tensorflow-2.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (601.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m601.3/601.3 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading keras-3.4.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m62.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading keras_cv-0.9.0-py3-none-any.whl (650 kB)

## Model Name

In [None]:
# Components of the experiment identifier
mark = '7'
description = 'dataAug'
version = '2'

# Assembled as F_M<mark>_<description>_v<version>
model_name = f"F_M{mark}_{description}_v{version}"
print(model_name)


F_M7_dataAug_v2


## Import Libraries

In [None]:
# Set seed for reproducibility
seed = 42

# Import necessary libraries
import os
# NOTE(review): setuptools.dist is never referenced in this cell; presumably it is
# imported for its side effects before TensorFlow loads — confirm before removing.
import setuptools.dist

# Set environment variables before importing modules
# (TensorFlow and matplotlib are only imported further down, after these are in place)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so setting it
# here cannot change hashing in the already-running kernel; it only reaches child processes.
os.environ['PYTHONHASHSEED'] = str(seed)
# Matplotlib's config directory is placed under the current working directory
os.environ['MPLCONFIGDIR'] = os.getcwd() + '/configs/'

# Suppress warnings
# NOTE: `Warning` is the base class of every warning category, so the second
# filter silences all warnings, not only FutureWarning.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)

# Import necessary modules
import logging
import random
import numpy as np
from tqdm.auto import tqdm

# Set seeds for random number generators in NumPy and Python
np.random.seed(seed)
random.seed(seed)

# Import TensorFlow and Keras
import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import regularizers
import keras_cv

# Set seed for TensorFlow
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)

# Reduce TensorFlow verbosity
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Print the TensorFlow and Keras versions so the run records what was actually loaded
print(tf.__version__)
print(tfk.__version__)

# Import other libraries
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
import seaborn as sns
from PIL import Image
import matplotlib.gridspec as gridspec

# Configure plot display settings
sns.set(font_scale=1.4)
sns.set_style('white')
plt.rc('font', size=14)
%matplotlib inline

# Keras application preprocessing and the legacy image generator
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Augmentation and class-rebalancing libraries
import albumentations as A
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler

import zipfile


2.17.0
3.4.1


# Data

In [None]:
# Load data
# np.load on an .npz returns a lazy NpzFile that keeps the archive open; reading
# both arrays inside a context manager releases the file handle immediately.
with np.load('./training_set_cleaned.npz') as data:
    X = data['images']
    y = data['labels']

# Define the mapping for the labels
label_names = {
    0: "Basophil",
    1: "Eosinophil",
    2: "Erythroblast",
    3: "Immature granulocytes",
    4: "Lymphocyte",
    5: "Monocyte",
    6: "Neutrophil",
    7: "Platelet"
}

# Convert labels to categorical format using one-hot encoding.
# The width is fixed from label_names so it does not depend on which class
# ids happen to be present in the file.
y = tfk.utils.to_categorical(y, num_classes=len(label_names))


# Split the data: 5% held out as test set, then 10% of the remainder as validation.
# Both splits are stratified on the labels and share the same seed.
X_train_val, X_test, y_train_val, y_test = train_test_split(X,y, test_size=0.05, random_state=seed, stratify=y)

X_train, X_val, y_train, y_val= train_test_split(X_train_val, y_train_val, test_size=0.10, random_state=seed, stratify=y_train_val)


# Input shape for the model (everything but the sample axis)
input_shape = X_train.shape[1:]

# Output shape for the model (number of one-hot columns)
output_shape = y_train.shape[1]

# Free the unsplit arrays; only the train/val/test splits are used from here on
del X_train_val, y_train_val, X, y


## Data Augmentation

### Support functions

In [None]:
def sampler(X, y, strategy, seed):
    """Rebalance an image dataset by random over- or under-sampling.

    Args:
        X: Image array; unpacked as (n_samples, height, width, channels).
        y: One-hot label matrix; the class index is taken with argmax over axis 1.
        strategy: 'o' for RandomOverSampler, 'u' for RandomUnderSampler,
            'n' to skip resampling.
        seed: Passed as ``random_state`` to the chosen sampler.

    Returns:
        Tuple ``(X_resampled, y_resampled)`` with images in their original
        per-sample shape and labels one-hot encoded with ``y.shape[1]`` columns.
        For strategy 'n' the input objects are returned unchanged.

    Raises:
        ValueError: If ``strategy`` is not one of 'o', 'u' or 'n'.
    """
    # Resolve the strategy first: 'n' and invalid values need no array work at all
    if strategy == 'n':
        return X, y
    if strategy == 'o':
        resampler = RandomOverSampler(random_state=seed)
    elif strategy == 'u':
        resampler = RandomUnderSampler(sampling_strategy='auto', random_state=seed)
    else:
        raise ValueError(f"Invalid strategy {strategy!r}. Use 'o', 'u' or 'n'.")

    # The samplers work on 2-D feature matrices, so flatten each image to one row
    n_samples, height, width, channels = X.shape
    X_flat = X.reshape(n_samples, -1)  # Shape: (n_samples, height*width*channels)
    # Collapse the one-hot labels to 1-D class indices
    y_flat = np.argmax(y, axis=1)  # Shape: (n_samples,)

    X_resampled_flat, y_resampled = resampler.fit_resample(X_flat, y_flat)

    # Reshape the resampled data back to the original image dimensions
    X_resampled = X_resampled_flat.reshape(-1, height, width, channels)
    # Re-encode with the input's column count so the label width is preserved
    y_resampled = tfk.utils.to_categorical(y_resampled, num_classes=y.shape[1])
    return X_resampled, y_resampled

def augment_data(X, y, transform):
    """Run ``transform`` over the whole dataset in a single call.

    ``X`` and ``y`` are converted to float32 tensors and passed to
    ``transform`` as a dict with keys "images" and "labels"; the entries
    stored under those keys in the result are returned as a tuple.
    """
    batch = {
        "images": tf.convert_to_tensor(X, dtype='float32'),
        "labels": tf.convert_to_tensor(y, dtype='float32'),
    }
    result = transform(batch)
    return result["images"], result["labels"]

def generator(X, y, transform, strategy, seed):
    """Resample ``(X, y)`` with ``sampler``, then augment the result.

    ``strategy`` and ``seed`` are forwarded to ``sampler``; its output is
    handed to ``augment_data`` together with ``transform``.
    """
    resampled = sampler(X, y, strategy, seed=seed)
    return augment_data(*resampled, transform)

def view_images(dataset):
    """Show a 4x5 grid of 20 images picked at random from ``dataset``.

    Indices are drawn with ``random.randint``, so the same image can appear
    more than once. Each image is cast to uint8 before being displayed.

    Args:
        dataset: Indexable collection of images supporting ``len``. Items may
            be NumPy arrays or anything ``np.asarray`` can convert, such as
            the tensors returned by ``augment_data``.
    """
    fig = plt.figure(figsize=(10, 10))
    for i in range(20):
        ax = fig.add_subplot(4, 5, i+1, xticks=[], yticks=[])
        idx = random.randint(0, len(dataset)-1)
        # np.asarray first: tensors and plain sequences have no .astype of their own
        ax.imshow(np.asarray(dataset[idx]).astype('uint8'))
    plt.show()

def cutMix_mixUp(seed):
    """Build a callable that applies CutMix, MixUp and RandAugment, in that order.

    All three keras_cv layers are created once with the given ``seed``. The
    returned function takes the samples dict, feeds it through each layer in
    turn and returns the final result.
    """
    # CutMix and MixUp form an extra pipeline, followed by a mild non-geometric RandAugment
    stages = (
        keras_cv.layers.CutMix(seed=seed),
        keras_cv.layers.MixUp(seed=seed),
        keras_cv.layers.RandAugment(
            value_range=(0, 255),
            augmentations_per_image=3,
            magnitude=0.1,
            magnitude_stddev=0.03,
            rate=0.5,
            seed=seed,
            geometric=False,
        ),
    )

    def apply(samples):
        for stage in stages:
            samples = stage(samples)
        return samples

    return apply

def augumentation(seed):
    """Build the main keras_cv augmentation pipeline.

    The candidate layers are the RandAugment standard policy, one AugMix layer
    and a nested "extra" pipeline that picks one of ChannelShuffle, GridMask or
    Grayscale. The returned RandomAugmentationPipeline applies three of those
    candidates per image. Every seeded component receives ``seed``.
    """
    cv_layers = keras_cv.layers

    aug_mix = cv_layers.AugMix(value_range=(0, 255), num_chains=3, seed=seed)

    # Extra pipeline for the more unusual augmentations: one of them per image
    extra_pipeline = cv_layers.RandomAugmentationPipeline(
        layers=[
            cv_layers.ChannelShuffle(seed=seed),
            cv_layers.GridMask(seed=seed),
            cv_layers.Grayscale(output_channels=3),
        ],
        augmentations_per_image=1,
        seed=seed,
    )

    # Use RandAugment's individual layers rather than the full layer, to reduce deformations overall
    standard_policy = cv_layers.RandAugment.get_standard_policy(
        value_range=(0, 255),
        magnitude=0.5,
        magnitude_stddev=0.15,
        geometric=True,
        seed=seed,
    )

    return cv_layers.RandomAugmentationPipeline(
        layers=standard_policy + [aug_mix, extra_pipeline],
        augmentations_per_image=3,
        seed=seed,
    )

#### Training set

In [None]:
# First augmented copy of the training set: custom pipeline, no resampling
aug1 = augumentation(seed=2)
X_t1, y_t1 = generator(X_train, y_train, aug1, strategy='n', seed=2)

In [None]:
# Second augmented copy: same pipeline, different seed, no resampling
aug2 = augumentation(seed=31)
X_t2, y_t2 = generator(X_train, y_train, aug2, strategy='n', seed=31)

In [None]:
# Third copy: RandAugment with its default settings on a randomly undersampled training set
randAugmentStd = keras_cv.layers.RandAugment(value_range=(0, 255), seed=37)
X_t3, y_t3 = generator(X_train, y_train, randAugmentStd, strategy='u', seed=37)

In [None]:
# Fourth copy: CutMix + MixUp + RandAugment chain, no resampling
aug3 = cutMix_mixUp(seed=73)
X_t4, y_t4 = generator(X_train, y_train, aug3, strategy='n', seed=73)

In [None]:
# Stack the original training data with its four augmented copies (images and labels in the same order)
Xa_train = np.concatenate([X_train, X_t1, X_t2, X_t3, X_t4])
ya_train = np.concatenate([y_train, y_t1, y_t2, y_t3, y_t4])

# Persist the combined training set under the keys "images" and "labels"
np.savez('train.npz', images=Xa_train, labels=ya_train)

# The individual augmented copies are no longer needed once merged
del X_t1, X_t2, X_t3, X_t4, y_t1, y_t2, y_t3, y_t4

#### Validation set

In [None]:
# Validation split: a single augmented pass with the custom pipeline, no resampling
aug_val = augumentation(seed=127)
Xt_val, yt_val = generator(X_val, y_val, aug_val, strategy='n', seed=127)

# Save the augmented validation set, then drop the in-memory copies
np.savez('val.npz', images=Xt_val, labels=yt_val)
del Xt_val, yt_val

#### Test set

In [None]:
# Test split: RandAugment with its default settings, no resampling
randAugmentStd = keras_cv.layers.RandAugment(value_range=(0, 255), seed=179)
Xt_test, yt_test = generator(X_test, y_test, randAugmentStd, strategy='n', seed=179)

# Save the augmented test set, then drop the in-memory copies
np.savez('test.npz', images=Xt_test, labels=yt_test)
del Xt_test, yt_test

In [None]:
import zipfile

# Bundle the three saved splits into a single archive
files_to_zip = ['train.npz', 'val.npz', 'test.npz']
output_zip_file = 'datasets.zip'

# Mode 'w' creates the archive, replacing any existing file of that name
with zipfile.ZipFile(output_zip_file, mode='w') as zipf:
    for file in files_to_zip:
        zipf.write(file)

print(f"Created zip archive: {output_zip_file}")

Created zip archive: datasets.zip
