In [1]:
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from google.colab import drive
drive.mount('/content/drive')

# data path
base_dir = '/content/drive/MyDrive/Datasets/new_HAM_binary'

# class names
classes = ['malignant', 'benign']

# data augmentation
datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')

for class_name in classes:
    class_dir = os.path.join(base_dir, class_name)
    images = os.listdir(class_dir)
    num_images = len(images)

    if num_images < 4000:
        for i in range((4000 - num_images)//num_images + 1):
            for image_name in images:
                image = load_img(os.path.join(class_dir, image_name))  # PIL image
                x = img_to_array(image)  # Numpy array with shape (height, width, channels)
                x = x.reshape((1,) + x.shape)  # Numpy array with shape (1, height, width, channels)

                # the .flow() command generates batches of randomly transformed images
                # and saves the results to the `preview/` directory
                for batch in datagen.flow(x, batch_size=1, save_to_dir=class_dir, save_prefix='aug', save_format='jpeg'):
                    break  # we only want one augmented image for each original image
    print(f'Finished augmentation for class {class_name}.')


Mounted at /content/drive
Finished augmentation for class malignant.
Finished augmentation for class benign.


In [3]:
for class_name in classes:
    class_dir = os.path.join(base_dir, class_name)
    images = os.listdir(class_dir)
    num_images = len(images)
    print(num_images)

3877
4164
