In [None]:
import os
import shutil
import tensorflow as tf
import numpy as np
import pandas as pd

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# The dataset paths used further down in this notebook all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')   

Mounted at /content/drive


In [None]:
rootdir = '/content/drive/My Drive/Colab Notebooks/dataset/colon_image_sets/'  # path of the original folder
classes = ['aca', 'n']
train_dir = os.path.join(rootdir, 'train/')

# Target number of images per class once augmentation is done
# (original images + augmented copies).
aug_images = 8000
batch_size = 50

# The augmentation settings are the same for every class, so the generator
# is built once instead of once per loop iteration.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=180,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)

# flow_from_directory() reads images from sub-directories of the directory it
# is given, so each class is staged in <aug_dir>/img_dir before augmenting.
aug_dir = os.path.join(rootdir, 'aug_dir')
img_dir = os.path.join(aug_dir, 'img_dir')

# Augment the images of each class, writing the results back into its training directory
for img_class in classes:
    class_dir = os.path.join(train_dir, img_class)

    # Only regular files can be copied; skip any stray sub-directories.
    img_list = [
        file_name for file_name in os.listdir(class_dir)
        if os.path.isfile(os.path.join(class_dir, file_name))
    ]
    num_files = len(img_list)
    num_batches = int(np.ceil((aug_images - num_files) / batch_size))

    # Nothing to do when the class is empty or already holds enough images;
    # checking this first avoids copying every file for no result.
    if num_files == 0 or num_batches <= 0:
        print(f"Skipping class '{img_class}': {num_files} images present, target is {aug_images}")
        continue

    # An interrupted earlier run may have left the staging directory behind,
    # which would otherwise make directory creation fail.
    if os.path.isdir(aug_dir):
        shutil.rmtree(aug_dir)
    os.makedirs(img_dir)

    try:
        # Copy the class images from the training directory into the staging directory
        for file_name in img_list:
            source = os.path.join(class_dir, file_name)
            target = os.path.join(img_dir, file_name)
            shutil.copyfile(source, target)

        # Augmented images are saved straight into the class training directory
        aug_datagen = datagen.flow_from_directory(aug_dir, save_to_dir=class_dir, save_format='jpg',
                                                  target_size=(768, 768), batch_size=batch_size)

        # Each batch drawn from the generator is written to save_to_dir as a
        # side effect; the returned arrays themselves are not needed.
        for _ in range(num_batches):
            next(aug_datagen)
    finally:
        # Always delete the temporary directory, even if augmentation failed
        shutil.rmtree(aug_dir)

In [None]:
# Delete the augmented images from the 'n' training class.
# Only files in train/n whose names start with an underscore are matched;
# the 'aca' class directory is not touched by this cell.
import glob
import os

augmented_pattern = "/content/drive/My Drive/Colab Notebooks/dataset/colon_image_sets/train/n/_*"
augmented_files = glob.glob(augmented_pattern)
for augmented_file in augmented_files:
    os.remove(augmented_file)
    

In [None]:
# Report how many entries each split/class directory of the dataset contains.
base_dir = '/content/drive/My Drive/Colab Notebooks/dataset/colon_image_sets'
train_dir, test_dir = (os.path.join(base_dir, split) for split in ('train', 'test'))

train_aca, train_n = (os.path.join(train_dir, label) for label in ('aca', 'n'))
test_aca, test_n = (os.path.join(test_dir, label) for label in ('aca', 'n'))

# Note: the counts of the 'test' directories are printed under "validation" captions.
for caption, folder in (
    ('total training aca images:', train_aca),
    ('total training n images:', train_n),
    ('total validation aca images:', test_aca),
    ('total validation n images:', test_n),
):
    print(caption, len(os.listdir(folder)))

total training aca images: 8000
total training n images: 8000
total validation aca images: 1000
total validation n images: 1000
