In [16]:
import os
# print number of images in each directory
for directory in os.listdir('Data'):
    if os.path.isdir(f'Data/{directory}'):
        print(f'{directory}: {len(os.listdir(f"Data/{directory}"))}')

AK_images: 1202
ance_images: 297
BCC_images: 3323
BKL_images: 2624
DF_images: 239
eczema_images: 322
MEL_images: 4918
NV_images: 12875
psoriasis_images: 420
sample: 0
SCC_images: 628
VASC_images: 253


In [17]:
#python version 3.11
# %pip install tensorflow
# %pip install keras
# %pip install matplotlib
# %pip install numpy
# %pip install pandas
# %pip install scikit-learn
# %pip install PIL


In [18]:
import random
import shutil

def sample_images(directory):
    shutil.rmtree(f'Data/sample/{directory}', ignore_errors=True)
    images = os.listdir(directory)
    n = 1000
    if len(images) < 1000:
        n=len(images)
    sample = random.sample(images, n)
    os.makedirs(f'Data/sample/{directory}', exist_ok=True)
    for image in sample:
        shutil.copyfile(f'{directory}/{image}', f'Data/sample/{directory}/{image}')

for directory in os.listdir('Data'):
    if os.path.isdir(f'Data/{directory}') and directory != 'sample':
        sample_images(f'Data/{directory}')
        print(f'{directory}: {len(os.listdir(f"Data/sample/Data/{directory}"))}')



AK_images: 1000
ance_images: 297
BCC_images: 1000
BKL_images: 1000
DF_images: 239
eczema_images: 322
MEL_images: 1000
NV_images: 1000
psoriasis_images: 420
SCC_images: 628
VASC_images: 253


In [19]:
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image


# Path to the directory containing image folders
data_dir = "Data/sample/Data"

# Desired number of images after augmentation
target_count = 1000

# Parameters for data augmentation
data_generator = ImageDataGenerator(
    rescale=1./255,
    rotation_range=90,
    horizontal_flip=True,
    fill_mode='nearest',
)
batch_size = 5
for folder in os.listdir(data_dir):
    if folder == 'sample':
        continue
    folder_path = os.path.join(data_dir, folder)
    if os.path.isdir(folder_path):
        print(f"Augmenting images in folder: {folder}")
        
        for filename in os.listdir(folder_path):
            batch_size = target_count//len(os.listdir(folder_path))
            if(len(os.listdir(folder_path)) >= target_count):
                break
            if filename.endswith(".jpg") or filename.endswith(".jpeg") or filename.endswith(".png"):
                # Load the image
                img_path = os.path.join(folder_path, filename)
                img = image.load_img(img_path)
                x = image.img_to_array(img)
                x = np.expand_dims(x, axis=0)

                # Generate augmented images
                i = 0
                for batch in data_generator.flow(x, batch_size=batch_size, save_to_dir=folder_path, save_prefix=f'aug_{filename[:-4]}', save_format='jpg'):
                    i += 1
                    if i > 5:  # Generate 5 augmented images per original image
                        break

print("Data augmentation completed.")


Augmenting images in folder: AK_images
Augmenting images in folder: ance_images
Augmenting images in folder: BCC_images
Augmenting images in folder: BKL_images
Augmenting images in folder: DF_images
Augmenting images in folder: eczema_images
Augmenting images in folder: MEL_images
Augmenting images in folder: NV_images
Augmenting images in folder: psoriasis_images
Augmenting images in folder: SCC_images
Augmenting images in folder: VASC_images
Data augmentation completed.


In [20]:
import os
# print number of images in each directory
for directory in os.listdir('Data/sample/Data'):
    if os.path.isdir(f'Data/sample/Data/{directory}'):
        print(f'{directory}: {len(os.listdir(f"Data/sample/Data/{directory}"))}')

AK_images: 1000
ance_images: 1005
BCC_images: 1000
BKL_images: 1000
DF_images: 1001
eczema_images: 1005
MEL_images: 1000
NV_images: 1000
psoriasis_images: 1002
SCC_images: 1000
VASC_images: 1003


In [21]:
#train test split 
import os
import shutil
import random
# Create a new directory
os.makedirs('Data/sample/train', exist_ok=True)
os.makedirs('Data/sample/test', exist_ok=True)
os.makedirs('Data/sample/val', exist_ok=True)
# Iterate through each folder
for folder in os.listdir('Data/sample/Data'):
    folder_path = os.path.join('Data/sample/Data', folder)
    if os.path.isdir(folder_path):
        # Get a random sample of images
        images = os.listdir(folder_path)
        random.shuffle(images)
        split_index = int(0.8 * len(images))
        train_images = images[:split_index]
        val_images = images[split_index:]
        # Move images to train/test folders
        for image in train_images:
            source = os.path.join(folder_path, image)
            target = os.path.join('Data/sample/train', folder, image)
            os.makedirs(os.path.dirname(target), exist_ok=True)
            shutil.copyfile(source, target)
        for image in val_images:
            source = os.path.join(folder_path, image)
            target = os.path.join('Data/sample/val', folder, image)
            os.makedirs(os.path.dirname(target), exist_ok=True)
            shutil.copyfile(source, target)



