In [5]:
import os
# print number of images in each directory
for directory in os.listdir('Data'):
    if os.path.isdir(f'Data/{directory}'):
        print(f'{directory}: {len(os.listdir(f"Data/{directory}"))}')

AK: 1202
ance: 297
BCC: 3323
BKL: 2624
DF: 239
eczema: 322
MEL: 4918
NV: 12875
psoriasis: 420
SCC: 628
VASC: 253


In [6]:
#python version 3.11
# %pip install tensorflow
# %pip install keras
# %pip install matplotlib
# %pip install numpy
# %pip install pandas
# %pip install scikit-learn
# %pip install pillow


In [7]:
import random
import shutil

def sample_images(directory):
    shutil.rmtree(f'sample/{directory}', ignore_errors=True)
    images = os.listdir(directory)
    n = 1000
    if len(images) < 1000:
        n=len(images)
    sample = random.sample(images, n)
    os.makedirs(f'sample/{directory}', exist_ok=True)
    for image in sample:
        shutil.copyfile(f'{directory}/{image}', f'sample/{directory}/{image}')

for directory in os.listdir('Data'):
    if os.path.isdir(f'Data/{directory}') and directory != 'sample':
        sample_images(f'Data/{directory}')
        print(f'{directory}: {len(os.listdir(f"sample/Data/{directory}"))}')



AK: 1000
ance: 297
BCC: 1000
BKL: 1000
DF: 239
eczema: 322
MEL: 1000
NV: 1000
psoriasis: 420
SCC: 628
VASC: 253


In [8]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from PIL import Image
from tensorflow.keras import backend as K

# Path to the directory containing image folders
data_dir = "sample/Data"

# Desired number of images after augmentation
target_count = 1000

# Parameters for data augmentation
data_generator = ImageDataGenerator(
    rescale=1./255,
    rotation_range=90,
    horizontal_flip=True,
    fill_mode='nearest',
)
batch_size = 5
for folder in os.listdir(data_dir):
    if folder == 'sample':
        continue
    folder_path = os.path.join(data_dir, folder)
    if os.path.isdir(folder_path):
        print(f"Augmenting images in folder: {folder}")
        
        for filename in os.listdir(folder_path):
            batch_size = target_count//len(os.listdir(folder_path))
            if(len(os.listdir(folder_path)) >= target_count):
                break
            if filename.lower().endswith((".jpg", ".jpeg", ".png")):
                # Load the image
                img_path = os.path.join(folder_path, filename)
                img = image.load_img(img_path)
                x = image.img_to_array(img)
                x = np.expand_dims(x, axis=0)

                # Generate augmented images
                i = 0
                for batch in data_generator.flow(x, batch_size=batch_size, save_to_dir=folder_path, save_prefix=f'aug_{filename[:-4]}', save_format='jpg'):
                    i += 1
                    if i > 5:  # Generate 5 augmented images per original image
                        break

print("Data augmentation completed.")


Augmenting images in folder: AK
Augmenting images in folder: ance
Augmenting images in folder: BCC
Augmenting images in folder: BKL
Augmenting images in folder: DF
Augmenting images in folder: eczema
Augmenting images in folder: MEL
Augmenting images in folder: NV
Augmenting images in folder: psoriasis
Augmenting images in folder: SCC
Augmenting images in folder: VASC
Data augmentation completed.


In [9]:
import os
# print number of images in each directory
for directory in os.listdir('sample/Data'):
    if os.path.isdir(f'sample/Data/{directory}'):
        print(f'{directory}: {len(os.listdir(f"sample/Data/{directory}"))}')

AK: 1000
ance: 1005
BCC: 1000
BKL: 1000
DF: 1001
eczema: 1000
MEL: 1000
NV: 1000
psoriasis: 1001
SCC: 1000
VASC: 1003


In [10]:
#train test split 
import os
import shutil
import random
# Create a new directory
os.makedirs('sample/train', exist_ok=True)
os.makedirs('sample/test', exist_ok=True)
os.makedirs('sample/val', exist_ok=True)
# Iterate through each folder
for folder in os.listdir('sample/Data'):
    folder_path = os.path.join('sample/Data', folder)
    if os.path.isdir(folder_path):
        # Get a random sample of images
        images = os.listdir(folder_path)
        random.shuffle(images)
        split_index = int(0.8 * len(images))
        train_images = images[:split_index]
        val_images = images[split_index:]
        # Move images to train/test folders
        for image in train_images:
            source = os.path.join(folder_path, image)
            target = os.path.join('sample/train', folder, image)
            os.makedirs(os.path.dirname(target), exist_ok=True)
            shutil.copyfile(source, target)
        for image in val_images:
            source = os.path.join(folder_path, image)
            target = os.path.join('sample/val', folder, image)
            os.makedirs(os.path.dirname(target), exist_ok=True)
            shutil.copyfile(source, target)



