In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from pathlib import Path
import random
import os

In [None]:

### Dataset and model locations (all relative to the current working directory)
DATA_PATH          = './data/original'
AUGMENTED_PATH     = './data/rnn_augmented'
RNN_BALANCED_PATH  = './data/rnn_balanced'
MODELS_PATH        = './models/rnn'

### Names of the per-split sub-directories
TRAIN_DIRECTORY      = 'train'
TEST_DIRECTORY       = 'test'
VALIDATION_DIRECTORY = 'val'

### Sample sizes, one per split
TRAIN_DATASET_SAMPLE_COUNT      = 1_000
TEST_DATASET_SAMPLE_COUNT       = 200
VALIDATION_DATASET_SAMPLE_COUNT = 200

### Split directories, and the category directories expected to hold the images
SUB_DIRECTORIES = [
    TRAIN_DIRECTORY,
    TEST_DIRECTORY,
    VALIDATION_DIRECTORY,
]
DATA_DIRECTORIES = [
    'Actinic keratoses',
    'Basal cell carcinoma',
    'Benign keratosis-like lesions',
    'Chickenpox',
    'Cowpox',
    'Dermatofibroma',
    'Healthy',
    'HFMD',
    'Measles',
    'Melanocytic nevi',
    'Melanoma',
    'Monkeypox',
    'Squamous cell carcinoma',
    'Vascular lesions',
]

### Augmentation settings
### NOTE(review): presumably forwarded to an image data generator; the consumer is not visible in this section
ROTATION_RANGE     = 40
WIDTH_SHIFT_RANGE  = 0.2
HEIGHT_SHIFT_RANGE = 0.2
SHEAR_RANGE        = 0.2
ZOOM_RANGE         = 0.2
HORIZONTAL_FLIP    = True
FILL_MODE          = 'nearest'

In [None]:
### Print the number of JPEG images in every category directory of each dataset split

def count_images(dir_name, sub_directories=None):
    """Print how many JPEG images each category directory contains.

    For every split directory under ``dir_name`` a header is printed, followed
    by one ``Number of images in <category>: <count>`` line per category
    sub-directory, and a blank line after each split.

    Args:
        dir_name: Root directory that contains the split directories.
        sub_directories: Iterable of split directory names to scan. When
            omitted, the module-level ``SUB_DIRECTORIES`` list is used.

    Raises:
        FileNotFoundError: If a split directory does not exist
            (propagated from ``os.listdir``).
    """
    if sub_directories is None:
        sub_directories = SUB_DIRECTORIES

    # Leading dot + lower-casing: counts ".JPG"/".JPEG" too and does not
    # match unrelated names that merely end in the letters "jpg".
    image_suffixes = ('.jpg', '.jpeg')

    for dataset_type in sub_directories:
        dir_type = os.path.join(dir_name, dataset_type)
        print(f"{dataset_type}")
        print("----------------------------")
        # os.listdir returns entries in arbitrary order; sort so the report
        # is identical from run to run.
        for category in sorted(os.listdir(dir_type)):
            category_path = os.path.join(dir_type, category)
            if not os.path.isdir(category_path):
                continue
            image_count = sum(
                1
                for file_name in os.listdir(category_path)
                if file_name.lower().endswith(image_suffixes)
            )
            # Use the directory name itself rather than splitting the joined
            # path on '/', which breaks where os.sep is not '/'.
            print(f"Number of images in {category}: {image_count}")
        print("")

In [None]:
### Report the per-category image counts for every split under DATA_PATH

count_images(DATA_PATH)

In [None]:
### Image augmentation for categories that lack the required image count

In [None]:
def load_images_to_df(base_path, dataset_type, sample_size):
    """Build a shuffled, class-balanced DataFrame of image paths and labels.

    Recursively collects every ``*.jpg`` file under
    ``<base_path>/<dataset_type>``, labels each file with the name of its
    parent directory, draws ``sample_size`` files per label and shuffles the
    combined result. Both random steps use ``random_state=42``.

    Args:
        base_path: Root directory of the dataset.
        dataset_type: Name of the split sub-directory to read.
        sample_size: Number of images to draw for every label.

    Returns:
        pandas.DataFrame with string columns ``Path`` and ``Label`` and a
        fresh 0..n-1 index, containing ``sample_size`` rows per label.

    Raises:
        ValueError: If no ``.jpg`` files are found, or if any label has fewer
            than ``sample_size`` images.
    """
    # The original body joined with an undefined name (`dataset_path`), which
    # raised NameError on every call; the parameter is `dataset_type`.
    image_dir = Path(os.path.join(base_path, dataset_type))

    # glob yields files in filesystem-dependent order; sorting makes the
    # seeded sampling below select the same rows on every machine.
    file_paths = sorted(image_dir.glob('**/*.jpg'))
    if not file_paths:
        raise ValueError(f"No .jpg images found under '{image_dir}'")

    image_df = pd.DataFrame({
        'Path': [str(file_path) for file_path in file_paths],
        # The label is the name of the directory that directly holds the file.
        'Label': [file_path.parent.name for file_path in file_paths],
    })

    samples = []
    # sort=False keeps labels in first-appearance order.
    for label, label_rows in image_df.groupby('Label', sort=False):
        if len(label_rows) < sample_size:
            raise ValueError(
                f"Label '{label}' has only {len(label_rows)} image(s) under "
                f"'{image_dir}'; cannot sample {sample_size}"
            )
        samples.append(label_rows.sample(sample_size, random_state=42))

    # Shuffle so rows of the same label are not contiguous.
    return (
        pd.concat(samples, axis=0)
        .sample(frac=1.0, random_state=42)
        .reset_index(drop=True)
    )

In [None]:
### Sample TRAIN_DATASET_SAMPLE_COUNT images per label from the train split under DATA_PATH
image_df_train = load_images_to_df(DATA_PATH, TRAIN_DIRECTORY, TRAIN_DATASET_SAMPLE_COUNT)