In [None]:
# Import required libraries
import os
import re
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split

# Reading and Processing Functions

In [2]:
# Build the expected .jpg path inside a directory for every image id in image_list
def get_folder_files(folder_path, image_list):
    """Return one ``<folder_path>/<image>.jpg`` path per entry of ``image_list``.

    The paths are only constructed, in the same order as ``image_list``;
    nothing is checked against the file system.

    Parameters
    ----------
    folder_path : str
        Directory that is prepended to every file name.
    image_list : iterable
        Image identifiers (file names without the ``.jpg`` extension).

    Returns
    -------
    list of str
        The joined paths.
    """
    jpg_paths = []
    for image_id in image_list:
        jpg_name = f"{image_id}.jpg"
        jpg_paths.append(os.path.join(folder_path, jpg_name))
    return jpg_paths

def load_and_resize_file(file_path, size, method='lanczos'):
    """Read an image from disk and resize it to ``size``.

    Parameters
    ----------
    file_path : str
        Path of the image, read with ``cv2.imread``.
    size : tuple
        Target size, forwarded unchanged to ``cv2.resize`` as its ``dsize``
        argument (OpenCV interprets it as ``(width, height)``).
    method : str, optional
        Interpolation name: ``'nearest'``, ``'linear'``, ``'cubic'`` or
        ``'lanczos'`` (default). Any other value falls back to
        nearest-neighbour interpolation.

    Returns
    -------
    The resized image as returned by ``cv2.resize``.

    Raises
    ------
    FileNotFoundError
        If ``file_path`` does not point to an existing file.
    OSError
        If the file exists but OpenCV could not read/decode it.
    """
    # Built inside the function so the lookup happens at call time.
    interpolations = {
        'nearest': cv2.INTER_NEAREST,
        'linear': cv2.INTER_LINEAR,
        'cubic': cv2.INTER_CUBIC,
        'lanczos': cv2.INTER_LANCZOS4,
    }

    # Read image
    image = cv2.imread(file_path)

    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than with an opaque cv2.resize error.
    if image is None:
        if not os.path.isfile(file_path):
            raise FileNotFoundError(f"Image file not found: {file_path}")
        raise OSError(f"OpenCV could not read image: {file_path}")

    # Resize image with a target size using a target method
    # (unknown method names fall back to nearest-neighbour).
    interpolation = interpolations.get(method, cv2.INTER_NEAREST)
    return cv2.resize(image, size, interpolation=interpolation)
        

# Function to resize every listed image and write it into a per-target directory.
def create_and_write_img(file_paths, file_ids, file_targets, target_size, target_method, save_dir_0, save_dir_1, desc=None):
    """Resize each source image and save it under the directory of its target.

    The three sequences ``file_paths``, ``file_ids`` and ``file_targets`` are
    consumed in lockstep with ``zip``. Each output file is named
    ``<file_id>_<size[0]>x<size[1]>_<target_method>.jpg``.

    Parameters
    ----------
    file_paths : iterable of str
        Source image paths.
    file_ids : sequence
        Identifier of each image; also used (via ``len``) as the progress-bar total.
    file_targets : iterable
        Label of each image. A value equal to ``0`` routes the image to
        ``save_dir_0``; any other value routes it to ``save_dir_1``.
    target_size : tuple
        Two-element size passed to ``load_and_resize_file``.
    target_method : str
        Interpolation name passed to ``load_and_resize_file``.
    save_dir_0, save_dir_1 : str
        Destination directories for target ``0`` and for every other target.
    desc : str, optional
        Label shown on the tqdm progress bar.

    Raises
    ------
    OSError
        If ``cv2.imwrite`` reports that an image could not be written.
    """
    # Iterate over the (path, id, target) triples while displaying a progress bar using tqdm.
    progress = tqdm(zip(file_paths, file_ids, file_targets), ascii=True, total=len(file_ids), desc=desc, leave=True)
    for file_path, file_id, file_target in progress:

        # Load and resize the image corresponding to the current file_path
        image = load_and_resize_file(file_path=file_path, size=target_size, method=target_method)

        # Build the new file name
        new_name = f"{file_id}_{target_size[0]}x{target_size[1]}_{target_method}.jpg"

        # Pick the destination directory from the target label
        save_dir = save_dir_0 if file_target == 0 else save_dir_1
        dst_img_path = os.path.join(save_dir, new_name)

        # cv2.imwrite returns False instead of raising when the write fails;
        # surface that so the exported dataset cannot silently miss images.
        if not cv2.imwrite(dst_img_path, image):
            raise OSError(f"Failed to write image: {dst_img_path}")

    return


# Prepare Images for CNN Classification

In [3]:
# Load the metadata CSV into a DataFrame; later cells read its
# `target` and `isic_id` columns.
ROOT_DATASET_DIR = "./"
file_name = os.path.join(
    ROOT_DATASET_DIR,
    "train-metadata-eda-fe.csv",
)
df = pd.read_csv(filepath_or_buffer=file_name)

In [4]:
# Source image folder, plus one output folder per (split, target) pair
ORIG_IMG_DIR = os.path.join("..", "isic-2024-challenge", "train-image", "image")
CASE_FOLDERS = os.listdir(ORIG_IMG_DIR)
ROOT_IMAGE_DIR = os.path.join("..", "images")
ROOT_TRAIN_DIR_0 = os.path.join(ROOT_IMAGE_DIR, "train", "0")
ROOT_TRAIN_DIR_1 = os.path.join(ROOT_IMAGE_DIR, "train", "1")
ROOT_TEST_DIR_0 = os.path.join(ROOT_IMAGE_DIR, "test", "0")
ROOT_TEST_DIR_1 = os.path.join(ROOT_IMAGE_DIR, "test", "1")

# Make sure every output folder exists (no error if it is already there)
for out_dir in (ROOT_TRAIN_DIR_0, ROOT_TRAIN_DIR_1, ROOT_TEST_DIR_0, ROOT_TEST_DIR_1):
    os.makedirs(out_dir, exist_ok=True)

# Resize settings applied to every exported image
TARGET_SIZE = (128, 128)  # assumed 128x128
TARGET_METHOD = 'lanczos'

In [5]:
# Hold out 20% of the rows for testing, stratified on the target label
y = df['target']
X = df.drop(columns=['target'])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [6]:
# Export the training split: resize every image and store it under
# the train directory that matches its target.
ids_train = X_train["isic_id"].tolist()
files_train = get_folder_files(ORIG_IMG_DIR, ids_train)

create_and_write_img(
    files_train,
    ids_train,
    y_train,
    TARGET_SIZE,
    TARGET_METHOD,
    ROOT_TRAIN_DIR_0,
    ROOT_TRAIN_DIR_1,
    desc=f"Train :: {ORIG_IMG_DIR}",
)

Train :: ..\isic-2024-challenge\train-image\image: 100%|##########| 316242/316242 [08:55<00:00, 590.38it/s]


In [7]:
# Process test images: resize every image of the held-out split and store it
# under the test directory that matches its target.
ids_test = X_test.isic_id.to_list()
files_test = get_folder_files(folder_path=ORIG_IMG_DIR, image_list=ids_test)

# The progress bar is labelled "Test" so this run can be told apart from the
# training export (the label was previously copy-pasted as "Train").
create_and_write_img(file_paths=files_test,
                     file_ids=ids_test,
                     file_targets=y_test,
                     target_size=TARGET_SIZE,
                     target_method=TARGET_METHOD,
                     save_dir_0=ROOT_TEST_DIR_0,
                     save_dir_1=ROOT_TEST_DIR_1,
                     desc=f"Test :: {ORIG_IMG_DIR}")

Train :: ..\isic-2024-challenge\train-image\image: 100%|##########| 79061/79061 [02:05<00:00, 629.51it/s]
