In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import warnings

# Suppress every Python warning from this point on (the "ignore" action is installed
# without a category restriction), presumably to keep cell output uncluttered.
warnings.simplefilter("ignore")

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# define the preprocessing helpers and preview them on a few sample images

import os
from pathlib import Path
import cv2


def dullrazer(img):  # https://github.com/BlueDokk/Dullrazor-algorithm
    """Inpaint thin dark structures (e.g. hairs) found by a black-hat filter.

    Pipeline: grayscale -> black-hat morphology with a 9x9 structuring element ->
    3x3 Gaussian blur -> binary threshold at 10 -> TELEA inpainting with radius 6.

    Args:
        img: Image array in BGR channel order (it is converted with COLOR_BGR2GRAY).

    Returns:
        The inpainted image produced by ``cv2.inpaint``.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Black-hat keeps details that are darker than their surroundings.
    # cv2.MORPH_CROSS is the named constant for shape id 1.
    dark_details = cv2.morphologyEx(
        gray,
        cv2.MORPH_BLACKHAT,
        cv2.getStructuringElement(cv2.MORPH_CROSS, (9, 9)),
    )

    # NOTE(review): the third positional argument of GaussianBlur is sigmaX, so
    # cv2.BORDER_DEFAULT acts as the sigma value here rather than as a border mode.
    # Kept unchanged to preserve the output -- confirm whether this is intended.
    smoothed = cv2.GaussianBlur(dark_details, (3, 3), cv2.BORDER_DEFAULT)

    # Every pixel above 10 becomes part of the mask that is inpainted.
    hair_mask = cv2.threshold(smoothed, 10, 255, cv2.THRESH_BINARY)[1]

    return cv2.inpaint(img, hair_mask, 6, cv2.INPAINT_TELEA)


def clahe(img):
    """Apply CLAHE to the lightness channel of a BGR image.

    Args:
        img: Image array in BGR channel order.

    Returns:
        A BGR image whose LAB L-channel was equalised with CLAHE (clipLimit=3.0).
    """
    # Work in LAB so that only the L-channel is modified.
    lab_img = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)

    equalizer = cv2.createCLAHE(clipLimit=3.0)
    lightness = lab_img[:, :, 0]
    lab_img[:, :, 0] = equalizer.apply(lightness)

    # Back to BGR for the rest of the pipeline.
    return cv2.cvtColor(lab_img, cv2.COLOR_LAB2BGR)


def preprocess_image(img, size=(300, 300)):
    """Resize an image to a fixed size.

    Args:
        img: Image array as returned by ``cv2.imread``.
        size: Target ``(width, height)`` handed to ``cv2.resize``.
            Defaults to ``(300, 300)``.

    Returns:
        The resized image.

    Raises:
        ValueError: If ``img`` is ``None``. ``cv2.imread`` returns ``None`` instead
            of raising when a file is missing or unreadable, so this turns an
            opaque OpenCV assertion failure into a clear message.
    """
    if img is None:
        raise ValueError(
            "preprocess_image received None instead of an image "
            "(missing or unreadable file?)"
        )

    # Hair removal (dullrazer), CLAHE and a LAB conversion exist in this file as
    # optional further steps but are currently disabled; only the resize is applied.
    return cv2.resize(img, size)


isic2019 = pd.read_csv("./datasets/ISIC2019/metadata.csv")

# Fixed set of example images so the preview figure is identical on every run.
# (For a random preview, draw ids with isic2019.sample(5)["isic_id"].values instead.)
files = ["ISIC_0060992", "ISIC_0059442", "ISIC_0053628", "ISIC_0025470", "ISIC_0025621"]

# Top row: original images, bottom row: the same images after preprocess_image.
# squeeze=False keeps `ax` two-dimensional even if only one file is listed.
fig, ax = plt.subplots(2, len(files), figsize=(10, 5), squeeze=False)

for i, file in enumerate(files):
    print(file)
    path = "./datasets/ISIC2019/" + file + ".JPG"
    img = cv2.imread(path)
    if img is None:
        # cv2.imread returns None instead of raising when the file cannot be read.
        raise FileNotFoundError(f"Could not read image: {path}")

    # OpenCV loads images as BGR while matplotlib expects RGB.
    ax[0, i].imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    ax[0, i].axis("off")
    ax[0, i].set_title(isic2019[isic2019["isic_id"] == file]["diagnosis"].values[0])

    img = preprocess_image(img)
    # Show the result exactly once, converted to RGB; drawing the raw BGR array on
    # top afterwards would display the image with red and blue swapped.
    ax[1, i].imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    ax[1, i].axis("off")
    ax[1, i].set_title("Preprocessed Image")

In [None]:
# get all files in the directory
from multiprocessing import Process

# Search ./datasets/ recursively: all "*.JPG" matches first, then all "*.png" matches.
files = [
    image_path
    for pattern in ("*.JPG", "*.png")
    for image_path in Path("./datasets/").rglob(pattern)
]


def update_files(file):
    """Preprocess one image file and overwrite it in place.

    Failures are reported on stdout and swallowed so that one bad file does not
    abort a whole batch.

    Args:
        file: Path of the image (``str`` or ``pathlib.Path``). The file is replaced
            by its preprocessed version.

    Returns:
        None, both on success and on failure.
    """
    # Hand OpenCV a plain string: older OpenCV builds reject pathlib.Path arguments.
    path = str(file)
    try:
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals an unreadable/missing file by returning None.
            raise ValueError(f"could not read image: {path}")

        img = preprocess_image(img)

        # cv2.imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(path, img):
            raise OSError(f"could not write image: {path}")
    except Exception as e:
        # Report the file name; `img` may be unbound or None at this point.
        print(f"Error in {file}")
        print(e)
        return None


# Preprocess every collected image in its own process; update_files overwrites each
# file in place. Processes are started in batches so that at most MAX_PARALLEL of
# them are alive at the same time.
MAX_PARALLEL = 1000
processes = []

for file in tqdm(files):
    p = Process(target=update_files, args=(file,))
    processes.append(p)
    p.start()

    if len(processes) >= MAX_PARALLEL:
        for p in processes:
            p.join()

        processes.clear()

# Also wait for the final, partially filled batch. Without this the cell returns
# while the last workers are still rewriting files.
for p in processes:
    p.join()

processes.clear()