In [1]:
import json
import cv2
from matplotlib import pyplot as plt
import os
import numpy as np
import pandas as pd
from prototypes.classical.dataloader.Loader import IsiCancerData
from tqdm.auto import tqdm

%load_ext autoreload
%autoreload 2

# Load the project configuration (data paths, image size) from the JSON file one level up.
with open("../config.json", "r") as config_file:
    config = json.loads(config_file.read())

In [2]:
# Training metadata table; the cells below read its "isic_id" and "target" columns.
train_csv = pd.read_csv(config["TRAIN_METADATA"], engine="python")

In [21]:
# Keep only the image id and its label.
# NOTE(review): this parses the same CSV a second time; `train_csv` already holds these columns.
dict_target = pd.read_csv(config["TRAIN_METADATA"], engine="python")[["isic_id", "target"]]

In [22]:
# Map each image id to its target label.
# The mapping is built from `train_csv` instead of from `dict_target` itself, so
# re-running this cell still works after `dict_target` has become a plain dict.
dict_target = dict(zip(train_csv["isic_id"].values, train_csv["target"].values))

In [23]:

# Peek at the first five image ids (iterating a dict yields its keys).
list(dict_target)[:5]

In [24]:
import torch

# Spot-check the label stored for one image id.
dict_target["ISIC_0015670"]

In [3]:
# (rows, columns) of the training metadata.
train_csv.shape

In [4]:
# Random sample of the metadata; the fixed seed makes the displayed rows
# identical on every Restart & Run All.
train_csv.sample(n=1000, random_state=42)

# Data balance

- Imbalanced Dataset: Specifically refers to unequal class distribution in classification problems.
- Unbalanced Dataset: A more general term that might refer to any irregularity or inconsistency in the dataset, including but not limited to class imbalance.

In [5]:
# Fraction of the rows that falls into each target class.
train_csv.groupby(by="target")[["target"]].count().div(len(train_csv))

This dataset is imbalanced: the unhealthy (malignant) class makes up less than 1% of the whole dataset.

In [6]:
# First five rows labelled target == 1.
train_csv[train_csv["target"] == 1].head()

In [7]:
from prototypes.classical.descriptors.texture import LBPTransformer, HoGTransformer, GaborTransformer

# NOTE(review): these three transformers are not used in this cell and are
# re-created before their first use further down. The later cells use
# LBPTransformer(p=8, r=1) and cells_per_block=(2, 2) — confirm which values are intended.
lbp_transformer = LBPTransformer(p=1, r=8)
hog_transformer = HoGTransformer(orientations=8, pixels_per_cell=(8, 8), cells_per_block=(8, 8), visualize=True)
gabor_transformer = GaborTransformer(frequency=1/100, theta=np.pi/4, sigma_x=5, sigma_y=5)

N_EXAMPLES = 4

# Filter the metadata once per class instead of re-querying and converting the
# whole frame to NumPy on every iteration. The first column holds the file stem.
cancer_file_stems = train_csv.query("target==1").iloc[:N_EXAMPLES, 0].tolist()
non_cancer_file_stems = train_csv.query("target==0").iloc[:N_EXAMPLES, 0].tolist()

fig, ax = plt.subplots(N_EXAMPLES, 2, figsize=(15, 25))

# One row per example: target == 1 on the left, target == 0 on the right.
for i, (cancer_stem, non_cancer_stem) in enumerate(zip(cancer_file_stems, non_cancer_file_stems)):
    cancer_image_file_name = cancer_stem + ".jpg"
    non_cancer_image_file_name = non_cancer_stem + ".jpg"

    # cv2.imread returns BGR; reversing the channel axis gives the RGB order imshow expects.
    cancer_image = cv2.imread(os.path.join(config["TRAIN_IMAGES_PATH"], cancer_image_file_name), cv2.IMREAD_COLOR)[:,:,::-1]
    non_cancer_image = cv2.imread(os.path.join(config["TRAIN_IMAGES_PATH"], non_cancer_image_file_name), cv2.IMREAD_COLOR)[:,:,::-1]

    ax[i, 0].imshow(cancer_image)
    ax[i, 0].set_title("Cancer image")

    ax[i, 1].imshow(non_cancer_image)
    ax[i, 1].set_title("Healthy image")

In [8]:
# Project data loader, built from the loaded configuration.
dataset = IsiCancerData(config)

# Texture Study

In [9]:
IMAGES = 10

# The transformers take fixed parameters, so they are built once instead of on every iteration.
gabor_transformer = GaborTransformer(frequency=1/100, theta=np.pi/4, sigma_x=5, sigma_y=5)
lbp_transformer = LBPTransformer(p=8, r=1)
hog_transformer = HoGTransformer(orientations=8,
                                 pixels_per_cell=(8, 8),
                                 cells_per_block=(2, 2),
                                 visualize=True)

fig, ax = plt.subplots(IMAGES, 8, figsize=(30, 40))

for i in range(IMAGES):
    image, labels = dataset.get_item()

    lbp_map = lbp_transformer.transform(image)
    # Element 1 of the HoG output is what gets plotted
    # (presumably the visualisation image, given visualize=True — confirm).
    hog_map = hog_transformer.transform(image)[1]

    # Elements 0 and 1 of the Gabor output are treated as the real and imaginary responses.
    gabor_map = gabor_transformer.transform(image)
    gabor_magnitude = np.sqrt(gabor_map[0]**2 + gabor_map[1]**2)

    # Zero the first three channels wherever the imaginary response is not positive.
    # The 2-D mask is broadcast over the channel axis.
    imag_attention_map = image.copy()
    imag_attention_map[:, :, :3] = image[:, :, :3] * (gabor_map[1] > 0)[:, :, np.newaxis]

    # NOTE(review): a magnitude is never negative, so this mask only removes exact zeros.
    magnitude_attention_map = image.copy()
    magnitude_attention_map[:, :, :3] = image[:, :, :3] * (gabor_magnitude > 0)[:, :, np.newaxis]

    # Colour panels are shown with the channel axis reversed, as in the original image panel.
    panels = [
        (image[:, :, ::-1], "Original Image"),
        (lbp_map, "LBP Transform"),
        (hog_map, "HoG Transform"),
        (gabor_map[0], "Gabor Real Part"),
        (gabor_map[1], "Gabor Imaginary Part"),
        (gabor_magnitude, "Gabor Magnitude Part"),
        (imag_attention_map[:, :, ::-1], "Gabor Imaginary Attention"),
        (magnitude_attention_map[:, :, ::-1], "Gabor Magnitude Attention"),
    ]
    for axis, (panel, title) in zip(ax[i], panels):
        axis.imshow(panel)
        axis.set_title(title)

# Gabor bank feature vector

In [10]:
dataset.reset_index()

# NOTE(review): this sample is replaced on the next statement; the call is kept
# only because it may advance the dataset's internal position — confirm and drop if not needed.
image, labels = dataset.get_item()

# Work on the last `cancer_image` loaded by the class-comparison cell.
# That image was already flipped to RGB channel order when it was read.
image = cancer_image

gabor_transformer = GaborTransformer(frequency=1/100, theta=np.pi/4, sigma_x=5, sigma_y=5)
lbp_transformer = LBPTransformer(p=8, r=1)

gabor_filter_bank = gabor_transformer.transform(image)

# Keep pixels whose imaginary response is positive. Every channel uses the same
# `> 0` mask (matching the texture-study cell); comparing against the channel
# index (0, 1, 2) would mask each channel differently.
imag_attention_map = image.copy()
imag_attention_map[:, :, :3] = image[:, :, :3] * (gabor_filter_bank[1] > 0)[:, :, np.newaxis]

fig, ax = plt.subplots(1, 4, figsize=(20, 20))

# `image` is already RGB, so it is shown as-is; reversing the channels again would swap red and blue.
ax[0].imshow(image)
ax[0].set_title("Original Image")
ax[1].imshow(imag_attention_map)
ax[1].set_title("Gabor Imaginary Attention")
ax[2].imshow(lbp_transformer.transform(gabor_filter_bank[1]))
ax[2].set_title("LBP of Gabor Imaginary Part")
ax[3].imshow(lbp_transformer.transform(imag_attention_map))
ax[3].set_title("LBP of Attention Map")

# Gabor banks to describe malignant and benign tumors

The idea here is to describe each image through the distribution of its filter responses (mean and standard deviation).

Then look for statistical differences (90% CI) that will help me identify the anomalies (malignant tumors).

In [None]:
gabor_filter_bank = [GaborTransformer(frequency=1/100, theta=theta, sigma_x=5, sigma_y=5) for theta in [np.pi, np.pi/4, np.pi/8, np.pi/16, np.pi/32]]

IMAGE_WIDTH = IMAGE_HEIGHT = int(config["IMAGE_WIDTH"])

# Filter the metadata once; the first column holds the image file stem.
malignant_file_stems = train_csv.query("target==1").iloc[:, 0].tolist()
n_filters = len(gabor_filter_bank)

# One slot per (image, filter) pair; image i with filter j lives at i * n_filters + j.
bank_shape = (len(malignant_file_stems) * n_filters, IMAGE_HEIGHT, IMAGE_WIDTH)
filter_bank_magnitude = np.zeros(bank_shape)
filter_bank_imaginary = np.zeros(bank_shape)
filter_bank_real = np.zeros(bank_shape)

for i, file_stem in enumerate(tqdm(malignant_file_stems)):
    cancer_image_file_name = file_stem + ".jpg"
    # cv2.imread returns BGR; reverse the channel axis to get RGB.
    cancer_image = cv2.imread(os.path.join(config["TRAIN_IMAGES_PATH"], cancer_image_file_name), cv2.IMREAD_COLOR)[:, :, ::-1]
    # Resize to the configured size (dsize is (width, height)) so the image matches
    # the size the response slots were allocated with.
    cancer_image = cv2.resize(cancer_image, (IMAGE_WIDTH, IMAGE_HEIGHT), interpolation=cv2.INTER_CUBIC)

    for j, gabor_filter in enumerate(gabor_filter_bank):
        real, img = gabor_filter.transform(cancer_image)
        slot = i * n_filters + j
        filter_bank_magnitude[slot] = np.sqrt(real**2 + img**2)
        filter_bank_imaginary[slot] = img
        filter_bank_real[slot] = real

In [None]:
# Sum of the last slot of the imaginary bank (presumably a check that the final slot was filled).
filter_bank_imaginary[-1].sum()

In [None]:
fig, ax = plt.subplots(5, 3, figsize=(5, 10))

# First bank slot to display. 0 is the first image; each image fills 5 consecutive
# slots, one per filter in the bank.
offset = 0
# Subplot rows are counted from 0 independently of `offset`, so a non-zero
# offset does not index past the 5-row grid.
for row, slot in enumerate(range(offset, offset+5)):
    ax[row, 0].imshow(filter_bank_real[slot])
    ax[row, 1].imshow(filter_bank_imaginary[slot])
    ax[row, 2].imshow(filter_bank_magnitude[slot])

ax[0, 0].set_title("Real")
ax[0, 1].set_title("Imaginary")
ax[0, 2].set_title("Magnitude")

In [None]:
# Shape of a single stored response map.
filter_bank_real[10].shape

In [None]:
gabor_filter_bank = [GaborTransformer(frequency=1/100, theta=theta, sigma_x=5, sigma_y=5) for theta in [np.pi, np.pi/4, np.pi/8, np.pi/16, np.pi/32]]

IMAGE_WIDTH = IMAGE_HEIGHT = int(config["IMAGE_WIDTH"])

# Only the first 1000 target == 0 rows are processed. The metadata is filtered
# once; the first column holds the image file stem.
non_malignant_file_stems = train_csv.query("target==0").head(1000).iloc[:, 0].tolist()
n_filters = len(gabor_filter_bank)

# One slot per (image, filter) pair; image i with filter j lives at i * n_filters + j.
bank_shape = (len(non_malignant_file_stems) * n_filters, IMAGE_HEIGHT, IMAGE_WIDTH)
filter_bank_magnitude_non_malignant = np.zeros(bank_shape)
filter_bank_imaginary_non_malignant = np.zeros(bank_shape)
filter_bank_real_non_malignant = np.zeros(bank_shape)

for i, file_stem in enumerate(tqdm(non_malignant_file_stems)):
    non_cancer_image_file_name = file_stem + ".jpg"
    # cv2.imread returns BGR; reverse the channel axis to get RGB.
    non_cancer_image = cv2.imread(os.path.join(config["TRAIN_IMAGES_PATH"], non_cancer_image_file_name), cv2.IMREAD_COLOR)[:, :, ::-1]
    # Resize to the configured size (dsize is (width, height)) so the image matches
    # the size the response slots were allocated with.
    non_cancer_image = cv2.resize(non_cancer_image, (IMAGE_WIDTH, IMAGE_HEIGHT), interpolation=cv2.INTER_CUBIC)

    for j, gabor_filter in enumerate(gabor_filter_bank):
        real, img = gabor_filter.transform(non_cancer_image)
        slot = i * n_filters + j
        filter_bank_magnitude_non_malignant[slot] = np.sqrt(real**2 + img**2)
        filter_bank_imaginary_non_malignant[slot] = img
        filter_bank_real_non_malignant[slot] = real

In [None]:
fig, ax = plt.subplots(5, 3, figsize=(5, 10))

# First bank slot to display. 0 is the first image; each image fills 5 consecutive
# slots, one per filter in the bank.
offset = 0
# Subplot rows are counted from 0 independently of `offset`, so a non-zero
# offset does not index past the 5-row grid.
for row, slot in enumerate(range(offset, offset+5)):
    ax[row, 0].imshow(filter_bank_real_non_malignant[slot])
    ax[row, 1].imshow(filter_bank_imaginary_non_malignant[slot])
    ax[row, 2].imshow(filter_bank_magnitude_non_malignant[slot])

ax[0, 0].set_title("Real")
ax[0, 1].set_title("Imaginary")
ax[0, 2].set_title("Magnitude")

In [None]:
# Mean and standard deviation over the strictly positive imaginary responses only.
# The mask and the selected values are computed once and shared by both statistics.
positive_imaginary_non_malignant = filter_bank_imaginary_non_malignant[filter_bank_imaginary_non_malignant > 0]
positive_imaginary_non_malignant.mean(), positive_imaginary_non_malignant.std()

In [None]:
# Mean and standard deviation over every value of the non-malignant real responses.
filter_bank_real_non_malignant.mean(),filter_bank_real_non_malignant.std()

In [None]:
# Mean and standard deviation over every value of the non-malignant magnitudes.
filter_bank_magnitude_non_malignant.mean(),filter_bank_magnitude_non_malignant.std()

In [None]:
# Mean and standard deviation over the strictly positive imaginary responses only.
# The mask and the selected values are computed once and shared by both statistics.
positive_imaginary = filter_bank_imaginary[filter_bank_imaginary > 0]
positive_imaginary.mean(), positive_imaginary.std()

In [None]:
# Mean and standard deviation over every value of the malignant real responses.
filter_bank_real.mean(),filter_bank_real.std()

In [None]:
# Mean and standard deviation over every value of the malignant magnitudes.
filter_bank_magnitude.mean(),filter_bank_magnitude.std()

# Remove the most common color of each image

Most of the image surface is covered by skin; the lesion is just a small part of the image.

In [None]:
# Pick the malignant row at position 100; the first column holds the image file stem.
cancer_image_file_name = train_csv.query("target==1").iloc[100, 0] + ".jpg"

# cv2.imread returns BGR; reverse the channel axis to get RGB.
cancer_image = cv2.imread(os.path.join(config["TRAIN_IMAGES_PATH"], cancer_image_file_name), cv2.IMREAD_COLOR)[:, : , ::-1]
cancer_image = cv2.resize(cancer_image, (128, 128), interpolation=cv2.INTER_CUBIC)

# Otsu thresholding on a blurred grayscale image. Other options to explore:
# clustering, deep-learning segmentation, PDE-based segmentation.
cancer_image_gray_scale = cv2.cvtColor(cancer_image, cv2.COLOR_RGB2GRAY)
cancer_image_gray_scale = cv2.GaussianBlur(cancer_image_gray_scale, (5, 5), 0)

otsu_threshold, image_result = cv2.threshold(
    cancer_image_gray_scale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU,
)

print("Obtained threshold: ", otsu_threshold)

# Keep the pixels darker than the Otsu threshold and zero the rest. The mask is
# built once and broadcast over the three colour channels.
dark_mask = cancer_image_gray_scale < otsu_threshold
segmented_image_cancer_image = cancer_image * dark_mask[:, :, np.newaxis]

fig, ax = plt.subplots(1, 7, figsize=(15, 15))

# `lbp_transformer` comes from an earlier cell.
# NOTE(review): the last panel multiplies by `image_result`, which is 255 on the
# pixels ABOVE the threshold — the opposite region to `dark_mask`. Confirm this is intended.
panels = [
    (cancer_image, None, "Original"),
    (cancer_image_gray_scale, "gray", "Blurred grayscale"),
    (image_result, "gray", "Otsu binary"),
    (segmented_image_cancer_image, None, "Segmented"),
    (lbp_transformer.transform(segmented_image_cancer_image), "gray", "LBP of segmented"),
    (lbp_transformer.transform(cancer_image), "gray", "LBP of original"),
    (lbp_transformer.transform(cancer_image)*image_result, "gray", "LBP x Otsu binary"),
]
for axis, (panel, cmap, title) in zip(ax, panels):
    axis.imshow(panel, cmap)
    axis.set_title(title)

In [None]:
# Pick the target == 0 row at position 2; the first column holds the image file stem.
# The `cancer_image*` names are kept because a later cell reads `cancer_image`.
cancer_image_file_name = train_csv.query("target==0").iloc[2, 0] + ".jpg"

# cv2.imread returns BGR; reverse the channel axis to get RGB.
cancer_image = cv2.imread(os.path.join(config["TRAIN_IMAGES_PATH"], cancer_image_file_name), cv2.IMREAD_COLOR)[:, : , ::-1]
cancer_image = cv2.resize(cancer_image, (128, 128), interpolation=cv2.INTER_CUBIC)

# Otsu thresholding on a blurred grayscale image. Other options to explore:
# clustering, deep-learning segmentation, PDE-based segmentation.
cancer_image_gray_scale = cv2.cvtColor(cancer_image, cv2.COLOR_RGB2GRAY)
cancer_image_gray_scale = cv2.GaussianBlur(cancer_image_gray_scale, (5, 5), 0)

otsu_threshold, image_result = cv2.threshold(
    cancer_image_gray_scale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU,
)

print("Obtained threshold: ", otsu_threshold)

# Keep the pixels darker than the Otsu threshold and zero the rest. The mask is
# built once and broadcast over the three colour channels.
dark_mask = cancer_image_gray_scale < otsu_threshold
segmented_image_cancer_image = cancer_image * dark_mask[:, :, np.newaxis]

fig, ax = plt.subplots(1, 4, figsize=(15, 15))

panels = [
    (cancer_image, None, "Original"),
    (cancer_image_gray_scale, "gray", "Blurred grayscale"),
    (image_result, "gray", "Otsu binary"),
    (segmented_image_cancer_image, None, "Segmented"),
]
for axis, (panel, cmap, title) in zip(ax, panels):
    axis.imshow(panel, cmap)
    axis.set_title(title)

# Color

Skin color varies from person to person, which could introduce noise, so I will work with grayscale images.

# Removing black bars

In [None]:
from prototypes.classical.segmentation.transformers import BlackBarsRemover, OtsuThresholdingSegmentation

In [None]:
# Shape of the image left in `cancer_image` by the previous segmentation cell.
cancer_image.shape

In [None]:
from prototypes.classical.segmentation.transformers import BlackBarsRemover, OtsuThresholdingSegmentation


# Load the malignant row at position 1 as a 128x128 grayscale image.
# The first column holds the image file stem.
malignant_rows = train_csv.query("target==1")
cancer_image_file_name = malignant_rows.iloc[1, 0] + ".jpg"
cancer_image_path = os.path.join(config["TRAIN_IMAGES_PATH"], cancer_image_file_name)

# Read as BGR, flip to RGB, resize, then convert to grayscale.
cancer_image = cv2.cvtColor(
    cv2.resize(
        cv2.imread(cancer_image_path, cv2.IMREAD_COLOR)[:, : , ::-1],
        (128, 128),
        interpolation=cv2.INTER_CUBIC,
    ),
    cv2.COLOR_RGB2GRAY,
)


def remove_black_bars(image):
    """Crop black border bars from an image and scale the result back to the input size.

    A row (or column) counts as black when the sum of its values is smaller
    than its number of pixels, i.e. its mean intensity is below 1.

    Parameters
    ----------
    image : numpy.ndarray
        Image of shape (height, width) or (height, width, channels).

    Returns
    -------
    numpy.ndarray
        The region between the first and last non-black row and column,
        resized to the original width and height. A copy of the input is
        returned unchanged when it is empty or has no non-black row or column.
    """
    if image.size == 0:
        return image.copy()

    height, width = image.shape[:2]

    # Sum in float64 over every axis except the one being scanned.
    values = np.asarray(image, dtype=np.float64)
    row_sums = values.reshape(height, -1).sum(axis=1)
    column_sums = values.sum(axis=0).reshape(width, -1).sum(axis=1)

    # A row has `width` pixels and a column has `height` pixels.
    content_rows = np.flatnonzero(row_sums >= width)
    content_columns = np.flatnonzero(column_sums >= height)

    # Nothing but black: there is no content to crop to.
    if content_rows.size == 0 or content_columns.size == 0:
        return image.copy()

    # Bounding box of the content. Bars on all four sides are removed, and a
    # dark row or column inside the picture no longer moves the crop.
    top, bottom = content_rows[0], content_rows[-1] + 1
    left, right = content_columns[0], content_columns[-1] + 1

    # cv2.resize takes the target size as (width, height).
    return cv2.resize(image[top:bottom, left:right], (width, height), interpolation=cv2.INTER_CUBIC)


br = BlackBarsRemover()
otsu_threshold = OtsuThresholdingSegmentation()

fig, ax = plt.subplots(1, 4, figsize=(15, 15))
ax[0].imshow(cancer_image, "gray")
ax[0].set_title("Grayscale input")

# Each stage runs once and its result is reused. The transformers are given a
# one-element list and element 0 of their output is the processed image.
images_without_bars = br.transform([cancer_image])
ax[1].imshow(images_without_bars[0], "gray")
ax[1].set_title("Black bars removed")

segmented_images = otsu_threshold.transform(images_without_bars)
ax[2].imshow(segmented_images[0], "gray")
ax[2].set_title("Otsu segmentation")

# One erosion pass with a 3x3 kernel of ones.
erosion = cv2.erode(segmented_images[0], np.ones((3, 3)), iterations = 1)
ax[3].imshow(erosion, "gray")
ax[3].set_title("Eroded segmentation")

# Batches

In [None]:
# Fresh data loader for the batch experiments below.
dataset = IsiCancerData(config_file=config)

In [None]:
# Pull a single batch to inspect its structure.
batch = next(iter(dataset.get_next_batch()))

In [None]:
# Shapes of the two batch elements (collected below as images and labels).
batch[0].shape, batch[1].shape

In [None]:
from tqdm.auto import tqdm

# Drain the batch iterable, collecting images and labels in two parallel lists.
images_batches, labels_batches = [], []

# NOTE(review): `total` is the sample count; if a batch holds more than one
# sample the progress bar will stop short of 100% — confirm.
batch_progress = tqdm(dataset.get_next_batch(), total=dataset.total_samples())
for batch in batch_progress:
    images_batches.append(batch[0])
    labels_batches.append(batch[1])

In [None]:
# Shape of the first collected image batch.
images_batches[0].shape

In [None]:
# Largest value in the first image of the first batch (shows the value range in use).
images_batches[0][0].max()

In [None]:
# Last image of the first batch, cast to uint8 and shown with the channel axis reversed.
plt.imshow(images_batches[0][-1].astype(np.uint8)[:,:,::-1])

In [None]:
# Third image of the first batch, cast to uint8 and shown with the channel axis reversed.
plt.imshow(images_batches[0][2].astype(np.uint8)[:,:,::-1])