In [None]:

bandwidth=estimate_bandwidth(X,quantile=0.2,n_samples=500)
ms=MeanShift(bandwidth=bandwidth,bin_seeding=True)
ms.fit(X)
segmented_image=ms.labels_
segmented_image.shape=image.shape
imshow(segmented_image)

In [None]:
from sklearn.cluster import MeanShift,estimate_bandwidth
def images_segmentation(train_path=train_set_path, test_path=test_set_path):

    print('-- REGION-BASED SEGMENTATION --')
    
    # Python dictionaries containing the properties of the regions representing
    # the lesions (subdivided into training and test set)
    segmented_train_set = {}
    segmented_test_set = {}
    
    if not os.path.isdir(train_path):
        train_path = train_set_path
    # get absolute path of the training set
    abs_train_path = os.path.abspath(train_path)

    if not os.path.isdir(test_path):
        test_path = test_set_path
    # get absolute path of the test set
    abs_test_path = os.path.abspath(test_path)

    # list of images in the training set
    train_imgs = [img_file for img_file in os.listdir(abs_train_path)
                  if os.path.isfile(os.path.join(abs_train_path, img_file)) and img_file.endswith('.jpg')]
    # list of images in the test set
    test_imgs = [img_file for img_file in os.listdir(abs_test_path)
                 if os.path.isfile(os.path.join(abs_test_path, img_file)) and img_file.endswith('.jpg')]

    img_list = train_imgs + test_imgs

    for idx, image_name in enumerate(img_list):
        if idx < plot_limit:
            print('{:_<100}'.format(''))
            print('Image name: {}'.format(image_name))

        # get image path
        if image_name in train_imgs:
            image_path = os.path.join(abs_train_path, image_name)
        elif image_name in test_imgs:
            image_path = os.path.join(abs_test_path, image_name)
        else:
            print('Error: Cannot find {}'.format(image_name))
            return None, None
        # read image
        image = io.imread(image_path)
        # convert the original image into grayscale
        gray_img = color.rgb2gray(image)

        # 1] Apply Sobel filter
        elevation_map = filters.sobel(gray_img)

        # 2] Build image markers using the threshold obtained through the ISODATA filter
        markers = np.zeros_like(gray_img)
        threshold = filters.threshold_isodata(gray_img)
        markers[gray_img > threshold] = 1
        markers[gray_img < threshold] = 2
        
        # 3] Apply Wathershed algorithm in order to segment the image filtered
        #    using the markers
        
        bandwidth=estimate_bandwidth(elevation_map,quantile=0.2,n_samples=500)
        ms=MeanShift(bandwidth=bandwidth,bin_seeding=True)
        ms.fit(elevation_map)
        segmented_image=ms.labels_
        segmented_image.shape=image.shape
        imshow(segmented_image)
        # 4] Improve segmantation:
        #    >  Fill small holes 
        segmented_img = ndi.binary_fill_holes(segmented_img - 1)
        #    > Remove small objects that have an area less than 800 px:
        #      this could be useful to exclude some regions that does not represent a lesion
        segmented_img = morphology.remove_small_objects(segmented_img, min_size=800)
        #    > Clear regions connected to the image borders.
        #      This operation is very useful when there are contour regions have a
        #      big area and so they can be exchanged with the lesion.
        #      However, this can also create some issues when the lesion region is
        #      connected to the image borders. In order to (try to) overcome this
        #      issue, we use a lesion identification algorithm (see below)
        img_border_cleared = segmentation.clear_border(segmented_img)

        # 5] Apply connected components labeling algorithm:
        #    it assigns labels to a pixel such that adjacent pixels of the same
        #    features are assigned the same label.
        # labeled_img, _ = ndi.label(segmented_img)
        labeled_img = morphology.label(img_border_cleared)

        if idx < plot_limit:
            # create a subplot of 3 figures in order to show elevation map,
            # markers and the segmanted image
            fig, ax = plt.subplots(1, 3, figsize=(10, 8))
            ax[0].imshow(elevation_map, cmap=plt.cm.gray)
            ax[0].set_title('elevation map')
            ax[0].set_axis_off()

            ax[1].imshow(markers, cmap=plt.cm.nipy_spectral)
            ax[1].set_title('markers')
            ax[1].set_axis_off()

            ax[2].imshow(segmented_img, cmap=plt.cm.gray)
            ax[2].set_title('segmentation')
            ax[2].set_axis_off()

            plt.tight_layout()
            plt.show();
        
        # 6] Lesion identification algorithm:
        # Compute properties of labeled image regions:
        # it will be used to automatically select the region that contains
        # the skin lesion according to area and extent
        props = measure.regionprops(labeled_img)
        # num labels -> num regions
        num_labels = len(props)
        # Get all the area of detected regions
        areas = [region.area for region in props]

        # If we have at least one region and the area of the region having the
        # biggest area is at least 1200 px, we choose it as the region that
        # contains the leson because if properly segmented (i.e., after removing
        # small objects and regions on the image contours (since in most of the
        # images, the lesion is in the center))
        if num_labels > 0 and areas[np.argmax(areas)] >= 1200:
            if idx < plot_limit:
                print('Num labels:', num_labels)
                print('Areas: {}'.format(areas))
            target_label = props[np.argmax(areas)].label
        else:
            # ... otherwise we could have one of the following two cases:
            # 1] num_labels == 0:
            #    this can happen when there is only one region (the one containing the lesion)
            #    but it has been deleted when applying the function segmentation.clear_border()
            # 2] num_labels > 0 but areas[np.argmax(areas)] < 1200:
            #    it means that there exists at least one region but all the regions have
            #    an area less than 1200 pixels.
            #    This can happen when the region containing the lesion is deleted
            #    with segmentation.clear_border() but there still other regions that
            #    could be "exhanged for" a lesion region
            #
            # Since both cases can be due to the deletion of the segmented area
            # because of the use of segmentation.clear_border(), the idea is to
            # backtrack to the original segmented image (the one obtained 
            # obtained before applying segmentation.clear_border()), apply again the
            # connected components labeling algorithm and extract the new region properties.
            # In addition, in order to find the region representing the lesion,
            # we use area and extent features by checking among the three largest regions
            # (if any because there could be only one or two regions) sorted in ascending order
            # (i.e. from the one having the largest area) the first that has an extent grater than 0.5.
            labeled_img = morphology.label(segmented_img)
            # Get new region properties
            props = measure.regionprops(labeled_img)
            # Get the new list of areas
            areas = [region.area for region in props]
            # List of regions' extent.
            # Each extent is defined as the ratio of pixels in the region  to pixels
            # in the total bounding box (computed as: area / (rows * cols))
            extents = [region.extent for region in props]
            if idx < plot_limit:
                print('Num labels: {}'.format(len(props)))
                print('Areas: {}'.format(areas))
                print('Extents: {}'.format(extents))
            # Get the index of the region having the largest area and if there are
            # more than one or two regions, find also the index of the second and
            # third most largest regions.
            region_max1 = np.argmax(areas)
            if len(props) > 1:
                areas_copy = areas.copy()
                areas_copy[region_max1] = 0
                region_max2 = np.argmax(areas_copy)
            if len(props) > 2:
                areas_copy[region_max2] = 0
                region_max3 = np.argmax(areas_copy)

            # If the largest region has an extent greater than 0.50, it is our target region
            if extents[region_max1] > 0.50:
                target_label = props[region_max1].label
            # ... else check if the extent of the second largest region is greater than 0.5,
            # and if so we have found our target region
            elif len(props) > 1 and extents[region_max2] > 0.50:
                target_label = props[region_max2].label
            # ... else if the third largest region has an extent greater than 0.50,
            # it is (more probably) the one containing the lesion
            elif len(props) > 2 and extents[region_max3] > 0.50:
                target_label = props[region_max3].label
            # ... otherwise we choose the largest region
            else:
                target_label = props[region_max1].label

            # NOTE: another possible approarch could be to select as the target region
            #       the one having the largest extent among the 3 largest regions found:
            # if len(props) > 2:
            #     extents_largest_reg = [val if idx in (region_max1, region_max2, region_max3) else 0.0
            #                            for idx, val in enumerate(extents)]
            # elif len(props) > 1:
            #     extents_largest_reg = [val if idx in (region_max1, region_max2) else 0.0
            #                            for idx, val in enumerate(extents)]
            # else:
            #     extents_largest_reg = [val if idx in (region_max1,) else 0.0
            #                            for idx, val in enumerate(extents)]
            # target_label = props[np.argmax(extents_largest_reg)].label

        # assign label 0 to all the pixels that are not in the target region (that is
        # the ragion that more probably contains the lesion)
        for row, col in np.ndindex(labeled_img.shape):
            if labeled_img[row, col] != target_label:
                labeled_img[row, col] = 0
        # Convert the labeled image into its RGB version
        image_label_overlay = color.label2rgb(labeled_img, gray_img)

        if idx < plot_limit:
            print('Chosen label: {}'.format(target_label))
            # Plot the original image ('image') in which the contours of all the
            # segmented regions are highlighted
            fig, axes = plt.subplots(1, 2, figsize=(8, 6), sharey=True)
            axes[0].imshow(image)
            axes[0].contour(segmented_img, [0.5], linewidths=1.2, colors='y')
            axes[0].axis('off')
            # Plot 'image_label_overlay' that contains the target region highlighted
            axes[1].imshow(image_label_overlay)
            axes[1].axis('off')

            plt.tight_layout()
            plt.show();
        elif idx == plot_limit:
            print('Continuing segmentation without printing the results ...')

        # Add the the found region into the proper dictionary according to whether
        # the current image belongs to the training or the test set
        if image_name in train_imgs:
            segmented_train_set[image_name] = props[target_label - 1]
        elif image_name in test_imgs:
            segmented_test_set[image_name] = props[target_label - 1]

    return segmented_train_set, segmented_test_set

In [24]:
        image = io.imread("dataset/ISIC_0024306.jpg")
        # convert the original image into grayscale
        gray_img = color.rgb2gray(image)

        # 1] Apply Sobel filter
        elevation_map = filters.sobel(gray_img)

        # 2] Build image markers using the threshold obtained through the ISODATA filter
        markers = np.zeros_like(gray_img)
        threshold = filters.threshold_isodata(gray_img)
        markers[gray_img > threshold] = 1
        markers[gray_img < threshold] = 2
        
        # 3] Apply Wathershed algorithm in order to segment the image filtered
        #    using the markers
        X=elevation_map.reshape((-1,1))
        bandwidth=estimate_bandwidth(X,quantile=0.2,n_samples=500)
        ms=MeanShift(bandwidth=bandwidth,bin_seeding=True)
        ms.fit(elevation_map)
        segmented_image=ms.labels_
        segmented_image.shape=image.shape
        imshow(segmented_image)
        # 4] Improve segmantation:
        #    >  Fill small holes 
        segmented_img = ndi.binary_fill_holes(segmented_img - 1)
        #    > Remove small objects that have an area less than 800 px:
        #      this could be useful to exclude some regions that does not represent a lesion
        segmented_img = morphology.remove_small_objects(segmented_img, min_size=800)
        #    > Clear regions connected to the image borders.
        #      This operation is very useful when there are contour regions have a
        #      big area and so they can be exchanged with the lesion.
        #      However, this can also create some issues when the lesion region is
        #      connected to the image borders. In order to (try to) overcome this
        #      issue, we use a lesion identification algorithm (see below)
        img_border_cleared = segmentation.clear_border(segmented_img)

        # 5] Apply connected components labeling algorithm:
        #    it assigns labels to a pixel such that adjacent pixels of the same
        #    features are assigned the same label.
        # labeled_img, _ = ndi.label(segmented_img)
        labeled_img = morphology.label(img_border_cleared)

        if idx < plot_limit:
            # create a subplot of 3 figures in order to show elevation map,
            # markers and the segmanted image
            fig, ax = plt.subplots(1, 3, figsize=(10, 8))
            ax[0].imshow(elevation_map, cmap=plt.cm.gray)
            ax[0].set_title('elevation map')
            ax[0].set_axis_off()

            ax[1].imshow(markers, cmap=plt.cm.nipy_spectral)
            ax[1].set_title('markers')
            ax[1].set_axis_off()

            ax[2].imshow(segmented_img, cmap=plt.cm.gray)
            ax[2].set_title('segmentation')
            ax[2].set_axis_off()

            plt.tight_layout()
            plt.show();
        
        # 6] Lesion identification algorithm:
        # Compute properties of labeled image regions:
        # it will be used to automatically select the region that contains
        # the skin lesion according to area and extent
        props = measure.regionprops(labeled_img)
        # num labels -> num regions
        num_labels = len(props)
        # Get all the area of detected regions
        areas = [region.area for region in props]

ValueError: cannot reshape array of size 450 into shape (450,600,3)

In [None]:
import matplotlib.pyplot as plt
from pylab import *
from sklearn.cluster import MeanShift,estimate_bandwidth
# %matplotlib inline
import numpy as np
import os
import shutil
import warnings
import pandas as pd

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import svm
from sklearn.metrics import confusion_matrix, f1_score, precision_score, \
                            recall_score, accuracy_score, classification_report

import seaborn as sns; sns.set()

from skimage import io, exposure, morphology, filters, color, \
                    segmentation, feature, measure, img_as_float, img_as_ubyte
from skimage.color.adapt_rgb import adapt_rgb, each_channel, hsv_value
warnings.simplefilter("ignore")
from scipy import ndimage as ndi

from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten, \
                         Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
from keras.preprocessing.image import ImageDataGenerator

from keras.callbacks import Callback

In [13]:
from skimage import io

In [None]:
def createColorHistogram(img, binCount = 256, out = plt):
   
    img=(img.astype(float))/255.0
    img_hsv = colors.rgb_to_hsv(img[…,:3])
    img_hsv=img_hsv[…,0].flatten()
    return out.hist(img_hsv*360,binCount,range=(0.0,binCount), label=’Hue’)


In [None]:
def create_prob_distr(img, histogram, min_saturation, min_value, min_prob):
    (h_values,bin_edges,n) = histogram
    h_values = min_max_scaling(h_values)
    img = (img.astype(float))/255.0
    img_hsv = colors.rgb_to_hsv(img[…,:3])
    prob_distr = np.zeros((img_hsv.shape[0], img_hsv.shape[1]))
    for i in range(img_hsv.shape[0]):
        for j in range(img_hsv.shape[1]):
        
            bin_index = np.digitize(img_hsv[i][j][0]*360,bin_edges, right=True)
            if(img_hsv[i][j][1] < min_saturation or img_hsv[i][j][2] < min_value or h_values[bin_index-1] < min_prob):
                prob_distr[i][j] = 0.0
            else:
                prob_distr[i][j] = h_values[bin_index-1]
    return prob_distr