In [None]:
import os
from skimage import io, segmentation
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
import numpy as np
import pandas as pd
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics import calinski_harabasz_score, silhouette_score
from utils.mask_manager import MaskManager
from utils.image_manager import ImagesManager

In [None]:
# Pretrained feature extractor: VGG16 with ImageNet weights.
# include_top=False leaves out the fully-connected classifier layers, so calling
# the model yields convolutional feature maps rather than class scores.
# This module-level `model` is read as a global by get_superpixels_information.
model = VGG16(weights='imagenet', include_top=False)

In [None]:
# Load a single debug image from disk.
# NOTE(review): `img` is not referenced by any later cell visible here (the
# functions below take their own `img` argument) - confirm this cell is still needed.
img = image.load_img('./data/debug/resized_dog.30.jpg')

In [None]:
def get_smalles_patch(img, mask, num_superpixel):
    """Crop the tightest axis-aligned box around one superpixel.

    Parameters
    ----------
    img : array-like
        Image to crop. Its leading axes must line up with the axes of ``mask``;
        any extra trailing axes (e.g. colour channels) are kept whole.
    mask : array-like
        Label array in which each entry holds the label of the superpixel that
        position belongs to.
    num_superpixel : scalar
        Label of the superpixel to extract.

    Returns
    -------
    The slice of ``img`` spanning, along every mask axis, the minimum to the
    maximum index at which ``mask == num_superpixel``. Positions inside the box
    that belong to other superpixels are included unchanged.

    Raises
    ------
    ValueError
        If no entry of ``mask`` equals ``num_superpixel``.
    """
    idx_mask = np.nonzero(np.asarray(mask) == num_superpixel)
    if idx_mask[0].size == 0:
        # Without this guard the min/max reductions fail with an opaque
        # "zero-size array" error.
        raise ValueError(
            'No pixel in mask has label {!r}'.format(num_superpixel))

    # One slice per mask axis; the upper bound is +1 because slices are half-open.
    bounding_box = tuple(
        slice(axis_idx.min(), axis_idx.max() + 1) for axis_idx in idx_mask
    )
    return img[bounding_box]

def get_superpixels_information(path_to_images, images_extension='jpg',
                                verbose=False,
                                kernel_size=12, max_dist=10, ratio=.3,
                                target_size=(224, 224)):
    """Compute one VGG16 feature vector per superpixel for every image in a folder.

    For each image name returned by ``ImagesManager``, a label mask is obtained
    from ``MaskManager.get_mask``; for every distinct label in that mask the
    bounding-box patch is cropped, resized to ``target_size``, preprocessed with
    the VGG16 ``preprocess_input`` and passed through the module-level ``model``.

    Side effects: each patch is temporarily written to ``path_to_images`` (as
    ``<image_name>_<label>.<images_extension>``) so it can be reloaded at
    ``target_size``; the file is always removed again, even on failure.
    Progress is printed, and figures are shown when ``verbose`` is true.

    Parameters
    ----------
    path_to_images : str
        Folder handed to ``ImagesManager`` / ``MaskManager`` and used for the
        temporary patch files.
    images_extension : str
        Extension handed to ``ImagesManager`` and used for the temporary files.
    verbose : bool
        When true, display the segmentation and every patch before/after resizing.
    kernel_size, max_dist, ratio :
        Forwarded unchanged to ``MaskManager.get_mask``.
        NOTE(review): presumably quickshift-style segmentation parameters -
        confirm in MaskManager.
    target_size : tuple
        Size passed to ``image.load_img`` when reloading each patch.

    Returns
    -------
    df_features : pandas.DataFrame
        One row per (image, superpixel) with the flattened model output as
        columns, indexed by ``('image_name', 'superpixel_label')``.
    dict_masks : dict
        Maps each image name to its label mask.
    total_zeros : list
        For every patch, the number of feature values ``<= 1e-7``.
    list_num_superpixels : list
        For every image, the number of distinct labels in its mask.
    """
    images_manager = ImagesManager(location=path_to_images, images_extension=images_extension)
    images_names = images_manager.get_images_names()

    dict_masks = {}
    list_image_name = []
    list_num_superpixels = []
    list_superpixel_label = []
    list_superpixels_features = []
    total_zeros = []
    for image_name in images_names:
        print('Working on image:', image_name)
        masks_manager = MaskManager(location=path_to_images, image_name=image_name)
        mask = masks_manager.get_mask(kernel_size=kernel_size, max_dist=max_dist, ratio=ratio)
        dict_masks[image_name] = mask
        current_image = images_manager.get_image(image_name)
        if verbose:
            io.imshow(segmentation.mark_boundaries(current_image, mask, color=(1,0,0), mode='inner'))
            plt.show()

        list_superpixels = np.unique(mask)
        print('Number of superpixels:', len(list_superpixels))
        list_num_superpixels.append(len(list_superpixels))

        for sp in list_superpixels:
            # Tightest bounding box around the current superpixel.
            smallest_patch = get_smalles_patch(img=current_image, mask=mask, num_superpixel=sp)
            if verbose:
                print('\t---------------------------------------------')
                print('\tBefore resizing')

                io.imshow(smallest_patch/255)
                plt.show()

            # Round-trip through a temporary image file so that load_img can
            # resize the patch to the desired target_size.
            patch_name = '{}_{}.{}'.format(image_name, sp, images_extension)
            patch_full_path = os.path.join(path_to_images, patch_name)
            try:
                io.imsave(patch_full_path, smallest_patch)
                smallest_patch = image.load_img(patch_full_path, target_size=target_size)
                smallest_patch = image.img_to_array(smallest_patch)
            finally:
                # Always clean up: a leftover patch would sit next to the real
                # images in the data folder.
                if os.path.exists(patch_full_path):
                    os.remove(patch_full_path)

            if verbose:
                print('\tAfter resizing')
                io.imshow(smallest_patch/255)
                plt.show()

            # Get the features. VGG16 expects inputs prepared by its own
            # preprocess_input, so apply it to the batch before the forward pass
            # (done after the optional display above, which needs raw 0-255 values).
            batch = preprocess_input(np.expand_dims(smallest_patch, axis=0))
            features = np.array(model(batch)).flatten()
            total_zeros.append(np.sum(features <= 10**(-7)))

            # Append information
            list_image_name.append(image_name)
            list_superpixel_label.append(sp)
            list_superpixels_features.append(features)

    # Build the table once, after all images are processed; this also keeps
    # df_features defined when the folder contains no images.
    df_features = pd.DataFrame(np.array(list_superpixels_features))
    df_features['image_name'] = list_image_name
    df_features['superpixel_label'] = list_superpixel_label
    df_features = df_features.set_index(['image_name', 'superpixel_label'])
    return df_features, dict_masks, total_zeros, list_num_superpixels

In [None]:
# Extract per-superpixel features for every image in the debug folder.
df_features, dict_masks, total_zeros, num_superpixels = get_superpixels_information(
    './data/debug',
    kernel_size=12,
    max_dist=5,
    ratio=0.3,
    verbose=False,
)

In [None]:
# Preview the first rows of the feature table (one row per superpixel patch,
# indexed by image_name and superpixel_label).
df_features.head()

In [None]:
# Average number of (near-)zero activations per patch: each entry of total_zeros
# counts the feature values <= 1e-7 for one superpixel.
np.mean(total_zeros)

In [None]:
class Clusteriser:
    """Group feature vectors with agglomerative clustering and pick the best split.

    Parameters
    ----------
    num_superpixels : sequence of numbers
        Superpixel counts; their rounded mean is the exclusive upper bound of
        the numbers of clusters that get evaluated.
    df : array-like or pandas.DataFrame
        Feature matrix handed to scikit-learn, one row per sample.
    """

    def __init__(self, num_superpixels, df):
        self.num_superpixels = num_superpixels
        self.df = df

    def get_fixed_clusters(self, n_cluster):
        """Cluster ``self.df`` into ``n_cluster`` groups.

        Uses complete-linkage agglomerative clustering with L1 distances and
        scores the result with the (euclidean) silhouette coefficient.
        NOTE(review): clustering uses L1 while the score uses euclidean
        distances - confirm this mix is intended.

        Returns
        -------
        (metric, labels) : the silhouette score and the per-row cluster labels.
        """
        try:
            # scikit-learn >= 1.2 names the distance argument `metric`.
            cluster = AgglomerativeClustering(n_clusters=n_cluster, linkage='complete', metric='l1')
        except TypeError:
            # Older releases only know the former name, `affinity`.
            cluster = AgglomerativeClustering(n_clusters=n_cluster, linkage='complete', affinity='l1')
        labels = cluster.fit(self.df).labels_
        # calinski_harabasz_score(self.df, labels) is an alternative criterion.
        metric = silhouette_score(self.df, labels, metric='euclidean')

        return metric, labels

    def get_mutiple_clusters(self):
        """Score every number of clusters from 2 up to the mean superpixel count.

        The upper bound (exclusive) is the rounded mean of ``num_superpixels``.
        Each result is printed as it is computed.

        Returns
        -------
        (n_clusters_list, metric_list) : parallel lists of the evaluated numbers
        of clusters and their scores; both are empty when the rounded mean is
        below 3.

        Raises
        ------
        ValueError
            If ``num_superpixels`` is empty.
        """
        if np.size(self.num_superpixels) == 0:
            raise ValueError('num_superpixels is empty: cannot derive the range of clusters.')

        # Coerce to a plain int: range() rejects the NumPy float that round()
        # can return for NumPy scalars on some versions.
        mean_superpixels = int(round(float(np.mean(self.num_superpixels))))
        n_clusters_list = []
        metric_list = []

        for n_cluster in range(2, mean_superpixels):
            metric, _ = self.get_fixed_clusters(n_cluster)
            metric_list.append(metric)
            n_clusters_list.append(n_cluster)
            print('n_cluster:', n_cluster, 'metric:', metric)

        return n_clusters_list, metric_list

    def get_best_clusterisation(self):
        """Return the labels of the clustering with the highest score.

        Runs ``get_mutiple_clusters``, selects the number of clusters with the
        maximum score (first one on ties) and re-fits with that number.

        Returns
        -------
        numpy.ndarray with one cluster label per row of ``self.df``.

        Raises
        ------
        ValueError
            If there is no candidate number of clusters to evaluate.
        """
        n_clusters_list, metric_list = self.get_mutiple_clusters()
        if not metric_list:
            raise ValueError(
                'No candidate number of clusters: the rounded mean number of '
                'superpixels must be at least 3.')
        idx_best_metric = np.argmax(metric_list)
        best_n_clusters = n_clusters_list[idx_best_metric]

        _, labels = self.get_fixed_clusters(best_n_clusters)
        return np.array(labels)

In [None]:
# Cluster all superpixel feature vectors: sweep the number of clusters and keep
# the labelling with the highest silhouette score (one label per row of df_features).
cluster = Clusteriser(num_superpixels=num_superpixels, df=df_features)
new_segmentation = cluster.get_best_clusterisation()