In [None]:
import os
from skimage import io, segmentation
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
import numpy as np
import pandas as pd
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics import calinski_harabasz_score, silhouette_score
from utils.mask_manager import MaskManager
from utils.image_manager import ImagesManager

In [None]:
# Location handed to ImagesManager (as `location`) when images are loaded.
path_to_images='./data/debug'
# Extension handed to ImagesManager (as `images_extension`); it is also captured as the
# default value of get_superpixels_information's `images_extension` parameter.
images_extension = 'jpg'

In [None]:
# Pretrained VGG16 with ImageNet weights and without its top (include_top=False).
# get_superpixels_information reads this global to compute the feature vector of each
# superpixel patch.
model = VGG16(weights='imagenet', include_top=False)

In [None]:
# Load the model that classifies the images. Its output is read for every original
# image and again for every image with one superpixel occluded.
classifier = keras.models.load_model('./model/model.h5')

In [None]:
def get_smalles_patch(img, mask, num_superpixel):
    """Crop `img` to the bounding box of one label in `mask`.

    The bounding box is the smallest axis-aligned box that contains every
    position where ``mask == num_superpixel``. Only the leading axes of `img`
    (one per axis of `mask`) are sliced; any remaining trailing axes are
    returned whole.

    Parameters
    ----------
    img : numpy.ndarray
        Array to crop. It is indexed with one slice per axis of `mask`.
    mask : numpy.ndarray
        Label array compared element-wise against `num_superpixel`.
    num_superpixel : scalar
        Label whose bounding box is extracted.

    Returns
    -------
    numpy.ndarray
        ``img`` restricted to the bounding box. This is plain slicing, so for
        a NumPy array the result shares memory with `img`.

    Raises
    ------
    ValueError
        If no element of `mask` equals `num_superpixel`.
    """
    # One coordinate array per mask axis, listing where the label occurs.
    coords = np.nonzero(mask == num_superpixel)
    if coords[0].size == 0:
        raise ValueError(
            'Superpixel label {!r} does not appear in the mask'.format(num_superpixel)
        )

    # Inclusive [min, max] range along every mask axis -> half-open slice.
    bounding_box = tuple(
        slice(axis_coords.min(), axis_coords.max() + 1) for axis_coords in coords
    )
    return img[bounding_box]

def get_superpixels_information(classifier_model, path_to_images, images_extension=images_extension,
                                verbose=False,
                                kernel_size=12, max_dist=10, ratio=.3,
                                target_size=(224, 224)):
    """Segment every image into superpixels and extract one feature vector per superpixel.

    For each image returned by ``ImagesManager`` the function:

    1. stores ``classifier_model``'s output for the whole image (pixel values
       divided by 255 first),
    2. obtains a superpixel mask from ``MaskManager.get_mask``,
    3. for every label in the mask, crops the bounding box of that superpixel,
       resizes it to ``target_size`` and runs it through the module-level
       ``model`` to obtain a flattened feature vector.

    Parameters
    ----------
    classifier_model
        Object with a ``predict`` method; called on a batch containing one image.
    path_to_images
        Passed to ``ImagesManager`` as ``location``.
    images_extension
        Passed to ``ImagesManager`` as ``images_extension``. The default is the
        value the module-level ``images_extension`` had when this function was
        defined.
    verbose : bool
        When true, shows the segmentation boundaries and every patch before and
        after resizing.
    kernel_size, max_dist, ratio
        Forwarded unchanged to ``MaskManager.get_mask``.
        NOTE(review): these look like quickshift parameters - confirm in MaskManager.
    target_size : tuple
        Size every patch is resized to with ``image.smart_resize``.

    Returns
    -------
    tuple
        ``(df_features, dict_masks, total_zeros, list_num_superpixels, dict_predictions)``:

        * ``df_features`` - DataFrame with one row per (image, superpixel), indexed by
          ``['image_name', 'superpixel_label']``; the columns are the flattened features.
        * ``dict_masks`` - image name -> mask returned by ``MaskManager.get_mask``.
        * ``total_zeros`` - per superpixel, the number of feature entries ``<= 1e-7``.
        * ``list_num_superpixels`` - per image, the number of distinct labels in its mask.
        * ``dict_predictions`` - image name -> ``pred[0][0]`` of the classifier output.

    Notes
    -----
    The feature extractor is the global ``model``; it is not a parameter.
    """
    images_manager = ImagesManager(location=path_to_images, images_extension=images_extension)
    images_names = images_manager.get_images_names()

    dict_predictions = {}
    dict_masks = {}
    list_image_name = []
    list_num_superpixels = []
    list_superpixel_label = []
    list_superpixels_features = []
    total_zeros = []
    print('Total images:', len(images_names))
    for image_name in images_names:
        print('Working on image:', image_name)
        img = images_manager.get_image(image_name)

        # Classifier output for the untouched image, on a batch of one scaled to [0, 1].
        batch_image = np.expand_dims(img, axis=0)
        batch_image = batch_image/255
        pred = classifier_model.predict(batch_image)
        dict_predictions[image_name] = pred[0][0]

        masks_manager = MaskManager()
        mask = masks_manager.get_mask(img=img, kernel_size=kernel_size, max_dist=max_dist, ratio=ratio)
        dict_masks[image_name] = mask

        # The image is loaded a second time, as in the original code.
        # NOTE(review): presumably a guard against get_mask modifying `img` - confirm.
        current_image = images_manager.get_image(image_name)
        if verbose:
            io.imshow(segmentation.mark_boundaries(current_image, mask, color=(1,0,0), mode='inner'))
            plt.show()

        list_superpixels = np.unique(mask)
        print('\tNumber of superpixels:', len(list_superpixels))
        list_num_superpixels.append(len(list_superpixels))

        for sp in list_superpixels:
            # Bounding-box crop around this superpixel
            smallest_patch = get_smalles_patch(img=current_image, mask=mask, num_superpixel=sp)
            if verbose:
                print('\t---------------------------------------------')
                print('\t\tBefore resizing')

                io.imshow(smallest_patch/255)
                plt.show()

            # Resize to the requested target_size
            smallest_patch = image.smart_resize(smallest_patch, size=target_size)
            if verbose:
                print('\t\tAfter resizing')
                io.imshow(smallest_patch/255)
                plt.show()

            # Feature vector from the global feature extractor.
            # NOTE(review): the patch is fed to `model` as-is; `preprocess_input` is
            # imported by the notebook but not applied here - confirm this is intended.
            smallest_patch = np.expand_dims(smallest_patch, axis=0)
            features = model(smallest_patch)
            features = np.array(features)
            features = features.flatten()
            total_zeros.append(np.sum(features <= 10**(-7)))

            # Append information
            list_image_name.append(image_name)
            list_superpixel_label.append(sp)
            list_superpixels_features.append(features)

    # Build the table once, after all images are processed. Doing it inside the loop
    # re-created it for every image and left it undefined when there were no images.
    df_features = pd.DataFrame(np.array(list_superpixels_features))
    df_features['image_name'] = list_image_name
    df_features['superpixel_label'] = list_superpixel_label
    df_features = df_features.set_index(['image_name', 'superpixel_label'])
    return df_features, dict_masks, total_zeros, list_num_superpixels, dict_predictions

In [None]:
# Run segmentation, classification and feature extraction for every image under
# path_to_images. `data` is the 5-tuple returned by get_superpixels_information.
# max_dist=15 overrides the function default of 10; the other values repeat the defaults.
data = get_superpixels_information(path_to_images=path_to_images,
                                   verbose=False,
                                   max_dist=15,
                                   kernel_size=12,
                                   ratio=0.3,
                                   target_size=(224, 224),
                                   classifier_model=classifier)

In [None]:
# Give each element of the tuple returned by get_superpixels_information its own name.
(
    df_features,
    dict_masks,
    total_zeros,
    list_num_superpixels,
    dict_predictions,
) = data

In [None]:
# Preview the first rows of the per-superpixel feature table.
df_features.head()

In [None]:
# Classifier output (pred[0][0]) for each unmodified image, keyed by image name.
dict_predictions

In [None]:
# Average, over all superpixels, of the number of feature entries that are <= 1e-7.
np.mean(total_zeros)

In [None]:
class Clusteriser:
    """Agglomerative clustering of feature rows, scored with the silhouette coefficient.

    Parameters
    ----------
    num_superpixels : sequence of numbers
        Number of superpixels found in each image. Its mean sets the default
        upper bound on the number of clusters tried by `get_mutiple_clusters`.
    df : array-like of shape (n_samples, n_features)
        Rows to cluster; passed unchanged to scikit-learn.
    verbose : bool
        When true, `get_mutiple_clusters` prints the score of every clustering.
    """

    def __init__(self, num_superpixels, df, verbose):
        self.num_superpixels = num_superpixels
        self.df = df
        self.verbose = verbose

    def get_fixed_clusters(self, n_cluster):
        """Cluster `self.df` into `n_cluster` groups and score the result.

        Uses complete-linkage agglomerative clustering with L1 distance.

        Returns
        -------
        tuple
            ``(metric, labels)``: the silhouette score (Euclidean distance) and the
            cluster label of every row.
        """
        # The distance argument is called `metric` in current scikit-learn and
        # `affinity` in older releases, whose constructor rejects `metric` with TypeError.
        try:
            cluster = AgglomerativeClustering(n_clusters=n_cluster, linkage='complete', metric='l1')
        except TypeError:
            cluster = AgglomerativeClustering(n_clusters=n_cluster, linkage='complete', affinity='l1')

        labels = cluster.fit(self.df).labels_
        # Alternative score left disabled in the original code:
        #metric = calinski_harabasz_score(self.df, labels)
        # NOTE(review): clustering uses L1 distance but the silhouette is computed with
        # Euclidean distance - confirm the mismatch is intended.
        metric = silhouette_score(self.df, labels, metric='euclidean')

        return metric, labels

    def get_mutiple_clusters(self, limit=None):
        """Score clusterings for every cluster count from 2 up to an upper bound.

        The exclusive upper bound is the rounded mean of `self.num_superpixels`,
        lowered to `limit` when `limit` is given and smaller, and never larger
        than the number of rows in `self.df` (the silhouette score needs fewer
        clusters than samples).

        Parameters
        ----------
        limit : int, optional
            Exclusive upper bound on the number of clusters to try.

        Returns
        -------
        tuple
            ``(n_clusters_list, metric_list)``: the cluster counts tried and the
            score obtained for each, in the same order.
        """
        # int() guards against NumPy versions whose scalars round to a NumPy float,
        # which range() would reject.
        mean_superpixels = int(round(np.mean(self.num_superpixels)))
        total_clusters = mean_superpixels

        if limit is not None and limit < mean_superpixels:
            total_clusters = limit

        # The largest count tried is total_clusters - 1, which must stay <= n_samples - 1.
        total_clusters = min(total_clusters, len(self.df))

        n_clusters_list = []
        metric_list = []

        for n_cluster in range(2, total_clusters):
            metric, _ = self.get_fixed_clusters(n_cluster)
            metric_list.append(metric)
            n_clusters_list.append(n_cluster)
            if self.verbose:
                print('n_cluster:', n_cluster, 'metric:', metric)

        return n_clusters_list, metric_list

    # Disabled in the original code: pick the cluster count with the best score.
    #def get_best_clusterisation(self):
    #    n_clusters_list, metric_list = self.get_mutiple_clusters()
    #    idx_best_metric = np.argmax(metric_list)
    #    best_n_clusters = n_clusters_list[idx_best_metric]

    #    _, labels = self.get_fixed_clusters(best_n_clusters)
    #    return np.array(labels)

In [None]:
# Score clusterings of all superpixel feature rows for a range of cluster counts
# starting at 2. limit=20 lowers the (exclusive) upper end of that range to 20 when the
# mean number of superpixels per image is larger than that.
cluster = Clusteriser(num_superpixels=list_num_superpixels, df=df_features, verbose=False)
n_clusters, metric_list = cluster.get_mutiple_clusters(limit=20)

In [None]:
# Silhouette score obtained for each candidate number of clusters.
plt.scatter(n_clusters, metric_list)
plt.xlabel('Number of clusters')
plt.ylabel('Silhouette score')
plt.title('Clustering score per number of clusters');

In [None]:
# Cluster all superpixels into a fixed number of groups. `new_segmentation` holds one
# cluster label per row of df_features, in row order.
# NOTE(review): n_cluster=6 is hard-coded - presumably read off the plot above; confirm.
metric, new_segmentation = cluster.get_fixed_clusters(n_cluster=6)
print(new_segmentation)

In [None]:
# Plot what falls into each cluster

In [None]:
# Move the (image_name, superpixel_label) index back into ordinary columns. The rows
# then carry a default 0..n-1 index and both identifiers can be read as column values.
df_features_no_index = df_features.reset_index()
df_features_no_index.head()

In [None]:
# For every (image, superpixel) pair, black out the superpixel and record the classifier
# output next to the output for the untouched image, tagged with the pair's cluster.
unique_new_clusters = np.unique(new_segmentation)
image_manager = ImagesManager(location=path_to_images, images_extension=images_extension)
dict_comparison = {
    'image_name': [],
    'new_cluster': [],
    'base_prediction': [],
    'occluded_prediction': []
}
for new_cluster in unique_new_clusters:
    # new_segmentation has one label per row of df_features_no_index, in row order,
    # so a boolean mask selects the rows that belong to this cluster.
    df_filter = df_features_no_index.loc[new_segmentation == new_cluster]

    # Only the two identifier columns are needed. Zipping them avoids building a
    # full-width row object for every superpixel.
    for image_name, superpixel in zip(df_filter['image_name'], df_filter['superpixel_label']):

        # Get the image and its superpixel mask
        img = image_manager.get_image(image_name)
        mask = dict_masks[image_name]

        # Occlude on a copy so the array returned by the image manager is never
        # modified. Otherwise occlusions could accumulate if it reuses arrays.
        img_occluded = np.array(img)
        img_occluded[mask == superpixel] = 0

        # Predict the probability of being classified as dog
        img_occluded = np.expand_dims(img_occluded, axis=0)
        img_occluded = img_occluded/255
        occluded_prediction = classifier.predict(img_occluded)
        occluded_prediction = occluded_prediction[0][0]
        base_prediction = dict_predictions[image_name]

        # Create the output
        dict_comparison['image_name'].append(image_name)
        dict_comparison['new_cluster'].append(new_cluster)
        dict_comparison['base_prediction'].append(base_prediction)
        dict_comparison['occluded_prediction'].append(occluded_prediction)

In [None]:
# Collect the per-superpixel comparison records into a table, one row per occlusion.
df_after_occlusion = pd.DataFrame(dict_comparison)

In [None]:
# Show the base and occluded predictions side by side, with the cluster of each row.
df_after_occlusion