In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as mplcm
import random

In [None]:
def confusion_matrix_fn(label_hat,label,n_classes,n_clusters):
    """Count how often each true class co-occurs with each predicted cluster.

    Parameters
    ----------
    label_hat : array-like
        Predicted cluster id of every sample.
    label : array-like
        True class id of every sample, aligned element-wise with ``label_hat``.
    n_classes : int
        Number of rows; class ids ``0 .. n_classes - 1`` are counted.
    n_clusters : int
        Number of columns; cluster ids ``0 .. n_clusters - 1`` are counted.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_classes, n_clusters)`` whose entry ``[L, P]``
        is the number of samples with ``label == L`` and ``label_hat == P``.
        Samples whose ids fall outside those ranges are not counted.
    """
    # A plain Python list compared with an int yields a single ``False``, which
    # would silently produce an all-zero matrix; arrays compare element-wise.
    label = np.asarray(label)
    label_hat = np.asarray(label_hat)

    # One boolean mask per cluster, built once instead of once per class.
    cluster_masks = [label_hat == cluster for cluster in range(n_clusters)]

    confusion_matrix = np.zeros((n_classes, n_clusters))
    for true_class in range(n_classes):
        class_mask = label == true_class
        for cluster, cluster_mask in enumerate(cluster_masks):
            confusion_matrix[true_class, cluster] = np.sum(class_mask & cluster_mask)

    return confusion_matrix

In [None]:
def accuracy(n_clusters, confusion_matrix):
    """Score a clustering by crediting each cluster with its largest class count.

    For each of the first ``n_clusters`` columns of ``confusion_matrix`` the
    largest entry is counted as correctly assigned and the rest of that column
    as wrongly assigned.

    Parameters
    ----------
    n_clusters : int
        Number of leading columns of ``confusion_matrix`` to evaluate.
    confusion_matrix : numpy.ndarray
        Two-dimensional count matrix indexed as ``[class, cluster]``.

    Returns
    -------
    tuple
        ``(accuracy, err)``: the correct total and the wrong total, each
        divided by the sum of the whole matrix.
    """
    total = np.sum(confusion_matrix)

    matched = 0
    mismatched = 0
    for cluster in range(n_clusters):
        column = confusion_matrix[:, cluster]
        dominant = np.max(column)
        matched = matched + dominant
        mismatched = mismatched + (np.sum(column) - dominant)

    return matched / total, mismatched / total

In [None]:
def plot_cm(confusion_matrix,arq=None):
    """Draw a confusion matrix as an annotated heat map and optionally save it.

    Parameters
    ----------
    confusion_matrix : numpy.ndarray
        Two-dimensional count matrix; it is cast to ``int`` for display.
        Rows are labelled "True label", columns "Clustering label".
    arq : str or path-like, optional
        When given, the figure is written to this path before being shown.

    Notes
    -----
    The figure is shown with ``plt.show()`` and then closed. The call to
    ``sns.set(font_scale=3)`` changes seaborn's global settings, so it also
    affects figures drawn after this function returns.
    """
    import seaborn as sns

    sns.set(font_scale=3)
    fig = plt.figure(figsize=(32, 14))
    ax = sns.heatmap(
        confusion_matrix.astype(int),
        annot=True,
        fmt="d",
        annot_kws={"size": 25, 'rotation': 0},
    )

    ax.set_title("Confusion matrix", fontsize=30)
    ax.set_ylabel('True label', fontsize=25)
    ax.set_xlabel('Clustering label', fontsize=25)

    # Pin the y-limits to the full cell grid (row 0 at the top). Nudging the
    # limits reported by Matplotlib by +/-0.5 only compensates for the release
    # that truncated the first and last rows; on other releases it adds blank
    # padding instead.
    ax.set_ylim(confusion_matrix.shape[0], 0)

    if arq is not None:
        fig.savefig(arq, bbox_inches="tight")

    plt.show()

    # Close this specific figure rather than whichever one happens to be current.
    plt.close(fig)

In [None]:
def silhouette_plot(y,silhouette_avg, sample_silhouette_values, n_clusters, arq=None, ax=None):
    """Draw one horizontal silhouette band per cluster on a Matplotlib axes.

    Parameters
    ----------
    y : array-like
        Cluster id of every sample; ids ``0 .. n_clusters - 1`` are drawn.
    silhouette_avg : float
        Currently unused; only the disabled "average" line below refers to it.
    sample_silhouette_values : array-like
        Silhouette coefficient of every sample, aligned with ``y``.
    n_clusters : int
        Number of cluster bands to draw.
    arq : str or path-like, optional
        When given, the figure that owns ``ax`` is saved to this path.
    ax : matplotlib.axes.Axes, optional
        Axes to draw on; defaults to the current axes.

    Returns
    -------
    matplotlib.axes.Axes
        The axes that were drawn on.
    """
    if ax is None:
        ax = plt.gca()

    # Accept plain lists as well as arrays: ``y == i`` must be element-wise.
    y = np.asarray(y)
    sample_silhouette_values = np.asarray(sample_silhouette_values)

    padding = 2
    y_lower = padding
    y_upper = y_lower  # keeps set_ylim below valid when n_clusters == 0
    for i in range(n_clusters):
        # Sorted silhouette scores of the samples belonging to cluster i
        # (boolean indexing copies, so the caller's array is left untouched).
        ith_cluster_silhouette_values = np.sort(sample_silhouette_values[y == i])

        size_cluster_i = ith_cluster_silhouette_values.shape[0]
        y_upper = y_lower + size_cluster_i

        color = mplcm.nipy_spectral(float(i) / n_clusters)
        ax.fill_betweenx(np.arange(y_lower, y_upper),
                         0,
                         ith_cluster_silhouette_values,
                         facecolor=color,
                         edgecolor=color,
                         alpha=0.7)

        # Disabled: label each band with its cluster number at the middle.
        # ax.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i + 1))

        # The next band starts above this one, separated by `padding` rows.
        y_lower = y_upper + padding

    ax.set_xlabel("The silhouette coefficient values", fontsize=15)
    ax.set_ylabel("Cluster label", fontsize=15)

    # Disabled: vertical line for the average silhouette score of all values.
    # ax.axvline(x=silhouette_avg, c='r', alpha=0.8, lw=0.8, ls='-')
    # ax.annotate('Average',
    #             xytext=(silhouette_avg, y_lower * 1.025),
    #             xy=(0, 0),
    #             ha='center',
    #             alpha=0.8,
    #             c='r')

    ax.set_yticks([])  # Clear the yaxis labels / ticks
    #ax.set_xticks([-1, -0.5, 0 , 0.5, 1])
    ax.set_ylim(0, y_upper + 1)
    #ax.set_xlim(-1, 1.0)
    ax.tick_params(axis="x", labelsize=15)

    if arq is not None:
        # Save the figure that owns `ax`; the pyplot "current figure" may be a
        # different one when the caller passed in their own axes.
        ax.figure.savefig(arq, bbox_inches="tight")

    return ax

In [None]:
def generate_indexes_epoch(n_clusters, ind_y):
    """Draw a shuffled list of sample indexes with equal share per cluster.

    Every non-empty cluster contributes ``int(len(ind_y) / n_clusters)``
    indexes, drawn with ``np.random.choice``. Sampling is done with
    replacement when the cluster has at most that many members, and without
    replacement otherwise. Empty clusters are skipped, so the result can be
    shorter than ``len(ind_y)``.

    Parameters
    ----------
    n_clusters : int
        Number of cluster ids (``0 .. n_clusters - 1``) to sample from.
    ind_y : array-like
        Cluster id of every sample.

    Returns
    -------
    numpy.ndarray
        One-dimensional integer array of positions into ``ind_y``, shuffled
        with ``np.random.shuffle``.

    Raises
    ------
    ZeroDivisionError
        If ``n_clusters`` is 0.
    """
    ind_y = np.asarray(ind_y)

    size_clusters = np.array([np.sum(ind_y == k) for k in np.arange(n_clusters)])

    # Quota per cluster. (A disabled alternative used the size of the smallest
    # non-empty cluster instead.)
    size_per_pseudolabel = int(len(ind_y) / n_clusters)

    ind_img = np.arange(len(ind_y))

    sampled = []
    for i in range(n_clusters):
        # skip empty clusters
        if size_clusters[i] == 0:
            continue
        members = ind_img[ind_y == i]
        sampled.append(
            np.random.choice(
                members,
                size_per_pseudolabel,
                replace=(len(members) <= size_per_pseudolabel),
            )
        )

    # Concatenate integer chunks only: seeding the accumulator with a default
    # (float64) empty array would turn every index into a float, which NumPy
    # refuses to use for indexing.
    if sampled:
        res = np.concatenate(sampled)
    else:
        res = np.array([], dtype=ind_img.dtype)

    np.random.shuffle(res)

    return res

In [None]:
def find_corresponded_cluster (y_pred_kmeans, last_y_pred_kmeans, n_clusters):
    """Relabel the current clusters with the ids of the previous clustering.

    Current clusters are visited in an order randomised with
    ``random.shuffle``. Each one takes the previous cluster id with which it
    shares the most samples, among the ids not already taken by a cluster
    visited earlier. Samples are matched by their position in the two arrays.

    Parameters
    ----------
    y_pred_kmeans : array-like
        Current cluster id per sample (assumed one-dimensional).
    last_y_pred_kmeans : array-like
        Previous cluster id per sample, indexed the same way.
    n_clusters : int
        Number of cluster ids (``0 .. n_clusters - 1``) in both labelings.

    Returns
    -------
    numpy.ndarray
        Copy of ``y_pred_kmeans`` with every cluster id replaced by the
        previous id it was matched to.
    """
    y_pred_kmeans = np.asarray(y_pred_kmeans)
    last_y_pred_kmeans = np.asarray(last_y_pred_kmeans)

    c = np.arange(n_clusters)
    random.shuffle(c)

    # c_corresponded[k] is the previous id given to current cluster k (-1 = none yet).
    c_corresponded = np.full(n_clusters, -1, dtype=int)
    y_pred_kmeans_new = np.copy(y_pred_kmeans)

    # Member positions of each previous cluster, computed once up front.
    last_members = [np.flatnonzero(last_y_pred_kmeans == j) for j in range(n_clusters)]

    for current in c:
        # flatnonzero always returns a 1-D array, even for a cluster with a
        # single member (squeezing argwhere collapses that case to 0-d, which
        # cannot be iterated).
        indexes_img_in_cluster = np.flatnonzero(y_pred_kmeans == current)

        # Number of samples shared with each previous cluster.
        size_per_cluster = np.zeros(n_clusters)
        for j in range(n_clusters):
            size_per_cluster[j] = np.intersect1d(
                indexes_img_in_cluster, last_members[j], assume_unique=True
            ).size

        # Previous cluster ids ordered from largest to smallest overlap.
        ranked_candidates = np.argsort(size_per_cluster)[::-1]

        for candidate in ranked_candidates:
            if candidate not in c_corresponded:
                c_corresponded[current] = candidate
                y_pred_kmeans_new[indexes_img_in_cluster] = candidate
                break

    return y_pred_kmeans_new