In [1]:
import numpy as np 
from scipy.spatial.distance import cdist

In [2]:
def medoids_init(X, k):
    """Pick k rows of X at random (without repeating a row index) as initial medoids.

    Parameters
    ----------
    X : numpy.ndarray
        Data points, one per row.
    k : int
        Number of medoids to draw.

    Returns
    -------
    numpy.ndarray
        The k selected rows of X.
    """
    n_points = X.shape[0]
    # replace=False guarantees k different row positions are drawn.
    chosen_rows = np.random.choice(n_points, k, replace=False)
    return X[chosen_rows]

In [15]:
def distance(X, medoids):
    """Euclidean distance from every row of X to every medoid.

    Parameters
    ----------
    X : array-like, shape (m, d)
        Data points, one per row.
    medoids : array-like, shape (k, d) or (d,)
        Medoid coordinates. A single 1-D medoid is treated as one row.

    Returns
    -------
    numpy.ndarray, shape (m, k)
        S[i, j] is the Euclidean distance between X[i] and medoids[j].
    """
    # Cast to float before subtracting: unsigned integer inputs (e.g. uint8
    # pixel values) would otherwise wrap around and give wrong distances.
    X = np.asarray(X, dtype=float)
    medoids = np.asarray(medoids, dtype=float)

    if medoids.ndim == 1:
        medoids = medoids.reshape(1, -1)

    S = np.empty((X.shape[0], medoids.shape[0]))

    # Fill one column per medoid; every row of S is written before returning.
    for j, medoid in enumerate(medoids):
        S[:, j] = np.linalg.norm(X - medoid, axis=1)

    return S


    
    

In [16]:
def assign_labels(D):
    """Return, for each row of D, the column index holding the smallest value.

    D is expected to be a (points x medoids) distance matrix, so the result is
    the index of the nearest medoid for every point. Ties resolve to the
    lowest column index (numpy argmin behaviour).
    """
    nearest_medoid = np.argmin(D, axis=1)
    return nearest_medoid

In [17]:
def update_medoids(X,medoids):
    """Recompute each medoid as the cluster member with the lowest total distance.

    Points are first assigned to their nearest current medoid. Then, for every
    non-empty cluster, the member whose summed distance to the other members
    of that same cluster is smallest becomes the new medoid. The current
    medoid is kept when no member improves on it.

    Parameters
    ----------
    X : numpy.ndarray, shape (m, d)
        Data points, one per row.
    medoids : numpy.ndarray, shape (k, d)
        Current medoids. This array is not modified.

    Returns
    -------
    numpy.ndarray, shape (k, d)
        A new array holding the updated medoids, in the same row order.
    """
    medoids = np.asarray(medoids)
    D = distance(X, medoids)
    labels = assign_labels(D)

    # Copy so the caller's medoids are left untouched.
    out_medoids = medoids.copy()

    # Only clusters that actually received points are visited.
    for i in np.unique(labels):
        cluster_points = X[labels == i]

        # Cost is measured within the cluster only; summing over all of X
        # would pull every medoid towards the same global optimum.
        best_cost = np.sum(distance(cluster_points, medoids[i]))

        for datap in cluster_points:
            candidate_cost = np.sum(distance(cluster_points, datap))
            if candidate_cost < best_cost:
                best_cost = candidate_cost
                out_medoids[i] = datap

    return out_medoids
                
                

In [18]:
def has_converged(old_medoids, medoids):
    """Report whether two medoid collections contain the same rows.

    Rows are compared as an unordered set of tuples, so row order and
    repeated rows do not affect the result.
    """
    previous = {tuple(row) for row in old_medoids}
    current = {tuple(row) for row in medoids}
    return previous == current

In [19]:
def kmedoids(X, k, max_steps = np.inf):
    """Cluster the rows of X around k medoids by alternating assignment and update.

    Starting from k randomly chosen rows, the medoids are updated repeatedly
    until the set of medoids stops changing or max_steps updates have been
    performed.

    Parameters
    ----------
    X : numpy.ndarray, shape (m, d)
        Data points, one per row.
    k : int
        Number of clusters.
    max_steps : int or float, optional
        Upper bound on the number of update steps (default: no limit).

    Returns
    -------
    tuple (medoids, labels)
        medoids is the (k, d) array of final medoids. labels holds, for each
        row of X, the index of its nearest medoid in that returned array.
    """
    medoids = medoids_init(X, k)

    converged = False
    step = 1

    while (not converged) and (step <= max_steps):
        # Snapshot first, in case the update step modifies its input.
        old_medoids = medoids.copy()

        medoids = update_medoids(X, medoids)

        converged = has_converged(old_medoids, medoids)
        step += 1

    # Assign labels against the medoids actually returned, so they stay
    # consistent even when the loop stops on max_steps (or never runs).
    labels = assign_labels(distance(X, medoids))

    return (medoids, labels)
    

In [20]:
from skimage import io
import numpy as np

In [21]:
# Load the input image; 'football.bmp' is a relative path, so the file must be
# reachable from the notebook's working directory.
image = io.imread('football.bmp')

In [22]:
# Flatten the image into one row per pixel with 3 values each, and set the
# number of clusters.
rows, cols = image.shape[0], image.shape[1]
X = image.reshape(rows * cols, 3)
k = 5

In [23]:
# Draw k rows of X as starting medoids. The draw uses np.random and no seed is
# set in the visible cells, so the selection differs between runs.
initial_medoids = medoids_init(X,k)

In [24]:
# Display the sampled medoids (one row of X per medoid).
initial_medoids

Array([[ 56,  43,  35],
       [ 86,  93,  99],
       [196, 180, 165],
       [108, 121, 153],
       [233, 235, 234]], dtype=uint8)

In [25]:
# Distance matrix between the rows of X and the initial medoids.
D = distance(X,initial_medoids)

In [26]:
# Shape check: distance() allocates one row per row of X and one column per medoid.
D.shape

(255440, 5)

In [27]:
# For each row of D, the column index of its smallest entry (argmin along axis 1).
labels = assign_labels(D)

In [None]:
# Run the full clustering with the configured cluster count k.
# Note: this rebinds `labels`, replacing the assignment computed for the
# initial medoids in the earlier cell.
(medoids, labels) = kmedoids(X, k)