In [23]:
import numpy as np
from __future__ import division

In [33]:
#7.2.1

In [34]:
def Euclidean(x,y):
    """
    Return the Euclidean (L2) distance between two vectors.

    Parameters
    ----------
    x, y : sequence or numpy array of numbers
        One-dimensional vectors of the same length.

    Returns
    -------
    numpy.float64
        sqrt(sum_i (x_i - y_i)**2)
    """
    # Work in floating point: with small/unsigned integer dtypes (e.g. uint8)
    # the difference and its square silently wrap around, which yields a
    # wrong distance.
    diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    return np.sqrt(np.dot(diff, diff))

In [35]:
# sqrt[(4-1)^2+(4-2)^2]
Euclidean(np.array([4,4]), np.array([1,2]))

3.605551275463989

In [36]:
Euclidean([1],[0])

1.0

In [37]:
def mean(x):
    """
    Compute the centroid (coordinate-wise average) of a list of points.

    x is a non-empty list of points, each point being a sequence of
    coordinates of the same length. The centroid is returned as a tuple,
    which makes it hashable so it can be used as a dictionary key that
    identifies the cluster.
    """
    n_points = len(x)
    # one accumulator slot per coordinate of the first point
    total = np.zeros(len(x[0]))
    for vec in x:
        total += np.array(vec)
    return tuple(total / n_points)

# mean([[1],[2]])
# mean([[1,2],[3,4],[5,6]])

In [38]:
def agg_(clusters, print_summary = True):
    """
    Agglomerative (hierarchical) clustering of points in Euclidean space,
    merging at each step the two clusters whose centroids are closest.

    Parameters
    ----------
    clusters : list
        A list of [centroid, points] pairs. centroid is a tuple of
        coordinates and points is a list of points (each point a list of
        coordinates). The list is modified IN PLACE: every step removes
        the two closest clusters and appends their union, until at most
        one cluster is left.
    print_summary : bool
        When True, print the merged pair, the distance between them and
        the resulting clusters / centroids after every step.

    Returns
    -------
    None
        The result is left in `clusters`.
    """
    step = 1
    while len(clusters) > 1:
        n = len(clusters)
        # The closest pair is remembered by POSITION in the list. Looking
        # positions up through a centroid-keyed dictionary deletes the wrong
        # entries as soon as two clusters share the same centroid
        # (e.g. duplicate points), which silently loses points.
        min_dist = None
        ix1 = None
        ix2 = None
        # double loop to consider the distance between all pairs of clusters
        for i in range(n-1):
            for j in range(i+1,n):
                # the distance between centroids of cluster i and cluster j
                distance_ij = Euclidean(clusters[i][0], clusters[j][0])
                # the first pair seeds the minimum, so no "large number"
                # sentinel can cap the distances that are considered
                if min_dist is None or distance_ij < min_dist:
                    min_dist = distance_ij
                    ix1 = i
                    ix2 = j
        c1 = clusters[ix1]
        c2 = clusters[ix2]
        # merge the two clusters whose centroids are closest
        new_cluster = c1[1] + c2[1]
        new_centroid = mean(new_cluster)
        clusters.append([new_centroid, new_cluster])
        # remove the merged clusters from the list; ix2 > ix1, so deleting
        # the larger index first keeps the smaller one valid
        del clusters[ix2]
        del clusters[ix1]
        if print_summary:
            print('Step %d:' % step)
            print('Merged clusters: %s and %s' %(str(c1[1]),str(c2[1])))
            print('Minimum distance: %f' % min_dist)
            print('New clusters list:')
            print([el[1] for el in clusters])
            print('New centroids:')
            print([el[0] for el in clusters])
            print('')
            print('--------------------------------------------------------')
            print('')
        step += 1

In [39]:
# Initial clustering: one singleton cluster per point 1, 4, 9, ..., 81.
# Each entry is [centroid, points]: the centroid is a tuple and points is a
# list of points (each point is itself a list of coordinates).
clusters = [[(i**2,), [[i**2]]] for i in range(1,10)]
clusters

[[(1,), [[1]]],
 [(4,), [[4]]],
 [(9,), [[9]]],
 [(16,), [[16]]],
 [(25,), [[25]]],
 [(36,), [[36]]],
 [(49,), [[49]]],
 [(64,), [[64]]],
 [(81,), [[81]]]]

In [40]:
agg_(clusters)

Step 1:
Merged clusters: [[1]] and [[4]]
Minimum distance: 3.000000
New clusters list:
[[[9]], [[16]], [[25]], [[36]], [[49]], [[64]], [[81]], [[1], [4]]]
New centroids:
[(9,), (16,), (25,), (36,), (49,), (64,), (81,), (2.5,)]

--------------------------------------------------------

Step 2:
Merged clusters: [[9]] and [[1], [4]]
Minimum distance: 6.500000
New clusters list:
[[[16]], [[25]], [[36]], [[49]], [[64]], [[81]], [[9], [1], [4]]]
New centroids:
[(16,), (25,), (36,), (49,), (64,), (81,), (4.666666666666667,)]

--------------------------------------------------------

Step 3:
Merged clusters: [[16]] and [[25]]
Minimum distance: 9.000000
New clusters list:
[[[36]], [[49]], [[64]], [[81]], [[9], [1], [4]], [[16], [25]]]
New centroids:
[(36,), (49,), (64,), (81,), (4.666666666666667,), (20.5,)]

--------------------------------------------------------

Step 4:
Merged clusters: [[36]] and [[49]]
Minimum distance: 13.000000
New clusters list:
[[[64]], [[81]], [[9], [1], [4]], [[16],

In [41]:
#final clusters list
clusters

[[(31.666666666666668,), [[9], [1], [4], [16], [25], [36], [49], [64], [81]]]]

In [42]:
#7.2.2

In [43]:
def mins(x,y):
    """
    Minimum (single-linkage) distance between two clusters.

    Parameters
    ----------
    x, y : list
        Clusters of points; each cluster is a list of points and each
        point is a sequence of coordinates.

    Returns
    -------
    The smallest Euclidean distance between a point of x and a point of y.
    If either cluster is empty there are no pairs and float('inf') is
    returned.
    """
    # Start from infinity rather than an arbitrary "large" constant, so that
    # genuinely large distances are not capped by the initial value.
    running_min = float('inf')
    for pt_x in x:
        for pt_y in y:
            # evaluate the distance once per pair
            distance = Euclidean(pt_x,pt_y)
            if distance < running_min:
                running_min = distance
    return running_min

In [44]:
Euclidean([1],[3])

2.0

In [45]:
mins([[1]],[[3],[2],[0.5]])

0.5

In [47]:
def agg_(clusters, print_summary = True, dist = 'Euclidean'):
    """
    Agglomerative (hierarchical) clustering of points in Euclidean space.

    Parameters
    ----------
    clusters : list
        A list of [centroid, points] pairs. centroid is a tuple of
        coordinates and points is a list of points (each point a list of
        coordinates). The list is modified IN PLACE: every step removes
        the two closest clusters and appends their union, until at most
        one cluster is left.
    print_summary : bool
        When True, print the merged pair, the distance between them and
        the resulting clusters / centroids after every step.
    dist : str
        How the distance between two clusters is measured:
        'Euclidean' - distance between the two centroids;
        'mins'      - minimum distance between any two points, one from
                      each cluster.

    Returns
    -------
    None
        The result is left in `clusters`.

    Raises
    ------
    ValueError
        If `dist` is not one of the supported names.
    """

    # specifying the distance function used
    # r_ = 0: the distance function receives the centroids
    # r_ = 1: the distance function receives the lists of points
    if dist == 'Euclidean':
        f_dist = Euclidean
        r_ = 0
    elif dist == 'mins':
        f_dist = mins
        r_ = 1
    else:
        raise ValueError("unknown dist %r; expected 'Euclidean' or 'mins'" % (dist,))

    step = 1
    while len(clusters) > 1:
        n = len(clusters)
        # The closest pair is remembered by POSITION in the list. Looking
        # positions up through a centroid-keyed dictionary deletes the wrong
        # entries as soon as two clusters share the same centroid
        # (e.g. duplicate points), which silently loses points.
        min_dist = None
        ix1 = None
        ix2 = None
        # double loop to consider the distance between all pairs of clusters
        for i in range(n-1):
            for j in range(i+1,n):
                # the distance between cluster i and cluster j under `dist`
                distance_ij = f_dist(clusters[i][r_], clusters[j][r_])
                # the first pair seeds the minimum, so no "large number"
                # sentinel can cap the distances that are considered
                if min_dist is None or distance_ij < min_dist:
                    min_dist = distance_ij
                    ix1 = i
                    ix2 = j
        c1 = clusters[ix1]
        c2 = clusters[ix2]
        # merge the two clusters that are closest under the chosen distance
        new_cluster = c1[1] + c2[1]
        new_centroid = mean(new_cluster)
        clusters.append([new_centroid, new_cluster])
        # remove the merged clusters from the list; ix2 > ix1, so deleting
        # the larger index first keeps the smaller one valid
        del clusters[ix2]
        del clusters[ix1]
        if print_summary:
            print('Step %d:' % step)
            print('Merged clusters: %s and %s' %(str(c1[1]),str(c2[1])))
            print('Minimum distance: %f' % min_dist)
            print('New clusters list:')
            print([el[1] for el in clusters])
            print('New centroids:')
            print([el[0] for el in clusters])
            print('')
            print('--------------------------------------------------------')
            print('')
        step += 1

In [48]:
# Reset to the nine singleton clusters for 1, 4, 9, ..., 81; agg_ merges the
# list in place, so the previous run collapsed it into a single cluster.
# Each entry is [centroid, points].
clusters = [[(i**2,), [[i**2]]] for i in range(1,10)]
clusters

[[(1,), [[1]]],
 [(4,), [[4]]],
 [(9,), [[9]]],
 [(16,), [[16]]],
 [(25,), [[25]]],
 [(36,), [[36]]],
 [(49,), [[49]]],
 [(64,), [[64]]],
 [(81,), [[81]]]]

In [49]:
agg_(clusters, dist = 'mins')

Step 1:
Merged clusters: [[1]] and [[4]]
Minimum distance: 3.000000
New clusters list:
[[[9]], [[16]], [[25]], [[36]], [[49]], [[64]], [[81]], [[1], [4]]]
New centroids:
[(9,), (16,), (25,), (36,), (49,), (64,), (81,), (2.5,)]

--------------------------------------------------------

Step 2:
Merged clusters: [[9]] and [[1], [4]]
Minimum distance: 5.000000
New clusters list:
[[[16]], [[25]], [[36]], [[49]], [[64]], [[81]], [[9], [1], [4]]]
New centroids:
[(16,), (25,), (36,), (49,), (64,), (81,), (4.666666666666667,)]

--------------------------------------------------------

Step 3:
Merged clusters: [[16]] and [[9], [1], [4]]
Minimum distance: 7.000000
New clusters list:
[[[25]], [[36]], [[49]], [[64]], [[81]], [[16], [9], [1], [4]]]
New centroids:
[(25,), (36,), (49,), (64,), (81,), (7.5,)]

--------------------------------------------------------

Step 4:
Merged clusters: [[25]] and [[16], [9], [1], [4]]
Minimum distance: 9.000000
New clusters list:
[[[36]], [[49]], [[64]], [[81]], 

In [50]:
def avg(x,y):
    """
    Average-linkage distance between two clusters.

    x and y are clusters, each given as a list of points (vectors). The
    result is the mean Euclidean distance taken over every pair made of
    one point from x and one point from y.
    """
    # by the multiplication rule there are len(x) * len(y) such pairs
    pair_count = len(x) * len(y)
    total = sum(Euclidean(pt_x, pt_y) for pt_x in x for pt_y in y)
    return total / pair_count

In [52]:
def agg_(clusters, print_summary = True, dist = 'Euclidean'):
    """
    Agglomerative (hierarchical) clustering of points in Euclidean space.

    Parameters
    ----------
    clusters : list
        A list of [centroid, points] pairs. centroid is a tuple of
        coordinates and points is a list of points (each point a list of
        coordinates). The list is modified IN PLACE: every step removes
        the two closest clusters and appends their union, until at most
        one cluster is left.
    print_summary : bool
        When True, print the merged pair, the distance between them and
        the resulting clusters / centroids after every step.
    dist : str
        How the distance between two clusters is measured:
        'Euclidean' - distance between the two centroids;
        'mins'      - minimum distance between any two points, one from
                      each cluster;
        'avg'       - average distance over all pairs of points, one from
                      each cluster.

    Returns
    -------
    None
        The result is left in `clusters`.

    Raises
    ------
    ValueError
        If `dist` is not one of the supported names.
    """

    # specifying the distance function used
    # r_ = 0: the distance function receives the centroids
    # r_ = 1: the distance function receives the lists of points
    if dist == 'Euclidean':
        f_dist = Euclidean
        r_ = 0
    elif dist == 'mins':
        f_dist = mins
        r_ = 1
    elif dist == 'avg':
        f_dist = avg
        r_ = 1
    else:
        raise ValueError("unknown dist %r; expected 'Euclidean', 'mins' or 'avg'" % (dist,))

    # start main code to conduct clustering
    step = 1
    while len(clusters) > 1:
        n = len(clusters)
        # The closest pair is remembered by POSITION in the list. Looking
        # positions up through a centroid-keyed dictionary deletes the wrong
        # entries as soon as two clusters share the same centroid
        # (e.g. duplicate points), which silently loses points.
        min_dist = None
        ix1 = None
        ix2 = None
        # double loop to consider the distance between all pairs of clusters
        for i in range(n-1):
            for j in range(i+1,n):
                # the distance between cluster i and cluster j under `dist`
                distance_ij = f_dist(clusters[i][r_], clusters[j][r_])
                # the first pair seeds the minimum, so no "large number"
                # sentinel can cap the distances that are considered
                if min_dist is None or distance_ij < min_dist:
                    min_dist = distance_ij
                    ix1 = i
                    ix2 = j
        c1 = clusters[ix1]
        c2 = clusters[ix2]
        # merge the two clusters that are closest under the chosen distance
        new_cluster = c1[1] + c2[1]
        new_centroid = mean(new_cluster)
        clusters.append([new_centroid, new_cluster])
        # remove the merged clusters from the list; ix2 > ix1, so deleting
        # the larger index first keeps the smaller one valid
        del clusters[ix2]
        del clusters[ix1]
        if print_summary:
            print('Step %d:' % step)
            print('Merged clusters: %s and %s' %(str(c1[1]),str(c2[1])))
            print('Minimum distance: %f' % min_dist)
            print('New clusters list:')
            print([el[1] for el in clusters])
            print('New centroids:')
            print([el[0] for el in clusters])
            print('')
            print('--------------------------------------------------------')
            print('')
        step += 1
    
# Alternatively, can use np.mean to create the new centroid
# new_centroid = tuple(np.mean(np.array(new_cluster),axis=0))

In [53]:
# Reset to the nine singleton clusters for 1, 4, 9, ..., 81; agg_ merges the
# list in place, so the previous run collapsed it into a single cluster.
# Each entry is [centroid, points].
clusters = [[(i**2,), [[i**2]]] for i in range(1,10)]
clusters

[[(1,), [[1]]],
 [(4,), [[4]]],
 [(9,), [[9]]],
 [(16,), [[16]]],
 [(25,), [[25]]],
 [(36,), [[36]]],
 [(49,), [[49]]],
 [(64,), [[64]]],
 [(81,), [[81]]]]

In [54]:
agg_(clusters,dist='avg')

Step 1:
Merged clusters: [[1]] and [[4]]
Minimum distance: 3.000000
New clusters list:
[[[9]], [[16]], [[25]], [[36]], [[49]], [[64]], [[81]], [[1], [4]]]
New centroids:
[(9,), (16,), (25,), (36,), (49,), (64,), (81,), (2.5,)]

--------------------------------------------------------

Step 2:
Merged clusters: [[9]] and [[1], [4]]
Minimum distance: 6.500000
New clusters list:
[[[16]], [[25]], [[36]], [[49]], [[64]], [[81]], [[9], [1], [4]]]
New centroids:
[(16,), (25,), (36,), (49,), (64,), (81,), (4.666666666666667,)]

--------------------------------------------------------

Step 3:
Merged clusters: [[16]] and [[25]]
Minimum distance: 9.000000
New clusters list:
[[[36]], [[49]], [[64]], [[81]], [[9], [1], [4]], [[16], [25]]]
New centroids:
[(36,), (49,), (64,), (81,), (4.666666666666667,), (20.5,)]

--------------------------------------------------------

Step 4:
Merged clusters: [[36]] and [[49]]
Minimum distance: 13.000000
New clusters list:
[[[64]], [[81]], [[9], [1], [4]], [[16],

In [56]:
#7.2.3

In [57]:
# Twelve points in the plane, each starting as its own singleton cluster.
# Each entry is [centroid, points]: the centroid is a tuple and points is a
# list holding the cluster's points.
clusters = [[(4,10),[[4,10]]], [(7,10),[[7,10]]], [(4,8),[[4,8]]],
           [(6,8),[[6,8]]],[(3,4),[[3,4]]],[(2,2),[[2,2]]],[(5,2),[[5,2]]],
           [(12,6),[[12,6]]],[(10,5),[[10,5]]],[(11,4),[[11,4]]],[(9,3),[[9,3]]],
           [(12,3),[[12,3]]]]

In [58]:
clusters

[[(4, 10), [[4, 10]]],
 [(7, 10), [[7, 10]]],
 [(4, 8), [[4, 8]]],
 [(6, 8), [[6, 8]]],
 [(3, 4), [[3, 4]]],
 [(2, 2), [[2, 2]]],
 [(5, 2), [[5, 2]]],
 [(12, 6), [[12, 6]]],
 [(10, 5), [[10, 5]]],
 [(11, 4), [[11, 4]]],
 [(9, 3), [[9, 3]]],
 [(12, 3), [[12, 3]]]]

In [59]:
agg_(clusters)

Step 1:
Merged clusters: [[10, 5]] and [[11, 4]]
Minimum distance: 1.414214
New clusters list:
[[[4, 10]], [[7, 10]], [[4, 8]], [[6, 8]], [[3, 4]], [[2, 2]], [[5, 2]], [[12, 6]], [[9, 3]], [[12, 3]], [[10, 5], [11, 4]]]
New centroids:
[(4, 10), (7, 10), (4, 8), (6, 8), (3, 4), (2, 2), (5, 2), (12, 6), (9, 3), (12, 3), (10.5, 4.5)]

--------------------------------------------------------

Step 2:
Merged clusters: [[4, 10]] and [[4, 8]]
Minimum distance: 2.000000
New clusters list:
[[[7, 10]], [[6, 8]], [[3, 4]], [[2, 2]], [[5, 2]], [[12, 6]], [[9, 3]], [[12, 3]], [[10, 5], [11, 4]], [[4, 10], [4, 8]]]
New centroids:
[(7, 10), (6, 8), (3, 4), (2, 2), (5, 2), (12, 6), (9, 3), (12, 3), (10.5, 4.5), (4.0, 9.0)]

--------------------------------------------------------

Step 3:
Merged clusters: [[12, 6]] and [[10, 5], [11, 4]]
Minimum distance: 2.121320
New clusters list:
[[[7, 10]], [[6, 8]], [[3, 4]], [[2, 2]], [[5, 2]], [[9, 3]], [[12, 3]], [[4, 10], [4, 8]], [[12, 6], [10, 5], [11, 4]]]

In [60]:
mean([[9, 3], [12, 3], [12, 6], [10, 5], [11, 4], [4, 10], [4, 8], [7, 10], [6, 8], [5, 2], [3, 4], [2, 2]])

(7.083333333333333, 5.416666666666667)

In [61]:
a = np.array([[9, 3], [12, 3], [12, 6], [10, 5], [11, 4], [4, 10], [4, 8], [7, 10], [6, 8], [5, 2], [3, 4], [2, 2]])

In [62]:
np.mean(a, axis = 0)

array([7.08333333, 5.41666667])

In [63]:
def radius(x,y=[]):
    """
    Radius of the cluster obtained by merging clusters x and y.

    x and y are clusters, each given as a list of points (vectors). The
    radius is the largest Euclidean distance between a point of the merged
    cluster and the centroid of that merged cluster.

    With a single argument, the radius of that one cluster is returned.
    """
    # y is only read (x + y builds a new list), never modified
    points = x + y
    # centroid of the merged cluster
    center = mean(points)
    # seeding with 0 gives 0 when every point coincides with the centroid
    return max([0] + [Euclidean(pt, center) for pt in points])

In [64]:
# checking this function with the result of Example 7.4
radius([[12,6]],[[10,5],[11,4],[12,3],[9,3]])

2.163330765278394

In [65]:
# can also use radius on a single cluster
radius([[12,6],[10,5],[11,4],[12,3],[9,3]])

2.163330765278394

In [67]:
def agg_(clusters, print_summary = True, dist = 'Euclidean'):
    """
    Agglomerative (hierarchical) clustering of points in Euclidean space.

    Parameters
    ----------
    clusters : list
        A list of [centroid, points] pairs. centroid is a tuple of
        coordinates and points is a list of points (each point a list of
        coordinates). The list is modified IN PLACE: every step removes
        the two closest clusters and appends their union, until at most
        one cluster is left.
    print_summary : bool
        When True, print the merged pair, the distance between them and
        the resulting clusters / centroids after every step.
    dist : str
        How the distance between two clusters is measured:
        'Euclidean' - distance between the two centroids;
        'mins'      - minimum distance between any two points, one from
                      each cluster;
        'avg'       - average distance over all pairs of points, one from
                      each cluster;
        'radius'    - radius of the cluster that would result from the
                      merge.

    Returns
    -------
    None
        The result is left in `clusters`.

    Raises
    ------
    ValueError
        If `dist` is not one of the supported names.
    """

    # specifying the distance function used
    # r_ = 0: the distance function receives the centroids
    # r_ = 1: the distance function receives the lists of points
    if dist == 'Euclidean':
        f_dist = Euclidean
        r_ = 0
    elif dist == 'mins':
        f_dist = mins
        r_ = 1
    elif dist == 'avg':
        f_dist = avg
        r_ = 1
    elif dist == 'radius':
        f_dist = radius
        r_ = 1
    else:
        raise ValueError("unknown dist %r; expected 'Euclidean', 'mins', 'avg' or 'radius'" % (dist,))

    # start main code to conduct clustering
    step = 1
    while len(clusters) > 1:
        n = len(clusters)
        # The closest pair is remembered by POSITION in the list. Looking
        # positions up through a centroid-keyed dictionary deletes the wrong
        # entries as soon as two clusters share the same centroid
        # (e.g. duplicate points), which silently loses points.
        min_dist = None
        ix1 = None
        ix2 = None
        # double loop to consider the distance between all pairs of clusters
        for i in range(n-1):
            for j in range(i+1,n):
                # the distance between cluster i and cluster j under `dist`
                distance_ij = f_dist(clusters[i][r_], clusters[j][r_])
                # the first pair seeds the minimum, so no "large number"
                # sentinel can cap the distances that are considered
                if min_dist is None or distance_ij < min_dist:
                    min_dist = distance_ij
                    ix1 = i
                    ix2 = j
        c1 = clusters[ix1]
        c2 = clusters[ix2]
        # merge the two clusters that are closest under the chosen distance
        new_cluster = c1[1] + c2[1]
        new_centroid = mean(new_cluster)
        clusters.append([new_centroid, new_cluster])
        # remove the merged clusters from the list; ix2 > ix1, so deleting
        # the larger index first keeps the smaller one valid
        del clusters[ix2]
        del clusters[ix1]
        if print_summary:
            print('Step %d:' % step)
            print('Merged clusters: %s and %s' %(str(c1[1]),str(c2[1])))
            print('Minimum distance: %f' % min_dist)
            print('New clusters list:')
            print([el[1] for el in clusters])
            print('New centroids:')
            print([el[0] for el in clusters])
            print('')
            print('--------------------------------------------------------')
            print('')
        step += 1
    
# Alternatively, can use np.mean to create the new centroid
# new_centroid = tuple(np.mean(np.array(new_cluster),axis=0))

In [68]:
# Reset to the twelve singleton 2-D clusters before the next run: agg_ merges
# the list in place. Each entry is [centroid, points].
clusters = [[(4,10),[[4,10]]], [(7,10),[[7,10]]], [(4,8),[[4,8]]],
           [(6,8),[[6,8]]],[(3,4),[[3,4]]],[(2,2),[[2,2]]],[(5,2),[[5,2]]],
           [(12,6),[[12,6]]],[(10,5),[[10,5]]],[(11,4),[[11,4]]],[(9,3),[[9,3]]],
           [(12,3),[[12,3]]]]

In [69]:
agg_(clusters, dist = 'radius')

Step 1:
Merged clusters: [[10, 5]] and [[11, 4]]
Minimum distance: 0.707107
New clusters list:
[[[4, 10]], [[7, 10]], [[4, 8]], [[6, 8]], [[3, 4]], [[2, 2]], [[5, 2]], [[12, 6]], [[9, 3]], [[12, 3]], [[10, 5], [11, 4]]]
New centroids:
[(4, 10), (7, 10), (4, 8), (6, 8), (3, 4), (2, 2), (5, 2), (12, 6), (9, 3), (12, 3), (10.5, 4.5)]

--------------------------------------------------------

Step 2:
Merged clusters: [[4, 10]] and [[4, 8]]
Minimum distance: 1.000000
New clusters list:
[[[7, 10]], [[6, 8]], [[3, 4]], [[2, 2]], [[5, 2]], [[12, 6]], [[9, 3]], [[12, 3]], [[10, 5], [11, 4]], [[4, 10], [4, 8]]]
New centroids:
[(7, 10), (6, 8), (3, 4), (2, 2), (5, 2), (12, 6), (9, 3), (12, 3), (10.5, 4.5), (4.0, 9.0)]

--------------------------------------------------------

Step 3:
Merged clusters: [[7, 10]] and [[6, 8]]
Minimum distance: 1.118034
New clusters list:
[[[3, 4]], [[2, 2]], [[5, 2]], [[12, 6]], [[9, 3]], [[12, 3]], [[10, 5], [11, 4]], [[4, 10], [4, 8]], [[7, 10], [6, 8]]]
New centr

In [70]:
def diameter(x,y=[]):
    """
    Diameter of the cluster obtained by merging clusters x and y.

    x and y are clusters, each given as a list of points (vectors). The
    diameter is the largest Euclidean distance between any two points of
    the merged cluster.

    With a single argument, the diameter of that one cluster is returned.
    """
    # y is only read (x + y builds a new list), never modified
    points = x + y
    widest = 0
    # visit every unordered pair once: each point against the points after it
    for idx, first in enumerate(points):
        for second in points[idx + 1:]:
            gap = Euclidean(first, second)
            if gap > widest:
                widest = gap
    return widest

In [71]:
# checking result with Example 7.4
diameter([[12,6]],[[10,5],[11,4],[12,3],[9,3]])

4.242640687119285

In [72]:
# using only one argument
diameter([[12,6],[10,5],[11,4],[12,3],[9,3]])

4.242640687119285

In [74]:
def agg_(clusters, print_summary = True, dist = 'Euclidean'):
    """
    Agglomerative (hierarchical) clustering of points in Euclidean space.

    Parameters
    ----------
    clusters : list
        A list of [centroid, points] pairs. centroid is a tuple of
        coordinates and points is a list of points (each point a list of
        coordinates). The list is modified IN PLACE: every step removes
        the two closest clusters and appends their union, until at most
        one cluster is left.
    print_summary : bool
        When True, print the merged pair, the distance between them and
        the resulting clusters / centroids after every step.
    dist : str
        How the distance between two clusters is measured:
        'Euclidean' - distance between the two centroids;
        'mins'      - minimum distance between any two points, one from
                      each cluster;
        'avg'       - average distance over all pairs of points, one from
                      each cluster;
        'radius'    - radius of the cluster that would result from the
                      merge;
        'diameter'  - diameter of the cluster that would result from the
                      merge.

    Returns
    -------
    None
        The result is left in `clusters`.

    Raises
    ------
    ValueError
        If `dist` is not one of the supported names.
    """

    # specifying the distance function used
    # r_ = 0 implies we consider centroids of the two clusters in merge step
    # r_ = 1 means that we consider the points of the two clusters themselves in merge step
    if dist == 'Euclidean':
        f_dist = Euclidean
        r_ = 0
    elif dist == 'mins':
        f_dist = mins
        r_ = 1
    elif dist == 'avg':
        f_dist = avg
        r_ = 1
    elif dist == 'radius':
        f_dist = radius
        r_ = 1
    elif dist == 'diameter':
        f_dist = diameter
        r_ = 1
    else:
        raise ValueError("unknown dist %r; expected 'Euclidean', 'mins', 'avg', 'radius' or 'diameter'" % (dist,))

    # start main code to conduct clustering
    step = 1
    while len(clusters) > 1:
        n = len(clusters)
        # The closest pair is remembered by POSITION in the list. Looking
        # positions up through a centroid-keyed dictionary deletes the wrong
        # entries as soon as two clusters share the same centroid
        # (e.g. duplicate points), which silently loses points.
        min_dist = None
        ix1 = None
        ix2 = None
        # double loop to consider the distance between all pairs of clusters
        for i in range(n-1):
            for j in range(i+1,n):
                # the distance between cluster i and cluster j under `dist`
                distance_ij = f_dist(clusters[i][r_], clusters[j][r_])
                # the first pair seeds the minimum, so no "large number"
                # sentinel can cap the distances that are considered
                if min_dist is None or distance_ij < min_dist:
                    min_dist = distance_ij
                    ix1 = i
                    ix2 = j
        c1 = clusters[ix1]
        c2 = clusters[ix2]
        # merge the two clusters that are closest under the chosen distance
        new_cluster = c1[1] + c2[1]
        new_centroid = mean(new_cluster)
        clusters.append([new_centroid, new_cluster])
        # remove the merged clusters from the list; ix2 > ix1, so deleting
        # the larger index first keeps the smaller one valid
        del clusters[ix2]
        del clusters[ix1]
        if print_summary:
            print('Step %d:' % step)
            print('Merged clusters: %s and %s' %(str(c1[1]),str(c2[1])))
            print('Minimum distance: %f' % min_dist)
            print('New clusters list:')
            print([el[1] for el in clusters])
            print('New centroids:')
            print([el[0] for el in clusters])
            print('')
            print('--------------------------------------------------------')
            print('')
        step += 1
    
# Alternatively, can use np.mean to create the new centroid
# new_centroid = tuple(np.mean(np.array(new_cluster),axis=0))

In [75]:
# Reset to the twelve singleton 2-D clusters before the next run: agg_ merges
# the list in place. Each entry is [centroid, points].
clusters = [[(4,10),[[4,10]]], [(7,10),[[7,10]]], [(4,8),[[4,8]]],
           [(6,8),[[6,8]]],[(3,4),[[3,4]]],[(2,2),[[2,2]]],[(5,2),[[5,2]]],
           [(12,6),[[12,6]]],[(10,5),[[10,5]]],[(11,4),[[11,4]]],[(9,3),[[9,3]]],
           [(12,3),[[12,3]]]]

In [76]:
agg_(clusters, dist = 'diameter')

Step 1:
Merged clusters: [[10, 5]] and [[11, 4]]
Minimum distance: 1.414214
New clusters list:
[[[4, 10]], [[7, 10]], [[4, 8]], [[6, 8]], [[3, 4]], [[2, 2]], [[5, 2]], [[12, 6]], [[9, 3]], [[12, 3]], [[10, 5], [11, 4]]]
New centroids:
[(4, 10), (7, 10), (4, 8), (6, 8), (3, 4), (2, 2), (5, 2), (12, 6), (9, 3), (12, 3), (10.5, 4.5)]

--------------------------------------------------------

Step 2:
Merged clusters: [[4, 10]] and [[4, 8]]
Minimum distance: 2.000000
New clusters list:
[[[7, 10]], [[6, 8]], [[3, 4]], [[2, 2]], [[5, 2]], [[12, 6]], [[9, 3]], [[12, 3]], [[10, 5], [11, 4]], [[4, 10], [4, 8]]]
New centroids:
[(7, 10), (6, 8), (3, 4), (2, 2), (5, 2), (12, 6), (9, 3), (12, 3), (10.5, 4.5), (4.0, 9.0)]

--------------------------------------------------------

Step 3:
Merged clusters: [[7, 10]] and [[6, 8]]
Minimum distance: 2.236068
New clusters list:
[[[3, 4]], [[2, 2]], [[5, 2]], [[12, 6]], [[9, 3]], [[12, 3]], [[10, 5], [11, 4]], [[4, 10], [4, 8]], [[7, 10], [6, 8]]]
New centr