### A.12 Functions to Display Results of Clustering Algorithms

In [None]:
# ==================================================================================

def plot_clustering_result(data, ax=None, algo=None):
    """
    Scatter-plot the first two features of a dataset, styled by cluster label.

    Parameters
    ----------
    data : tuple
        ``(X, labels)``. ``X`` is a 2-D array of shape
        (n_samples, n_attributes); only columns 0 and 1 are drawn.
        ``labels`` is a 1-D array-like of length n_samples holding the
        cluster label of each point.
    ax : matplotlib axes, optional
        Axes to draw on. When omitted, the current axes (``plt.gca()``)
        is used.
    algo : str, optional
        Name of the algorithm that produced the labels; used as the title
        of the axes.

    Returns
    -------
    None

    Notes
    -----
    The marker list ``mark``, the marker size ``size`` and the transparency
    ``aph`` are read from names defined outside this function.

    When only one distinct label is present, every point is drawn in grey
    with the last marker of ``mark``. Otherwise each label gets its own
    colour and marker, except label ``-1`` which is drawn in grey.
    Axis ticks are removed in both cases.
    """
    # Explicit None test: do not rely on the truthiness of an axes object.
    if ax is None:
        ax = plt.gca()

    X, predicted_label = data
    # Accept plain sequences too: with a list, `predicted_label == k` would
    # collapse to a single bool and the scatter would silently draw nothing.
    predicted_label = np.asarray(predicted_label)

    cluster_labels = np.unique(predicted_label)
    n_cluster = len(cluster_labels)

    # One style slot per label value 0..max(label); nothing for empty input.
    n_styles = int(predicted_label.max() + 1) if predicted_label.size else 0

    colors = np.array(list(islice(cycle([
        'DarkGreen', 'Orange', 'Blue', 'Brown', 'Teal',
        'Indigo', 'Gold', 'Lime', 'Aqua', 'Red', 'Purple',
        'LightGreen', 'DarkKhaki', 'Maroon', 'MediumAquamarine',
        'Magenta', 'DeepSkyBlue', 'Grey',
    ]), n_styles)))
    mark2 = np.array(list(islice(cycle(mark), n_styles)))

    if n_cluster == 1:
        # no structure: a single cluster is drawn uniformly in grey
        ax.scatter(X[:, 0], X[:, 1], marker=mark[-1], s=size, c='Grey', alpha=aph)
    else:
        for k in cluster_labels:
            mask = predicted_label == k
            # Labels may arrive as floats (e.g. 1.0); indices must be ints.
            idx = int(k)
            ax.scatter(X[:, 0][mask], X[:, 1][mask],
                       c='Grey' if k == -1 else colors[idx],
                       marker=mark2[idx],
                       s=size,
                       alpha=aph,
                       label='c{}'.format(k))

    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_title(algo)

    return None

# ==================================================================================

def plot_n_cluster_results(D, y_pred):
    """
    Plot the original dataset followed by the cluster assignments produced
    by several clustering algorithms, on a grid with five panels per row.

    Parameters
    ----------
    D : tuple
        ``(X, y)``. ``X`` is an array of shape (n_samples, n_attributes) and
        ``y`` is an array of shape (n_samples); they are drawn in the first
        panel under the title 'Original Dataset'.
    y_pred : sequence of 2-element sequences
        Each entry is ``(name, labels)``: the name of the algorithm used to
        cluster ``X`` (shown as the panel title) and a 1-D array of the
        cluster labels it predicted.

    Returns
    -------
    None
    """
    X, y = D

    n_fig = len(y_pred)

    column = 5  # number of panels per row
    # n_fig result panels plus one for the original data, in full rows
    row = n_fig // column + 1

    width, height = 3, 3
    fig = plt.figure(figsize=(width * column, height * row))
    fig.subplots_adjust(left=.1, right=0.85, bottom=0.1, top=0.9, wspace=0.02, hspace=0.2)
    # squeeze=False keeps the axes grid 2-D even when there is only one row
    # (fewer than five results); otherwise ax[0, 0] raises IndexError.
    ax = fig.subplots(row, column, sharex=True, sharey=True, squeeze=False)

    # first panel: the original dataset
    plot_clustering_result((X, y), ax[0, 0], 'Original Dataset')

    # remaining panels, in row-major order: one per algorithm result
    for panel, result in zip(ax.ravel()[1:], y_pred):
        plot_clustering_result((X, result[1]), panel, result[0])

    plt.show()

    return None


# ==================================================================================

def plot_scores(score_algos):
    """
    Plot average Silhouette scores for each clustering algorithm.

    One subplot is drawn per algorithm. The x-axis is the parameter grid
    index (each index stands for one combination of parameter values of the
    algorithm) and the y-axis is the Silhouette score. Three curves are
    drawn per subplot, labelled 'l2', 'l1' and 'cos'.

    Parameters
    ----------
    score_algos : dict
        Keys are the names of the clustering algorithms (used as subplot
        titles). Each value is an indexable of at least three score
        sequences: ``value[0]`` is plotted as 'l2', ``value[1]`` as 'l1'
        and ``value[2]`` as 'cos'.

    Returns
    -------
    None
    """
    # number of figures to be plotted; one per algorithm
    n_fig = len(score_algos)

    column = 4  # number of subplots per row
    # Keep the original two-row layout, but grow the grid when more than
    # eight algorithms are given instead of failing on the ninth subplot.
    row = max(2, -(-n_fig // column))

    width, height = 3, 3
    fig = plt.figure(figsize=(width * column, height * row))
    fig.subplots_adjust(left=.125, right=0.9, bottom=0.1, top=0.9, wspace=0.35, hspace=0.35)

    # (line format, legend label) for value[0], value[1], value[2]
    series = (('-rd', 'l2'), ('-gd', 'l1'), ('-bd', 'cos'))

    for fig_num, (key, value) in enumerate(score_algos.items(), start=1):
        plt.subplot(row, column, fig_num)

        for idx, (fmt, metric) in enumerate(series):
            scores = value[idx]
            plt.plot(range(0, len(scores)), scores,
                     fmt,
                     markersize=4,
                     linewidth=1.5,
                     markeredgecolor='gray',
                     markeredgewidth=1,
                     label=metric)

        plt.legend(loc='upper right')
        plt.title(key)
        plt.xlabel('Parameter Grid Index')
        plt.ylabel('Silhouette Average Score')

    plt.show()

    return None