### A.10 Function to Run Clustering Algorithms on a Dataset

In [1]:
# ==================================================================================

def run_algorithms(dataset, params):
    """
    Run every considered clustering algorithm on one dataset and collect,
    per algorithm, the predicted labels and the performance metrics.


    Parameters:
    ----------

    dataset: a tuple (X, y_true)

        X is the data matrix, shape (n_samples, n_attributes), and y_true
        holds the true labels, shape (n_samples).

    params: a dictionary

        Optimal parameters for each algorithm, looked up under the keys
        'kmean', 'dbscan', 'spectral_c', 'gmm', 'agglo-single',
        'agglo-complete', 'agglo-average' and 'agglo-ward'. Each entry is
        passed unchanged to the matching get_cluster_label_* helper.


    Returns:
    -------

    result_y_pred, result_metrics: a tuple of two lists

        result_y_pred holds (algorithm key, predicted labels) pairs and
        result_metrics holds (algorithm key, performance metrics) pairs.
        Both lists follow the key order given above. The metrics are
        whatever get_performance_metrics(X, y_true, y_pred) returns.

    """
    data_matrix, labels_true = dataset

    # (key, labelling helper) pairs in reporting order. The key serves both as
    # the lookup into `params` and as the tag attached to each result entry.
    algorithms = (
        ('kmean', get_cluster_label_kmean),
        ('dbscan', get_cluster_label_dbscan),
        # Note carried over from the original code for spectral clustering:
        # "LinAlgError: 3-th leading minor of the array is not positive definite"
        ('spectral_c', get_cluster_label_sc),
        # Gaussian Mixture Models
        ('gmm', get_cluster_label_gmm),
        # The four agglomerative variants share one helper and differ only in
        # the `params` entry handed to it.
        ('agglo-single', get_cluster_label_agglo),
        ('agglo-complete', get_cluster_label_agglo),
        ('agglo-average', get_cluster_label_agglo),
        ('agglo-ward', get_cluster_label_agglo),
    )

    result_y_pred = []
    result_metrics = []

    # Per-algorithm timing is not collected; only labels and metrics are returned.
    for algo_key, label_fn in algorithms:
        predicted = label_fn(data_matrix, params[algo_key])
        scores = get_performance_metrics(data_matrix, labels_true, predicted)
        result_metrics.append((algo_key, scores))
        result_y_pred.append((algo_key, predicted))

    return result_y_pred, result_metrics