In [4]:
from sklearn.cluster import KMeans
from scipy.spatial.distance import pdist
from scipy.cluster import hierarchy
from scipy.cluster.hierarchy import fcluster
from sklearn.metrics import silhouette_score


#Function that implements the k-means clustering algorithm
def kmeans_algorithm(data_frame):
    """Interactively run k-means and print the resulting silhouette score.

    The user is prompted for the number of clusters ``k`` until a usable
    integer is entered. K-means is then fitted on every column of
    ``data_frame`` except the last one, and the silhouette score of the
    resulting labelling (scaled by 100, rounded to 2 decimals) is printed.

    Args:
        data_frame: Object supporting ``.iloc[:, :-1]`` and ``len()``
            (presumably a pandas DataFrame whose last column is a
            label/target that must not be clustered on -- confirm with
            the caller).

    Returns:
        None. The score is written to stdout.

    Raises:
        ValueError: If there are fewer than 3 rows, since the silhouette
            score needs between 2 and ``n_samples - 1`` clusters.
    """
    X = data_frame.iloc[:, :-1]

    # silhouette_score is only defined for 2 <= k <= n_samples - 1, so
    # reject anything outside that range at the prompt instead of letting
    # scikit-learn fail after the user has already answered.
    max_clusters = len(X) - 1
    if max_clusters < 2:
        raise ValueError(
            'At least 3 samples are required to compute a silhouette score'
        )

    while True:
        # input() is deliberately outside the try block: EOFError and
        # KeyboardInterrupt must propagate, otherwise a closed stdin
        # would make this loop spin forever.
        raw_value = input('Specify the number of clusters "k" for the k-means clustering algorithm: ')
        try:
            n_clusters = int(raw_value)
        except ValueError:
            print('Invalid input. Please try again')
            continue
        if 2 <= n_clusters <= max_clusters:
            break
        print(f'The number of clusters must be between 2 and {max_clusters}. Please try again')

    kmeans = KMeans(n_clusters=n_clusters)
    kmeans.fit(X)

    # Expressed as a percentage-like value, matching the rest of the file.
    silhouette = round(silhouette_score(X, kmeans.labels_) * 100, 2)
    print(f"Silhouette Score: {silhouette}")

#Function that implements the Hierarchical clustering algorithm
def hierarchical_clustering(data, linkage='ward', n_clusters=2):
    """Interactively run agglomerative clustering and print its silhouette score.

    Every column of ``data`` except the last one is passed through
    ``data_preprocess_for_the_clustering_algorithms``. The user then picks
    a distance metric from a numbered menu, the observations are linked
    with the requested ``linkage`` method, the dendrogram is cut into at
    most ``n_clusters`` flat clusters, and the silhouette score (scaled by
    100, rounded to 3 decimals) is printed.

    Args:
        data: Object supporting ``.iloc[:, :-1]`` (presumably a pandas
            DataFrame whose last column is a label/target -- confirm with
            the caller).
        linkage: Linkage method name forwarded to
            ``scipy.cluster.hierarchy.linkage``.
        n_clusters: Maximum number of flat clusters to extract. Defaults
            to 2, the value that was previously hard-coded.

    Returns:
        None. The menu, any notes and the score are written to stdout.
    """
    data = data_preprocess_for_the_clustering_algorithms(data.iloc[:, :-1])
    metrics = {
        "1": "euclidean",
        "2": "cityblock",
        "3": "minkowski",
        "4": "chebyshev",
    }

    print("Available Distance Metrics:")
    print(tabulate(metrics.items(), headers=["Metric"], tablefmt="fancy_grid"))

    while True:
        raw_choice = input('Type the corresponding number of the metric you would like to use for clustering: ')
        try:
            metric_choice = int(raw_choice)
        except ValueError:
            print('Invalid input. Please enter a number between 1 and 4.')
            continue
        if 1 <= metric_choice <= 4:
            break
        print('There are only 4 options. Please try again')

    metric = metrics[str(metric_choice)]

    # SciPy documents 'ward', 'centroid' and 'median' as correctly defined
    # only for Euclidean distances. pdist's 'minkowski' defaults to p=2,
    # which is the Euclidean distance, so it is treated as compatible.
    if linkage in ('ward', 'centroid', 'median') and metric not in ('euclidean', 'minkowski'):
        print(
            f"Note: '{linkage}' linkage is only well-defined for Euclidean "
            f"distances; results with '{metric}' may not be meaningful."
        )

    # Condensed pairwise distances -> linkage matrix -> flat cluster labels.
    distance_matrix = pdist(data, metric=metric)
    cluster_result = hierarchy.linkage(distance_matrix, method=linkage)
    cluster_labels = fcluster(cluster_result, n_clusters, criterion='maxclust')

    # Score with the same metric that was used to build the clusters.
    silhouette = round(silhouette_score(data, cluster_labels, metric=metric) * 100, 3)
    print(f"Silhouette Score: {silhouette}")