Name: Ralph Matthew M. Gobui


# K-Modes Clustering

In [1]:
import pandas as pd
import numpy as np
from ucimlrepo import fetch_ucirepo
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import adjusted_rand_score as ARI, normalized_mutual_info_score as NMI
from kmodes.kmodes import KModes
from scipy.cluster.hierarchy import linkage, fcluster
from scipy.spatial.distance import pdist
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
import networkx as nx
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import NMF
from sklearn.cluster import SpectralClustering
import tensorflow as tf
from tensorflow.keras import layers, Sequential
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.base import BaseEstimator, ClusterMixin
from sklearn.utils import check_array
import itertools
import requests
import random
from itertools import combinations
from io import StringIO
from sklearn.manifold import SpectralEmbedding, TSNE

In [2]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 280)

In [3]:
dataset_sources = {
    "Soybean": "https://archive.ics.uci.edu/ml/machine-learning-databases/soybean/soybean-small.data",
    "Zoo": "https://archive.ics.uci.edu/ml/machine-learning-databases/zoo/zoo.data",
    "Heart Disease": "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data",
    "Breast Cancer": "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data",
    "Dermatology": "https://archive.ics.uci.edu/ml/machine-learning-databases/dermatology/dermatology.data",
    "Letters (E, F)": "https://archive.ics.uci.edu/ml/machine-learning-databases/letter-recognition/letter-recognition.data",
    "DNA": "https://archive.ics.uci.edu/ml/machine-learning-databases/molecular-biology/splice-junction-gene-sequences/splice.data",
    "Mushroom": "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
    "Iris": "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
}

In [4]:
n_clusters_dict = {
    "Soybean": 4,
    "Zoo": 7,
    "Heart Disease": 2,
    "Breast Cancer": 2,
    "Dermatology": 6,
    "Letters (E, F)": 2,
    "DNA": 3,
    "Mushroom": 2,
    "Iris": 3
}

In [5]:
datasets_for_ensemble = [
    "Iris"
]

In [6]:
# No. of runs for benchmarking
num_runs = 5

In [7]:
# No. of k-means runs for clustering ensemble
n_runs = 60

In [8]:
dataframes = {}

for name, source in dataset_sources.items():
    response = requests.get(source)
    data = response.text
    
    # Convert the CSV/Text data into a DataFrame
    data_io = StringIO(data)
    df = pd.read_csv(data_io, header=None)

    # Set targets and features
    if name == "Letters (E, F)":
        y = df.iloc[:, 0]
        X = df.iloc[:, 1:]
    elif name == "Mushroom":
        y = df.iloc[:, 0]
        X = df.iloc[:, 1:]
    elif name == "DNA":
        y = df.iloc[:, 0].str.strip()
        X = pd.DataFrame([list(seq.strip()) for seq in df.iloc[:, 2]])
    else:
        X, y = df.iloc[:, :-1], df.iloc[:, -1]

    # Drop columns with only 1 unique value
    for col in X.columns:
        if len(X[col].unique()) <= 1:
            X.drop(columns=[col], inplace=True) # Diregard warning as it is behaving as expected

    # Store in the dataframes dictionary
    dataframes[name] = {'features': X, 'targets': y}

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [9]:
def preprocess_datasets(dataframes):
    if 'Zoo' in dataframes:
        zoo_df = dataframes['Zoo']['features']
        zoo_df = zoo_df.drop(columns=[0])
        dataframes['Zoo']['features'] = zoo_df

    if 'Heart Disease' in dataframes:
        hd_df = dataframes['Heart Disease']['features']
        columns_to_drop = [0, 3, 4, 7, 9]
        hd_df = hd_df.drop(columns=hd_df.columns[columns_to_drop])
        dataframes['Heart Disease']['features'] = hd_df
        y_hd = dataframes['Heart Disease']['targets']
        dataframes['Heart Disease']['targets'] = y_hd.apply(lambda x: 0 if x == 0 else 1)
    
    if 'Breast Cancer' in dataframes:
        bc_df = dataframes['Breast Cancer']['features']
        bc_df = bc_df.drop(columns=bc_df.columns[0])
        dataframes['Breast Cancer']['features'] = bc_df
    
    if 'Dermatology' in dataframes:
        dt_df = dataframes['Dermatology']['features']
        dt_df = dt_df.drop(columns=dt_df.columns[-1])
        dataframes['Dermatology']['features'] = dt_df

    if 'Letters (E, F)' in dataframes:
        let_ef_df = dataframes['Letters (E, F)']['features']
        let_ef_targets = dataframes['Letters (E, F)']['targets']
        mask = let_ef_targets.isin(['E', 'F'])
        dataframes['Letters (E, F)']['features'] = let_ef_df[mask]
        dataframes['Letters (E, F)']['targets'] = let_ef_targets[mask]

    return dataframes

In [10]:
dataframes = preprocess_datasets(dataframes)

In [11]:
def run_multiple_kmeans(features, n_clusters, n_runs):
    all_labels = []
    for i in range(n_runs):
        random_state = random.randint(0, 1000)
        kmeans = KMeans(n_clusters=n_clusters, init='k-means++', random_state=random_state, n_init=10)
        labels = kmeans.fit_predict(features)
        all_labels.append(labels)
    return np.array(all_labels).T

In [12]:
for dataset_name in datasets_for_ensemble:
    # Extracting features and targets from the preloaded datasets
    features = dataframes[dataset_name]["features"]
    targets = dataframes[dataset_name]["targets"]

    # Converting targets to numerical labels if they aren't already
    if targets.dtype.kind in 'O':  # Check if targets are object type (e.g., strings)
        targets = LabelEncoder().fit_transform(targets)

    # Determine the number of clusters from the unique elements in targets
    n_clusters = len(np.unique(targets))

    # Run multiple k-means and collect results
    ensembled_features = run_multiple_kmeans(features, n_clusters, n_runs)
    
    # Convert numpy array to DataFrame and replace the original data
    ensembled_features_df = pd.DataFrame(ensembled_features, index=features.index)
    dataframes[dataset_name]["features"] = ensembled_features_df







In [13]:
def perform_kmodes(features, n_clusters):
    """Perform clustering using KModes algorithm."""
    km = KModes(n_clusters=n_clusters, init='random', n_init=5)
    clusters = km.fit_predict(features)
    return clusters

In [14]:
def perform_ordinal_encoding(features, true_labels, n_clusters):
    """Perform Ordinal Encoding followed by clustering."""
    encoder = LabelEncoder()
    features_encoded = features.apply(encoder.fit_transform)
    kmeans = KMeans(n_clusters=n_clusters, n_init=10)
    return kmeans.fit_predict(features_encoded, n_clusters)

In [15]:
def perform_one_hot_encoding(features, true_labels, n_clusters):
    """Perform One-Hot Encoding followed by clustering."""
    encoder = OneHotEncoder()
    features_encoded = encoder.fit_transform(features).toarray()
    kmeans = KMeans(n_clusters=n_clusters, n_init=10)
    return kmeans.fit_predict(features_encoded, n_clusters)

In [16]:
def perform_link(features, n_clusters):
    encoder = OneHotEncoder()
    features_encoded = encoder.fit_transform(features).toarray()
    # Calculate pairwise dissimilarities (1 - similarity), ensuring non-negative distances
    ochiai_distance = pdist(features_encoded, lambda u, v: max(0, 1 - ochiai_coefficient_for_link(u, v)))
    link_matrix = linkage(ochiai_distance, method='average')
    clusters = fcluster(link_matrix, t=n_clusters, criterion='maxclust')
    return clusters

In [17]:
def ochiai_coefficient_for_link(b1, b2):
    intersection = np.dot(b1, b2)
    norm_b1 = np.sqrt(np.dot(b1, b1))
    norm_b2 = np.sqrt(np.dot(b2, b2))
    denominator = (norm_b1 * norm_b2)
    if denominator == 0:
        return 0  # Return 0 if either or both vectors are all zeros
    return intersection / denominator

In [18]:
def perform_cde(features, n_clusters):
    """Perform Categorical Data Embedding and clustering using t-SNE and k-Means."""
    encoder = OneHotEncoder(sparse_output=False)
    features_encoded = encoder.fit_transform(features)
    tsne_model = TSNE(n_components=2, perplexity=30, learning_rate=200)
    features_embedded = tsne_model.fit_transform(features_encoded)
    kmeans = KMeans(n_clusters=n_clusters, n_init=10)
    clusters = kmeans.fit_predict(features_embedded)
    return clusters

In [19]:
def perform_cdc_dr(features, n_clusters, embedding_method='SE', operation='Joint'):
    """
    Perform CDC_DR algorithm with specified graph embedding method and operation.
    :param features: DataFrame of features
    :param n_clusters: Number of clusters to form
    :param embedding_method: 'NE', 'SE', 'NMF', or 'AE'
    :param operation: 'Joint' or 'Mean'
    :return: clusters - Cluster labels for each sample
    """
    
    # Construct similarity graph from features
    graph = construct_similarity_graph(features)

    # Apply graph embedding technique
    embedded_graph = graph_embedding(graph, method=embedding_method)

    value_to_index = create_value_to_index_mapping(features)
    
    # Ensure integrated_data is 2D before clustering
    if operation == 'Joint':
        integrated_data = joint_operation(embedded_graph, features, value_to_index)
    elif operation == 'Mean':
        integrated_data = mean_operation(embedded_graph, features, value_to_index)
    else:
        raise ValueError("Operation must be either 'Joint' or 'Mean'.")

    # Cluster the integrated data
    kmeans = KMeans(n_clusters=n_clusters, n_init=10)
    clusters = kmeans.fit_predict(integrated_data)

    return clusters

In [20]:
def construct_similarity_graph(features):
    """
    Construct a similarity graph from features based on categorical values.
    :param features: DataFrame of features, each row is a sample and columns are categorical features
    :return: graph - A NetworkX graph with nodes representing categorical values and weighted edges
    """
    # Step 1: Prepare all unique categorical values and their indices
    unique_values_dict = {}
    for column in features:
        unique_values = np.unique(features[column])
        for val in unique_values:
            unique_values_dict[f"{column}_{val}"] = np.where(features[column] == val)[0]
    
    # Step 2: Calculate similarity between all pairs of unique categorical values
    graph = nx.Graph()
    for (val1, indices1), (val2, indices2) in combinations(unique_values_dict.items(), 2):
        # Calculate similarity (e.g., using Ochiai coefficient)
        sim = ochiai_coefficient(indices1, indices2)  # Define this function based on your chosen similarity metric
        if sim > 0:  # If the similarity is non-zero, add an edge
            graph.add_edge(val1, val2, weight=sim)
    
    # Add all nodes explicitly in case some have no edges
    for val in unique_values_dict.keys():
        if val not in graph:
            graph.add_node(val)

    return graph

In [21]:
def ochiai_coefficient(indices1, indices2):
    """
    Calculate Ochiai coefficient between two sets of indices
    :param indices1: array-like list of indices for the first categorical value
    :param indices2: array-like list of indices for the second categorical value
    :return: Ochiai coefficient as float
    """
    set1 = set(indices1)
    set2 = set(indices2)
    intersection = len(set1.intersection(set2))
    if intersection == 0: return 0  # No overlap
    return intersection / np.sqrt(len(set1) * len(set2))  # Ochiai coefficient formula

In [22]:
def graph_embedding(graph, method='SE', dimensions=2):
    """
    Apply graph embedding method to the constructed graph.
    :param graph: NetworkX graph
    :param method: string representing the graph embedding method: 'NE', 'SE', 'NMF', 'AE'
    :param dimensions: the number of dimensions for the embedding
    :return: embedded_graph - An array-like embedded representation of the graph
    """
#     # Convert graph to adjacency matrix and then to numpy ndarray
#     adjacency_matrix = nx.to_numpy_matrix(graph)
#     adjacency_matrix = np.asarray(adjacency_matrix)

    # Convert graph to adjacency matrix and then to numpy ndarray
    adjacency_matrix = nx.convert_matrix.to_numpy_array(graph)
    adjacency_matrix = np.asarray(adjacency_matrix)
    
    if method == 'NE':
        # Directly use the adjacency matrix as features (no embedding)
        embedded_graph = adjacency_matrix

    elif method == 'SE':
        # Apply Spectral Embedding
        embedding_model = SpectralEmbedding(n_components=dimensions)
        embedded_graph = embedding_model.fit_transform(adjacency_matrix)

    elif method == 'NMF':
        # Apply Non-negative Matrix Factorization for embedding
        model = NMF(n_components=dimensions, init='random', max_iter=10000)
        embedded_graph = model.fit_transform(adjacency_matrix)

    elif method == 'AE':
        # Apply Autoencoder for graph embedding
        n_nodes = adjacency_matrix.shape[0]
        input_dim = n_nodes
        autoencoder = Sequential([
            layers.Dense(64, activation='relu', input_shape=(input_dim,)),
            layers.Dense(dimensions, activation='relu'),  # Embedding layer
            layers.Dense(64, activation='relu'),
            layers.Dense(input_dim, activation='sigmoid')
        ])
        autoencoder.compile(optimizer='adam', loss='mse')
        adjacency_matrix_norm = adjacency_matrix / np.max(adjacency_matrix)  # Normalize adjacency matrix
        autoencoder.fit(adjacency_matrix_norm, adjacency_matrix_norm, epochs=50, verbose=0)
        encoder = Sequential(autoencoder.layers[:2])  # The first two layers are the encoder
        embedded_graph = encoder.predict(adjacency_matrix_norm, verbose=0)

    else:
        raise NotImplementedError(f"Graph embedding method {method} is not implemented.")
    
    return np.array(embedded_graph)

In [23]:
def create_value_to_index_mapping(features):
    """
    Create a mapping from each unique categorical value to a unique index.
    :param features: DataFrame of features, each column is a categorical feature
    :return: Dictionary of value to index mapping
    """
    # Extracting unique values from each feature
    unique_values = set()
    for column in features.columns:
        unique_values.update(features[column].unique())

    # Creating a mapping from unique values to an index
    value_to_index = {value: idx for idx, value in enumerate(unique_values)}
    return value_to_index

In [24]:
def joint_operation(embedded_graph, features, value_to_index):
    # Concatenates the embeddings for each categorical value in each sample
    joint_embedded = []
    for _, row in features.iterrows():
        joint_vector = []
        for value in row:
            index = value_to_index[value]  # Map each categorical value to its index in the embedded graph
            joint_vector.extend(embedded_graph[index])
        joint_embedded.append(joint_vector)
    return np.array(joint_embedded)

In [25]:
def mean_operation(embedded_graph, features, value_to_index):
    # Calculates the mean of the embeddings for each categorical value in each sample
    mean_embedded = []
    for _, row in features.iterrows():
        vectors = [embedded_graph[value_to_index[value]] for value in row]
        mean_vector = np.mean(vectors, axis=0)
        mean_embedded.append(mean_vector)
    return np.array(mean_embedded)

In [26]:
def run_clustering_algorithms(dataframes, n_clusters_dict, num_runs=10):
    results_list = []
    for name, data in dataframes.items():
        print("Processing:", name)
        features = data['features']
        true_labels = data['targets'].squeeze()  # Assuming targets are in a single column
        n_clusters = n_clusters_dict.get(name, 2)  # Default to 2 clusters if not specified

        metrics = {'KModes': [], 'Ordinal': [], 'One-Hot': [], 'Link': [], 'CDE': []}  # Initialize a dictionary to store results for each method

        # Include CDC_DR methods in metrics dictionary
        embedding_methods = ['NE', 'SE', 'NMF', 'AE']  # Non-Embedding, Spectral Embedding, Nonnegative Matrix Factorization, Autoencoder
        operations = ['Joint', 'Mean']  # The two types of operations
        
        for em in embedding_methods:
            for op in operations:
                key_name = f"CDC_DR+{em} ({op})"
                metrics[key_name] = []

        for _ in range(num_runs):
            # KModes
            km_clusters = perform_kmodes(features, n_clusters)
            ari, nmi = calculate_metrics(true_labels, km_clusters)
            metrics['KModes'].append((ari, nmi))

            # Ordinal Encoding
            ord_clusters = perform_ordinal_encoding(features, true_labels, n_clusters)
            ari, nmi = calculate_metrics(true_labels, ord_clusters)
            metrics['Ordinal'].append((ari, nmi))

            # One-Hot Encoding
            oh_clusters = perform_one_hot_encoding(features, true_labels, n_clusters)
            ari, nmi = calculate_metrics(true_labels, oh_clusters)
            metrics['One-Hot'].append((ari, nmi))

            # Link with Ochiai Coefficient
            link_clusters = perform_link(features, n_clusters)
            ari, nmi = calculate_metrics(true_labels, link_clusters)
            metrics['Link'].append((ari, nmi))

            # CDE with t-SNE and k-Means
            cde_clusters = perform_cde(features, n_clusters)
            ari, nmi = calculate_metrics(true_labels, cde_clusters)
            metrics['CDE'].append((ari, nmi))

            # CDC_DR with various embedding methods and operations
            for embedding_method in ['NE', 'SE', 'NMF', 'AE']:
                for operation in ['Joint', 'Mean']:
                    cdc_dr_clusters = perform_cdc_dr(features, n_clusters, embedding_method, operation)
                    ari, nmi = calculate_metrics(true_labels, cdc_dr_clusters)
                    metrics[f"CDC_DR+{embedding_method} ({operation})"].append((ari, nmi))

        # Calculate mean and standard deviation for each method and append to results list
        for method, values in metrics.items():
            ari_vals, nmi_vals = zip(*values)
            ari_mean, ari_std = np.mean(ari_vals), np.std(ari_vals)
            nmi_mean, nmi_std = np.mean(nmi_vals), np.std(nmi_vals)
            results_list.append({
                "Dataset": name,
                "Method": method,
                "ARI": f"{ari_mean:.4f}±{ari_std:.2f}",
                "NMI": f"{nmi_mean:.4f}±{nmi_std:.2f}"
            })

    # Convert list of dictionaries to DataFrame for results
    results_df = pd.DataFrame(results_list)
    return results_df

In [27]:
def calculate_metrics(true_labels, predicted_labels):
    """Calculate clustering metrics: Adjusted Rand Index and Normalized Mutual Information."""
    return ARI(true_labels, predicted_labels), NMI(true_labels, predicted_labels)

In [28]:
def reformat_results(results_df):
    # Expanding the 'ARI' and 'NMI' columns into multiple rows with a new 'Metric' column
    expanded_df = pd.melt(results_df, id_vars=["Dataset", "Method"], value_vars=["ARI", "NMI"], var_name="Metric", value_name="Value")
    expanded_df[['Metric_Value', 'Std']] = expanded_df['Value'].str.split('±', expand=True)
    expanded_df.drop(columns=['Value'], inplace=True)  # Removing the original combined column
    
    # Convert the 'Metric_Value' and 'Std' columns to numeric types
    expanded_df['Metric_Value'] = expanded_df['Metric_Value'].astype(float)
    expanded_df['Std'] = expanded_df['Std'].astype(float)

    # Concatenate the metric value and standard deviation back into a single string
    expanded_df['Metric_Value'] = expanded_df['Metric_Value'].map('{:.4f}'.format) + "±" + expanded_df['Std'].map('{:.2f}'.format)
    
    # Ensuring the order of datasets and methods remains consistent with the original DataFrame
    dataset_order = results_df['Dataset'].unique()
    method_order = results_df['Method'].unique()

    # Creating a pivot table to restructure the DataFrame as required
    pivot_df = expanded_df.pivot_table(index=["Dataset", "Metric"], columns="Method", values="Metric_Value", aggfunc='first')
    
    # Reindexing the pivot table to maintain the original order
    pivot_df = pivot_df.reindex(dataset_order, level='Dataset')
    pivot_df = pivot_df.reindex(method_order, axis='columns')

    return pivot_df

In [29]:
results = run_clustering_algorithms(dataframes, n_clusters_dict, num_runs)

Processing: Soybean


  return fit_method(estimator, *args, **kwargs)












Processing: Zoo


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)






Processing: Heart Disease




  return fit_method(estimator, *args, **kwargs)




Processing: Breast Cancer






Processing: Dermatology


  return fit_method(estimator, *args, **kwargs)




Processing: Letters (E, F)






Processing: DNA
Processing: Mushroom
Processing: Iris


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


  return fit_method(estimator, *args, **kwargs)


In [30]:
formatted_results = reformat_results(results)

In [31]:
print(formatted_results)

Method                      KModes      Ordinal      One-Hot          Link          CDE CDC_DR+NE (Joint) CDC_DR+NE (Mean) CDC_DR+SE (Joint) CDC_DR+SE (Mean) CDC_DR+NMF (Joint) CDC_DR+NMF (Mean) CDC_DR+AE (Joint) CDC_DR+AE (Mean)
Dataset        Metric                                                                                                                                                                                                                
Soybean        ARI     0.9540±0.09  0.5476±0.00  1.0000±0.00   1.0000±0.00  0.9613±0.03       1.0000±0.00      0.0141±0.01       0.5562±0.00      0.1079±0.00        1.0000±0.00      -0.0047±0.00       0.7392±0.21     -0.0208±0.01
               NMI     0.9725±0.06  0.7167±0.00  1.0000±0.00   1.0000±0.00  0.9664±0.03       1.0000±0.00      0.1399±0.01       0.7248±0.00      0.2554±0.00        1.0000±0.00       0.0842±0.00       0.8507±0.12      0.0592±0.03
Zoo            ARI     0.7050±0.13  0.6928±0.13  0.7193±0.07   0.8893±0.00  0.65

In [54]:
# Extract K-modes results for the Soybean dataset
zoo_kmodes_results = formatted_results.loc['Soybean', 'KModes']

# Print the K-modes results for the Soybean dataset
print("K-modes results for Soybean:")
print(zoo_kmodes_results)
print('\n')

# Extract K-modes results for the Zoo dataset
zoo_kmodes_results = formatted_results.loc['Zoo', 'KModes']

print("K-modes results for Zoo:")
print(zoo_kmodes_results)
print('\n')

# Extract K-modes results for the Heart Disease dataset
zoo_kmodes_results = formatted_results.loc['Heart Disease', 'KModes']

print("K-modes results for Heart Disease:")
print(zoo_kmodes_results)
print('\n')

# Extract K-modes results for the Breast Cancer dataset
zoo_kmodes_results = formatted_results.loc['Breast Cancer', 'KModes']

print("K-modes results for Breast Cancer:")
print(zoo_kmodes_results)
print('\n')

# Extract K-modes results for the Dermatology dataset
zoo_kmodes_results = formatted_results.loc['Dermatology', 'KModes']

print("K-modes results for Dermatology:")
print(zoo_kmodes_results)
print('\n')

# Extract K-modes results for the Letters (E, F) dataset
zoo_kmodes_results = formatted_results.loc['Letters (E, F)', 'KModes']

print("K-modes results for Letters (E, F):")
print(zoo_kmodes_results)
print('\n')

# Extract K-modes results for the DNA dataset
zoo_kmodes_results = formatted_results.loc['DNA', 'KModes']

print("K-modes results for DNA:")
print(zoo_kmodes_results)
print('\n')

# Extract K-modes results for the Mushroom dataset
zoo_kmodes_results = formatted_results.loc['Mushroom', 'KModes']

print("K-modes results for Mushroom:")
print(zoo_kmodes_results)
print('\n')

# Extract K-modes results for the Iris dataset
zoo_kmodes_results = formatted_results.loc['Iris', 'KModes']

print("K-modes results for Iris:")
print(zoo_kmodes_results)
print('\n')

K-modes results for Soybean:
Metric
ARI    0.9540±0.09
NMI    0.9725±0.06
Name: KModes, dtype: object


K-modes results for Zoo:
Metric
ARI    0.7050±0.13
NMI    0.8053±0.05
Name: KModes, dtype: object


K-modes results for Heart Disease:
Metric
ARI    0.3788±0.00
NMI    0.2954±0.00
Name: KModes, dtype: object


K-modes results for Breast Cancer:
Metric
ARI    0.7306±0.05
NMI    0.6220±0.04
Name: KModes, dtype: object


K-modes results for Dermatology:
Metric
ARI    0.5708±0.03
NMI    0.6358±0.02
Name: KModes, dtype: object


K-modes results for Letters (E, F):
Metric
ARI    0.1952±0.05
NMI    0.1705±0.04
Name: KModes, dtype: object


K-modes results for DNA:
Metric
ARI    0.0268±0.01
NMI    0.0412±0.01
Name: KModes, dtype: object


K-modes results for Mushroom:
Metric
ARI    0.4908±0.23
NMI    0.4478±0.22
Name: KModes, dtype: object


K-modes results for Iris:
Metric
ARI    0.7302±0.00
NMI    0.7582±0.00
Name: KModes, dtype: object


