In [1]:
import warnings

import kagglehub
from tqdm import tqdm

# Download latest version
path = kagglehub.dataset_download("ziadhanyai/fashion-six-classes")
print("Path to dataset files:", path)

Path to dataset files: C:\Users\allex\.cache\kagglehub\datasets\ziadhanyai\fashion-six-classes\versions\1


In [2]:
import os

dataset_path = os.path.join(path, "Fashion")
class_to_int = {}
for idx, class_name in enumerate(os.listdir(dataset_path)):
    class_to_int[class_name] = idx
class_to_int

{'Accessories': 0,
 'Formal': 1,
 'Hoodie': 2,
 'Pants': 3,
 'Shoes': 4,
 'T-Shirt': 5}

In [3]:
import pandas as pd
import cv2 as cv
warnings.filterwarnings("ignore", category=UserWarning)  # Disable UserWarnings
tqdm.pandas()

df = pd.DataFrame(columns=["ImageName", "OriginalImage", "Label"])

for class_name, class_idx in class_to_int.items():
    # put images with labels into dataframe
    img_dir_class = os.path.join(dataset_path, class_name)
    for img_path in tqdm(os.listdir(img_dir_class)):
        full_img_path = os.path.join(img_dir_class, img_path)
        if img_path.endswith(".jpg") or img_path.endswith(".png") or img_path.endswith(".jpeg"):
            img = cv.imread(full_img_path)
            img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
            img = cv.resize(img, (224, 224), interpolation=cv.INTER_CUBIC)
            df.loc[len(df)] = [img_path, img, class_name]

100%|██████████| 1140/1140 [00:20<00:00, 54.62it/s]
100%|██████████| 1064/1064 [00:14<00:00, 71.82it/s]
100%|██████████| 1089/1089 [00:16<00:00, 68.00it/s]
100%|██████████| 1001/1001 [00:16<00:00, 60.73it/s]
100%|██████████| 1050/1050 [00:17<00:00, 59.09it/s]
100%|██████████| 1063/1063 [00:16<00:00, 64.23it/s]


In [4]:
df.iloc[0]

ImageName                                              Image_1.png
OriginalImage    [[[255, 255, 255], [255, 255, 255], [255, 255,...
Label                                                  Accessories
Name: 0, dtype: object

# Image Representation A (Simple flattening of Gray Scaled Images)

In [5]:
df["FlattenedImage"] = df["OriginalImage"].progress_apply(lambda x: cv.cvtColor(x, cv.COLOR_RGB2GRAY).flatten() / 255.0)

100%|██████████| 6343/6343 [00:00<00:00, 6693.80it/s]


# Image Representation B (ResNet embeddings)

In [6]:
import torch
from torchvision import models, transforms

# Load pretrained ResNet model
resnet = models.resnet18(pretrained=True)
resnet.eval()  # Set to evaluation mode
resnet = torch.nn.Sequential(*list(resnet.children())[:-1])
# Define image transformation
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Function to extract features from image
def get_resnet_features(image_array):
    image_tensor = transform(image_array).unsqueeze(0)  # Add batch dimension
    with torch.no_grad():  # No need to track gradients
        features = resnet(image_tensor)  # Get features from the ResNet model
    return features.squeeze().numpy()  # Remove batch dimension and convert to numpy array

# Apply ResNet feature extraction on the "Image" column
df["ResNetFeatures"] = df["OriginalImage"].progress_apply(lambda x: get_resnet_features(x))

100%|██████████| 6343/6343 [01:16<00:00, 82.58it/s]


In [7]:
import numpy as np
from sklearn.model_selection import train_test_split

X_train_flatten, X_val_flatten, y_train_flatten, y_val_flatten = train_test_split(np.array(df['FlattenedImage'].tolist()), df['Label'], test_size=0.2, random_state=42)
X_train_resnet, X_val_resnet, y_train_resnet, y_val_resnet = train_test_split(np.array(df['ResNetFeatures'].tolist()), df['Label'], test_size=0.2, random_state=42)

In [12]:
X_train_flatten.shape

(5074, 50176)

In [15]:
from sklearn.decomposition import PCA

# Initialize PCA with the number of components you want to keep.
# For example, to reduce to 50 components:
pca = PCA(n_components=50)

# Fit the PCA on the training data and transform it
X_train_flatten_pca = pca.fit_transform(X_train_flatten)

# Optionally, you can also transform the validation set with the same PCA model
X_val_flatten_pca = pca.transform(X_val_flatten)

# Check the new shape
print("New training data shape:", X_train_flatten_pca.shape)
print("New validation data shape:", X_val_flatten_pca.shape)

New training data shape: (5074, 50)
New validation data shape: (1269, 50)


# DBSCAN

In [30]:
from sklearn.cluster import DBSCAN
from sklearn.metrics import adjusted_rand_score, silhouette_score, davies_bouldin_score, calinski_harabasz_score

# Function to evaluate DBSCAN performance for a given combination of hyperparameters
def evaluate_dbscan(model, X, y):
    y_pred = model.labels_
    
    # Ignore noise points (label -1)
    mask = y_pred != -1
    X_filtered = X[mask]
    y_filtered = y[mask]
    y_pred_filtered = y_pred[mask]
    
    nr_of_removed_labels = len(y) - len(y_filtered)
    print(f"Removed {nr_of_removed_labels} out of {len(y)} as noise (-1 label)")
    if nr_of_removed_labels > len(y) / 2: # half of labels
        print(f"HALF OF DATA IS REMOVED!")
        return -1, -1, -1, -1
    
    silhouette_avg = -1
    davies_bouldin = -1
    calinski_harabasz = -1
    ari = -1
    
    if len(np.unique(y_pred_filtered)) > 1:
        # Silhouette Coefficient (higher is better)
        silhouette_avg = silhouette_score(X_filtered, y_pred_filtered)
        print(f"Silhouette Coefficient: {silhouette_avg:.4f}")
        
        # Davies-Bouldin Index (lower is better)
        davies_bouldin = davies_bouldin_score(X_filtered, y_pred_filtered)
        print(f"Davies-Bouldin Index: {davies_bouldin:.4f}")
        
        # Calinski-Harabasz Index (higher is better)
        calinski_harabasz = calinski_harabasz_score(X_filtered, y_pred_filtered)
        print(f"Calinski-Harabasz Index: {calinski_harabasz:.4f}")
        
        # Optionally, print ARI for comparison with true labels
        ari = adjusted_rand_score(y_filtered, y_pred_filtered)
        print(f"Adjusted Rand Index (ARI): {ari:.4f}")
        
        print(f"Number of clusters detected: {len(np.unique(y_pred_filtered))}")
    else:
        print("Metrics cannot be computed (only 1 cluster found)")
    
    return silhouette_avg, davies_bouldin, calinski_harabasz, ari

# DBSCAN + Representation A

In [31]:
from sklearn.model_selection import ParameterGrid

# Define the hyperparameter grid for tuning
param_grid = {
    'eps': [30, 35, 40, 45, 50, 60, 70 ,80 ,90, 100, 125, 150, 175],  # Range for eps
    'min_samples': [3, 5, 7, 10, 15]  # Range for min_samples
}

# Convert the grid into parameter combinations
grid = ParameterGrid(param_grid)

# Iterate over each parameter combination and evaluate
best_silhouette_avg = -1  # Track the best ARI score
best_params = None  # Track the best hyperparameters
results_dbscan_respresentation_A = {
    "min_samples": [],
    "eps": [],
    "silhouette_avg": [],
    "davies_bouldin": [],
    "calinski_harabasz": [],
    "ari": [],
}

for params in grid:
    eps = params['eps']
    min_samples = params['min_samples']
    
    print(f"Evaluating: eps={eps}, min_samples={min_samples}")

    dbscan = DBSCAN(eps=eps, min_samples=min_samples)
    dbscan.fit_predict(X_train_flatten)
    silhouette_avg, davies_bouldin, calinski_harabasz, ari = evaluate_dbscan(dbscan, X_train_flatten, y_train_flatten)
    
    results_dbscan_respresentation_A['min_samples'].append(min_samples)
    results_dbscan_respresentation_A['eps'].append(eps)
    results_dbscan_respresentation_A['silhouette_avg'].append(silhouette_avg)
    results_dbscan_respresentation_A['davies_bouldin'].append(davies_bouldin)
    results_dbscan_respresentation_A['calinski_harabasz'].append(calinski_harabasz)
    results_dbscan_respresentation_A['ari'].append(ari)
    print()
    
    if silhouette_avg > best_silhouette_avg:
        best_silhouette_avg = silhouette_avg
        best_params = params

print(f"\nBest Silhouette: {best_silhouette_avg} with parameters: {best_params}")

Evaluating: eps=30, min_samples=3
Removed 4333 out of 5074 as noise (-1 label)
HALF OF DATA IS REMOVED!

Evaluating: eps=30, min_samples=5
Removed 4646 out of 5074 as noise (-1 label)
HALF OF DATA IS REMOVED!

Evaluating: eps=30, min_samples=7
Removed 4721 out of 5074 as noise (-1 label)
HALF OF DATA IS REMOVED!

Evaluating: eps=30, min_samples=10
Removed 4731 out of 5074 as noise (-1 label)
HALF OF DATA IS REMOVED!

Evaluating: eps=30, min_samples=15
Removed 4742 out of 5074 as noise (-1 label)
HALF OF DATA IS REMOVED!

Evaluating: eps=35, min_samples=3
Removed 4059 out of 5074 as noise (-1 label)
HALF OF DATA IS REMOVED!

Evaluating: eps=35, min_samples=5
Removed 4364 out of 5074 as noise (-1 label)
HALF OF DATA IS REMOVED!

Evaluating: eps=35, min_samples=7
Removed 4442 out of 5074 as noise (-1 label)
HALF OF DATA IS REMOVED!

Evaluating: eps=35, min_samples=10
Removed 4478 out of 5074 as noise (-1 label)
HALF OF DATA IS REMOVED!

Evaluating: eps=35, min_samples=15
Removed 4514 out 

In [36]:
df_results_A = pd.DataFrame(results_dbscan_respresentation_A)
df_results_A

Unnamed: 0,min_samples,eps,silhouette_avg,davies_bouldin,calinski_harabasz,ari
0,3,30,-1.0,-1.0,-1.0,-1.0
1,5,30,-1.0,-1.0,-1.0,-1.0
2,7,30,-1.0,-1.0,-1.0,-1.0
3,10,30,-1.0,-1.0,-1.0,-1.0
4,15,30,-1.0,-1.0,-1.0,-1.0
...,...,...,...,...,...,...
60,3,175,-1.0,-1.0,-1.0,-1.0
61,5,175,-1.0,-1.0,-1.0,-1.0
62,7,175,-1.0,-1.0,-1.0,-1.0
63,10,175,-1.0,-1.0,-1.0,-1.0


# DBSCAN + Representation B

In [37]:
from sklearn.model_selection import ParameterGrid

# Define the hyperparameter grid for tuning
param_grid = {
    'eps': [30, 35, 40, 45, 50, 60, 70 ,80 ,90, 100, 125, 150, 175],  # Range for eps
    'min_samples': [3, 5, 7, 10, 15]  # Range for min_samples
}

# Convert the grid into parameter combinations
grid = ParameterGrid(param_grid)

# Iterate over each parameter combination and evaluate
best_silhouette_avg = -1  # Track the best ARI score
best_params = None  # Track the best hyperparameters
results_dbscan_respresentation_B = {
    "min_samples": [],
    "eps": [],
    "silhouette_avg": [],
    "davies_bouldin": [],
    "calinski_harabasz": [],
    "ari": [],
    }

for params in grid:
    eps = params['eps']
    min_samples = params['min_samples']
    
    print(f"Evaluating: eps={eps}, min_samples={min_samples}")

    dbscan = DBSCAN(eps=eps, min_samples=min_samples)
    dbscan.fit_predict(X_train_flatten)
    silhouette_avg, davies_bouldin, calinski_harabasz, ari = evaluate_dbscan(dbscan, X_train_flatten, y_train_flatten)
    
    results_dbscan_respresentation_B['min_samples'].append(min_samples)
    results_dbscan_respresentation_B['eps'].append(eps)
    results_dbscan_respresentation_B['silhouette_avg'].append(silhouette_avg)
    results_dbscan_respresentation_B['davies_bouldin'].append(davies_bouldin)
    results_dbscan_respresentation_B['calinski_harabasz'].append(calinski_harabasz)
    results_dbscan_respresentation_B['ari'].append(ari)
    print()
    
    if silhouette_avg > best_silhouette_avg:
        best_silhouette_avg = silhouette_avg
        best_params = params

print(f"\nBest Silhouette: {best_silhouette_avg} with parameters: {best_params}")

Evaluating: eps=30, min_samples=3
Removed 4333 out of 5074 as noise (-1 label)
HALF OF DATA IS REMOVED!

Evaluating: eps=30, min_samples=5
Removed 4646 out of 5074 as noise (-1 label)
HALF OF DATA IS REMOVED!

Evaluating: eps=30, min_samples=7
Removed 4721 out of 5074 as noise (-1 label)
HALF OF DATA IS REMOVED!

Evaluating: eps=30, min_samples=10
Removed 4731 out of 5074 as noise (-1 label)
HALF OF DATA IS REMOVED!

Evaluating: eps=30, min_samples=15
Removed 4742 out of 5074 as noise (-1 label)
HALF OF DATA IS REMOVED!

Evaluating: eps=35, min_samples=3
Removed 4059 out of 5074 as noise (-1 label)
HALF OF DATA IS REMOVED!

Evaluating: eps=35, min_samples=5
Removed 4364 out of 5074 as noise (-1 label)
HALF OF DATA IS REMOVED!

Evaluating: eps=35, min_samples=7
Removed 4442 out of 5074 as noise (-1 label)
HALF OF DATA IS REMOVED!

Evaluating: eps=35, min_samples=10
Removed 4478 out of 5074 as noise (-1 label)
HALF OF DATA IS REMOVED!

Evaluating: eps=35, min_samples=15
Removed 4514 out 

In [38]:
df_results_B = pd.DataFrame(results_dbscan_respresentation_B)
df_results_B

Unnamed: 0,min_samples,eps,silhouette_avg,davies_bouldin,calinski_harabasz,ari
0,3,30,-1.0,-1.0,-1.0,-1.0
1,5,30,-1.0,-1.0,-1.0,-1.0
2,7,30,-1.0,-1.0,-1.0,-1.0
3,10,30,-1.0,-1.0,-1.0,-1.0
4,15,30,-1.0,-1.0,-1.0,-1.0
...,...,...,...,...,...,...
60,3,175,-1.0,-1.0,-1.0,-1.0
61,5,175,-1.0,-1.0,-1.0,-1.0
62,7,175,-1.0,-1.0,-1.0,-1.0
63,10,175,-1.0,-1.0,-1.0,-1.0


In [48]:
from sklearn.metrics import adjusted_rand_score, silhouette_score, davies_bouldin_score, calinski_harabasz_score

# Function to evaluate DBSCAN performance for a given combination of hyperparameters
def evaluate_kmeans(model, X, y):
    y_pred = model.labels_
    
    silhouette_avg = -1
    davies_bouldin = -1
    calinski_harabasz = -1
    ari = -1
    
    if len(np.unique(y_pred)) > 1:
        # Silhouette Coefficient (higher is better)
        silhouette_avg = silhouette_score(X, y_pred)
        print(f"Silhouette Coefficient: {silhouette_avg:.4f}")
        
        # Davies-Bouldin Index (lower is better)
        davies_bouldin = davies_bouldin_score(X, y_pred)
        print(f"Davies-Bouldin Index: {davies_bouldin:.4f}")
        
        # Calinski-Harabasz Index (higher is better)
        calinski_harabasz = calinski_harabasz_score(X, y_pred)
        print(f"Calinski-Harabasz Index: {calinski_harabasz:.4f}")
        
        # Optionally, print ARI for comparison with true labels
        ari = adjusted_rand_score(y, y_pred)
        print(f"Adjusted Rand Index (ARI): {ari:.4f}")
    else:
        print("Metrics cannot be computed (only 1 cluster found)")
    
    return silhouette_avg, davies_bouldin, calinski_harabasz, ari

# K-Means + Representation A

In [49]:
from sklearn.cluster import KMeans
from sklearn.model_selection import ParameterGrid

# Define parameter grid for KMeans
param_grid = {
    'n_clusters': range(2, 24, 1),  # testing different number of clusters
    'init': ['k-means++', 'random'],  # testing different initialization methods
    'max_iter': [25, 50, 100, 200, 300, 500, 1000],  # different maximum iterations
    'tol': [1e-4, 1e-3],  # different tolerance values
}

# Convert the grid into parameter combinations
grid = ParameterGrid(param_grid)

# Iterate over each parameter combination and evaluate
best_silhouette_avg = -1  # Track the best ARI score
best_params = None  # Track the best hyperparameters
results_kmeans_respresentation_A = {
    "n_clusters": [],
    "init": [],
    "max_iter": [],
    "tol": [],
    "silhouette_avg": [],
    "davies_bouldin": [],
    "calinski_harabasz": [],
    "ari": [],
    }

for params in grid:
    n_clusters = params['n_clusters']
    init = params['init']
    max_iter = params['max_iter']
    tol = params['tol']
    
    print(f"Evaluating: n_clusters={n_clusters}, init={init}, max_iter={max_iter}, tol={tol}")

    kmeans = KMeans(n_clusters=n_clusters, init=init, max_iter=max_iter, tol=tol)
    kmeans.fit_predict(X_train_flatten_pca)
    silhouette_avg, davies_bouldin, calinski_harabasz, ari = evaluate_kmeans(kmeans, X_train_flatten_pca, y_train_flatten)
    
    results_kmeans_respresentation_A['n_clusters'].append(n_clusters)
    results_kmeans_respresentation_A['init'].append(init)
    results_kmeans_respresentation_A['max_iter'].append(max_iter)
    results_kmeans_respresentation_A['tol'].append(tol)
    results_kmeans_respresentation_A['silhouette_avg'].append(silhouette_avg)
    results_kmeans_respresentation_A['davies_bouldin'].append(davies_bouldin)
    results_kmeans_respresentation_A['calinski_harabasz'].append(calinski_harabasz)
    results_kmeans_respresentation_A['ari'].append(ari)
    print()
    
    if silhouette_avg > best_silhouette_avg:
        best_silhouette_avg = silhouette_avg
        best_params = params

print(f"\nBest Silhouette: {best_silhouette_avg} with parameters: {best_params}")

Evaluating: n_clusters=2, init=k-means++, max_iter=25, tol=0.0001
Silhouette Coefficient: 0.2619
Davies-Bouldin Index: 1.4934
Calinski-Harabasz Index: 2069.9773
Adjusted Rand Index (ARI): 0.0131

Evaluating: n_clusters=2, init=k-means++, max_iter=25, tol=0.001
Silhouette Coefficient: 0.2625
Davies-Bouldin Index: 1.4900
Calinski-Harabasz Index: 2069.9347
Adjusted Rand Index (ARI): 0.0129

Evaluating: n_clusters=3, init=k-means++, max_iter=25, tol=0.0001
Silhouette Coefficient: 0.2082
Davies-Bouldin Index: 1.7066
Calinski-Harabasz Index: 1634.4182
Adjusted Rand Index (ARI): 0.0248

Evaluating: n_clusters=3, init=k-means++, max_iter=25, tol=0.001
Silhouette Coefficient: 0.2076
Davies-Bouldin Index: 1.7057
Calinski-Harabasz Index: 1634.3557
Adjusted Rand Index (ARI): 0.0248

Evaluating: n_clusters=4, init=k-means++, max_iter=25, tol=0.0001
Silhouette Coefficient: 0.1828
Davies-Bouldin Index: 1.8462
Calinski-Harabasz Index: 1284.1103
Adjusted Rand Index (ARI): 0.0321

Evaluating: n_clusters

In [50]:
df_kmeans_results_A = pd.DataFrame(results_kmeans_respresentation_A)
df_kmeans_results_A

Unnamed: 0,n_clusters,init,max_iter,tol,silhouette_avg,davies_bouldin,calinski_harabasz,ari
0,2,k-means++,25,0.0001,0.261888,1.493444,2069.977264,0.013146
1,2,k-means++,25,0.0010,0.262539,1.489977,2069.934675,0.012899
2,3,k-means++,25,0.0001,0.208240,1.706579,1634.418205,0.024788
3,3,k-means++,25,0.0010,0.207600,1.705732,1634.355689,0.024771
4,4,k-means++,25,0.0001,0.182760,1.846216,1284.110327,0.032065
...,...,...,...,...,...,...,...,...
611,21,random,1000,0.0010,0.108744,2.244437,399.009818,0.059204
612,22,random,1000,0.0001,0.095698,2.180422,388.110424,0.058929
613,22,random,1000,0.0010,0.097057,2.211215,385.293972,0.058779
614,23,random,1000,0.0001,0.098960,2.218563,374.610602,0.054345


# K-Means + Representation B

In [51]:
from sklearn.cluster import KMeans
from sklearn.model_selection import ParameterGrid

# Define parameter grid for KMeans
param_grid = {
    'n_clusters': range(2, 24, 1),  # testing different number of clusters
    'init': ['k-means++', 'random'],  # testing different initialization methods
    'max_iter': [25, 50, 100, 200, 300, 500, 1000],  # different maximum iterations
    'tol': [1e-4, 1e-3],  # different tolerance values
}

# Convert the grid into parameter combinations
grid = ParameterGrid(param_grid)

# Iterate over each parameter combination and evaluate
best_silhouette_avg = -1  # Track the best ARI score
best_params = None  # Track the best hyperparameters
results_kmeans_respresentation_B = {
    "n_clusters": [],
    "init": [],
    "max_iter": [],
    "tol": [],
    "silhouette_avg": [],
    "davies_bouldin": [],
    "calinski_harabasz": [],
    "ari": [],
    }

for params in grid:
    n_clusters = params['n_clusters']
    init = params['init']
    max_iter = params['max_iter']
    tol = params['tol']
    
    print(f"Evaluating: n_clusters={n_clusters}, init={init}, max_iter={max_iter}, tol={tol}")

    kmeans = KMeans(n_clusters=n_clusters, init=init, max_iter=max_iter, tol=tol)
    kmeans.fit_predict(X_train_resnet)
    silhouette_avg, davies_bouldin, calinski_harabasz, ari = evaluate_kmeans(kmeans, X_train_resnet, y_train_resnet)
    
    results_kmeans_respresentation_B['n_clusters'].append(n_clusters)
    results_kmeans_respresentation_B['init'].append(init)
    results_kmeans_respresentation_B['max_iter'].append(max_iter)
    results_kmeans_respresentation_B['tol'].append(tol)
    results_kmeans_respresentation_B['silhouette_avg'].append(silhouette_avg)
    results_kmeans_respresentation_B['davies_bouldin'].append(davies_bouldin)
    results_kmeans_respresentation_B['calinski_harabasz'].append(calinski_harabasz)
    results_kmeans_respresentation_B['ari'].append(ari)
    print()
    
    if silhouette_avg > best_silhouette_avg:
        best_silhouette_avg = silhouette_avg
        best_params = params

print(f"\nBest Silhouette: {best_silhouette_avg} with parameters: {best_params}")

Evaluating: n_clusters=2, init=k-means++, max_iter=25, tol=0.0001
Silhouette Coefficient: 0.1156
Davies-Bouldin Index: 3.0747
Calinski-Harabasz Index: 483.5906
Adjusted Rand Index (ARI): 0.2160

Evaluating: n_clusters=2, init=k-means++, max_iter=25, tol=0.001
Silhouette Coefficient: 0.1156
Davies-Bouldin Index: 3.0747
Calinski-Harabasz Index: 483.5906
Adjusted Rand Index (ARI): 0.2160

Evaluating: n_clusters=3, init=k-means++, max_iter=25, tol=0.0001
Silhouette Coefficient: 0.0832
Davies-Bouldin Index: 3.4581
Calinski-Harabasz Index: 362.5132
Adjusted Rand Index (ARI): 0.2085

Evaluating: n_clusters=3, init=k-means++, max_iter=25, tol=0.001
Silhouette Coefficient: 0.1064
Davies-Bouldin Index: 3.2455
Calinski-Harabasz Index: 371.7638
Adjusted Rand Index (ARI): 0.2366

Evaluating: n_clusters=4, init=k-means++, max_iter=25, tol=0.0001
Silhouette Coefficient: 0.0728
Davies-Bouldin Index: 3.1525
Calinski-Harabasz Index: 332.4024
Adjusted Rand Index (ARI): 0.2723

Evaluating: n_clusters=4, i

In [52]:
df_kmeans_results_B = pd.DataFrame(results_kmeans_respresentation_B)
df_kmeans_results_B

Unnamed: 0,n_clusters,init,max_iter,tol,silhouette_avg,davies_bouldin,calinski_harabasz,ari
0,2,k-means++,25,0.0001,0.115645,3.074748,483.590603,0.216006
1,2,k-means++,25,0.0010,0.115645,3.074748,483.590603,0.216006
2,3,k-means++,25,0.0001,0.083205,3.458083,362.513168,0.208474
3,3,k-means++,25,0.0010,0.106354,3.245460,371.763807,0.236597
4,4,k-means++,25,0.0001,0.072788,3.152497,332.402354,0.272277
...,...,...,...,...,...,...,...,...
611,21,random,1000,0.0010,0.048210,3.246216,121.424392,0.316741
612,22,random,1000,0.0001,0.049012,3.197041,118.644462,0.294944
613,22,random,1000,0.0010,0.040671,3.316715,117.651077,0.278785
614,23,random,1000,0.0001,0.044530,3.348399,113.353082,0.277901
