In [2]:
import tensorflow as tf
import pandas as pd 
import cv2
import numpy as np
from matplotlib import pyplot as plt

# Getting data

In [3]:
# Fetch MNIST through Keras; each split unpacks into an (images, labels) pair.
mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()
X_train, y_train = mnist_train
X_test, y_test = mnist_test

In [4]:
# Sanity check: display the dimensions of the training images array.
X_train.shape

(60000, 28, 28)

# Scaling data

In [5]:
# tf.keras.utils.normalize divides by the L2 norm (its default order) taken
# along `axis`. With axis=1 on (n, 28, 28) input, every pixel column of every
# image is rescaled to unit length.
# NOTE(review): this is not a 0-1 pixel rescale (e.g. dividing by 255) -
# confirm that per-column L2 normalization is the intended "scaling".
X_train, X_test = (
    tf.keras.utils.normalize(images, axis=1) for images in (X_train, X_test)
)

# Flattening the images into data frames

In [6]:
# Flatten every 28x28 image into one row of 784 pixel features.
# np.asarray keeps this cell re-runnable: X_train starts out as a NumPy array
# but is later rebound to a DataFrame, which has no .reshape method. For an
# ndarray input np.asarray is a no-op, so the first run is unchanged.
df_train = pd.DataFrame(
    np.asarray(X_train).reshape(-1, 28 * 28),
    columns=[f"px{i}" for i in range(28 * 28)],
)
# Keep the digit labels next to their pixel features.
df_train['y'] = y_train

In [7]:
# Split the frame back into the pixel feature matrix and the label column.
# Note that X_train / y_train are pandas objects from this point on.
X_train = df_train.drop(columns='y')
y_train = df_train['y']

In [8]:
# Flatten every 28x28 test image into one row of 784 pixel features.
# np.asarray keeps this cell re-runnable: X_test starts out as a NumPy array
# but is later rebound to a DataFrame, which has no .reshape method. For an
# ndarray input np.asarray is a no-op, so the first run is unchanged.
df_test = pd.DataFrame(
    np.asarray(X_test).reshape(-1, 28 * 28),
    columns=[f"px{i}" for i in range(28 * 28)],
)
# Keep the digit labels next to their pixel features.
df_test['y'] = y_test

In [9]:
# Split the frame back into the pixel feature matrix and the label column.
# Note that X_test / y_test are pandas objects from this point on.
X_test = df_test.drop(columns='y')
y_test = df_test['y']

# Fitting K-Means

In [19]:
from sklearn.cluster import KMeans

# One cluster per digit class (0-9). n_init=100 repeats the k-means++ seeded
# run 100 times and keeps the solution with the lowest inertia, so this cell
# is slow on the full training set.
# random_state pins the centroid initialization: without it every
# Restart & Run All produces a different clustering and different scores.
kmeans = KMeans(
    n_clusters=10,
    init="k-means++",
    n_init=100,
    random_state=42,
)

kmeans.fit(X_train)

# Scoring K-Means

In [22]:
from sklearn.metrics import (
    adjusted_rand_score,
    fowlkes_mallows_score,
    normalized_mutual_info_score,
)

# Index of the closest learned cluster for every training sample.
predicted_labels = kmeans.predict(X_train)

# Score the cluster assignments against the true digit labels with three
# external clustering metrics.
ari_score, nmi_score, fmi_score = (
    score_fn(y_train, predicted_labels)
    for score_fn in (
        adjusted_rand_score,
        normalized_mutual_info_score,
        fowlkes_mallows_score,
    )
)

print("Adjusted Rand Index:", ari_score)
print("Normalized Mutual Information:", nmi_score)
print("Fowlkes-Mallows Index:", fmi_score)

Adjusted Rand Index: 0.3922472450825891
Normalized Mutual Information: 0.5176740543097533
Fowlkes-Mallows Index: 0.4554319985571223


In [21]:
from sklearn.metrics import (
    adjusted_rand_score,
    fowlkes_mallows_score,
    normalized_mutual_info_score,
)

# Index of the closest learned cluster for every held-out test sample.
predicted_labels = kmeans.predict(X_test)

# Score the cluster assignments against the true digit labels with three
# external clustering metrics.
ari_score, nmi_score, fmi_score = (
    score_fn(y_test, predicted_labels)
    for score_fn in (
        adjusted_rand_score,
        normalized_mutual_info_score,
        fowlkes_mallows_score,
    )
)

print("Adjusted Rand Index:", ari_score)
print("Normalized Mutual Information:", nmi_score)
print("Fowlkes-Mallows Index:", fmi_score)

Adjusted Rand Index: 0.40393953912270153
Normalized Mutual Information: 0.5292296333518569
Fowlkes-Mallows Index: 0.4658984808645735
