## Import Liberies

In [1]:
import pandas as pd
import numpy as np
from numpy.linalg import norm
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_openml
from sklearn.cluster import KMeans

## Load mnist data online

In [2]:
X,y = fetch_openml('mnist_784', version=1, return_X_y=True)

## Standarzise dataset

In [18]:
X = X/255


In [12]:
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

In [13]:
class Kmeans:
    def __init__(self, n_clusters, max_iter=100, random_state=123):
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.random_state = random_state

    # Initialize with random centriods centers
    def initializ_centroids(self, X):
        np.random.RandomState(self.random_state)
        random_idx = np.random.permutation(X.shape[0])
        centroids = X[random_idx[:self.n_clusters]]
        return centroids

    # Compute the centroids
    def compute_centroids(self, X, labels):
        centroids = np.zeros((self.n_clusters, X.shape[1]))
        for k in range(self.n_clusters):
            centroids[k, :] = np.mean(X[labels == k, :], axis=0)
        return centroids

    # compute the Euclidean distance between cluster and dataset
    def compute_distance(self, X, centroids):
        distance = np.zeros((X.shape[0], self.n_clusters))
        for k in range(self.n_clusters):
            row_norm = norm(X - centroids[k, :], axis=1)
            distance[:, k] = np.square(row_norm)
        return distance

    # Find the closest clustor
    def find_closest_cluster(self, distance):
        return np.argmin(distance, axis=1)
    
    # main fit function
    def fit(self, X):
        self.centroids = self.initializ_centroids(X)
        for i in range(self.max_iter):
            old_centroids = self.centroids
            distance = self.compute_distance(X, old_centroids)
            self.labels = self.find_closest_cluster(distance)
            self.centroids = self.compute_centroids(X, self.labels)
            if np.all(old_centroids == self.centroids):
                break
    
    # Return the latest centroids
    def cluster_centers(self):
        return self.centroids

## Centroid of the clusters for k=3

In [14]:
# Build the model for Kmean
k_mean = Kmeans(n_clusters=3)
# process the Kmean function
k_mean.fit(X_train)
# Get the cluster centers
k_mean.cluster_centers()

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

## Finding Centroid for varing k from 1 to 7.

In [15]:
# Varing the k from 1 to 7 for finding kmean clustors.
for k in range(1,7):
    k_mean = Kmeans(n_clusters=k)
    k_mean.fit(X_train)
    print("Centers of Centroids for k={}\n".format(k), k_mean.cluster_centers())

Centers of Centroids for k=1
 [[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  3.22952710e-08 1.20466487e-07 5.53633218e-08 2.30680507e-09
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  4.10098680e-09 1.40971421e-08 1.42765603e-07 3.73446110e-07
  6.72305524e-07 9.85774702e-07 1.84749455e-06 2.47186979e-06
  2.67871332e-06 2.72869409e-06 2.91144432e-06 2.67820069e-06
  2.87479175e-06 2.36293733e-06 1.53966423e-06 1.09547610e-06
  8.27630399e-07 3.28591567e-07 1.55068563e-07 5.43380751e-08
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 1.64039472e-08 1.07650903e-08
  1.06881968e-07 8.43265411e-08 7.243367

Centers of Centroids for k=2
 [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
Centers of Centroids for k=3
 [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
Centers of Centroids for k=4
 [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
Centers of Centroids for k=5
 [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
Centers of Centroids for k=6
 [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


## Using Sklearn for varing k from 1 to 7

In [16]:
# Varing the k from 1 to 7 for finding kmean clustors.
for k in range(1,7):
        kmeans = KMeans(n_clusters=k)
        kmeans = kmeans.fit(X_train)
        print("Centers of Centroids using Sklearn for k={}\n".format(k), kmeans.cluster_centers_)

Centers of Centroids using Sklearn for k=1
 [[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  3.22952710e-08 1.20466487e-07 5.53633218e-08 2.30680507e-09
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  4.10098680e-09 1.40971421e-08 1.42765603e-07 3.73446110e-07
  6.72305524e-07 9.85774702e-07 1.84749455e-06 2.47186979e-06
  2.67871332e-06 2.72869409e-06 2.91144432e-06 2.67820069e-06
  2.87479175e-06 2.36293733e-06 1.53966423e-06 1.09547610e-06
  8.27630399e-07 3.28591567e-07 1.55068563e-07 5.43380751e-08
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 1.64039472e-08 1.07650903e-08
  1.06881968e-07 8.4326541

Centers of Centroids using Sklearn for k=2
 [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
Centers of Centroids using Sklearn for k=3
 [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
Centers of Centroids using Sklearn for k=4
 [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
Centers of Centroids using Sklearn for k=5
 [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
Centers of Centroids using Sklearn for k=6
 [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
