In [1]:
import matplotlib.pyplot as plt  
import numpy as np     
import torch
from load import test_x

Downloading train-images-idx3-ubyte.gz ... 
Done
Downloading train-labels-idx1-ubyte.gz ... 
Done
Downloading t10k-images-idx3-ubyte.gz ... 
Done
Downloading t10k-labels-idx1-ubyte.gz ... 
Done
Converting train-images-idx3-ubyte.gz to NumPy Array ...
Done
Converting train-labels-idx1-ubyte.gz to NumPy Array ...
Done
Converting t10k-images-idx3-ubyte.gz to NumPy Array ...
Done
Converting t10k-labels-idx1-ubyte.gz to NumPy Array ...
Done
Creating pickle file ...
Done


In [2]:
# Sneak peek at using GPUs for computation!
# Pick the CUDA GPU with ordinal `gpu_indx` when CUDA is available, otherwise
# fall back to the CPU. (To force CPU execution, set device = torch.device("cpu").)
gpu_indx = 0
if torch.cuda.is_available():
    device = torch.device("cuda", gpu_indx)
else:
    device = torch.device("cpu")
print("cuda or cpu?:", device)

cuda or cpu?: cpu


In [None]:
# --- K-means configuration ---------------------------------------------------
# Number of datapoints used for clustering
num_img = 10000
# Number of cluster centers: 10, because the dataset contains 10 classes
# (the digits 0 to 9)
num_means = 10
# Number of iterations of the algorithm to run
iterations = 100
# Each image is 28*28 pixels, flattened to a vector of 784 values
data_size = 28*28

# --- Data preparation --------------------------------------------------------
# The images are 8-bit greyscale (pixel values 0-255). Rescale them to the
# range 0-1 (not strictly required for k-means), convert to a float32 tensor
# and move it to the selected device.
test_x_tensor = torch.from_numpy(test_x.astype(float) / 255).float().to(device)

<h3> KMeans Initialization </h3>
Here we'll initialise the cluster centers by randomly sampling 10 points from the dataset, which gives us a 10*784 matrix (a 2D tensor) with one row per cluster center.

In [None]:
# Randomly pick K *distinct* datapoint indices to use as the initial cluster
# centers. Sampling without replacement matters: if the same datapoint were
# chosen twice, two centers would be identical, argmin would always resolve the
# tie to the first of them, and the second cluster would stay empty.
init_indices = np.random.choice(num_img, num_means, replace=False)
means = test_x_tensor[init_indices]
# Identity matrix; indexing its rows by cluster id yields one-hot vectors
eye_mat = torch.eye(num_means, device=device)

In [None]:
# Multiplying by the identity matrix returns the same cluster centers; this
# only demonstrates the matrix-multiply pattern that the k-means update uses
# later with a one-hot assignment matrix in place of the identity.
means = eye_mat.mm(means)

In [None]:
# Show the initial cluster centers side by side as one wide image.
plt.figure(1, figsize=(20, 10))
# .cpu() is required before .numpy(): NumPy conversion fails for tensors that
# live on a CUDA device. Each center is reshaped to 28x28 and the centers are
# tiled horizontally into a single (28, num_means*28) image.
img = means.cpu().float().numpy().reshape(num_means, 28, 28).transpose((1, 0, 2)).reshape(28, num_means*28)
_ = plt.imshow(img)

<h3> KMeans Algorithm </h3>

In [None]:
# Run the k-means iterations. Gradients are never needed here, so autograd
# tracking is disabled for the whole loop.
with torch.no_grad():
    for _ in range(iterations):
        # Add a dimension in the right place on each tensor and use broadcasting
        # to get the difference between every datapoint and every cluster center
        # (result is indexed as [datapoint, cluster, feature])
        diff_from_means = means.unsqueeze(0) - test_x_tensor.unsqueeze(1)

        # Mean squared difference between each datapoint and each cluster center
        dist_to_means = diff_from_means.pow(2).mean(2)

        # Index of the closest cluster center for every datapoint
        indx_of_means = dist_to_means.argmin(1)

        # Create a one-hot coded vector per datapoint, transposed so that each
        # row corresponds to a cluster and each column to a datapoint
        a = eye_mat[indx_of_means].t()

        # Number of datapoints assigned to each cluster (kept as a column so it
        # broadcasts across the feature dimension)
        cluster_sizes = a.sum(1, keepdim=True)

        # Multiply to get the per-cluster sums, then divide by the cluster sizes
        # to get the new means
        new_means = torch.mm(a, test_x_tensor) / cluster_sizes

        # A cluster with no assigned datapoints gives 0/0 = NaN above; keep its
        # previous center instead so NaNs never enter the next iteration
        means = torch.where(cluster_sizes > 0, new_means, means)

<h3>Let's visualise the cluster centers!</h3>

In [None]:
# Display the learned cluster centers next to each other in one wide image.
plt.figure(1, figsize=(20, 10))
img = (
    means.cpu().float().numpy()          # bring the centers back to host memory as a NumPy array
    .reshape(num_means, 28, 28)          # one 28x28 image per cluster center
    .transpose((1, 0, 2))                # put the image-row axis first
    .reshape(28, num_means*28)           # tile the centers horizontally
)
_ = plt.imshow(img)