In [None]:
import numpy as np
from scipy.io import loadmat
from scipy.io import savemat
import matplotlib.pyplot as plt 

def _nearest_centroid(X, centroids):
    """Return, for every row of ``X``, the index of its closest centroid.

    Closeness is measured by squared Euclidean distance; ties go to the
    lowest centroid index (the behaviour of ``argmin``).
    """
    # Broadcast to (n_samples, K, n_features) so all distances come from one expression.
    diffs = X[:, np.newaxis, :] - np.asarray(centroids)[np.newaxis, :, :]
    return (diffs * diffs).sum(axis = 2).argmin(axis = 1)


def kMeans(X, K, maxIters = 20, plot_progress = None, seed = None):
    """Cluster the rows of ``X`` into ``K`` groups with Lloyd's k-means iteration.

    Parameters
    ----------
    X : array_like, shape (n_samples, n_features)
        Data points, one per row.
    K : int
        Number of clusters.
    maxIters : int, optional
        Number of assignment/update iterations. All of them are always run;
        there is no early stopping.
    plot_progress : callable, optional
        If given, called as ``plot_progress(X, C, centroids)`` at the end of
        every iteration.
    seed : int, optional
        Seed for a private random generator, for reproducible runs. With the
        default ``None`` the draws come from NumPy's global random state.

    Returns
    -------
    centroids : ndarray, shape (K, n_features)
        Cluster centres after the final update step.
    C : ndarray, shape (n_samples,)
        Cluster index of every row, as computed in the last assignment step
        (i.e. against the centroids as they were before that iteration's update).
        When ``maxIters`` is 0 this is the assignment to the initial centroids.
    """
    X = np.asarray(X)
    n_samples = len(X)
    # seed=None keeps the historical behaviour of drawing from the global RNG.
    rng = np.random if seed is None else np.random.default_rng(seed)

    # Start from K distinct rows whenever possible: sampling with replacement can
    # pick the same row twice, which leaves one cluster empty from the start.
    # Only when K exceeds the number of rows do we fall back to replacement.
    centroids = X[rng.choice(n_samples, K, replace = K > n_samples)]

    C = None
    for _ in range(maxIters):
        # Cluster assignment step
        C = _nearest_centroid(X, centroids)
        # Update centroids step
        updated = []
        for k in range(K):
            members = X[C == k]
            if len(members) > 0:
                updated.append(members.mean(axis = 0))
            else:
                # No data point was assigned to this centroid: re-seed it at a random row.
                updated.append(X[rng.choice(n_samples)])
        centroids = np.array(updated)
        if plot_progress is not None:
            plot_progress(X, C, centroids)

    if C is None:
        # maxIters == 0: still return a valid assignment instead of failing.
        C = _nearest_centroid(X, centroids)
    return np.array(centroids), C

# Read the activity's data matrix from the MATLAB file
in_data = loadmat('Period 10 Activity.mat')
A = in_data['A']

# Dimensions of A: number of rows and number of columns
rows, cols = np.array(A.shape)[:2]

In [None]:
# Run k-means with a single cluster; the columns of A are the data points
centroids, C = kMeans(A.T, K = 1)

# Show the data, the cluster index of each column, and the centroid (as a column)
print('A = ', A, sep = '\n')
print('centroid assigned = ',C)
print('centroids', centroids.T, sep = '\n')

In [None]:
# Build the rank-1 approximation: centroid column times a row of ones,
# which repeats the centroid in each of the `cols` columns
ones_row = np.ones((1, cols), dtype = float)
Ahat_1 = np.matmul(centroids.T, ones_row)

print('Rank-1 Approximation', Ahat_1, sep = '\n')

In [None]:
# Run k-means with two clusters; the columns of A are the data points
centroids, C = kMeans(A.T, K = 2)

# Show the data, the cluster index of each column, and the centroids (as columns)
print('A = ', A, sep = '\n')
print('centroid assigned = ',C)
print('centroids', centroids.T, sep = '\n')

In [None]:
# Build the rank-2 approximation: each column of A is replaced by the
# centroid of the cluster it was assigned to

centroid_columns = centroids.T  # one centroid per column
Ahat_2 = np.zeros((rows, cols), dtype = float)
for col in range(cols):
    Ahat_2[:, col] = centroid_columns[:, C[col]]

print('Rank-2 Approximation', Ahat_2, sep = '\n')