In [1]:
import os
import cv2
import numpy as np
from numpy import random
import matplotlib
from matplotlib import image
import matplotlib.pyplot as plt
import math

In [2]:
file = np.load('pca_man_reduced.npy')
file.shape
file

array([[[ 1.13892514e+03, -2.08308136e+03,  3.23506705e+03],
        [ 7.69876527e+02, -1.07213014e+03,  1.27724589e+03],
        [-6.22113309e+02,  3.18935683e+02, -1.02000328e+02],
        ...,
        [ 9.17404453e+00,  1.83438972e+01,  4.22698598e+01],
        [ 1.47300766e+01, -3.96913877e+01, -4.28000466e+00],
        [-8.14537568e+00,  7.35941657e+01, -1.11720711e+02]],

       [[ 2.03186521e+03, -2.96502380e+03,  4.11129653e+03],
        [ 6.34447544e+02, -9.62611379e+02,  1.20612731e+03],
        [-6.29388894e+02,  3.58067537e+02, -2.19792498e+02],
        ...,
        [-4.74890450e+01, -6.35484993e+00,  6.23519760e+01],
        [-1.45321967e+02, -2.39436356e+01,  1.15964647e+02],
        [ 1.18190817e+01, -8.93602237e+01, -8.32712073e+00]],

       [[ 2.16709996e+03, -3.13209307e+03,  4.32562670e+03],
        [ 1.27326408e+02, -3.01322783e+02,  4.81195031e+02],
        [ 2.73285827e+02, -4.00895223e+02,  4.79411585e+02],
        ...,
        [ 3.38638138e-01,  5.18222054e+01,

In [3]:
file.shape

(8355, 150, 3)

In [4]:
file = file.reshape(8355, 450)
file.shape

(8355, 450)

In [5]:
def plotCurrent(X, Rnk, Kmus):
    N, D = np.shape(X)
    K = np.shape(Kmus)[0]

    InitColorMat = np.array([[1, 0, 0],
                             [0, 1, 0],
                             [0, 0, 1],
                             [0, 0, 0],
                             [1, 1, 0],
                             [1, 0, 1],
                             [0, 1, 1]])

    KColorMat = InitColorMat[0:K,:]

    colorVec = np.dot(Rnk, KColorMat)
    muColorVec = np.dot(np.eye(K), KColorMat)
    plt.scatter(X[:,0], X[:,1], c=colorVec)

    plt.scatter(Kmus[:,0], Kmus[:,1], s=200, c=muColorVec, marker='d')
    plt.axis('equal')
    plt.show()

def calcSqDistances(X, Kmus):
    K = Kmus.shape[0]
    N, D = np.shape(X)
    sdist = np.zeros((N,K))
    
    for i in range(N):
        for j in range(K):
            sdist[i,j] = np.linalg.norm(X[i]-Kmus[j])**2
    return sdist   

def determineRnk(sqDmat):
    return np.eye(sqDmat.shape[1])[np.argmin(sqDmat,axis=1)]

def recalcMus(X, Rnk): 
    return (np.divide(np.dot(X.T, Rnk),np.sum(Rnk, axis=0))).T

def runKMeans(K, X):
    
    
  # For loop was meant to go across entire binary image set
  # With the current set-up, it took over an hour and a half
  # to run on this set before I interrupted it, so should most likely
  # test on one binary image
# The image data we run clustering on 
        #X = X.reshape((X.shape[1]*X.shape[0],3))
        #X = X.reshape((X.shape[1]*X.shape[0]),3)

    N = np.shape(X)[0]
    D = np.shape(X)[1]

    Kmus = np.zeros((K,D))

    rand_inds = np.random.permutation(N)
    Kmus = X[rand_inds[0:K],:]
        

    maxiters = 1000

    for iter in range(maxiters):
        #assign each data vector to closest mu vector as per Bishop (9.2)
        #do this by first calculating a squared distance matrix where the n,k entry
        #contains the squared distance from the nth data vector to the kth mu vector

        #sqDmat will be an N-by-K matrix with the n,k entry as specfied above
        sqDmat = calcSqDistances(X, Kmus)

        #given the matrix of squared distances, determine the closest cluster
        #center for each data vector

        #R is the "responsibility" matrix
        #R will be an N-by-K matrix of binary values whose n,k entry is set as
        #per Bishop (9.2)
        #Specifically, the n,k entry is 1 if point n is closest to cluster k,
        #and is 0 otherwise
        Rnk = determineRnk(sqDmat)

        KmusOld = Kmus
        #plotCurrent(X, Rnk, Kmus)
        #plt.show()

        #recalculate mu values based on cluster assignments as per Bishop (9.4)
        Kmus = recalcMus(X, Rnk)


        #check to see if the cluster centers have converged.  If so, break.
        if np.sum(np.abs(KmusOld.reshape((-1, 1)) - Kmus.reshape((-1, 1)))) < 1e-6:
            print(iter)
            break
        return X, Kmus, Rnk
    #plotCurrent(X, Rnk, Kmus)




In [6]:
X, Kmus, Rnk = runKMeans(100, file)

In [7]:
X

array([[ 1.13892514e+03, -2.08308136e+03,  3.23506705e+03, ...,
        -8.14537568e+00,  7.35941657e+01, -1.11720711e+02],
       [ 2.03186521e+03, -2.96502380e+03,  4.11129653e+03, ...,
         1.18190817e+01, -8.93602237e+01, -8.32712073e+00],
       [ 2.16709996e+03, -3.13209307e+03,  4.32562670e+03, ...,
        -2.98523506e+01, -1.00950180e+02,  3.91238845e+01],
       ...,
       [ 5.24272764e+03, -4.44508379e+03,  3.59448896e+03, ...,
         3.16866356e+01,  6.86722767e+01,  5.91784668e+01],
       [ 3.85826746e+03, -4.66300313e+03,  3.37067573e+03, ...,
         1.85582778e+01,  2.24556606e+00,  2.15015599e+01],
       [ 3.91214341e+03, -1.90625378e+03,  2.85969790e+02, ...,
         4.96236270e+01, -3.58459929e+01,  4.31360419e+01]])

In [8]:
X.shape

(8355, 450)

In [9]:
len(X)

8355

In [10]:
Kmus

array([[ 3.11776312e+03, -2.69458458e+03,  1.17745534e+03, ...,
        -9.61105751e+00, -1.84946843e+00,  2.83982197e+00],
       [-1.51431738e+02, -6.49999439e+01,  9.02116311e+02, ...,
         4.36808354e+01, -1.49283462e+01,  1.96237027e+01],
       [-1.78127100e+03,  1.91638370e+03, -1.77287342e+03, ...,
        -1.84796841e+01,  1.33755424e+00, -1.77984969e+01],
       ...,
       [ 1.50451184e+03, -1.46221567e+03,  1.27933944e+03, ...,
        -1.88258245e+01,  4.39054462e+00, -1.50898963e+01],
       [ 4.12193234e+02, -1.45230117e+03,  1.88734004e+03, ...,
        -5.55861752e+00, -7.06110249e+00,  4.66288961e-01],
       [ 6.83667319e+02, -4.29629868e+01, -5.63801791e+02, ...,
         7.89711555e+00, -7.30675366e+00,  7.10721552e-01]])

In [11]:
Rnk

array([[0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [12]:
Rnk.shape

(8355, 100)

In [13]:
labels = Rnk.T
labels.shape

(100, 8355)

In [14]:
labels

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [15]:
len(labels)

100

In [16]:
N, D = labels.shape
N

100

In [17]:
check = []
check.append([9,0,8])
print(check)

[[9, 0, 8]]


In [18]:
check

[[9, 0, 8]]

In [19]:
def grouping(labels):
    groups = []
    N, D = labels.shape
    for i in labels: 
        g = np.where(i==1)
        groups.append(g)
    
    return groups
            
            
            
        

In [20]:
g = grouping(labels)

In [21]:
g

[(array([ 110,  131,  138,  162,  183,  196,  231,  255,  312,  344,  354,
          390,  579,  605,  640,  667,  722,  768,  792,  906,  921,  954,
         1031, 1250, 1396, 1417, 1421, 1456, 1464, 1564, 1586, 1810, 1829,
         1861, 1940, 1978, 2014, 2021, 2049, 2107, 2192, 2240, 2354, 2757,
         2765, 2769, 2852, 2912, 2931, 3011, 3013, 3041, 3069, 3077, 3083,
         3094, 3108, 3140, 3151, 3159, 3160, 3165, 3200, 3222, 3280, 3298,
         3316, 3355, 3362, 3595, 3605, 3678, 3700, 3702, 3742, 3763, 3765,
         3767, 3774, 3779, 3790, 3819, 3825, 3839, 3840, 3895, 3906, 3974,
         3975, 3993, 4032, 4033, 4037, 4051, 4054, 4057, 4083, 4084, 4184,
         4298, 4300, 4301, 4306, 4316, 4347, 4348, 4376, 4384, 4431, 4472,
         4474, 4534, 4563, 4578, 4617, 4683, 4716, 4728, 4769, 4806, 4832,
         4863, 4882, 4908, 4944, 4953, 5043, 5051, 5054, 5061, 5150, 5278,
         5325, 5331, 5333, 5355, 5359, 5372, 5380, 5384, 5392, 5394, 5401,
         5410, 5413, 5430