In [10]:
import numpy as np
import cuml
from cuml import KMeans
from cuml.cluster import KMeans
import cudf
import sys
import pandas as pd

# Load original and predicted

In [19]:
processedData = np.load('/blue/gkalamangalam/jmark.ettinger/eegCompress/processedData/origAndPredictedSVD001_block7.npz')
data = processedData['arr_0']
#predicted = processedData['arr_1']
#residual = predicted - data

residual = np.diff(data)
nChannel, nSample = data.shape
residual = residual.transpose()
print(residual.shape)

(1100366, 19)


# Prepare unit norm residuals

In [12]:
residualNormalized = np.zeros_like(residual)
norms = np.linalg.norm(residual, axis=1)
for i in range(nSample):
    for j in range(nChannel):
        residualNormalized[i,j] = residual[i,j]/norms[i] 
print(residualNormalized.shape)

  residualNormalized[i,j] = residual[i,j]/norms[i]


(1100367, 19)


# Fit kmeans model

In [20]:
%%time
kmeansInputData = residual
n_clusters = 2**8

kmeans = KMeans(n_clusters=n_clusters, verbose=6)
kmeans.fit(kmeansInputData)
centroids = kmeans.cluster_centers_
print(kmeans.n_iter_)
#kmeans.labels_

101
CPU times: user 936 ms, sys: 22 ms, total: 958 ms
Wall time: 959 ms


# Save KMeans model

In [4]:
path = '/blue/gkalamangalam/jmark.ettinger/eegCompress/processedData/centroids_001_block7.npy'
np.save(path, centroids)

path = '/blue/gkalamangalam/jmark.ettinger/eegCompress/processedData/labels_001_block7.npy'
np.save(path, kmeans.labels_)

# Load CUML KMeans model

In [None]:
path = '/content/drive/MyDrive/NeuroResearch/Data/eegCompress/processedData/centroids_001_block7.npy'
centroids = np.load(path)
nCentroids,_ = centroids.shape
kmeansCuml = KMeans(n_clusters=nCentroids, init=centroids, n_init=1)

# Reconstruct the data

In [21]:
%%time
numSampleInput = 1
numSampleOutput = 1
_,failureIndex = data.shape

predicted = np.zeros_like(data)
predicted[:,0:numSampleInput] = data[:,0:numSampleInput]

for i in range(numSampleInput, nSample):
    if i % 100000 == 0:
        print(i)
        
    thisOutput = predicted[:,i-1]
    thisResidual = data[:,i] - thisOutput
    #thisResidual = thisResidual/np.linalg.norm(thisResidual) ####  here
    index = int(kmeans.predict(np.expand_dims(thisResidual, 0))[0])
    
    predicted[:,i] = thisOutput + centroids[index,:]
    #predicted[:,i] = thisOutput + (centroids[index,:] * np.linalg.norm(thisResidual)) # here
    
    if np.any(np.isnan(predicted[:,i])):
        failureIndex = i
        print("nan: " + str(failureIndex))
    

100000
200000
300000
400000
500000
600000
700000
800000
900000
1000000
1100000
CPU times: user 14min 3s, sys: 94.9 ms, total: 14min 3s
Wall time: 14min 7s


In [22]:
np.mean(np.abs(data - predicted)), np.max(np.abs(data - predicted))

(1.0253046, 7.686386)

In [16]:
np.mean(np.abs(data - predicted)), np.max(np.abs(data - predicted))

(0.044626124, 0.6939485)

# Save original and predicted

In [23]:
path = '/blue/gkalamangalam/jmark.ettinger/eegCompress/processedData/origAndPredictedLossy.npz'
dataToSaveList = [data[:,0:failureIndex], predicted[:, 0:failureIndex]]
np.savez_compressed(path, *dataToSaveList)

# Scratch