In [11]:
%matplotlib widget
import numpy as np
import cuml
from cuml import KMeans
from cuml.cluster import KMeans
import cudf
import sys
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import pytz
timeZone = pytz.timezone('America/Los_Angeles')

import torch

import myUtils
import torchModels

In [2]:
def reconstruct(toPredict, predictor, kmeansModel, nStack):
    predicted = np.zeros_like(toPredict)
    predicted[:,1] = toPredict[:,1]

    #stackedSamples, _ = residualStack.shape
    _, nSamples = toPredict.shape
    counter = 1

    while counter + nStack <= nSamples:
        if counter % 100000 == 0:
            myPrint("predicting sample: " + str(counter))

        thisToPredict = toPredict[:, counter:counter + nStack].flatten(order='F')
        thisPredictor = np.repeat(predicted[:, counter - 1], nStack)
        thisResidual = thisToPredict - thisPredictor
        
        #kmeansModel.fit(np.expand_dims(thisResidual,0))
        
        index = int(kmeans.predict(np.expand_dims(thisResidual, 0))[0])
        thisCentroid = centroids[index,:]
        for j in range(nStack):
            predicted[:, counter] = data[:, counter - 1 - j] + thisCentroid[j * nChannel: (j+1) * nChannel].transpose()
            counter += 1
            
    return predicted, kmeansModel


def prepareResiduals(nStack, toPredict, predictor):

    _, nSample = toPredict.shape
    residualStack = np.zeros((nSample, nStack * nChannel))
    counter = 0
    for i in range(0, nSample, nStack):
        if i + nStack >= nSample:
            break
        thisPredictor = np.repeat(predictor[:,i].transpose(), nStack)
        thisToPredict = toPredict[:,i + 1: i + 1 + nStack].flatten(order='F').transpose()
        thisResidual = thisToPredict - thisPredictor
        residualStack[counter, :] = thisResidual
        counter += 1

    residualStack = residualStack[0:counter,:]
    return residualStack

# Load data

In [9]:
dataMultiply = 10**5
sFreq = 256
arrayInCompressedFile = 'arr_7'

data = np.load('/blue/gkalamangalam/jmark.ettinger/eegCompress/processedData/elimPeaksSVD001.npz')[arrayInCompressedFile]
nChannel, nSample = data.shape
data = (data * dataMultiply).astype('float32')
dataTensor = torch.tensor(data)
if torch.cuda.is_available():
    dataTensor = dataTensor.to('cuda')
    print("Data has been moved to GPU")
else:
    print("Data is on CPU")
print(data.shape)

Data has been moved to GPU
(19, 1100367)


In [14]:
# Set parameters
modelType = 'kmeans'
loadBool = 1
modelPath = '/blue/gkalamangalam/jmark.ettinger/eegCompress/models/savedModel_04-16 00:52_-0.023.pt'
numSampleInput = 20
numSampleOutput = 1

path = '/blue/gkalamangalam/jmark.ettinger/eegCompress/processedData/kmeansModels/kmeansModel_001_block7_1stack.npz'
npzfile = np.load(path)
centroids = npzfile['arr_0']
initDict = {'kmeansInit': centroids, 'dataTensor': dataTensor, 'numSampleInput':numSampleInput}

model, dataset, loss_function = torchModels.makeModel(modelType, initDict)

In [25]:
centroids = model.kmeans.float().detach().numpy()
nCentroids,_ = centroids.shape
kmeans = KMeans(n_clusters=nCentroids, init=centroids, n_init=1)

# Reconstruct the data from stacked residuals and centroids

In [8]:
predicted = data

In [9]:
%%time
iterations = 5
nStack = 1

for i in range(iterations):
    myPrint("reconstruct iteration: " + str(i))
    predicted, kmeans = reconstruct(data, predicted, kmeans, nStack)
    
    myPrint(str((np.mean(np.abs(data - predicted)), np.max(np.abs(data - predicted)))))
    
    '''
    residuals = (data[:,1:] - predicted[:,0:-1]).transpose()
    if i == 0:
        residuals = residuals[n_clusters,:]
    '''
    
    kmeans.fit(residuals)
    
    # save kmeans model
    directory = '/blue/gkalamangalam/jmark.ettinger/eegCompress/processedData/kmeansModels/'
    time = str(datetime.datetime.now().astimezone(timeZone).strftime('%m-%d %H:%M'))
    filename = 'kmeansModel_stack' + str(nStack) + '_' +  time + '.npz'
    path = directory + filename
    centroids = kmeans.cluster_centers_
    labels = kmeans.labels_
    np.savez(path, centroids, labels)
    
    # save reconstructed data
    filename = 'predicted_' +  time + '.npz'
    path = directory + filename
    np.savez(path, predicted)
    

04-12 13:14: reconstruct iteration: 0
04-12 13:17: predicting sample: 100000
04-12 13:19: predicting sample: 200000
04-12 13:22: predicting sample: 300000
04-12 13:24: predicting sample: 400000
04-12 13:27: predicting sample: 500000
04-12 13:30: predicting sample: 600000
04-12 13:32: predicting sample: 700000
04-12 13:35: predicting sample: 800000
04-12 13:37: predicting sample: 900000
04-12 13:40: predicting sample: 1000000
04-12 13:42: predicting sample: 1100000
04-12 13:42: (0.086687125, 18.781786)
04-12 13:43: reconstruct iteration: 1
04-12 13:46: predicting sample: 100000
04-12 13:48: predicting sample: 200000
04-12 13:51: predicting sample: 300000
04-12 13:53: predicting sample: 400000
04-12 13:56: predicting sample: 500000
04-12 13:58: predicting sample: 600000
04-12 14:01: predicting sample: 700000
04-12 14:03: predicting sample: 800000
04-12 14:06: predicting sample: 900000
04-12 14:08: predicting sample: 1000000
04-12 14:11: predicting sample: 1100000
04-12 14:11: (0.10178884

In [None]:
np.mean(np.abs(data - predicted)), np.max(np.abs(data - predicted))

 current best: (0.044626124, 0.6939485)

In [None]:
channel = 0

plt.figure()
plt.plot(predicted[channel,:], label='predicted')
plt.plot(data[channel,:], label='original')
residual = data - predicted
#plt.plot(residual[channel,:])
plt.legend()
plt.show()

# Save original and predicted

In [None]:
path = '/blue/gkalamangalam/jmark.ettinger/eegCompress/processedData/origAndPredictedLossy.npz'
dataToSaveList = [data[:,0:failureIndex], predicted[:, 0:failureIndex]]
np.savez_compressed(path, *dataToSaveList)

# Scratch