In [2]:
import pandas as pd
import pickle

from __future__ import print_function
import numpy as np
import h5py
import glob
import math
import os
import shutil
from scipy import signal
from scipy.signal import butter, lfilter
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D, Conv1D, MaxPooling1D, UpSampling1D, Flatten, Dropout, Reshape, GaussianNoise, Cropping2D 
from keras.layers import Bidirectional, BatchNormalization, ZeroPadding1D, ZeroPadding2D, Conv2DTranspose
from keras.models import Model, Sequential
from keras import backend as K
from tensorflow.keras.optimizers import SGD, Adam
from keras import regularizers
from tensorflow.keras.layers import Layer, InputSpec
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, ReduceLROnPlateau, EarlyStopping
from keras.initializers import VarianceScaling
from keras.callbacks import CSVLogger
from scipy.optimize import linear_sum_assignment as linear_assignment
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import metrics
import matplotlib.pyplot as plt
from matplotlib.pyplot import savefig

In [3]:
time = np.arange(0, 86.4, 0.00001);
sine = np.sin(time)
noise = np.random.normal(0,0.1,8640000)
noisy_sine = sine + noise

zeros = [0] * 8640000
sine_noise_by_time = [[noisy_sine[i], zeros[i], zeros[i]] for i in range(len(noisy_sine))]

stream = [noisy_sine, zeros, zeros]

# step_size = 1000 # originally 16, try to increase to 1000 to represent 10 seconds
# batched_stream_arr_by_time = [sine_noise_by_time[i:i + step_size] for i in range(0, len(sine_noise_by_time), step_size)]

In [4]:
from scipy import signal
freq, time, stft = signal.stft(noisy_sine, padded=False, nperseg = 2000 - 1)

In [5]:
stft_stream = np.asarray([np.asarray([np.asarray([stft[i][j], 0, 0]) for j in range(len(stft[i]))]) for i in range(len(stft))])
# stft_stream

In [6]:
len(stft_stream[0])

8640

In [7]:
step_size = len(stft)
step_size

1000

In [8]:
len(stft[0])

8640

##### documentation: https://pytorch.org/docs/stable/nn.functional.html#torch.nn.functional.conv2d

In [14]:
input_shape = (len(stft[0]), 3, 1)
batch_size = 150
no_epochs = 5
train_test_split = 0.3
validation_split = 0.2
verbosity = 1
max_norm_value = 2.0

In [13]:
# model = Sequential()
# model.add(Conv1D(128, kernel_size=3, kernel_constraint=max_norm(max_norm_value), activation='relu', kernel_initializer='he_uniform', input_shape=input_shape))
# model.add(Conv1D(32, kernel_size=3, kernel_constraint=max_norm(max_norm_value), activation='relu', kernel_initializer='he_uniform'))
# model.add(Conv1DTranspose(32, kernel_size=3, kernel_constraint=max_norm(max_norm_value), activation='relu', kernel_initializer='he_uniform'))
# model.add(Conv1DTranspose(128, kernel_size=3, kernel_constraint=max_norm(max_norm_value), activation='relu', kernel_initializer='he_uniform'))
# model.add(Conv1D(1, kernel_size=3, kernel_constraint=max_norm(max_norm_value), activation='sigmoid', padding='same'))

# inp = Input(shape)
model = Sequential()
model.add(Conv2D(8, (3, 3), activation='relu', padding='same', input_shape=input_shape))
# model.add(MaxPooling2D((10, 3), padding='same'))
model.add(Conv2D(4, (3, 3), activation='relu', padding='same'))
# model.add(MaxPooling2D((8, 1), padding='same'))
model.add(Conv2D(2, (3, 3), activation='relu', padding='same'))
# model.add(MaxPooling2D((6, 1), padding='same'))

model.add(Conv2DTranspose(2, (3, 3), activation='relu', padding='same'))
# model.add(UpSampling2D((6, 1)))
model.add(Conv2DTranspose(4, (3, 3), activation='relu', padding='same'))
# model.add(UpSampling2D((8, 1)))
model.add(Conv2DTranspose(8, (3, 3), activation='relu', padding='same'))
# model.add(UpSampling2D((10, 3)))
model.add(Conv2D(1, kernel_size=(3, 3), activation='sigmoid', padding='same'))


model.summary()


Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 67501, 3, 8)       80        
                                                                 
 conv2d_4 (Conv2D)           (None, 67501, 3, 4)       292       
                                                                 
 conv2d_5 (Conv2D)           (None, 67501, 3, 2)       74        
                                                                 
 conv2d_transpose_3 (Conv2DT  (None, 67501, 3, 2)      38        
 ranspose)                                                       
                                                                 
 conv2d_transpose_4 (Conv2DT  (None, 67501, 3, 4)      76        
 ranspose)                                                       
                                                                 
 conv2d_transpose_5 (Conv2DT  (None, 67501, 3, 8)     

In [20]:
inp = Input(shape=(len(stft[0]), 3, 1))  

e = GaussianNoise(0.2)(inp)
# e = Cropping2D(cropping=((0, 0), (1, 0)), data_format = None)(e)

e = Conv2D(8, (3, 3), activation='selu', padding='same')(e)
e = MaxPooling2D((2, 3), padding='same')(e)
e = Conv2D(4, (3, 3), activation='selu', padding='same')(e)
e = MaxPooling2D((2, 1), padding='same')(e)
e = Conv2D(2, (3, 3), activation='selu', padding='same')(e)
e = MaxPooling2D((2, 1), padding='same')(e)

shape_before_flattening = K.int_shape(e)
encoded = Flatten()(e)
# encoded = e
d = Reshape(shape_before_flattening[1:])(encoded)
# d = encoded

d = Conv2D(2, (3, 3), activation='selu', padding='same')(d)
d = UpSampling2D((2, 1))(d)
d = Conv2D(4, (3, 3), activation='selu', padding='same')(d)
d = UpSampling2D((2, 1))(d)
d = Conv2D(8, (3, 3), activation='selu', padding='same')(d)
d = UpSampling2D((2, 3))(d)
# d = ZeroPadding2D((0, 1))(d)
# d = Cropping2D(cropping=((0, 0), (2499, 0)), data_format = None)(d)

decoded = Conv2D(1, (3, 3), padding='same')(d)


autoencoder = Model(inputs=inp, outputs=decoded, name='autoencoder')
encoder = Model(inputs=inp, outputs=encoded, name='encoder')

autoencoder.compile(loss="mse", optimizer='adam')
csv_logger = CSVLogger('pretrain_log.csv')

autoencoder.summary()

Model: "autoencoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 8640, 3, 1)]      0         
                                                                 
 gaussian_noise_2 (GaussianN  (None, 8640, 3, 1)       0         
 oise)                                                           
                                                                 
 conv2d_14 (Conv2D)          (None, 8640, 3, 8)        80        
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 4320, 1, 8)       0         
 2D)                                                             
                                                                 
 conv2d_15 (Conv2D)          (None, 4320, 1, 4)        292       
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 2160, 1, 4)       

In [21]:
autoencoder.fit(stft_stream, stft_stream, batch_size=128, epochs=2, callbacks=[csv_logger])

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f9f7f9772e0>

In [None]:
pred = encoder.predict(stft_stream)


In [18]:
kmeans = KMeans(n_clusters = 2)
kmeans.fit(pred)

ValueError: Found array with dim 4. Estimator expected <= 2.

In [14]:
class ClusteringLayer(Layer):
    def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(ClusteringLayer, self).__init__(**kwargs)
        self.n_clusters = n_clusters
        self.alpha = alpha
        self.initial_weights = weights
        self.input_spec = InputSpec(ndim=4)

    def build(self, input_shape):
        assert len(input_shape) == 4
        input_dim = input_shape[1]
        self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
        self.clusters = self.add_weight(shape=(self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters')
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
        self.built = True

    def call(self, inputs, **kwargs):
        q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
        q **= (self.alpha + 1.0) / 2.0
        q = K.transpose(K.transpose(q) / K.sum(q, axis=1)) 
        return q

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) == 4
        return input_shape[0], self.n_clusters

    def get_config(self):
        config = {'n_clusters': self.n_clusters}
        base_config = super(ClusteringLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
    
print('...Finetuning...')
n_clusters = 2
clustering_layer = ClusteringLayer(n_clusters, name='clustering')(encoder.output)
model = Model(inputs=[encoder.input], outputs=[clustering_layer, autoencoder.output])
model.compile(loss='binary_crossentropy', optimizer='adam')

# from keras.utils import plot_model
# plot_model(model, to_file='model.png', show_shapes=True)
# from IPython.display import Image
# Image(filename='model.png')

...Finetuning...


ValueError: Exception encountered when calling layer "clustering" (type ClusteringLayer).

in user code:

    File "<ipython-input-14-504be8e1be60>", line 22, in call  *
        q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))

    ValueError: Dimensions must be equal, but are 2 and 1080 for '{{node clustering/sub}} = Sub[T=DT_FLOAT](clustering/ExpandDims, clustering/sub/ReadVariableOp)' with input shapes: [?,1,1080,1,2], [2,1080].


Call arguments received:
  • inputs=tf.Tensor(shape=(None, 1080, 1, 2), dtype=float32)
  • kwargs={'training': 'None'}