In [1]:
import keras
import keras.backend as K
from keras.layers import LSTM,Dropout,Dense,TimeDistributed,Conv1D,MaxPooling1D,Flatten
from keras.models import Sequential
import tensorflow as tf
import gc
from tqdm import tqdm
import matplotlib.pyplot as plt

In [2]:
import pyarrow.parquet as pq
import pandas as pd
import numpy as np

In [3]:
# Load the raw training signals into a pandas DataFrame.
# The directory defaults to the original download location but can be
# overridden with the TRAIN_DATA_DIR environment variable, so the notebook is
# not tied to one user's machine.
import os
train_set = pq.read_pandas(
    os.path.join(
        os.environ.get('TRAIN_DATA_DIR', 'C:\\Users\\srikanthkarri\\Downloads'),
        'train.parquet',
    )
).to_pandas()

In [6]:
def feature_extractor(x, n_part=1000):
    """Compress a signal into ``n_part`` peak-to-peak (max - min) values.

    The signal is cut into consecutive chunks of ``ceil(len(x) / n_part)``
    samples and each chunk is summarised by its value range.

    Parameters
    ----------
    x : array-like
        Signal samples. An input whose trailing dimensions all have size 1
        (for example a single-column 2-D array) is flattened to 1-D first.
    n_part : int, default 1000
        Length of the returned vector.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_part,)``. Slots past the last chunk stay 0
        (the chunking can yield fewer than ``n_part`` chunks), and an empty
        input returns all zeros.
    """
    x = np.asarray(x)
    length = len(x)
    output = np.zeros((n_part,))
    if length == 0:
        # The chunk size would be 0 and range() rejects a zero step.
        return output
    if x.ndim > 1 and x.size == length:
        # Column-shaped input: make every chunk statistic a true scalar.
        x = x.reshape(length)
    pool = int(np.ceil(length / n_part))
    for j, start in enumerate(range(0, length, pool)):
        # Slicing past the end is safe, so the last chunk needs no special case.
        k = x[start:start + pool]
        # Widen before subtracting: in a narrow signed dtype such as int8 the
        # difference max - min can exceed the dtype's range and wrap around.
        high = np.max(k, axis=0).astype(np.float64)
        low = np.min(k, axis=0).astype(np.float64)
        output[j] = high - low
    return output

In [4]:
# Load the per-signal metadata (the later cells read its `signal_id` and
# `target` columns).
# The directory defaults to the original download location but can be
# overridden with the TRAIN_DATA_DIR environment variable, so the notebook is
# not tied to one user's machine.
import os
meta_train = pd.read_csv(
    os.path.join(
        os.environ.get('TRAIN_DATA_DIR', 'C:\\Users\\srikanthkarri\\Downloads'),
        'metadata_train.csv',
    )
)

In [23]:
# Build one 400-value feature vector and one label per signal in the metadata.
# The metadata is walked once; the previous version re-scanned the whole table
# with a boolean mask twice per iteration only to recover the current id and
# its target.
# NOTE(review): assumes signal_id values are unique and equal the positional
# column index in train_set — confirm against the data files.
x_train = []
y_train = []
signal_rows = zip(meta_train['signal_id'].values, meta_train['target'].values)
for signal_id, target in tqdm(signal_rows, total=len(meta_train)):
    # Labels stay length-1 arrays so downstream reshaping sees the same layout.
    y_train.append(np.array([target]))
    x_train.append(abs(feature_extractor(train_set.iloc[:, signal_id].values, n_part=400)))

100%|██████████| 8712/8712 [01:28<00:00, 98.17it/s] 


In [8]:
# Stack the per-signal lists into dense arrays:
#   X_train_1 -> (n_signals_total, n_features), y_train_1 -> flat label vector.
X_train_1 = np.array(x_train).reshape(-1, len(x_train[0]))
y_train_1 = np.array(y_train).reshape(-1)

In [9]:
# n_signals: each instance currently holds one signal (to be diversified in a
#            later step).
# n_outputs: single output unit, i.e. binary classification.
n_signals, n_outputs = 1, 1

In [10]:
# Build the model
# Training hyper-parameters.
verbose = True
epochs = 15
batch_size = 16

# Each feature vector is viewed as n_steps sub-sequences of n_length samples.
n_steps = 40
n_length = 10
X_train_1 = X_train_1.reshape((X_train_1.shape[0], n_steps, n_length, n_signals))

# CNN-LSTM: a small 1-D CNN is applied to every sub-sequence (TimeDistributed),
# its flattened features feed an LSTM, followed by a dense sigmoid head.
model = Sequential([
    TimeDistributed(
        Conv1D(filters=64, kernel_size=3, activation='relu'),
        input_shape=(None, n_length, n_signals),
    ),
    TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu')),
    TimeDistributed(Dropout(0.5)),
    TimeDistributed(MaxPooling1D(pool_size=2)),
    TimeDistributed(Flatten()),
    LSTM(100),
    Dropout(0.5),
    Dense(100, activation='relu'),
    Dense(n_outputs, activation='sigmoid'),
])

In [11]:
# Binary target -> binary cross-entropy, optimised with Adam.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
)

In [12]:
# Train on the full training set (no validation data is passed here).
model.fit(
    x=X_train_1,
    y=y_train_1,
    batch_size=batch_size,
    epochs=epochs,
    verbose=verbose,
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x1ab86fb5af0>

In [33]:
# Display the raw signal table; pandas elides the middle rows/columns of a
# frame this large in the rendered output.
train_set

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,8702,8703,8704,8705,8706,8707,8708,8709,8710,8711
0,18,1,-19,-16,-5,19,-15,15,-1,-16,...,18,-22,12,8,13,6,-21,-15,-9,20
1,18,0,-19,-17,-6,19,-17,16,0,-15,...,17,-21,12,8,14,7,-19,-15,-8,21
2,17,-1,-20,-17,-6,19,-17,15,-3,-15,...,16,-21,13,8,15,8,-18,-14,-8,22
3,18,1,-19,-16,-5,20,-16,16,0,-15,...,16,-21,12,8,15,8,-19,-14,-7,23
4,18,0,-19,-16,-5,20,-17,16,-2,-14,...,17,-22,12,8,15,8,-18,-14,-8,23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
799995,19,2,-18,-15,-4,21,-16,16,-1,-17,...,18,-20,12,9,14,6,-20,-14,-7,22
799996,19,1,-19,-15,-4,20,-17,15,-3,-18,...,18,-20,13,9,14,6,-20,-13,-7,23
799997,17,0,-19,-15,-4,21,-16,14,-2,-18,...,18,-20,13,9,13,6,-20,-14,-7,23
799998,19,1,-18,-14,-3,22,-16,17,-1,-17,...,19,-19,13,10,14,6,-20,-14,-8,22


In [13]:
import numpy as np
import pandas as pd
from scipy import signal
import librosa
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, LSTM, Dense, Dropout, Flatten, TimeDistributed

In [14]:
def create_spectrogram(data, window_size=32, step_size=16, fs=100, window='hann'):
    """Return the spectrogram of ``data`` with time along the first axis.

    Parameters
    ----------
    data : array-like
        Signal passed to ``scipy.signal.spectrogram``.
    window_size : int, default 32
        Samples per segment (``nperseg``).
    step_size : int, default 16
        Hop between consecutive segments; the overlap is
        ``window_size - step_size``.
    fs : float, default 100
        Sampling frequency forwarded to SciPy.
    window : str or tuple or array-like, default 'hann'
        Window specification forwarded to SciPy. This function used to pass
        ``('tukey', window_size)``, but the second tuple element is the Tukey
        shape parameter ``alpha`` rather than a length, and any
        ``alpha >= 1`` yields a Hann window. The default therefore names the
        window that was actually applied, so results are unchanged.

    Returns
    -------
    numpy.ndarray
        ``Sxx`` transposed, i.e. shape ``(n_segments, n_frequencies)`` for
        1-D input.
    """
    # Only the power matrix is needed; the frequency/time axes are discarded.
    _, _, Sxx = signal.spectrogram(
        data,
        fs=fs,
        window=window,
        nperseg=window_size,
        noverlap=window_size - step_size,
    )
    return Sxx.T

In [15]:
def generate_spectrogram_dataset(df, window_size=32, step_size=16):
    """Compute a spectrogram for every entry of ``df`` and stack the results.

    ``df`` is indexed with the integers ``0 .. len(df) - 1``; each item is
    passed to ``create_spectrogram`` with the given ``window_size`` and
    ``step_size``. The spectrograms are returned as one ``numpy`` array, in
    input order.
    """
    spectrograms = [
        create_spectrogram(df[idx], window_size=window_size, step_size=step_size)
        for idx in range(len(df))
    ]
    return np.array(spectrograms)

In [16]:
# Turn every per-signal feature vector in x_train into a spectrogram
# (default window_size=32, step_size=16) and stack them into one array.
X_final = generate_spectrogram_dataset(x_train)

In [17]:
# Number of spectrograms produced (size of the leading axis).
X_final.shape[0]

8712

In [56]:
# Append a trailing channel axis of size 1 to the 3-D training array.
# NOTE(review): in file order X_train_final is first assigned in a later cell
# (the `X_final[:7842]` slice), so this cell raises NameError on a fresh
# Restart & Run All, and that later assignment replaces the reshaped array.
# Confirm the intended cell order.
X_train_final = X_train_final.reshape((X_train_final.shape[0], X_train_final.shape[1], X_train_final.shape[2], 1))

In [57]:

# CNN-LSTM classifier on the spectrograms.
# `input_shape` is not assigned in any visible cell (presumably it only existed
# as leftover kernel state), which breaks Restart & Run All. Derive it from
# the spectrogram array: the per-sample dimensions plus one channel axis for
# Conv2D.
# NOTE(review): assumes X_final is a regular 3-D array
# (n_samples, n_segments, n_frequencies) — confirm.
input_shape = tuple(X_final.shape[1:]) + (1,)

model = Sequential()
# 2-D convolution + pooling over the (segment, frequency) plane.
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 2)))
# Flatten each remaining row so the LSTM receives a (steps, features) sequence.
model.add(TimeDistributed(Flatten()))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train CNN-LSTM model
#model.fit(X_train, y_train, batch_size=32, epochs=10, validation_data=(X_test, y_test))


In [58]:
# Hold out the tail of the dataset for validation.
n_train = 7842  # leading samples used for training; the remainder is validation
X_train_final = X_final[:n_train]
X_test = X_final[n_train:]
# The fit cell that follows needs labels matching these two splits, but no
# visible cell defines them. X_final was built from x_train in order, and
# y_train_1 holds the labels in that same order, so slice it at the same
# boundary.
y_train_final = y_train_1[:n_train]
y_test = y_train_1[n_train:]

In [59]:
# Train the spectrogram model, validating on the held-out split each epoch.
model.fit(
    x=X_train_final,
    y=y_train_final,
    batch_size=batch_size,
    epochs=10,
    verbose=verbose,
    validation_data=(X_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1ab98a4b430>