In [18]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from imblearn.under_sampling import RandomUnderSampler

In [5]:
A = pd.read_csv("../assets/data/cleaned/A.csv", index_col=0)
B = pd.read_csv("../assets/data/cleaned/B.csv", index_col=0)
C = pd.read_csv("../assets/data/cleaned/C.csv", index_col=0)
D = pd.read_csv("../assets/data/cleaned/D.csv", index_col=0)
E = pd.read_csv("../assets/data/cleaned/E.csv", index_col=0)

In [62]:
def train_test_val_split(X, y, test_size, val_size=0.4):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
    X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=val_size)
    return X_train, X_test, X_val, y_train, y_test, y_val

def reshapeInput(X):
    X_ = []
    for x in X:
        X_.append(x.reshape(-1,1))
    return np.array(X_)

def trend(X, span = None):
    '''
    Return EWMA trend for signal samples
    '''
    if not span:
        span = X.shape[1]
    return pd.DataFrame(X).apply(lambda x: x.ewm(span=span).mean(), axis=1)

In [129]:
X = pd.concat([A,B,C,D,E], axis=0)
Y = [0]*A.__len__() + [1]*B.__len__() + [2]*C.__len__() + [3]*D.__len__() + [4]*E.__len__()

In [130]:
X_train, X_test, X_val, y_train, y_test, y_val = train_test_val_split(X, tf.keras.utils.to_categorical(Y), test_size=0.2)

In [131]:
np.sum(y_train, axis=0)

array([ 970.,  901., 1054.,  808., 1123.], dtype=float32)

In [133]:
X_train, y_train = RandomUnderSampler(random_state=42).fit_resample(X_train, y_train)



In [134]:
np.sum(y_train, axis=0)

array([808, 808, 808, 808, 808])

In [135]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(20), input_shape=(24, 1)))
model.add(tf.keras.layers.Dense(12, activation='relu'))
model.add(tf.keras.layers.Dense(5, activation='sigmoid'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [153]:
model.fit(reshapeInput(X_train.values), y_train, 
          epochs=120, batch_size=20,
          validation_data=(reshapeInput(X_val.values), y_val),
         callbacks=tf.keras.callbacks.EarlyStopping(patience=50, restore_best_weights=True))

Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120


<tensorflow.python.keras.callbacks.History at 0x2d7fa1508c8>

In [154]:
print(confusion_matrix(np.argmax(y_test, axis=1),model.predict_classes(reshapeInput(X_test.values))))

[[125  26   3   5   0]
 [ 33  58   9  15   4]
 [  4   9 140  21   0]
 [  1  23  10  77  19]
 [  1  10   1  22 112]]




In [155]:
print(accuracy_score(np.argmax(y_test, axis=1),model.predict_classes(reshapeInput(X_test.values))))

0.7032967032967034




model.save("../assets/models/finalABCDE")

<b>
1. So the final model has an accuracy of 70 %.<br/><br/>
2. This was achieved primarily using data cleaning; Given more time, I would have liked to research dsp technique effectiveness instead of using a temporal modeling approach.<br/><br/>
3. We just need to load the model, and since the model learnt on cleaner data, we dont need to perform any feature extraction pipelines and can process on the input signal.<br/><br/>
4. We can use the autoencoders to first understand the signal pattern and bucket the signal; this can be used to re-inforce the model prediction.<br/><br/>
5. Lastly this was a lot of fun😁. Thanks.<br/>
</b>