In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from matplotlib import pyplot as plt
from matplotlib import animation, rc
from IPython.display import HTML
import random
import json
import keras
from keras.models import Model, Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Input, Dropout, GlobalAveragePooling1D, Reshape
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE

Using TensorFlow backend.


In [2]:
with open("raw_data_marcelo_ab_14022019_1303.json", 'r') as f:
    raw_data = json.load(f)
    Accx = raw_data['Accx']
    Accy = raw_data['Accy']
    Accz = raw_data['Accz']
    Gyrx = raw_data['Gyrx']
    Gyry = raw_data['Gyry']
    Gyrz = raw_data['Gyrz']
    labels = raw_data['labels']

Checking normality.

In [3]:
from scipy import stats
for i in range(0, len(Accx)):
    print(stats.kstest(Accx[i], 'norm'))

KstestResult(statistic=1.0, pvalue=0.0)
KstestResult(statistic=0.9787234042553191, pvalue=0.0)
KstestResult(statistic=1.0, pvalue=0.0)
KstestResult(statistic=0.9775280898876404, pvalue=0.0)
KstestResult(statistic=0.9871794871794872, pvalue=0.0)
KstestResult(statistic=0.9777777777777777, pvalue=0.0)
KstestResult(statistic=0.9868421052631579, pvalue=0.0)
KstestResult(statistic=0.9818181818181818, pvalue=0.0)
KstestResult(statistic=0.9873417721518988, pvalue=0.0)
KstestResult(statistic=1.0, pvalue=0.0)
KstestResult(statistic=0.9866666666666667, pvalue=0.0)
KstestResult(statistic=0.9893617021276596, pvalue=0.0)
KstestResult(statistic=0.9878048780487805, pvalue=0.0)
KstestResult(statistic=0.9878048780487805, pvalue=0.0)
KstestResult(statistic=1.0, pvalue=0.0)
KstestResult(statistic=0.9893617021276596, pvalue=0.0)
KstestResult(statistic=0.9864864864864865, pvalue=0.0)
KstestResult(statistic=1.0, pvalue=0.0)
KstestResult(statistic=1.0, pvalue=0.0)
KstestResult(statistic=1.0, pvalue=0.0)
Kstes

Normalizing to N(0,1)

In [4]:
Accx_calib = []
Accy_calib = []
Accz_calib = []
Gyrx_calib = []
Gyry_calib = []
Gyrz_calib = []
for i in range(0, len(Accx)):
    Accx_calib.append((Accx[i]-np.mean(Accx[i]))/np.std(Accx[i]))
    Accy_calib.append((Accy[i]-np.mean(Accy[i]))/np.std(Accy[i]))
    Accz_calib.append((Accz[i]-np.mean(Accz[i]))/np.std(Accz[i]))
    Gyrx_calib.append((Gyrx[i]-np.mean(Gyrx[i]))/np.std(Gyrx[i]))
    Gyry_calib.append((Gyry[i]-np.mean(Gyry[i]))/np.std(Gyry[i]))
    Gyrz_calib.append((Gyrz[i]-np.mean(Gyrz[i]))/np.std(Gyrz[i]))

In [5]:
longest_signal = 0
for signal in Accx_calib:
    if len(signal) > longest_signal:
        longest_signal = len(signal)

Increasing signals length to match the longest length.

In [6]:
processed_data = []
for i in range(0, len(Accx_calib)):
    len_current_signal = len(Accx_calib[i])
    for j in range(0, longest_signal - len_current_signal):
        rand_index = random.randint(1, len(Accx_calib[i])-1)
        Accx_calib[i] = np.insert(Accx_calib[i], rand_index, Accx_calib[i][rand_index])
        Accy_calib[i] = np.insert(Accy_calib[i], rand_index, Accy_calib[i][rand_index])
        Accz_calib[i] = np.insert(Accz_calib[i], rand_index, Accz_calib[i][rand_index])
        Gyrx_calib[i] = np.insert(Gyrx_calib[i], rand_index, Gyrx_calib[i][rand_index])
        Gyry_calib[i] = np.insert(Gyry_calib[i], rand_index, Gyry_calib[i][rand_index])
        Gyrz_calib[i] = np.insert(Gyrz_calib[i], rand_index, Gyrz_calib[i][rand_index])
    processed_data.append(np.concatenate([Accx_calib[i], Accy_calib[i], Accz_calib[i],
                                          Gyrx_calib[i], Gyry_calib[i], Gyrz_calib[i]]))

In [7]:
x = np.array(processed_data)
y = np.array(labels)
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

Balancing training base.

In [8]:
sm = SMOTE()
X_train, y_train = sm.fit_sample(X_train, y_train)

In [9]:
pd.Series(y_train).value_counts()

B    101
A    101
dtype: int64

In [10]:
letters = np.unique(y_train).tolist()

In [11]:
y_train_encoded = []
for value in y_train:
    encoded_target = np.zeros(len(letters))
    encoded_target[letters.index(value)] = 1
    y_train_encoded.append(encoded_target)
y_train_encoded = np.array(y_train_encoded)

In [12]:
y_test_encoded = []
for value in y_test:
    encoded_target = np.zeros(len(letters))
    encoded_target[letters.index(value)] = 1
    y_test_encoded.append(encoded_target)
y_test_encoded = np.array(y_test_encoded)

In [13]:
y_train_encoded.shape

(202, 2)

In [14]:
model_m = Sequential()
model_m.add(Reshape((int(processed_data[0].shape[0]/6), 6), input_shape=processed_data[0].shape))
model_m.add(Conv1D(100, 10, activation='relu'))
model_m.add(Conv1D(100, 10, activation='relu'))
model_m.add(MaxPooling1D(3))
model_m.add(Conv1D(160, 10, activation='relu'))
model_m.add(Conv1D(160, 10, activation='relu'))
model_m.add(GlobalAveragePooling1D())
model_m.add(Dropout(0.5))
model_m.add(Dense(len(letters), activation='softmax'))
print(model_m.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_1 (Reshape)          (None, 132, 6)            0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 123, 100)          6100      
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 114, 100)          100100    
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 38, 100)           0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 29, 160)           160160    
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 20, 160)           256160    
_________________________________________________________________
global_average_pooling1d_1 ( (None, 160)               0         
__________

In [15]:
callbacks_list = [
    keras.callbacks.ModelCheckpoint(
        filepath='best_model.{epoch:02d}-{val_loss:.2f}.h5',
        monitor='val_loss', save_best_only=True),
    keras.callbacks.EarlyStopping(monitor='val_acc', patience=10)
]

model_m.compile(loss='binary_crossentropy',
                optimizer='adam', metrics=['accuracy'])

BATCH_SIZE = 20
EPOCHS = 50

history = model_m.fit(X_train,
                      y_train_encoded,
                      batch_size=BATCH_SIZE,
                      epochs=EPOCHS,
                      callbacks=callbacks_list,
                      validation_split=0.2,
                      verbose=1)

Train on 161 samples, validate on 41 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50


In [16]:
predictions = model_m.predict(X_test)

In [17]:
predictions_labels = []
expected_labels = []
for i, prediction in enumerate(predictions):
    predictions_labels.append(np.argmax(prediction))
    expected_labels.append(np.argmax(y_test_encoded[i]))

In [18]:
print(classification_report(expected_labels, predictions_labels, target_names=letters))

             precision    recall  f1-score   support

          A       0.83      0.56      0.67         9
          B       0.88      0.97      0.92        31

avg / total       0.87      0.88      0.87        40

