In [43]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
import plotly.express as px
import emd
tf.keras.mixed_precision.set_global_policy('mixed_float16')

BATCH_SIZE = 32
CHANNEL_NUMBER = 3
WINDOW_SIZE = 100
SLIDING_STEP = int(WINDOW_SIZE * 0.4)
KEY_CLASS = {0:'undefined action', 1:'up', 2:'down', 3:'left', 4:'right', 5:'quick touch'} 
CLASS_NUMBER = 5 # 0 is not a class
NUM_IMF = 3
LABEL_THRESHOLD = 0.7
BELIEF_THRESHOLD = 0.5

if CLASS_NUMBER < 2:
    CLASS_NUMBER = 2

In [172]:
def slicing(x, y):
    totalLength = x.shape[0]
    assert totalLength == y.shape[0], "Data numbers not matching with that of labels."
    if totalLength <= WINDOW_SIZE:
        return x, y

    label_dict = one_hot_label(y)
    reverse_label_dict = reverse_label(label_dict)
    y = one_hot(y, label_dict = label_dict)
    
    thresholdWindow = LABEL_THRESHOLD * WINDOW_SIZE
    retx = None
    rety = None
    retUnknown = None
    
    i = 0
    while (totalLength - i) > WINDOW_SIZE:
        new = (x[i:(i + WINDOW_SIZE), :])[np.newaxis, :]
        
        classSum = np.sum(y[i:(i + WINDOW_SIZE)], axis = 0)
        maxIdx = np.argmax(classSum)
        if maxIdx > 0 and classSum[maxIdx] > thresholdWindow:
            if not isinstance(retx, np.ndarray):
                retx = new.copy()
                rety = [reverse_label_dict[maxIdx]]
            retx = np.concatenate([retx, new], axis=0)
            rety.append(reverse_label_dict[maxIdx])
        else:
            if not isinstance(retUnknown, np.ndarray):
                retUnknown = new.copy()
            retUnknown = np.concatenate([retUnknown, new], axis=0)

        i += SLIDING_STEP
        
    ret_dict = {}
    for key in label_dict:
        if key != 0:
            ret_dict[key] = label_dict[key] - 1
        
    return retx, one_hot(rety, ret_dict), retUnknown, ret_dict, reverse_label_dict

def one_hot_label(arr):
    label_dict = {}
    index = 1
    for val in arr:
        if not label_dict.get(val, False):
            label_dict[val] = index
            index += 1
    return label_dict

def reverse_label(label_dict):
    reverse_label_dict = {}
    for key in label_dict:
        reverse_label_dict[label_dict[key] - 1] = key
    return reverse_label_dict

def one_hot(arr, label_dict = None):
    if not isinstance(label_dict, dict):
        label_dict = one_hot_label(arr)

    ret = []
    key_num = len(list(label_dict.keys()))
    for val in arr:
        tmp = [0] * key_num
        tmp[label_dict[val] - 1] = 1
        ret.append(np.array(tmp))
        
    return np.array(ret)    

def emdSignal(sig):
    dataNumber = sig.shape[0]
    channel = sig.shape[-1]
    ret = None
    
    for i in range(dataNumber):
        temp = None
        
        for c in range(channel):
            raw = sig[i, :, c]
            imf = emd.sift.sift(raw, max_imfs=NUM_IMF, imf_opts={'sd_thresh': 0.1})
            
            if imf.shape[-1] < NUM_IMF:
                compensate = np.zeros((WINDOW_SIZE, NUM_IMF - imf.shape[-1]))
                imf = np.concatenate([imf, compensate], axis = 1)
            
            if not type(temp) == np.ndarray: 
                temp = imf
            else: 
                temp = np.concatenate([temp, imf], axis = 1)
            
        if type(temp) == np.ndarray: 
            if not type(ret) == np.ndarray: 
                ret = temp[np.newaxis, :]
            else: 
                ret = np.concatenate([ret, temp[np.newaxis, :]], axis = 0)
                
    return ret

def buildModel(shape):
    model = tf.keras.Sequential([
        tf.keras.layers.Input(shape = shape),
        tf.keras.layers.Conv1D(int(WINDOW_SIZE * 0.8 // 3), int(WINDOW_SIZE // 3), padding='same', activation='relu'),
        tf.keras.layers.BatchNormalization(axis=1),
        tf.keras.layers.MaxPool1D(padding='same'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Conv1D(int(WINDOW_SIZE // 3), int(WINDOW_SIZE * 1.2 // 3), padding='same', activation='relu'),
        tf.keras.layers.BatchNormalization(axis=1),
        tf.keras.layers.MaxPool1D(padding='same'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Conv1D(int(WINDOW_SIZE * 1.2 // 3), int(WINDOW_SIZE / 1.2 * 1.4 // 3), padding='same', activation='relu'),
        tf.keras.layers.BatchNormalization(axis=1),
        tf.keras.layers.MaxPool1D(padding='same'),
        tf.keras.layers.Dropout(0.05),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense((WINDOW_SIZE // 100) * 256, activation='relu'),
        tf.keras.layers.Dense(CLASS_NUMBER, activation='softmax')]
    )
    model.compile(optimizer='adam',
                loss=tf.keras.losses.CategoricalCrossentropy(),
                metrics=[tf.keras.metrics.CategoricalAccuracy()])
                        #  tf.keras.metrics.Precision(thresholds = 0.5),
                        #  tf.keras.metrics.Recall(thresholds= 0.5)])
    return model

In [173]:
#Load, preprocess and split record files
trainSignalFiles = ["./data/230202_l5m6r7_record_X.npy", "./data/230202_l5m6r7_record_2_X.npy"]
trainLabelFiles = ["./data/230202_l5m6r7_record_y.npy", "./data/230202_l5m6r7_record_2_y.npy"]

trainSignal, trainLabel = None, None
for sfp, lfp in zip(trainSignalFiles, trainLabelFiles):
    trainSignal = np.load(sfp) if not isinstance(trainSignal, np.ndarray) else np.concatenate([trainSignal, np.load(sfp)], axis=0)
    trainLabel = np.load(lfp) if not isinstance(trainLabel, np.ndarray) else np.concatenate([trainLabel, np.load(lfp)], axis=0)

X, y, X_unknown, label_dict, reverse_label_dict = slicing(trainSignal, trainLabel)

In [175]:
X_emd = emdSignal(X)
X_unknown_emd = emdSignal(X_unknown)

X_train, X_test, y_train, y_test = train_test_split(X_emd, y, test_size=0.2, random_state=777)

print(X_emd.shape)
print(X_unknown_emd.shape)
print(label_dict)
print(reverse_label_dict)

(1033, 100, 9)
(1992, 100, 9)
{3: 1, 4: 2, 5: 3, 1: 4, 2: 5}
{0: 0, 1: 3, 2: 4, 3: 5, 4: 1, 5: 2}


In [188]:
# (Optional) Transfer reverse_label_dict to text
for key in reverse_label_dict:
    reverse_label_dict[key] = KEY_CLASS[reverse_label_dict[key]]
reverse_label_dict

{0: 'undefined action',
 1: 'left',
 2: 'right',
 3: 'quick touch',
 4: 'up',
 5: 'down'}

In [128]:
ydf = pd.DataFrame(y)

In [129]:
ydf[lambda x:x[1] == 1].sample()

Unnamed: 0,0,1,2,3,4
556,0,1,0,0,0


In [37]:
px.line(X[63])

In [161]:
#Training
model = buildModel(X_train.shape[1:])
model.summary()
history = model.fit(x=X_train,
                    y=y_train,
                    batch_size=BATCH_SIZE,
                    epochs=20,
                    validation_data=[X_test, y_test])

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_18 (Conv1D)          (None, 100, 26)           7748      
                                                                 
 batch_normalization_18 (Bat  (None, 100, 26)          400       
 chNormalization)                                                
                                                                 
 max_pooling1d_18 (MaxPoolin  (None, 50, 26)           0         
 g1D)                                                            
                                                                 
 dropout_18 (Dropout)        (None, 50, 26)            0         
                                                                 
 conv1d_19 (Conv1D)          (None, 50, 33)            34353     
                                                                 
 batch_normalization_19 (Bat  (None, 50, 33)          

In [126]:
model = tf.keras.models.load_model('./model/LickingPark')

array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       ...,
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])

In [184]:
All_X = np.concatenate([X_emd, X_unknown_emd])
All_y = np.concatenate([y, np.array([[0] * CLASS_NUMBER for _ in range(X_unknown.shape[0])])])
res = model.predict(All_X)



In [190]:
pd.DataFrame([reverse_label_dict[np.argmax(x) + 1] if any(x) else reverse_label_dict[0] for x in All_y]).value_counts()

undefined action    1992
down                 247
left                 242
right                232
up                   216
quick touch           96
dtype: int64

In [191]:
pd.DataFrame([reverse_label_dict[np.argmax(x) + 1 if x[np.argmax(x)] > BELIEF_THRESHOLD else 0] for x in res]).value_counts()

down                903
up                  802
right               466
left                448
quick touch         250
undefined action    156
dtype: int64

In [193]:
gj = 0
bj = 0
for r, p in zip(All_y, res):
    rm = reverse_label_dict[np.argmax(r) + 1 if any(r) else 0]
    pm = reverse_label_dict[np.argmax(p) + 1 if p[np.argmax(p)] > BELIEF_THRESHOLD else 0]
    if rm == pm:
        gj += 1
    else:
        bj += 1
print("True: {}, False: {}, Accuracy: {:.4f}".format(gj, bj, gj / (gj + bj)))

True: 1067, False: 1958, Accuracy: 0.3527


In [162]:
f1 = np.array(history.history['loss']).flatten()
valf1 = np.array(history.history['val_loss']).flatten()
px.line(pd.DataFrame(np.array([f1, valf1]).T, columns=['loss', 'val_loss'])).show()

In [47]:
model.save("./model/LickingPark", save_format="tf")