In [1]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

import os
import itertools
import keras
from keras.models import load_model, Model, Sequential
from keras.layers import LSTM, Dense, Conv2D, Flatten, Dropout, BatchNormalization, MaxPooling2D
from keras.models import Sequential
from sklearn import preprocessing
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
import numpy as np

Using TensorFlow backend.


In [20]:
def get_intersection(lst1, lst2): 
    return [value for value in lst1 if value in lst2] 

In [21]:
def get_scaled_data(X_train, X_test, data_shape):
    
    scaler = preprocessing.StandardScaler().fit(X_train.reshape(X_train.shape[0], -1))
    X_train = scaler.transform(X_train.reshape(X_train.shape[0], -1))
    X_test = scaler.transform(X_test.reshape(X_test.shape[0], -1))
    X_train = X_train.reshape(X_train.shape[0], data_shape[1], data_shape[2])
    X_test = X_test.reshape(X_test.shape[0], data_shape[1], data_shape[2])
    
    return X_train, X_test

In [22]:
def get_true_cls(labels, sel_cls):
    
    for i, lbl in enumerate(sel_cls):
        labels[labels == i] = lbl
        
    return labels

In [23]:
def lbls_for_cls(labels, lbls_list=None):
    new_labels = [i for i in range(len(lbls_list))]
    for i, lbl in enumerate(lbls_list):
        labels[labels == lbl] = new_labels[i]

    return labels

In [24]:
def get_model_str(sel_cls, hidden_1=64, data_mode='amp', win_len=512):
    model_str = ''
    for cls in sel_cls:
        model_str += str(cls) + '_'
    
    model_str += data_mode + '_' + str(win_len) + '_hidden_' + str(hidden_1) 
    #model_str += '_' + str(int(validation_split*100)) if validation_split else '_None' 
    return model_str

In [25]:
def get_cls_data(data, labels, sel_cls):
    labels_idx = []
    for cls in sel_cls:
        labels_idx += np.argwhere(labels == cls).flatten().tolist()

    sel_labels = [labels[idx] for idx in labels_idx]
    sel_data = [data[idx] for idx in labels_idx]
    labels = np.array(sel_labels)
    data = np.array(sel_data)
    data = data.astype(np.float32) 
    
    return data, labels

In [36]:
def get_downsampled_points(data, labels, target_cls=[1, 2, 3]):
    
    subset_idx = {}
    downsampled_idx = []
    
    model_dir = '/scratch/sk7898/pedbike/models/lstm/'
    cls_str_list = ['1_2', '1_3', '2_3']
    sel_cls_list = [[1, 2], [1, 3], [2, 3]]
     
    for idx, (cls_str, sel_cls) in enumerate(zip(cls_str_list, sel_cls_list)):
        X, y = get_cls_data(data, labels, sel_cls)

        model_str = os.path.join(cls_str + '_amp_512_hidden_128/best_model.h5')
        model_path = os.path.join(model_dir, model_str)

        # Load the model to predict the count class
        model = load_model(model_path) 
        pred = model.predict(x=X)
        cls_pred = np.argmax(pred, axis = 1)
        y_pred = get_true_cls(cls_pred, sel_cls)
        
        for cls in sel_cls:
            if cls not in subset_idx.keys():
                subset_idx[cls] = []
                subset_idx[cls] = [i for i, (x, y) in enumerate(zip(y, y_pred)) if x == y]
            else:
                c_idx = [i for i, (x, y) in enumerate(zip(y, y_pred)) if x == y]
                subset_idx[cls] = get_intersection(subset_idx[cls], c_idx)
                
    for key in subset_idx.keys():
        downsampled_idx += subset_idx[key]
    
    downsampled_idx = list(dict.fromkeys(downsampled_idx))
    sel_labels = [labels[idx] for idx in downsampled_idx]
    sel_data = [data[idx] for idx in downsampled_idx]
    labels = np.array(sel_labels)
    data = np.array(sel_data).astype(np.float32) 
    
    return data, labels

In [39]:
def get_data(sel_cls,
             data_mode='amp', 
             task_type='cls',
             downsample=True,
             scaling=True):
    
    data_dir = '/scratch/sk7898/pedbike/fft_data'
    data_path = os.path.join(data_dir, 'Data_win_fft.npy')
    labels_path = os.path.join(data_dir, 'label_win_fft.npy')
    seqs_path = os.path.join(data_dir, 'seqs_fft.npy')
    data = np.load(data_path, allow_pickle=True) #shape: (18642, 256, 5)
    labels = np.load(labels_path, allow_pickle=True) #shape: (18642,)

    n_data = data.swapaxes(1, 2)
    amp_data = np.absolute(n_data)
    phase_data = np.angle(n_data)
    power_data = np.absolute(n_data)**2
    real_data = np.real(n_data)
    imag_data = np.imag(n_data)
    
    if data_mode == 'amp':
        data = amp_data
    elif data_mode == 'phase':
        data = phase_data
    elif data_mode == 'power':
        data == power_data

    data, labels = get_cls_data(data, labels, sel_cls)

    if downsample:
        X_train, X_test, y_train, y_test = train_test_split(data,
                                                            labels,
                                                            test_size=0.1,
                                                            random_state=42)
        if scaling:
            X_train, X_test = get_scaled_data(X_train, X_test, data_shape=n_data.shape)
        
        X_train, y_train = get_downsampled_points(X_train,
                                                  y_train,
                                                  target_cls=sel_cls)
                
        y_train = lbls_for_cls(y_train, lbls_list=sel_cls)
        y_test = lbls_for_cls(y_test, lbls_list=sel_cls)
        y_train = y_train.reshape(-1, 1)
        y_test = y_test.reshape(-1, 1)

    else:
        labels = lbls_for_cls(labels, lbls_list=sel_cls)
        labels = labels.reshape(-1, 1)

        X_train, X_test, y_train, y_test = train_test_split(data,
                                                            labels,
                                                            test_size=0.1,
                                                            random_state=42)
    
        if scaling:
            X_train, X_test = get_scaled_data(X_train, X_test, data_shape=n_data.shape)

    return X_train, X_test, y_train, y_test

In [41]:
def build_lstm_fft_model(hidden_1,
                         counting_dense_1,
                         counting_dense_2,
                         kernel_initializer='normal',
                         dropout_1=None,
                         dropout_2=None,
                         optimizer=None,
                         input_shape=(5, 256),
                         n_classes=2):
    
    model = Sequential()
    model.add(LSTM(hidden_1, return_sequences=False, input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(Dense(counting_dense_1, activation='relu', name='counting_dense_1'))
    model.add(Dropout(dropout_1))
    model.add(Dense(counting_dense_2, activation='relu', name='counting_dense_2'))
    model.add(Dropout(dropout_2))
    model.add(Dense(n_classes, activation='softmax', name='output'))
    model.compile(loss='sparse_categorical_crossentropy', 
                  optimizer=optimizer, 
                  metrics=['sparse_categorical_accuracy'])
        
    return model

In [42]:
def build_conv2d_fft_model(filters_1,
                           counting_dense_1,
                           counting_dense_2,
                           kernel_initializer='normal',
                           dropout=None,
                           optimizer=None,
                           input_shape=(5, 256, 1),
                           task_type=None,
                           n_classes=2):
    
    model = Sequential()
    model.add(Conv2D(filters_1, kernel_size=(2, 8), strides=(1, 8), data_format='channels_last', input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(counting_dense_1, activation='relu', name='counting_dense_1'))
    model.add(Dropout(dropout))
    model.add(Dense(counting_dense_2, activation='relu', name='counting_dense_2'))
    model.add(Dense(n_classes, activation='softmax', name='output'))
    model.compile(loss='sparse_categorical_crossentropy', 
                  optimizer=optimizer, 
                  metrics=['sparse_categorical_accuracy'])
        
    return model

In [47]:
epochs = 60
batch_size = 64
learning_rate = 1e-4
dropout_1 = 0.3
dropout_2 = 0.3
hidden_1 = 128
filters_1 = 64
counting_dense_1 = 256
counting_dense_2 = 64

cls_list = [[1, 2, 3]] #[2, 4], [1, 2, 3], [1, 2, 3, 4]
model_type = 'lstm'
data_mode = 'amp'
model_dir = '/scratch/sk7898/pedbike/models'
downsample = True

for sel_cls in cls_list:
    model_str = get_model_str(sel_cls, hidden_1=hidden_1, data_mode='amp', win_len=512)
    model_path = os.path.join(model_dir, model_type, model_str)
    
    if not os.path.isdir(model_path):
        os.makedirs(model_path)
                
    X_train, X_test, y_train, y_test = get_data(sel_cls=sel_cls,
                                                data_mode='amp',
                                                downsample=downsample)

    optimizer = keras.optimizers.Adam(lr=learning_rate)
    if model_type == 'conv':
        X_train = X_train[:, :, :, np.newaxis]
        X_test = X_test[:, :, :, np.newaxis]
    
        model = build_conv2d_fft_model(filters_1,
                                       counting_dense_1,
                                       counting_dense_2,
                                       dropout_1=dropout_1,
                                       optimizer=optimizer,
                                       n_classes=len(sel_cls),
                                       input_shape=(5, 256, 1))
    else:
        model = build_lstm_fft_model(hidden_1,
                                     counting_dense_1,
                                     counting_dense_2,
                                     dropout_1=dropout_1,
                                     dropout_2=dropout_2,
                                     optimizer=optimizer,
                                     input_shape=(5, 256),
                                     n_classes=len(sel_cls))    
    
    H_train = model.fit(x=X_train,
                        y=y_train,
                        batch_size=batch_size,
                        validation_split=0.1,
                        epochs=epochs,
                        shuffle=True)
    
    
    model.save(os.path.join(model_path, 'latest_model_downsample.h5'))
    evaluations = model.evaluate(x=X_test, y=y_test)    
    pred = model.predict(x=X_test)
    cls_pred = np.argmax(pred, axis = 1)
    mae = mean_absolute_error(y_test, cls_pred)
    print('Accuracy: {} MAE: {}'.format(evaluations[1], mae))

Train on 4092 samples, validate on 455 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60


Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
Accuracy: 0.7117465138435364 MAE: 0.34853168469860896
