In [None]:
# Switch on IPython's "greedy" tab-completion option for this kernel session.
%config IPCompleter.greedy=True

In [None]:
from IPython.display import SVG, display

from keras import backend as K
from keras.initializers import glorot_uniform
from keras.models import Model 
from keras.layers import Input, Dense 
from keras.utils import np_utils
from keras.utils.vis_utils import model_to_dot
from keras.callbacks import LambdaCallback

import math
import numpy as np
import os
import pandas as pd
from pathlib import Path
import random

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import tensorflow as tf

# Directory holding the per-session CSV files, relative to the working directory.
BASE_FOLDER = Path('../data')

# Number of classes for each session id (looked up to obtain `num_classes`).
sessions = {
    0: 22,
    1: 153,
    2: 153,
}

# Seed passed to the train/test split so the partition is repeatable.
RANDOM_STATE = 42

## Reproducibility

In [None]:
# One seed shared by every random number generator configured in this cell
# (each step below could be given its own seed instead).
seed_value= 0

# 1. Set `PYTHONHASHSEED` environment variable at a fixed value
# NOTE(review): Python reads this variable when the interpreter starts, so
# setting it from an already-running kernel presumably only affects child
# processes -- confirm whether it must be exported before launching Jupyter.
os.environ['PYTHONHASHSEED']=str(seed_value)

# 2. Set `python` built-in pseudo-random generator at a fixed value
random.seed(seed_value)

# 3. Set `numpy` pseudo-random generator at a fixed value
np.random.seed(seed_value)

# 4. Set `tensorflow` pseudo-random generator at a fixed value
tf.set_random_seed(seed_value)

# 5. Configure a new global `tensorflow` session
# Both thread pools are capped at one thread (presumably so op scheduling
# cannot introduce run-to-run differences), and the resulting session is
# registered as the one the Keras backend uses.
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

# Glorot-uniform weight initializer created with the same seed.
# NOTE(review): no layer visible in this notebook is given `my_init` --
# confirm whether it should be passed as `kernel_initializer` or removed.
my_init = glorot_uniform(seed_value)

## Helper functions

In [None]:
def get_csv(session, split):
    """Return the CSV file name for a given session id and split value.

    Both arguments are converted with ``str`` and embedded in the
    ``zju_gaitaccel_session_<session>_<split>.csv`` pattern.
    """
    pieces = ['zju_gaitaccel_session_', str(session), '_', str(split), '.csv']
    return ''.join(pieces)

def load_session_data(session, split):
    """Read one session CSV and split it into features and labels.

    The file name comes from ``get_csv(session, split)`` and is resolved
    inside ``BASE_FOLDER``; the file is read without a header row.

    Returns:
        A ``(X, y)`` tuple of NumPy arrays: ``y`` holds the values of the
        column at position 59, ``X`` holds every other column.
    """
    csv_path = BASE_FOLDER.joinpath(Path(get_csv(session, split)))
    frame = pd.read_csv(csv_path, header=None)

    # The column at position 59 carries the label; the rest are features.
    label_column = frame.columns[59]
    targets = frame[label_column].values
    features = frame.drop([label_column], axis=1).values
    return features, targets

## Training

In [None]:
# Dataset parameters
SESSION, SPLIT = 1, 128
# Width of the network input.
# NOTE(review): assumed to equal the number of feature columns in X -- confirm.
INPUT_SIZE = 59

num_classes = sessions[SESSION]

# Keep the arrays as loaded separate from the processed ones so this cell
# gives the same result when it is re-run.
raw_features, raw_labels = load_session_data(SESSION, SPLIT)
X = np_utils.normalize(raw_features)
# Re-encode the labels as consecutive integers starting at 0.
y = LabelEncoder().fit_transform(raw_labels)

In [None]:
input_frame = Input(shape=(INPUT_SIZE, ))

# Encoder: 32 -> 16 -> 8 units; the 8-unit bottleneck is named "encoded" so it
# can be looked up on the model by name.
encoded = input_frame
for units in (32, 16):
    encoded = Dense(units, activation='relu')(encoded)
encoded = Dense(8, activation='relu', name="encoded")(encoded)

# Decoder: mirrors the encoder and ends in a sigmoid layer of the input width.
decoded = encoded
for units in (16, 32):
    decoded = Dense(units, activation='relu')(decoded)
decoded = Dense(INPUT_SIZE, activation='sigmoid')(decoded)

## Separator loss function

In [None]:
# https://stackoverflow.com/questions/53235029/issue-of-batch-sizes-when-using-custom-loss-functions-in-keras
def generate_batches(X, batch_size):
    """Endlessly yield consecutive ``(batch, batch)`` pairs cut from ``X``.

    Rows are taken in order and wrap around to the start once the end of
    ``X`` is reached, so every batch has exactly ``batch_size`` rows.

    Args:
        X: 1-D or 2-D array-like. A 1-D input is treated as a single column.
        batch_size: Number of rows per batch; must be at least 1.

    Yields:
        ``(batch, batch)`` where ``batch`` is a float array of shape
        ``(batch_size, n_columns)``. The same object is used as input and
        target, but a NEW array is produced for every batch, so batches that
        a consumer keeps (for example in a prefetch queue) are never
        overwritten by later iterations.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 or ``X`` has no rows
            (raised when the first batch is requested).
    """
    data = np.asarray(X)
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')
    if data.ndim == 0 or data.shape[0] == 0:
        raise ValueError('X must contain at least one row')

    n_rows = data.shape[0]
    n_cols = data.shape[1] if data.ndim == 2 else 1
    cursor = 0
    while True:
        # Row indices of this batch, wrapping past the end of the data.
        rows = (cursor + np.arange(batch_size)) % n_rows
        # Fresh buffer per batch: reusing one array would let the next
        # iteration silently change a batch that was already handed out.
        batch = np.zeros((batch_size, n_cols))
        batch[:] = data[rows].reshape(batch_size, -1)
        cursor = (cursor + batch_size) % n_rows
        yield batch, batch

# https://stackoverflow.com/questions/45961428/make-a-custom-loss-function-in-keras
def sep_loss(inter_layer):
    """Build a Keras loss: reconstruction MSE plus an intra-class scatter term.

    Args:
        inter_layer: Layer whose ``output`` tensor is the low-dimensional
            representation on which the scatter term is measured.

    Returns:
        A function ``sep(X_true, X_pred)`` usable as a Keras ``loss``. It
        reads the module-level ``labels`` variable when the loss graph is
        built, so ``labels`` must exist before the model is compiled and must
        hold the labels of the batch currently being processed.
    """
    def sep(X_true, X_pred):
        # Get the lower dimensional representation of the data
        X_kdim = inter_layer.output

        # Get batch labels
        y = K.flatten(labels)
        unique_y, _ = tf.unique(y)

        def diffs_per_label(label):
            # `tf.equal` compares element-wise. The `==` operator on TF 1.x
            # tensors compares object identity and therefore cannot be used
            # to build the per-class row mask.
            same_class = tf.boolean_mask(X_kdim, tf.equal(y, label))
            # Sum of squared distances between every ordered pair of rows
            # that share this label.
            return K.foldl(lambda diff, x: diff + K.sum(K.square(same_class - x)),
                           same_class,
                           initializer=0.0)

        # Accumulate the scatter of every class present in the batch.
        intra = K.foldl(lambda acc, label: acc + diffs_per_label(label), unique_y, initializer=0.0)

        mse = K.mean(K.square(X_true - X_pred))
        return intra + mse
    return sep

In [None]:
print(X.shape)
print(y.shape)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=RANDOM_STATE)

BATCH_SIZE = 128
NUM_TRAIN_SAMPLES = X_train.shape[0]
NUM_TEST_SAMPLES = X_test.shape[0]

autoencoder = Model(input_frame, decoded)
# `summary()` prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line.
autoencoder.summary()
# display(SVG(model_to_dot(autoencoder).create(prog='dot', format='svg')))
inter_layer = autoencoder.get_layer('encoded')

# Backend variable read by the custom loss; it is overwritten with the labels
# of the current batch before each training step. Zero-initialised so it never
# holds uninitialised memory.
labels = K.variable(np.zeros((BATCH_SIZE, 1)), dtype='int32', name='labels')
train_labels = generate_batches(y_train, BATCH_SIZE)
test_labels = generate_batches(y_test, BATCH_SIZE)

def changeLabels(batch, logs=None):
    """Load the labels of the next training batch into `labels`.

    Intended as an `on_batch_begin` hook, so it runs once per training batch
    and walks `y_train` in the same order and batch size as the data
    generator walks `X_train`.
    """
    y_batch, _ = next(train_labels)
    K.set_value(labels, y_batch)

# The hook must be `on_batch_begin`: the loss needs the labels of each batch,
# and `on_epoch_start` is not a hook LambdaCallback recognises, so the labels
# would never be refreshed.
# NOTE(review): this hook fires for training batches only, so `test_labels` is
# not consumed here and the separation term of the validation loss is computed
# with whatever labels were loaded last -- confirm whether validation needs its
# own label feed.
labelChanger = LambdaCallback(on_batch_begin=changeLabels)
autoencoder.compile(optimizer='sgd',
                    loss=sep_loss(inter_layer))

In [None]:
# Number of generator batches needed to cover each split once (the last batch
# of a pass may wrap around to the start of the data).
train_steps = math.ceil(NUM_TRAIN_SAMPLES / BATCH_SIZE)
validation_steps = math.ceil(NUM_TEST_SAMPLES / BATCH_SIZE)

train_batches = generate_batches(X_train, BATCH_SIZE)
validation_batches = generate_batches(X_test, BATCH_SIZE)

history = autoencoder.fit_generator(
    train_batches,
    steps_per_epoch=train_steps,
    epochs=10,
    callbacks=[labelChanger],
    validation_data=validation_batches,
    validation_steps=validation_steps,
)

In [None]:
import matplotlib.pyplot as plt

# Plot training & validation loss values
fig, ax = plt.subplots()
for curve in ('loss', 'val_loss'):
    ax.plot(history.history[curve])
ax.set_title('Model loss')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(['Train', 'Test'], loc='upper left')
plt.show()