In [5]:
from sklearn.model_selection import train_test_split
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

all_data_df = pd.read_csv('./datasets/heterogenity/original/dataset_50_2.5.csv', header=None)
all_label_df = pd.read_csv('./datasets/heterogenity/original/dataset_labels_50_2.5.csv', names=["user", "model", "gt"])

dataset_df = pd.concat([all_data_df,all_label_df], axis=1)

train_dataset_df = dataset_df.loc[(dataset_df['user'] != 'a') & (dataset_df['user'] != 'b')] 
train_reference_df = pd.get_dummies(train_dataset_df, columns=['gt'])

test_dataset_df = dataset_df.loc[(dataset_df['user'] == 'a') | (dataset_df['user'] == 'b')] 
test_reference_df = pd.get_dummies(test_dataset_df, columns=['gt'])


In [6]:

def print_dataset_statistics(train_reference_df, test_reference_df):

    # Count the elements in the sets
    num_train_data = (len(train_reference_df))
    num_train_data_sit = sum(train_reference_df['gt_sit'] == 1)
    num_train_data_stand = sum(train_reference_df['gt_stand'] == 1)
    num_train_data_walk = sum(train_reference_df['gt_walk'] == 1)
    num_train_data_bike = sum(train_reference_df['gt_bike'] == 1)
    num_train_data_stairs_up = sum(train_reference_df['gt_stairsup'] == 1)
    num_train_data_stairs_down = sum(train_reference_df['gt_stairsdown'] == 1)

    num_test_data = (len(test_reference_df))
    num_test_data_sit = sum(test_reference_df['gt_sit'] == 1)
    num_test_data_stand = sum(test_reference_df['gt_stand'] == 1)
    num_test_data_walk = sum(test_reference_df['gt_walk'] == 1)
    num_test_data_bike = sum(test_reference_df['gt_bike'] == 1)
    num_test_data_stairs_up = sum(test_reference_df['gt_stairsup'] == 1)
    num_test_data_stairs_down = sum(test_reference_df['gt_stairsdown'] == 1)

    total_df_data = num_train_data + num_test_data

    print('TRAIN SET')
    print('\tStand:\t\t{} ({:.2f}%)'.format(num_train_data_stand, 100 * num_train_data_stand / len(train_reference_df)))
    print('\tSit:\t\t{} ({:.2f}%)'.format(num_train_data_sit, 100 * num_train_data_sit / len(train_reference_df)))
    print('\tWalk:\t\t{} ({:.2f}%)'.format(num_train_data_walk, 100 * num_train_data_walk / len(train_reference_df)))
    print('\tBike:\t\t{} ({:.2f}%)'.format(num_train_data_bike, 100 * num_train_data_bike / len(train_reference_df)))
    print('\tStairs up:\t{} ({:.2f}%)'.format(num_train_data_stairs_up, 100 * num_train_data_stairs_up / len(train_reference_df)))
    print('\tStairs down:\t{} ({:.2f}%)'.format(num_train_data_stairs_down, 100 * num_train_data_stairs_down / len(train_reference_df)))
    print('')
    print('\tPercentage of total\t{} ({:.2f}%)'.format(num_train_data, 100 * num_train_data/ total_df_data))
    print('')

    print('TEST SET')
    
    print('\tStand:\t\t{} ({:.2f}%)'.format(num_test_data_stand, 100 * num_test_data_stand / len(test_reference_df)))
    print('\tSit:\t\t{} ({:.2f}%)'.format(num_test_data_sit, 100 * num_test_data_sit / len(test_reference_df)))
    print('\tWalk:\t\t{} ({:.2f}%)'.format(num_test_data_walk, 100 * num_test_data_walk / len(test_reference_df)))
    print('\tBike:\t\t{} ({:.2f}%)'.format(num_test_data_bike, 100 * num_test_data_bike / len(test_reference_df)))
    print('\tStairs up:\t{} ({:.2f}%)'.format(num_test_data_stairs_up, 100 * num_test_data_stairs_up / len(test_reference_df)))
    print('\tStairs down:\t{} ({:.2f}%)'.format(num_test_data_stairs_down, 100 * num_test_data_stairs_down / len(test_reference_df)))
    print('')
    print('\tPercentage of total\t{} ({:.2f}%)'.format(num_test_data, 100 * num_test_data/ total_df_data))


print_dataset_statistics(train_reference_df, test_reference_df)


TRAIN SET
	Stand:		2845 (16.34%)
	Sit:		3669 (21.08%)
	Walk:		3923 (22.53%)
	Bike:		2341 (13.45%)
	Stairs up:	2589 (14.87%)
	Stairs down:	2042 (11.73%)

	Percentage of total	17409 (77.07%)

TEST SET
	Stand:		859 (16.58%)
	Sit:		961 (18.55%)
	Walk:		1086 (20.97%)
	Bike:		815 (15.73%)
	Stairs up:	832 (16.06%)
	Stairs down:	627 (12.10%)

	Percentage of total	5180 (22.93%)


In [5]:
print(train_reference_df)

              0    1         2         3         4         5         6  \
2574  -1.000000 -1.0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000   
2575  -1.000000 -1.0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000   
2576  -1.000000 -1.0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000   
2577  -1.000000 -1.0 -1.000000 -1.000000 -2.000000 -1.304657 -0.642802   
2578  -1.000000 -1.0 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000   
...         ...  ...       ...       ...       ...       ...       ...   
22584 -2.313599 -2.0 -2.000000 -2.000000 -2.000000 -1.000000 -1.000000   
22585 -1.493705 -1.0 -1.000000  0.000000  0.000000  0.000000  0.000000   
22586  0.000000  0.0  0.000000  0.000000 -1.000000 -1.000000  0.000000   
22587  0.000000  0.0  0.000000  0.720006  0.707711 -0.721697 -4.587960   
22588 -4.646136 -5.0 -4.175241 -3.000000 -3.000000 -3.000000 -2.171858   

              7         8         9  ...  748  749  user     model  gt_bike  \
2574  -1.000000 -1.000000 -1.000

In [7]:
def extract_basic_features(acc_x, acc_y, acc_z):
    # prova = np.array(np.apply_along_axis(np.histogram, 1, acc_x)[0]).reshape(2,1)

    np_acc_x = np.array(acc_x)
    np_acc_y = np.array(acc_y)
    np_acc_z = np.array(acc_z)

    mean_x = np.expand_dims(np.mean(np_acc_x, axis=1), axis=0).T 
    mean_y = np.expand_dims(np.mean(np_acc_y, axis=1), axis=0).T 
    mean_z = np.expand_dims(np.mean(np_acc_z, axis=1), axis=0).T 

    basic_features = np.concatenate( (
        # insert MEANS 
        mean_x,
        mean_y,
        mean_z,

        # insert STD
        np.expand_dims(np.std(np_acc_x, axis=1), axis=0).T, 
        np.expand_dims(np.std(np_acc_y, axis=1), axis=0).T, 
        np.expand_dims(np.std(np_acc_z, axis=1), axis=0).T, 

        # insert sum of thew absolute values
        np.expand_dims(np.mean(abs(np_acc_x - mean_x), axis=1), axis=1),
        np.expand_dims(np.mean(abs(np_acc_y - mean_y), axis=1), axis=1),
        np.expand_dims(np.mean(abs(np_acc_z - mean_z), axis=1), axis=1),

        np.expand_dims(np.mean( np.sqrt( np.power(np_acc_x, 2) + np.power(np_acc_y,2) + np.power(np_acc_z, 2) ), axis=1), axis=0).T
        
    ), axis=1).tolist()

    for i in range(0, len(acc_x)):
        bins_x, centers_x = np.histogram(acc_x[i], bins=10)
        bins_y, centers_y = np.histogram(acc_y[i], bins=10)
        bins_z, centers_z = np.histogram(acc_z[i], bins=10)

        basic_features[i].extend(bins_x / len(acc_x))
        basic_features[i].extend(bins_y / len(acc_y))
        basic_features[i].extend(bins_z / len(acc_z))

    return basic_features

prova_feat = np.array(extract_basic_features( [[1,10,3,4,5,6,7,8,9,10], [1,2,3,4,5,6,7,8,9,10]] , [[1,3,3,4,5,6,7,8,9,10], [1,2,3,4,5,6,7,8,9,10]], [[1,3,3,4,5,6,7,8,9,10], [1,2,3,4,5,6,7,8,9,10]]))

print(prova_feat)

[[ 6.3         5.6         5.6         2.9         2.76405499  2.76405499
   2.5         2.4         2.4        10.26614733  0.5         0.
   0.5         0.5         0.5         0.5         0.5         0.5
   0.5         1.          0.5         0.          1.          0.5
   0.5         0.5         0.5         0.5         0.5         0.5
   0.5         0.          1.          0.5         0.5         0.5
   0.5         0.5         0.5         0.5       ]
 [ 5.5         5.5         5.5         2.87228132  2.87228132  2.87228132
   2.5         2.5         2.5         9.52627944  0.5         0.5
   0.5         0.5         0.5         0.5         0.5         0.5
   0.5         0.5         0.5         0.5         0.5         0.5
   0.5         0.5         0.5         0.5         0.5         0.5
   0.5         0.5         0.5         0.5         0.5         0.5
   0.5         0.5         0.5         0.5       ]]


In [10]:
def create_dataset(reference_df, batch_size, shuffle, cache_file, center_data=False):
    target = reference_df[['gt_sit','gt_stand','gt_walk','gt_bike','gt_stairsup','gt_stairsdown']].values.astype(int).tolist()

    # RESHAPING DATAS
    np_data = np.array(reference_df.iloc[:,0:750])

    np_reshaped_data = np.reshape(np_data.copy(), (np_data.shape[0], 6, 125))

    # Data centering
    if center_data:
        for i in range(len(np_reshaped_data)):
            window = np_reshaped_data[i]
            means = np.mean(window, axis=1)
            centered_acc = np.array(([window[j] - means[j] for j in range(3)]))
            np_reshaped_data[i] = np.concatenate((centered_acc, window[3:]), axis=0)
             
    
    # Extract manual features
    np_basic_features = np.array(extract_basic_features(np_data[:, 0:125], np_data[:, 125:250], np_data[:, 250: 375]))

    # Create dataset obj
    dataset = tf.data.Dataset.from_tensor_slices( ({"input_1": np_reshaped_data, "input_2": np_basic_features}, target) )

    # Cache dataset
    if cache_file:
        dataset = dataset.cache(cache_file)

    # Shuffle
    if shuffle:
        dataset = dataset.shuffle(len(target))

    # Repeat the dataset indefinitely
    dataset = dataset.repeat()

    # Batch
    dataset = dataset.batch(batch_size=batch_size)

    # Prefetch
    dataset = dataset.prefetch(buffer_size=1)

    return dataset

batch_size = 128

training_dataset = create_dataset(train_reference_df, batch_size=batch_size, shuffle=True, cache_file=None)
val_dataset = create_dataset(test_reference_df, batch_size=batch_size, shuffle=True, cache_file=None)

for train, targ in training_dataset.take(1):
  print ('Features: {}, Target: {}'.format(train, targ))

train_steps = int(np.ceil(len(train_reference_df)/batch_size))
val_steps = int(np.ceil(len(test_reference_df)/batch_size))

Features: {'input_1': <tf.Tensor: shape=(128, 6, 125), dtype=float64, numpy=
array([[[-5.        , -5.        , -4.        , ..., -7.        ,
         -7.        , -7.        ],
        [ 0.        , -0.01291504, -1.        , ...,  0.        ,
         -0.82843572, -0.18791504],
        [ 4.        ,  4.        ,  4.01247337, ...,  9.08952637,
          7.17156428,  5.18791504],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.02744141, ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ]],

       [[-2.        , -2.        , -2.        , ..., -1.        ,
         -0.54345703, -0.8388916 ],
        [ 0.        ,  0.        , -1.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 7.        ,  7.        ,  7.        , ...,  7.        ,
          7.        ,  7.        ],
   

In [11]:
def build_model(input_shape):

    l2_reg = 5e-4

    encoder = tf.keras.models.load_model('encoder.h5')

    # NOT TRAIN THE MODEL
    encoder.trainable = False

    # Define the input placeholder as a tensor with shape input_shape. Think of this as your input image!
    training_input = tf.keras.Input(shape=input_shape, dtype=tf.float32, name='input_1')
    basic_feat_input = tf.keras.Input(shape=40, dtype=tf.float32, name='input_2')

    CNN = tf.keras.layers.Conv1D(196, 16, activation='relu', padding='same')(training_input)
    CNN = tf.keras.layers.MaxPool1D(4, padding='same')(CNN)
    
    feautures_CCN = tf.keras.layers.Flatten()(CNN)
    
    featuers_encoder = encoder(training_input)

    features = tf.concat((feautures_CCN, basic_feat_input), 1) 

    #features = tf.concat((feautures_CCN), 1) 

    FFNN = tf.keras.layers.Dense(1024, activation='relu', kernel_regularizer=tf.keras.regularizers.L2(l2_reg), activity_regularizer=tf.keras.regularizers.L2(l2_reg))(features)
    FFNN = tf.keras.layers.Dropout(0.05)(FFNN)
    model_output = tf.keras.layers.Dense(6, activation='softmax')(FFNN)

    model = tf.keras.Model(inputs = [training_input, basic_feat_input], outputs = model_output, name='OurModel')

    return model

model = build_model((6,125))

adam_optimizer = tf.keras.optimizers.Adam(learning_rate=5e-4)
loss_funct = tf.keras.losses.CategoricalCrossentropy()

model.compile(optimizer = adam_optimizer, loss = loss_funct, metrics = ["accuracy"])
print(model.summary())


Model: "OurModel"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 6, 125)]     0                                            
__________________________________________________________________________________________________
conv1d_1 (Conv1D)               (None, 6, 196)       392196      input_1[0][0]                    
__________________________________________________________________________________________________
max_pooling1d_1 (MaxPooling1D)  (None, 2, 196)       0           conv1d_1[0][0]                   
__________________________________________________________________________________________________
flatten_1 (Flatten)             (None, 392)          0           max_pooling1d_1[0][0]            
___________________________________________________________________________________________

In [12]:
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath='./models/checkpoint', save_weights_only=True, monitor='val_accuracy', mode='max', save_best_only=True)

early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

model.fit(training_dataset, epochs = 100, steps_per_epoch=train_steps, validation_data=val_dataset, validation_steps=val_steps,  callbacks = [early_stopping_callback, model_checkpoint_callback])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100


<tensorflow.python.keras.callbacks.History at 0x7f2c7c6b2e50>

# K-FOLD CROSS VALIDATION

In [21]:
from sklearn.metrics import classification_report

user_list = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']
batch_size = 128
checkpoint_filepath = './models/checkpoint'

models_accuracy = []
models_precision = []
models_recall = []

for user_to_exclude in user_list:

    train_dataset_df = dataset_df.loc[(dataset_df['user'] != user_to_exclude)] 
    train_reference_df = pd.get_dummies(train_dataset_df, columns=['gt'])

    test_dataset_df = dataset_df.loc[(dataset_df['user'] == user_to_exclude)] 
    test_reference_df = pd.get_dummies(test_dataset_df, columns=['gt'])

    training_dataset = create_dataset(train_reference_df, batch_size=batch_size, shuffle=True, cache_file=None)
    val_dataset = create_dataset(test_reference_df, batch_size=batch_size, shuffle=True, cache_file=None)

    train_steps = int(np.ceil(len(train_reference_df)/batch_size))
    val_steps = int(np.ceil(len(test_reference_df)/batch_size))

    model = build_model((6,125))

    adam_optimizer = tf.keras.optimizers.Adam(learning_rate=5e-4)
    loss_funct = tf.keras.losses.CategoricalCrossentropy()

    model.compile(optimizer = adam_optimizer, loss = loss_funct, metrics=[tf.keras.metrics.CategoricalAccuracy(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall()],
    )

    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_filepath, save_weights_only=True, monitor='val_categorical_accuracy', mode='max', save_best_only=True)
    early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

    model.fit(training_dataset, epochs = 100, steps_per_epoch=train_steps, validation_data=val_dataset, validation_steps=val_steps,  callbacks = [early_stopping_callback, model_checkpoint_callback])

    # L0AD BEST MODEL
    # The model weights (that are considered the best) are loaded into the model.
    model.load_weights(checkpoint_filepath)

    metrics = model.evaluate(val_dataset, batch_size=batch_size, steps=val_steps)

    loss, accuracy, precision, recall = metrics
    print('Accuracy: ' + str(accuracy))
    print('Precision: ' + str(precision))
    print('Recall: ' + str(recall))

    models_accuracy.append(accuracy)
    models_precision.append(precision)
    models_recall.append(recall)



Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Accuracy: 0.6391369104385376
Precision: 0.7130637764930725
Recall: 0.3411458432674408
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Accuracy: 0.9356398582458496
Precision: 0.9366381168365479
Recall: 0.9348958134651184
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/1

In [4]:
import numpy as np

print(f'Accuracy:\tMean={np.mean(models_accuracy)}\tstd={np.std(models_accuracy)}')

NameError: name 'models_accuracy' is not defined