In [1]:
from sklearn.model_selection import train_test_split
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

data_df = pd.read_csv('./datasets/heterogenity/original/dataset_50_2.5.csv', header=None)

print(data_df)

            0         1         2         3         4         5         6    \
0     -6.000000 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000   
1     -5.056885 -6.000000 -6.000000 -6.000000 -6.000000 -5.172044 -6.000000   
2     -5.908984 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000   
3     -6.000000 -6.000000 -5.283560 -5.862354 -6.000000 -6.000000 -5.063055   
4     -5.982080 -5.695964 -6.000000 -6.000000 -6.000000 -5.510071 -6.000000   
...         ...       ...       ...       ...       ...       ...       ...   
22584 -2.313599 -2.000000 -2.000000 -2.000000 -2.000000 -1.000000 -1.000000   
22585 -1.493705 -1.000000 -1.000000  0.000000  0.000000  0.000000  0.000000   
22586  0.000000  0.000000  0.000000  0.000000 -1.000000 -1.000000  0.000000   
22587  0.000000  0.000000  0.000000  0.720006  0.707711 -0.721697 -4.587960   
22588 -4.646136 -5.000000 -4.175241 -3.000000 -3.000000 -3.000000 -2.171858   

            7         8         9    ...  740  741 

In [2]:
label_df = pd.read_csv('./datasets/heterogenity/original/dataset_labels_50_2.5.csv')
print(label_df)

      user    device     gt
0        a  nexus4_1  stand
1        a  nexus4_1  stand
2        a  nexus4_1  stand
3        a  nexus4_1  stand
4        a  nexus4_1  stand
...    ...       ...    ...
22584    i  s3mini_2   walk
22585    i  s3mini_2   walk
22586    i  s3mini_2   walk
22587    i  s3mini_2   walk
22588    i  s3mini_2   walk

[22589 rows x 3 columns]


In [3]:
dataset_df = pd.concat([data_df,label_df], axis=1)
print(dataset_df)

              0         1         2         3         4         5         6  \
0     -6.000000 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000   
1     -5.056885 -6.000000 -6.000000 -6.000000 -6.000000 -5.172044 -6.000000   
2     -5.908984 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000   
3     -6.000000 -6.000000 -5.283560 -5.862354 -6.000000 -6.000000 -5.063055   
4     -5.982080 -5.695964 -6.000000 -6.000000 -6.000000 -5.510071 -6.000000   
...         ...       ...       ...       ...       ...       ...       ...   
22584 -2.313599 -2.000000 -2.000000 -2.000000 -2.000000 -1.000000 -1.000000   
22585 -1.493705 -1.000000 -1.000000  0.000000  0.000000  0.000000  0.000000   
22586  0.000000  0.000000  0.000000  0.000000 -1.000000 -1.000000  0.000000   
22587  0.000000  0.000000  0.000000  0.720006  0.707711 -0.721697 -4.587960   
22588 -4.646136 -5.000000 -4.175241 -3.000000 -3.000000 -3.000000 -2.171858   

              7         8         9  ...  743  744 

In [4]:
dataset_one_hot_df = pd.get_dummies(dataset_df, columns=['gt'])
print(dataset_one_hot_df)

              0         1         2         3         4         5         6  \
0     -6.000000 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000   
1     -5.056885 -6.000000 -6.000000 -6.000000 -6.000000 -5.172044 -6.000000   
2     -5.908984 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000   
3     -6.000000 -6.000000 -5.283560 -5.862354 -6.000000 -6.000000 -5.063055   
4     -5.982080 -5.695964 -6.000000 -6.000000 -6.000000 -5.510071 -6.000000   
...         ...       ...       ...       ...       ...       ...       ...   
22584 -2.313599 -2.000000 -2.000000 -2.000000 -2.000000 -1.000000 -1.000000   
22585 -1.493705 -1.000000 -1.000000  0.000000  0.000000  0.000000  0.000000   
22586  0.000000  0.000000  0.000000  0.000000 -1.000000 -1.000000  0.000000   
22587  0.000000  0.000000  0.000000  0.720006  0.707711 -0.721697 -4.587960   
22588 -4.646136 -5.000000 -4.175241 -3.000000 -3.000000 -3.000000 -2.171858   

              7         8         9  ...  748  749 

## SPLITTING DATASET IN TRAIN AND TEST SET

In [5]:
# Keep 20% of the data out for validation
### START CODE HERE ### (1 line)
train_reference_df, val_reference_df = train_test_split(dataset_one_hot_df, test_size=0.2, shuffle=True, random_state=123)
### END CODE HERE ###


def print_dataset_statistics(train_reference_df, val_reference_df):

    # Count the elements in the sets
    num_train_data_sit = sum(train_reference_df['gt_sit'] == 1)
    num_train_data_stand = sum(train_reference_df['gt_stand'] == 1)
    num_train_data_walk = sum(train_reference_df['gt_walk'] == 1)
    num_train_data_bike = sum(train_reference_df['gt_bike'] == 1)
    num_train_data_stairs_up = sum(train_reference_df['gt_stairsup'] == 1)
    num_train_data_stairs_down = sum(train_reference_df['gt_stairsdown'] == 1)

    num_val_data_sit = sum(val_reference_df['gt_sit'] == 1)
    num_val_data_stand = sum(val_reference_df['gt_stand'] == 1)
    num_val_data_walk = sum(val_reference_df['gt_walk'] == 1)
    num_val_data_bike = sum(val_reference_df['gt_bike'] == 1)
    num_val_data_stairs_up = sum(val_reference_df['gt_stairsup'] == 1)
    num_val_data_stairs_down = sum(val_reference_df['gt_stairsdown'] == 1)



    print('TRAIN SET')
    print('\tSit:\t\t{} ({:.2f}%)'.format(num_train_data_sit, 100 * num_train_data_sit / len(train_reference_df)))
    print('\tStand:\t\t{} ({:.2f}%)'.format(num_train_data_stand, 100 * num_train_data_stand / len(train_reference_df)))
    print('\tWalk:\t\t{} ({:.2f}%)'.format(num_train_data_walk, 100 * num_train_data_walk / len(train_reference_df)))
    print('\tBike:\t\t{} ({:.2f}%)'.format(num_train_data_bike, 100 * num_train_data_bike / len(train_reference_df)))
    print('\tStairs up:\t{} ({:.2f}%)'.format(num_train_data_stairs_up, 100 * num_train_data_stairs_up / len(train_reference_df)))
    print('\tStairs down:\t{} ({:.2f}%)'.format(num_train_data_stairs_down, 100 * num_train_data_stairs_down / len(train_reference_df)))


    print('VALIDATION SET')
    print('\tSit:\t\t{} ({:.2f}%)'.format(num_val_data_sit, 100 * num_val_data_sit / len(val_reference_df)))
    print('\tStand:\t\t{} ({:.2f}%)'.format(num_val_data_stand, 100 * num_val_data_stand / len(val_reference_df)))
    print('\tWalk:\t\t{} ({:.2f}%)'.format(num_val_data_walk, 100 * num_val_data_walk / len(val_reference_df)))
    print('\tBike:\t\t{} ({:.2f}%)'.format(num_val_data_bike, 100 * num_val_data_bike / len(val_reference_df)))
    print('\tStairs up:\t{} ({:.2f}%)'.format(num_val_data_stairs_up, 100 * num_val_data_stairs_up / len(val_reference_df)))
    print('\tStairs down:\t{} ({:.2f}%)'.format(num_val_data_stairs_down, 100 * num_val_data_stairs_down / len(val_reference_df)))


print_dataset_statistics(train_reference_df, val_reference_df)


TRAIN SET
	Sit:		3694 (20.44%)
	Stand:		2968 (16.42%)
	Walk:		3991 (22.09%)
	Bike:		2533 (14.02%)
	Stairs up:	2735 (15.13%)
	Stairs down:	2150 (11.90%)
VALIDATION SET
	Sit:		936 (20.72%)
	Stand:		736 (16.29%)
	Walk:		1018 (22.53%)
	Bike:		623 (13.79%)
	Stairs up:	686 (15.18%)
	Stairs down:	519 (11.49%)


In [6]:
print(train_reference_df)

         0         1    2        3         4    5         6         7  \
6785  -3.0 -3.000000 -3.0 -3.00000 -2.091919 -2.0 -2.829190 -3.000000   
20108  5.0  5.000000  5.0  5.00000  5.000000  5.0  5.000000  5.000000   
17854 -5.0 -4.733199 -4.0 -4.00000 -4.000000 -3.0 -3.000000 -3.214111   
16603  4.0  4.000000  4.0  4.00000  4.000000  4.0  4.000000  4.000000   
14823 -1.0 -1.000000 -1.0 -1.00000 -1.000000 -1.0 -1.153836 -2.000000   
...    ...       ...  ...      ...       ...  ...       ...       ...   
15377 -1.0 -1.000000 -1.0 -1.00000 -1.000000 -1.0 -1.000000 -1.000000   
21602  5.0  5.000000  5.0  5.00000  5.000000  5.0  5.000000  5.000000   
17730 -5.0 -5.000000 -5.0 -4.45692 -4.000000 -3.0 -3.998675 -4.000000   
15725 -1.0 -1.000000 -1.0 -2.00000 -5.176727 -6.0 -7.000000 -5.000000   
19966  0.0  0.000000  0.0  0.00000  0.000000  0.0  0.000000  0.000000   

              8         9  ...  748  749  user    device  gt_bike  gt_sit  \
6785  -4.000000 -4.818164  ...  0.0  0.0     c

In [44]:

def extract_basic_features(acc_x, acc_y, acc_z):
    # prova = np.array(np.apply_along_axis(np.histogram, 1, acc_x)[0]).reshape(2,1)

    np_acc_x = np.array(acc_x)
    np_acc_y = np.array(acc_y)
    np_acc_z = np.array(acc_z)

    mean_x = np.expand_dims(np.mean(np_acc_x, axis=1), axis=0).T 
    mean_y = np.expand_dims(np.mean(np_acc_y, axis=1), axis=0).T 
    mean_z = np.expand_dims(np.mean(np_acc_z, axis=1), axis=0).T 

    basic_features = np.concatenate( (
        # insert MEANS 
        mean_x,
        mean_y,
        mean_z,

        # insert STD
        np.expand_dims(np.std(np_acc_x, axis=1), axis=0).T, 
        np.expand_dims(np.std(np_acc_y, axis=1), axis=0).T, 
        np.expand_dims(np.std(np_acc_z, axis=1), axis=0).T, 

        # insert sum of thew absolute values
        np.expand_dims(np.mean(abs(np_acc_x - mean_x), axis=1), axis=1),
        np.expand_dims(np.mean(abs(np_acc_y - mean_y), axis=1), axis=1),
        np.expand_dims(np.mean(abs(np_acc_z - mean_z), axis=1), axis=1),

        np.expand_dims(np.mean( np.sqrt( np.power(np_acc_x, 2) + np.power(np_acc_y,2) + np.power(np_acc_z, 2) ), axis=1), axis=0).T
        
    ), axis=1).tolist()

    for i in range(0, len(acc_x)):
        bins_x, centers_x = np.histogram(acc_x[i], bins=10)
        bins_y, centers_y = np.histogram(acc_y[i], bins=10)
        bins_z, centers_z = np.histogram(acc_z[i], bins=10)

        basic_features[i].extend(bins_x / len(acc_x))
        basic_features[i].extend(bins_y / len(acc_y))
        basic_features[i].extend(bins_z / len(acc_z))

    return basic_features

prova_feat = np.array(extract_basic_features( [[1,10,3,4,5,6,7,8,9,10], [1,2,3,4,5,6,7,8,9,10]] , [[1,3,3,4,5,6,7,8,9,10], [1,2,3,4,5,6,7,8,9,10]], [[1,3,3,4,5,6,7,8,9,10], [1,2,3,4,5,6,7,8,9,10]]))

print(prova_feat)

[[ 6.3         5.6         5.6         2.9         2.76405499  2.76405499
   2.5         2.4         2.4        10.26614733  0.5         0.
   0.5         0.5         0.5         0.5         0.5         0.5
   0.5         1.          0.5         0.          1.          0.5
   0.5         0.5         0.5         0.5         0.5         0.5
   0.5         0.          1.          0.5         0.5         0.5
   0.5         0.5         0.5         0.5       ]
 [ 5.5         5.5         5.5         2.87228132  2.87228132  2.87228132
   2.5         2.5         2.5         9.52627944  0.5         0.5
   0.5         0.5         0.5         0.5         0.5         0.5
   0.5         0.5         0.5         0.5         0.5         0.5
   0.5         0.5         0.5         0.5         0.5         0.5
   0.5         0.5         0.5         0.5         0.5         0.5
   0.5         0.5         0.5         0.5       ]]


In [45]:
def create_dataset(reference_df, batch_size, shuffle, cache_file):
    target = reference_df[['gt_sit','gt_stand','gt_walk','gt_bike','gt_stairsup','gt_stairsdown']].values.astype(int).tolist()

    # RESHAPING DATAS
    np_data = np.array(reference_df.iloc[:,0:750])
    np_reshaped_data = np.reshape(np_data, (np_data.shape[0], 6, 125))

    np_basic_features = np.array(extract_basic_features(np_data[:, 0:125], np_data[:, 125:250], np_data[:, 250: 375]))

    dataset = tf.data.Dataset.from_tensor_slices( ({"input_1": np_reshaped_data, "input_2": np_basic_features}, target) )

    # Cache dataset
    if cache_file:
        dataset = dataset.cache(cache_file)

    # Shuffle
    if shuffle:
        dataset = dataset.shuffle(len(target))

    # Repeat the dataset indefinitely
    dataset = dataset.repeat()

    # Batch
    dataset = dataset.batch(batch_size=batch_size)

    # Prefetch
    dataset = dataset.prefetch(buffer_size=1)

    return dataset

batch_size = 128

training_dataset = create_dataset(train_reference_df, batch_size=batch_size, shuffle=False, cache_file=None)
val_dataset = create_dataset(val_reference_df, batch_size=batch_size, shuffle=False, cache_file=None)

for train, targ in training_dataset.take(1):
  print ('Features: {}, Target: {}'.format(train, targ))

train_steps = int(np.ceil(len(train_reference_df)/batch_size))
val_steps = int(np.ceil(len(val_reference_df)/batch_size))

Features: {'input_1': <tf.Tensor: shape=(128, 6, 125), dtype=float64, numpy=
array([[[-3.        , -3.        , -3.        , ..., -5.        ,
         -5.        , -4.        ],
        [ 0.        ,  0.        ,  0.        , ..., -0.28190104,
          0.        ,  0.        ],
        [ 5.        ,  5.        ,  5.90402832, ...,  6.        ,
          6.64831543,  7.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ]],

       [[ 5.        ,  5.        ,  5.        , ...,  5.        ,
          5.        ,  5.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 8.        ,  8.        ,  8.        , ...,  8.        ,
          8.        ,  8.        ],
   

In [46]:
def build_model(input_shape):

    l2_reg = 5e-4

    encoder = tf.keras.models.load_model('encoder.h5')

    # NOT TRAIN THE MODEL
    encoder.trainable = False

    # Define the input placeholder as a tensor with shape input_shape. Think of this as your input image!
    training_input = tf.keras.Input(shape=input_shape, dtype=tf.float32, name='input_1')
    basic_feat_input = tf.keras.Input(shape=40, dtype=tf.float32, name='input_2')

    CNN = tf.keras.layers.Conv1D(196, 16, activation='relu', padding='same')(training_input)
    CNN = tf.keras.layers.MaxPool1D(4, padding='same')(CNN)
    
    feautures_CCN = tf.keras.layers.Flatten()(CNN)
    
    featuers_encoder = encoder(training_input)

    features = tf.concat((feautures_CCN, basic_feat_input), 1) 

    #features = tf.concat((feautures_CCN), 1) 

    FFNN = tf.keras.layers.Dense(1024, activation='relu', kernel_regularizer=tf.keras.regularizers.L2(l2_reg), activity_regularizer=tf.keras.regularizers.L2(l2_reg))(features)
    FFNN = tf.keras.layers.Dropout(0.05)(FFNN)
    model_output = tf.keras.layers.Dense(6, activation='softmax')(FFNN)

    model = tf.keras.Model(inputs = [training_input, basic_feat_input], outputs = model_output, name='OurModel')

    return model

model = build_model((6,125))

adam_optimizer = tf.keras.optimizers.Adam(learning_rate=5e-4)
loss_funct = tf.keras.losses.CategoricalCrossentropy()

model.compile(optimizer = adam_optimizer, loss = loss_funct, metrics = ["accuracy"])
print(model.summary())


Model: "OurModel"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 6, 125)]     0                                            
__________________________________________________________________________________________________
conv1d_8 (Conv1D)               (None, 6, 196)       392196      input_1[0][0]                    
__________________________________________________________________________________________________
max_pooling1d_8 (MaxPooling1D)  (None, 2, 196)       0           conv1d_8[0][0]                   
__________________________________________________________________________________________________
flatten_8 (Flatten)             (None, 392)          0           max_pooling1d_8[0][0]            
___________________________________________________________________________________________

In [47]:
early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

model.fit(training_dataset, epochs = 100, steps_per_epoch=train_steps, validation_data=val_dataset, validation_steps=val_steps,  callbacks = [early_stopping_callback])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100


<tensorflow.python.keras.callbacks.History at 0x7f3408349d00>

In [11]:
model.save('./models/model.h5')