In [1]:
from sklearn.model_selection import train_test_split
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

data_df = pd.read_csv('./datasets/dataset_50_2.5.csv', header=None)

print(data_df)

            0         1         2         3         4         5         6    \
0     -6.000000 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000   
1     -5.056885 -6.000000 -6.000000 -6.000000 -6.000000 -5.172044 -6.000000   
2     -5.908984 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000   
3     -6.000000 -6.000000 -5.283560 -5.862354 -6.000000 -6.000000 -5.063055   
4     -5.982080 -5.695964 -6.000000 -6.000000 -6.000000 -5.510071 -6.000000   
...         ...       ...       ...       ...       ...       ...       ...   
22584 -2.313599 -2.000000 -2.000000 -2.000000 -2.000000 -1.000000 -1.000000   
22585 -1.493705 -1.000000 -1.000000  0.000000  0.000000  0.000000  0.000000   
22586  0.000000  0.000000  0.000000  0.000000 -1.000000 -1.000000  0.000000   
22587  0.000000  0.000000  0.000000  0.720006  0.707711 -0.721697 -4.587960   
22588 -4.646136 -5.000000 -4.175241 -3.000000 -3.000000 -3.000000 -2.171858   

            7         8         9    ...  740  741 

In [2]:
label_df = pd.read_csv('./datasets/dataset_labels_50_2.5.csv')
print(label_df)

      user    device     gt
0        a  nexus4_1  stand
1        a  nexus4_1  stand
2        a  nexus4_1  stand
3        a  nexus4_1  stand
4        a  nexus4_1  stand
...    ...       ...    ...
22584    i  s3mini_2   walk
22585    i  s3mini_2   walk
22586    i  s3mini_2   walk
22587    i  s3mini_2   walk
22588    i  s3mini_2   walk

[22589 rows x 3 columns]


In [3]:
dataset_df = pd.concat([data_df,label_df], axis=1)
print(dataset_df)

              0         1         2         3         4         5         6  \
0     -6.000000 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000   
1     -5.056885 -6.000000 -6.000000 -6.000000 -6.000000 -5.172044 -6.000000   
2     -5.908984 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000   
3     -6.000000 -6.000000 -5.283560 -5.862354 -6.000000 -6.000000 -5.063055   
4     -5.982080 -5.695964 -6.000000 -6.000000 -6.000000 -5.510071 -6.000000   
...         ...       ...       ...       ...       ...       ...       ...   
22584 -2.313599 -2.000000 -2.000000 -2.000000 -2.000000 -1.000000 -1.000000   
22585 -1.493705 -1.000000 -1.000000  0.000000  0.000000  0.000000  0.000000   
22586  0.000000  0.000000  0.000000  0.000000 -1.000000 -1.000000  0.000000   
22587  0.000000  0.000000  0.000000  0.720006  0.707711 -0.721697 -4.587960   
22588 -4.646136 -5.000000 -4.175241 -3.000000 -3.000000 -3.000000 -2.171858   

              7         8         9  ...  743  744 

In [4]:
dataset_one_hot_df = pd.get_dummies(dataset_df, columns=['gt'])
print(dataset_one_hot_df)

              0         1         2         3         4         5         6  \
0     -6.000000 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000   
1     -5.056885 -6.000000 -6.000000 -6.000000 -6.000000 -5.172044 -6.000000   
2     -5.908984 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000 -6.000000   
3     -6.000000 -6.000000 -5.283560 -5.862354 -6.000000 -6.000000 -5.063055   
4     -5.982080 -5.695964 -6.000000 -6.000000 -6.000000 -5.510071 -6.000000   
...         ...       ...       ...       ...       ...       ...       ...   
22584 -2.313599 -2.000000 -2.000000 -2.000000 -2.000000 -1.000000 -1.000000   
22585 -1.493705 -1.000000 -1.000000  0.000000  0.000000  0.000000  0.000000   
22586  0.000000  0.000000  0.000000  0.000000 -1.000000 -1.000000  0.000000   
22587  0.000000  0.000000  0.000000  0.720006  0.707711 -0.721697 -4.587960   
22588 -4.646136 -5.000000 -4.175241 -3.000000 -3.000000 -3.000000 -2.171858   

              7         8         9  ...  748  749 

## SPLITTING DATASET IN TRAIN AND TEST SET

In [5]:
# Keep 20% of the data out for validation
### START CODE HERE ### (1 line)
train_reference_df, val_reference_df = train_test_split(dataset_one_hot_df, test_size=0.2, shuffle=True, random_state=123)
### END CODE HERE ###


def print_dataset_statistics(train_reference_df, val_reference_df):

    # Count the elements in the sets
    num_train_data_sit = sum(train_reference_df['gt_sit'] == 1)
    num_train_data_stand = sum(train_reference_df['gt_stand'] == 1)
    num_train_data_walk = sum(train_reference_df['gt_walk'] == 1)
    num_train_data_bike = sum(train_reference_df['gt_bike'] == 1)
    num_train_data_stairs_up = sum(train_reference_df['gt_stairsup'] == 1)
    num_train_data_stairs_down = sum(train_reference_df['gt_stairsdown'] == 1)

    num_val_data_sit = sum(val_reference_df['gt_sit'] == 1)
    num_val_data_stand = sum(val_reference_df['gt_stand'] == 1)
    num_val_data_walk = sum(val_reference_df['gt_walk'] == 1)
    num_val_data_bike = sum(val_reference_df['gt_bike'] == 1)
    num_val_data_stairs_up = sum(val_reference_df['gt_stairsup'] == 1)
    num_val_data_stairs_down = sum(val_reference_df['gt_stairsdown'] == 1)



    print('TRAIN SET')
    print('\tSit:\t\t{} ({:.2f}%)'.format(num_train_data_sit, 100 * num_train_data_sit / len(train_reference_df)))
    print('\tStand:\t\t{} ({:.2f}%)'.format(num_train_data_stand, 100 * num_train_data_stand / len(train_reference_df)))
    print('\tWalk:\t\t{} ({:.2f}%)'.format(num_train_data_walk, 100 * num_train_data_walk / len(train_reference_df)))
    print('\tBike:\t\t{} ({:.2f}%)'.format(num_train_data_bike, 100 * num_train_data_bike / len(train_reference_df)))
    print('\tStairs up:\t{} ({:.2f}%)'.format(num_train_data_stairs_up, 100 * num_train_data_stairs_up / len(train_reference_df)))
    print('\tStairs down:\t{} ({:.2f}%)'.format(num_train_data_stairs_down, 100 * num_train_data_stairs_down / len(train_reference_df)))


    print('VALIDATION SET')
    print('\tSit:\t\t{} ({:.2f}%)'.format(num_val_data_sit, 100 * num_val_data_sit / len(val_reference_df)))
    print('\tStand:\t\t{} ({:.2f}%)'.format(num_val_data_stand, 100 * num_val_data_stand / len(val_reference_df)))
    print('\tWalk:\t\t{} ({:.2f}%)'.format(num_val_data_walk, 100 * num_val_data_walk / len(val_reference_df)))
    print('\tBike:\t\t{} ({:.2f}%)'.format(num_val_data_bike, 100 * num_val_data_bike / len(val_reference_df)))
    print('\tStairs up:\t{} ({:.2f}%)'.format(num_val_data_stairs_up, 100 * num_val_data_stairs_up / len(val_reference_df)))
    print('\tStairs down:\t{} ({:.2f}%)'.format(num_val_data_stairs_down, 100 * num_val_data_stairs_down / len(val_reference_df)))


print_dataset_statistics(train_reference_df, val_reference_df)


TRAIN SET
	Sit:		3694 (20.44%)
	Stand:		2968 (16.42%)
	Walk:		3991 (22.09%)
	Bike:		2533 (14.02%)
	Stairs up:	2735 (15.13%)
	Stairs down:	2150 (11.90%)
VALIDATION SET
	Sit:		936 (20.72%)
	Stand:		736 (16.29%)
	Walk:		1018 (22.53%)
	Bike:		623 (13.79%)
	Stairs up:	686 (15.18%)
	Stairs down:	519 (11.49%)


In [6]:
print(train_reference_df)

         0         1    2        3         4    5         6         7  \
6785  -3.0 -3.000000 -3.0 -3.00000 -2.091919 -2.0 -2.829190 -3.000000   
20108  5.0  5.000000  5.0  5.00000  5.000000  5.0  5.000000  5.000000   
17854 -5.0 -4.733199 -4.0 -4.00000 -4.000000 -3.0 -3.000000 -3.214111   
16603  4.0  4.000000  4.0  4.00000  4.000000  4.0  4.000000  4.000000   
14823 -1.0 -1.000000 -1.0 -1.00000 -1.000000 -1.0 -1.153836 -2.000000   
...    ...       ...  ...      ...       ...  ...       ...       ...   
15377 -1.0 -1.000000 -1.0 -1.00000 -1.000000 -1.0 -1.000000 -1.000000   
21602  5.0  5.000000  5.0  5.00000  5.000000  5.0  5.000000  5.000000   
17730 -5.0 -5.000000 -5.0 -4.45692 -4.000000 -3.0 -3.998675 -4.000000   
15725 -1.0 -1.000000 -1.0 -2.00000 -5.176727 -6.0 -7.000000 -5.000000   
19966  0.0  0.000000  0.0  0.00000  0.000000  0.0  0.000000  0.000000   

              8         9  ...  748  749  user    device  gt_bike  gt_sit  \
6785  -4.000000 -4.818164  ...  0.0  0.0     c

In [7]:
def create_dataset(reference_df, batch_size, shuffle, cache_file):
    target = reference_df[['gt_sit','gt_stand','gt_walk','gt_bike','gt_stairsup','gt_stairsdown']].values.astype(int).tolist()

    # RESHAPING DATAS
    np_data = np.array(reference_df.iloc[:,0:750])
    np_reshaped_data =np.reshape(np_data, (np_data.shape[0], 6, 125))

    dataset = tf.data.Dataset.from_tensor_slices((np_reshaped_data, target))

    # Cache dataset
    if cache_file:
        dataset = dataset.cache(cache_file)

    # Shuffle
    if shuffle:
        dataset = dataset.shuffle(len(target))

    # Repeat the dataset indefinitely
    dataset = dataset.repeat()

    # Batch
    dataset = dataset.batch(batch_size=batch_size)

    # Prefetch
    dataset = dataset.prefetch(buffer_size=1)

    return dataset

batch_size = 128

training_dataset = create_dataset(train_reference_df, batch_size=batch_size, shuffle=False, cache_file=None)
val_dataset = create_dataset(val_reference_df, batch_size=batch_size, shuffle=False, cache_file=None)

for feat, targ in training_dataset.take(1):
  print ('Features: {}, Target: {}'.format(feat, targ))

train_steps = int(np.ceil(len(train_reference_df)/batch_size))
val_steps = int(np.ceil(len(val_reference_df)/batch_size))

Features: [[[-3.         -3.         -3.         ... -5.         -5.
   -4.        ]
  [ 0.          0.          0.         ... -0.28190104  0.
    0.        ]
  [ 5.          5.          5.90402832 ...  6.          6.64831543
    7.        ]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]]

 [[ 5.          5.          5.         ...  5.          5.
    5.        ]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]
  [ 8.          8.          8.         ...  8.          8.
    8.        ]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]
  [ 0.          0.          0.         ...  0.          0.
    0.        ]]

 [[-5.         -4.73319869 -4.         ... -3.         -3.
   -3.        ]
  [

In [10]:
model = tf.keras.Sequential()
model.add(tf.keras.Input(shape=(6,125)))
model.add(tf.keras.layers.Conv1D(196, 16, activation='relu', padding='same'))
model.add(tf.keras.layers.MaxPool1D(4, padding='same'))

model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(1024, activation='relu'))
model.add(tf.keras.layers.Dropout(0.05))
model.add(tf.keras.layers.Dense(6, activation='softmax'))

adam_optimizer = tf.keras.optimizers.Adam()
loss_funct = tf.keras.losses.CategoricalCrossentropy()

model.compile(optimizer = adam_optimizer, loss = loss_funct, metrics = ["accuracy"])
print(model.summary())

model.fit(training_dataset, epochs = 20, steps_per_epoch=train_steps, validation_data=val_dataset, validation_steps=val_steps)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 6, 196)            392196    
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 2, 196)            0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 392)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1024)              402432    
_________________________________________________________________
dropout_1 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 6)                 6150      
Total params: 800,778
Trainable params: 800,778
Non-trainable params: 0
________________________________________________

<tensorflow.python.keras.callbacks.History at 0x7f91a84f63a0>

In [11]:
model.save('./models/model.h5')