## Deep learning with Keras: Chest accelerometer data preparation

Dataset from https://archive.ics.uci.edu/ml/datasets/Activity+Recognition+from+Single+Chest-Mounted+Accelerometer

In [None]:
import glob

import numpy as np


def load_activity_files(paths):
    """Read the given CSV files and stack them into a single 2-D float64 array.

    Rows are observations. Columns are:
      * 0: time point (sequential count)
      * 1-3: x-, y-, z-directional accelerometer data
      * 4: activity label
      * 5: person label, assigned here as the position of the file in `paths`
           (this doesn't necessarily line up with the number in the file name)

    Parameters
    ----------
    paths : sequence of str
        CSV files to read, in the order that determines the person labels.

    Returns
    -------
    numpy.ndarray
        Array of shape (total_rows, 6); shape (0, 6) when `paths` is empty.
    """
    # Start from an empty 6-column block so that stacking also works with no files
    blocks = [np.empty((0, 6), dtype="float64")]

    for person, path in enumerate(paths):
        print("Reading file", person + 1, "/", len(paths))

        # genfromtxt returns a 1-D array for a single-row file; force 2-D so that
        # shape[0] is always the number of observations
        data = np.atleast_2d(np.genfromtxt(path, delimiter=','))
        if data.size == 0:
            # Nothing was parsed from this file, so there is nothing to label
            continue

        # Add a column with a label representing the person
        person_column = np.full(data.shape[0], person, dtype="float64")
        blocks.append(np.column_stack((data, person_column)))

    # Stack once at the end: stacking inside the loop would re-copy everything
    # read so far on every iteration
    return np.vstack(blocks)


# Files from the download are in a directory called "activity" saved in the same place as this notebook.
# glob returns paths in arbitrary order, so sort them to give each person the same label on every run.
data_files = sorted(glob.glob("activity/*.csv"))
if not data_files:
    print("No CSV files found in the 'activity' directory")

dataset = load_activity_files(data_files)

In [None]:
# Display (number of observations, number of columns) for the stacked data
np.shape(dataset)

In [None]:
# Reshape data into 3 dimensions:
#   0-dimension ("rows") is windows (one per 5-second section that is kept)
#   1-dimension ("columns") is time series values (260 = 5{seconds}*52{Hz} in total)
#   2-dimension ("leaves") are as follows (5 in total):
#     * 3 directions (x-, y-, z-acceleration)
#     * Activity type labels
#     * Person labels

def chop_windows(data, window=260, step=52):
    """Cut `data` into overlapping sections of `window` rows, one every `step` rows.

    A section is kept only if it belongs to a single person (column 5) and a
    single activity (column 4). The count column (column 0) is dropped from the
    result.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array with columns (count, x, y, z, activity label, person label).
    window : int
        Number of rows per section.
    step : int
        Number of rows between the starts of consecutive sections.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_kept, window, 5); n_kept is 0 when nothing qualifies.
    """
    empty = np.empty((0, window, 5), dtype=data.dtype)

    n_rows = data.shape[0]
    if n_rows < window:
        # Not even one full section fits
        return empty

    # Number of sections whose last row (start + window - 1) is still inside the data
    n_windows = (n_rows - window) // step + 1

    kept = []
    for k in range(n_windows):
        start = step * k
        section = data[start:start + window]
        # Only rows inside the section are inspected: looking at the row *after*
        # the section would run off the end of the data for the final section and
        # would wrongly reject a section ending on a person's last row.
        if not np.all(section[:, 5] == section[0, 5]):
            # More than one person in this section, so discard
            continue
        if not np.all(section[:, 4] == section[0, 4]):
            # More than one activity in this section, so discard
            continue
        # Copy all but the count column
        kept.append(section[:, 1:6])

    if not kept:
        return empty
    return np.stack(kept)


# We'll chop the time series into 260-length (5 second) sections every 52 points (every 1 second)
chopped = chop_windows(dataset, window=260, step=52)

In [None]:
# Display the dimensions of the windowed array
np.shape(chopped)

In [None]:
# Keep only the windows whose activity label (leaf 3) is 4, i.e. "Walking"
is_walking = chopped[:, 0, 3] == 4
walking = chopped[is_walking]

# Recorded data is not necessarily calibrated, so each time series
# (one per window and direction) is scaled on its own along the time axis
from sklearn.preprocessing import scale
accelerations = walking[..., :3]
walking_data = np.apply_along_axis(scale, 1, accelerations)

# The person label sits in leaf 4 and is identical at every time step of a window,
# so reading it at time step 0 is enough
walking_labels = walking[:, 0, 4]

In [None]:
# Check shapes
# Every window needs exactly one label
assert walking_data.shape[0] == walking_labels.shape[0], "data and labels have different lengths"
# A cell only displays its last expression, so show both shapes as one tuple
walking_data.shape, walking_labels.shape

In [None]:
# Write the walking windows and their person labels to .npy files
outputs = (
    ("walking_data.npy", walking_data),
    ("walking_labels.npy", walking_labels),
)
for filename, array in outputs:
    np.save(filename, array)