# Initialization

In [1]:
# import libraries
import os
import sys
import time
import pandas as pd
import numpy as np
from scipy import stats
from scipy.interpolate import CubicSpline

# Load Data

In [2]:
# Location of the raw recording and the names of its columns
raw_csv_path = './ISWC21_data_plus_raw/rwhar_data.csv'
column_names = ['subject', 'x', 'y', 'z', 'activity']

# The file is read with header=None, i.e. every line is treated as data;
# the column labels are attached afterwards.
data = pd.read_csv(raw_csv_path, header=None)
data.columns = column_names
data.head()

Unnamed: 0,subject,x,y,z,activity
0,0,-9.57434,-2.02733,1.34506,climbing_up
1,0,-9.56479,-1.99597,1.39345,climbing_up
2,0,-9.55122,-1.98445,1.41139,climbing_up
3,0,-9.51335,-1.97557,1.42615,climbing_up
4,0,-9.52959,-1.98187,1.45395,climbing_up


In [3]:
# describe data: summary statistics (count, mean, std, min, quartiles, max)
# for the numeric columns of the dataframe
data.describe()

Unnamed: 0,subject,x,y,z
count,3200803.0,3200803.0,3200803.0,3200803.0
mean,7.011755,-5.071222,-4.468575,1.885688
std,4.261132,6.336688,4.661046,4.155098
min,0.0,-19.5774,-19.9123,-19.5788
25%,3.0,-9.39427,-6.59929,-0.2520825
50%,7.0,-6.81262,-3.48132,1.44136
75%,11.0,-0.4284745,-2.02736,4.16675
max,14.0,19.422,19.0871,19.4206


In [4]:
# check for missing values: count the null entries in each column
data.isnull().sum()

subject     0
x           0
y           0
z           0
activity    0
dtype: int64

In [5]:
# (number of rows, number of columns) of the loaded dataframe
data.shape

(3200803, 5)

# Windowing Data

In [6]:
# function for sliding window

def sliding_window_samples(data, samples_per_window, overlap_ratio):
    """
    Return a sliding window measured in number of samples over a data array.

    Only complete windows are produced: trailing samples that do not fill a
    whole window are dropped, but a window that ends exactly on the last
    sample is kept.

    :param data: input array, can be numpy or pandas dataframe
    :param samples_per_window: window length as number of samples
    :param overlap_ratio: overlap is meant as percentage and should be an integer value
        (e.g. 50 for 50% overlap); None is treated as no overlap
    :return: tuple of windows and indices, where each index pair is
        [start, end) of the corresponding window; None (after printing a
        message) if the overlap leaves no forward step between windows
    """
    win_len = int(samples_per_window)

    # None means "no overlap"; otherwise convert the percentage to a sample count.
    if overlap_ratio is None:
        overlapping_elements = 0
    else:
        overlapping_elements = int((overlap_ratio / 100) * win_len)

    # The window must advance by at least one sample, otherwise the loop
    # below would never terminate.
    if overlapping_elements >= win_len:
        print('Number of overlapping elements exceeds window size.')
        return

    step = win_len - overlapping_elements
    # "+ 1" keeps the final window when it ends exactly at len(data).
    starts = range(0, len(data) - win_len + 1, step)
    windows = [data[start:start + win_len] for start in starts]
    indices = [[start, start + win_len] for start in starts]

    result_indices = np.array(indices)
    try:
        result_windows = np.array(windows)
    except (ValueError, TypeError):
        # Fallback for inputs numpy cannot stack directly: fill an object
        # array window by window (requires 2-D data).
        result_windows = np.empty(shape=(len(windows), win_len, data.shape[1]), dtype=object)
        for i, window in enumerate(windows):
            result_windows[i] = window
    return result_windows, result_indices

In [7]:
# window length in samples = samples per second * window duration in seconds
sampling_rate = 50
time_window = 2
window_size = sampling_rate * time_window
# overlap between consecutive windows, in percent of the window length
overlap_ratio = 0

# The start/end indices are bound to a real name rather than `_`, which
# IPython uses for the most recent cell output.
window_data, window_indices = sliding_window_samples(data, window_size, overlap_ratio)
# Build the message from the settings above so it stays correct when they change.
print(f"shape of window dataset ({time_window} sec with {overlap_ratio}% overlap): {window_data.shape}")

shape of window dataset (2 sec with 0% overlap): (32008, 100, 5)


In [8]:
# inspect the first window; each row still carries subject, x, y, z and activity
window_data[0]

array([[0, -9.57434, -2.02733, 1.34506, 'climbing_up'],
       [0, -9.56479, -1.99597, 1.39345, 'climbing_up'],
       [0, -9.55122, -1.98445, 1.41139, 'climbing_up'],
       [0, -9.51335, -1.97557, 1.42615, 'climbing_up'],
       [0, -9.52959, -1.98187, 1.45395, 'climbing_up'],
       [0, -9.55446, -2.00818, 1.40735, 'climbing_up'],
       [0, -9.53834, -2.00737, 1.37628, 'climbing_up'],
       [0, -9.53804, -2.01022, 1.39708, 'climbing_up'],
       [0, -9.54524, -2.00552, 1.33041, 'climbing_up'],
       [0, -9.52776, -2.00702, 1.34845, 'climbing_up'],
       [0, -9.55386, -2.02527, 1.35837, 'climbing_up'],
       [0, -9.52835, -2.03238, 1.38365, 'climbing_up'],
       [0, -9.56699, -1.99843, 1.39401, 'climbing_up'],
       [0, -9.56685, -2.00259, 1.39221, 'climbing_up'],
       [0, -9.52701, -2.00899, 1.43456, 'climbing_up'],
       [0, -9.5379, -1.98004, 1.41574, 'climbing_up'],
       [0, -9.5269, -2.00275, 1.39914, 'climbing_up'],
       [0, -9.53383, -2.01756, 1.39552, 'climbing_

In [9]:
# Keep only the x, y, z axes: drop the leading subject column and the
# trailing activity-label column in a single slice.
# The windows were built from rows mixing numbers and strings, so the array
# is expected to be dtype=object (TODO confirm with window_data.dtype);
# casting to float keeps the numeric transforms below fast and lets np.save
# write a plain, non-pickled array.
# NOTE: this cell overwrites window_data, so re-running it without first
# re-running the windowing cell would strip further columns.
window_data = window_data[:, :, 1:-1].astype(np.float64)

In [10]:
# shape of one window after removing the subject and label columns
window_data[0].shape

(100, 3)

# Signal Transformation

In [18]:
def add_random_noise_single_window(data, noise_level=0.1):
    """
    Perturb one window of data with zero-mean Gaussian noise.

    :param data: Window to perturb, shape (window_size, 3).
    :param noise_level: Standard deviation of the noise drawn for each value.
    :return: New array of the same shape holding data plus noise.
    """
    # One independent noise sample per element of the window.
    return data + np.random.normal(loc=0, scale=noise_level, size=data.shape)


In [20]:
def random_cropping_single_window(data, crop_ratio=0.8):
    """
    Randomly crop a single window of data and pad it to maintain original shape.

    A contiguous segment of int(window_size * crop_ratio) rows is taken from a
    random position in the window; rows of zeros are appended after it so the
    result has as many rows as the input.

    :param data: Input data with shape (window_size, n_channels).
    :param crop_ratio: Ratio of the original window size to keep, between 0 and 1 inclusive.
    :return: Cropped and padded data.
    :raises ValueError: If crop_ratio is outside [0, 1].
    """
    if not 0 <= crop_ratio <= 1:
        raise ValueError(f"crop_ratio must be between 0 and 1, got {crop_ratio}")

    window_size = data.shape[0]
    new_size = int(window_size * crop_ratio)
    padding_size = window_size - new_size

    # np.random.randint excludes its upper bound, so "+ 1" is needed for the
    # last valid start position to be reachable (and for crop_ratio=1.0,
    # where the only valid start is 0).
    start = np.random.randint(0, padding_size + 1)
    cropped_data = data[start:start + new_size, :]

    # Pad the cropped data to maintain original window size
    padding = np.zeros((padding_size, data.shape[1]))
    return np.vstack((cropped_data, padding))


In [21]:
def magnitude_warping_single_window(data, warp_factor=0.2):
    """
    Apply magnitude warping to a single window of data.

    Each channel is multiplied by its own smooth random curve: a cubic spline
    through 4 to 9 evenly spaced control points whose values are drawn from a
    normal distribution centred on 1.

    :param data: Input data with shape (window_size, n_channels).
    :param warp_factor: Standard deviation of the control-point values around 1.
    :return: Warped data with the same shape as the input; the input is not modified.
    """
    window_size = data.shape[0]
    warped_data = np.copy(data)
    # An in-place multiplication by a float curve cannot be stored in an
    # integer/boolean array, so promote those to float first.
    if warped_data.dtype.kind in "iub":
        warped_data = warped_data.astype(float)

    # The evaluation grid is the same for every channel.
    time_points = np.linspace(0, 1, window_size)

    for j in range(warped_data.shape[1]):  # for each axis
        random_points = np.linspace(0, 1, np.random.randint(4, 10))
        warp_values = 1 + np.random.normal(0, warp_factor, random_points.size)
        interpolator = CubicSpline(random_points, warp_values)
        warped_data[:, j] *= interpolator(time_points)

    return warped_data


In [22]:
def time_warping_single_window(data, warp_factor=0.2):
    """
    Apply time warping to a single window of data.

    A smooth random curve (cubic spline through 3 to 5 randomly placed control
    points with values drawn around 1) is rescaled to span
    [0, window_size - 1] and used as the fractional sample positions at which
    every channel is re-read through cubic-spline interpolation. The same
    positions are used for all channels.

    :param data: Input data with shape (window_size, n_channels).
    :param warp_factor: Standard deviation of the control-point values around 1.
    :return: Time-warped data with the same shape as the input.
    """
    window_size = data.shape[0]
    time_points = np.linspace(0, 1, window_size)
    random_points = np.sort(np.random.rand(np.random.randint(3, 6)))
    warp_values = np.random.normal(1, warp_factor, random_points.size)
    warped_time = CubicSpline(random_points, warp_values)(time_points)

    # Rescale the curve to the index range of the window.
    warped_time -= warped_time.min()
    span = warped_time.max()
    if np.isclose(span, 0.0):
        # A flat curve (e.g. warp_factor=0) carries no warp; dividing by its
        # span would give NaN or blow rounding noise up to full scale, so
        # fall back to the unwarped sample positions.
        warped_time = np.arange(window_size, dtype=float)
    else:
        warped_time /= span
        warped_time *= (window_size - 1)

    # Integer/boolean input would truncate the interpolated values, so those
    # are returned as float; every other dtype is preserved.
    out_dtype = float if data.dtype.kind in "iub" else data.dtype

    # Interpolate all channels at once along the time axis.
    signal = np.asarray(data, dtype=float)
    interpolator = CubicSpline(np.arange(window_size), signal, axis=0)
    return interpolator(warped_time).astype(out_dtype, copy=False)


In [23]:
# Apply each of the four transformations to the first window ...
noisy_data = add_random_noise_single_window(window_data[0], noise_level=0.1)
cropped_data = random_cropping_single_window(window_data[0], crop_ratio=0.8)
warped_data = magnitude_warping_single_window(window_data[0], warp_factor=0.2)
time_warped_data = time_warping_single_window(window_data[0], warp_factor=0.2)

# ... and report the shape of every result
for label, result in (
    ("noisy", noisy_data),
    ("cropped", cropped_data),
    ("warped", warped_data),
    ("time warped", time_warped_data),
):
    print(f"shape of {label} data: {result.shape}")

shape of noisy data: (100, 3)
shape of cropped data: (100, 3)
shape of warped data: (100, 3)
shape of time warped data: (100, 3)


In [24]:
# make a copy of the original data; every window in it is overwritten below
# by its transformed version
augmented_data = np.copy(window_data)

# fix the seed so that re-running this cell regenerates the same dataset
np.random.seed(42)

# label -> transformation applied to a window; the label is what is stored
# in augmented_labels
transformations = {
    0: lambda window: add_random_noise_single_window(window, noise_level=0.1),
    1: lambda window: random_cropping_single_window(window, crop_ratio=0.8),
    2: lambda window: magnitude_warping_single_window(window, warp_factor=0.2),
    3: lambda window: time_warping_single_window(window, warp_factor=0.2),
}

# create labels for augmented data
augmented_labels = []

# loop over all windows
for i in range(window_data.shape[0]):
    # choose one number from 0 to 3
    label = int(np.random.randint(0, 4))
    augmented_data[i] = transformations[label](window_data[i])
    augmented_labels.append(label)

In [25]:
# sanity check: there should be exactly one label per augmented window
print(f"shape of augmented data: {augmented_data.shape}")
print(f"shape of augmented labels: {len(augmented_labels)}")

shape of augmented data: (32008, 100, 3)
shape of augmented labels: 32008


In [28]:
# save augmented data and labels
# Cast explicitly so both files hold plain numeric arrays: np.save pickles
# object-dtype arrays (which then need allow_pickle=True to load), and the
# labels are a Python list that would otherwise be converted implicitly.
np.save('./ISWC21_data_plus_raw/augmented_data.npy', np.asarray(augmented_data, dtype=np.float64))
np.save('./ISWC21_data_plus_raw/augmented_labels.npy', np.asarray(augmented_labels, dtype=np.int64))