In [1]:
# import libraries
import os
import sys
import time
import pandas as pd
import numpy as np
from scipy import stats
from scipy.interpolate import CubicSpline
import torch.optim as optim

In [2]:
num_epochs = 20
BATCH_SIZE = 128
learning_rate = 0.001

In [3]:
# load data without header
data1 = pd.read_csv('./ISWC21_data_plus_raw/wetlab_data.csv')
# add header
data1.columns = ['subject_id', 'acc_x', 'acc_y', 'acc_z', 'activity', 'activity_label_2']
data1.head()

Unnamed: 0,subject_id,acc_x,acc_y,acc_z,activity,activity_label_2
0,0,0.306563,9.196875,-1.22625,null_class,null_class
1,0,0.306563,9.196875,-1.22625,null_class,null_class
2,0,0.306563,9.196875,-1.22625,null_class,null_class
3,0,0.306563,9.196875,-1.22625,null_class,null_class
4,0,0.306563,9.196875,-1.22625,null_class,null_class


In [4]:
#remove activity label 2 column
data1 = data1.drop(['activity_label_2'], axis=1)
data1.shape

(3163679, 5)

In [5]:
#count number of unique subjects
print("Number of unique subjects: ", data1['subject_id'].nunique())

Number of unique subjects:  22


In [6]:
# load data without header
data2 = pd.read_csv('./ISWC21_data_plus_raw/rwhar_data.csv', header=None)
# add header
data2.columns = ['subject_id', 'acc_x', 'acc_y', 'acc_z', 'activity']
data2.head()

Unnamed: 0,subject_id,acc_x,acc_y,acc_z,activity
0,0,-9.57434,-2.02733,1.34506,climbing_up
1,0,-9.56479,-1.99597,1.39345,climbing_up
2,0,-9.55122,-1.98445,1.41139,climbing_up
3,0,-9.51335,-1.97557,1.42615,climbing_up
4,0,-9.52959,-1.98187,1.45395,climbing_up


In [7]:
data2.shape

(3200803, 5)

In [8]:
#count number of unique subjects
print("Number of unique subjects: ", data2['subject_id'].nunique())

Number of unique subjects:  15


In [9]:
# load data without header
data3 = pd.read_csv('./ISWC21_data_plus_raw/sbhar_data.csv', header=None)
# add header
data3.columns = ['subject_id', 'acc_x', 'acc_y', 'acc_z', 'activity']
data3.head()

Unnamed: 0,subject_id,acc_x,acc_y,acc_z,activity
0,0,0.443056,0.0375,0.888889,null_class
1,0,0.440278,0.041667,0.880556,null_class
2,0,0.451389,0.043056,0.876389,null_class
3,0,0.456944,0.034722,0.888889,null_class
4,0,0.447222,0.036111,0.888889,null_class


In [10]:
data3.shape

(1122772, 5)

In [28]:
#count number of unique subjects
print("Number of unique subjects: ", data3['subject_id'].nunique())

Number of unique subjects:  30


In [30]:
#print all of the unique subjects
print("Unique subjects: ", data3['subject_id'].unique())

Unique subjects:  [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29]


In [31]:
# convert subject_id to int
data3['subject_id'] = data3['subject_id'].astype(int)

In [32]:
# # join all data in one dataframe row-wise
# data = pd.concat([data1, data2, data3], ignore_index=True, axis=0)
data = data3

In [33]:
data.shape

(1122772, 5)

In [34]:
#check null values in subject_id column
data['subject_id'].isnull().values.any()

False

In [43]:
#split train and test data
#randomly select 20% of subjects for test data
test_subjects = data['subject_id'].unique()
test_subjects = np.random.choice(test_subjects, size=int(0.2*len(test_subjects)), replace=False)
# test_subjects = [27 24  5  4 16  1]
print("Test subjects: ", test_subjects)

#split data into train and test
train_data = data[~data['subject_id'].isin(test_subjects)]
test_data = data[data['subject_id'].isin(test_subjects)]
print("Train data shape: ", train_data.shape)
print("Test data shape: ", test_data.shape)

Test subjects:  [ 4 11  3 15  7 29]
Train data shape:  (907785, 5)
Test data shape:  (214987, 5)


In [44]:
# z normalization with respect to train data
train_data_mean = train_data[['acc_x', 'acc_y', 'acc_z']].mean()
train_data_std = train_data[['acc_x', 'acc_y', 'acc_z']].std()
# Normalize Training Data
train_data.loc[:, ['acc_x', 'acc_y', 'acc_z']] = (train_data[['acc_x', 'acc_y', 'acc_z']] - train_data_mean) / train_data_std

# Normalize Test Data with Training Statistics
test_data.loc[:, ['acc_x', 'acc_y', 'acc_z']] = (test_data[['acc_x', 'acc_y', 'acc_z']] - train_data_mean) / train_data_std

In [45]:
# function for sliding window

def sliding_window_samples(data, samples_per_window, overlap_ratio):
    """
    Return a sliding window measured in number of samples over a data array.

    :param data: input array, can be numpy or pandas dataframe
    :param samples_per_window: window length as number of samples
    :param overlap_ratio: overlap is meant as percentage and should be an integer value
    :return: tuple of windows and indices
    """
    windows = []
    indices = []
    curr = 0
    win_len = int(samples_per_window)
    if overlap_ratio is not None:
        overlapping_elements = int((overlap_ratio / 100) * (win_len))
        if overlapping_elements >= win_len:
            print('Number of overlapping elements exceeds window size.')
            return
    while curr < len(data) - win_len:
        windows.append(data[curr:curr + win_len])
        indices.append([curr, curr + win_len])
        curr = curr + win_len - overlapping_elements
    try:
        result_windows = np.array(windows)
        result_indices = np.array(indices)
    except:
        result_windows = np.empty(shape=(len(windows), win_len, data.shape[1]), dtype=object)
        result_indices = np.array(indices)
        for i in range(0, len(windows)):
            result_windows[i] = windows[i]
            result_indices[i] = indices[i]
    return result_windows, result_indices

In [46]:
sampling_rate = 50
time_window = 8
window_size = sampling_rate * time_window
overlap_ratio = 50

train_window_data, _ = sliding_window_samples(train_data, window_size, overlap_ratio)
print(f"shape of train window dataset ({time_window} sec with {overlap_ratio}% overlap): {train_window_data.shape}")

test_window_data, _ = sliding_window_samples(test_data, window_size, overlap_ratio)
print(f"shape of test window dataset ({time_window} sec with {overlap_ratio}% overlap): {test_window_data.shape}")

shape of train window dataset (8 sec with 50% overlap): (4537, 400, 5)
shape of test window dataset (8 sec with 50% overlap): (1073, 400, 5)


In [47]:
train_window_data[0]

array([[0, -0.9374415776332142, 0.1511316569299629, 2.2058765963654703,
        'null_class'],
       [0, -0.9444299648514514, 0.16221732187678917, 2.1831314363879004,
        'null_class'],
       [0, -0.9164765383070843, 0.16591254352573132, 2.1717588563991153,
        'null_class'],
       ...,
       [0, 0.43577345253585653, -0.8096260526491502,
        -0.19373279410209546, 'standing'],
       [0, 0.4252911163656642, -0.8170165121178687, -0.19373279410209546,
        'standing'],
       [0, 0.4252911163656642, -0.8170165121178687, -0.19373279410209546,
        'standing']], dtype=object)

In [49]:
# remove the label column
train_window_data = train_window_data[:, :, :-1]
# train_window_data = train_window_data[:, :, :-1]
#remove the subject column
train_window_data = train_window_data[:, :, 1:]

test_window_data = test_window_data[:, :, :-1]
test_window_data = test_window_data[:, :, 1:]


In [50]:
train_window_data[0].shape

(400, 3)

In [51]:
test_window_data[0].shape

(400, 3)

In [63]:
def add_jitter(data, noise_factor=0.05):
    jitter = noise_factor * np.random.randn(*data.shape)
    return data + jitter

In [64]:
def scale_data(data, min_scale=0.5, max_scale=1.5):
    scaling_factor = np.random.uniform(min_scale, max_scale)
    return data * scaling_factor


In [65]:
def rotate_data(data):
    # Invert the sign of the data to simulate sensor rotation
    return -data

In [66]:
def negate_data(data):
    return -data

In [74]:
def horizontal_flip(data):
    # This function now correctly handles 2D data arrays
    return data[::-1, :]

In [75]:
def permute_data(data, num_segments=4):
    segment_length = data.shape[0] // num_segments  # Adjusted to the first dimension for 2D data
    permuted_indices = np.random.permutation(num_segments)
    return np.concatenate(
        [data[segment_length * idx:segment_length * (idx + 1), :] for idx in permuted_indices], axis=0)  # Concatenating along the first axis

In [81]:
from scipy.interpolate import interp1d
import numpy as np

def time_warp(data, warp_factor_range=(0.8, 1.2)):
    sequence_length, num_channels = data.shape
    original_time_points = np.linspace(0, 1, sequence_length)
    warp_factor = np.random.uniform(*warp_factor_range)
    
    # Generate new time points based on the warp factor
    warped_time_points = np.linspace(0, warp_factor, sequence_length)

    warped_data = np.zeros_like(data)
    for j in range(num_channels):
        # Interpolate each channel
        interpolation = interp1d(original_time_points, data[:, j], 
                                 kind='linear', fill_value="extrapolate")
        warped_data[:, j] = interpolation(warped_time_points)

    return warped_data


In [80]:
def shuffle_channels(data):
    # Assuming data is 2D with shape (sequence_length, num_channels)
    shuffled_indices = np.random.permutation(data.shape[1])  # Shuffle along the second dimension
    return data[:, shuffled_indices]

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class TPN(nn.Module):
    def __init__(self):
        super(TPN, self).__init__()
        self.trunk = nn.Sequential(
            nn.Conv1d(in_channels=..., out_channels=32, kernel_size=24, stride=1),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Conv1d(in_channels=32, out_channels=64, kernel_size=16, stride=1),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Conv1d(in_channels=64, out_channels=96, kernel_size=8, stride=1),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.AdaptiveMaxPool1d(output_size=1)
        )

        self.heads = nn.ModuleList([
            nn.Sequential(
                nn.Linear(96, 256),
                nn.ReLU(),
                nn.Linear(256, 1),
                nn.Sigmoid()
            ) for _ in range(8)  # 8 heads for 8 different transformations
        ])

    def forward(self, x):
        x = self.trunk(x)
        x = x.view(x.size(0), -1)  # Flatten the output for the fully-connected layer
        outputs = [head(x) for head in self.heads]
        return outputs


In [83]:
import numpy as np

# Initialize lists to store datasets
train_dataset = [[] for _ in range(8)]

# Loop over all training data
for data in train_window_data:
    # loop over all transformations
    # print(f"shape of data: {data.shape}")
    for j in range(8):
        # Original data with label 0
        train_dataset[j].append((data, 0))
        # Apply transformation based on j and save it in the transformed_data variable
        if j == 0:
            transformed_data = add_jitter(data)
        elif j == 1:
            transformed_data = scale_data(data)
        elif j == 2:
            transformed_data = rotate_data(data)
        elif j == 3:
            transformed_data = negate_data(data)
        elif j == 4:
            transformed_data = horizontal_flip(data)
        elif j == 5:
            transformed_data = permute_data(data)
        elif j == 6:
            transformed_data = time_warp(data)
        elif j == 7:
            transformed_data = shuffle_channels(data)
        # Append the transformed data with label 1
        train_dataset[j].append((transformed_data, 1))

# Convert lists to numpy arrays
for j in range(8):
    data, labels = zip(*train_dataset[j])
    data = np.array(data)
    labels = np.array(labels)
    train_dataset[j] = (data, labels)


In [84]:
# print the shape of all training datasets
for j in range(8):
    print(f"shape of training dataset {j}: {train_dataset[j][0].shape}")

shape of training dataset 0: (9074, 400, 3)
shape of training dataset 1: (9074, 400, 3)
shape of training dataset 2: (9074, 400, 3)
shape of training dataset 3: (9074, 400, 3)
shape of training dataset 4: (9074, 400, 3)
shape of training dataset 5: (9074, 400, 3)
shape of training dataset 6: (9074, 400, 3)
shape of training dataset 7: (9074, 400, 3)


In [85]:
# print the class distribution of all training datasets
for j in range(8):
    print(f"Class distribution of training dataset {j}: {np.unique(train_dataset[j][1], return_counts=True)}")

Class distribution of training dataset 0: (array([0, 1]), array([4537, 4537], dtype=int64))
Class distribution of training dataset 1: (array([0, 1]), array([4537, 4537], dtype=int64))
Class distribution of training dataset 2: (array([0, 1]), array([4537, 4537], dtype=int64))
Class distribution of training dataset 3: (array([0, 1]), array([4537, 4537], dtype=int64))
Class distribution of training dataset 4: (array([0, 1]), array([4537, 4537], dtype=int64))
Class distribution of training dataset 5: (array([0, 1]), array([4537, 4537], dtype=int64))
Class distribution of training dataset 6: (array([0, 1]), array([4537, 4537], dtype=int64))
Class distribution of training dataset 7: (array([0, 1]), array([4537, 4537], dtype=int64))


In [86]:
# initialize lists to store datasets for test data
test_dataset = [[] for _ in range(8)]

# loop over all test data
for data in test_window_data:
    # loop over all transformations
    for j in range(8):
        # Original data with label 0
        test_dataset[j].append((data, 0))
        # Apply transformation based on j and save it in the transformed_data variable
        if j == 0:
            transformed_data = add_jitter(data)
        elif j == 1:
            transformed_data = scale_data(data)
        elif j == 2:
            transformed_data = rotate_data(data)
        elif j == 3:
            transformed_data = negate_data(data)
        elif j == 4:
            transformed_data = horizontal_flip(data)
        elif j == 5:
            transformed_data = permute_data(data)
        elif j == 6:
            transformed_data = time_warp(data)
        elif j == 7:
            transformed_data = shuffle_channels(data)
        # Append the transformed data with label 1
        test_dataset[j].append((transformed_data, 1))

# Convert lists to numpy arrays
for j in range(8):
    data, labels = zip(*test_dataset[j])
    data = np.array(data)
    labels = np.array(labels)
    test_dataset[j] = (data, labels)

In [87]:
# print the shape of all test datasets
for j in range(8):
    print(f"shape of test dataset {j}: {test_dataset[j][0].shape}")

shape of test dataset 0: (2146, 400, 3)
shape of test dataset 1: (2146, 400, 3)
shape of test dataset 2: (2146, 400, 3)
shape of test dataset 3: (2146, 400, 3)
shape of test dataset 4: (2146, 400, 3)
shape of test dataset 5: (2146, 400, 3)
shape of test dataset 6: (2146, 400, 3)
shape of test dataset 7: (2146, 400, 3)


In [88]:
# print the class distribution of all test datasets
for j in range(8):
    print(f"Class distribution of test dataset {j}: {np.unique(test_dataset[j][1], return_counts=True)}")

Class distribution of test dataset 0: (array([0, 1]), array([1073, 1073], dtype=int64))
Class distribution of test dataset 1: (array([0, 1]), array([1073, 1073], dtype=int64))
Class distribution of test dataset 2: (array([0, 1]), array([1073, 1073], dtype=int64))
Class distribution of test dataset 3: (array([0, 1]), array([1073, 1073], dtype=int64))
Class distribution of test dataset 4: (array([0, 1]), array([1073, 1073], dtype=int64))
Class distribution of test dataset 5: (array([0, 1]), array([1073, 1073], dtype=int64))
Class distribution of test dataset 6: (array([0, 1]), array([1073, 1073], dtype=int64))
Class distribution of test dataset 7: (array([0, 1]), array([1073, 1073], dtype=int64))
