In [1]:
# import libraries
import os
import sys
import time
import pandas as pd
import numpy as np
from scipy import stats
from scipy.interpolate import CubicSpline
from scipy.stats import mode
from sklearn.preprocessing import LabelEncoder

In [2]:
BATCH_SIZE = 256
train_rate = 0.1

In [3]:
# load data without header
data = pd.read_csv('./ISWC21_data_plus_raw/rwhar_3sbjs_data.csv', header=None)
# add header
data.columns = ['subject', 'x', 'y', 'z', 'activity']
data.head()

Unnamed: 0,subject,x,y,z,activity
0,0,0.378284,10.168175,0.847547,climbing_up
1,0,0.383671,10.172364,0.849942,climbing_up
2,0,0.372298,10.181941,0.859518,climbing_up
3,0,0.342969,10.170568,0.834379,climbing_up
4,0,0.319626,10.159795,0.818817,climbing_up


In [4]:
data.shape

(659260, 5)

In [5]:
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(data['activity'])
data['encoded_activity'] = encoded_labels

In [6]:
data.head()

Unnamed: 0,subject,x,y,z,activity,encoded_activity
0,0,0.378284,10.168175,0.847547,climbing_up,1
1,0,0.383671,10.172364,0.849942,climbing_up,1
2,0,0.372298,10.181941,0.859518,climbing_up,1
3,0,0.342969,10.170568,0.834379,climbing_up,1
4,0,0.319626,10.159795,0.818817,climbing_up,1


In [7]:
def sliding_window_samples(data, samples_per_window, overlap_ratio):
    """
    Return a sliding window measured in number of samples over a data array along with the mode label for each window.

    :param data: input array, can be numpy or pandas dataframe
    :param samples_per_window: window length as number of samples
    :param overlap_ratio: overlap is meant as percentage and should be an integer value
    :return: tuple of windows, indices, and labels
    """
    windows = []
    indices = []
    labels = []
    curr = 0
    win_len = int(samples_per_window)
    if overlap_ratio is not None:
        overlapping_elements = int((overlap_ratio / 100) * win_len)
        if overlapping_elements >= win_len:
            print('Number of overlapping elements exceeds window size.')
            return
    while curr < len(data) - win_len:
        window = data[curr:curr + win_len]
        windows.append(window.iloc[:, :-2])  # Exclude the last two columns (original and encoded labels)
        indices.append([curr, curr + win_len])
        
        # Extract and compute the mode of the encoded labels for the current window
        window_labels = window['encoded_activity']
        mode_result = mode(window_labels)
        window_label = mode_result[0] if mode_result[0].size > 0 else mode_result
        labels.append(window_label)

        curr += win_len - overlapping_elements

    result_windows = np.array(windows)
    result_indices = np.array(indices)
    result_labels = np.array(labels)
    return result_windows, result_indices, result_labels


In [8]:
sampling_rate = 50
time_window = 2
window_size = sampling_rate * time_window
overlap_ratio = 0

window_data, _, window_label = sliding_window_samples(data, window_size, overlap_ratio)
print(f"shape of window dataset (2 sec with 0% overlap): {window_data.shape}")
print(f"shape of window label (2 sec with 0% overlap): {window_label.shape}")

shape of window dataset (2 sec with 0% overlap): (6592, 100, 4)
shape of window label (2 sec with 0% overlap): (6592,)


In [9]:
#remove the subject column
window_data = window_data[:, :, 1:]

In [10]:
window_data[0].shape

(100, 3)

In [11]:
import torch
from torch.utils.data import DataLoader, TensorDataset, Subset
import numpy as np


window_data = window_data.astype(np.float32)

# Convert to PyTorch tensors
window_data_tensor = torch.from_numpy(window_data)
window_label_tensor = torch.from_numpy(window_label)
#convert labels to long
window_label_tensor = window_label_tensor.long()

# split data into train and test sets
train_size = int(train_rate * len(window_data_tensor))
test_size = len(window_data_tensor) - train_size

# Creating datasets
dataset = TensorDataset(window_data_tensor, window_label_tensor)
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

# Function to extract tensors from Subset
def extract_subset_data(subset, dataset):
    return dataset.tensors[0][subset.indices], dataset.tensors[1][subset.indices]

# Extract data and labels from train and test sets
train_data, train_labels = extract_subset_data(train_dataset, dataset)
test_data, test_labels = extract_subset_data(test_dataset, dataset)

# create train and test TensorDataset
train_dataset = TensorDataset(train_data, train_labels)
test_dataset = TensorDataset(test_data, test_labels)

# create DataLoader for train and test sets
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# check if GPU is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [12]:
#  print the shape of train_loader and test_loader
print(f"shape of train_loader: {len(train_loader)}")
print(f"shape of test_loader: {len(test_loader)}")

shape of train_loader: 3
shape of test_loader: 24


In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CNNFeatureExtractor(nn.Module):
    def __init__(self, num_classes=4):
        super(CNNFeatureExtractor, self).__init__()

        self.conv1 = nn.Conv1d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv1d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(256 * 12, 128)  # Adjust the input features according to your final conv layer output
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [14]:
class TemporalBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, dilation, padding):
        super(TemporalBlock, self).__init__()
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size,
                               stride=stride, padding=0, dilation=dilation)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size,
                               stride=stride, padding=0, dilation=dilation)
        self.relu2 = nn.ReLU()
        self.downsample = nn.Conv1d(in_channels, out_channels, 1) if in_channels != out_channels else None
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.conv1(x)
        out = self.relu1(out)
        out = self.conv2(out)
        out = self.relu2(out)
        
        res = x if self.downsample is None else self.downsample(x)

        # Adjusting the length of the residual to match the output
        if out.size(2) != res.size(2):
            desired_length = out.size(2)
            res = res[:, :, :desired_length]

        return self.relu(out + res)


class TCN(nn.Module):
    def __init__(self, num_inputs, num_channels, kernel_size, dropout=0.2, num_classes=4):
        super(TCN, self).__init__()
        layers = []
        num_levels = len(num_channels)
        for i in range(num_levels):
            dilation_size = 2 ** i
            in_channels = num_inputs if i == 0 else num_channels[i-1]
            out_channels = num_channels[i]
            layers += [TemporalBlock(in_channels, out_channels, kernel_size, stride=1, dilation=dilation_size,
                                     padding=(kernel_size-1) * dilation_size + (dilation_size - 1))]

        self.tcn = nn.Sequential(*layers)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(num_channels[-1], num_classes)

    def forward(self, x):
        x = self.tcn(x)
        x = F.avg_pool1d(x, x.size(2)).squeeze(2)  # Global Average Pooling
        x = self.dropout(x)
        return self.fc(x)

In [15]:
# create training function
def train(model, train_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(train_loader):
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs = inputs.transpose(1, 2)
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
    return running_loss / len(train_loader)

In [16]:
# create testing function
def test(model, test_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    #calculate accuracy
    correct = 0
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            inputs, labels = data[0].to(device), data[1].to(device)
            inputs = inputs.transpose(1, 2)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            #calculate accuracy
            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == labels).sum().item()
    accuracy = correct / len(test_loader.dataset)
    return running_loss / len(test_loader), accuracy

In [17]:
# create function to train and test model
def train_and_test(model, train_loader, test_loader, criterion, optimizer, device, num_epochs):
    train_losses = []
    test_losses = []
    test_accuracies = []
    for epoch in range(num_epochs):
        train_loss = train(model, train_loader, criterion, optimizer, device)
        test_loss, test_accuracy = test(model, test_loader, criterion, device)
        train_losses.append(train_loss)
        test_losses.append(test_loss)
        test_accuracies.append(test_accuracy)
        print(f"Epoch: {epoch + 1}/{num_epochs}.. Train Loss: {train_loss:.3f}.. Test Loss: {test_loss:.3f}.. Test Accuracy: {test_accuracy:.3f}")
    return train_losses, test_losses, test_accuracies

In [18]:
# get number of classes
num_classes = len(np.unique(window_label))
print(f"number of classes: {num_classes}")

number of classes: 8


In [19]:
model = CNNFeatureExtractor(num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# train and test model
num_epochs = 50
train_losses, test_losses, test_accuracies = train_and_test(model, train_loader, test_loader, criterion, optimizer, device, num_epochs=num_epochs)

Epoch: 1/50.. Train Loss: 1.921.. Test Loss: 1.475.. Test Accuracy: 0.444
Epoch: 2/50.. Train Loss: 1.298.. Test Loss: 1.093.. Test Accuracy: 0.567
Epoch: 3/50.. Train Loss: 0.999.. Test Loss: 0.978.. Test Accuracy: 0.628
Epoch: 4/50.. Train Loss: 0.902.. Test Loss: 0.867.. Test Accuracy: 0.707
Epoch: 5/50.. Train Loss: 0.794.. Test Loss: 0.767.. Test Accuracy: 0.755
Epoch: 6/50.. Train Loss: 0.709.. Test Loss: 0.726.. Test Accuracy: 0.794
Epoch: 7/50.. Train Loss: 0.616.. Test Loss: 0.705.. Test Accuracy: 0.776
Epoch: 8/50.. Train Loss: 0.611.. Test Loss: 0.649.. Test Accuracy: 0.812
Epoch: 9/50.. Train Loss: 0.548.. Test Loss: 0.633.. Test Accuracy: 0.803
Epoch: 10/50.. Train Loss: 0.502.. Test Loss: 0.658.. Test Accuracy: 0.784
Epoch: 11/50.. Train Loss: 0.462.. Test Loss: 0.611.. Test Accuracy: 0.805
Epoch: 12/50.. Train Loss: 0.485.. Test Loss: 0.573.. Test Accuracy: 0.835
Epoch: 13/50.. Train Loss: 0.451.. Test Loss: 0.601.. Test Accuracy: 0.820
Epoch: 14/50.. Train Loss: 0.412..

In [20]:
model = CNNFeatureExtractor(num_classes=4)

#load pretrained model
model.load_state_dict(torch.load('./models/cnn_feature_extractor.pt'))

model.fc2 = nn.Linear(in_features=model.fc2.in_features, out_features=num_classes)
model.to(device)

# define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.fc2.parameters(), lr=0.001)

# train and test model
num_epochs = 50
train_losses, test_losses, test_accuracies = train_and_test(model, train_loader, test_loader, criterion, optimizer, device, num_epochs)


Epoch: 1/50.. Train Loss: 2.351.. Test Loss: 2.316.. Test Accuracy: 0.108
Epoch: 2/50.. Train Loss: 2.282.. Test Loss: 2.243.. Test Accuracy: 0.122
Epoch: 3/50.. Train Loss: 2.209.. Test Loss: 2.177.. Test Accuracy: 0.138
Epoch: 4/50.. Train Loss: 2.138.. Test Loss: 2.118.. Test Accuracy: 0.194
Epoch: 5/50.. Train Loss: 2.101.. Test Loss: 2.064.. Test Accuracy: 0.261
Epoch: 6/50.. Train Loss: 2.039.. Test Loss: 2.015.. Test Accuracy: 0.273
Epoch: 7/50.. Train Loss: 2.013.. Test Loss: 1.970.. Test Accuracy: 0.280
Epoch: 8/50.. Train Loss: 1.947.. Test Loss: 1.928.. Test Accuracy: 0.289
Epoch: 9/50.. Train Loss: 1.910.. Test Loss: 1.890.. Test Accuracy: 0.296
Epoch: 10/50.. Train Loss: 1.889.. Test Loss: 1.854.. Test Accuracy: 0.301
Epoch: 11/50.. Train Loss: 1.852.. Test Loss: 1.822.. Test Accuracy: 0.308
Epoch: 12/50.. Train Loss: 1.818.. Test Loss: 1.792.. Test Accuracy: 0.312
Epoch: 13/50.. Train Loss: 1.801.. Test Loss: 1.764.. Test Accuracy: 0.318
Epoch: 14/50.. Train Loss: 1.772..

In [21]:
num_inputs = 3  # Assuming 3 input channels (x, y, z axes of the accelerometer)
num_channels = [64, 128, 256]  # Example channel sizes for each layer
kernel_size = 8  # Kernel size for temporal convolutions

model = TCN(num_inputs, num_channels, kernel_size, num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# train and test model
num_epochs = 50
train_losses, test_losses, test_accuracies = train_and_test(model, train_loader, test_loader, criterion, optimizer, device, num_epochs=num_epochs)

Epoch: 1/50.. Train Loss: 1.815.. Test Loss: 1.382.. Test Accuracy: 0.460
Epoch: 2/50.. Train Loss: 1.248.. Test Loss: 1.172.. Test Accuracy: 0.515
Epoch: 3/50.. Train Loss: 1.077.. Test Loss: 0.996.. Test Accuracy: 0.567
Epoch: 4/50.. Train Loss: 0.974.. Test Loss: 0.962.. Test Accuracy: 0.643
Epoch: 5/50.. Train Loss: 0.889.. Test Loss: 0.893.. Test Accuracy: 0.663
Epoch: 6/50.. Train Loss: 0.861.. Test Loss: 0.824.. Test Accuracy: 0.749
Epoch: 7/50.. Train Loss: 0.752.. Test Loss: 0.770.. Test Accuracy: 0.740
Epoch: 8/50.. Train Loss: 0.653.. Test Loss: 0.828.. Test Accuracy: 0.677
Epoch: 9/50.. Train Loss: 0.671.. Test Loss: 0.703.. Test Accuracy: 0.743
Epoch: 10/50.. Train Loss: 0.612.. Test Loss: 0.660.. Test Accuracy: 0.789
Epoch: 11/50.. Train Loss: 0.568.. Test Loss: 0.636.. Test Accuracy: 0.794
Epoch: 12/50.. Train Loss: 0.518.. Test Loss: 0.644.. Test Accuracy: 0.790
Epoch: 13/50.. Train Loss: 0.489.. Test Loss: 0.587.. Test Accuracy: 0.819
Epoch: 14/50.. Train Loss: 0.486..

In [22]:
model = TCN(num_inputs, num_channels, kernel_size, num_classes=4)

#load pretrained model
model.load_state_dict(torch.load('./models/tcn_20231216-0131.pt'))

model.fc = nn.Linear(in_features=model.fc.in_features, out_features=num_classes)
model.to(device)

# define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.fc.parameters(), lr=0.001)

# train and test model
num_epochs = 50
train_losses, test_losses, test_accuracies = train_and_test(model, train_loader, test_loader, criterion, optimizer, device, num_epochs)

Epoch: 1/50.. Train Loss: 2.059.. Test Loss: 2.015.. Test Accuracy: 0.234
Epoch: 2/50.. Train Loss: 1.987.. Test Loss: 1.952.. Test Accuracy: 0.258
Epoch: 3/50.. Train Loss: 1.934.. Test Loss: 1.902.. Test Accuracy: 0.468
Epoch: 4/50.. Train Loss: 1.886.. Test Loss: 1.860.. Test Accuracy: 0.494
Epoch: 5/50.. Train Loss: 1.854.. Test Loss: 1.824.. Test Accuracy: 0.488
Epoch: 6/50.. Train Loss: 1.815.. Test Loss: 1.791.. Test Accuracy: 0.481
Epoch: 7/50.. Train Loss: 1.792.. Test Loss: 1.760.. Test Accuracy: 0.481
Epoch: 8/50.. Train Loss: 1.761.. Test Loss: 1.732.. Test Accuracy: 0.474
Epoch: 9/50.. Train Loss: 1.717.. Test Loss: 1.704.. Test Accuracy: 0.470
Epoch: 10/50.. Train Loss: 1.689.. Test Loss: 1.679.. Test Accuracy: 0.470
Epoch: 11/50.. Train Loss: 1.673.. Test Loss: 1.655.. Test Accuracy: 0.468
Epoch: 12/50.. Train Loss: 1.644.. Test Loss: 1.632.. Test Accuracy: 0.472
Epoch: 13/50.. Train Loss: 1.606.. Test Loss: 1.610.. Test Accuracy: 0.474
Epoch: 14/50.. Train Loss: 1.596..