# Setup

In [1]:
# import libraries
import os
import sys
import time
import pandas as pd
import numpy as np
from scipy import stats
from scipy.interpolate import CubicSpline
import torch.optim as optim
import torch.nn as nn
import torch
from torch.optim import Adam
from scipy.stats import mode
from sklearn.preprocessing import LabelEncoder
import random
from sklearn.metrics import f1_score
from torch.utils.data import TensorDataset, DataLoader

## Hyperparameters

In [22]:
# Seed every random number generator the notebook draws from so that a fresh
# "Restart & Run All" reproduces the same subject splits, window subsets and weights.
SEED = 420
random.seed(SEED)  # python's `random` drives the random.sample(...) window subsets below
np.random.seed(SEED)  # numpy drives the np.random.choice(...) subject splits
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
batch_size = 32

In [3]:
# pick the first GPU when CUDA is usable, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Load Dataset

In [4]:
# load data without header (header=None: the first csv row is read as data, not as column names)
data = pd.read_csv('./ISWC21_data_plus_raw/rwhar_data.csv', header=None)
# add header: name the five columns in file order
data.columns = ['subject_id', 'acc_x', 'acc_y', 'acc_z', 'activity']
# preview the first rows
data.head()

Unnamed: 0,subject_id,acc_x,acc_y,acc_z,activity
0,0,-9.57434,-2.02733,1.34506,climbing_up
1,0,-9.56479,-1.99597,1.39345,climbing_up
2,0,-9.55122,-1.98445,1.41139,climbing_up
3,0,-9.51335,-1.97557,1.42615,climbing_up
4,0,-9.52959,-1.98187,1.45395,climbing_up


In [5]:
# print the count of unique subjects
print('The number of unique subjects is {}'.format(data['subject_id'].nunique()))

The number of unique subjects is 15


In [6]:
data.shape

(3200803, 5)

In [7]:
# learn the activity-name -> integer id mapping, then store the ids next to the names
label_encoder = LabelEncoder().fit(data['activity'])
encoded_labels = label_encoder.transform(data['activity'])
data['encoded_activity'] = encoded_labels

In [8]:
data.head()

Unnamed: 0,subject_id,acc_x,acc_y,acc_z,activity,encoded_activity
0,0,-9.57434,-2.02733,1.34506,climbing_up,1
1,0,-9.56479,-1.99597,1.39345,climbing_up,1
2,0,-9.55122,-1.98445,1.41139,climbing_up,1
3,0,-9.51335,-1.97557,1.42615,climbing_up,1
4,0,-9.52959,-1.98187,1.45395,climbing_up,1


In [9]:
# get number of classes
num_classes = data['encoded_activity'].nunique()
num_classes

8

# Data Preprocessing

## Data Normalization

In [10]:
# fixed per-axis statistics used to standardise the accelerometer channels
# NOTE(review): the windows displayed further down show normalized acc_x values around -26,
# so these constants may not match the scale of this dataset -- confirm where they come from.
mean = {'acc_x': 0.816012, 'acc_y': -0.007595, 'acc_z': 0.074082}
std = {'acc_x': 0.398664, 'acc_y': 0.375481, 'acc_z': 0.366527}

# standardise each axis in place: (value - mean) / std
# NOTE: this overwrites the raw columns, so re-running the cell normalizes a second time
for channel in ('acc_x', 'acc_y', 'acc_z'):
    data[channel] = (data[channel] - mean[channel]) / std[channel]

In [11]:
# check the null values
data.isnull().sum()

subject_id          0
acc_x               0
acc_y               0
acc_z               0
activity            0
encoded_activity    0
dtype: int64

## Split Train and Test

In [12]:
# subject-wise split: 70% of the subjects (drawn without replacement) go to training,
# the remaining subjects form the test set
# NOTE: the names `train` and `test` are rebound to functions further down the notebook,
# so this cell has to run before those definitions are (re-)executed
subject_ids = data['subject_id'].unique()
n_train_subjects = int(0.7 * len(subject_ids))
train_subjects = np.random.choice(subject_ids, n_train_subjects, replace=False)

# boolean row mask: True where the row belongs to a training subject
is_train_row = data['subject_id'].isin(train_subjects)
train = data[is_train_row]
test = data[~is_train_row]

# report how many subjects and rows ended up on each side
for split_name, split_frame in (('train', train), ('test', test)):
    print('The number of {} users is {}'.format(split_name, split_frame['subject_id'].nunique()))
for split_name, split_frame in (('train', train), ('test', test)):
    print('The shape of {} is {}'.format(split_name, split_frame.shape))

The number of train users is 10
The number of test users is 5
The shape of train is (2200794, 6)
The shape of test is (1000009, 6)


In [13]:
train_subjects

array([ 2, 14,  5,  0, 11,  4, 10, 12,  7, 13], dtype=int64)

In [14]:
# print the test subjects
print('The test subjects are {}'.format(test['subject_id'].unique()))

# [1 3 6 8 9] are the test subjects

The test subjects are [1 3 6 8 9]


In [32]:
# draw 75% of the training subjects (without replacement) and keep all of their rows
train_pool = train['subject_id'].unique()
train_subjects_75 = np.random.choice(train_pool, int(0.75 * len(train_pool)), replace=False)
keep_rows_75 = data['subject_id'].isin(train_subjects_75)
train_75 = data[keep_rows_75]

# report which subjects were drawn and how many rows they contribute
print('The train subjects are {}'.format(train_75['subject_id'].unique()))
print('The shape of train_75 is {}'.format(train_75.shape))

The train subjects are [ 0  2  4  5  7 11 12]
The shape of train_75 is (1579134, 6)


In [33]:
# draw 50% of the training subjects (without replacement) and keep all of their rows
train_pool = train['subject_id'].unique()
train_subjects_50 = np.random.choice(train_pool, int(0.5 * len(train_pool)), replace=False)
keep_rows_50 = data['subject_id'].isin(train_subjects_50)
train_50 = data[keep_rows_50]

# report which subjects were drawn and how many rows they contribute
print('The train subjects are {}'.format(train_50['subject_id'].unique()))
print('The shape of train_50 is {}'.format(train_50.shape))

The train subjects are [ 2  5  7 11 12]
The shape of train_50 is (1118653, 6)


In [34]:
# draw 25% of the training subjects (without replacement) and keep all of their rows
train_pool = train['subject_id'].unique()
train_subjects_25 = np.random.choice(train_pool, int(0.25 * len(train_pool)), replace=False)
keep_rows_25 = data['subject_id'].isin(train_subjects_25)
train_25 = data[keep_rows_25]

# report which subjects were drawn and how many rows they contribute
print('The train subjects are {}'.format(train_25['subject_id'].unique()))
print('The shape of train_25 is {}'.format(train_25.shape))

The train subjects are [10 14]
The shape of train_25 is (442378, 6)


In [35]:
# draw 10% of the training subjects (without replacement) and keep all of their rows
train_pool = train['subject_id'].unique()
train_subjects_10 = np.random.choice(train_pool, int(0.1 * len(train_pool)), replace=False)
keep_rows_10 = data['subject_id'].isin(train_subjects_10)
train_10 = data[keep_rows_10]

# report which subjects were drawn and how many rows they contribute
print('The train subjects are {}'.format(train_10['subject_id'].unique()))
print('The shape of train_10 is {}'.format(train_10.shape))

The train subjects are [14]
The shape of train_10 is (222116, 6)


## Windowing

In [37]:
def sliding_window_samples(data, samples_per_window, overlap_ratio):
    """
    Cut a labelled DataFrame into fixed-length sliding windows and label each window with
    the most frequent encoded activity inside it.

    :param data: pandas DataFrame whose last two columns are the original and the encoded
        label; it must contain an 'encoded_activity' column. Rows are taken by position.
    :param samples_per_window: window length as number of samples (must be positive)
    :param overlap_ratio: overlap between consecutive windows as a percentage of the window
        length (e.g. 50 for 50%); None means no overlap
    :return: tuple (windows, indices, labels) of numpy arrays. windows holds every column
        except the last two, indices holds the [start, end) row positions of each window and
        labels holds one mode label per window. Returns None (after printing a message)
        when the overlap would cover the whole window.
    :raises ValueError: if samples_per_window is not positive
    """
    windows = []
    indices = []
    labels = []
    win_len = int(samples_per_window)

    # Default to non-overlapping windows. This name used to be left undefined when
    # overlap_ratio was None, which made the loop below fail with a NameError.
    overlapping_elements = 0
    if overlap_ratio is not None:
        overlapping_elements = int((overlap_ratio / 100) * win_len)
        if overlapping_elements >= win_len:
            print('Number of overlapping elements exceeds window size.')
            return
    if win_len <= 0:
        # a zero step would never advance the loop below
        raise ValueError('samples_per_window must be a positive number of samples.')

    step = win_len - overlapping_elements
    curr = 0
    # `<=` keeps the final window when the data ends exactly on a window boundary
    while curr <= len(data) - win_len:
        window = data.iloc[curr:curr + win_len]
        # Exclude the last two columns (original and encoded labels)
        windows.append(window.iloc[:, :-2].to_numpy())
        indices.append([curr, curr + win_len])

        # scipy's mode returns either a scalar or a length-1 array depending on the scipy
        # version; ravel()[0] yields one scalar label per window in both cases
        mode_value = mode(window['encoded_activity'].to_numpy())[0]
        labels.append(np.ravel(mode_value)[0])

        curr += step

    result_windows = np.array(windows)
    result_indices = np.array(indices)
    result_labels = np.array(labels)
    return result_windows, result_indices, result_labels

In [38]:
# window length in samples = sampling rate * window duration; consecutive windows overlap by 50%
sampling_rate, time_window = 50, 8
window_size = sampling_rate * time_window
overlap_ratio = 50

# window the training rows (the returned window indices are not needed here)
train_window_data, _, train_window_label = sliding_window_samples(
    train, window_size, overlap_ratio
)
print(f"shape of train window dataset ({time_window} sec with {overlap_ratio}% overlap): {train_window_data.shape}")

# window the test rows with the same settings
test_window_data, _, test_window_label = sliding_window_samples(
    test, window_size, overlap_ratio
)
print(f"shape of test window dataset ({time_window} sec with {overlap_ratio}% overlap): {test_window_data.shape}")

shape of train window dataset (8 sec with 50% overlap): (11002, 400, 4)
shape of test window dataset (8 sec with 50% overlap): (4999, 400, 4)


In [17]:
train_window_data[0]

array([[  0.        , -26.06293019,  -5.37906046,   3.46762449],
       [  0.        , -26.03897518,  -5.29554092,   3.5996475 ],
       [  0.        , -26.00493649,  -5.26486027,   3.64859342],
       ...,
       [  0.        , -26.13381695,  -4.23423555,   3.95580135],
       [  0.        , -26.16785564,  -3.89187469,   3.90532757],
       [  0.        , -26.12014629,  -4.00375785,   3.63495186]])

In [18]:
test_window_data[0]

array([[  1.        , -25.5075753 ,  -4.43600342,   6.51132932],
       [  1.        , -25.69123874,  -4.65707985,   6.63361226],
       [  1.        , -25.81781149,  -4.74408825,   6.7427993 ],
       ...,
       [  1.        , -24.91434391,  -4.57406633,   8.66257602],
       [  1.        , -24.89106616,  -4.40130126,   8.7713811 ],
       [  1.        , -24.84172637,  -4.05377369,   8.8242285 ]])

In [19]:
# drop channel 0 (the subject id, see the windows printed above) and keep the three acc axes
# NOTE: this overwrites the arrays, so re-running the cell strips one more channel
train_window_data = train_window_data[..., 1:]
test_window_data = test_window_data[..., 1:]

In [20]:
# print the shape of train and test
print('The shape of train is {}'.format(train_window_data.shape))
print('The shape of test is {}'.format(test_window_data.shape))

The shape of train is (11002, 400, 3)
The shape of test is (4999, 400, 3)


In [21]:
# length of train and test label
print('The length of train label is {}'.format(len(train_window_label)))
print('The length of test label is {}'.format(len(test_window_label)))

The length of train label is 11002
The length of test label is 4999


### 75% Data

In [36]:
# same windowing settings as for the full training set
sampling_rate, time_window = 50, 8
window_size = sampling_rate * time_window
overlap_ratio = 50

# window the rows of the 75% subject subset
train_window_data_75, _, train_window_label_75 = sliding_window_samples(
    train_75, window_size, overlap_ratio
)
print(f"shape of train window dataset ({time_window} sec with {overlap_ratio}% overlap): {train_window_data_75.shape}")

NameError: name 'sliding_window_samples' is not defined

In [None]:
# drop channel 0 (the subject id) and keep the remaining channels
train_window_data_75 = train_window_data_75[..., 1:]

In [None]:
print('The shape of train_75 is {}'.format(train_window_data_75.shape))
print('The length of train_75 label is {}'.format(len(train_window_label_75)))

### 50% Data

In [None]:
# window the rows of the 50% subject subset with the shared window settings
train_window_data_50, _, train_window_label_50 = sliding_window_samples(
    train_50, window_size, overlap_ratio
)
print(f"shape of train window dataset ({time_window} sec with {overlap_ratio}% overlap): {train_window_data_50.shape}")

# drop channel 0 (the subject id) and keep the remaining channels
train_window_data_50 = train_window_data_50[..., 1:]

print('The shape of train_50 is {}'.format(train_window_data_50.shape))
print('The length of train_50 label is {}'.format(len(train_window_label_50)))

### 25% Data

In [None]:
# window the rows of the 25% subject subset with the shared window settings
train_window_data_25, _, train_window_label_25 = sliding_window_samples(
    train_25, window_size, overlap_ratio
)
# drop channel 0 (the subject id) and keep the remaining channels
train_window_data_25 = train_window_data_25[..., 1:]

print('The shape of train_25 is {}'.format(train_window_data_25.shape))
print('The length of train_25 label is {}'.format(len(train_window_label_25)))

### 10% Data

In [None]:
# window the rows of the 10% subject subset with the shared window settings
train_window_data_10, _, train_window_label_10 = sliding_window_samples(
    train_10, window_size, overlap_ratio
)
# drop channel 0 (the subject id) and keep the remaining channels
train_window_data_10 = train_window_data_10[..., 1:]

print('The shape of train_10 is {}'.format(train_window_data_10.shape))
print('The length of train_10 label is {}'.format(len(train_window_label_10)))

## Generate Subset of Training Data

In [22]:
# number of training windows to keep: 75% of all of them, rounded down
n_train_windows = len(train_window_data)
sample_size = int(0.75 * n_train_windows)

# draw distinct window positions at random
indices = random.sample(range(n_train_windows), sample_size)

# gather the chosen windows and their labels, in the order they were drawn
sampled_train_window_data_75 = []
sampled_train_window_label_75 = []
for window_idx in indices:
    sampled_train_window_data_75.append(train_window_data[window_idx])
    sampled_train_window_label_75.append(train_window_label[window_idx])

# print the shape of sampled train data and label
print('The shape of sampled train label is {}'.format(np.array(sampled_train_window_label_75).shape))
print('The shape of sampled train is {}'.format(np.array(sampled_train_window_data_75).shape))



The shape of sampled train label is (8251,)
The shape of sampled train is (8251, 400, 3)


In [23]:
# number of training windows to keep: 50% of all of them, rounded down
n_train_windows = len(train_window_data)
sample_size = int(0.5 * n_train_windows)

# draw distinct window positions at random
indices = random.sample(range(n_train_windows), sample_size)

# gather the chosen windows and their labels, in the order they were drawn
sampled_train_window_data_50 = []
sampled_train_window_label_50 = []
for window_idx in indices:
    sampled_train_window_data_50.append(train_window_data[window_idx])
    sampled_train_window_label_50.append(train_window_label[window_idx])

# print the shape of sampled train data and label
print('The shape of sampled train label is {}'.format(np.array(sampled_train_window_label_50).shape))
print('The shape of sampled train is {}'.format(np.array(sampled_train_window_data_50).shape))

The shape of sampled train label is (5501,)
The shape of sampled train is (5501, 400, 3)


In [24]:
# number of training windows to keep: 25% of all of them, rounded down
n_train_windows = len(train_window_data)
sample_size = int(0.25 * n_train_windows)

# draw distinct window positions at random
indices = random.sample(range(n_train_windows), sample_size)

# gather the chosen windows and their labels, in the order they were drawn
sampled_train_window_data_25 = []
sampled_train_window_label_25 = []
for window_idx in indices:
    sampled_train_window_data_25.append(train_window_data[window_idx])
    sampled_train_window_label_25.append(train_window_label[window_idx])

# print the shape of sampled train data and label
print('The shape of sampled train label is {}'.format(np.array(sampled_train_window_label_25).shape))
print('The shape of sampled train is {}'.format(np.array(sampled_train_window_data_25).shape))


The shape of sampled train label is (2750,)
The shape of sampled train is (2750, 400, 3)


In [25]:
# number of training windows to keep: 10% of all of them, rounded down
n_train_windows = len(train_window_data)
sample_size = int(0.1 * n_train_windows)

# draw distinct window positions at random
indices = random.sample(range(n_train_windows), sample_size)

# gather the chosen windows and their labels, in the order they were drawn
sampled_train_window_data_10 = []
sampled_train_window_label_10 = []
for window_idx in indices:
    sampled_train_window_data_10.append(train_window_data[window_idx])
    sampled_train_window_label_10.append(train_window_label[window_idx])

# print the shape of sampled train data and label
print('The shape of sampled train label is {}'.format(np.array(sampled_train_window_label_10).shape))
print('The shape of sampled train is {}'.format(np.array(sampled_train_window_data_10).shape))

The shape of sampled train label is (1100,)
The shape of sampled train is (1100, 400, 3)


## Generate dataloader

In [26]:
# generate dataloader for train and test
def generate_dataloader(data, label, batch_size, is_shuffle=True):
    """
    Wrap samples and their labels in a single PyTorch DataLoader.

    :param data: samples as a numpy array, or a list that np.array can stack
    :param label: one label per sample, as a numpy array or a list
    :param batch_size: number of samples per batch
    :param is_shuffle: forwarded to DataLoader's ``shuffle`` argument (default True)
    :return: DataLoader yielding (float tensor of samples, long tensor of labels) batches
    """
    # lists are stacked into arrays first; anything else is handed to torch.from_numpy as is
    features = np.array(data) if isinstance(data, list) else data
    targets = np.array(label) if isinstance(label, list) else label

    # float inputs for the network, long class ids for the classification loss
    dataset = TensorDataset(
        torch.from_numpy(features).float(),
        torch.from_numpy(targets).long(),
    )
    return DataLoader(dataset, batch_size=batch_size, shuffle=is_shuffle)


In [27]:
# loaders over the full windowed splits; the test loader keeps its order (no shuffling)
train_dataloader = generate_dataloader(
    data=train_window_data, label=train_window_label, batch_size=batch_size
)
test_dataloader = generate_dataloader(
    data=test_window_data, label=test_window_label, batch_size=batch_size, is_shuffle=False
)

# loaders over the randomly sampled 75% / 50% / 25% / 10% training-window subsets
train_dataloader_75 = generate_dataloader(
    data=sampled_train_window_data_75, label=sampled_train_window_label_75, batch_size=batch_size
)
train_dataloader_50 = generate_dataloader(
    data=sampled_train_window_data_50, label=sampled_train_window_label_50, batch_size=batch_size
)
train_dataloader_25 = generate_dataloader(
    data=sampled_train_window_data_25, label=sampled_train_window_label_25, batch_size=batch_size
)
train_dataloader_10 = generate_dataloader(
    data=sampled_train_window_data_10, label=sampled_train_window_label_10, batch_size=batch_size
)

# Full Supervised

In [28]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CNNFeatureExtractor(nn.Module):
    """
    1D convolutional classifier: three conv -> ReLU -> max-pool stages followed by two
    fully connected layers.

    The network expects input of shape (batch, 3, input_length) and returns raw class
    scores of shape (batch, num_classes); no softmax is applied inside the model.

    :param num_classes: number of output classes (defaults to the notebook-level value)
    :param input_length: number of time steps per window; the default of 400 reproduces
        the previously hard-coded 12800 input features of the first linear layer
    :raises ValueError: if input_length is too short to survive the three pooling stages
    """

    def __init__(self, num_classes=num_classes, input_length=400):
        super().__init__()

        # kernel_size=3 with padding=1 and stride=1 keeps the sequence length unchanged
        self.conv1 = nn.Conv1d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv1d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        # each pooling step halves the length (rounding down)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)

        # three halvings with floor division are equivalent to one floor division by 8
        pooled_length = int(input_length) // 8
        if pooled_length < 1:
            raise ValueError('input_length must be at least 8 samples.')

        self.flatten = nn.Flatten()
        # 256 channels * pooled_length time steps (256 * 50 = 12800 for 400-sample windows)
        self.fc1 = nn.Linear(256 * pooled_length, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Map a (batch, 3, input_length) tensor to (batch, num_classes) class scores."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

## Train and Test Functions

In [29]:
# create training function
def train(model, train_loader, criterion, optimizer, device):
    """
    Run one optimisation pass over ``train_loader`` and return the mean batch loss.

    NOTE: defining this function rebinds the notebook-level name ``train``, which an
    earlier cell assigns to the training DataFrame.

    :param model: network to optimise; switched to training mode
    :param train_loader: iterable of (inputs, labels) batches that supports ``len``
    :param criterion: loss called as ``criterion(outputs, labels)``
    :param optimizer: optimizer stepped once per batch
    :param device: device the batches are moved to
    :return: sum of the per-batch losses divided by the number of batches
    """
    model.train()
    epoch_loss = 0.0
    for batch in train_loader:
        # swap dims 1 and 2 of the inputs before they are fed to the model
        features = batch[0].to(device).transpose(1, 2)
        targets = batch[1].to(device)

        # clear old gradients, then forward, backward and parameter update
        optimizer.zero_grad()
        batch_loss = criterion(model(features), targets)
        batch_loss.backward()
        optimizer.step()

        epoch_loss += batch_loss.item()
    return epoch_loss / len(train_loader)

In [30]:
def test(model, test_loader, criterion, device):
    """
    Evaluate ``model`` on ``test_loader`` without tracking gradients.

    NOTE: defining this function rebinds the notebook-level name ``test``, which an
    earlier cell assigns to the test DataFrame.

    :param model: network to evaluate; switched to eval mode
    :param test_loader: loader of (inputs, labels) batches exposing ``.dataset`` and ``len``
    :param criterion: loss called as ``criterion(outputs, labels)``
    :param device: device the batches are moved to
    :return: tuple (mean batch loss, accuracy over the whole dataset, weighted F1 score)
    """
    model.eval()
    total_loss = 0.0
    n_correct = 0
    y_true = []
    y_pred = []

    with torch.no_grad():
        for batch in test_loader:
            # swap dims 1 and 2 of the inputs, as in the training loop
            features = batch[0].to(device).transpose(1, 2)
            targets = batch[1].to(device)

            logits = model(features)
            total_loss += criterion(logits, targets).item()

            # predicted class = position of the largest score along dim 1
            predicted = logits.max(dim=1).indices
            n_correct += (predicted == targets).sum().item()

            # keep every label/prediction pair for the F1 computation below
            y_true.extend(targets.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())

    # fraction of correctly classified samples over the whole dataset
    accuracy = n_correct / len(test_loader.dataset)

    # 'weighted' averages the per-class F1 scores weighted by class support
    f1 = f1_score(y_true, y_pred, average='weighted')

    return total_loss / len(test_loader), accuracy, f1


In [31]:
# create function to train and test model
def train_and_test(model, train_loader, test_loader, criterion, optimizer, device, num_epochs):
    """
    Alternate one training pass and one evaluation pass for ``num_epochs`` epochs.

    :param model: network to train and evaluate
    :param train_loader: loader passed to ``train``
    :param test_loader: loader passed to ``test``
    :param criterion: loss function shared by both phases
    :param optimizer: optimizer used in the training phase
    :param device: device the batches are moved to
    :param num_epochs: number of train/test rounds
    :return: four lists with one entry per epoch: train losses, test losses,
        test accuracies and test F1 scores
    """
    history = {'train_loss': [], 'test_loss': [], 'test_accuracy': [], 'test_f1': []}

    for epoch in range(num_epochs):
        # one optimisation pass over the training data
        train_loss = train(model, train_loader, criterion, optimizer, device)
        history['train_loss'].append(train_loss)

        # evaluate on the held-out data after every epoch
        test_loss, test_accuracy, test_f1 = test(model, test_loader, criterion, device)
        history['test_loss'].append(test_loss)
        history['test_accuracy'].append(test_accuracy)
        history['test_f1'].append(test_f1)

        # one summary line per epoch
        print(f"Epoch: {epoch + 1}/{num_epochs}.. Train Loss: {train_loss:.3f}.. "
              f"Test Loss: {test_loss:.3f}.. Test Accuracy: {test_accuracy:.3f}.. Test F1 Score: {test_f1:.3f}")

    return (
        history['train_loss'],
        history['test_loss'],
        history['test_accuracy'],
        history['test_f1'],
    )

## Train and Test CNN Model

### Full Train Data

In [37]:
# fresh model, loss and optimizer for the run on the full training set
model = CNNFeatureExtractor(num_classes=num_classes)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# train for 30 epochs, evaluating on the test loader after every epoch
num_epochs = 30
train_losses, test_losses, test_accuracies, test_f1_scores = train_and_test(
    model=model,
    train_loader=train_dataloader,
    test_loader=test_dataloader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    num_epochs=num_epochs,
)

Epoch: 1/30.. Train Loss: 0.855.. Test Loss: 1.019.. Test Accuracy: 0.677.. Test F1 Score: 0.668
Epoch: 2/30.. Train Loss: 0.525.. Test Loss: 1.008.. Test Accuracy: 0.682.. Test F1 Score: 0.691
Epoch: 3/30.. Train Loss: 0.442.. Test Loss: 1.080.. Test Accuracy: 0.705.. Test F1 Score: 0.714
Epoch: 4/30.. Train Loss: 0.345.. Test Loss: 1.228.. Test Accuracy: 0.722.. Test F1 Score: 0.713
Epoch: 5/30.. Train Loss: 0.290.. Test Loss: 1.189.. Test Accuracy: 0.734.. Test F1 Score: 0.736
Epoch: 6/30.. Train Loss: 0.257.. Test Loss: 1.392.. Test Accuracy: 0.747.. Test F1 Score: 0.753
Epoch: 7/30.. Train Loss: 0.240.. Test Loss: 1.307.. Test Accuracy: 0.732.. Test F1 Score: 0.738
Epoch: 8/30.. Train Loss: 0.232.. Test Loss: 1.524.. Test Accuracy: 0.682.. Test F1 Score: 0.694
Epoch: 9/30.. Train Loss: 0.193.. Test Loss: 1.395.. Test Accuracy: 0.722.. Test F1 Score: 0.725
Epoch: 10/30.. Train Loss: 0.175.. Test Loss: 1.560.. Test Accuracy: 0.688.. Test F1 Score: 0.686
Epoch: 11/30.. Train Loss: 0.

### 75% Train Data

In [36]:
# fresh model, loss and optimizer for the run on the sampled 75% of training windows
# NOTE: this overwrites the model and the metric lists of the previous run
model = CNNFeatureExtractor(num_classes=num_classes)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# train for 30 epochs, evaluating on the test loader after every epoch
num_epochs = 30
train_losses, test_losses, test_accuracies, test_f1_scores = train_and_test(
    model=model,
    train_loader=train_dataloader_75,
    test_loader=test_dataloader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    num_epochs=num_epochs,
)

Epoch: 1/30.. Train Loss: 0.811.. Test Loss: 0.997.. Test Accuracy: 0.652.. Test F1 Score: 0.652
Epoch: 2/30.. Train Loss: 0.506.. Test Loss: 1.295.. Test Accuracy: 0.670.. Test F1 Score: 0.659
Epoch: 3/30.. Train Loss: 0.390.. Test Loss: 1.314.. Test Accuracy: 0.635.. Test F1 Score: 0.642
Epoch: 4/30.. Train Loss: 0.327.. Test Loss: 1.193.. Test Accuracy: 0.674.. Test F1 Score: 0.682
Epoch: 5/30.. Train Loss: 0.311.. Test Loss: 1.013.. Test Accuracy: 0.722.. Test F1 Score: 0.727
Epoch: 6/30.. Train Loss: 0.244.. Test Loss: 1.272.. Test Accuracy: 0.648.. Test F1 Score: 0.653
Epoch: 7/30.. Train Loss: 0.224.. Test Loss: 1.226.. Test Accuracy: 0.653.. Test F1 Score: 0.657
Epoch: 8/30.. Train Loss: 0.181.. Test Loss: 1.507.. Test Accuracy: 0.645.. Test F1 Score: 0.651
Epoch: 9/30.. Train Loss: 0.163.. Test Loss: 1.869.. Test Accuracy: 0.631.. Test F1 Score: 0.633
Epoch: 10/30.. Train Loss: 0.154.. Test Loss: 1.895.. Test Accuracy: 0.668.. Test F1 Score: 0.677
Epoch: 11/30.. Train Loss: 0.

### 50% Train Data

In [38]:
# fresh model, loss and optimizer for the run on the sampled 50% of training windows
# NOTE: this overwrites the model and the metric lists of the previous run
model = CNNFeatureExtractor(num_classes=num_classes)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# train for 30 epochs, evaluating on the test loader after every epoch
num_epochs = 30
train_losses, test_losses, test_accuracies, test_f1_scores = train_and_test(
    model=model,
    train_loader=train_dataloader_50,
    test_loader=test_dataloader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    num_epochs=num_epochs,
)

Epoch: 1/30.. Train Loss: 0.981.. Test Loss: 1.078.. Test Accuracy: 0.632.. Test F1 Score: 0.642
Epoch: 2/30.. Train Loss: 0.608.. Test Loss: 1.226.. Test Accuracy: 0.599.. Test F1 Score: 0.634
Epoch: 3/30.. Train Loss: 0.485.. Test Loss: 1.133.. Test Accuracy: 0.661.. Test F1 Score: 0.669
Epoch: 4/30.. Train Loss: 0.421.. Test Loss: 1.043.. Test Accuracy: 0.718.. Test F1 Score: 0.723
Epoch: 5/30.. Train Loss: 0.361.. Test Loss: 1.227.. Test Accuracy: 0.718.. Test F1 Score: 0.718
Epoch: 6/30.. Train Loss: 0.270.. Test Loss: 1.500.. Test Accuracy: 0.712.. Test F1 Score: 0.714
Epoch: 7/30.. Train Loss: 0.251.. Test Loss: 1.743.. Test Accuracy: 0.644.. Test F1 Score: 0.660
Epoch: 8/30.. Train Loss: 0.211.. Test Loss: 1.566.. Test Accuracy: 0.647.. Test F1 Score: 0.665
Epoch: 9/30.. Train Loss: 0.185.. Test Loss: 1.822.. Test Accuracy: 0.659.. Test F1 Score: 0.664
Epoch: 10/30.. Train Loss: 0.214.. Test Loss: 2.041.. Test Accuracy: 0.640.. Test F1 Score: 0.653
Epoch: 11/30.. Train Loss: 0.

### 25% Train Data

In [39]:
# fresh model, loss and optimizer for the run on the sampled 25% of training windows
# NOTE: this overwrites the model and the metric lists of the previous run
model = CNNFeatureExtractor(num_classes=num_classes)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# train for 30 epochs, evaluating on the test loader after every epoch
num_epochs = 30
train_losses, test_losses, test_accuracies, test_f1_scores = train_and_test(
    model=model,
    train_loader=train_dataloader_25,
    test_loader=test_dataloader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    num_epochs=num_epochs,
)

Epoch: 1/30.. Train Loss: 1.173.. Test Loss: 0.985.. Test Accuracy: 0.686.. Test F1 Score: 0.692
Epoch: 2/30.. Train Loss: 0.727.. Test Loss: 1.049.. Test Accuracy: 0.718.. Test F1 Score: 0.713
Epoch: 3/30.. Train Loss: 0.588.. Test Loss: 0.997.. Test Accuracy: 0.672.. Test F1 Score: 0.652
Epoch: 4/30.. Train Loss: 0.500.. Test Loss: 1.046.. Test Accuracy: 0.661.. Test F1 Score: 0.644
Epoch: 5/30.. Train Loss: 0.468.. Test Loss: 1.267.. Test Accuracy: 0.638.. Test F1 Score: 0.653
Epoch: 6/30.. Train Loss: 0.456.. Test Loss: 1.136.. Test Accuracy: 0.623.. Test F1 Score: 0.632
Epoch: 7/30.. Train Loss: 0.368.. Test Loss: 1.392.. Test Accuracy: 0.640.. Test F1 Score: 0.660
Epoch: 8/30.. Train Loss: 0.317.. Test Loss: 1.467.. Test Accuracy: 0.665.. Test F1 Score: 0.678
Epoch: 9/30.. Train Loss: 0.328.. Test Loss: 1.890.. Test Accuracy: 0.564.. Test F1 Score: 0.552
Epoch: 10/30.. Train Loss: 0.461.. Test Loss: 1.201.. Test Accuracy: 0.719.. Test F1 Score: 0.723
Epoch: 11/30.. Train Loss: 0.

### 10% Train Data

In [40]:
# fresh model, loss and optimizer for the run on the sampled 10% of training windows
# NOTE: this overwrites the model and the metric lists of the previous run
model = CNNFeatureExtractor(num_classes=num_classes)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# train for 30 epochs, evaluating on the test loader after every epoch
num_epochs = 30
train_losses, test_losses, test_accuracies, test_f1_scores = train_and_test(
    model=model,
    train_loader=train_dataloader_10,
    test_loader=test_dataloader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    num_epochs=num_epochs,
)

Epoch: 1/30.. Train Loss: 1.656.. Test Loss: 1.417.. Test Accuracy: 0.450.. Test F1 Score: 0.455
Epoch: 2/30.. Train Loss: 0.908.. Test Loss: 1.216.. Test Accuracy: 0.551.. Test F1 Score: 0.548
Epoch: 3/30.. Train Loss: 0.706.. Test Loss: 1.172.. Test Accuracy: 0.643.. Test F1 Score: 0.631
Epoch: 4/30.. Train Loss: 0.572.. Test Loss: 1.328.. Test Accuracy: 0.618.. Test F1 Score: 0.627
Epoch: 5/30.. Train Loss: 0.478.. Test Loss: 1.467.. Test Accuracy: 0.625.. Test F1 Score: 0.638
Epoch: 6/30.. Train Loss: 0.417.. Test Loss: 1.507.. Test Accuracy: 0.626.. Test F1 Score: 0.635
Epoch: 7/30.. Train Loss: 0.437.. Test Loss: 1.384.. Test Accuracy: 0.604.. Test F1 Score: 0.613
Epoch: 8/30.. Train Loss: 0.389.. Test Loss: 1.651.. Test Accuracy: 0.601.. Test F1 Score: 0.609
Epoch: 9/30.. Train Loss: 0.335.. Test Loss: 2.084.. Test Accuracy: 0.592.. Test F1 Score: 0.607
Epoch: 10/30.. Train Loss: 0.294.. Test Loss: 1.911.. Test Accuracy: 0.581.. Test F1 Score: 0.597
Epoch: 11/30.. Train Loss: 0.

In [33]:
# import torch
# import torch.nn as nn
# import torch.nn.functional as F

# class SupervisedTPN(nn.Module):
#     def __init__(self, num_classes):
#         super(SupervisedTPN, self).__init__()
#         self.trunk = nn.Sequential(
#             nn.Conv1d(in_channels=3, out_channels=32, kernel_size=24, stride=1),
#             nn.ReLU(),
#             nn.Dropout(0.1),
#             nn.Conv1d(in_channels=32, out_channels=64, kernel_size=16, stride=1),
#             nn.ReLU(),
#             nn.Dropout(0.1),
#             nn.Conv1d(in_channels=64, out_channels=96, kernel_size=8, stride=1),
#             nn.ReLU(),
#             nn.Dropout(0.1),
#             nn.AdaptiveMaxPool1d(output_size=1)
#         )

#         self.head = nn.Sequential(
#             nn.Linear(96, 1024),  # Adjusted to match the document's description
#             nn.ReLU(),
#             nn.Linear(1024, num_classes)  # Softmax applied externally during training
#         )
#         # No softmax here as it's included in nn.CrossEntropyLoss during training

#     def forward(self, x):
#         x = self.trunk(x)
#         x = x.view(x.size(0), -1)  # Flatten the output for the fully-connected layer
#         output = self.head(x)
#         return output


In [34]:
# import torch.optim as optim

# model = SupervisedTPN(num_classes=10)  # Example for 10 classes
# optimizer = optim.Adam(model.parameters(), lr=0.0003, weight_decay=0.0001)
# criterion = nn.CrossEntropyLoss()

# for epoch in range(num_epochs):
#     model.train()
#     total_loss = 0

#     for data, labels in train_loader:  # Assuming a single DataLoader for supervised learning
#         optimizer.zero_grad()
#         data = data.transpose(1, 2)  # Transpose to match input shape
#         outputs = model(data)
#         loss = criterion(outputs, labels)
#         loss.backward()
#         optimizer.step()
#         total_loss += loss.item()

#     avg_loss = total_loss / len(train_loader)
#     print(f"Epoch {epoch + 1}, Average Training Loss: {avg_loss}")
