# Objective

This notebook implements a deep learning convolutional neural network with transfer learning from models Harnet10 and Harnet30. 

Author: Kate Lassiter

Reference: [source](https://github.com/OxWearables/Oxford_Wearables_Activity_Recognition/blob/master/6_self_supervised_learning/ssl_tutorial.ipynb)

In [477]:
import sys
sys.path.append("../")
import joblib
import os
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader
from tqdm import tqdm
from sklearn.model_selection import GroupShuffleSplit
from sklearn.preprocessing import LabelEncoder
from utils.data import NormalDataset, resize, get_inverse_class_weights
from utils.utils import EarlyStopping
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [643]:
# Data Tranformations
csv_file_path = "/XXXX.csv"
hip_data = pd.read_csv(csv_file_path)

num_rows = hip_data.shape[0] # Add a 'group' field, dividing the dataset into 10 equal parts
group_size = num_rows // 10  # Calculate the number of rows per group
# Create an array with group labels (1 to 10) repeated for each group size
group_labels = np.repeat(np.arange(1, 11), group_size)

# If remaining rows due to integer division assign them to last group
if len(group_labels) < num_rows:
    group_labels = np.concatenate([group_labels, np.full(num_rows - len(group_labels), 10)])
hip_data['group'] = group_labels # Assign the group labels 

X = hip_data[['x', 'y', 'z']].values
y = hip_data['annotation'].values
groups = hip_data['group'].values

# Reshape the data into windows of size X
def create_windows(data, labels, groups, window_size):
    num_windows = data.shape[0] // window_size
    X_windows = data[:num_windows * window_size].reshape(num_windows, window_size, -1)
    y_windows = labels[window_size-1:num_windows * window_size:window_size]  # One label per window
    group_windows = groups[window_size-1:num_windows * window_size:window_size]  # One group per window
    return X_windows, y_windows, group_windows

window_size = 1000
X_windows, y_windows, group_windows = create_windows(X, y, groups, window_size)

# Split the data into train, validation, and test sets
x_train, x_temp, y_train, y_temp, group_train, group_temp = train_test_split(
    X_windows, y_windows, group_windows, test_size=0.3, random_state=42
)
x_val, x_test, y_val, y_test, group_val, group_test = train_test_split(
    x_temp, y_temp, group_temp, test_size=0.5, random_state=42
)
# Output shapes:
print(((x_train.shape, y_train.shape, group_train.shape),
       (x_val.shape, y_val.shape, group_val.shape),
       (x_test.shape, y_test.shape, group_test.shape)))

(((251, 1000, 3), (251,), (251,)), ((54, 1000, 3), (54,), (54,)), ((54, 1000, 3), (54,), (54,)))


In [645]:
# construct dataloaders
train_dataset = NormalDataset(x_train, y_train, group_train, name="training", transform=True)
val_dataset = NormalDataset(x_val, y_val, group_val, name="validation")
test_dataset = NormalDataset(x_test, y_test, group_test, name="test")
train_loader = DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
    num_workers=2,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=0,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=0,
)

Using cache found in /Users/kat/.cache/torch/hub/OxWearables_ssl-wearables_main
  pretrained_dict = torch.load(weight_path, map_location=my_device)


131 Weights loaded
training set sample count : 251
validation set sample count : 54
test set sample count : 54


In [615]:
for batch in test_loader:
    print(batch)
    print(len(batch[0]),len(batch[1]),len(batch[2]))
    break

[tensor([[[ 0.8423,  0.8298,  0.8320,  ...,  0.7434,  0.7612,  0.7713],
         [ 0.3924,  0.3924,  0.3924,  ...,  0.3836,  0.3744,  0.3764],
         [-0.3874, -0.3874, -0.3874,  ..., -0.6145, -0.6145, -0.6145]],

        [[ 0.7674,  0.7674,  0.7674,  ...,  0.7813,  0.7840,  0.7835],
         [ 0.0389,  0.0389,  0.0389,  ...,  0.0389,  0.0389,  0.0389],
         [-0.6287, -0.6286, -0.6295,  ..., -0.6294, -0.6274, -0.6440]],

        [[ 0.7351,  0.7351,  0.7348,  ...,  0.7513,  0.7513,  0.7513],
         [ 0.2319,  0.2319,  0.2319,  ...,  0.2819,  0.2798,  0.2801],
         [-0.6260, -0.6260, -0.6260,  ..., -0.5935, -0.5954, -0.5807]],

        ...,

        [[ 0.7511,  0.7511,  0.7511,  ...,  0.7532,  0.7431,  0.7326],
         [ 0.3674,  0.3583,  0.3621,  ...,  0.3281,  0.3262,  0.3352],
         [-0.5334, -0.5313, -0.5408,  ..., -0.5821, -0.5801, -0.5892]],

        [[ 0.8443,  0.8675,  0.9617,  ...,  1.0334,  1.0448,  1.0412],
         [-0.2197, -0.2229, -0.2423,  ..., -0.1639, -0

What follows directly below is from [source]('https://github.com/OxWearables/Oxford_Wearables_Activity_Recognition/blob/master/6_self_supervised_learning/ssl_tutorial.ipynb')

In [653]:
def train(model, train_loader, val_loader, device, weights=None):
    """
    Iterate over the training dataloader and train a pytorch model.
    After each epoch, validate model and early stop when validation loss function bottoms out.

    Trained model weights will be saved to disk (state_dict.pt).

    :param nn.Module model: pytorch model
    :param train_loader: training data loader
    :param val_loader: validation data loader
    :param str device: pytorch map device.
    :param weights: training class weights (to enable weighted loss function)
    """

    state_dict = 'state_dict.pt'

    # REDUCE THIS IF YOU WANT TO SPEED UP THINGS, E.G. 2
    num_epoch = 5

    optimizer = torch.optim.Adam(
        model.parameters(), lr=0.0001, amsgrad=True
    )

    if weights:
        weights = torch.FloatTensor(weights).to(device)
        loss_fn = nn.CrossEntropyLoss(weight=weights)
    else:
        loss_fn = nn.CrossEntropyLoss()

    early_stopping = EarlyStopping(
        patience=5, path=state_dict, verbose=True
    )

    for epoch in range(num_epoch):
        model.train()
        train_losses = []
        train_acces = []
        for i, (x, y, _) in enumerate(tqdm(train_loader)):
            x.requires_grad_(True)
            x = x.to(device, dtype=torch.float)
            true_y = y.to(device, dtype=torch.long)
            optimizer.zero_grad()
            logits = model(x)
            loss = loss_fn(logits, true_y)
            loss.backward()
            optimizer.step()

            pred_y = torch.argmax(logits, dim=1)
            train_acc = torch.sum(pred_y == true_y)
            train_acc = train_acc / (pred_y.size()[0])

            train_losses.append(loss.cpu().detach())
            train_acces.append(train_acc.cpu().detach())

        val_loss, val_acc = _validate_model(model, val_loader, device, loss_fn)

        epoch_len = len(str(num_epoch))
        print_msg = (
            f"[{epoch:>{epoch_len}}/{num_epoch:>{epoch_len}}] | "
            + f"train_loss: {np.mean(train_losses):.3f} | "
            + f"train_acc: {np.mean(train_acces):.3f} | "
            + f"val_loss: {val_loss:.3f} | "
            + f"val_acc: {val_acc:.2f}"
        )

        early_stopping(val_loss, model)
        print(print_msg)

        if early_stopping.early_stop:
            print('Early stopping')
            print(f'SSLNet weights saved to {state_dict}')
            break


def _validate_model(model, val_loader, device, loss_fn):
    """ Iterate over a validation data loader and return mean model loss and accuracy. """
    model.eval()
    losses = []
    acces = []
    for i, (x, y, _) in enumerate(val_loader):
        with torch.inference_mode():
            x = x.to(device, dtype=torch.float)
            true_y = y.to(device, dtype=torch.long)

            logits = model(x)
            loss = loss_fn(logits, true_y)

            pred_y = torch.argmax(logits, dim=1)

            val_acc = torch.sum(pred_y == true_y)
            val_acc = val_acc / (list(pred_y.size())[0])

            losses.append(loss.cpu().detach())
            acces.append(val_acc.cpu().detach())
    losses = np.array(losses)
    acces = np.array(acces)
    return np.mean(losses), np.mean(acces)


def predict(model, data_loader, device):
    """
    Iterate over the dataloader and do inference with a pytorch model.

    :param nn.Module model: pytorch Module
    :param data_loader: pytorch dataloader
    :param str device: pytorch map device
    :return: true labels, model predictions, pids
    :rtype: (np.ndarray, np.ndarray, np.ndarray)
    """

    from tqdm import tqdm

    predictions_list = []
    true_list = []
    pid_list = []
    model.eval()

    for i, (x, y, pid) in enumerate(tqdm(data_loader)):
        with torch.inference_mode():
            x = x.to(device, dtype=torch.float)
            logits = model(x)
            true_list.append(y)
            pred_y = torch.argmax(logits, dim=1)
            predictions_list.append(pred_y.cpu())
            pid_list.extend(pid)
    true_list = torch.cat(true_list)
    predictions_list = torch.cat(predictions_list)

    return (
        torch.flatten(true_list).numpy(),
        torch.flatten(predictions_list).numpy(),
        np.array(pid_list),
    )

In [543]:
# Load the pretrained model
os.environ['GITHUB_TOKEN'] = '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'
repo = 'OxWearables/ssl-wearables'
sslnet: nn.Module = torch.hub.load(repo, 'harnet30', trust_repo=True, class_num=2, pretrained=True, weights_only=False)
sslnet.to(device)

Using cache found in /Users/kat/.cache/torch/hub/OxWearables_ssl-wearables_main
  pretrained_dict = torch.load(weight_path, map_location=my_device)


131 Weights loaded


Resnet(
  (feature_extractor): Sequential(
    (layer1): Sequential(
      (0): Conv1d(3, 64, kernel_size=(5,), stride=(1,), padding=(2,), bias=False, padding_mode=circular)
      (1): ResBlock(
        (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv1): Conv1d(64, 64, kernel_size=(5,), stride=(1,), padding=(2,), bias=False, padding_mode=circular)
        (conv2): Conv1d(64, 64, kernel_size=(5,), stride=(1,), padding=(2,), bias=False, padding_mode=circular)
        (relu): ReLU(inplace=True)
      )
      (2): ResBlock(
        (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv1): Conv1d(64, 64, kernel_size=(5,), stride=(1,), padding=(2,), bias=False, padding_mode=circular)
        (conv2): Conv1d(6

In [655]:
# Train the model. The trained weights will be saved in the file 'state_dict.pt'
device="cpu"
train(sslnet, train_loader, val_loader, device, get_inverse_class_weights(y_train))
#

Inverse class weights: 
[1.004, 251.0]


  0%|                                                     | 0/2 [00:00<?, ?it/s]python(52872) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(52873) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


here torch.Size([128, 3, 1000])


 50%|██████████████████████▌                      | 1/2 [00:11<00:11, 11.44s/it]

here torch.Size([123, 3, 1000])


100%|█████████████████████████████████████████████| 2/2 [00:21<00:00, 10.77s/it]


Validation loss decreased (inf --> 0.000164). Saving model ...
[0/5] | train_loss: 0.153 | train_acc: 0.957 | val_loss: 0.000 | val_acc: 1.00


  0%|                                                     | 0/2 [00:00<?, ?it/s]python(52889) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(52890) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


here torch.Size([128, 3, 1000])


 50%|██████████████████████▌                      | 1/2 [00:11<00:11, 11.68s/it]

here torch.Size([123, 3, 1000])


100%|█████████████████████████████████████████████| 2/2 [00:21<00:00, 10.67s/it]


Validation loss decreased (0.000164 --> 0.000005). Saving model ...
[1/5] | train_loss: 16.323 | train_acc: 0.996 | val_loss: 0.000 | val_acc: 1.00


  0%|                                                     | 0/2 [00:00<?, ?it/s]python(52915) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(52916) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


here torch.Size([128, 3, 1000])


 50%|██████████████████████▌                      | 1/2 [00:10<00:10, 10.99s/it]

here torch.Size([123, 3, 1000])


100%|█████████████████████████████████████████████| 2/2 [00:22<00:00, 11.03s/it]


Validation loss decreased (0.000005 --> 0.000000). Saving model ...
[2/5] | train_loss: 2.460 | train_acc: 0.996 | val_loss: 0.000 | val_acc: 1.00


  0%|                                                     | 0/2 [00:00<?, ?it/s]python(52934) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(52935) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


here torch.Size([128, 3, 1000])


 50%|██████████████████████▌                      | 1/2 [00:16<00:16, 16.94s/it]

here torch.Size([123, 3, 1000])


100%|█████████████████████████████████████████████| 2/2 [00:34<00:00, 17.15s/it]


Validation loss decreased (0.000000 --> 0.000000). Saving model ...
[3/5] | train_loss: 0.000 | train_acc: 1.000 | val_loss: 0.000 | val_acc: 1.00


  0%|                                                     | 0/2 [00:00<?, ?it/s]python(52957) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(52958) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


here torch.Size([128, 3, 1000])


 50%|██████████████████████▌                      | 1/2 [00:18<00:18, 18.21s/it]

here torch.Size([123, 3, 1000])


100%|█████████████████████████████████████████████| 2/2 [00:33<00:00, 16.82s/it]


Validation loss decreased (0.000000 --> 0.000000). Saving model ...
[4/5] | train_loss: 0.041 | train_acc: 0.988 | val_loss: 0.000 | val_acc: 1.00


In [651]:
# helper function to calculate classification performance scores: precision, recall, F1 and Kappa
def classification_scores(y_test, y_test_pred):
    import sklearn.metrics as metrics

    cohen_kappa = metrics.cohen_kappa_score(y_test, y_test_pred)
    precision = metrics.precision_score(
        y_test, y_test_pred, average="macro", zero_division=0
    )
    recall = metrics.recall_score(
        y_test, y_test_pred, average="macro", zero_division=0
    )
    f1 = metrics.f1_score(
        y_test, y_test_pred, average="macro", zero_division=0
    )

    data = {
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "kappa": cohen_kappa,
    }

    df = pd.DataFrame(data, index=[0])  # use a dataframe because this prints nicely later

    return df

In [657]:
# load fine tuned weights (best weights prior to early-stopping) and do inference on the test set
model_dict = torch.load('state_dict.pt', map_location=device)
sslnet.load_state_dict(model_dict)

y_test, y_test_pred, pid_test = predict(sslnet, test_loader, device)

  model_dict = torch.load('state_dict.pt', map_location=device)
100%|█████████████████████████████████████████████| 1/1 [00:03<00:00,  3.26s/it]


In [659]:
scores = classification_scores(y_test, y_test_pred)
print(scores.round(3))

   precision  recall   f1  kappa
0        1.0     1.0  1.0    NaN


  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)


## Harnet30 is offerfitting, exploring Harnet 10
My code again below

In [19]:
os.getcwd()

'/Users/kat/Oxford_Wearables_Activity_Recognition/6_self_supervised_learning'

In [10]:
!pip install pynvml



In [3]:
!pip install torch torchvision torchaudio     

Collecting torch
  Downloading torch-2.5.0-cp312-none-macosx_11_0_arm64.whl.metadata (28 kB)
Collecting torchvision
  Downloading torchvision-0.20.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (6.1 kB)
Collecting torchaudio
  Downloading torchaudio-2.5.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (6.4 kB)
Collecting sympy==1.13.1 (from torch)
  Using cached sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Downloading torch-2.5.0-cp312-none-macosx_11_0_arm64.whl (64.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.3/64.3 MB[0m [31m52.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hUsing cached sympy-1.13.1-py3-none-any.whl (6.2 MB)
Downloading torchvision-0.20.0-cp312-cp312-macosx_11_0_arm64.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m47.4 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hDownloading torchaudio-2.5.0-cp312-cp312-macosx_11_0_arm64.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[

In [7]:
!pip install transforms3d

Collecting transforms3d
  Downloading transforms3d-0.4.2-py3-none-any.whl.metadata (2.8 kB)
Downloading transforms3d-0.4.2-py3-none-any.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: transforms3d
Successfully installed transforms3d-0.4.2


In [255]:
# Tring harnet10 on subset
repo='OxWearables/ssl-wearables'
harnet10= torch.hub.load(repo, 'harnet10', class_num=2, pretrained=True)
float_tensor = torch.tensor(data[['x','y','z']][0:300].values.T, dtype=torch.float)
x=float_tensor.unsqueeze(0)
x = torch.FloatTensor(x)
harnet10(x)

Using cache found in /Users/kat/.cache/torch/hub/OxWearables_ssl-wearables_main
  pretrained_dict = torch.load(weight_path, map_location=my_device)


131 Weights loaded


tensor([[21.1082, 10.9276]], grad_fn=<AddmmBackward0>)

In [259]:
# Get time chnage metrics
def time_params(df):
    df['timestamp']=pd.to_datetime(df['timestamp'])
    initial_time=pd.Timedelta(seconds=0)
    time_chg=(df['timestamp']-df['timestamp'].shift(1)).fillna(initial_time)
    time_chg=time_chg.dt.total_seconds() 
    time_avg=time_chg.mean()  
    samp_freq_= 1.0/time_avg  
    time_= np.cumsum(time_chg) 
    return df, samp_freq_,time_,time_avg

In [261]:
# Store inputs as numpy files
data=pd.read_csv("/XXXX.csv")
np.save("X.npy", data[['x','y','z']].values)
np.save("Y.npy", data['annotation'].values)
time=time_params(data)[2].values
np.save("T.npy", time)
reps=round(len(data)/10)
pids=[]
for x in range(1,11):
    pids+=[x]*reps   
pids=pids[:len(data)]
np.save("pid.npy", pids)

## Custom Data Windowing 

In [403]:
# Windowing Data
np.random.seed(42) 
features = data[['x', 'y', 'z']].values
labels= data['annotation'].values
patient_ids = data['patient_id'].values

window_size =10
num_samples = len(features)//window_size
reshaped_features= features[:num_samples*window_size].reshape(num_samples, window_size, 3)
reshaped_labels = labels[:num_samples*window_size].reshape(num_samples, window_size)
reshaped_patient_ids = patient_ids[:num_samples*window_size].reshape(num_samples, window_size)
final_labels= stats.mode(reshaped_labels, axis=1)[0].reshape(-1)
final_patient_ids = reshaped_patient_ids[:,0]
print("Reshaped features shape:", reshaped_features.shape)
print("Final labels shape:", final_labels.shape)
print("Final patient IDs shape:", final_patient_ids.shape)

Reshaped features shape: (35999, 10, 3)
Final labels shape: (35999,)
Final patient IDs shape: (35999,)


In [505]:
#Tring GroupShuffleSplit
reshaped_patient_ids=patient_ids[:num_samples*window_size].reshape(num_samples, window_size)
final_patient_ids = reshaped_patient_ids[:,0]

splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, train_size=0.6, random_state=42)
splitter_val = GroupShuffleSplit(n_splits=1, test_size=0.5, random_state=42)  # To split remaining data into val and test
train_idx, temp_idx = next(splitter.split(reshaped_features, final_labels, groups=final_patient_ids))
val_idx, test_idx = next(splitter_val.split(reshaped_features[temp_idx], final_labels[temp_idx], groups=final_patient_ids[temp_idx]))

x_train, y_train, group_train = reshaped_features[train_idx], final_labels[train_idx], final_patient_ids[train_idx]
x_val, y_val, group_val = reshaped_features[val_idx], final_labels[val_idx], final_patient_ids[val_idx]
x_test, y_test, group_test = reshaped_features[test_idx], final_labels[test_idx], final_patient_ids[test_idx]

(x_train.shape, y_train.shape, group_train.shape), (x_val.shape, y_val.shape, group_val.shape), (x_test.shape, y_test.shape, group_train.shape)

(((21635, 10, 3), (21635,), (21635,)),
 ((3593, 10, 3), (3593,), (3593,)),
 ((3572, 10, 3), (3572,), (21635,)))

In [507]:
# construct dataloaders
train_dataset = NormalDataset(x_train, y_train, group_train, name="training", transform=True)
val_dataset = NormalDataset(x_val, y_val, group_val, name="validation")
test_dataset = NormalDataset(x_test, y_test, group_test, name="test")
train_loader = DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
    num_workers=2,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=0,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=0,
)

Using cache found in /Users/kat/.cache/torch/hub/OxWearables_ssl-wearables_main
  pretrained_dict = torch.load(weight_path, map_location=my_device)


131 Weights loaded
training set sample count : 21635
validation set sample count : 3593
test set sample count : 3572


In [509]:
def train_harnet10(model, train_loader, val_loader, device, weights=None):
    state_dict = 'state_dict.pt'
    num_epoch = 100
    optimizer = torch.optim.Adam(
        model.parameters(), lr=0.0001, amsgrad=True
    )
    if weights:
        weights = torch.FloatTensor(weights).to(device)
        loss_fn = nn.CrossEntropyLoss(weight=weights)
    else:
        loss_fn = nn.CrossEntropyLoss()

    early_stopping = EarlyStopping(
        patience=5, path=state_dict, verbose=True
    )

    for epoch in range(num_epoch):
        model.train()
        train_losses = []
        train_acces = []
        for i, (x, y, _) in enumerate(tqdm(train_loader)):
            x.requires_grad_(True)
            x = x.to(device, dtype=torch.float)
            true_y = y.to(device, dtype=torch.long)
            print(f"Shape of input to the model: {x.shape}")
            optimizer.zero_grad()
            # # Forward pass
            # try:
            #     logits = model(x)
            # except Exception as e:
            #     print(f"Error during forward pass: {e}")
            #     return  # Stop execution if an error occurs
            x = torch.nn.functional.pad(x, (0, 128))  # Pad sequence length dimension from 10 to 128
            
            logits = model(x)
            loss = loss_fn(logits, true_y)
            loss.backward()
            optimizer.step()

            pred_y = torch.argmax(logits, dim=1)
            train_acc = torch.sum(pred_y == true_y)
            train_acc = train_acc / (pred_y.size()[0])

            train_losses.append(loss.cpu().detach())
            train_acces.append(train_acc.cpu().detach())

        val_loss, val_acc = _validate_model(model, val_loader, device, loss_fn)

        epoch_len = len(str(num_epoch))
        print_msg = (
            f"[{epoch:>{epoch_len}}/{num_epoch:>{epoch_len}}] | "
            + f"train_loss: {np.mean(train_losses):.3f} | "
            + f"train_acc: {np.mean(train_acces):.3f} | "
            + f"val_loss: {val_loss:.3f} | "
            + f"val_acc: {val_acc:.2f}"
        )

        early_stopping(val_loss, model)
        print(print_msg)

        if early_stopping.early_stop:
            print('Early stopping')
            print(f'SSLNet weights saved to {state_dict}')
            break

In [511]:
# Train the model. The trained weights will be saved in the file 'state_dict.pt'
train_harnet10(sslnet, train_loader, val_loader, device, get_inverse_class_weights(y_train))

Inverse class weights: 
[1.0]


  0%|                                                   | 0/170 [00:00<?, ?it/s]python(51599) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(51601) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


Shape of input to the model: torch.Size([128, 3, 10])


  0%|                                                   | 0/170 [00:04<?, ?it/s]


RuntimeError: Calculated padded input size per channel: (7). Kernel size: (9). Kernel size can't be greater than actual input size

In [449]:
# Explore error
sslnet: nn.Module = torch.hub.load(repo, 'harnet10', trust_repo=True, class_num=2, pretrained=True, weights_only=False)
sslnet.to(device)

Using cache found in /Users/kat/.cache/torch/hub/OxWearables_ssl-wearables_main
  pretrained_dict = torch.load(weight_path, map_location=my_device)


131 Weights loaded


Resnet(
  (feature_extractor): Sequential(
    (layer1): Sequential(
      (0): Conv1d(3, 64, kernel_size=(5,), stride=(1,), padding=(2,), bias=False, padding_mode=circular)
      (1): ResBlock(
        (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv1): Conv1d(64, 64, kernel_size=(5,), stride=(1,), padding=(2,), bias=False, padding_mode=circular)
        (conv2): Conv1d(64, 64, kernel_size=(5,), stride=(1,), padding=(2,), bias=False, padding_mode=circular)
        (relu): ReLU(inplace=True)
      )
      (2): ResBlock(
        (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv1): Conv1d(64, 64, kernel_size=(5,), stride=(1,), padding=(2,), bias=False, padding_mode=circular)
        (conv2): Conv1d(6

In [405]:
# Editing data type to match Pytorch
X = data[['x','y','z']].values.astype(
    "f4"
)  # PyTorch defaults to float32
Y=data['annotation'].values
time=time_params(data)[2].values
reps=round(len(data)/10)
pids=[]
for x in range(1,11):
    pids+=[x]*reps
group_train=pids[:len(data)]

# construct dataloaders
train_dataset = NormalDataset(X, Y, group_train, name="training", transform=True)
val_dataset = NormalDataset(x_val, y_val, group_val, name="validation")
test_dataset = NormalDataset(x_test, y_test, group_test, name="test")

train_loader = DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True#
)

val_loader = DataLoader(
    val_dataset,
    batch_size=128,
    shuffle=False
)

test_loader = DataLoader(
    test_dataset,
    batch_size=128,
    shuffle=False
)

Using cache found in /Users/kat/.cache/torch/hub/OxWearables_ssl-wearables_main


131 Weights loaded
training set sample count : 35999


  pretrained_dict = torch.load(weight_path, map_location=my_device)


In [263]:
data

Unnamed: 0,timestamp,x,y,z,annotation
0,2019-07-22 13:32:52.809,0.411490,0.181621,-0.497670,0
1,2019-07-22 13:32:52.819,0.688887,0.124579,-0.506827,0
2,2019-07-22 13:32:52.828,0.736904,0.020060,-0.306294,0
3,2019-07-22 13:32:52.838,0.777280,-0.050111,-0.225117,0
4,2019-07-22 13:32:52.848,0.869990,-0.073111,-0.216985,0
...,...,...,...,...,...
359994,2019-07-22 14:32:52.750,0.880580,0.247766,-0.419820,0
359995,2019-07-22 14:32:52.759,0.880580,0.247766,-0.419820,0
359996,2019-07-22 14:32:52.769,0.882499,0.247766,-0.419820,0
359997,2019-07-22 14:32:52.779,0.865828,0.247766,-0.419820,0


In [381]:
X

array([[ 0.41149 ,  0.181621, -0.49767 ],
       [ 0.688887,  0.124579, -0.506827],
       [ 0.736904,  0.02006 , -0.306294],
       ...,
       [ 0.882499,  0.247766, -0.41982 ],
       [ 0.865828,  0.247766, -0.41982 ],
       [ 0.875269,  0.247766, -0.41982 ]], dtype=float32)

In [369]:
(
    x_train, y_train, group_train, time_train,
    x_val, y_val, group_val, time_val,
    x_test, y_test, group_test, time_test,
    le
) = load_data()

X shape: (359999, 3)
Y shape: (359999,)
Label distribution:
0    359394
1       605
Name: count, dtype: int64
Original labels: [0 1]
Transformed labels: [0 1]


In [275]:
# Tranforming the data to have another dimension
x_train = np.expand_dims(x_train, axis=1)
y_train = np.expand_dims(y_train, axis=1)
group_train = np.expand_dims(group_train, axis=1)
x_val = np.expand_dims(x_val, axis=1)
y_val = np.expand_dims(y_val, axis=1)
group_val = np.expand_dims(group_val, axis=1)
x_test = np.expand_dims(x_test, axis=1)
y_test = np.expand_dims(y_test, axis=1)
group_test = np.expand_dims(group_test, axis=1)

# construct dataloaders
train_dataset = NormalDataset(x_train, y_train, group_train, name="training", transform=True)
val_dataset = NormalDataset(x_val, y_val, group_val, name="validation")
test_dataset = NormalDataset(x_test, y_test, group_test, name="test")

train_loader = DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True
)
val_loader = DataLoader(
    val_dataset,
    batch_size=128,
    shuffle=False
)
test_loader = DataLoader(
    test_dataset,
    batch_size=128,
    shuffle=False
)

Using cache found in /Users/kat/.cache/torch/hub/OxWearables_ssl-wearables_main
  pretrained_dict = torch.load(weight_path, map_location=my_device)


131 Weights loaded
training set sample count : 251999
validation set sample count : 36000
test set sample count : 72000


In [None]:
# Successful model Harnet10 model:
train_harnet10(sslnet, train_loader, val_loader, device, get_inverse_class_weights(y_train))

In [None]:
# load fine tuned weights (best weights prior to early-stopping) do inference on the test set
model_dict = torch.load('state_dict.pt', map_location=device)
sslnet.load_state_dict(model_dict)
y_test, y_test_pred, pid_test = predict(sslnet, test_loader, device)

In [None]:
scores = classification_scores(y_test, y_test_pred)
print(scores.round(3))

## 2. Fine-tuning Prospects 

### Freezing all the conv layers but the linear layers 

In [None]:
for name, param in sslnet.named_parameters():
    print(name)
    
def set_bn_eval(m):
    classname = m.__class__.__name__
    if classname.find("BatchNorm1d") != -1:
        m.eval()

i = 0
name_idx = 0
for name, param in sslnet.named_parameters():
    if name.split(".")[name_idx] == "feature_extractor":
        param.requires_grad = False
        i += 1
sslnet.apply(set_bn_eval)

print("Frozen weights: %d" % i)

### Freezing all the weights layers in the first residual block

In [None]:
i = 0
name_idx = 1
for name, param in sslnet.named_parameters():
    if name.split(".")[name_idx] == "layer1":
        param.requires_grad = False
        i += 1
sslnet.apply(set_bn_eval)