In [1]:
# Configure GPU
import sys
sys.path.append("../")
import joblib
import os
import torch
import torch.nn as nn
import numpy as np
import pandas as pd

from torch.utils.data import DataLoader
from tqdm import tqdm
from sklearn.model_selection import GroupShuffleSplit
from sklearn.preprocessing import LabelEncoder

from utils.data import NormalDataset, resize, get_inverse_class_weights
from utils.utils import EarlyStopping

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
# Load the accelerometer dataset
path1="/Users/kat/Oxford_Wearables_Activity_Recognition/6_self_supervised_learning/"
device="cpu"
csv_file_path = path1+"OxWalk_Dec2022/Hip_100Hz/P39_hip100.csv"
hip_data = pd.read_csv(csv_file_path)
#!Huge imbalance
# Load the accelerometer dataset
csv_file_path = path1+"OxWalk_Dec2022/Hip_100Hz/P39_hip100.csv"
hip_data = pd.read_csv(csv_file_path)

# Add a 'group' sudo field, dividing the dataset into 10 equal parts
num_rows = hip_data.shape[0]
group_size = num_rows // 10  # Calculate the number of rows per group
# Create an array with group labels (1 to 10) repeated for each group size
group_labels = np.repeat(np.arange(1, 11), group_size)
if len(group_labels) < num_rows:
    group_labels = np.concatenate([group_labels, np.full(num_rows - len(group_labels), 10)])
hip_data['group'] = group_labels

X = hip_data[['x', 'y', 'z']].values
y = hip_data['annotation'].values
groups = hip_data['group'].values

# Reshape the data into windows of size 1000
def create_windows(data, labels, groups, window_size):
    num_windows = data.shape[0] // window_size
    X_windows = data[:num_windows * window_size].reshape(num_windows, window_size, -1)
    y_windows = labels[window_size-1:num_windows * window_size:window_size]  # One label per window
    group_windows = groups[window_size-1:num_windows * window_size:window_size]  # One group per window
    return X_windows, y_windows, group_windows

window_size = 1000
X_windows, y_windows, group_windows = create_windows(X, y, groups, window_size)

# Split the data into train, validation, and test sets
x_train, x_temp, y_train, y_temp, group_train, group_temp = train_test_split(
    X_windows, y_windows, group_windows, test_size=0.3, random_state=42
)

x_val, x_test, y_val, y_test, group_val, group_test = train_test_split(
    x_temp, y_temp, group_temp, test_size=0.5, random_state=42
)

# Output the shapes as expected:
print(((x_train.shape, y_train.shape, group_train.shape),
       (x_val.shape, y_val.shape, group_val.shape),
       (x_test.shape, y_test.shape, group_test.shape)))

(((251, 1000, 3), (251,), (251,)), ((54, 1000, 3), (54,), (54,)), ((54, 1000, 3), (54,), (54,)))


In [5]:
repo = 'OxWearables/ssl-wearables'

sslnet: nn.Module = torch.hub.load(repo, 'harnet30', trust_repo=True, class_num=2, pretrained=True, weights_only=False)
sslnet.to(device)


train_dataset = NormalDataset(x_train, y_train, group_train, name="training", transform=True)
val_dataset = NormalDataset(x_val, y_val, group_val, name="validation")
test_dataset = NormalDataset(x_test, y_test, group_test, name="test")

train_loader = DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
    num_workers=2,
)

val_loader = DataLoader(
    val_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=0,
)

test_loader = DataLoader(
    test_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=0,
)

Using cache found in /Users/kat/.cache/torch/hub/OxWearables_ssl-wearables_main
  pretrained_dict = torch.load(weight_path, map_location=my_device)


131 Weights loaded
training set sample count : 251
validation set sample count : 54
test set sample count : 54


In [7]:
def train(model, train_loader, val_loader, device, weights=None):
    state_dict = 'state_dict.pt'
    num_epoch = 5

    optimizer = torch.optim.Adam(
        model.parameters(), lr=0.0001, amsgrad=True
    )

    if weights:
        weights = torch.FloatTensor(weights).to(device)
        loss_fn = nn.CrossEntropyLoss(weight=weights)
    else:
        loss_fn = nn.CrossEntropyLoss()

    early_stopping = EarlyStopping(
        patience=5, path=state_dict, verbose=True
    )

    for epoch in range(num_epoch):
        model.train()
        train_losses = []
        train_acces = []
        for i, (x, y, _) in enumerate(tqdm(train_loader)):
            x.requires_grad_(True)
            x = x.to(device, dtype=torch.float)
            true_y = y.to(device, dtype=torch.long)

            optimizer.zero_grad()
            print("here",x.shape)
            logits = model(x)
            loss = loss_fn(logits, true_y)
            loss.backward()
            optimizer.step()

            pred_y = torch.argmax(logits, dim=1)
            train_acc = torch.sum(pred_y == true_y)
            train_acc = train_acc / (pred_y.size()[0])

            train_losses.append(loss.cpu().detach())
            train_acces.append(train_acc.cpu().detach())

        val_loss, val_acc = _validate_model(model, val_loader, device, loss_fn)

        epoch_len = len(str(num_epoch))
        print_msg = (
            f"[{epoch:>{epoch_len}}/{num_epoch:>{epoch_len}}] | "
            + f"train_loss: {np.mean(train_losses):.3f} | "
            + f"train_acc: {np.mean(train_acces):.3f} | "
            + f"val_loss: {val_loss:.3f} | "
            + f"val_acc: {val_acc:.2f}"
        )

        early_stopping(val_loss, model)
        print(print_msg)

        if early_stopping.early_stop:
            print('Early stopping')
            print(f'SSLNet weights saved to {state_dict}')
            break


def _validate_model(model, val_loader, device, loss_fn):
    model.eval()
    losses = []
    acces = []
    for i, (x, y, _) in enumerate(val_loader):
        with torch.inference_mode():
            x = x.to(device, dtype=torch.float)
            true_y = y.to(device, dtype=torch.long)

            logits = model(x)
            loss = loss_fn(logits, true_y)

            pred_y = torch.argmax(logits, dim=1)

            val_acc = torch.sum(pred_y == true_y)
            val_acc = val_acc / (list(pred_y.size())[0])

            losses.append(loss.cpu().detach())
            acces.append(val_acc.cpu().detach())
    losses = np.array(losses)
    acces = np.array(acces)
    return np.mean(losses), np.mean(acces)


def predict(model, data_loader, device):
    from tqdm import tqdm

    predictions_list = []
    true_list = []
    pid_list = []
    model.eval()

    for i, (x, y, pid) in enumerate(tqdm(data_loader)):
        with torch.inference_mode():
            x = x.to(device, dtype=torch.float)
            logits = model(x)
            true_list.append(y)
            pred_y = torch.argmax(logits, dim=1)
            predictions_list.append(pred_y.cpu())
            pid_list.extend(pid)
    true_list = torch.cat(true_list)
    predictions_list = torch.cat(predictions_list)

    return (
        torch.flatten(true_list).numpy(),
        torch.flatten(predictions_list).numpy(),
        np.array(pid_list),
    )

In [13]:
repo = 'OxWearables/ssl-wearables'
# load the pretrained model
sslnet: nn.Module = torch.hub.load(repo, 'harnet30', trust_repo=True, class_num=2, pretrained=True, weights_only=False)
sslnet.to(device)

Using cache found in /Users/kat/.cache/torch/hub/OxWearables_ssl-wearables_main
  pretrained_dict = torch.load(weight_path, map_location=my_device)


131 Weights loaded


Resnet(
  (feature_extractor): Sequential(
    (layer1): Sequential(
      (0): Conv1d(3, 64, kernel_size=(5,), stride=(1,), padding=(2,), bias=False, padding_mode=circular)
      (1): ResBlock(
        (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv1): Conv1d(64, 64, kernel_size=(5,), stride=(1,), padding=(2,), bias=False, padding_mode=circular)
        (conv2): Conv1d(64, 64, kernel_size=(5,), stride=(1,), padding=(2,), bias=False, padding_mode=circular)
        (relu): ReLU(inplace=True)
      )
      (2): ResBlock(
        (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv1): Conv1d(64, 64, kernel_size=(5,), stride=(1,), padding=(2,), bias=False, padding_mode=circular)
        (conv2): Conv1d(6

In [15]:
# Train the model. The trained weights will be saved in the file 'state_dict.pt'
train(sslnet, train_loader, val_loader, device, get_inverse_class_weights(y_train))
#

Inverse class weights: 
[1.004, 251.0]


  0%|                                                     | 0/2 [00:00<?, ?it/s]

here torch.Size([128, 3, 1000])


 50%|██████████████████████▌                      | 1/2 [00:14<00:14, 14.14s/it]

here torch.Size([123, 3, 1000])


100%|█████████████████████████████████████████████| 2/2 [00:28<00:00, 14.47s/it]


Validation loss decreased (inf --> 3.310958). Saving model ...
[0/5] | train_loss: 1.394 | train_acc: 0.770 | val_loss: 3.311 | val_acc: 0.35


  0%|                                                     | 0/2 [00:00<?, ?it/s]

here torch.Size([128, 3, 1000])


 50%|██████████████████████▌                      | 1/2 [00:15<00:15, 15.72s/it]

here torch.Size([123, 3, 1000])


100%|█████████████████████████████████████████████| 2/2 [00:27<00:00, 13.64s/it]


Validation loss decreased (3.310958 --> 1.560020). Saving model ...
[1/5] | train_loss: 7.028 | train_acc: 0.976 | val_loss: 1.560 | val_acc: 0.61


  0%|                                                     | 0/2 [00:00<?, ?it/s]

here torch.Size([128, 3, 1000])


 50%|██████████████████████▌                      | 1/2 [00:13<00:13, 13.10s/it]

here torch.Size([123, 3, 1000])


100%|█████████████████████████████████████████████| 2/2 [00:24<00:00, 12.06s/it]


Validation loss decreased (1.560020 --> 0.859991). Saving model ...
[2/5] | train_loss: 0.748 | train_acc: 0.956 | val_loss: 0.860 | val_acc: 0.74


  0%|                                                     | 0/2 [00:00<?, ?it/s]

here torch.Size([128, 3, 1000])


 50%|██████████████████████▌                      | 1/2 [00:13<00:13, 13.15s/it]

here torch.Size([123, 3, 1000])


100%|█████████████████████████████████████████████| 2/2 [00:24<00:00, 12.16s/it]


Validation loss decreased (0.859991 --> 0.500994). Saving model ...
[3/5] | train_loss: 1.172 | train_acc: 0.952 | val_loss: 0.501 | val_acc: 0.87


  0%|                                                     | 0/2 [00:00<?, ?it/s]

here torch.Size([128, 3, 1000])


 50%|██████████████████████▌                      | 1/2 [00:13<00:13, 13.05s/it]

here torch.Size([123, 3, 1000])


100%|█████████████████████████████████████████████| 2/2 [00:24<00:00, 12.17s/it]


Validation loss decreased (0.500994 --> 0.123252). Saving model ...
[4/5] | train_loss: 0.827 | train_acc: 0.968 | val_loss: 0.123 | val_acc: 0.98


In [None]:
#confusion matrix
#Correlation 
