# Imports

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score,precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torchvision import models
from torch.utils.data import Dataset, DataLoader
import gc 

gc.collect()
torch.cuda.empty_cache()

x=np.load('datasets/scaled_spec_resampled_array.npy')
x =np.reshape(x, (x.shape[0], 1, x.shape[1], x.shape[2]))
y=np.load('datasets/labels_array.npy')-1
subjects=np.load('datasets/subjects_array.npy')
print(f"Spectrograms (X) shape: {x.shape}")
print(f"Labels (Y) shape: {y.shape}")
print(f"Subjects shape: {subjects.shape}")

num_samples = x.shape[0]
num_classes = len(np.unique(y))
print(f"Number of samples: {num_samples}")
print(f"Number of classes: {num_classes}")

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)
print(f"Test set: {x_test.shape}, {y_test.shape}")

x_test=torch.tensor(x_test, dtype=torch.float32)
y_test=torch.tensor(y_test, dtype=torch.long)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

Spectrograms (X) shape: (1754, 1, 2048, 80)
Labels (Y) shape: (1754,)
Subjects shape: (1754,)
Number of samples: 1754
Number of classes: 6
Test set: (351, 1, 2048, 80), (351,)
Device: cuda


In [2]:
def get_predictions(model, dataloader):
    model.eval()
    predictions = []
    with torch.no_grad():
        for x_batch in dataloader:
            x_batch = x_batch.to(device)
            y_pred = model(x_batch)
            predictions.append(y_pred)
    return torch.cat(predictions)

def get_scores(y_true, y_pred):
    acc = accuracy_score(y_true.cpu().numpy(), y_pred.cpu().numpy())
    prec = precision_score(y_true.cpu().numpy(), y_pred.cpu().numpy(), average=None)
    rec = recall_score(y_true.cpu().numpy(), y_pred.cpu().numpy(), average=None)
    f1 = f1_score(y_true.cpu().numpy(), y_pred.cpu().numpy(), average=None)
    return acc, prec, rec, f1

def print_scores(acc, prec, rec, f1):
    activities = ['Walking', 'Sitting Down', 'Standing Up', 'Picking up an Object', 'Drinking Water', 'Falling']
    print(f"Accuracy = {100*acc:.4f}%")
    print("Precision:")
    print([f"{activities[j]}: {100*prec[j]:.2f}%" for j in range(num_classes)])
    print("Recall:")
    print([f"{activities[j]}: {100*rec[j]:.2f}%" for j in range(num_classes)])
    print("F1:")
    print([f"{activities[j]}: {100*f1[j]:.2f}%" for j in range(num_classes)])
    print()

def print_scores_folds(acc, prec, rec, f1):
    activities = ['Walking', 'Sitting Down', 'Standing Up', 'Picking up an Object', 'Drinking Water', 'Falling']
    print(f"Accuracy = {100*np.mean(acc):.4f}%")
    print("Precision:")
    print([f"{activities[j]}: {100*np.mean(prec[:,j]):.2f}%" for j in range(num_classes)])
    print("Recall:")
    print([f"{activities[j]}: {100*np.mean(rec[:,j]):.2f}%" for j in range(num_classes)])
    print("F1:")
    print([f"{activities[j]}: {100*np.mean(f1[:,j]):.2f}%" for j in range(num_classes)])
    print()

### "Vanilla" ResNets evaluation

In [3]:
resnet18 = models.resnet18(pretrained=False)
resnet18.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) #change input channels to 1 to use single layer spectrogram images
resnet18.fc = nn.Linear(in_features=512, out_features=6, bias=True)

resnet18.load_state_dict(torch.load('checkpoints/resnet18.pth'))
# resnet18.load_state_dict(torch.load('checkpoints/resnet18.pth', map_location=torch.device('cpu')))
resnet18.to(device)

test_loader = DataLoader(x_test, batch_size=32, shuffle=False)

y_pred = get_predictions(resnet18, test_loader)
y_pred = torch.argmax(y_pred, dim=1)

acc, prec, rec, f1 = get_scores(y_test, y_pred)

print_scores(acc, prec, rec, f1)



Accuracy = 86.8946%
Precision:
['Walking: 100.00%', 'Sitting Down: 98.21%', 'Standing Up: 67.82%', 'Picking up an Object: 89.29%', 'Drinking Water: 83.33%', 'Falling: 92.86%']
Recall:
['Walking: 100.00%', 'Sitting Down: 87.30%', 'Standing Up: 95.16%', 'Picking up an Object: 80.65%', 'Drinking Water: 64.52%', 'Falling: 97.50%']
F1:
['Walking: 100.00%', 'Sitting Down: 92.44%', 'Standing Up: 79.19%', 'Picking up an Object: 84.75%', 'Drinking Water: 72.73%', 'Falling: 95.12%']



In [4]:
resnet34 = models.resnet34(pretrained=False)
resnet34.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) #change input channels to 1 to use single layer spectrogram images
resnet34.fc = nn.Linear(in_features=512, out_features=6, bias=True)

resnet34.load_state_dict(torch.load('checkpoints/resnet34.pth'))
# resnet34.load_state_dict(torch.load('checkpoints/resnet34.pth', map_location=torch.device('cpu')))
resnet34.to(device)

y_pred = get_predictions(resnet34, test_loader)
y_pred = torch.argmax(y_pred, dim=1)

acc, prec, rec, f1 = get_scores(y_test, y_pred)

print_scores(acc, prec, rec, f1)




Accuracy = 72.0798%
Precision:
['Walking: 100.00%', 'Sitting Down: 72.22%', 'Standing Up: 62.67%', 'Picking up an Object: 74.07%', 'Drinking Water: 50.00%', 'Falling: 100.00%']
Recall:
['Walking: 100.00%', 'Sitting Down: 41.27%', 'Standing Up: 75.81%', 'Picking up an Object: 64.52%', 'Drinking Water: 74.19%', 'Falling: 80.00%']
F1:
['Walking: 100.00%', 'Sitting Down: 52.53%', 'Standing Up: 68.61%', 'Picking up an Object: 68.97%', 'Drinking Water: 59.74%', 'Falling: 88.89%']



### Reduced datasets models

In [5]:
x=np.load('datasets/scaled_spec_resampled_array.npy') #load the dataset
y=np.load('datasets/labels_array.npy')-1 # labels start from 1, we want them to start from 0
x = x.reshape(x.shape[0], 1, x.shape[1], x.shape[2]) # add channel dimension for CNN

print(x.shape, y.shape)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
print(f"Test set: {x_test.shape}, {y_test.shape}")

x_test=x_test[:,724:1324,:]
x_train=x_train[:,724:1324,:]

x_test=torch.tensor(x_test, dtype=torch.float32)
y_test=torch.tensor(y_test, dtype=torch.long)

(1754, 1, 2048, 80) (1754,)
Test set: (351, 1, 2048, 80), (351,)


In [7]:
resnet18_small = models.resnet18(pretrained=False)
resnet18_small.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) #change input channels to 1 to use single layer spectrogram images
resnet18_small.fc = nn.Linear(in_features=512, out_features=6, bias=True)

resnet18_small.load_state_dict(torch.load('checkpoints/resnet18_smalldata.pth'))
resnet18_small.to(device)

test_loader = DataLoader(x_test, batch_size=32, shuffle=False)

y_pred = get_predictions(resnet18_small, test_loader)
y_pred = torch.argmax(y_pred, dim=1)

acc, prec, rec, f1 = get_scores(y_test, y_pred)

print_scores(acc, prec, rec, f1)

RuntimeError: Given groups=1, weight of size [64, 1, 7, 7], expected input[32, 0, 2048, 80] to have 1 channels, but got 0 channels instead

In [None]:
resnet34_small = models.resnet34(pretrained=False)
resnet34_small.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) #change input channels to 1 to use single layer spectrogram images
resnet34_small.fc = nn.Linear(in_features=512, out_features=6, bias=True)

resnet34_small.load_state_dict(torch.load('good_checkpoints/resnet34_smalldata.pth'))
resnet34_small.to(device)

y_pred = get_predictions(resnet34_small, test_loader)
y_pred = torch.argmax(y_pred, dim=1)

acc, prec, rec, f1 = get_scores(y_test, y_pred)

print_scores(acc, prec, rec, f1)

Accuracy = 86.8946%
Precision:
['Walking: 100.00%', 'Sitting Down: 89.09%', 'Standing Up: 83.82%', 'Picking up an Object: 77.19%', 'Drinking Water: 79.10%', 'Falling: 94.12%']
Recall:
['Walking: 100.00%', 'Sitting Down: 90.74%', 'Standing Up: 86.36%', 'Picking up an Object: 72.13%', 'Drinking Water: 81.54%', 'Falling: 91.43%']
F1:
['Walking: 100.00%', 'Sitting Down: 89.91%', 'Standing Up: 85.07%', 'Picking up an Object: 74.58%', 'Drinking Water: 80.30%', 'Falling: 92.75%']



## Fold evaluations

In [None]:
import os
from sklearn.model_selection import GroupKFold
FOLDS = 5

# Splits for cross-validation:
gkf = GroupKFold(n_splits=FOLDS)
splits = list(gkf.split(x, y, groups=subjects))

For ResNet-18:

In [None]:
# Load checkpoint filenames for resnet18:
cwd = os.getcwd()
base_folder="checkpoints/"
fold_paths = [f.path for f in os.scandir(base_folder) if f.is_file() and 'resnet18_fold' in f.name]

accuracies = np.zeros(FOLDS)
precisions = np.zeros((FOLDS, num_classes))
recalls = np.zeros((FOLDS, num_classes))
f1s = np.zeros((FOLDS, num_classes))

# Fold evaluation loop:
for i, (train_index, test_index) in enumerate(splits):
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]

    x_test = torch.tensor(x_test, dtype=torch.float32)
    y_test = torch.tensor(y_test, dtype=torch.long)

    print(f"Evaluating with {fold_paths[i]}...")

    resnet18 = models.resnet18(pretrained=False)
    resnet18.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    resnet18.fc = nn.Linear(in_features=512, out_features=6, bias=True)

    resnet18.load_state_dict(torch.load('checkpoints/resnet18.pth'))
    # resnet18.load_state_dict(torch.load(fold_paths[i], map_location=torch.device('cpu')))
    resnet18.to(device)

    test_loader = DataLoader(x_test, batch_size=32, shuffle=False)
    y_pred = get_predictions(resnet18, test_loader)
    y_pred = torch.argmax(y_pred, dim=1)

    accuracies[i], precisions[i], recalls[i], f1s[i] = get_scores(y_test, y_pred)
    
print("Done!")

Evaluating with checkpoints/resnet18_fold1.pth...




Evaluating with checkpoints/resnet18_fold2.pth...




Evaluating with checkpoints/resnet18_fold3.pth...




Evaluating with checkpoints/resnet18_fold4.pth...




Evaluating with checkpoints/resnet18_fold5.pth...




Done!


In [None]:
print_scores_folds(accuracies, precisions, recalls, f1s)

Accuracy = 76.3682%
Precision:
['Walking: 99.09%', 'Sitting Down: 76.55%', 'Standing Up: 69.32%', 'Picking up an Object: 73.17%', 'Drinking Water: 61.23%', 'Falling: 88.95%']
Recall:
['Walking: 100.00%', 'Sitting Down: 69.66%', 'Standing Up: 77.53%', 'Picking up an Object: 65.59%', 'Drinking Water: 62.02%', 'Falling: 87.18%']
F1:
['Walking: 99.53%', 'Sitting Down: 72.35%', 'Standing Up: 72.90%', 'Picking up an Object: 68.19%', 'Drinking Water: 61.19%', 'Falling: 87.91%']



For ResNet-34:

In [None]:
# Load checkpoint filenames for resnet34:
cwd = os.getcwd()
base_folder="checkpoints/"
fold_paths = [f.path for f in os.scandir(base_folder) if f.is_file() and 'resnet34_fold' in f.name]

accuracies = np.zeros(FOLDS)
precisions = np.zeros((FOLDS, num_classes))
recalls = np.zeros((FOLDS, num_classes))
f1s = np.zeros((FOLDS, num_classes))

# Fold evaluation loop:
for i, (train_index, test_index) in enumerate(splits):
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]

    x_test = torch.tensor(x_test, dtype=torch.float32)
    y_test = torch.tensor(y_test, dtype=torch.long)

    print(f"Evaluating with {fold_paths[i]}...")

    resnet34 = models.resnet34(pretrained=False)
    resnet34.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    resnet34.fc = nn.Linear(in_features=512, out_features=6, bias=True)

    resnet34.load_state_dict(torch.load('checkpoints/resnet34.pth'))
    # resnet34.load_state_dict(torch.load('checkpoints/resnet34.pth', map_location=torch.device('cpu')))
    resnet34.to(device)

    test_loader = DataLoader(x_test, batch_size=32, shuffle=False)
    y_pred = get_predictions(resnet34, test_loader)
    y_pred = torch.argmax(y_pred, dim=1)

    accuracies[i], precisions[i], recalls[i], f1s[i] = get_scores(y_test, y_pred)
    
print("Done!")

Evaluating with checkpoints/resnet34_fold1.pth...
Evaluating with checkpoints/resnet34_fold2.pth...




Evaluating with checkpoints/resnet34_fold3.pth...




Evaluating with checkpoints/resnet34_fold4.pth...




Evaluating with checkpoints/resnet34_fold5.pth...




Done!


In [None]:
print_scores_folds(accuracies, precisions, recalls, f1s)

Accuracy = 71.4409%
Precision:
['Walking: 99.69%', 'Sitting Down: 77.86%', 'Standing Up: 59.18%', 'Picking up an Object: 72.19%', 'Drinking Water: 52.35%', 'Falling: 94.79%']
Recall:
['Walking: 100.00%', 'Sitting Down: 42.60%', 'Standing Up: 67.18%', 'Picking up an Object: 57.86%', 'Drinking Water: 81.32%', 'Falling: 84.10%']
F1:
['Walking: 99.84%', 'Sitting Down: 55.00%', 'Standing Up: 62.53%', 'Picking up an Object: 64.16%', 'Drinking Water: 63.39%', 'Falling: 89.05%']

