<a href="https://colab.research.google.com/github/ounospanas/KD_PULSE/blob/main/notebooks/teacher_dalia_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import random
import os
import numpy as np

import torch
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable

from sklearn.preprocessing import StandardScaler

In [2]:
torch.__version__

'2.1.0+cu121'

In [3]:
window = 8
batch_size = 256

In [5]:
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


In [12]:
# assumes that data have been stored to gdrive using the load_segment_data_.ipynb
ppg_X = np.load('/content/drive/MyDrive/ppg_data/dalia_data.npy')
y = np.load('/content/drive/MyDrive/ppg_data/dalia_y.npy')
subs = np.load('/content/drive/MyDrive/ppg_data/dalia_subs.npy')
acts = np.load('/content/drive/MyDrive/ppg_data/dalia_acts.npy')

In [13]:
val_sets = [[5,6,7,8],[9,10,11,12],[13,14,15],[1,2,3,4]]

In [14]:
def defineSets(X, y, subs, v, t):
    tests = []
    vals = []

    tests_b = []
    vals_b = []

    y_tests = []
    y_vals = []

    all_data = list(np.unique(subs))
    [all_data.remove(i) for i in v]

    ts= X[subs == t].reshape(-1,1,4,32*window)
    val = X[np.in1d(subs, [i for i in v if i!=t])].reshape(-1,1,4,32*window)

    y_ts= y[subs == t]
    y_val = y[np.in1d(subs, [i for i in v if i!=t])]

    tr = X[np.in1d(subs, all_data)].reshape(-1,1,4,32*window)
    y_tr = y[np.in1d(subs, all_data)]

    return tr, val, ts, y_tr, y_val, y_ts

In [15]:
tr, vals, tests, y_tr, y_vals, y_tests = defineSets(ppg_X, y, subs, val_sets[0], val_sets[0][0])

In [16]:
def z_score(
    train_s,
    val_s,
    test_s,
):
    train_signal = np.copy(train_s)
    val_signal = np.copy(val_s)
    test_signal = np.copy(test_s)

    for i in range(train_signal.shape[2]):

        mean = torch.mean(torch.from_numpy(train_signal[:, :, i, :]))
        std = torch.std(torch.from_numpy(train_signal[:, :, i, :]))

        print(mean,std)

        train_signal[:, :, i, :] = (torch.from_numpy(train_signal[:, :, i, :]) - mean) / std
        val_signal[:, :, i, :] = (torch.from_numpy(val_signal[:, :, i, :]) - mean) / std
        test_signal[:, :, i, :] = (torch.from_numpy(test_signal[:, :, i, :]) - mean) / std

    x_train = train_signal
    x_val = val_signal
    x_test = test_signal

    return x_train, x_val, x_test

In [17]:
class ReadyData(Dataset):

    def __init__(self, X, y, scale_X=False):
        if not torch.is_tensor(X):
            if scale_X:
                for i in range(4):
                    X[:,0,i,:] = StandardScaler().fit_transform(X[:,0,i,:]) #batch z-score
            self.X = torch.Tensor(X)
        if not torch.is_tensor(y):
            self.y = torch.Tensor(y)


    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Model

In [18]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np
from time import time

In [19]:
class RConvPPG(nn.Module):
    def __init__(self, conv_blocks=3, conv_layers=3, in_channels=[1,32,48], out_channels=[32,48,64],
             kernel_size=(1,5), dilation=2, padding=(0,4), dropout=[0.5,0.5,0.5], pooling_size=[(1,4),(1,2),(1,2)],
              heads=4, dim=16, dense_out=32, ppg_channels=1):
        super(RConvPPG, self).__init__()

        # hyperparameters
        self.conv_blocks = conv_blocks
        self.conv_layers =conv_layers
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.dilation = dilation
        self.padding = padding
        self.dropout = dropout
        self.pooling_size = pooling_size
        self.heads = heads
        self.dim = dim
        self.dense_out = dense_out
        self.ppg_channels = ppg_channels


        self.conv11 = nn.Conv2d(in_channels=self.in_channels[0], out_channels=self.out_channels[0],
                                kernel_size=self.kernel_size, dilation=self.dilation, padding=self.padding)
        self.conv12 = nn.Conv2d(in_channels=self.out_channels[0], out_channels=self.out_channels[0],
                                kernel_size=self.kernel_size, dilation=self.dilation, padding=self.padding)
        self.conv13 = nn.Conv2d(in_channels=self.out_channels[0], out_channels=self.out_channels[0],
                                kernel_size=self.kernel_size, dilation=self.dilation, padding=self.padding)

        self.conv21 = nn.Conv2d(in_channels=self.in_channels[1], out_channels=self.out_channels[1],
                                kernel_size=self.kernel_size, dilation=self.dilation, padding=self.padding)
        self.conv22 = nn.Conv2d(in_channels=self.out_channels[1], out_channels=self.out_channels[1],
                                kernel_size=self.kernel_size, dilation=self.dilation, padding=self.padding)
        self.conv23 = nn.Conv2d(in_channels=self.out_channels[1], out_channels=self.out_channels[1],
                                kernel_size=self.kernel_size, dilation=self.dilation, padding=self.padding)

        self.conv31 = nn.Conv2d(in_channels=self.in_channels[2], out_channels=self.out_channels[2],
                                kernel_size=self.kernel_size, dilation=self.dilation, padding=self.padding)
        self.conv32 = nn.Conv2d(in_channels=self.out_channels[2], out_channels=self.out_channels[2],
                                kernel_size=self.kernel_size, dilation=self.dilation, padding=self.padding)
        self.conv33 = nn.Conv2d(in_channels=self.out_channels[2], out_channels=self.out_channels[2],
                                kernel_size=self.kernel_size, dilation=self.dilation, padding=self.padding)

        self.multihead_attn = nn.MultiheadAttention(embed_dim=self.dim, num_heads=self.heads, batch_first=True, )
        self.layer_norm = nn.LayerNorm(self.dim)

        self.flatten = nn.Flatten()
        self.dense = nn.Linear(self.out_channels[2]*dim*self.ppg_channels,self.dense_out)
        self.out = nn.Linear(self.dense_out,1)

        self.dropout1 = nn.Dropout(self.dropout[0])
        self.dropout2 = nn.Dropout(self.dropout[1])
        self.dropout3 = nn.Dropout(self.dropout[2])

    def forward(self, x):
        x = self.emb(x)

        x1 = x[:,:,0:self.ppg_channels,:] #get PPG
        x2 = x[:,:,self.ppg_channels:(self.ppg_channels+3),:] #get acc_x, acc_y, acc_z

        attn_output, attn_output_weights = self.multihead_attn(
            x1.reshape(-1,x1.shape[2]*x1.shape[1],x1.shape[3]), #query vector
            x2.reshape(-1,x2.shape[2]*x2.shape[1],x2.shape[3]), #key vector
            x2.reshape(-1,x2.shape[2]*x2.shape[1],x2.shape[3])) #value vector

        attn_output = self.layer_norm(attn_output)

        flat = self.flatten(attn_output)
        out = self.dense(flat)
        out = self.out(out)

        return out, attn_output_weights

    def emb(self, x):
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = F.avg_pool2d(F.relu(self.conv13(x)), kernel_size=self.pooling_size[0], stride=self.pooling_size[0], ceil_mode=True)
        x = self.dropout1(x)

        x = F.relu(self.conv21(x))
        x = F.relu(self.conv22(x))
        x = F.avg_pool2d(F.relu(self.conv23(x)), kernel_size=self.pooling_size[1], stride=self.pooling_size[1], ceil_mode=True)
        x = self.dropout2(x)

        x = F.relu(self.conv31(x))
        x = F.relu(self.conv32(x))
        x = F.avg_pool2d(F.relu(self.conv33(x)), kernel_size=self.pooling_size[2], stride=self.pooling_size[2], ceil_mode=True)
        x = self.dropout3(x)

        return x

In [20]:
device = torch.device("cuda")

# Training loop (could be skipped and go to the post-processing/evaluation section)

In [22]:
!mkdir models_paper

In [23]:
val_maes = []
scores = []

for sub_vals in val_sets:

    print('Subjects val are {}'.format(sub_vals))
    for sub_tests in sub_vals:
        print('Subjects val are {}'.format(sub_tests))

        tr, val, ts, _train_label, _val_label, _test_label = defineSets(ppg_X, y, subs, sub_vals, sub_tests)

        X_train, X_val, X_test = z_score(tr, val, ts)

        print(X_train.shape)
        print(X_val.shape)
        print(X_test.shape)

        val_len = len(X_val)
        train_len = len(X_train)
        test_len = len(X_test)

        ds_train = ReadyData(X=X_train, y=_train_label, scale_X=False)
        ds_val = ReadyData(X=X_val, y=_val_label, scale_X=False)
        ds_test = ReadyData(X=X_test, y=_test_label, scale_X=False)

        batch_size = 256

        train_set = DataLoader(ds_train, batch_size=batch_size, shuffle=True)
        val_set = DataLoader(ds_val, batch_size=batch_size, shuffle=False)
        test_set = DataLoader(ds_test, batch_size=batch_size, shuffle=False)

        device = torch.device("cuda")

        model = RConvPPG()
        model.to(device)

        optimizer = optim.Adam(model.parameters(), lr=0.0005) #0.0005 works also
        loss_fn = nn.L1Loss()

        best_loss = 100000000
        best_epoch = 0

        for epoch in range(500):
            tic = time()

            model.train()
            train_loss = 0
            counter = 0


            for batch_idx, (data, target) in enumerate(train_set):

                data, target = Variable(data).float().to(device), Variable(target).type(torch.FloatTensor).to(device)#.reshape(-1,1)

                optimizer.zero_grad()
                output,_ = model(data)
                loss = loss_fn(output, target)

                loss.backward()
                optimizer.step()

                train_loss += loss.item()

                counter +=1
            toc = time()

            print('\nTrain Epoch: {} \tLoss: {:.4f}, time: {:.4f}'.format(epoch, train_loss/counter,
                                                                                              toc-tic))

            with torch.no_grad():
                model.eval()
                correct_test = 0
                test_loss = 0
                counter = 0

                preds_test,_ = model(Variable(torch.Tensor(X_test)).float().to(device))
                preds_test = preds_test.cpu().detach().numpy()
                targets_test = Variable(torch.Tensor(_test_label)).float().to(device).cpu().detach().numpy()
                score_test = np.mean(np.abs(targets_test-preds_test))
                print('TEST MAE:',score_test)


            with torch.no_grad():
                model.eval()
                correct_val = 0
                val_loss = 0
                counter = 0

                for data, target in val_set:
                    data, target = Variable(data).float().to(device), Variable(target).float().to(device)
                    output,_ = model(data)

                    # sum up batch loss
                    val_loss += loss_fn(output, target).item()

                    counter +=1

                val_loss /= counter#len(test_set.dataset)

                print('Val set: Average loss: {:.4f}'.format(val_loss))

                if val_loss < best_loss:
                    best_loss = val_loss
                    print("New best loss: {}!!!!!!!!!!!!!!!!".format(best_loss))
                    torch.save(model.state_dict(), 'models_paper/best_model_avg_all_acc_last_'+str(sub_tests)+'.pt')
                    best_epoch = epoch
                    best_acc = correct_val/val_len
                    best_score = score_test

            if epoch>(best_epoch+150):
                torch.save(model.state_dict(), 'models_paper/best_model_avg_all_acc_last_'+str(sub_tests)+'.pt')
                break

        val_maes.append(best_acc)
        scores.append(best_score)
        del X_train, _train_label, X_test, _test_label, X_val, _val_label
        del tr, val, ts

Subjects val are [5, 6, 7, 8]
Subjects val are 5
tensor(-0.0005, dtype=torch.float64) tensor(85.9549, dtype=torch.float64)
tensor(-0.5196, dtype=torch.float64) tensor(0.3501, dtype=torch.float64)
tensor(0.1493, dtype=torch.float64) tensor(0.6334, dtype=torch.float64)
tensor(0.3673, dtype=torch.float64) tensor(0.4099, dtype=torch.float64)
(48721, 1, 4, 256)
(11327, 1, 4, 256)
(4649, 1, 4, 256)

Train Epoch: 0 	Loss: 21.6633, time: 7.5516
TEST MAE: 31.349339
Val set: Average loss: 17.4667
New best loss: 17.46665488349067!!!!!!!!!!!!!!!!

Train Epoch: 1 	Loss: 9.4274, time: 7.6814
TEST MAE: 28.630928
Val set: Average loss: 13.6570
New best loss: 13.65703042348226!!!!!!!!!!!!!!!!

Train Epoch: 2 	Loss: 8.3235, time: 7.6077
TEST MAE: 21.882301
Val set: Average loss: 10.9968
New best loss: 10.996778313318888!!!!!!!!!!!!!!!!

Train Epoch: 3 	Loss: 7.5969, time: 7.6381
TEST MAE: 18.228565
Val set: Average loss: 11.5971


KeyboardInterrupt: 

# Post Processing

In [24]:
# Get paper's models in case you didn't run training section
!git clone https://github.com/ounospanas/KD_PULSE.git

Cloning into 'KD_PULSE'...
remote: Enumerating objects: 72, done.[K
remote: Counting objects: 100% (72/72), done.[K
remote: Compressing objects: 100% (67/67), done.[K
remote: Total 72 (delta 13), reused 0 (delta 0), pack-reused 0[K
Receiving objects: 100% (72/72), 8.59 MiB | 15.12 MiB/s, done.
Resolving deltas: 100% (13/13), done.


In [25]:
val_maes = []
scores = []
all_preds = []
all_targets = []

for sub_vals in val_sets:

    print('Subjects val are {}'.format(sub_vals))
    for sub_tests in sub_vals:
        print('Subjects val are {}'.format(sub_tests))

        tr, val, ts, _train_label, _val_label, _test_label = defineSets(ppg_X, y, subs, sub_vals, sub_tests)

        X_train, X_val, X_test = z_score(tr, val, ts)

        val_len = len(X_val)
        train_len = len(X_train)
        test_len = len(X_test)

        ds_train = ReadyData(X=X_train, y=_train_label, scale_X=False)
        ds_val = ReadyData(X=X_val, y=_val_label, scale_X=False)
        ds_test = ReadyData(X=X_test, y=_test_label, scale_X=False)

        batch_size = 256

        train_set = DataLoader(ds_train, batch_size=batch_size, shuffle=True)
        val_set = DataLoader(ds_val, batch_size=batch_size, shuffle=False)
        test_set = DataLoader(ds_test, batch_size=batch_size, shuffle=False)

        device = torch.device("cuda")

        model = RConvPPG()
        #comment if you ran the training part
        model.load_state_dict(torch.load('KD_PULSE/PPG_Dalia_models/best_model_avg_all_acc_last_'+str(sub_tests)+'.pt'))
        #uncomment if you ran the training part
        #model.load_state_dict(torch.load('models_paper/best_model_avg_all_acc_last_'+str(sub_tests)+'.pt'))
        model.to(device)

        loss_fn = nn.L1Loss()

        with torch.no_grad():
            model.eval()
            correct_test = 0
            test_loss = 0
            counter = 0

            preds_test,_ = model(Variable(torch.Tensor(X_test)).float().to(device))
            preds_test = preds_test.cpu().detach().numpy()
            targets_test = Variable(torch.Tensor(_test_label)).float().to(device).cpu().detach().numpy()
            score_test = np.mean(np.abs(targets_test-preds_test))
            print('TEST MAE:',score_test)



        scores.append(score_test)
        all_preds.append(preds_test)
        all_targets.append(targets_test)
        del X_train, _train_label, X_test, _test_label, X_val, _val_label
        del tr, val, ts

Subjects val are [5, 6, 7, 8]
Subjects val are 5
tensor(-0.0005, dtype=torch.float64) tensor(85.9549, dtype=torch.float64)
tensor(-0.5196, dtype=torch.float64) tensor(0.3501, dtype=torch.float64)
tensor(0.1493, dtype=torch.float64) tensor(0.6334, dtype=torch.float64)
tensor(0.3673, dtype=torch.float64) tensor(0.4099, dtype=torch.float64)
TEST MAE: 7.4337406
Subjects val are 6
tensor(-0.0005, dtype=torch.float64) tensor(85.9549, dtype=torch.float64)
tensor(-0.5196, dtype=torch.float64) tensor(0.3501, dtype=torch.float64)
tensor(0.1493, dtype=torch.float64) tensor(0.6334, dtype=torch.float64)
tensor(0.3673, dtype=torch.float64) tensor(0.4099, dtype=torch.float64)
TEST MAE: 4.21625
Subjects val are 7
tensor(-0.0005, dtype=torch.float64) tensor(85.9549, dtype=torch.float64)
tensor(-0.5196, dtype=torch.float64) tensor(0.3501, dtype=torch.float64)
tensor(0.1493, dtype=torch.float64) tensor(0.6334, dtype=torch.float64)
tensor(0.3673, dtype=torch.float64) tensor(0.4099, dtype=torch.float64)
TE

In [26]:
def post_processing(model, x_test, y_test, post=True):

    post_MAE = []

    preds = np.copy(x_test) #it should be model(x_test)

    n = 10 #number_of_examples
    thresh = 10


    for i in range(n,len(preds)):
        if post:
            running_upper = np.mean(preds[(i-n):i])*(100+thresh)/100.0 #1.1
            running_lower = np.mean(preds[(i-n):i])*(100-thresh)/100.0 #0.9

            if preds[i] > running_upper:
                preds[i] = running_upper
                #print(preds[i])
            if preds[i] < running_lower:
                preds[i] = running_lower
                #print(preds[i])

    for j in range(len(preds)):
        mae = np.abs(preds[j]-y_test[j])
        post_MAE.append(mae)

    return np.mean(post_MAE)

In [27]:
post_maes = []
for i in range(15):
    mae = post_processing(model, all_preds[i], all_targets[i])
    post_maes.append(mae)
print(np.mean(post_maes))

4.0269256


In [35]:
# print all scores for the subjects
flattent_subjects = [item for row in val_sets for item in row]
for i, sub in enumerate(flattent_subjects):
  print("Subject {} has MAE: {}".format(sub,post_maes[i]))

Subject 5 has MAE: 6.947514057159424
Subject 6 has MAE: 3.7082343101501465
Subject 7 has MAE: 2.3935508728027344
Subject 8 has MAE: 8.170137405395508
Subject 9 has MAE: 6.192663192749023
Subject 10 has MAE: 2.596329689025879
Subject 11 has MAE: 3.8525259494781494
Subject 12 has MAE: 5.219122409820557
Subject 13 has MAE: 1.9768399000167847
Subject 14 has MAE: 3.1287477016448975
Subject 15 has MAE: 2.793991804122925
Subject 1 has MAE: 3.7777018547058105
Subject 2 has MAE: 3.037588596343994
Subject 3 has MAE: 2.201739549636841
Subject 4 has MAE: 4.407191753387451
