In [7]:
import pandas as pd
import numpy as np
import os
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch import optim
import torch
from torchvision.transforms import Compose
import plotly.express as ex
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from scipy import signal
import heartpy as hp
from biosppy.signals import ecg

In [2]:
df = pd.read_csv('ds01_withStress.csv')
df.drop('Elapsed time', axis = 1, inplace = True)
df

Unnamed: 0,Driver,ECG,EMG,foot GSR,hand GSR,HR,marker,RESP,stress
0,06,-0.048,0.124,9.051,19.072,90.0,12.36,39.97,1
1,06,-0.052,0.124,9.051,19.072,90.0,12.36,39.97,1
2,06,-0.057,0.124,9.051,19.072,90.0,12.36,39.97,1
3,06,-0.063,0.124,9.051,19.072,90.0,12.36,39.97,1
4,06,-0.067,0.124,9.051,19.072,90.0,12.36,39.97,1
...,...,...,...,...,...,...,...,...,...
48040,17b,-0.022,0.124,5.322,8.557,63.0,10.15,37.48,-1
48041,17b,-0.029,0.124,5.322,8.557,63.0,10.15,37.48,-1
48042,17b,-0.029,0.124,5.322,8.557,63.0,10.15,37.48,-1
48043,17b,-0.016,0.124,5.322,8.557,63.0,10.15,37.48,-1


In [3]:
# Testing SCAE on '06' driver
dr = '06'

In [14]:
temp_df = df.loc[df['Driver']==dr, ['ECG']]
temp_df

Unnamed: 0,ECG
0,-0.048
1,-0.052
2,-0.057
3,-0.063
4,-0.067
...,...
4955,-0.109
4956,-0.111
4957,-0.107
4958,-0.109


In [33]:
# Preprocessing using butter fillter and centralizing:
w = 0.05
b, a = signal.butter(4, w, 'low')
temp_df['Processed ECG'] = signal.filtfilt(b, a, temp_df['ECG'])
temp_df['Processed ECG'] = temp_df['Processed ECG'] - temp_df['Processed ECG'].mean()

# temp_df.plot()
fig = go.Figure()
fig.add_trace(go.Scatter(x=[i for i in range(len(temp_df))], y=temp_df['ECG'],
                    mode='lines',
                    name='RaW', 
                    line_color='rgba(0,0,255, 0.5)')
             )

fig.add_trace(go.Scatter(x=[i for i in range(len(temp_df))], y=temp_df['Processed ECG'],
                    mode='lines',
                    name='Processed', 
                    line_color='rgba(0,165,0, 0.5)')
             )


fig.show()

Cool, so it's working well. Now, applying this to the entire dataframe.

In [39]:
w = 0.05
b, a = signal.butter(4, w, 'low')
drivers = df['Driver'].value_counts().keys()
df['Processed ECG'] = [0 for _ in range(len(df))]
for dr in drivers:
    start, end = df[df['Driver']==dr].index[0], df[df['Driver']==dr].index[-1] 
    temp_df = df.loc[df['Driver'] == dr, :]
    df.loc[start:end, 'Processed ECG'] = signal.filtfilt(b, a, temp_df['ECG'])
    
df['Processed ECG'] = df['Processed ECG'] - df['Processed ECG'].mean()

# Approachs to make MaxUpPool layer:

**Approac 1 [My Approach]**
```
up_sample = torch.zeros(inpTensor.shape[0], inpTensor.shape[1], inpTensor.shape[2] * 2)
up_sample[:, :, ::2] = inpTensor
```

**Approach 2: [https://discuss.pytorch.org/t/how-to-perform-max-up-pooling-on-tensor/79489]**
```
up_sample = torch.repeat_interleave(inpTensor,2, dim = 2)
up_sample.index_fill_(2, torch.arange(1, up_sample.size()[2], 2), 0)
up_sample.shape

```
I ran both the approaches and they both are giving same results. The only difference is, we're creating a new tensor in the first one and we're expanding the existing tensor in the 2nd approach. In case of the first approach, the Autograd engine is able to track the old graph of inpTensor even after it sliced into the up_sampled tensor! (Yes ! PyTorch's autograd is magic). 

In [48]:
# Making NN:
class SCAE(nn.Module):
    
    def __init__(self):
        super().__init__()        
        # Encoding block:
        self.conv1 = nn.Conv1d(in_channels = 1, out_channels = 2, kernel_size = 1)
        self.conv2 = nn.Conv1d(in_channels = 2, out_channels = 16, kernel_size = 16)
        self.conv3 = nn.Conv1d(in_channels = 16, out_channels = 4, kernel_size = 32)
        self.conv4 = nn.Conv1d(in_channels = 4, out_channels = 8, kernel_size = 64)
        self.conv5 = nn.Conv1d(in_channels = 8, out_channels = 4, kernel_size = 64)
        self.maxpool = nn.MaxPool1d(kernel_size = 2)
        self.conv6 = nn.Conv1d(in_channels = 4, out_channels = 2, kernel_size = 64)
        self.batchnorm1d = nn.BatchNorm1d(num_features = 2)
        self.conv7 = nn.Conv1d(in_channels = 2, out_channels = 1, kernel_size = 1)
        
        # Decoding block:
        self.convT1 = nn.ConvTranspose1d(in_channels = 1, out_channels = 2, kernel_size = 1)
        self.convT2 = nn.ConvTranspose1d(in_channels = 2, out_channels = 4, kernel_size = 64)
        self.convT3 = nn.ConvTranspose1d(in_channels = 4, out_channels = 8, kernel_size = 64)
        self.convT4 = nn.ConvTranspose1d(in_channels = 8, out_channels = 4, kernel_size = 64)
        self.convT5 = nn.ConvTranspose1d(in_channels = 4, out_channels = 2, kernel_size = 32)
        self.convT6 = nn.ConvTranspose1d(in_channels = 2, out_channels = 2, kernel_size = 16)
        self.convT7 = nn.ConvTranspose1d(in_channels = 2, out_channels = 1, kernel_size = 1)
        
        
    def maxUpPool(self, inpTensor):
        up_sampled = torch.repeat_interleave(inpTensor,2, dim = 2)
        up_sampled.index_fill_(2, torch.arange(1, y.size()[2], 2), 0)
        return up_sampled
        
    def maxUpPool2(self, inpTensor):
        temp = torch.zeros(inpTensor.shape[0], inpTensor.shape[1], inpTensor.shape[2] * 2)
        temp[:, :, ::2] = inpTensor
        return temp
        
    def forward(self, x):
        
        # Encoding:
        enc = torch.tanh(self.conv1(x))
        enc = torch.tanh(self.conv2(enc))
        enc = torch.tanh(self.conv3(enc))
        enc = torch.tanh(self.conv4(enc))
        enc = torch.tanh(self.conv5(enc))
        enc = self.maxpool(enc)
#         print(enc.shape)
        enc = F.tanh(self.conv6(enc))
        enc = F.tanh(self.conv7(enc))
        
        # Decoding:
        dec = torch.tanh(self.convT1(enc))
        dec = torch.tanh(self.convT2(dec))
#         dec = self.maxUpPool(dec)
        dec = self.maxUpPool2(dec)
        dec = torch.tanh(self.convT3(dec))
        dec = torch.tanh(self.convT4(dec))
        dec = torch.tanh(self.convT5(dec))
        dec = torch.tanh(self.convT6(dec))
        dec = torch.tanh(self.convT7(dec))
        return dec.view(-1, 300)

In [40]:
class SCAEdataset(Dataset):
    def __init__(self,  DF, driver, column = 'ECG', transform = None):
        self.df = DF.loc[DF['Driver'] == driver, [column, 'stress']]
        self.df = self.df[: len(self.df) - len(self.df)%300]
        self.column = column
        self.driver = driver
        self.transform = transform
        
    def __len__(self):
        return len(self.df)//300
    
    def __getitem__(self, idx):
        signal = self.df[300*idx : 300*idx + 300][self.column]
        stress = self.df[300*idx : 300*idx + 300]['stress']
        
        if self.transform:
            signal = self.transform(signal)
            
        signal = torch.tensor(list(signal)).reshape(1, 300)
        stress = torch.tensor(list(stress)).reshape(1, 300)
            
        output = {self.column: signal, 'stress':stress}
            
        return output
            

column = 'Processed ECG'
ecg = SCAEdataset(df, driver = '06', column = column)

loader = DataLoader(ecg, batch_size = 5)

for i, batch in enumerate(loader):
    print(i, batch[column].shape, batch['stress'].shape)

0 torch.Size([5, 1, 300]) torch.Size([5, 1, 300])
1 torch.Size([5, 1, 300]) torch.Size([5, 1, 300])
2 torch.Size([5, 1, 300]) torch.Size([5, 1, 300])
3 torch.Size([1, 1, 300]) torch.Size([1, 1, 300])


In [49]:
mdl = SCAE()
criterion = nn.MSELoss()
optimizer = optim.SGD(mdl.parameters(), lr = 0.01, momentum = 0.5)
epochs = 50
column = 'ECG'
loss_history = []
for epoch in range(epochs):
    total_loss = 0
    for i, batch in enumerate(loader):
        pred = mdl(batch[column])
        loss = criterion(pred, batch[column])
                
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss+=loss.item()
        
    loss_history.append(total_loss)    
    print(epoch+1, ': Total Loss:', total_loss)


Using a target size (torch.Size([5, 1, 300])) that is different to the input size (torch.Size([5, 300])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.


Using a target size (torch.Size([1, 1, 300])) that is different to the input size (torch.Size([1, 300])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.



1 : Total Loss: 2.0357427299022675
2 : Total Loss: 1.7037691473960876
3 : Total Loss: 1.254079908132553
4 : Total Loss: 0.7575462311506271
5 : Total Loss: 0.3806428164243698
6 : Total Loss: 0.2211475521326065
7 : Total Loss: 0.1805354431271553
8 : Total Loss: 0.17170260101556778
9 : Total Loss: 0.16901617497205734
10 : Total Loss: 0.16746613383293152
11 : Total Loss: 0.1661713719367981
12 : Total Loss: 0.16497072204947472
13 : Total Loss: 0.16382957994937897
14 : Total Loss: 0.16273771598935127
15 : Total Loss: 0.16169043630361557
16 : Total Loss: 0.16068467125296593
17 : Total Loss: 0.15971798449754715
18 : Total Loss: 0.15878823027014732
19 : Total Loss: 0.15789347141981125
20 : Total Loss: 0.15703189745545387
21 : Total Loss: 0.15620191767811775
22 : Total Loss: 0.15540189668536186
23 : Total Loss: 0.1546303927898407
24 : Total Loss: 0.15388603135943413
25 : Total Loss: 0.15316756069660187
26 : Total Loss: 0.15247372537851334
27 : Total Loss: 0.1518034152686596
28 : Total Loss: 0.15

### Trying on Driver 06

In [61]:
column = 'Processed ECG'
ecg = SCAEdataset(df, driver = '06', column = column)
loader = DataLoader(ecg, batch_size = 5)

mdl = SCAE()
criterion = nn.MSELoss()
optimizer = optim.SGD(mdl.parameters(), lr = 0.1, momentum = 0.5)
epochs = 500
loss_history = []
for epoch in tqdm(range(epochs)):
    total_loss = 0
    for i, batch in enumerate(loader):
        pred = mdl(batch[column])
        loss = criterion(pred, batch[column])
                
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss+=loss.item()
        
    loss_history.append(total_loss)    
#     print(epoch+1, ': Total Loss:', total_loss)
    
fig = go.Figure()
fig.add_trace(go.Scatter(x = [i for i in range(len(loss_history))], y = loss_history, mode = 'lines'))

100%|██████████| 500/500 [00:53<00:00,  9.38it/s]


In [62]:
# Visualizing the model output: 
X_predicted = []
X_input = []
for i in tqdm(range(len(ecg))):
    X_input+=ecg[i][column].reshape(300).tolist()
    xx = mdl(ecg[i][column].reshape(-1, 1, 300))
    X_predicted+=xx.reshape(300).tolist()
    
len(X_input), len(X_predicted)
indx = [i for i in range(len(X_input))]

fig = go.Figure()

fig.add_trace(go.Scatter(x = indx, y = X_input, mode = 'lines', name='Input'))
fig.add_trace(go.Scatter(x = indx, y = X_predicted, mode = 'lines', name='Predicted'))

fig.show()


100%|██████████| 16/16 [00:00<00:00, 155.42it/s]


### Trying on Driver 17b

In [63]:
column = 'Processed ECG'
ecg = SCAEdataset(df, driver = '17b', column = column)
loader = DataLoader(ecg, batch_size = 5)

mdl = SCAE()
criterion = nn.MSELoss()
optimizer = optim.SGD(mdl.parameters(), lr = 0.1, momentum = 0.5)
epochs = 1000

loss_history = []
for epoch in tqdm(range(epochs)):
    total_loss = 0
    for i, batch in enumerate(loader):
        pred = mdl(batch[column])
        loss = criterion(pred, batch[column])
                
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss+=loss.item()
        
    loss_history.append(total_loss)    
#     print(epoch+1, ': Total Loss:', total_loss)
    
fig = go.Figure()
fig.add_trace(go.Scatter(x = [i for i in range(len(loss_history))], y = loss_history, mode = 'lines'))

100%|██████████| 1000/1000 [01:45<00:00,  9.45it/s]


In [64]:
# Visualizing the model output: 
X_predicted = []
X_input = []
for i in tqdm(range(len(ecg))):
    X_input+=ecg[i][column].reshape(300).tolist()
    xx = mdl(ecg[i][column].reshape(-1, 1, 300))
    X_predicted+=xx.reshape(300).tolist()
    
len(X_input), len(X_predicted)
indx = [i for i in range(len(X_input))]

fig = go.Figure()

fig.add_trace(go.Scatter(x = indx, y = X_input, mode = 'lines', name='Input'))
fig.add_trace(go.Scatter(x = indx, y = X_predicted, mode = 'lines', name='Predicted'))

fig.show()


100%|██████████| 16/16 [00:00<00:00, 137.84it/s]


### Trying on Driver 11

In [68]:
column = 'EMG'
ecg = SCAEdataset(df, driver = '11', column = column)
loader = DataLoader(ecg, batch_size = 5)

mdl = SCAE()
criterion = nn.MSELoss()
optimizer = optim.SGD(mdl.parameters(), lr = 0.1, momentum = 0.5)
epochs = 1000

loss_history = []
for epoch in tqdm(range(epochs)):
    total_loss = 0
    for i, batch in enumerate(loader):
        pred = mdl(batch[column])
        loss = criterion(pred, batch[column])
                
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss+=loss.item()
        
    loss_history.append(total_loss)    
#     print(epoch+1, ': Total Loss:', total_loss)
    
fig = go.Figure()
fig.add_trace(go.Scatter(x = [i for i in range(len(loss_history))], y = loss_history, mode = 'lines'))


nn.functional.tanh is deprecated. Use torch.tanh instead.


Using a target size (torch.Size([5, 1, 300])) that is different to the input size (torch.Size([5, 300])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.


Using a target size (torch.Size([1, 1, 300])) that is different to the input size (torch.Size([1, 300])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.

100%|██████████| 1000/1000 [01:46<00:00,  9.42it/s]


In [69]:
# Visualizing the model output: 
X_predicted = []
X_input = []
for i in tqdm(range(len(ecg))):
    X_input+=ecg[i][column].reshape(300).tolist()
    xx = mdl(ecg[i][column].reshape(-1, 1, 300))
    X_predicted+=xx.reshape(300).tolist()
    
len(X_input), len(X_predicted)
indx = [i for i in range(len(X_input))]

fig = go.Figure()

fig.add_trace(go.Scatter(x = indx, y = X_input, mode = 'lines', name='Input'))
fig.add_trace(go.Scatter(x = indx, y = X_predicted, mode = 'lines', name='Predicted'))

fig.show()


100%|██████████| 16/16 [00:00<00:00, 141.16it/s]


In [70]:
df

Unnamed: 0,Driver,ECG,EMG,foot GSR,hand GSR,HR,marker,RESP,stress,Processed ECG
0,06,-0.048,0.124,9.051,19.072,90.0,12.36,39.97,1,-0.007574
1,06,-0.052,0.124,9.051,19.072,90.0,12.36,39.97,1,-0.011120
2,06,-0.057,0.124,9.051,19.072,90.0,12.36,39.97,1,-0.014670
3,06,-0.063,0.124,9.051,19.072,90.0,12.36,39.97,1,-0.018194
4,06,-0.067,0.124,9.051,19.072,90.0,12.36,39.97,1,-0.021663
...,...,...,...,...,...,...,...,...,...,...
48040,17b,-0.022,0.124,5.322,8.557,63.0,10.15,37.48,-1,0.043266
48041,17b,-0.029,0.124,5.322,8.557,63.0,10.15,37.48,-1,0.042620
48042,17b,-0.029,0.124,5.322,8.557,63.0,10.15,37.48,-1,0.041962
48043,17b,-0.016,0.124,5.322,8.557,63.0,10.15,37.48,-1,0.041315
