This notebook is inspired by [this](https://machinelearningmastery.com/cnn-models-for-human-activity-recognition-time-series-classification/) article.

## Architecture description
We will define the model as having two 1D CNN layers, followed by a dropout layer for regularization, then a pooling layer. It is common to define CNN layers in groups of two in order to give the model a good chance of learning features from the input data. CNNs learn very quickly, so the dropout layer is intended to help slow down the learning process and hopefully result in a better final model. The pooling layer reduces the learned features to 1/4 their size, consolidating them to only the most essential elements.

After the CNN and pooling, the learned features are flattened to one long vector, which is fused with features coming from other dimensions,  and pass through a fully connected layer before the output layer used to make a prediction. The fully connected layer ideally provides a buffer between the learned features and the output with the intent of interpreting the learned features before making a prediction.

For this model, we will use a standard configuration of 64 parallel feature maps and a kernel size of 3. The feature maps are the number of times the input is processed or interpreted, whereas the kernel size is the number of input time steps considered as the input sequence is read or processed onto the feature maps.

In [169]:
import torch
import os
import numpy as np
import csv
import math
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.metrics import accuracy_score

## Model 

In [None]:
class ConvNet1D(nn.Module):
    def __init__(self,n_features = 1, n_classes = 2, n_timesteps = 128, kernel = 5 ):
        super().__init__()
        self.n_features = n_features
        self.n_classes = n_classes
        self.n_timesteps = window_size
        self.kernel_size = kernel
        # with convolution
        self.conv1 = nn.Conv1d(in_channels=n_features, out_channels=64, kernel_size=self.kernel_size )
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=64, kernel_size=self.kernel_size )
        self.dropout = nn.Dropout(0)
        self.maxpool = nn.MaxPool1d(2)
        self.flatten = nn.Flatten()
        # Calculate the size of the input to the Linear layer
        input_size = 64 * ((self.n_timesteps - self.kernel_size*2) // 2)  # Adjust based on your specific input size
        self.fc1 = nn.Linear(input_size, 100)
        self.fc2 = nn.Linear(100, self.n_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.maxpool(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.softmax(x)
        return x


## Data 

In [113]:
data_path = 'C:/Users/khokhlovam/Documents/kotelnikov/data/data_lstm_test_complet.csv'
df =pd.read_csv(data_path, header=None, names=range(1564)) # 
df.head(15)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1554,1555,1556,1557,1558,1559,1560,1561,1562,1563
0,control,4,freq,17.4,11.2,30.5,19.2,8.8,22.9,6.9,...,,,,,,,,,,
1,control,4,flash,1.0092,1.5232,0.793,1.152,1.3376,1.0076,1.0074,...,,,,,,,,,,
2,control,4,bandwidth,7.7,3.6,19.0,7.0,2.6,10.7,3.9,...,,,,,,,,,,
3,control,4,timeSignal,0.098,0.238,0.256,0.528,0.536,0.802,0.85,...,,,,,,,,,,
4,control,4,time_sincelast,0.098,0.14,0.018,0.272,0.008,0.266,0.048,...,,,,,,,,,,
5,control,4,ampl,115.189682,181.916809,107.035546,113.025248,204.781645,459.905485,206.880764,...,,,,,,,,,,
6,control,4,duratSec,0.058,0.136,0.026,0.06,0.152,0.044,0.146,...,,,,,,,,,,
7,control,4,frac,0.442529,0.321429,0.622951,0.364583,0.295455,0.467249,0.565217,...,,,,,,,,,,
8,control,4,phase,-0.176576,1.975381,-0.215273,0.658635,0.088833,-0.061623,-2.434454,...,,,,,,,,,,
9,control,6,freq,36.8,19.2,26.4,15.8,22.0,21.1,10.6,...,,,,,,,,,,


### Pre-processing
We need to clean the data (remove NaNs, and take each time only 128 dimensionalities sub-sequences), and then one sample per patient will be:

[9, 128]

NB: one of the features, time, can be removed, but so far I am leaving everything in for simplicity.

In [116]:
# helper script to cut the data into sub-parts:
def cut_and_store(df, N, save_folder):
    '''
    df : dataset with expected structure
    N (int): sequence to take length
    save_folder: folder to save the dataset
    '''
    # prepare a PD frame 
    new_df = pd.DataFrame(columns=['file_name', 'label', 'patient_id'])  
    for i in range(0,len(df),9): # go for each patient 
        # for each patient, make csv files     
        label = df.iloc[[i]][0].values[0] 
        patient_id = df.iloc[[i]][1].values[0] 
        val_range = df.iloc[[i]].values.tolist()[0]
        valid_indexes = [i for i in range(3,len(val_range)) if str(val_range[i]) !='nan']
        current_feat =  df.iloc[[i]].values.tolist()[0]
        current_feat_clean = [current_feat[x] for x in valid_indexes]
        num_wavetrains = len(current_feat_clean)   #update once based on frequency, overwritten next      
        for k in range(math.floor(num_wavetrains/N)-1): # for each window
            print(f'Patient {patient_id}, subset {k}, label {label}')            
            with open(save_folder + f'/{i}_{k}.csv', 'w', newline='') as myfile:
                wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
                for j in range(i, i+9): # all features used
                   current_feat =  df.iloc[[j]].values.tolist()[0]
                   current_feat_clean = [current_feat[x] for x in valid_indexes]
                   wr.writerow(current_feat_clean[k:k+N])
                   new_df.loc[len(new_df.index)] = [f'{i}_{k}.csv',label, patient_id] 
    # save for dataloader
    new_df.to_csv(save_folder+f'/all_data_{N}.csv')  


In [117]:
cut_and_store(df, 128, 'C:/Users/khokhlovam/Documents/kotelnikov/data/lstm_trials/n128_9_features')

Patient 4, subset 0, label control
Patient 4, subset 1, label control
Patient 4, subset 2, label control
Patient 4, subset 3, label control
Patient 4, subset 4, label control
Patient 4, subset 5, label control
Patient 4, subset 6, label control
Patient 4, subset 7, label control
Patient 4, subset 8, label control
Patient 6, subset 0, label control
Patient 6, subset 1, label control
Patient 6, subset 2, label control
Patient 6, subset 3, label control
Patient 6, subset 4, label control
Patient 6, subset 5, label control
Patient 6, subset 6, label control
Patient 6, subset 7, label control
Patient 3, subset 0, label control
Patient 3, subset 1, label control
Patient 3, subset 2, label control
Patient 3, subset 3, label control
Patient 3, subset 4, label control
Patient 3, subset 5, label control
Patient 3, subset 6, label control
Patient 3, subset 7, label control
Patient 0, subset 0, label control
Patient 0, subset 1, label control
Patient 0, subset 2, label control
Patient 0, subset 3,

In [130]:
from torch.utils.data import Dataset, DataLoader
class PDControlDataset(Dataset):
    """Neurogenertive features dataset."""

    def __init__(self, csv_file, root_dir):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
           
        """
        self.df = pd.read_csv(csv_file) # 
        self.root_dir = root_dir
        
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        file_path = os.path.join(self.root_dir,
                                self.df.iloc[idx, 1])
        label = self.convert_label(self.df.iloc[idx, 2])
        features  = pd.read_csv(file_path,  header=None).values
        sample = {'data': features, 'label': label}


        return sample
    
    def convert_label(self, label):
        if label == 'control':
            return 0
        elif label == 'PDL':
            return 1
        elif label == 'PDR':
            return 1
        elif label == 'ET':
            return 2    
        else:
            raise Exception('ONLY control, PDL, PDR en ET are currently supported')

In [142]:
folder_p = 'C:/Users/khokhlovam/Documents/kotelnikov/data/lstm_trials/n128_9_features/'              
dataset = PDControlDataset(folder_p +'all_data_128.csv', root_dir=folder_p)

In [134]:
# check data statistics
labels = [0]*len(dataset)
for i in range(len(dataset)):
    sample = dataset[i]
    labels[i] = sample['label']
values, counts = np.unique(labels, return_counts=True)
print(values, counts)

[0 1 2] [ 612 1080  945]


In [150]:
for i in range(len(dataset)):
    sample = dataset[i]
    labels[i] = sample['label']
    data = sample['data']
    print(data.shape)
    break

(9, 128)


In [153]:
dataloader = DataLoader(dataset, batch_size=1,shuffle=True)  
for sample in dataloader:
    print(sample['data'].shape)
    break

torch.Size([1, 9, 128])


## Training

In [None]:
n_features = 9 #
n_classes = 3
batch_size = 4
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = ConvNet1D(n_features = 9, n_classes = 3, n_timesteps = 128)
# opt and loss
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
# Number of training epochs
num_epochs = 35
# data
dataloader = DataLoader(dataset, batch_size=batch_size,shuffle=True)  

for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    total_loss = 0.0
    total_predictions = []
    total_labels = []
    for batch in dataloader:  # Use your data loader
        batch_data = batch['data'].float().to(device)
        batch_labels = batch['label'].long().to(device)
        optimizer.zero_grad()  # Zero the gradients
        outputs = model(batch_data)
        loss = criterion(outputs, batch_labels)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        total_predictions += outputs.argmax(dim=1).cpu().numpy().tolist()
        total_labels += batch_labels.cpu().numpy().tolist()

    # Calculate and print the average loss for this epoch
    average_loss = total_loss / len(dataloader)
    
    # Calculate accuracy
    accuracy = accuracy_score(total_labels, total_predictions)
    
    print(f'Epoch [{epoch+1}/{num_epochs}] - Loss: {average_loss:.4f} - Accuracy: {accuracy:.4f}')


# After training, you can save the model
torch.save(model.state_dict(), '35_epoch.pth')

Epoch [1/35] - Loss: 1.1933 - Accuracy: 0.3587
Epoch [2/35] - Loss: 1.1935 - Accuracy: 0.3584
Epoch [3/35] - Loss: 1.1935 - Accuracy: 0.3584
Epoch [4/35] - Loss: 1.1935 - Accuracy: 0.3584
Epoch [5/35] - Loss: 1.1935 - Accuracy: 0.3584
Epoch [6/35] - Loss: 1.1935 - Accuracy: 0.3584
Epoch [7/35] - Loss: 1.1935 - Accuracy: 0.3584
Epoch [8/35] - Loss: 1.1935 - Accuracy: 0.3584
Epoch [9/35] - Loss: 1.1935 - Accuracy: 0.3584
Epoch [10/35] - Loss: 1.1924 - Accuracy: 0.3584
Epoch [11/35] - Loss: 1.1935 - Accuracy: 0.3584
Epoch [12/35] - Loss: 1.1924 - Accuracy: 0.3584
Epoch [13/35] - Loss: 1.1924 - Accuracy: 0.3584
Epoch [14/35] - Loss: 1.1924 - Accuracy: 0.3584
Epoch [15/35] - Loss: 1.1935 - Accuracy: 0.3584
Epoch [16/35] - Loss: 1.1935 - Accuracy: 0.3584
Epoch [17/35] - Loss: 1.1935 - Accuracy: 0.3584
Epoch [18/35] - Loss: 1.1935 - Accuracy: 0.3584
Epoch [19/35] - Loss: 1.1935 - Accuracy: 0.3584
Epoch [20/35] - Loss: 1.1935 - Accuracy: 0.3584
Epoch [21/35] - Loss: 1.1935 - Accuracy: 0.3584
E

## Verification of the model based on Keras model from the author

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv1d_6 (Conv1D)            (None, 126, 64)           1792      
_________________________________________________________________
conv1d_7 (Conv1D)            (None, 124, 64)           12352     
_________________________________________________________________
dropout_2 (Dropout)          (None, 124, 64)           0         
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 62, 64)            0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 3968)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 100)               396900    
_________________________________________________________________
dense_4 (Dense)              (None, 2)                 202       
=================================================================
Total params: 411,246
Trainable params: 411,246
Non-trainable params: 0


In [91]:
# torch.Size([64, 64, 126])
# torch.Size([64, 64, 62])
# torch.Size([64, 3968])
# torch.Size([64, 100])
# torch.Size([64, 2])

print(model)

ConvNet1D(
  (conv1): Conv1d(1, 64, kernel_size=(3,), stride=(1,))
  (relu): ReLU()
  (conv2): Conv1d(64, 64, kernel_size=(3,), stride=(1,))
  (dropout): Dropout(p=0.5, inplace=False)
  (maxpool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=3968, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=2, bias=True)
  (softmax): Softmax(dim=1)
)


In [None]:
class AttentionLayer(nn.Module):
    def __init__(self):
        super(AttentionLayer, self).__init__()

    def forward(self, x):
        # Calculate attention weights (assuming x is the output from the previous layer)
        attention_weights = torch.softmax(x, dim=1)
        # Apply attention to the input
        attended_input = torch.sum(x * attention_weights, dim=1)
        return attended_input

In [None]:
class CustomModel(nn.Module):
    def __init__(self, n_timesteps, n_features, n_outputs):
        super(CustomModel, self).__init()
        self.conv1 = nn.Conv1d(in_channels=n_features, out_channels=64, kernel_size=3)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=64, kernel_size=3)
        self.dropout = nn.Dropout(0.5)
        self.maxpool = nn.MaxPool1d(2)
        self.flatten = nn.Flatten()
        input_size = 64 * ((n_timesteps - 4) // 2)
        self.fc1 = nn.Linear(input_size, 100)
        self.attention = AttentionLayer()  # Add the attention layer
        self.fc2 = nn.Linear(100, n_outputs)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.maxpool(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.attention(x)  # Apply attention
        x = self.fc2(x)
        x = self.softmax(x)
        return x

# Example input shape
n_timesteps = 128
n_features = 1
n_outputs = 10

model = CustomModel(n_timesteps, n_features, n_outputs)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())
# In this example, an AttentionLayer is added before the last fully connected layer (fc2). The attention mechanism applies attention weights to the output of the previous layer and calculates a weighted sum, which is then passed to the last layer. You can customize the attention mechanism further based on your specific requirements.





