In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pandas as pd
import os
import glob
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from pathlib import Path

In [2]:


class TimeSeriesDataset(Dataset):
    def __init__(self, x_set, y_set, seq_len):
        self.x, self.y = x_set, y_set
        self.seq_len = seq_len

    def __len__(self):
        return int(np.ceil(len(self.x) / float(self.seq_len)))


    def __getitem__(self, idx):
        start_idx = idx * self.seq_len
        end_idx = (idx + 1) * self.seq_len

        batch_x = self.x[start_idx:end_idx]
        batch_y = self.y[start_idx:end_idx]

        # Count the occurrences of each row
        unique_rows, counts = np.unique(batch_y, axis=0, return_counts=True)

        # Get the index of the row with the highest count
        most_common_row_index = np.argmax(counts)

        # Get the most common row
        most_common_row = unique_rows[most_common_row_index]
        
        
        batch_y_bin = most_common_row

        # Convert NumPy arrays to PyTorch tensors
        batch_x = torch.from_numpy(batch_x)
        batch_y = torch.from_numpy(batch_y)
        batch_y_bin = torch.from_numpy(batch_y_bin)

        # Pad sequences to ensure they have the same length within the batch
        pad_len = self.seq_len - batch_x.shape[0]
        if pad_len > 0:
            pad_shape = (pad_len,) + batch_x.shape[1:]
            pad_shape_y = (pad_len,) + batch_y.shape[1:]

            batch_x = torch.cat([batch_x, torch.zeros(pad_shape)], dim=0)
            batch_y = torch.cat([batch_y, torch.zeros(pad_shape_y)], dim=0)

        return batch_x, batch_y, batch_y_bin

    def on_epoch_end(self):
        indices = np.arange(len(self.x))
        np.random.shuffle(indices)
        self.x = self.x[indices]
        self.y = self.y[indices]


In [3]:
# target output size of 5

class GlobalMaxPooling1D(nn.Module):

    def __init__(self, data_format='channels_last'):
        super(GlobalMaxPooling1D, self).__init__()
        self.data_format = data_format
        self.step_axis = 1 if self.data_format == 'channels_last' else 2

    def forward(self, input):
        return torch.max(input, axis=self.step_axis).values
    

m = GlobalMaxPooling1D()
input = torch.randn(1, 64, 8)
output = m(input)
print(output.shape)



torch.Size([1, 8])


In [4]:

class TransformerModel(nn.Module):
    
    def __init__(self, input_dim, output_dim, d_model, nhead, num_layers, dropout=0.1):
        super().__init__()
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dropout=dropout), num_layers=num_layers
        )
        
        self.max_pool = GlobalMaxPooling1D()
        self.fc = nn.Linear(input_dim, d_model)
        self.out = nn.Linear(d_model, output_dim)
        
    def forward(self, x):
        x = self.fc(x)
        
        x = self.max_pool(x)
  
        x = self.transformer(x)

        x = self.out(x)

        return x
    


In [5]:
def generate_data(subject_id, task, features, batch_size, seq_len):    
    
    csv_path = './ProcessedDatasets/' + task
    # csv_path = './Dataset'
    
    csv_files = glob.glob(csv_path + "/*.csv")
    
    train_df_list = []
    test_df_list = []
    
    for file in csv_files:
        if(subject_id in file):
            test_df_list.append(pd.read_csv(file))
#             print(file)
        else:
            train_df_list.append(pd.read_csv(file))
            

    print('Train Subject Trials: ',len(train_df_list))
    print('Test Subject Trials: ',len(test_df_list))
    
    # Concatenate all DataFrames
    train_df   = pd.concat(train_df_list, ignore_index=True)
    test_df   = pd.concat(test_df_list, ignore_index=True)

    
    lb = preprocessing.LabelBinarizer()

    train_labels= train_df.pop('label')
    train_features = train_df

    test_labels= test_df.pop('label')
    test_features = test_df


    all_class_names = ["G1", 'G2', 'G3', 'G4', 'G5', 'G6', 'G8', 'G9', 'G10', 'G11', 'G12', 'G13', 'G14', 'G15']
    lb.fit(all_class_names)

    train_labels = lb.transform(train_labels)
    test_labels = lb.transform(test_labels)
    
    train_x = train_features.to_numpy()
    train_y = train_labels

    test_x = test_features.to_numpy()
    test_y = test_labels
    
    train_x = train_x[:,:features]
    test_x = test_x[:,:features]
    

    train_dataset = TimeSeriesDataset(train_x, train_y, seq_len)
    test_dataset = TimeSeriesDataset(test_x, test_y, seq_len)
    
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

    return train_dataloader, test_dataloader
 
    
    
features = 10
batch_size = 30
seq_len = 30
output_dim = 14

train_dataloader, test_dataloader = generate_data("S02","Knot_Tying",features, batch_size, seq_len)

for idx,batch in enumerate(train_dataloader):
    
    print(idx, batch[0].shape ,batch[1].shape ,batch[2].shape)
    break

Train Subject Trials:  32
Test Subject Trials:  4
0 torch.Size([30, 30, 10]) torch.Size([30, 30, 14]) torch.Size([30, 14])


In [6]:
print(torch.__version__)

2.0.1+cu117


In [7]:



def train_loop(dataloader,model,optimizer,criterion, epochs):
    
    # training loop
    for epoch in range(epochs):
        running_loss = 0.0
        for i, batch in enumerate(dataloader):
            optimizer.zero_grad()
            x, y, y_seq = batch
            x = x.to(torch.float32)
            y = y.to(torch.float32)
            y_seq = y_seq.to(torch.float32)
            
            y_pred = model(x)

            # print(y_pred.shape, y_seq.shape)
            
            loss = criterion(y_pred, y_seq)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            
        print(f"Epoch {epoch+1}, Loss: {running_loss / len(dataloader):.6f}")


In [8]:
def evaluation_loop(dataloader, model,criterion):

    model.eval()

    total_accuracy = []
    for i, batch in enumerate(test_dataloader):
        x, y, y_seq = batch
        x = x.to(torch.float32)
        y = y.to(torch.float32)
        y_seq = y_seq.to(torch.float32)
        
        y_pred = model(x)
        
        total_inputs = 0
        true_pred = []
        
        
        for idx,y in enumerate(y_pred):
            
            total_inputs += 1
            
            output_argmax = torch.argmax(y)
            gt_argmax = torch.argmax(y_seq[idx])
        
            if(output_argmax == gt_argmax):
                true_pred.append(output_argmax)
                
            accuracy = len(true_pred)/total_inputs
            
            # print("Accuracy: ",accuracy)
            total_accuracy.append(accuracy)
            
            
                
        loss = criterion(y_pred, y_seq)
        # print(i, "Loss: ", loss)
        
    avg_accuracy = np.average(total_accuracy)
    print("Average accuracy: ", avg_accuracy)
    return avg_accuracy
    
    

In [9]:
features = 22
batch_size = 30
seq_len = 10
output_dim = 14

task = "Knot_Tying"

epochs = 100

# best config yet: d_model = 64,  nhead=4, num_layers=2
d_model = 64
nhead=4
num_layers=2

model = TransformerModel(input_dim=features, output_dim=output_dim, d_model=d_model, nhead=nhead, num_layers=num_layers)
optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

subjects = ['S02','S03','S04','S05','S06','S07','S08','S09']

accuracy = []
for subject in subjects:
    
    train_dataloader, test_dataloader = generate_data(subject,task,features, batch_size, seq_len)

    train_loop(dataloader=train_dataloader, model=model, optimizer=optimizer, criterion=criterion, epochs=epochs)
    
    acc = evaluation_loop(dataloader=test_dataloader, model=model, criterion=criterion)
    
    accuracy.append({'subject':subject, 'accuracy':acc})

print(accuracy)




Train Subject Trials:  32
Test Subject Trials:  4
Epoch 1, Loss: 1.673736
Epoch 2, Loss: 1.424627
Epoch 3, Loss: 1.292166
Epoch 4, Loss: 1.177386
Epoch 5, Loss: 1.086739
Epoch 6, Loss: 1.003562
Epoch 7, Loss: 0.947532
Epoch 8, Loss: 0.882792
Epoch 9, Loss: 0.835498
Epoch 10, Loss: 0.801189
Epoch 11, Loss: 0.751390
Epoch 12, Loss: 0.722252
Epoch 13, Loss: 0.682676
Epoch 14, Loss: 0.645591
Epoch 15, Loss: 0.612214
Epoch 16, Loss: 0.589057
Epoch 17, Loss: 0.555386
Epoch 18, Loss: 0.530337
Epoch 19, Loss: 0.501017
Epoch 20, Loss: 0.480705
Epoch 21, Loss: 0.468379
Epoch 22, Loss: 0.465215
Epoch 23, Loss: 0.432521
Epoch 24, Loss: 0.399681
Epoch 25, Loss: 0.379789
Epoch 26, Loss: 0.361954
Epoch 27, Loss: 0.343322
Epoch 28, Loss: 0.342154
Epoch 29, Loss: 0.339299
Epoch 30, Loss: 0.310688
Epoch 31, Loss: 0.290178
Epoch 32, Loss: 0.274740
Epoch 33, Loss: 0.268666
Epoch 34, Loss: 0.258436
Epoch 35, Loss: 0.233461
Epoch 36, Loss: 0.251513
Epoch 37, Loss: 0.244225
Epoch 38, Loss: 0.230365
Epoch 39,

KeyboardInterrupt: 

In [77]:
# 2. Create model save path 
MODEL_PATH = "./checkpoints/"
MODEL_NAME = "01_pytorch_workflow_model_0.pth"
MODEL_NAME = str(d_model) + "_" + str(nhead) + "_" + str(num_layers) + "_" + MODEL_NAME
MODEL_SAVE_PATH = MODEL_PATH +"/"+ MODEL_NAME



# 3. Save the model state dict 
print(f"Saving model to: {MODEL_SAVE_PATH}")

torch.save(obj=model.state_dict(), # only saving the state_dict() only saves the models learned parameters
           f=MODEL_SAVE_PATH) 


print("done saving!")

Saving model to: ./checkpoints//64_4_2_01_pytorch_workflow_model_0.pth
done saving!


In [94]:
def evaluation_loop():

    model.eval()

    total_accuracy = []
    for i, batch in enumerate(test_dataloader):
        x, y, y_seq = batch
        x = x.to(torch.float32)
        y = y.to(torch.float32)
        y_seq = y_seq.to(torch.float32)
        
        y_pred = model(x)
        
        total_inputs = 0
        true_pred = []
        
        
        for idx,y in enumerate(y_pred):
            
            total_inputs += 1
            
            output_argmax = torch.argmax(y)
            gt_argmax = torch.argmax(y_seq[idx])
        
            if(output_argmax == gt_argmax):
                true_pred.append(output_argmax)
                
            accuracy = len(true_pred)/total_inputs
            
            # print("Accuracy: ",accuracy)
            total_accuracy.append(accuracy)
            
            
                
        loss = criterion(y_pred, y_seq)
        # print(i, "Loss: ", loss)
        
    print("Average accuracy: ", np.average(total_accuracy))
    
    
evaluation_loop()

Average accuracy:  0.28562257379625866
