In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import torch
from torch import nn
import matplotlib.pyplot as plt
from sklearn.model_selection import ShuffleSplit
import os
# device = torch.device("cuda:0")
device = torch.device("cpu")

In [2]:
data_path = '../data/Training Data/Normal'
normal_subj_files = os.listdir(data_path)

df_list = list()
for idx, fname in enumerate(normal_subj_files):
    df_temp = pd.read_csv(f'{data_path}/{normal_subj_files[0]}', index_col=0)
    df_temp['subj'] = np.repeat(idx, len(df_temp))

    df_list.append(df_temp)
df = pd.concat(df_list)

In [3]:
class model_ann(nn.Module):
    def __init__(self, input_size, output_size, layer_size):
        super(model_ann, self).__init__()
        self.input_size,  self.layer_size, self.output_size = input_size, layer_size, output_size

        #List layer sizes
        self.layer_hidden = np.concatenate([[input_size], layer_size, [output_size]])
        
        #Compile layers into lists
        self.layer_list = nn.ModuleList(
            [nn.Linear(in_features=self.layer_hidden[idx], out_features=self.layer_hidden[idx+1]) for idx in range(len(self.layer_hidden)-1)] )        
 
    def forward(self, x):
        #Encoding step
        for idx in range(len(self.layer_list)):
            x = torch.tanh(self.layer_list[idx](x))

        return x

class model_ann_autoencoder(nn.Module):
    def __init__(self, input_size, output_size, encoder_layer_size, decoder_layer_size, bottleneck = 10):
        super(model_ann_autoencoder, self).__init__()
        self.input_size, self.output_size = input_size, output_size
        self.encoder_layer_size, self.decoder_layer_size = encoder_layer_size, decoder_layer_size

        self.encoder = model_ann(input_size, bottleneck, layer_size=self.encoder_layer_size)
        self.decoder = model_ann(bottleneck, input_size, layer_size=self.decoder_layer_size)

    def forward(self, x):
        out = self.encoder(x)
        out = self.decoder(out)

        return out

In [4]:
class SEE_Dataset(torch.utils.data.Dataset):
     def __init__(self, cv_dict, fold, partition, df, device=device):
          self.cv_dict = cv_dict
          self.fold = fold
          self.partition = partition
          self.df = df

          self.subj_idx = cv_dict[fold][partition]
          self.num_subj = len(self.subj_idx) 
          self.X_tensor = self.process_dfs(self.df)

     def __len__(self):
        #'Denotes the total number of samples'
        return self.num_subj

     def process_dfs(self, df):
          df_filtered = df[np.in1d(df['subj'], self.subj_idx)]
          return torch.tensor(df_filtered.values).to(device)
          
     def __getitem__(self, slice_index):
          return self.X_tensor[slice_index]

In [5]:
num_subjects = len(normal_subj_files)
cv_split = ShuffleSplit(n_splits=5, test_size=.25, random_state=0)
val_split = ShuffleSplit(n_splits=1, test_size=.25, random_state=0)
cv_dict = {}
for fold, (train_val_idx, test_idx) in enumerate(cv_split.split(np.arange(num_subjects))):
    for t_idx, v_idx in val_split.split(train_val_idx): #No looping, just used to split train/validation sets
        cv_dict[fold] = {'train_idx':train_val_idx[t_idx], 
                         'test_idx':test_idx, 
                         'validation_idx':train_val_idx[v_idx]} 

In [6]:
batch_size = 256
num_cores = 1

train_params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': num_cores, 'pin_memory':False}
train_eval_params = {'batch_size': batch_size, 'shuffle': False, 'num_workers': num_cores, 'pin_memory':False}
validation_params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': num_cores, 'pin_memory':False}
test_params = {'batch_size': batch_size, 'shuffle': False, 'num_workers': num_cores, 'pin_memory':False}


In [37]:
for batch_x in training_generator:
    pass

RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/users/ntolley/.conda/envs/metanets/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
  File "/users/ntolley/.conda/envs/metanets/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/users/ntolley/.conda/envs/metanets/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/tmp/ipykernel_1509750/1548013941.py", line 21, in __getitem__
    return self.X_tensor[slice_index]
RuntimeError: CUDA error: initialization error
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.



In [7]:
fold = 0

# Generators
training_set = SEE_Dataset(cv_dict, fold, 'train_idx', df)

training_generator = torch.utils.data.DataLoader(training_set, **train_params)
training_eval_generator = torch.utils.data.DataLoader(training_set, **train_eval_params)

validation_set = SEE_Dataset(cv_dict, fold, 'validation_idx', df)
validation_generator = torch.utils.data.DataLoader(validation_set, **validation_params)

testing_set = SEE_Dataset(cv_dict, fold, 'test_idx', df)
testing_generator = torch.utils.data.DataLoader(testing_set, **test_params)

data_arrays = (training_set, validation_set, testing_set)
generators = (training_generator, training_eval_generator, validation_generator, testing_generator)



In [21]:
#Helper function to pytorch train networks for decoding
def train_validate_model(model, optimizer, criterion, max_epochs, training_generator, validation_generator, device, print_freq=10, early_stop=20):
    train_loss_array = []
    validation_loss_array = []
    # Loop over epochs
    min_validation_loss, min_validation_std, min_validation_counter, min_validation_epoch = np.inf, np.inf, 0, 0
    for epoch in range(max_epochs):
        #___Train model___
        model.train()
        train_batch_loss = []
        validation_batch_loss = []
        for batch_x in training_generator:
            optimizer.zero_grad() # Clears existing gradients from previous epoch
            batch_x = batch_x.float().to(device)

            output = model(batch_x)
            train_loss = criterion(output, batch_x)
            train_loss.backward() # Does backpropagation and calculates gradients
            optimizer.step() # Updates the weights accordingly

            train_batch_loss.append(train_loss.item())
        
        train_loss_array.append(train_batch_loss)

        #___Evaluate Model___
        with torch.no_grad():
            model.eval()
            #Generate train set predictions
            for batch_x in validation_generator:
                batch_x = batch_x.float().to(device)

                output = model(batch_x)
                validation_loss = criterion(output, batch_x)

                validation_batch_loss.append(validation_loss.item())

        validation_loss_array.append(validation_batch_loss)

        #Compute average loss on batch
        train_epoch_loss = np.mean(train_batch_loss)
        train_epoch_std = np.std(train_batch_loss)
        validation_epoch_loss = np.mean(validation_batch_loss)
        validation_epoch_std = np.std(validation_batch_loss)

       #Check if validation loss reaches minimum 
        if validation_epoch_loss < min_validation_loss:
            print('*',end='')
            min_validation_loss = np.copy(validation_epoch_loss)
            min_validation_std = np.copy(validation_epoch_std)
            min_validation_counter = 0
            min_validation_epoch = np.copy(epoch+1)

            min_train_loss = np.copy(train_epoch_loss)
            min_train_std = np.copy(train_epoch_std)
            
        else:
            print('.',end='')
            min_validation_counter += 1

        #Print Loss Scores
        if (epoch+1)%print_freq == 0:
            print('')
            print('Epoch: {}/{} ...'.format(epoch+1, max_epochs), end=' ')
            print('Train Loss: {:.4f}  ... Validation Loss: {:.4f}'.format(train_epoch_loss,validation_epoch_loss))
        
        #Early stop if no validation improvement over set number of epochs
        if min_validation_counter > early_stop:
            print(' Early Stop; Min Epoch: {}'.format(min_validation_epoch))
            break

    loss_dict = {'min_validation_loss':min_validation_loss, 'min_validation_std':min_validation_std,'min_validation_epoch':min_validation_epoch, 
    'min_train_loss':min_train_loss, 'min_train_std':min_train_std,
    'train_loss_array':train_loss_array, 'validation_loss_array':validation_loss_array, 'max_epochs':max_epochs}
    return loss_dict

In [25]:
#Define hyperparameters
lr = 1e-1
weight_decay = 1e-4
max_epochs = 1000
input_size = training_set[:2].shape[1]
criterion = nn.MSELoss()

bottleneck = 10

encoder_layer_size = [100, 50, 20]
decoder_layer_size = [10, 50, 100]

model = model_ann_autoencoder(input_size=input_size, output_size=input_size,
                              encoder_layer_size=encoder_layer_size, decoder_layer_size=decoder_layer_size,
                              bottleneck=bottleneck).to(device)

# Define Loss, Optimizerints h
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

#Train model
loss_dict = train_validate_model(model, optimizer, criterion, max_epochs, training_generator, validation_generator, device, 10, 5)

*.*******.
Epoch: 10/1000 ... Train Loss: 279.4181  ... Validation Loss: 215.7805
..... Early Stop; Min Epoch: 9
