In [1]:
## Import dependencies

import numpy as np
from os import path
import matplotlib.pyplot as plt
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import torch
import torch.nn as nn
import torch.optim as optim
import copy
import time


# Set default plot size
plt.rcParams["figure.figsize"] = (30,20)

# Define number of epochs used later in training
num_epochs = 1000

# Identification part of the filenames
base_name = 'MimicTissueRange'

## Train Transformer on dataset of 44 metabolites

In [2]:
# Name variable used for saving model metrics, name should reflect model used, dataset used, and other information such as # of epochs
ModelName = f"Transformer_44met_{base_name}Dist_TrainingAndValidation_ForManuscript_" + str(num_epochs) +"ep"

# Set the random seed
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/ModelPerformanceMetrics/') 
seed = 1
torch.manual_seed(seed)
np.save(ModelName + "_Seed.npy", seed)

In [3]:
## Load training and testing datasets, validation datasets, and representative example spectra 

# Switch to directory containing datasets
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/GeneratedDataAndVariables')

# Load training data and max value from testing and training datasets
spectra = np.load(f'Dataset44_{base_name}_ForManuscript_Spec.npy')
conc1 = np.load(f'Dataset44_{base_name}_ForManuscript_Conc.npy')

In [4]:
## Prepare to switch data from CPU to GPU

# Check if CUDA (GPU support) is available
if torch.cuda.is_available():
    device = torch.device("cuda")          # A CUDA device object
    print("Using GPU for training.")
else:
    device = torch.device("cpu")           # A CPU object
    print("CUDA is not available. Using CPU for training.")

Using GPU for training.


In [5]:
## Set up data for testing and training

# Split into testing and training data
X_train, X_test, y_train, y_test = train_test_split(spectra, conc1, test_size = 0.2, random_state = 1)

# Tensorize and prepare datasets
X_train = torch.tensor(X_train).float()
y_train = torch.tensor(y_train).float()
X_test = torch.tensor(X_test).float()
y_test = torch.tensor(y_test).float()


# Move the input data to the GPU device
X_train = X_train.to(device)
X_test = X_test.to(device)
#spectraVal = torch.tensor(spectraVal).float().to(device)   # Confusing names, these spectra are the 5000 spectra generated like the training dataset
#ValSpectra = torch.tensor(ValSpectra).float().to(device)   # Confusing names, these spectra are the 10 representative example spectra

# Move the target data to the GPU device
y_train = y_train.to(device)
y_test = y_test.to(device)
#concVal = torch.tensor(concVal).float().to(device)
#ValConc = torch.tensor(ValConc).float().to(device)

# More data prep?
datasets = torch.utils.data.TensorDataset(X_train, y_train)
Test_datasets = torch.utils.data.TensorDataset(X_test, y_test)
train_iter = torch.utils.data.DataLoader(datasets, batch_size = 76, shuffle=True)
test_iter = torch.utils.data.DataLoader(Test_datasets, batch_size = 76, shuffle=True)

In [6]:
del X_train
del X_test
del y_train
del y_test
del spectra
del conc1
del datasets
del Test_datasets

In [7]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.d_model = d_model
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return x + self.pe[:x.size(0), :]

class Transformer(nn.Module):
    def __init__(self, input_dim, d_model, nhead, num_encoder_layers, dim_feedforward, dropout=0.1):
        super(Transformer, self).__init__()
        self.input_dim = input_dim
        self.d_model = d_model
        self.embedding = nn.Linear(input_dim, d_model)
        self.positional_encoding = PositionalEncoding(d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)
        self.decoder = nn.Linear(23552, 44)

    def forward(self, x):
        # Binning
        batch_size, seq_length = x.size()
        num_bins = seq_length // self.input_dim
        x = x.view(batch_size, num_bins, self.input_dim)  # (batch_size, num_bins, input_dim)
        
        # Embedding
        x = self.embedding(x)  # (batch_size, num_bins, d_model)
        
        # Add positional encoding
        x = self.positional_encoding(x)
        
        # Transformer Encoder
        x = x.permute(1, 0, 2)  # (num_bins, batch_size, d_model)
        x = self.transformer_encoder(x)  # (num_bins, batch_size, d_model)
        x = x.permute(1, 0, 2)  # (batch_size, num_bins, d_model)
        
        # Reconstruct original sequence
        x = x.reshape(batch_size, num_bins * d_model)
        
        # Decoding
        x = self.decoder(x)  # (batch_size, output_dim)
        
        return x

# Parameters
input_dim = 1000  # Size of each bin
d_model = 512     # Embedding dimension
nhead = 1         # Number of attention heads
num_encoder_layers = 1  # Number of transformer encoder layers
dim_feedforward = 2048  # Feedforward dimension
dropout = 0.0     # Dropout rate


In [8]:
class RelativeAbsoluteError(nn.Module):
    def __init__(self):
        super(RelativeAbsoluteError, self).__init__()

    def forward(self, y_pred, y_true):
        # Compute the mean of the true values
        y_mean = torch.mean(y_true)
        
        # Compute the absolute differences
        absolute_errors = torch.abs(y_true - y_pred)
        mean_absolute_errors = torch.abs(y_true - y_mean)
        
        # Compute RAE
        rae = torch.sum(absolute_errors) / torch.sum(mean_absolute_errors)
        return rae

In [9]:
def train_and_save_best_model(model, train_loader, test_loader, num_epochs, save_path):
    criterion = RelativeAbsoluteError()
    optimizer = optim.AdamW(model.parameters(), lr=0.00014656680844397094, weight_decay=0.01)
    
    
    train_losses = []
    test_losses = []
    best_test_loss = float('inf')
    epochs_no_improve = 0
    patience = 50  # Set how many epochs without improvement in validation loss constitutes early stopping

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * inputs.size(0)
        train_losses.append(train_loss)

        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for inputs, targets in test_loader:
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                test_loss += loss.item() * inputs.size(0)
            test_losses.append(test_loss)
            
            
        if (epoch + 1) % 1 == 0:  # The last number here denotes how often to print loss metrics in terms of epochs
            print(f'Epoch [{epoch + 1}/{num_epochs}], '
                  f'Train Loss: {train_loss:.4f}, '
                  f'Test Loss: {test_loss:.4f}')
            
    
            
        if test_loss < best_test_loss:
            best_test_loss = test_loss
            epochs_no_improve = 0
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, save_path)
        else:
            epochs_no_improve += 1
            
        if epochs_no_improve >= patience:
            print(f'Early stopping at epoch {epoch + 1}')
            break



    return train_losses, test_losses


def train_or_load_model(model, train_loader, test_loader, num_epochs, save_path):
    train_losses = []
    test_losses = []
    is_model_trained = False  # Initialize flag

    if os.path.isfile(save_path):
        print("Loading pretrained model from {}".format(save_path))
        checkpoint = torch.load(save_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer = optim.Adam(model.parameters())  
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        
    
    else:
        print("No pretrained model found. Training from scratch.")
        #optimizer = optim.Adam(model.parameters())  
        train_losses, test_losses = train_and_save_best_model(model, train_loader, test_loader, num_epochs, save_path)
        is_model_trained = True  # Set flag to True after training
        # Save losses per epoch
        np.save(ModelName + "_TrainLoss.npy", train_losses)
        np.save(ModelName + "_TestLoss.npy", test_losses)
    
    return train_losses, test_losses, is_model_trained  # Return the losses and flag

In [10]:
## Instantiate model and train

# For timing cell run time
start_time = time.time()


# Switch to directory for saving model parameters
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/SavedParamsAndTrainingMetrics')

# Create model
model_aq = Transformer(input_dim, d_model, nhead, num_encoder_layers, dim_feedforward, dropout)

# Move the model to the GPU device
model_aq.to(device)

# Define the path to save and load the model parameters
save_path = ModelName + '_Params.pt'

# Call the function
train_losses, test_losses, is_model_trained = train_or_load_model(model_aq, train_iter, test_iter, num_epochs, save_path)


# Finish timing cell run time
end_time = time.time()
execution_time = end_time - start_time
if is_model_trained:
    np.save(ModelName + "_ExecutionTime.npy", execution_time)
    print("Execution time:", execution_time, "seconds")



No pretrained model found. Training from scratch.
Epoch [1/1000], Train Loss: 13836.4640, Test Loss: 3232.1946
Epoch [2/1000], Train Loss: 12731.1668, Test Loss: 3182.4869
Epoch [3/1000], Train Loss: 12542.5493, Test Loss: 3117.9344
Epoch [4/1000], Train Loss: 12392.9384, Test Loss: 3043.4357
Epoch [5/1000], Train Loss: 11510.6033, Test Loss: 2648.9709
Epoch [6/1000], Train Loss: 9227.8658, Test Loss: 2071.7111
Epoch [7/1000], Train Loss: 6465.2252, Test Loss: 1321.1304
Epoch [8/1000], Train Loss: 4396.8285, Test Loss: 931.4767
Epoch [9/1000], Train Loss: 3253.9212, Test Loss: 706.6391
Epoch [10/1000], Train Loss: 2648.0299, Test Loss: 603.9884
Epoch [11/1000], Train Loss: 2237.1674, Test Loss: 513.6633
Epoch [12/1000], Train Loss: 1939.0038, Test Loss: 456.5478
Epoch [13/1000], Train Loss: 1761.3367, Test Loss: 418.0207
Epoch [14/1000], Train Loss: 1609.7947, Test Loss: 383.3949
Epoch [15/1000], Train Loss: 1486.2968, Test Loss: 362.5640
Epoch [16/1000], Train Loss: 1400.6910, Test Lo

Epoch [138/1000], Train Loss: 509.9788, Test Loss: 132.2023
Epoch [139/1000], Train Loss: 515.9284, Test Loss: 137.2457
Epoch [140/1000], Train Loss: 514.6821, Test Loss: 138.1641
Epoch [141/1000], Train Loss: 509.8636, Test Loss: 132.0860
Epoch [142/1000], Train Loss: 502.3449, Test Loss: 131.5747
Epoch [143/1000], Train Loss: 508.4917, Test Loss: 130.4316
Epoch [144/1000], Train Loss: 511.9495, Test Loss: 138.0981
Epoch [145/1000], Train Loss: 510.1927, Test Loss: 139.5447
Epoch [146/1000], Train Loss: 501.9686, Test Loss: 131.9195
Epoch [147/1000], Train Loss: 499.0474, Test Loss: 131.4478
Epoch [148/1000], Train Loss: 502.6798, Test Loss: 131.3015
Epoch [149/1000], Train Loss: 496.7619, Test Loss: 129.6077
Epoch [150/1000], Train Loss: 496.9531, Test Loss: 132.9086
Epoch [151/1000], Train Loss: 500.5271, Test Loss: 130.5740
Epoch [152/1000], Train Loss: 496.5750, Test Loss: 132.6987
Epoch [153/1000], Train Loss: 502.6896, Test Loss: 137.4241
Epoch [154/1000], Train Loss: 493.8289, 

Epoch [275/1000], Train Loss: 388.5766, Test Loss: 106.7766
Epoch [276/1000], Train Loss: 397.8535, Test Loss: 109.6110
Epoch [277/1000], Train Loss: 390.0503, Test Loss: 107.7326
Epoch [278/1000], Train Loss: 388.1240, Test Loss: 104.5921
Epoch [279/1000], Train Loss: 389.1622, Test Loss: 106.6425
Epoch [280/1000], Train Loss: 390.2501, Test Loss: 105.2767
Epoch [281/1000], Train Loss: 384.7438, Test Loss: 102.6991
Epoch [282/1000], Train Loss: 388.0283, Test Loss: 104.4576
Epoch [283/1000], Train Loss: 389.2076, Test Loss: 107.9862
Epoch [284/1000], Train Loss: 389.3650, Test Loss: 106.1995
Epoch [285/1000], Train Loss: 385.9729, Test Loss: 107.2766
Epoch [286/1000], Train Loss: 384.9992, Test Loss: 105.1381
Epoch [287/1000], Train Loss: 385.5937, Test Loss: 105.4339
Epoch [288/1000], Train Loss: 384.5894, Test Loss: 102.9632
Epoch [289/1000], Train Loss: 381.4131, Test Loss: 104.0429
Epoch [290/1000], Train Loss: 383.8904, Test Loss: 106.2855
Epoch [291/1000], Train Loss: 379.9534, 

Epoch [413/1000], Train Loss: 331.0202, Test Loss: 92.7855
Epoch [414/1000], Train Loss: 336.7430, Test Loss: 91.5621
Epoch [415/1000], Train Loss: 333.1576, Test Loss: 92.6724
Epoch [416/1000], Train Loss: 328.0366, Test Loss: 93.0980
Epoch [417/1000], Train Loss: 331.7569, Test Loss: 92.0454
Epoch [418/1000], Train Loss: 330.3578, Test Loss: 98.5369
Epoch [419/1000], Train Loss: 328.5321, Test Loss: 91.3574
Epoch [420/1000], Train Loss: 330.0599, Test Loss: 93.7558
Epoch [421/1000], Train Loss: 331.9176, Test Loss: 91.6887
Epoch [422/1000], Train Loss: 326.8030, Test Loss: 92.4907
Epoch [423/1000], Train Loss: 330.4883, Test Loss: 92.0301
Epoch [424/1000], Train Loss: 329.7256, Test Loss: 93.3730
Epoch [425/1000], Train Loss: 325.9574, Test Loss: 91.8800
Epoch [426/1000], Train Loss: 329.4077, Test Loss: 93.7110
Epoch [427/1000], Train Loss: 330.6450, Test Loss: 91.2167
Epoch [428/1000], Train Loss: 326.7374, Test Loss: 89.6186
Epoch [429/1000], Train Loss: 326.6930, Test Loss: 95.35

Epoch [552/1000], Train Loss: 295.2943, Test Loss: 85.6771
Epoch [553/1000], Train Loss: 295.9494, Test Loss: 88.7717
Epoch [554/1000], Train Loss: 292.8998, Test Loss: 85.7404
Epoch [555/1000], Train Loss: 297.2839, Test Loss: 84.0512
Epoch [556/1000], Train Loss: 292.4745, Test Loss: 83.0622
Epoch [557/1000], Train Loss: 368.6809, Test Loss: 83.1892
Epoch [558/1000], Train Loss: 295.4824, Test Loss: 82.1068
Epoch [559/1000], Train Loss: 291.7710, Test Loss: 81.6899
Epoch [560/1000], Train Loss: 285.0770, Test Loss: 84.0581
Epoch [561/1000], Train Loss: 285.6394, Test Loss: 82.3943
Epoch [562/1000], Train Loss: 289.6228, Test Loss: 85.6859
Epoch [563/1000], Train Loss: 289.6583, Test Loss: 83.0140
Epoch [564/1000], Train Loss: 288.2956, Test Loss: 83.4297
Epoch [565/1000], Train Loss: 289.1333, Test Loss: 81.4023
Epoch [566/1000], Train Loss: 288.5280, Test Loss: 82.9174
Epoch [567/1000], Train Loss: 293.7944, Test Loss: 84.6688
Epoch [568/1000], Train Loss: 293.9603, Test Loss: 89.85

Epoch [691/1000], Train Loss: 307.3005, Test Loss: 79.0801
Epoch [692/1000], Train Loss: 263.8059, Test Loss: 79.9569
Epoch [693/1000], Train Loss: 264.2235, Test Loss: 77.2703
Epoch [694/1000], Train Loss: 265.3916, Test Loss: 77.8109
Epoch [695/1000], Train Loss: 278.5166, Test Loss: 77.5794
Epoch [696/1000], Train Loss: 266.2540, Test Loss: 76.9948
Epoch [697/1000], Train Loss: 265.8578, Test Loss: 76.3568
Epoch [698/1000], Train Loss: 267.8675, Test Loss: 79.1641
Epoch [699/1000], Train Loss: 265.7712, Test Loss: 78.2383
Epoch [700/1000], Train Loss: 266.5876, Test Loss: 78.6793
Epoch [701/1000], Train Loss: 272.8744, Test Loss: 84.3433
Epoch [702/1000], Train Loss: 270.1265, Test Loss: 77.2749
Epoch [703/1000], Train Loss: 267.6934, Test Loss: 77.7830
Epoch [704/1000], Train Loss: 269.4237, Test Loss: 78.3567
Epoch [705/1000], Train Loss: 270.8575, Test Loss: 76.2839
Epoch [706/1000], Train Loss: 266.2198, Test Loss: 77.4498
Epoch [707/1000], Train Loss: 264.4489, Test Loss: 76.46

In [11]:
np.array(test_losses).min()

73.32267376035452

In [12]:
## Load training and testing datasets, validation datasets, and representative example spectra 

# Switch to directory containing datasets
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/GeneratedDataAndVariables')

# Load validation dataset
spectraVal = np.load(f'Dataset44_{base_name}_ForManuscript_Val_Spec.npy')
concVal = np.load(f'Dataset44_{base_name}_ForManuscript_Val_Conc.npy')



# Load representative validation spectra and concentrations
# Load spectra of varied concentrations (all metabolites at X-mM from 0.005mm to 20mM)
ConcSpec = np.load(f'Concentration_44met_{base_name}_ForManuscript_Spec.npy')
ConcConc = np.load(f'Concentration_44met_{base_name}_ForManuscript_Conc.npy')  
#  Load uniform concentration distribution validation spectra
UniformSpec = np.load(f'UniformDist_44met_{base_name}_ForManuscript_Spec.npy')
UniformConc = np.load(f'UniformDist_44met_{base_name}_ForManuscript_Conc.npy')  
#  Load low concentration uniform concentration distribution validation spectra
LowUniformSpec = np.load(f'LowUniformDist_44met_{base_name}_ForManuscript_Spec.npy')
LowUniformConc = np.load(f'LowUniformDist_44met_{base_name}_ForManuscript_Conc.npy')
#  Load tissue mimicking concentration distribution validation spectra
MimicTissueRangeSpec = np.load(f'MimicTissueRange_44met_{base_name}_ForManuscript_Spec.npy')
MimicTissueRangeConc = np.load(f'MimicTissueRange_44met_{base_name}_ForManuscript_Conc.npy')
#  Load liver tissue mimicking concentration distribution (high relative glucose) validation spectra
MimicTissueRangeGlucSpec = np.load(f'MimicTissueRangeGluc_44met_{base_name}_ForManuscript_Spec.npy')
MimicTissueRangeGlucConc = np.load(f'MimicTissueRangeGluc_44met_{base_name}_ForManuscript_Conc.npy')
#  Load high dynamic range #2 validation spectra
HighDynamicRange2Spec = np.load(f'HighDynRange2_44met_{base_name}_ForManuscript_Spec.npy')
HighDynamicRange2Conc = np.load(f'HighDynRange2_44met_{base_name}_ForManuscript_Conc.npy') 
#  Load varied SNR validation spectra
SNR_Spec = np.load(f'SNR_44met_{base_name}_ForManuscript_Spec.npy')
SNR_Conc = np.load(f'SNR_44met_{base_name}_ForManuscript_Conc.npy')
#  Load random singlet validation spectra
Singlet_Spec = np.load(f'Singlet_44met_{base_name}_ForManuscript_Spec.npy')
Singlet_Conc = np.load(f'Singlet_44met_{base_name}_ForManuscript_Conc.npy')
#  Load random qref checker validation spectra
QrefSensSpec = np.load(f'QrefSensitivity_44met_{base_name}_ForManuscript_Spec.npy')
QrefSensConc = np.load(f'QrefSensitivity_44met_{base_name}_ForManuscript_Conc.npy')
#  Load other validation spectra
OtherValSpectra = np.load(f'OtherVal_44met_{base_name}_ForManuscript_Spec.npy')
OtherValConc = np.load(f'OtherVal_44met_{base_name}_ForManuscript_Conc.npy')



# Move the input data to the GPU device
spectraVal = torch.tensor(spectraVal).float().to(device)   
concVal = torch.tensor(concVal).float().to(device)
ConcSpec = torch.tensor(ConcSpec).float().to(device)   
ConcConc = torch.tensor(ConcConc).float().to(device)
UniformSpec = torch.tensor(UniformSpec).float().to(device)   
UniformConc = torch.tensor(UniformConc).float().to(device)
LowUniformSpec = torch.tensor(LowUniformSpec).float().to(device)   
LowUniformConc = torch.tensor(LowUniformConc).float().to(device)
MimicTissueRangeSpec = torch.tensor(MimicTissueRangeSpec).float().to(device)   
MimicTissueRangeConc = torch.tensor(MimicTissueRangeConc).float().to(device)
MimicTissueRangeGlucSpec = torch.tensor(MimicTissueRangeGlucSpec).float().to(device)   
MimicTissueRangeGlucConc = torch.tensor(MimicTissueRangeGlucConc).float().to(device)
HighDynamicRange2Spec = torch.tensor(HighDynamicRange2Spec).float().to(device)   
HighDynamicRange2Conc = torch.tensor(HighDynamicRange2Conc).float().to(device)
SNR_Spec = torch.tensor(SNR_Spec).float().to(device)   
SNR_Conc = torch.tensor(SNR_Conc).float().to(device)
Singlet_Spec = torch.tensor(Singlet_Spec).float().to(device)   
Singlet_Conc = torch.tensor(Singlet_Conc).float().to(device)
QrefSensSpec = torch.tensor(QrefSensSpec).float().to(device)   
QrefSensConc = torch.tensor(QrefSensConc).float().to(device)
OtherValSpectra = torch.tensor(OtherValSpectra).float().to(device)   
OtherValConc = torch.tensor(OtherValConc).float().to(device)

In [13]:
## Make sure best parameters are being utilized

# Switch to directory for saving model parameters
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/SavedParamsAndTrainingMetrics')

# Define the path where you saved your model parameters
save_path = ModelName + '_Params.pt'

# Load the entire dictionary from the saved file
checkpoint = torch.load(save_path)

# Instantiate the model
model_aq = Transformer(input_dim, d_model, nhead, num_encoder_layers, dim_feedforward, dropout)

# Load the model's state dictionary from the loaded dictionary
model_aq.load_state_dict(checkpoint['model_state_dict'])

# Move the model to the GPU 
model_aq.to(device)



Transformer(
  (embedding): Linear(in_features=1000, out_features=512, bias=True)
  (positional_encoding): PositionalEncoding()
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
        )
        (linear1): Linear(in_features=512, out_features=2048, bias=True)
        (dropout): Dropout(p=0.0, inplace=False)
        (linear2): Linear(in_features=2048, out_features=512, bias=True)
        (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.0, inplace=False)
        (dropout2): Dropout(p=0.0, inplace=False)
      )
    )
  )
  (decoder): Linear(in_features=23552, out_features=44, bias=True)
)

In [14]:
Prediction = model_aq(spectraVal[0].unsqueeze(0))

Prediction

tensor([[2.3337, 0.9213, 0.0636, 0.3121, 0.0530, 0.6237, 0.0922, 0.4905, 1.1159,
         0.6209, 0.4462, 0.1481, 0.7735, 0.2407, 0.6769, 0.6654, 0.0361, 0.4316,
         0.0187, 0.2812, 0.1021, 0.7759, 0.0389, 1.1435, 0.9037, 0.1123, 0.1313,
         0.9291, 0.4156, 0.0120, 2.3224, 0.2581, 0.9490, 0.8404, 0.0444, 0.4894,
         0.7083, 0.1321, 1.8045, 0.1069, 5.3482, 0.0520, 2.6141, 0.0489]],
       device='cuda:0', grad_fn=<AddmmBackward0>)

In [15]:
Prediction.detach().cpu().numpy()[0]

array([2.333733  , 0.9212808 , 0.06363979, 0.31206876, 0.05298594,
       0.6237164 , 0.09217171, 0.4905454 , 1.1158792 , 0.6209356 ,
       0.44618523, 0.1480578 , 0.7735436 , 0.24072139, 0.67694354,
       0.66536504, 0.03608605, 0.43159965, 0.01867029, 0.28118742,
       0.10209971, 0.77590173, 0.03888998, 1.143466  , 0.90365875,
       0.11231674, 0.13134061, 0.9290956 , 0.4156268 , 0.01203237,
       2.322426  , 0.25811413, 0.94901276, 0.84044015, 0.04440038,
       0.48938176, 0.7083239 , 0.13206197, 1.8044686 , 0.10688232,
       5.3482356 , 0.05198409, 2.6140697 , 0.04888411], dtype=float32)

In [16]:
## Compute absolute percent error statistics on validation set

APEs = []
MAPEs = []

for i in np.arange(5000):
    GroundTruth = concVal[i].cpu().numpy()  # Move GroundTruth tensor to CPU and convert to NumPy array
    model_aq.eval()
    Prediction = model_aq(spectraVal[i].unsqueeze(0))

    # Move Prediction tensor to CPU and detach from computation graph
    Prediction_cpu = Prediction.detach().cpu().numpy()[0]

    APE = []

    for metabolite in range(44):
        per_err = 100*(GroundTruth[metabolite] - Prediction_cpu[metabolite]) / GroundTruth[metabolite]
        APE.append(abs(per_err))

    MAPE = sum(APE) / len(APE)

    APEs.append(APE)
    MAPEs.append(MAPE)


# Convert lists to numpy arrays and save
np.save(ModelName + "_" + "ValExamples_APEs.npy", np.array(APEs))
np.save(ModelName + "_" + "ValExamples_MAPEs.npy", np.array(MAPEs))



print('Overall MAPE: ',np.array(MAPEs).mean())


Overall MAPE:  14.815657159146923


In [17]:
## Compute absolute percent error statistics on concentration varied validation spectra

APEs = []
MAPEs = []

for i in np.arange(10):
    GroundTruth = ConcConc[i]
    model_aq.eval()
    Prediction = model_aq(ConcSpec[i])

    # Move Prediction tensor to CPU and detach from computation graph
    Prediction_cpu = Prediction.detach().cpu().numpy()

    APE = []

    for metabolite in range(44):
        per_err = 100*(GroundTruth[metabolite] - Prediction_cpu[0][metabolite]) / GroundTruth[metabolite]
        APE.append(abs(per_err.cpu()))

    MAPE = sum(APE) / len(APE)

    APEs.append(APE)
    MAPEs.append(MAPE)


# Convert lists to numpy arrays and save
np.save(ModelName + "_" + "ConcExamples_APEs.npy", np.array(APEs))
np.save(ModelName + "_" + "ConcExamples_MAPEs.npy", np.array(MAPEs))



## Output metrics
print('Overall MAPE: ',np.array(MAPEs).mean())
print("--------------------")
for i in np.arange(10):
    print(round(MAPEs[i].item(), 2), " - Concentrations:",ConcConc[i][0].item())

Overall MAPE:  inf
--------------------
inf  - Concentrations: 0.0
65.4  - Concentrations: 0.004999999888241291
11.96  - Concentrations: 0.02500000037252903
3.09  - Concentrations: 0.10000000149011612
1.5  - Concentrations: 0.25
0.78  - Concentrations: 0.5
0.63  - Concentrations: 1.0
0.63  - Concentrations: 2.5
9.22  - Concentrations: 10.0
25.6  - Concentrations: 20.0


In [18]:
## Compute absolute percent error statistics on uniform distribution validation spectra

APEs = []
MAPEs = []

for i in np.arange(10):
    GroundTruth = UniformConc[i]
    model_aq.eval()
    Prediction = model_aq(UniformSpec[i])

    # Move Prediction tensor to CPU and detach from computation graph
    Prediction_cpu = Prediction.detach().cpu().numpy()

    APE = []

    for metabolite in range(44):
        per_err = 100*(GroundTruth[metabolite] - Prediction_cpu[0][metabolite]) / GroundTruth[metabolite]
        APE.append(abs(per_err.cpu()))

    MAPE = sum(APE) / len(APE)

    APEs.append(APE)
    MAPEs.append(MAPE)


# Convert lists to numpy arrays and save
np.save(ModelName + "_" + "UniformExamples_APEs.npy", np.array(APEs))
np.save(ModelName + "_" + "UniformExamples_MAPEs.npy", np.array(MAPEs))



## Output metrics
print('Overall MAPE: ',np.array(MAPEs).mean())
print("--------------------")
for i in np.arange(10):
    print(round(MAPEs[i].item(), 2), " - Min Value:", np.round(UniformConc[i].min().item(),4), " - Mean Value:", np.round(UniformConc[i].mean().item(),1))

Overall MAPE:  15.570964
--------------------
11.07  - Min Value: 0.6783  - Mean Value: 9.2
19.53  - Min Value: 0.0096  - Mean Value: 10.3
32.27  - Min Value: 0.147  - Mean Value: 10.5
7.67  - Min Value: 0.5572  - Mean Value: 8.5
17.66  - Min Value: 1.3567  - Mean Value: 10.6
10.8  - Min Value: 0.6332  - Mean Value: 10.9
15.84  - Min Value: 0.7017  - Mean Value: 11.0
20.66  - Min Value: 0.3674  - Mean Value: 8.9
9.49  - Min Value: 0.8387  - Mean Value: 9.8
10.73  - Min Value: 1.0913  - Mean Value: 11.1


In [19]:
## Compute absolute percent error statistics on low concentration uniform distribution validation spectra

APEs = []
MAPEs = []

for i in np.arange(10):
    GroundTruth = LowUniformConc[i]
    model_aq.eval()
    Prediction = model_aq(LowUniformSpec[i])

    # Move Prediction tensor to CPU and detach from computation graph
    Prediction_cpu = Prediction.detach().cpu().numpy()

    APE = []

    for metabolite in range(44):
        per_err = 100*(GroundTruth[metabolite] - Prediction_cpu[0][metabolite]) / GroundTruth[metabolite]
        APE.append(abs(per_err.cpu()))

    MAPE = sum(APE) / len(APE)

    APEs.append(APE)
    MAPEs.append(MAPE)


# Convert lists to numpy arrays and save
np.save(ModelName + "_" + "LowUniformExamples_APEs.npy", np.array(APEs))
np.save(ModelName + "_" + "LowUniformExamples_MAPEs.npy", np.array(MAPEs))



## Output metrics
print('Overall MAPE: ',np.array(MAPEs).mean())
print("--------------------")
for i in np.arange(10):
    print(round(MAPEs[i].item(), 2), " - Min Value:", np.round(LowUniformConc[i].min().item(),4), " - Mean Value:", np.round(LowUniformConc[i].mean().item(),1))

Overall MAPE:  4.450469
--------------------
3.22  - Min Value: 0.0111  - Mean Value: 0.1
4.44  - Min Value: 0.0103  - Mean Value: 0.1
2.65  - Min Value: 0.0153  - Mean Value: 0.1
5.2  - Min Value: 0.0117  - Mean Value: 0.1
4.86  - Min Value: 0.0089  - Mean Value: 0.1
4.96  - Min Value: 0.0075  - Mean Value: 0.1
4.13  - Min Value: 0.0117  - Mean Value: 0.1
4.23  - Min Value: 0.0052  - Mean Value: 0.1
7.44  - Min Value: 0.008  - Mean Value: 0.1
3.37  - Min Value: 0.0134  - Mean Value: 0.1


In [20]:
## Compute absolute percent error statistics on tissue mimicking distribution validation spectra

APEs = []
MAPEs = []

for i in np.arange(10):
    GroundTruth = MimicTissueRangeConc[i]
    model_aq.eval()
    Prediction = model_aq(MimicTissueRangeSpec[i])

    # Move Prediction tensor to CPU and detach from computation graph
    Prediction_cpu = Prediction.detach().cpu().numpy()

    APE = []

    for metabolite in range(44):
        per_err = 100*(GroundTruth[metabolite] - Prediction_cpu[0][metabolite]) / GroundTruth[metabolite]
        APE.append(abs(per_err.cpu()))

    MAPE = sum(APE) / len(APE)

    APEs.append(APE)
    MAPEs.append(MAPE)


# Convert lists to numpy arrays and save
np.save(ModelName + "_" + "MimicTissueRangeExamples_APEs.npy", np.array(APEs))
np.save(ModelName + "_" + "MimicTissueRangeExamples_MAPEs.npy", np.array(MAPEs))



## Output metrics
print('Overall MAPE: ',np.array(MAPEs).mean())
print("--------------------")
for i in np.arange(10):
    print(round(MAPEs[i].item(), 2), " - Min Value:", np.round(MimicTissueRangeConc[i].min().item(),4), " - Mean Value:", np.round(MimicTissueRangeConc[i].mean().item(),1))

Overall MAPE:  10.067224
--------------------
25.9  - Min Value: 0.008  - Mean Value: 0.8
17.37  - Min Value: 0.009  - Mean Value: 0.9
15.87  - Min Value: 0.0138  - Mean Value: 1.5
3.89  - Min Value: 0.0107  - Mean Value: 0.7
5.35  - Min Value: 0.0191  - Mean Value: 0.7
7.56  - Min Value: 0.0186  - Mean Value: 0.8
5.08  - Min Value: 0.0175  - Mean Value: 0.8
5.86  - Min Value: 0.0238  - Mean Value: 1.3
4.82  - Min Value: 0.0168  - Mean Value: 0.7
8.98  - Min Value: 0.0171  - Mean Value: 0.9


In [21]:
## Compute absolute percent error statistics on tissue mimicking distribution validation spectra (high relative glucose concentration)

APEs = []
MAPEs = []

for i in np.arange(10):
    GroundTruth = MimicTissueRangeGlucConc[i]
    model_aq.eval()
    Prediction = model_aq(MimicTissueRangeGlucSpec[i])

    # Move Prediction tensor to CPU and detach from computation graph
    Prediction_cpu = Prediction.detach().cpu().numpy()

    APE = []

    for metabolite in range(44):
        per_err = 100*(GroundTruth[metabolite] - Prediction_cpu[0][metabolite]) / GroundTruth[metabolite]
        APE.append(abs(per_err.cpu()))

    MAPE = sum(APE) / len(APE)

    APEs.append(APE)
    MAPEs.append(MAPE)


# Convert lists to numpy arrays and save
np.save(ModelName + "_" + "MimicTissueRangeGlucExamples_APEs.npy", np.array(APEs))
np.save(ModelName + "_" + "MimicTissueRangeGlucExamples_MAPEs.npy", np.array(MAPEs))



## Output metrics
print('Overall MAPE: ',np.array(MAPEs).mean())
print("--------------------")
for i in np.arange(10):
    print(round(MAPEs[i].item(), 2), " - Min Value:", np.round(MimicTissueRangeGlucConc[i].min().item(),4), " - Mean Value:", np.round(MimicTissueRangeGlucConc[i].mean().item(),1))

Overall MAPE:  14.23889
--------------------
9.94  - Min Value: 0.013  - Mean Value: 0.6
10.67  - Min Value: 0.0115  - Mean Value: 0.4
10.53  - Min Value: 0.0115  - Mean Value: 0.4
11.1  - Min Value: 0.0115  - Mean Value: 0.6
26.55  - Min Value: 0.0115  - Mean Value: 1.0
21.87  - Min Value: 0.0115  - Mean Value: 1.1
20.27  - Min Value: 0.0115  - Mean Value: 0.8
5.98  - Min Value: 0.0115  - Mean Value: 0.5
8.1  - Min Value: 0.0115  - Mean Value: 0.5
17.38  - Min Value: 0.0115  - Mean Value: 1.1


In [22]:
## Compute absolute percent error statistics on a further high dynamic range dataset

APEs = []
MAPEs = []

for i in np.arange(10):
    GroundTruth = HighDynamicRange2Conc[i]
    model_aq.eval()
    Prediction = model_aq(HighDynamicRange2Spec[i])

    # Move Prediction tensor to CPU and detach from computation graph
    Prediction_cpu = Prediction.detach().cpu().numpy()

    APE = []

    for metabolite in range(44):
        per_err = 100*(GroundTruth[metabolite] - Prediction_cpu[0][metabolite]) / GroundTruth[metabolite]
        APE.append(abs(per_err.cpu()))

    MAPE = sum(APE) / len(APE)

    APEs.append(APE)
    MAPEs.append(MAPE)


# Convert lists to numpy arrays and save
np.save(ModelName + "_" + "HighDynamicRange2Examples_APEs.npy", np.array(APEs))
np.save(ModelName + "_" + "HighDynamicRange2Examples_MAPEs.npy", np.array(MAPEs))



## Output metrics
print('Overall MAPE: ',np.array(MAPEs).mean())
print("--------------------")
for i in np.arange(10):
    print(round(MAPEs[i].item(), 2), " - Min Value:", np.round(HighDynamicRange2Conc[i].min().item(),4), " - Mean Value:", np.round(HighDynamicRange2Conc[i].mean().item(),1))

Overall MAPE:  226.72726
--------------------
148.22  - Min Value: 0.0062  - Mean Value: 2.1
143.85  - Min Value: 0.006  - Mean Value: 3.7
200.69  - Min Value: 0.0066  - Mean Value: 4.3
668.0  - Min Value: 0.0094  - Mean Value: 4.3
160.2  - Min Value: 0.0068  - Mean Value: 4.9
161.31  - Min Value: 0.005  - Mean Value: 3.8
184.88  - Min Value: 0.0101  - Mean Value: 3.2
43.67  - Min Value: 0.0062  - Mean Value: 3.2
257.96  - Min Value: 0.0053  - Mean Value: 5.3
298.49  - Min Value: 0.0054  - Mean Value: 2.5


In [23]:
ConcConc[5]

tensor([0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000,
        0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000,
        0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000,
        0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000,
        0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000],
       device='cuda:0')

In [24]:
model_aq(ConcSpec[5])

tensor([[0.5048, 0.5004, 0.4908, 0.4981, 0.5041, 0.5038, 0.5011, 0.5036, 0.4945,
         0.4938, 0.5030, 0.5041, 0.5015, 0.5043, 0.5032, 0.5054, 0.5100, 0.4990,
         0.4981, 0.4995, 0.5019, 0.5036, 0.5059, 0.5053, 0.5026, 0.5019, 0.4987,
         0.4981, 0.4912, 0.5024, 0.5006, 0.5125, 0.5077, 0.4954, 0.5026, 0.5055,
         0.5012, 0.4935, 0.5095, 0.5028, 0.5008, 0.4987, 0.5008, 0.5049]],
       device='cuda:0', grad_fn=<AddmmBackward0>)

In [25]:
model_aq(SNR_Spec[1])

tensor([[0.4342, 0.4331, 0.4204, 0.4248, 0.4359, 0.4304, 0.4321, 0.4311, 0.4259,
         0.4241, 0.4322, 0.4334, 0.4326, 0.4334, 0.4455, 0.4333, 0.4398, 0.4300,
         0.4297, 0.4306, 0.4308, 0.4377, 0.4295, 0.4360, 0.4302, 0.4320, 0.4256,
         0.4303, 0.4246, 0.4321, 0.4305, 0.4388, 0.4404, 0.4247, 0.4311, 0.4340,
         0.4341, 0.4318, 0.4315, 0.4341, 0.4269, 0.4286, 0.4325, 0.4303]],
       device='cuda:0', grad_fn=<AddmmBackward0>)

In [26]:
## Compute absolute percent error statistics on a examples of varying SNR

APEs = []
MAPEs = []

for i in np.arange(10):
    GroundTruth = 0.43
    model_aq.eval()
    Prediction = model_aq(SNR_Spec[i])

    # Move Prediction tensor to CPU and detach from computation graph
    Prediction_cpu = Prediction.detach().cpu().numpy()

    APE = []

    for metabolite in range(44):
        per_err = 100*(GroundTruth - Prediction_cpu[0][metabolite]) / GroundTruth
        APE.append(abs(per_err))

    MAPE = sum(APE) / len(APE)

    APEs.append(APE)
    MAPEs.append(MAPE)


# Convert lists to numpy arrays and save
np.save(ModelName + "_" + "SNR_Examples_APEs.npy", np.array(APEs))
np.save(ModelName + "_" + "SNR_Examples_MAPEs.npy", np.array(MAPEs))



## Output metrics
print('Overall MAPE: ',np.array(MAPEs).mean())
print("--------------------")
for i in np.arange(10):
    print(round(MAPEs[i].item(), 2))

Overall MAPE:  1.3034025244178533
--------------------
0.85
0.85
0.88
0.93
1.26
1.43
1.34
1.78
2.06
1.67


In [27]:
## Compute absolute percent error statistics on a dataset with singlets added at random

APEs = []
MAPEs = []

for i in np.arange(10):
    GroundTruth = 0.43
    model_aq.eval()
    Prediction = model_aq(Singlet_Spec[i])

    # Move Prediction tensor to CPU and detach from computation graph
    Prediction_cpu = Prediction.detach().cpu().numpy()

    APE = []

    for metabolite in range(44):
        per_err = 100*(GroundTruth - Prediction_cpu[0][metabolite]) / GroundTruth
        APE.append(abs(per_err))

    MAPE = sum(APE) / len(APE)

    APEs.append(APE)
    MAPEs.append(MAPE)


# Convert lists to numpy arrays and save
np.save(ModelName + "_" + "SingletExamples_APEs.npy", np.array(APEs))
np.save(ModelName + "_" + "SingletExamples_MAPEs.npy", np.array(MAPEs))



## Output metrics
print('Overall MAPE: ',np.array(MAPEs).mean())
print("--------------------")
for i in np.arange(10):
    print(round(MAPEs[i].item(), 2))

Overall MAPE:  9.50344510738744
--------------------
0.95
1.0
0.95
2.11
12.27
12.35
12.19
14.13
17.57
21.5


In [28]:
## Compute absolute percent error statistics on a examples of varying SNR

APEs = []
MAPEs = []

for i in np.arange(10):
    GroundTruth = 0.43
    model_aq.eval()
    Prediction = model_aq(QrefSensSpec[i])

    # Move Prediction tensor to CPU and detach from computation graph
    Prediction_cpu = Prediction.detach().cpu().numpy()

    APE = []

    for metabolite in range(44):
        per_err = 100*(GroundTruth - Prediction_cpu[0][metabolite]) / GroundTruth
        APE.append(abs(per_err))

    MAPE = sum(APE) / len(APE)

    APEs.append(APE)
    MAPEs.append(MAPE)


# Convert lists to numpy arrays and save
np.save(ModelName + "_" + "QrefSensitivity_Examples_APEs.npy", np.array(APEs))
np.save(ModelName + "_" + "QrefSensitivity_Examples_MAPEs.npy", np.array(MAPEs))



## Output metrics
print('Overall MAPE: ',np.array(MAPEs).mean())
print("--------------------")
for i in np.arange(10):
    print(round(MAPEs[i].item(), 2))

Overall MAPE:  18.488614384644126
--------------------
1.9
6.41
10.44
13.92
17.48
20.75
24.18
27.28
30.21
32.32


In [29]:
Pred = model_aq(OtherValSpectra[0])
Pred[0][Pred[0] < 0] = 0
print("Sinusoidal Baseline 1")
print(Pred[0])
print("___________")
print("___________")

Pred = model_aq(OtherValSpectra[1])
Pred[0][Pred[0] < 0] = 0
print("Sinusoidal Baseline 2")
print(Pred[0])
print("___________")
print("___________")

Pred = model_aq(OtherValSpectra[2])
Pred[0][Pred[0] < 0] = 0
print("HD-Range 1 - 0.01s and 20s")
print(Pred[0])

Pred = model_aq(OtherValSpectra[3])
Pred[0][Pred[0] < 0] = 0
print("HD-Range 2 - 0s and 20s")
print(Pred[0])

Sinusoidal Baseline 1
tensor([0.4559, 0.5460, 0.3815, 0.5380, 0.4864, 0.4588, 0.4742, 0.5381, 0.5798,
        0.4013, 0.4210, 0.5915, 0.5148, 0.7289, 0.3608, 0.5935, 0.5451, 0.4544,
        0.4672, 0.4064, 0.4432, 0.7241, 0.3726, 0.5065, 0.5047, 0.4725, 0.5286,
        0.5825, 0.4927, 0.4381, 0.4490, 0.7894, 0.4607, 0.4164, 0.4677, 0.4346,
        0.4603, 0.4022, 0.6309, 0.3826, 0.4047, 0.4669, 0.5181, 0.4620],
       device='cuda:0', grad_fn=<SelectBackward0>)
___________
___________
Sinusoidal Baseline 2
tensor([0.3747, 0.4841, 0.1776, 0.5057, 0.4133, 0.4280, 0.4177, 0.2908, 0.4894,
        0.4910, 0.4216, 0.4560, 0.7120, 0.3901, 0.6029, 0.2290, 0.6512, 0.4970,
        0.4538, 0.3792, 0.3742, 0.4425, 0.5656, 0.5495, 0.4136, 0.4477, 0.2798,
        0.1375, 0.5198, 0.3970, 0.4517, 0.3706, 0.3611, 0.4266, 0.5344, 0.4279,
        0.5371, 0.6458, 0.5007, 0.3589, 0.4376, 0.6444, 0.3110, 0.2789],
       device='cuda:0', grad_fn=<SelectBackward0>)
___________
___________
HD-Range 1 - 0.01s a