In [2]:
## Import dependencies

import numpy as np
from os import path
import matplotlib.pyplot as plt
import os
import nmrglue as ng
import seaborn as sns
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import torch
import torch.nn as nn
import torch.optim as optim
import copy
import time


# Default figure size (width, height) applied to every plot in this notebook
plt.rcParams.update({"figure.figsize": (30, 20)})

# Number of training epochs; also embedded in the names of saved artifacts
num_epochs = 300

## Train CRNN on dataset of 8 metabolites

In [3]:
# Prefix for every file saved by this run; it encodes the model, the dataset, and the epoch count
ModelName = f"CRNN_NonIncremental_8Met{num_epochs}ep"

# Seed PyTorch's random number generator, then store the seed next to the other performance metrics
seed = 1
torch.manual_seed(seed)
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/ModelPerformanceMetrics/')
np.save(f"{ModelName}_Seed.npy", seed)

In [4]:
## Load the train/test pool, the validation dataset, and the representative example spectra

# Every dataset file is read relative to this directory
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/GeneratedDataAndVariables')

# Spectra and matching concentrations that are later split into training and testing sets
spectra, conc1 = np.load('Dataset8_Spec.npy'), np.load('Dataset8_Conc.npy')

# Validation spectra and concentrations
spectraVal, concVal = np.load('Dataset8_Val_Spec.npy'), np.load('Dataset8_Val_Conc.npy')

# Representative example spectra, their concentrations, and their variable names
ValSpectra, ValConc, ValSpecNames = (
    np.load("Dataset8_RepresentativeExamples_Spectra.npy"),
    np.load("Dataset8_RepresentativeExamples_Concentrations.npy"),
    np.load("Dataset8_RepresentativeExamples_VariableNames.npy"),
)

In [5]:
## Choose the compute device: the GPU when CUDA is available, otherwise the CPU

if not torch.cuda.is_available():
    # No usable GPU: fall back to the CPU
    device = torch.device("cpu")
    print("CUDA is not available. Using CPU for training.")
else:
    device = torch.device("cuda")
    print("Using GPU for training.")

Using GPU for training.


In [6]:
## Set up data for testing and training

# Split into testing and training data
X_train1, X_test1, y_train1, y_test1 = train_test_split(spectra, conc1, test_size = 0.2, random_state = 1)

# Tensorize the split arrays as float32 and move them to the selected device
X_train = torch.tensor(X_train1).float().to(device)
y_train = torch.tensor(y_train1).float().to(device)
X_test = torch.tensor(X_test1).float().to(device)
y_test = torch.tensor(y_test1).float().to(device)

# These names are rebound from NumPy arrays to tensors. torch.as_tensor accepts either,
# so re-running this cell does not trigger the "copy construct from a tensor" warning.
spectraVal = torch.as_tensor(spectraVal, dtype=torch.float32, device=device)   # Confusing names, these spectra are the 5000 spectra generated like the training dataset
ValSpectra = torch.as_tensor(ValSpectra, dtype=torch.float32, device=device)   # Confusing names, these spectra are the 10 representative example spectra
concVal = torch.as_tensor(concVal, dtype=torch.float32, device=device)
ValConc = torch.as_tensor(ValConc, dtype=torch.float32, device=device)

# Datasets pairing each spectrum with its concentration vector
datasets = torch.utils.data.TensorDataset(X_train, y_train)
Test_datasets = torch.utils.data.TensorDataset(X_test, y_test)

# Add a trailing singleton axis to every sample (per-sample unsqueeze(1) == whole-tensor unsqueeze(2)).
# Doing it once on the full tensor avoids building a Python list of per-sample tuples.
train_dataset_reshaped = torch.utils.data.TensorDataset(X_train.unsqueeze(2), y_train)
test_dataset_reshaped = torch.utils.data.TensorDataset(X_test.unsqueeze(2), y_test)

# Mini-batch loaders used by the training loop
train_iter = torch.utils.data.DataLoader(train_dataset_reshaped, batch_size=128, shuffle=True)
test_iter = torch.utils.data.DataLoader(test_dataset_reshaped, batch_size=128, shuffle=True)

In [8]:
## Define NN model object, define some parameters, and instantiate model

# Define some model & training parameters
# NOTE(review): the training function defined later in this notebook builds its own
# nn.MSELoss() (default reduction), so this summed-MSE criterion appears to be unused — confirm.
criterion = nn.MSELoss(reduction='sum')


class NMR_Model_Aq(nn.Module):
    """Conv1d/LSTM regressor with 8 outputs that feeds the flattened feature map to the LSTM as one step.

    Three Conv1d -> MaxPool1d -> ReLU stages are followed by a flatten, a
    single-time-step LSTM, and two linear layers.

    NOTE(review): later cells in this notebook define ``NMR_Model_Aq`` again,
    so this definition is shadowed when the notebook is run top to bottom.

    Args:
        pooled_length: Number of positions left along the spectral axis after
            the third pooling layer. The LSTM input size is
            ``32 * pooled_length``. The default (5749) is the value that was
            previously hard-coded.
    """

    def __init__(self, pooled_length=5749):
        super(NMR_Model_Aq, self).__init__()
        self.conv1 = nn.Conv1d(1, 32, kernel_size=3, padding=1)
        self.mp1 = nn.MaxPool1d(3, stride=2)
        self.conv2 = nn.Conv1d(32, 32, kernel_size=3, padding=1)
        self.mp2 = nn.MaxPool1d(3, stride=2)
        self.conv3 = nn.Conv1d(32, 32, kernel_size=3, padding=1)
        self.mp3 = nn.MaxPool1d(3, stride=2)
        self.flatten = nn.Flatten()
        # 32 channels x pooled_length positions are flattened into one LSTM input vector
        self.lstm = nn.LSTM(32 * pooled_length, hidden_size=64, num_layers=1, batch_first=True)
        self.fc1 = nn.Linear(64, 200)
        self.fc2 = nn.Linear(200, 8)

    def forward(self, x):
        """Map a (batch, length, 1) batch to a (batch, 8) prediction tensor."""
        x = x.permute(0, 2, 1)  # (batch, length, 1) -> (batch, 1, length), as Conv1d(1, ...) requires
        x = self.conv1(x)
        x = self.mp1(x)
        x = nn.functional.relu(x)
        x = self.conv2(x)
        x = self.mp2(x)
        x = nn.functional.relu(x)
        x = self.conv3(x)
        x = self.mp3(x)
        x = nn.functional.relu(x)
        x = self.flatten(x)
        # Reshape for LSTM: the whole flattened feature map is a single time step
        x = x.view(x.size(0), 1, -1)  # (batch_size, 1, flattened_features)
        x, _ = self.lstm(x)
        x = x[:, -1, :]  # Take the output of the last (and only) time step
        x = nn.functional.relu(x)
        x = self.fc1(x)
        x = nn.functional.relu(x)
        x = self.fc2(x)
        return x


In [7]:
class NMR_Model_Aq(nn.Module):
    """Conv1d/LSTM regressor with 8 outputs that runs the LSTM along the pooled spectral axis.

    Three Conv1d -> MaxPool1d -> ReLU stages produce a 32-channel feature map.
    The LSTM reads that map position by position, and the hidden output at the
    last position is passed through two linear layers.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv1d(1, 32, kernel_size=3, padding=1)
        self.mp1 = nn.MaxPool1d(3, stride=2)
        self.conv2 = nn.Conv1d(32, 32, kernel_size=3, padding=1)
        self.mp2 = nn.MaxPool1d(3, stride=2)
        self.conv3 = nn.Conv1d(32, 32, kernel_size=3, padding=1)
        self.mp3 = nn.MaxPool1d(3, stride=2)
        self.lstm = nn.LSTM(32, hidden_size=64, num_layers=1, batch_first=True)
        self.fc1 = nn.Linear(64, 200)
        self.fc2 = nn.Linear(200, 8)

    def forward(self, x):
        """Map a (batch, length, 1) batch to a (batch, 8) prediction tensor."""
        relu = nn.functional.relu

        # (batch, length, 1) -> (batch, 1, length): Conv1d expects channels on dim 1
        feats = x.permute(0, 2, 1)
        stages = ((self.conv1, self.mp1), (self.conv2, self.mp2), (self.conv3, self.mp3))
        for conv, pool in stages:
            feats = relu(pool(conv(feats)))

        # (batch, channels, steps) -> (batch, steps, channels) for the batch_first LSTM
        seq = feats.permute(0, 2, 1)
        lstm_out, _ = self.lstm(seq)

        # Keep only the output at the final time step
        hidden = relu(self.fc1(lstm_out[:, -1, :]))
        return self.fc2(hidden)

In [28]:
class NMR_Model_Aq(nn.Module):
    """CRNN regressor: three Conv1d -> MaxPool1d -> ReLU stages, a one-layer GRU, two linear layers, 8 outputs.

    ``forward`` accepts a 3-D batch in either layout:

    * ``(batch, length, 1)`` — the layout produced by the training/testing loaders, or
    * ``(batch, 1, length)`` — the layout the evaluation cells pass via ``batch.unsqueeze(1)``.

    Previously only the first layout worked, so a model built from this class
    failed in the evaluation cells unless the class was redefined and the model
    re-created out of order.
    """

    def __init__(self):
        super(NMR_Model_Aq, self).__init__()
        self.conv1 = nn.Conv1d(1, 32, kernel_size=3, padding=1)
        self.mp1 = nn.MaxPool1d(3, stride=2)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, padding=1)
        self.mp2 = nn.MaxPool1d(3, stride=2)
        self.conv3 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.mp3 = nn.MaxPool1d(3, stride=2)
        self.gru = nn.GRU(128, 64, num_layers=1, batch_first=True)
        self.fc1 = nn.Linear(64, 200)
        self.fc2 = nn.Linear(200, 8)

    def forward(self, x):
        """Return a (batch, 8) prediction tensor for a batch of single-channel spectra."""
        # Conv1d(1, ...) needs the single channel on dim 1; move it there when
        # the batch arrives channel-last. A channel-first batch is left untouched.
        if x.size(1) != 1:
            x = x.permute(0, 2, 1)
        x = self.conv1(x)
        x = self.mp1(x)
        x = nn.functional.relu(x)
        x = self.conv2(x)
        x = self.mp2(x)
        x = nn.functional.relu(x)
        x = self.conv3(x)
        x = self.mp3(x)
        x = nn.functional.relu(x)
        x = x.permute(0, 2, 1)  # Reshape for GRU: (batch_size, sequence_length, features)
        x, _ = self.gru(x)
        x = x[:, -1, :]  # Take the output of the last time step
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [29]:
def train_and_save_best_model(model, train_loader, test_loader, num_epochs, save_path, print_every=1):
    """Train ``model`` with Adam and MSE loss, checkpointing whenever the test loss improves.

    Args:
        model: Network to optimise in place.
        train_loader: Iterable of ``(inputs, targets)`` batches used for optimisation.
        test_loader: Iterable of ``(inputs, targets)`` batches used for model selection.
        num_epochs: Number of passes over ``train_loader``.
        save_path: File that receives a dict with ``model_state_dict`` and
            ``optimizer_state_dict`` each time the test loss reaches a new minimum.
        print_every: Print the losses every this many epochs (default 1, i.e.
            every epoch). ``0`` or ``None`` disables printing.

    Returns:
        ``(train_losses, test_losses)``: two lists with one float per epoch.
        Each value is the sum over batches of ``batch_mean_MSE * batch_size``;
        it is NOT divided by the dataset size, so it grows with the number of
        samples.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters())

    train_losses = []
    test_losses = []
    best_test_loss = float('inf')

    for epoch in range(num_epochs):
        # ---- optimisation pass ----
        model.train()
        train_loss = 0.0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            # Weight the batch mean by its size so short final batches count proportionally
            train_loss += loss.item() * inputs.size(0)
        train_losses.append(train_loss)

        # ---- evaluation pass (no gradients needed) ----
        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for inputs, targets in test_loader:
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                test_loss += loss.item() * inputs.size(0)
        test_losses.append(test_loss)

        if print_every and (epoch + 1) % print_every == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], '
                  f'Train Loss: {train_loss:.4f}, '
                  f'Test Loss: {test_loss:.4f}')

        if test_loss < best_test_loss:
            best_test_loss = test_loss
            # Save model when test loss improves
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, save_path)

    return train_losses, test_losses


def train_or_load_model(model, train_loader, test_loader, num_epochs, save_path):
    """Load ``model`` from ``save_path`` if that file exists, otherwise train it from scratch.

    Args:
        model: Network to populate (when loading) or optimise (when training).
        train_loader: Training batches, used only when training.
        test_loader: Testing batches, used only when training.
        num_epochs: Number of epochs, used only when training.
        save_path: Checkpoint file to load from, or to write to while training.

    Returns:
        ``(train_losses, test_losses, is_model_trained)``. Both loss lists are
        empty and the flag is ``False`` when an existing checkpoint was loaded.

    Side effects:
        After training, the per-epoch losses are saved to
        ``ModelName + "_TrainLoss.npy"`` / ``"_TestLoss.npy"`` in the current
        working directory (``ModelName`` is a notebook-level global).
    """
    train_losses = []
    test_losses = []
    is_model_trained = False  # Initialize flag

    if os.path.isfile(save_path):
        print("Loading pretrained model from {}".format(save_path))
        # Map the stored tensors onto the device the model lives on, so a
        # checkpoint written on a GPU can still be loaded on a CPU-only machine.
        first_param = next(model.parameters(), None)
        target_device = first_param.device if first_param is not None else None
        checkpoint = torch.load(save_path, map_location=target_device)
        model.load_state_dict(checkpoint['model_state_dict'])
        # No training happens on this path, so the optimizer state is not restored.
    else:
        print("No pretrained model found. Training from scratch.")
        train_losses, test_losses = train_and_save_best_model(model, train_loader, test_loader, num_epochs, save_path)
        is_model_trained = True  # Set flag to True after training
        # Save losses per epoch
        np.save(ModelName + "_TrainLoss.npy", train_losses)
        np.save(ModelName + "_TestLoss.npy", test_losses)

    return train_losses, test_losses, is_model_trained  # Return the losses and flag


In [None]:
## Build the model, then train it (or load saved parameters) while timing the cell

# Start the wall-clock timer
start_time = time.time()

# Checkpoints and training metrics are written to this directory
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/SavedParamsAndTrainingMetrics')

# Create the network and place it on the selected device (Module.to returns the module itself)
model_aq = NMR_Model_Aq().to(device)

# Checkpoint file: loaded when it exists, written during training otherwise
save_path = f"{ModelName}_Params.pt"

# Train from scratch, or reuse the saved parameters
train_losses, test_losses, is_model_trained = train_or_load_model(
    model_aq, train_iter, test_iter, num_epochs, save_path
)

# Stop the timer; the duration is recorded only when training actually ran
end_time = time.time()
execution_time = end_time - start_time
if is_model_trained:
    np.save(f"{ModelName}_ExecutionTime.npy", execution_time)
    print("Execution time:", execution_time, "seconds")

No pretrained model found. Training from scratch.
Epoch [1/300], Train Loss: 5646645.3164, Test Loss: 1091841.8340
Epoch [2/300], Train Loss: 4282539.5820, Test Loss: 1064087.3252
Epoch [3/300], Train Loss: 4136648.7598, Test Loss: 1015007.3994
Epoch [4/300], Train Loss: 3901510.4004, Test Loss: 948968.1919
Epoch [5/300], Train Loss: 3477720.4609, Test Loss: 810045.4653
Epoch [6/300], Train Loss: 3063922.8359, Test Loss: 759010.9697
Epoch [7/300], Train Loss: 2923595.3145, Test Loss: 730164.3770
Epoch [8/300], Train Loss: 2698282.0605, Test Loss: 658161.5181
Epoch [9/300], Train Loss: 2546131.2832, Test Loss: 639778.9561
Epoch [10/300], Train Loss: 2504713.5586, Test Loss: 638837.4302
Epoch [11/300], Train Loss: 2469862.7559, Test Loss: 617498.6060
Epoch [12/300], Train Loss: 2310428.5439, Test Loss: 547742.9355
Epoch [13/300], Train Loss: 2082294.8193, Test Loss: 514527.9229
Epoch [14/300], Train Loss: 1977951.7031, Test Loss: 483053.7922
Epoch [15/300], Train Loss: 1763321.0273, Test

In [None]:
# Switch to directory for saving model parameters
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/SavedParamsAndTrainingMetrics')

# Write the loss histories only when training ran in this session. When the
# checkpoint was loaded instead, train_losses/test_losses are empty lists and
# saving them would overwrite the histories recorded by the real training run.
if is_model_trained:
    np.save(ModelName + "_TrainLoss.npy", train_losses)
    np.save(ModelName + "_TestLoss.npy", test_losses)
else:
    print("Model was loaded from a checkpoint; existing loss files were left untouched.")

In [None]:

# Axis-label font size is read from rcParams when the axes are created, so it
# has to be set before the first plt.plot call to affect this figure.
plt.rcParams['axes.labelsize'] = 45  # Change label font size

if len(train_losses) == 0 or len(test_losses) == 0:
    # The loss lists are empty when the model was loaded from a checkpoint rather than trained
    print("No per-epoch losses available (model was loaded, not trained); skipping loss plot.")
else:
    # Plot the data; the x axis follows the recorded history length (1-based epochs)
    epoch_axis = np.arange(1, len(train_losses) + 1)
    plt.plot(epoch_axis, train_losses, label='Train Loss')
    plt.plot(epoch_axis, test_losses, label='Test Loss')

    # Track the running minimum test loss and its index
    prev_min_loss = test_losses[0]
    prev_min_index = 0

    # Annotate every epoch at which the test loss reaches a new running minimum
    # (these are the epochs at which the training loop saved a checkpoint)
    for idx, loss in enumerate(test_losses[1:], start=1):
        if loss < prev_min_loss:
            plt.annotate('Min', xy=(idx+1, loss), xytext=(idx+1, loss + 5000),
                         arrowprops=dict(facecolor='red', shrink=0.05))
            prev_min_loss = loss
            prev_min_index = idx

    # Add x and y labels
    plt.xlabel('Epochs')
    plt.ylabel('Loss (MSE)')

    # Change tick size
    plt.tick_params(axis='x', labelsize=30)  # Change tick size for x-axis
    plt.tick_params(axis='y', labelsize=30)  # Change tick size for y-axis

    # Plot legend, and display figure
    plt.legend(fontsize = 40)
    plt.show()

In [None]:
## Make sure best parameters are being utilized

# Switch to directory for saving model parameters
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/SavedParamsAndTrainingMetrics')

# Define the path where you saved your model parameters
save_path = ModelName + '_Params.pt'

# Load the entire dictionary from the saved file. map_location places the stored
# tensors on the active device, so a checkpoint written on a GPU also loads when
# this notebook has fallen back to the CPU.
checkpoint = torch.load(save_path, map_location=device)

# Instantiate the model
model_aq = NMR_Model_Aq()

# Load the model's state dictionary from the loaded dictionary
model_aq.load_state_dict(checkpoint['model_state_dict'])

# Move the model to the selected device
model_aq.to(device)

In [None]:
## Switch to directory for saving model metrics
# (the RMSE and percent-error .npy files written by the following cells are saved relative to it)

os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/ModelPerformanceMetrics')   

In [None]:
class NMR_Model_Aq(nn.Module):
    """CRNN regressor: three Conv1d -> MaxPool1d -> ReLU stages, a one-layer GRU, two linear layers, 8 outputs.

    ``forward`` accepts a 3-D batch in either layout:

    * ``(batch, 1, length)`` — the layout the evaluation cells pass via ``batch.unsqueeze(1)``, or
    * ``(batch, length, 1)`` — the layout produced by the training/testing loaders.

    NOTE(review): this cell redefines a class that an earlier cell already
    defined. A ``model_aq`` created before this cell keeps the earlier
    definition until it is re-instantiated.
    """

    def __init__(self):
        super(NMR_Model_Aq, self).__init__()
        self.conv1 = nn.Conv1d(1, 32, kernel_size=3, padding=1)
        self.mp1 = nn.MaxPool1d(3, stride=2)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, padding=1)
        self.mp2 = nn.MaxPool1d(3, stride=2)
        self.conv3 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.mp3 = nn.MaxPool1d(3, stride=2)
        self.gru = nn.GRU(128, 64, num_layers=1, batch_first=True)
        self.fc1 = nn.Linear(64, 200)
        self.fc2 = nn.Linear(200, 8)

    def forward(self, x):
        """Return a (batch, 8) prediction tensor for a batch of single-channel spectra."""
        # Conv1d(1, ...) needs the single channel on dim 1; move it there when
        # the batch arrives channel-last. A channel-first batch is left untouched.
        if x.size(1) != 1:
            x = x.permute(0, 2, 1)
        x = self.conv1(x)
        x = self.mp1(x)
        x = nn.functional.relu(x)
        x = self.conv2(x)
        x = self.mp2(x)
        x = nn.functional.relu(x)
        x = self.conv3(x)
        x = self.mp3(x)
        x = nn.functional.relu(x)
        x = x.permute(0, 2, 1)  # Reshape for GRU: (batch_size, sequence_length, features)
        x, _ = self.gru(x)
        x = x[:, -1, :]  # Take the output of the last time step
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [None]:
## Test model on training dataset and determine RMSE

# Feed the spectra through in small batches to limit peak memory
loader = torch.utils.data.DataLoader(X_train, batch_size=32)

# Initialize an empty list to store the outputs
all_outputs = []

# Inference only: eval mode plus no_grad() skips autograd bookkeeping, so no
# computation graph is kept alive for each batch.
# NOTE(review): batches are passed as (batch, 1, length) here, while the training
# loader yields (batch, length, 1) — confirm model_aq's forward expects this layout.
model_aq.eval()
with torch.no_grad():
    for batch in loader:
        # Move the batch to the selected device
        batch = batch.to(device)

        # Forward pass through the model
        outputs = model_aq(batch.unsqueeze(1))

        # Move the outputs to CPU and append to the list
        all_outputs.append(outputs.cpu().numpy())

# Concatenate the outputs from all batches
outputs_cpu = np.concatenate(all_outputs)

# Compute RMSE (arguments in scikit-learn's documented order: y_true, y_pred)
err = np.sqrt(mean_squared_error(y_train.cpu().numpy(), outputs_cpu))
print('model err: ', err)  # Print RMSE

np.save(ModelName + "TrainRMSE",err)

In [None]:
## Test model on testing dataset and determine RMSE

# Feed the spectra through in small batches to limit peak memory
loader = torch.utils.data.DataLoader(X_test, batch_size=32)

# Initialize an empty list to store the outputs
all_outputs = []

# Inference only: eval mode plus no_grad() skips autograd bookkeeping, so no
# computation graph is kept alive for each batch.
# NOTE(review): batches are passed as (batch, 1, length) here, while the testing
# loader yields (batch, length, 1) — confirm model_aq's forward expects this layout.
model_aq.eval()
with torch.no_grad():
    for batch in loader:
        # Move the batch to the selected device
        batch = batch.to(device)

        # Forward pass through the model
        outputs = model_aq(batch.unsqueeze(1))

        # Move the outputs to CPU and append to the list
        all_outputs.append(outputs.cpu().numpy())

# Concatenate the outputs from all batches
outputs_cpu = np.concatenate(all_outputs)

# Compute RMSE (arguments in scikit-learn's documented order: y_true, y_pred)
err = np.sqrt(mean_squared_error(y_test.cpu().numpy(), outputs_cpu))
print('model err: ', err)  # Print RMSE

np.save(ModelName + "TestRMSE",err)

In [None]:
## Test model on validation dataset and determine RMSE

# Feed the spectra through in small batches to limit peak memory
loader = torch.utils.data.DataLoader(spectraVal, batch_size=32)

# Initialize an empty list to store the outputs
all_outputs = []

# Inference only: eval mode plus no_grad() skips autograd bookkeeping, so no
# computation graph is kept alive for each batch.
# NOTE(review): batches are passed as (batch, 1, length) here, while the training
# loader yields (batch, length, 1) — confirm model_aq's forward expects this layout.
model_aq.eval()
with torch.no_grad():
    for batch in loader:
        # Move the batch to the selected device
        batch = batch.to(device)

        # Forward pass through the model
        outputs = model_aq(batch.unsqueeze(1))

        # Move the outputs to CPU and append to the list
        all_outputs.append(outputs.cpu().numpy())

# Concatenate the outputs from all batches
outputs_cpu = np.concatenate(all_outputs)

# Compute RMSE (arguments in scikit-learn's documented order: y_true, y_pred)
err = np.sqrt(mean_squared_error(concVal.cpu().numpy(), outputs_cpu))
print('model err: ', err)  # Print RMSE

np.save(ModelName + "ValRMSE",err)

In [None]:
## Absolute percent error (APE) per metabolite and mean APE (MAPE) per representative example

APEs = []
MAPEs = []

# NOTE(review): only the first 8 example spectra are scored, although an earlier
# comment in this notebook describes ValSpectra as holding 10 representative
# examples — confirm whether the outer loop should cover all of them.
# NOTE(review): a ground-truth concentration of exactly 0 yields inf/nan below.

# Inference only: no_grad() avoids building a computation graph for each prediction
model_aq.eval()
with torch.no_grad():
    for i in np.arange(8):
        GroundTruth = ValConc[i]
        Prediction = model_aq(ValSpectra[i].unsqueeze(1))

        # Move Prediction tensor to CPU as a NumPy array
        Prediction_cpu = Prediction.detach().cpu().numpy()

        APE = []

        # One percent error per output (8 metabolites)
        for metabolite in range(8):
            per_err = 100*(GroundTruth[metabolite] - Prediction_cpu[0][metabolite]) / GroundTruth[metabolite]
            APE.append(abs(per_err.cpu()))

        MAPE = sum(APE) / len(APE)

        APEs.append(APE)
        MAPEs.append(MAPE)


# Convert lists to numpy arrays and save
np.save(ModelName + "_" + "ValExamples_APEs.npy", np.array(APEs))
np.save(ModelName + "_" + "ValExamples_MAPEs.npy", np.array(MAPEs))


In [None]:
MAPEs

## Train CRNN on dataset of 21 metabolites

In [None]:
# Define number of epochs used later in training
num_epochs = 300

# Name variable used for saving model metrics, name should reflect model used, dataset used, and other information such as # of epochs
# The network trained in this section is the conv + GRU model (a CRNN), matching the
# "CRNN_..." prefix of the 8-metabolite run. The previous "CNN_..." prefix mislabelled
# the artifacts and could collide with files saved by a plain-CNN run.
ModelName = "CRNN_NonIncremental_21Met" + str(num_epochs) +"ep"

# Switch to the metrics directory, set the random seed, and record it
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/ModelPerformanceMetrics/')
seed = 1
torch.manual_seed(seed)
np.save(ModelName + "_Seed.npy", seed)

In [None]:
## Load the train/test pool, the validation dataset, and the representative example spectra

# Every dataset file is read relative to this directory
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/GeneratedDataAndVariables')

# Spectra and matching concentrations that are later split into training and testing sets
spectra, conc1 = np.load('Dataset21_Spec.npy'), np.load('Dataset21_Conc.npy')

# Validation spectra and concentrations
spectraVal, concVal = np.load('Dataset21_Val_Spec.npy'), np.load('Dataset21_Val_Conc.npy')

# Representative example spectra, their concentrations, and their variable names
ValSpectra, ValConc, ValSpecNames = (
    np.load("Dataset21_RepresentativeExamples_Spectra.npy"),
    np.load("Dataset21_RepresentativeExamples_Concentrations.npy"),
    np.load("Dataset21_RepresentativeExamples_VariableNames.npy"),
)

In [None]:
## Choose the compute device: the GPU when CUDA is available, otherwise the CPU

if not torch.cuda.is_available():
    # No usable GPU: fall back to the CPU
    device = torch.device("cpu")
    print("CUDA is not available. Using CPU for training.")
else:
    device = torch.device("cuda")
    print("Using GPU for training.")

In [None]:
## Set up data for testing and training

# Split into testing and training data
X_train1, X_test1, y_train1, y_test1 = train_test_split(spectra, conc1, test_size = 0.2, random_state = 0)

# Tensorize the split arrays as float32 and move them to the selected device
X_train = torch.tensor(X_train1).float().to(device)
y_train = torch.tensor(y_train1).float().to(device)
X_test = torch.tensor(X_test1).float().to(device)
y_test = torch.tensor(y_test1).float().to(device)

# These names are rebound from NumPy arrays to tensors. torch.as_tensor accepts either,
# so re-running this cell does not trigger the "copy construct from a tensor" warning.
spectraVal = torch.as_tensor(spectraVal, dtype=torch.float32, device=device)   # Confusing names, these spectra are the 5000 spectra generated like the training dataset
ValSpectra = torch.as_tensor(ValSpectra, dtype=torch.float32, device=device)   # Confusing names, these spectra are the 10 representative example spectra
concVal = torch.as_tensor(concVal, dtype=torch.float32, device=device)
ValConc = torch.as_tensor(ValConc, dtype=torch.float32, device=device)

# Datasets pairing each spectrum with its concentration vector
datasets = torch.utils.data.TensorDataset(X_train, y_train)
Test_datasets = torch.utils.data.TensorDataset(X_test, y_test)

# Add a trailing singleton axis to every sample (per-sample unsqueeze(1) == whole-tensor unsqueeze(2)).
# Doing it once on the full tensor avoids building a Python list of per-sample tuples.
train_dataset_reshaped = torch.utils.data.TensorDataset(X_train.unsqueeze(2), y_train)
test_dataset_reshaped = torch.utils.data.TensorDataset(X_test.unsqueeze(2), y_test)

# Mini-batch loaders used by the training loop
train_iter = torch.utils.data.DataLoader(train_dataset_reshaped, batch_size=128, shuffle=True)
test_iter = torch.utils.data.DataLoader(test_dataset_reshaped, batch_size=128, shuffle=True)

In [None]:
class NMR_Model_Aq(nn.Module):
    """CRNN regressor: three Conv1d -> MaxPool1d -> ReLU stages, a one-layer GRU, two linear layers, 21 outputs.

    ``forward`` accepts a 3-D batch in either layout:

    * ``(batch, length, 1)`` — the layout produced by the training/testing loaders, or
    * ``(batch, 1, length)`` — the layout the evaluation cells pass via ``batch.unsqueeze(1)``.

    Previously only the first layout worked, so a model built from this class
    failed in the evaluation cells unless the class was redefined and the model
    re-created out of order.
    """

    def __init__(self):
        super(NMR_Model_Aq, self).__init__()
        self.conv1 = nn.Conv1d(1, 32, kernel_size=3, padding=1)
        self.mp1 = nn.MaxPool1d(2, stride=2)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, padding=1)
        self.mp2 = nn.MaxPool1d(2, stride=2)
        self.conv3 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.mp3 = nn.MaxPool1d(2, stride=2)
        self.gru = nn.GRU(128, 64, num_layers=1, batch_first=True)
        self.fc1 = nn.Linear(64, 200)
        self.fc2 = nn.Linear(200, 21)

    def forward(self, x):
        """Return a (batch, 21) prediction tensor for a batch of single-channel spectra."""
        # Conv1d(1, ...) needs the single channel on dim 1; move it there when
        # the batch arrives channel-last. A channel-first batch is left untouched.
        if x.size(1) != 1:
            x = x.permute(0, 2, 1)
        x = self.conv1(x)
        x = self.mp1(x)
        x = nn.functional.relu(x)
        x = self.conv2(x)
        x = self.mp2(x)
        x = nn.functional.relu(x)
        x = self.conv3(x)
        x = self.mp3(x)
        x = nn.functional.relu(x)
        x = x.permute(0, 2, 1)  # Reshape for GRU: (batch_size, sequence_length, features)
        x, _ = self.gru(x)
        x = x[:, -1, :]  # Take the output of the last time step
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [None]:
def train_and_save_best_model(model, train_loader, test_loader, num_epochs, save_path, print_every=1):
    """Train ``model`` with Adam and MSE loss, checkpointing whenever the test loss improves.

    Args:
        model: Network to optimise in place.
        train_loader: Iterable of ``(inputs, targets)`` batches used for optimisation.
        test_loader: Iterable of ``(inputs, targets)`` batches used for model selection.
        num_epochs: Number of passes over ``train_loader``.
        save_path: File that receives a dict with ``model_state_dict`` and
            ``optimizer_state_dict`` each time the test loss reaches a new minimum.
        print_every: Print the losses every this many epochs (default 1, i.e.
            every epoch). ``0`` or ``None`` disables printing.

    Returns:
        ``(train_losses, test_losses)``: two lists with one float per epoch.
        Each value is the sum over batches of ``batch_mean_MSE * batch_size``;
        it is NOT divided by the dataset size, so it grows with the number of
        samples.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters())

    train_losses = []
    test_losses = []
    best_test_loss = float('inf')

    for epoch in range(num_epochs):
        # ---- optimisation pass ----
        model.train()
        train_loss = 0.0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            # Weight the batch mean by its size so short final batches count proportionally
            train_loss += loss.item() * inputs.size(0)
        train_losses.append(train_loss)

        # ---- evaluation pass (no gradients needed) ----
        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for inputs, targets in test_loader:
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                test_loss += loss.item() * inputs.size(0)
        test_losses.append(test_loss)

        if print_every and (epoch + 1) % print_every == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], '
                  f'Train Loss: {train_loss:.4f}, '
                  f'Test Loss: {test_loss:.4f}')

        if test_loss < best_test_loss:
            best_test_loss = test_loss
            # Save model when test loss improves
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, save_path)

    return train_losses, test_losses


def train_or_load_model(model, train_loader, test_loader, num_epochs, save_path):
    """Load ``model`` from ``save_path`` if that file exists, otherwise train it from scratch.

    Args:
        model: Network to populate (when loading) or optimise (when training).
        train_loader: Training batches, used only when training.
        test_loader: Testing batches, used only when training.
        num_epochs: Number of epochs, used only when training.
        save_path: Checkpoint file to load from, or to write to while training.

    Returns:
        ``(train_losses, test_losses, is_model_trained)``. Both loss lists are
        empty and the flag is ``False`` when an existing checkpoint was loaded.

    Side effects:
        After training, the per-epoch losses are saved to
        ``ModelName + "_TrainLoss.npy"`` / ``"_TestLoss.npy"`` in the current
        working directory (``ModelName`` is a notebook-level global).
    """
    train_losses = []
    test_losses = []
    is_model_trained = False  # Initialize flag

    if os.path.isfile(save_path):
        print("Loading pretrained model from {}".format(save_path))
        # Map the stored tensors onto the device the model lives on, so a
        # checkpoint written on a GPU can still be loaded on a CPU-only machine.
        first_param = next(model.parameters(), None)
        target_device = first_param.device if first_param is not None else None
        checkpoint = torch.load(save_path, map_location=target_device)
        model.load_state_dict(checkpoint['model_state_dict'])
        # No training happens on this path, so the optimizer state is not restored.
    else:
        print("No pretrained model found. Training from scratch.")
        train_losses, test_losses = train_and_save_best_model(model, train_loader, test_loader, num_epochs, save_path)
        is_model_trained = True  # Set flag to True after training
        # Save losses per epoch
        np.save(ModelName + "_TrainLoss.npy", train_losses)
        np.save(ModelName + "_TestLoss.npy", test_losses)

    return train_losses, test_losses, is_model_trained  # Return the losses and flag


In [None]:
## Build the model, then train it (or load saved parameters) while timing the cell

# Start the wall-clock timer
start_time = time.time()

# Checkpoints and training metrics are written to this directory
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/SavedParamsAndTrainingMetrics')

# Create the network and place it on the selected device (Module.to returns the module itself)
model_aq = NMR_Model_Aq().to(device)

# Checkpoint file: loaded when it exists, written during training otherwise
save_path = f"{ModelName}_Params.pt"

# Train from scratch, or reuse the saved parameters
train_losses, test_losses, is_model_trained = train_or_load_model(
    model_aq, train_iter, test_iter, num_epochs, save_path
)

# Stop the timer; the duration is recorded only when training actually ran
end_time = time.time()
execution_time = end_time - start_time
if is_model_trained:
    np.save(f"{ModelName}_ExecutionTime.npy", execution_time)
    print("Execution time:", execution_time, "seconds")

In [None]:
# Switch to directory for saving model parameters
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/SavedParamsAndTrainingMetrics')

# Write the loss histories only when training ran in this session. When the
# checkpoint was loaded instead, train_losses/test_losses are empty lists and
# saving them would overwrite the histories recorded by the real training run.
if is_model_trained:
    np.save(ModelName + "_TrainLoss.npy", train_losses)
    np.save(ModelName + "_TestLoss.npy", test_losses)
else:
    print("Model was loaded from a checkpoint; existing loss files were left untouched.")

In [None]:

# Axis-label font size is read from rcParams when the axes are created, so it
# has to be set before the first plt.plot call to affect this figure.
plt.rcParams['axes.labelsize'] = 45  # Change label font size

if len(train_losses) == 0 or len(test_losses) == 0:
    # The loss lists are empty when the model was loaded from a checkpoint rather than trained
    print("No per-epoch losses available (model was loaded, not trained); skipping loss plot.")
else:
    # Plot the data; the x axis follows the recorded history length (1-based epochs)
    epoch_axis = np.arange(1, len(train_losses) + 1)
    plt.plot(epoch_axis, train_losses, label='Train Loss')
    plt.plot(epoch_axis, test_losses, label='Test Loss')

    # Track the running minimum test loss and its index
    prev_min_loss = test_losses[0]
    prev_min_index = 0

    # Annotate every epoch at which the test loss reaches a new running minimum
    # (these are the epochs at which the training loop saved a checkpoint)
    for idx, loss in enumerate(test_losses[1:], start=1):
        if loss < prev_min_loss:
            plt.annotate('Min', xy=(idx+1, loss), xytext=(idx+1, loss + 5000),
                         arrowprops=dict(facecolor='red', shrink=0.05))
            prev_min_loss = loss
            prev_min_index = idx

    # Add x and y labels
    plt.xlabel('Epochs')
    plt.ylabel('Loss (MSE)')

    # Change tick size
    plt.tick_params(axis='x', labelsize=30)  # Change tick size for x-axis
    plt.tick_params(axis='y', labelsize=30)  # Change tick size for y-axis

    # Plot legend, and display figure
    plt.legend(fontsize = 40)
    plt.show()

In [None]:
## Make sure best parameters are being utilized

# Switch to directory for saving model parameters
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/SavedParamsAndTrainingMetrics')

# Define the path where you saved your model parameters
save_path = ModelName + '_Params.pt'

# Load the entire dictionary from the saved file. map_location places the stored
# tensors on the active device, so a checkpoint written on a GPU also loads when
# this notebook has fallen back to the CPU.
checkpoint = torch.load(save_path, map_location=device)

# Instantiate the model
model_aq = NMR_Model_Aq()

# Load the model's state dictionary from the loaded dictionary
model_aq.load_state_dict(checkpoint['model_state_dict'])

# Move the model to the selected device
model_aq.to(device)

In [None]:
## Switch to directory for saving model metrics
# (relative paths used by later cells resolve against this directory from here on)

os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/ModelPerformanceMetrics')   

In [None]:
# NOTE(review): `model_aq` is instantiated in an earlier cell, before this
# definition runs. An existing instance keeps the class it was built from, so
# this definition only affects models created afterwards -- confirm that the
# evaluation cells below really use the forward() defined here.
class NMR_Model_Aq(nn.Module):
    """Conv1d/GRU regressor: three conv + max-pool stages, a GRU and two linear layers.

    ``forward`` passes its input straight to ``conv1`` (one input channel) and
    returns 21 values per sample.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor; every pooling layer halves the length
        self.conv1 = nn.Conv1d(1, 32, kernel_size=3, padding=1)
        self.mp1 = nn.MaxPool1d(2, stride=2)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, padding=1)
        self.mp2 = nn.MaxPool1d(2, stride=2)
        self.conv3 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.mp3 = nn.MaxPool1d(2, stride=2)
        # Recurrent layer over the pooled positions, followed by the regression head
        self.gru = nn.GRU(128, 64, num_layers=1, batch_first=True)
        self.fc1 = nn.Linear(64, 200)
        self.fc2 = nn.Linear(200, 21)

    def forward(self, x):
        # conv -> max-pool -> ReLU, three times
        stages = (
            (self.conv1, self.mp1),
            (self.conv2, self.mp2),
            (self.conv3, self.mp3),
        )
        for conv, pool in stages:
            x = nn.functional.relu(pool(conv(x)))

        # The GRU is batch_first: swap to (batch, positions, channels)
        sequence = x.permute(0, 2, 1)
        gru_out, _ = self.gru(sequence)

        # Keep only the GRU output at the last position
        last_step = gru_out[:, -1, :]
        hidden = nn.functional.relu(self.fc1(last_step))
        return self.fc2(hidden)

In [None]:
## Evaluate the model on the training dataset and determine RMSE

# Feed the spectra through the model in small batches
loader = torch.utils.data.DataLoader(X_train, batch_size=32)

# Predictions of every batch, collected as numpy arrays
all_outputs = []

# Inference only: without no_grad() every forward pass would record an
# autograd graph that is never used.
with torch.no_grad():
    for batch in loader:
        # Make sure the batch lives on the same device as the model
        batch = batch.to(device)

        # Insert an axis at position 1 before the forward pass
        outputs = model_aq(batch.unsqueeze(1))

        # Move the outputs to CPU and keep them as numpy
        all_outputs.append(outputs.cpu().detach().numpy())

# Concatenate the outputs from all batches
outputs_cpu = np.concatenate(all_outputs)

# RMSE over all samples and outputs (arguments in sklearn's y_true, y_pred order)
err = np.sqrt(mean_squared_error(y_train.cpu().detach().numpy(), outputs_cpu))
print('model err: ', err)  # Print RMSE

np.save(ModelName + "TrainRMSE",err)

In [None]:
## Evaluate the model on the testing dataset and determine RMSE

# Feed the spectra through the model in small batches
loader = torch.utils.data.DataLoader(X_test, batch_size=32)

# Predictions of every batch, collected as numpy arrays
all_outputs = []

# Inference only: without no_grad() every forward pass would record an
# autograd graph that is never used.
with torch.no_grad():
    for batch in loader:
        # Make sure the batch lives on the same device as the model
        batch = batch.to(device)

        # Insert an axis at position 1 before the forward pass
        outputs = model_aq(batch.unsqueeze(1))

        # Move the outputs to CPU and keep them as numpy
        all_outputs.append(outputs.cpu().detach().numpy())

# Concatenate the outputs from all batches
outputs_cpu = np.concatenate(all_outputs)

# RMSE over all samples and outputs (arguments in sklearn's y_true, y_pred order)
err = np.sqrt(mean_squared_error(y_test.cpu().detach().numpy(), outputs_cpu))
print('model err: ', err)  # Print RMSE

np.save(ModelName + "TestRMSE",err)

In [None]:
## Evaluate the model on the validation dataset and determine RMSE

# Feed the spectra through the model in small batches
loader = torch.utils.data.DataLoader(spectraVal, batch_size=32)

# Predictions of every batch, collected as numpy arrays
all_outputs = []

# Inference only: without no_grad() every forward pass would record an
# autograd graph that is never used.
with torch.no_grad():
    for batch in loader:
        # Make sure the batch lives on the same device as the model
        batch = batch.to(device)

        # Insert an axis at position 1 before the forward pass
        outputs = model_aq(batch.unsqueeze(1))

        # Move the outputs to CPU and keep them as numpy
        all_outputs.append(outputs.cpu().detach().numpy())

# Concatenate the outputs from all batches
outputs_cpu = np.concatenate(all_outputs)

# RMSE over all samples and outputs (arguments in sklearn's y_true, y_pred order)
err = np.sqrt(mean_squared_error(concVal.cpu().detach().numpy(), outputs_cpu))
print('model err: ', err)  # Print RMSE

np.save(ModelName + "ValRMSE",err)

In [None]:
## Absolute percent error (APE) per metabolite and its mean (MAPE) for the example spectra

APEs = []
MAPEs = []

# NOTE(review): the loop covers exactly 8 example spectra -- confirm this
# matches the number of examples stored in ValSpectra.
# Inference only, so autograd bookkeeping is switched off.
with torch.no_grad():
    for i in np.arange(8):
        GroundTruth = ValConc[i]
        Prediction = model_aq(ValSpectra[i].unsqueeze(1))

        # Move Prediction tensor to CPU and convert to numpy
        Prediction_cpu = Prediction.detach().cpu().numpy()

        APE = []

        # Iterate over every model output instead of a hard-coded count, so the
        # loop cannot drift out of sync with the network definition
        for metabolite in range(Prediction_cpu.shape[1]):
            per_err = 100*(GroundTruth[metabolite] - Prediction_cpu[0][metabolite]) / GroundTruth[metabolite]
            APE.append(abs(per_err.cpu()))

        MAPE = sum(APE) / len(APE)

        APEs.append(APE)
        MAPEs.append(MAPE)


# Convert lists to numpy arrays and save
np.save(ModelName + "_" + "ValExamples_APEs.npy", np.array(APEs))
np.save(ModelName + "_" + "ValExamples_MAPEs.npy", np.array(MAPEs))


In [None]:
# Display the list of per-example MAPE values built in the previous cell
MAPEs

## Train CRNN on dataset of 58 metabolites

In [None]:
# Define number of epochs used later in training
num_epochs = 300

# Name variable used for saving model metrics, name should reflect model used, dataset used, and other information such as # of epochs.
# The network defined in this section contains a GRU after the convolutions
# (a CRNN), so the prefix is "CRNN", matching the 8-metabolite section. A "CNN"
# prefix would share file names with a plain-CNN run saved in the same folders.
# Files written earlier under the old "CNN_..." name are not picked up any more.
ModelName = "CRNN_NonIncremental_58Met" + str(num_epochs) +"ep"

# Set the random seed and store it alongside the model metrics
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/ModelPerformanceMetrics/')
seed = 1
torch.manual_seed(seed)
np.save(ModelName + "_Seed.npy", seed)

In [None]:
## Read the train/test data, the validation set and the representative example spectra

# All dataset files live in this directory
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/GeneratedDataAndVariables')

# Spectra and matching concentrations that are later split into train and test sets
spectra, conc1 = (
    np.load(file_name)
    for file_name in ('Dataset58_Spec.npy', 'Dataset58_Conc.npy')
)

# Validation spectra and concentrations
spectraVal, concVal = (
    np.load(file_name)
    for file_name in ('Dataset58_Val_Spec.npy', 'Dataset58_Val_Conc.npy')
)

# Representative example spectra with their concentrations and names
ValSpectra = np.load("Dataset58_RepresentativeExamples_Spectra.npy")
ValConc = np.load("Dataset58_RepresentativeExamples_Concentrations.npy")
ValSpecNames = np.load("Dataset58_RepresentativeExamples_VariableNames.npy")

In [None]:
## Choose the device that tensors and the model will be moved to

# Prefer CUDA when it is available, otherwise stay on the CPU
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print("Using GPU for training." if use_cuda
      else "CUDA is not available. Using CPU for training.")

In [None]:
## Build the tensors, datasets and loaders used for training and testing

# 80/20 train/test split with a fixed random state
X_train1, X_test1, y_train1, y_test1 = train_test_split(spectra, conc1, test_size = 0.2, random_state = 1)


def _to_device_float(array):
    """Return ``array`` as a float32 tensor on the selected device."""
    return torch.tensor(array).float().to(device)


# Inputs and targets of the train/test split
X_train, y_train = _to_device_float(X_train1), _to_device_float(y_train1)
X_test, y_test = _to_device_float(X_test1), _to_device_float(y_test1)

# Confusing names: spectraVal/concVal are the validation set generated like the
# training data, ValSpectra/ValConc are the representative example spectra
spectraVal, concVal = _to_device_float(spectraVal), _to_device_float(concVal)
ValSpectra, ValConc = _to_device_float(ValSpectra), _to_device_float(ValConc)

# Pair inputs with targets
datasets = torch.utils.data.TensorDataset(X_train, y_train)
Test_datasets = torch.utils.data.TensorDataset(X_test, y_test)

# Give every sample an extra axis at position 1 before batching
train_dataset_reshaped = [(sample.unsqueeze(1), target) for sample, target in datasets]
test_dataset_reshaped = [(sample.unsqueeze(1), target) for sample, target in Test_datasets]

# Shuffled mini-batch loaders
train_iter = torch.utils.data.DataLoader(train_dataset_reshaped, batch_size=128, shuffle=True)
test_iter = torch.utils.data.DataLoader(test_dataset_reshaped, batch_size=128, shuffle=True)

In [None]:
## Loss object and network definition used for training

# Summed (not averaged) MSE loss object
criterion = nn.MSELoss(reduction='sum')


class NMR_Model_Aq(nn.Module):
    """Conv1d/GRU regressor: three conv + max-pool stages, a GRU and two linear layers.

    ``forward`` first swaps axes 1 and 2 of its input so that ``conv1`` (one
    input channel) sees the size-1 axis as the channel axis, and returns 58
    values per sample.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor; every pooling layer halves the length
        self.conv1 = nn.Conv1d(1, 32, kernel_size=3, padding=1)
        self.mp1 = nn.MaxPool1d(2, stride=2)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, padding=1)
        self.mp2 = nn.MaxPool1d(2, stride=2)
        self.conv3 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.mp3 = nn.MaxPool1d(2, stride=2)
        # Recurrent layer over the pooled positions, followed by the regression head
        self.gru = nn.GRU(128, 64, num_layers=1, batch_first=True)
        self.fc1 = nn.Linear(64, 200)
        self.fc2 = nn.Linear(200, 58)

    def forward(self, x):
        # Bring the input into Conv1d layout by swapping axes 1 and 2
        x = x.permute(0, 2, 1)

        # conv -> max-pool -> ReLU, three times
        stages = (
            (self.conv1, self.mp1),
            (self.conv2, self.mp2),
            (self.conv3, self.mp3),
        )
        for conv, pool in stages:
            x = nn.functional.relu(pool(conv(x)))

        # The GRU is batch_first: swap to (batch, positions, channels)
        sequence = x.permute(0, 2, 1)
        gru_out, _ = self.gru(sequence)

        # Keep only the GRU output at the last position
        last_step = gru_out[:, -1, :]
        hidden = nn.functional.relu(self.fc1(last_step))
        return self.fc2(hidden)

In [None]:
def train_and_save_best_model(model, train_loader, test_loader, num_epochs, save_path, print_every=1):
    """Train ``model`` with Adam and MSE loss, checkpointing the best epoch.

    After every epoch the model is evaluated on ``test_loader``. Whenever the
    epoch's test loss is lower than in all previous epochs, the model and
    optimizer state dicts are written to ``save_path`` (overwriting the file).

    Args:
        model: network to optimise in place.
        train_loader: iterable of ``(inputs, targets)`` batches used for the
            parameter updates.
        test_loader: iterable of ``(inputs, targets)`` batches used for the
            per-epoch evaluation.
        num_epochs: number of passes over ``train_loader``.
        save_path: file the best checkpoint is written to.
        print_every: print the losses every this many epochs; ``0`` or ``None``
            disables printing. Defaults to 1 (every epoch).

    Returns:
        ``(train_losses, test_losses)``: two lists with one float per epoch.
        Each value is the sum over batches of (batch-mean MSE * batch size);
        it is not divided by the number of samples.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters())

    train_losses = []
    test_losses = []
    best_test_loss = float('inf')

    for epoch in range(num_epochs):
        # ---- optimisation pass ----
        model.train()
        train_loss = 0.0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            # Weight the batch mean by the batch size so that a smaller last
            # batch contributes proportionally
            train_loss += loss.item() * inputs.size(0)
        train_losses.append(train_loss)

        # ---- evaluation pass (no gradients) ----
        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for inputs, targets in test_loader:
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                test_loss += loss.item() * inputs.size(0)
        test_losses.append(test_loss)

        if print_every and (epoch + 1) % print_every == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], '
                  f'Train Loss: {train_loss:.4f}, '
                  f'Test Loss: {test_loss:.4f}')

        # Keep only the parameters of the best epoch seen so far
        if test_loss < best_test_loss:
            best_test_loss = test_loss
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, save_path)

    return train_losses, test_losses


def train_or_load_model(model, train_loader, test_loader, num_epochs, save_path):
    """Load saved parameters into ``model`` if present, otherwise train it.

    If ``save_path`` exists, its ``'model_state_dict'`` is loaded into
    ``model`` and no training happens. The loss history stored by an earlier
    training run (``ModelName + "_TrainLoss.npy"`` / ``"_TestLoss.npy"`` in the
    current directory) is returned when those files exist, so callers can still
    plot it; otherwise empty lists are returned.

    If ``save_path`` does not exist, the model is trained with
    ``train_and_save_best_model`` and the per-epoch losses are saved under the
    file names above.

    Args:
        model: network to fill with saved weights or to train in place.
        train_loader: training batches, forwarded to the training function.
        test_loader: evaluation batches, forwarded to the training function.
        num_epochs: number of training epochs when training is needed.
        save_path: checkpoint file to load from / save to.

    Returns:
        ``(train_losses, test_losses, is_model_trained)`` where the first two
        are lists of per-epoch losses and the flag is True only when training
        ran in this call.
    """
    # Relies on the notebook-level ModelName for the loss-history file names
    train_loss_file = ModelName + "_TrainLoss.npy"
    test_loss_file = ModelName + "_TestLoss.npy"

    if os.path.isfile(save_path):
        print("Loading pretrained model from {}".format(save_path))

        # Remap the stored tensors onto the device the model currently lives
        # on, so a checkpoint saved on a GPU also loads on a CPU-only machine
        first_param = next(model.parameters(), None)
        target_device = first_param.device if first_param is not None else "cpu"
        checkpoint = torch.load(save_path, map_location=target_device)
        model.load_state_dict(checkpoint['model_state_dict'])
        # No optimizer is rebuilt here: nothing is trained in this branch, so
        # an optimizer restored from the checkpoint would never be used.

        # Recover the history of the run that produced the checkpoint, if any
        train_losses = np.load(train_loss_file).tolist() if os.path.isfile(train_loss_file) else []
        test_losses = np.load(test_loss_file).tolist() if os.path.isfile(test_loss_file) else []
        return train_losses, test_losses, False

    print("No pretrained model found. Training from scratch.")
    train_losses, test_losses = train_and_save_best_model(model, train_loader, test_loader, num_epochs, save_path)

    # Save losses per epoch
    np.save(train_loss_file, train_losses)
    np.save(test_loss_file, test_losses)

    return train_losses, test_losses, True


In [None]:
## Build the network, then train it or load previously saved parameters

# Wall-clock timer for this cell
start_time = time.time()

# Checkpoints and training metrics are read from / written to this directory
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/SavedParamsAndTrainingMetrics')

# Fresh model on the selected device
model_aq = NMR_Model_Aq()
model_aq.to(device)

# Checkpoint file used for both saving and loading
save_path = ModelName + '_Params.pt'

# Train from scratch, or load the checkpoint if it already exists
train_losses, test_losses, is_model_trained = train_or_load_model(
    model=model_aq,
    train_loader=train_iter,
    test_loader=test_iter,
    num_epochs=num_epochs,
    save_path=save_path,
)

# Stop the timer; the run time is only recorded when training actually happened
end_time = time.time()
execution_time = end_time - start_time
if is_model_trained:
    np.save(ModelName + "_ExecutionTime.npy", execution_time)
    print("Execution time:", execution_time, "seconds")

In [None]:
# Switch to the directory that holds the saved model parameters and training metrics
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/SavedParamsAndTrainingMetrics')

# Only write a history that actually contains values. If no training happened
# in this session the lists can be empty, and saving them unconditionally would
# replace the loss files of an earlier training run with empty arrays.
if len(train_losses) > 0:
    np.save(ModelName + "_TrainLoss.npy", train_losses)
if len(test_losses) > 0:
    np.save(ModelName + "_TestLoss.npy", test_losses)

In [None]:

# Plot train/test loss per epoch and mark every epoch that sets a new best test loss

# Set the axis-label font size BEFORE anything is drawn: matplotlib picks up
# 'axes.labelsize' when the axes are created, so changing it after plt.plot()
# would only affect figures drawn later.
plt.rcParams['axes.labelsize'] = 45

if len(train_losses) == 0 or len(test_losses) == 0:
    # Nothing was recorded (e.g. no training ran in this session)
    print("No loss history available to plot.")
else:
    # Derive the x axis from the recorded history instead of num_epochs so the
    # two can never disagree in length
    plt.plot(np.arange(len(train_losses)) + 1, train_losses, label='Train Loss')
    plt.plot(np.arange(len(test_losses)) + 1, test_losses, label='Test Loss')

    # Track the running minimum of the test loss and the index where it occurred
    prev_min_loss = test_losses[0]
    prev_min_index = 0

    # Annotate every epoch whose test loss is lower than all previous epochs.
    # idx is a 0-based list index, the plot's x axis is 1-based, hence idx+1.
    for idx, loss in enumerate(test_losses[1:], start=1):
        if loss < prev_min_loss:
            plt.annotate('Min', xy=(idx+1, loss), xytext=(idx+1, loss + 5000),
                         arrowprops=dict(facecolor='red', shrink=0.05))
            prev_min_loss = loss
            prev_min_index = idx

    # Add x and y labels
    plt.xlabel('Epochs')
    plt.ylabel('Loss (MSE)')

    # Change tick size
    plt.tick_params(axis='x', labelsize=30)
    plt.tick_params(axis='y', labelsize=30)

    # Plot legend, and display figure
    plt.legend(fontsize = 40)
    plt.show()

In [None]:
## Make sure best parameters are being utilized

# Switch to directory for saving model parameters
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/SavedParamsAndTrainingMetrics')

# Path of the checkpoint written during training
save_path = ModelName + '_Params.pt'

# Load the checkpoint dictionary. map_location remaps the stored tensors onto
# the device selected earlier, so a checkpoint written on a GPU machine can
# also be opened on a CPU-only machine.
checkpoint = torch.load(save_path, map_location=device)

# Instantiate the model
model_aq = NMR_Model_Aq()

# Copy the saved weights into the fresh model
model_aq.load_state_dict(checkpoint['model_state_dict'])

# Move the model to the selected device (last expression: the cell displays the model)
model_aq.to(device)

In [None]:
## Everything saved from here on goes to the model-performance-metrics directory

metrics_dir = '/home/htjhnson/Desktop/DL-NMR-Optimization/ModelPerformanceMetrics'
os.chdir(metrics_dir)

In [None]:
## Redefine the network for evaluation (input is NOT permuted) and rebuild model_aq from it

# Summed (not averaged) MSE loss object
criterion = nn.MSELoss(reduction='sum')


# Define model
class NMR_Model_Aq(nn.Module):
    """Conv1d/GRU regressor: three conv + max-pool stages, a GRU and two linear layers.

    Unlike the definition used for training earlier in this section, this
    ``forward`` does not swap axes 1 and 2 of its input: the input goes straight
    into ``conv1`` (one input channel). The layers, and therefore the
    state-dict keys and shapes, are the same. Returns 58 values per sample.
    """

    def __init__(self):
        super(NMR_Model_Aq, self).__init__()
        self.conv1 = nn.Conv1d(1, 32, kernel_size=3, padding=1)
        self.mp1 = nn.MaxPool1d(2, stride=2)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, padding=1)
        self.mp2 = nn.MaxPool1d(2, stride=2)
        self.conv3 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.mp3 = nn.MaxPool1d(2, stride=2)
        self.gru = nn.GRU(128, 64, num_layers=1, batch_first=True)
        self.fc1 = nn.Linear(64, 200)
        self.fc2 = nn.Linear(200, 58)

    def forward(self, x):
        # conv -> max-pool -> ReLU, three times
        x = self.conv1(x)
        x = self.mp1(x)
        x = nn.functional.relu(x)
        x = self.conv2(x)
        x = self.mp2(x)
        x = nn.functional.relu(x)
        x = self.conv3(x)
        x = self.mp3(x)
        x = nn.functional.relu(x)
        x = x.permute(0, 2, 1)  # Reshape for GRU: (batch_size, sequence_length, features)
        x, _ = self.gru(x)
        x = x[:, -1, :]  # Take the output of the last time step
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x


# Redefining a class does not change objects that already exist: a `model_aq`
# created before this cell still runs the forward() of the class it was built
# from. Rebuild it from the class above, keeping its weights and device, so the
# cells below really use this forward().
_stale_model = globals().get("model_aq")
if isinstance(_stale_model, nn.Module) and not isinstance(_stale_model, NMR_Model_Aq):
    model_aq = NMR_Model_Aq()
    model_aq.load_state_dict(_stale_model.state_dict())
    model_aq.to(next(_stale_model.parameters()).device)
del _stale_model

In [None]:
## Evaluate the model on the training dataset and determine RMSE

# Feed the spectra through the model in small batches
loader = torch.utils.data.DataLoader(X_train, batch_size=32)

# Predictions of every batch, collected as numpy arrays
all_outputs = []

# Inference only: without no_grad() every forward pass would record an
# autograd graph that is never used.
with torch.no_grad():
    for batch in loader:
        # Make sure the batch lives on the same device as the model
        batch = batch.to(device)

        # Insert an axis at position 1 before the forward pass
        outputs = model_aq(batch.unsqueeze(1))

        # Move the outputs to CPU and keep them as numpy
        all_outputs.append(outputs.cpu().detach().numpy())

# Concatenate the outputs from all batches
outputs_cpu = np.concatenate(all_outputs)

# RMSE over all samples and outputs (arguments in sklearn's y_true, y_pred order)
err = np.sqrt(mean_squared_error(y_train.cpu().detach().numpy(), outputs_cpu))
print('model err: ', err)  # Print RMSE

np.save(ModelName + "TrainRMSE",err)

In [None]:
## Evaluate the model on the testing dataset and determine RMSE

# Feed the spectra through the model in small batches
loader = torch.utils.data.DataLoader(X_test, batch_size=32)

# Predictions of every batch, collected as numpy arrays
all_outputs = []

# Inference only: without no_grad() every forward pass would record an
# autograd graph that is never used.
with torch.no_grad():
    for batch in loader:
        # Make sure the batch lives on the same device as the model
        batch = batch.to(device)

        # Insert an axis at position 1 before the forward pass
        outputs = model_aq(batch.unsqueeze(1))

        # Move the outputs to CPU and keep them as numpy
        all_outputs.append(outputs.cpu().detach().numpy())

# Concatenate the outputs from all batches
outputs_cpu = np.concatenate(all_outputs)

# RMSE over all samples and outputs (arguments in sklearn's y_true, y_pred order)
err = np.sqrt(mean_squared_error(y_test.cpu().detach().numpy(), outputs_cpu))
print('model err: ', err)  # Print RMSE

np.save(ModelName + "TestRMSE",err)

In [None]:
## Evaluate the model on the validation dataset and determine RMSE

# Feed the spectra through the model in small batches
loader = torch.utils.data.DataLoader(spectraVal, batch_size=32)

# Predictions of every batch, collected as numpy arrays
all_outputs = []

# Inference only: without no_grad() every forward pass would record an
# autograd graph that is never used.
with torch.no_grad():
    for batch in loader:
        # Make sure the batch lives on the same device as the model
        batch = batch.to(device)

        # Insert an axis at position 1 before the forward pass
        outputs = model_aq(batch.unsqueeze(1))

        # Move the outputs to CPU and keep them as numpy
        all_outputs.append(outputs.cpu().detach().numpy())

# Concatenate the outputs from all batches
outputs_cpu = np.concatenate(all_outputs)

# RMSE over all samples and outputs (arguments in sklearn's y_true, y_pred order)
err = np.sqrt(mean_squared_error(concVal.cpu().detach().numpy(), outputs_cpu))
print('model err: ', err)  # Print RMSE

np.save(ModelName + "ValRMSE",err)

In [None]:
## Absolute percent error (APE) per metabolite and its mean (MAPE) for the example spectra

APEs = []
MAPEs = []

# NOTE(review): the loop covers exactly 8 example spectra, while a comment in
# the data-preparation cell speaks of 10 representative examples -- confirm
# which is intended.
# Inference only, so autograd bookkeeping is switched off.
with torch.no_grad():
    for i in np.arange(8):
        GroundTruth = ValConc[i]
        Prediction = model_aq(ValSpectra[i].unsqueeze(1))

        # Move Prediction tensor to CPU and convert to numpy
        Prediction_cpu = Prediction.detach().cpu().numpy()

        APE = []

        # Iterate over every model output instead of a hard-coded count, so the
        # loop cannot drift out of sync with the network definition
        for metabolite in range(Prediction_cpu.shape[1]):
            per_err = 100*(GroundTruth[metabolite] - Prediction_cpu[0][metabolite]) / GroundTruth[metabolite]
            APE.append(abs(per_err.cpu()))

        MAPE = sum(APE) / len(APE)

        APEs.append(APE)
        MAPEs.append(MAPE)


# Convert lists to numpy arrays and save
np.save(ModelName + "_" + "ValExamples_APEs.npy", np.array(APEs))
np.save(ModelName + "_" + "ValExamples_MAPEs.npy", np.array(MAPEs))


In [None]:
# Display the list of per-example MAPE values built in the previous cell
MAPEs