In [1]:
## Import dependencies

import numpy as np
import matplotlib.pyplot as plt
import os
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
import time
import optuna
import pandas as pd

# Set default plot size
plt.rcParams["figure.figsize"] = (30,20)

## Optimize CNN architecture and hyperparameters on dataset of 44 metabolites

In [2]:
# Define number of epochs used later in training (this will have to be redefined later......)
num_epochs = 5000

# Name variable used for saving model metrics, name should reflect model used, dataset used, and other information such as # of epochs
ModelName = "CNN_Opt_Dist5_RAE_44Met" + str(num_epochs) +"ep"

# Set the random seed
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/ModelPerformanceMetrics/') 
seed = 1 
torch.manual_seed(seed)
np.save(ModelName + "_Seed.npy", seed)

In [3]:
# Load training and testing datasets, validation datasets, and representative example spectra 

# Switch to directory containing datasets
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/GeneratedDataAndVariables')

# Load training data and max value from testing and training datasets
spectra = np.load('Dataset44_Dist5_Spec.npy')
conc1 = np.load('Dataset44_Dist5_Conc.npy')

# Load validation dataset
spectraVal = np.load('Dataset44_Dist5_Val_Spec.npy')
concVal = np.load('Dataset44_Dist5_Val_Conc.npy')

# Load representative validation spectra
ValSpectra = np.load("Dataset44_Dist5_RepresentativeExamples_Spectra.npy")
ValConc = np.load("Dataset44_Dist5_RepresentativeExamples_Concentrations.npy")
ValSpecNames = np.load("Dataset44_Dist5_RepresentativeExamples_VariableNames.npy")

In [4]:
## Prepare to switch data from CPU to GPU

# Check if CUDA (GPU support) is available
if torch.cuda.is_available():
    device = torch.device("cuda")          # A CUDA device object
    print("Using GPU for training.")
else:
    device = torch.device("cpu")           # A CPU object
    print("CUDA is not available. Using CPU for training.")

Using GPU for training.


In [5]:
## Set up data for testing and training

# Split into testing and training data
X_train, X_test, y_train, y_test = train_test_split(spectra, conc1, test_size = 0.2, random_state = 1)

# Tensorize and prepare datasets
X_train = torch.tensor(X_train).float()
y_train = torch.tensor(y_train).float()
X_test = torch.tensor(X_test).float()
y_test = torch.tensor(y_test).float()


# Move the input data to the GPU device
X_train = X_train.to(device)
X_test = X_test.to(device)
spectraVal = torch.tensor(spectraVal).float().to(device)   # Confusing names, these spectra are the 5000 spectra generated like the training dataset
#ValSpectra = torch.tensor(ValSpectra).float().to(device)   # Confusing names, these spectra are the 10 representative example spectra

# Move the target data to the GPU device
y_train = y_train.to(device)
y_test = y_test.to(device)
concVal = torch.tensor(concVal).float().to(device)
#ValConc = torch.tensor(ValConc).float().to(device)

# More data prep?
datasets = torch.utils.data.TensorDataset(X_train, y_train)
Test_datasets = torch.utils.data.TensorDataset(X_test, y_test)
Val_datasets = torch.utils.data.TensorDataset(spectraVal, concVal)
#train_iter = torch.utils.data.DataLoader(datasets, batch_size = 128, shuffle=True)
#test_iter = torch.utils.data.DataLoader(Test_datasets, batch_size = 128, shuffle=True)

In [6]:
del X_train
del X_test
del y_train
del y_test
del spectra
del conc1
del spectraVal
del concVal

In [7]:
class RelativeAbsoluteError(nn.Module):
    def __init__(self):
        super(RelativeAbsoluteError, self).__init__()

    def forward(self, y_pred, y_true):
        # Compute the mean of the true values
        y_mean = torch.mean(y_true)
        
        # Compute the absolute differences
        absolute_errors = torch.abs(y_true - y_pred)
        mean_absolute_errors = torch.abs(y_true - y_mean)
        
        # Compute RAE
        rae = torch.sum(absolute_errors) / torch.sum(mean_absolute_errors)
        return rae
    
    
    
# MAPE loss function for directly comparing models despite loss function used
class MAPELoss(nn.Module):
    def __init__(self):
        super(MAPELoss, self).__init__()

    def forward(self, y_pred, y_true):
        loss = torch.mean(torch.abs((y_true - y_pred) / y_true))
        return loss * 100  # To get percentage

In [None]:
## Optimization function

# Switch to directory for saving weights
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/ModelOptimizations/BestWeights')

# Define file name for best model weights
save_path = ModelName + '_Params.pt'


# Define the objective function to be minimized.
def objective(trial):
    

    # Suggest values of the hyperparameters using a trial object.
    n_conv_layers = trial.suggest_int('num_conv_layers', 1, 4)

    # Make some empty variables for use in model building
    layers = []
    
    # Define other hyperparameters
    kernel_size = trial.suggest_int('kernel_size', 3, 9)
    num_channels = trial.suggest_int('num_channels', 5, 50)
    pooling_type = trial.suggest_categorical('pooling_type', ['none', 'max', 'avg'])
    pool_stride = trial.suggest_int('pool_stride', 1, 2)
    conv_stride = 1
    pool_kernel = 2
    
    n_fc_layers = 2  # Number of fully connected layers

    
    # Build the CNN architecture
    in_channels = 1  # Assuming 1 input channel
    in_features = 46000  # Number of input features
    dilation = 1
    padding = 1
    for i in range(n_conv_layers):
        out_channels = num_channels
        layers.append(nn.Conv1d(in_channels, out_channels, kernel_size, dilation=dilation, padding=padding))
        layers.append(nn.ReLU())
        in_features = ((in_features + 2*padding - dilation*(kernel_size - 1) - 1)/conv_stride) + 1
        in_features = int(in_features)
        
        in_channels = out_channels
        
        if pooling_type == 'none':
            pass
        elif pooling_type == 'max':
            layers.append(nn.MaxPool1d(pool_kernel, stride=pool_stride, dilation=dilation, padding=padding))
            in_features = ((in_features + 2*padding - dilation*(pool_kernel - 1) - 1)/pool_stride) + 1
            in_features = int(in_features)
        else:
            layers.append(nn.AvgPool1d(pool_kernel, stride=pool_stride, padding=padding))
            in_features = ((in_features + 2*padding - pool_kernel)/pool_stride) + 1
            in_features = int(in_features)
                
    # Flatten the output for fully connected layers
    layers.append(nn.Flatten())
    
    # Add fully connected layers
    for _ in range(n_fc_layers):
        out_features = 200
        layers.append(nn.Linear(in_features*in_channels, out_features))
        layers.append(nn.ReLU())
        in_features = out_features
        in_channels = 1
    
    # Add the final fully connected layer
    layers.append(nn.Linear(in_features, 44))   # For quantifying 44 metabolites
    
    model = nn.Sequential(*layers)
    model.to(device)  # Move the model to the GPU    

    
     # Train and evaluate the model to obtain the loss
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    
                     
    '''
    # Add (or don't add) L2 regularization
    reg_type = trial.suggest_categorical('regularization', ['none', 'l2'])
    reg_strength = trial.suggest_float('reg_strength', 1e-6, 1e-3)
    
    if optimizer_name == "Adam":
        if reg_type == 'l2':
            optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=reg_strength)
        else:
            optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    elif optimizer_name == "RMSprop":
        if reg_type == 'l2':
            optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=reg_strength)
        else:
            optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)    
    else:
        optimizer_name == "SGD"
        if reg_type == 'l2':
            optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=reg_strength)
        else:
            optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) 
    '''
    
        
    ## Split training data appropriately, selecting the batch size as a hyperparameter
    bs = 16
    train_loader = torch.utils.data.DataLoader(datasets, batch_size=bs, shuffle=True)
    test_loader = torch.utils.data.DataLoader(Test_datasets, batch_size=bs, shuffle=False)
    val_loader = torch.utils.data.DataLoader(Val_datasets, batch_size=bs, shuffle=False)
    
    num_epochs = 500
    
    criterion = RelativeAbsoluteError()
    criterion2 = MAPELoss()
    criterion3 = nn.MSELoss()  #This just for direct comparison with past MSE trained models
    best_test_loss = float('inf')
    
    start = time.time()
    for epoch in range(num_epochs):
        start2 = time.time()
        running_train_loss = 0.
        running_test_loss = 0.
        
        # Training phase
        model.train()
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)  # Move input data to the GPU
            optimizer.zero_grad()
            inputs = inputs.unsqueeze(1)
            outputs = model(inputs)
            loss = criterion(outputs, labels)  # Make sure labels are properly loaded and passed here
            loss.backward()
            optimizer.step()
            running_train_loss += loss.item()* inputs.size(0)
        
        # Testing phase
        model.eval()
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)  # Move input data to the GPU
                inputs = inputs.unsqueeze(1)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                running_test_loss += loss.item()* inputs.size(0)
                
                
        ## Also compute just MAPE loss for model optimization
        running_test_loss2 = 0.0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs = inputs.unsqueeze(1)
                outputs = model(inputs)
                loss2 = criterion2(outputs, targets)
                running_test_loss2 += loss2.item() * inputs.size(0)
                
                
        ## Also compute just MSE loss for more direct comparison with previous models
        running_test_loss3 = 0.0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs = inputs.unsqueeze(1)
                outputs = model(inputs)
                loss3 = criterion3(outputs, targets)
                running_test_loss3 += loss3.item()

                
        if running_test_loss2 < best_test_loss:
            best_test_loss = running_test_loss2
  


        # Print loss every ___ epochs
        if epoch % 1 == 0:
            end_time2 = time.time()
            print(f'Epoch {epoch + 1}/{num_epochs} | Train Loss: {running_train_loss:.3f} | Test Loss: {running_test_loss:.3f} | Test Loss [MAPE]: {running_test_loss2:.3f} | Test Loss [MSE]: {running_test_loss3:.3f}', "--time--", end_time2-start2)

            
        # Prune considered after a certain number of epochs)
        if epoch > 20 and trial.should_prune():
            raise optuna.TrialPruned()
            
    end_time = time.time()
    print(end_time - start)   
    
    return best_test_loss


## Create study and optimize
study_name = "CNN44_Dist5_RAE_Opt"
storage = "sqlite:///CNN44_Dist5_RAE_Opt.db"  # SQLite database as storage
study = optuna.create_study(direction='minimize',  sampler = optuna.samplers.TPESampler(seed = 1), 
                            study_name=study_name, storage=storage,
                            pruner = optuna.pruners.MedianPruner(n_warmup_steps=30))



study.optimize(objective, n_trials=50)

[I 2024-07-03 12:28:22,959] A new study created in RDB with name: CNN44_Dist5_RAE_Opt


Epoch 1/500 | Train Loss: 15261.125 | Test Loss: 3659.900 | Test Loss [MAPE]: 901713.773 | Test Loss [MSE]: 185991.707 --time-- 22.40710973739624


In [13]:
# Switch to directory for saving weights
os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/ModelOptimizations/BestWeights/')

loaded_study = optuna.load_study(study_name="CNN44_Opt", storage="sqlite:///CNN44_Opt_30trials.db")

In [16]:
# Get the best parameters and best value from the study
best_params = loaded_study.best_params
best_value = loaded_study.best_value

# Create a dictionary to store hyperparameters and best loss
data = {
    'Hyperparameter': list(best_params.keys()) + ['Best Loss'],
    'Value': list(best_params.values()) + [best_value]
}

# Create a pandas DataFrame
df = pd.DataFrame(data)

# Display the DataFrame
print(df)

         Hyperparameter     Value
0         n_conv_layers         3
1           kernel_size         9
2          pooling_type      none
3           pool_stride         2
4   regularization_type      none
5       batch_norm_used     False
6           n_fc_layers         1
7   conv_0_out_channels        24
8   conv_1_out_channels        55
9   conv_2_out_channels        27
10      fc_out_features       252
11        learning_rate  0.000117
12           batch_size        64
13            optimizer   RMSprop
14            Best Loss  69.48823


In [10]:
optuna.visualization.plot_optimization_history(study)

NameError: name 'study' is not defined

In [None]:
optuna.visualization.plot_parallel_coordinate(study)

In [None]:
optuna.visualization.plot_param_importances(study)

In [11]:
optuna.visualization.plot_contour(study)

NameError: name 'study' is not defined

In [12]:
optuna.visualization.plot_rank(study)

NameError: name 'study' is not defined

In [13]:
optuna.visualization.plot_slice(study)

NameError: name 'study' is not defined

In [14]:
optuna.visualization.plot_terminator_improvement(study)

NameError: name 'study' is not defined

In [15]:
optuna.visualization.plot_timeline(study)

NameError: name 'study' is not defined

In [16]:
optuna.visualization.plot_edf(study)

NameError: name 'study' is not defined

In [18]:
# Define file name for best model weights
save_path = ModelName + '_Params_30trials.pt'

def get_best_model(study):
    # Get the best trial
    best_trial = study.best_trial
    
    # Extract the best parameters
    best_params = best_trial.params
    
    # Load the entire dictionary from the saved file
    checkpoint = torch.load(save_path)
    
    # Load the model architecture from the checkpoint
    model_architecture = checkpoint['model_architecture']
    
    # Initialize the model and move it to the appropriate device
    model = model_architecture.to(device)
    
    # Load the model's state dictionary from the loaded dictionary
    model.load_state_dict(checkpoint['model_state_dict'])

    return model

best_model = get_best_model(loaded_study).eval()

In [19]:
criterion = torch.nn.MSELoss()


test_loader = torch.utils.data.DataLoader(datasets, batch_size=64, shuffle=False)
TheLoss = 0

# Testing phase
best_model.eval()
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)  # Move input data to the GPU
        inputs = inputs.unsqueeze(1)
        outputs = best_model(inputs)
        loss = criterion(outputs, labels)
        TheLoss += loss.item()
        
TheLoss

103.56735616922379

In [14]:
X_train.size()

torch.Size([16000, 46000])

In [15]:
FirstEx = X_train.unsqueeze(1)[0]
FirstEx.size()

torch.Size([1, 46000])

In [16]:
inputs = FirstEx.to(device)
inputs = inputs.unsqueeze(1)
outputs = best_model(inputs)

outputs

tensor([[ 4.0532, 49.2963, 37.4884, 10.4130, 22.6152, 13.7744, 16.9744, 15.2754,
         33.8414, 39.4097, 36.6177, 16.6846, 23.7907, 18.3062, 30.6628, 30.9816,
         26.7715, 14.7555,  2.8625, 41.7423, 37.2032, 46.3311,  1.9252, 14.5158,
         26.6019, 32.6411, 39.1162, 36.9748, 35.2184, 33.2565, 31.9444, 36.7302,
         35.4330, 49.6390,  7.5264,  7.8743,  9.8146, 17.3302, 29.6859,  1.5771,
         35.7887, 34.5430, 44.2107, 41.5333]], device='cuda:0',
       grad_fn=<AddmmBackward0>)

In [17]:
y_train[0]

tensor([ 4.0717, 49.5768, 38.3677, 11.3346, 22.3017, 12.9476, 16.9491, 16.3040,
        34.2957, 40.2190, 36.4923, 17.5898, 24.9011, 18.9798, 31.6682, 30.6385,
        27.1793, 15.0093,  3.1209, 42.3842, 37.3571, 47.2943,  1.7990, 14.4903,
        27.5353, 31.2422, 38.8733, 37.4527, 35.7897, 33.2167, 32.0472, 36.9620,
        35.3223, 49.3332,  7.2213,  8.1352,  9.6507, 18.3274, 29.5438,  2.0174,
        36.3401, 34.8497, 44.4357, 42.3643], device='cuda:0')

In [18]:
## Switch to directory for saving model metrics

os.chdir('/home/htjhnson/Desktop/DL-NMR-Optimization/ModelPerformanceMetrics')   

In [21]:
APEs = []
MAPEs = []

for i in np.arange(10):
    GroundTruth = ValConc[i]
    Prediction = best_model(ValSpectra[i].unsqueeze(1))

    # Move Prediction tensor to CPU and detach from computation graph
    Prediction_cpu = Prediction.detach().cpu().numpy()

    APE = []

    for metabolite in range(44):
        per_err = 100*(GroundTruth[metabolite] - Prediction_cpu[0][metabolite]) / GroundTruth[metabolite]
        APE.append(abs(per_err.cpu()))

    MAPE = sum(APE) / len(APE)

    APEs.append(APE)
    MAPEs.append(MAPE)


# Convert lists to numpy arrays and save
np.save(ModelName + "_" + "ValExamples_APEs.npy", np.array(APEs))
np.save(ModelName + "_" + "ValExamples_MAPEs.npy", np.array(MAPEs))


In [22]:
for i in np.arange(10):
    print(round(MAPEs[i].item(), 2), " - ",ValSpecNames[i])

68.09  -  AllAq1
7.34  -  AllAq5
1.63  -  AllAq25
11.71  -  AllAq50
3.01  -  ThreeAddedSinglets
8.0  -  ThirtyAddedSinglets
84.95  -  ShiftedSpec
17.5  -  SineBase
469.07  -  HighDynamicRange
inf  -  HalfZeros
