## Neural Network Isotopomer Analysis

### Step 1 - Data simulation:

- All data are now simulated; the simulated data are written to a spreadsheet in the `sim_data` folder (see the save message printed below the cell). Larger or more diverse datasets can be created to train the networks further.

In [1]:
import pandas as pd
from metabolabpytools import isotopomerAnalysis
analysis = isotopomerAnalysis.IsotopomerAnalysisNN()

# HSQC vector, defined outside the library calls so every step below uses the same one.
# It has one entry per carbon, so its length fixes the number of carbons (3 here).
# NOTE(review): a 1 presumably marks a carbon that is observed in the HSQC and a 0 one
# that is not, so with [0, 1, 1] only carbons 2 and 3 contribute - confirm.
hsqc_vector = [0, 1, 1]
n_carbons = len(hsqc_vector)

# Number of synthetic isotopomer distributions to generate; raise it to build a larger
# training set.
N_DISTRIBUTIONS = 1000

# NOTE(review): no random seed is set in this cell, so a re-run presumably produces a
# different set of distributions - confirm whether the library exposes a seed.
synthetic_distributions = analysis.generate_isotopomer_distributions(
    n_distributions=N_DISTRIBUTIONS,
    n_carbons=n_carbons,
)

# Simulate HSQC and GC-MS data for all distributions with the defined HSQC vector
combined_isotopomer_data, combined_hsqc_data, combined_gcms_data = analysis.simulate_hsqc_gcms(
    synthetic_distributions,
    hsqc_vector,
)

# Save the simulation data to a spreadsheet (the output path is printed by the call)
analysis.save_simulation_data(
    combined_isotopomer_data,
    combined_hsqc_data,
    combined_gcms_data,
    hsqc_vector,
)

Data successfully saved to sim_data/sim_011.xlsx


### Step 2 - Data preparation:

- Using the HSQC vector [0, 1, 1] as an example

In [3]:
from metabolabpytools import isotopomerAnalysis
# Re-create the analysis object; together with the import above this repeats the first
# cell, presumably so that this cell can be run without re-running the simulation.
analysis = isotopomerAnalysis.IsotopomerAnalysisNN()

# Example usage for HSQC vector [0,1,1]
hsqc_vector = [0, 1, 1]
num_carbons = len(hsqc_vector)
# Load the isotopomer, HSQC and GC-MS tables for this HSQC vector
# (presumably the spreadsheet written by the simulation step - confirm).
isotopomer_data, hsqc_data, gcms_data = analysis.load_spreadsheet_by_hsqc_vector(hsqc_vector)

# Enumerate the HSQC multiplets that can occur for this HSQC vector; passed to the
# feature collation below.
all_possible_hsqc_multiplets = analysis.generate_possible_hsqc_multiplets(hsqc_vector)

# Targets: one row of isotopomer percentages per sample.
Y = analysis.collate_y_labels(isotopomer_data, num_carbons)

# Features: built from the HSQC and GC-MS data.
# NOTE(review): despite its name, X_noisy is produced by collate_x_labels_without_noise,
# so it presumably holds noise-free features - confirm. The name is kept because the
# training and tuning cells reference it.
X_noisy = analysis.collate_x_labels_without_noise(hsqc_data, gcms_data, all_possible_hsqc_multiplets)


# Now Y contains the isotopomer percentages for each sample, structured for training a neural network.
# Print one sample (index 7) as a sanity check: its target vector, then its feature vector.
print(Y[7])
print(X_noisy[7])

[36.30513711  2.93557073  0.          0.          0.         29.40075459
 31.35853757  0.        ]
[1.21043515e+00 9.66659397e+01 9.78736970e-02 2.02575141e+00
 9.79385109e+01 2.06148911e+00 3.63051371e+01 2.93557073e+00
 6.07592922e+01 0.00000000e+00]


### Step 3 - Train a Neural Network:

In [4]:
# Train the neural network on the collated features (X_noisy) and isotopomer targets (Y)
# for 100 epochs with a batch size of 32. Requires the data-preparation cell to have run,
# since X_noisy, Y and analysis are defined there.
# The per-epoch log printed below reports loss and MAE together with their validation
# counterparts (val_loss, val_mae).
# NOTE(review): `history` is presumably the Keras training history - confirm.
model, history = analysis.train_neural_network(X_noisy, Y, epochs=100, batch_size=32)

Epoch 1/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 436.2444 - mae: 11.4618 - val_loss: 114.0747 - val_mae: 7.5071
Epoch 2/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 95.6169 - mae: 6.2994 - val_loss: 75.6897 - val_mae: 5.3527
Epoch 3/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 74.7373 - mae: 5.1113 - val_loss: 51.7806 - val_mae: 4.1372
Epoch 4/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 51.3473 - mae: 3.8870 - val_loss: 42.5595 - val_mae: 3.5736
Epoch 5/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 40.4634 - mae: 3.4238 - val_loss: 35.6619 - val_mae: 3.2319
Epoch 6/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 33.9306 - mae: 3.1919 - val_loss: 32.2328 - val_mae: 3.0134
Epoch 7/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms

### Step 4 - Hyperparameter Tuning:

In [5]:
hsqc_vector = [0, 1, 1]  # Replace with your actual HSQC vector

# Tune the model, save it, and generate a summary.
# Requires X_noisy, Y and analysis from the data-preparation cell.
# mean_pred and std_dev_pred are returned by the call but not used further in this cell.
best_model, X_val, Y_val, mean_pred, std_dev_pred = analysis.tune_model(X_noisy, Y, hsqc_vector)

# Make predictions on the validation features returned by the tuner
predictions = best_model.predict(X_val)

# Compare the first few predictions with the actual Y values.
# The preview is bounded by the number of available rows so that a validation split
# with fewer than N_PREVIEW samples does not raise an IndexError.
N_PREVIEW = 5
for i in range(min(N_PREVIEW, len(predictions), len(Y_val))):
    print(f"Predicted: {predictions[i]}, Actual: {Y_val[i]}")

Reloading Tuner from tuning_dir\metabolite_tuning_0_1_1\tuner0.json
Search space summary
Default search space size: 10
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 6, 'step': 1, 'sampling': 'linear'}
units_0 (Int)
{'default': 64, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
l2_lambda (Float)
{'default': 1e-05, 'conditions': [], 'min_value': 1e-05, 'max_value': 0.01, 'step': None, 'sampling': 'log'}
dropout_rate (Float)
{'default': 0.1, 'conditions': [], 'min_value': 0.1, 'max_value': 0.5, 'step': 0.05, 'sampling': 'linear'}
learning_rate (Float)
{'default': 0.001, 'conditions': [], 'min_value': 0.0001, 'max_value': 0.01, 'step': None, 'sampling': 'log'}
units_1 (Int)
{'default': 64, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
units_2 (Int)
{'default': 64, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
units_3 (Int)
{'default':

  saveable.load_own_variables(weights_store.get(inner_path))


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.2612 - mae: 0.5057  
Validation Loss: 1.184056282043457, Validation MAE: 0.5080022215843201
Model saved as model_hsqc_0_1_1.keras in saved_models
Model summary saved as model_summaries\model_summary_model_hsqc_0_1_1.keras.csv
Sample 1 - Predicted Mean: [6.4169510e+01 3.8595462e+00 3.0425385e-02 1.5208347e+01 1.0706774e+01
 1.6648872e+00 4.3416262e+00 1.8878680e-02], Standard Deviation: [0.7759514  0.86551386 0.11955384 0.55404705 0.8135083  0.68452483
 0.43322873 0.09800316]
Sample 2 - Predicted Mean: [9.7873039e+01 1.7049019e-01 1.7444493e-01 7.3877174e-01 1.4755067e-02
 2.6346290e-01 0.0000000e+00 7.6505297e-01], Standard Deviation: [0.91222733 0.39468086 0.28344423 0.33390424 0.13124508 0.34827974
 0.         0.31846523]
Sample 3 - Predicted Mean: [9.2271423e+01 5.1886733e-03 0.0000000e+00 1.7194188e+00 1.9766326e-01
 4.3332912e-03 1.3607365e+00 4.4412503e+00], Standard Deviation: [0.7248941  0.03637074