## Neural Network Isotopomer Analysis

### Step 1 - Data simulation:

- Change the HSQC vector in the cell below to simulate a dataset for each metabolite.

In [5]:
import pandas as pd
from metabolabpytools import isotopomerAnalysis
analysis = isotopomerAnalysis.IsotopomerAnalysisNN()

# HSQC vector for the metabolite being simulated: one entry per carbon.
# Presumably 1 marks a carbon that participates in HSQC and 0 one that does not,
# so [0, 1, 1] describes a 3-carbon metabolite whose first carbon is excluded.
hsqc_vector = [0, 1, 1]
n_carbons = len(hsqc_vector)

# Draw the synthetic isotopomer distributions used as simulation input.
synthetic_distributions = analysis.generate_isotopomer_distributions(
    n_distributions=10000,
    n_carbons=n_carbons,
)

# Simulate HSQC and GC-MS data for every distribution with the HSQC vector above.
(
    combined_isotopomer_data,
    combined_hsqc_data,
    combined_gcms_data,
) = analysis.simulate_hsqc_gcms(synthetic_distributions, hsqc_vector)

# Write the three simulated tables to a spreadsheet
# (the recorded run reported sim_data/sim_011.xlsx for this vector).
analysis.save_simulation_data(
    combined_isotopomer_data,
    combined_hsqc_data,
    combined_gcms_data,
    hsqc_vector,
)

Data successfully saved to sim_data/sim_011.xlsx


### Step 2 - Data preparation:

- Using `[0, 1, 1]` as an example.

In [5]:
from metabolabpytools import isotopomerAnalysis
analysis = isotopomerAnalysis.IsotopomerAnalysisNN()

# Load the simulated spreadsheet that belongs to HSQC vector [0, 1, 1].
hsqc_vector = [0, 1, 1]
num_carbons = len(hsqc_vector)
isotopomer_data, hsqc_data, gcms_data = analysis.load_spreadsheet_by_hsqc_vector(
    hsqc_vector
)

# Enumerate the HSQC multiplets that are possible for this vector.
all_possible_hsqc_multiplets = analysis.generate_possible_hsqc_multiplets(
    hsqc_vector
)

# Training targets: isotopomer percentages for each sample.
Y = analysis.collate_y_labels(isotopomer_data, num_carbons)

# Training inputs assembled from the HSQC and GC-MS data.
# NOTE(review): despite its name, X_noisy is built by the *without_noise* helper;
# the name is kept because the training and tuning cells read it.
X_noisy = analysis.collate_x_labels_without_noise(
    hsqc_data,
    gcms_data,
    all_possible_hsqc_multiplets,
)

# Spot-check one sample: its target vector followed by its input vector.
sample_index = 7
print(Y[sample_index])
print(X_noisy[sample_index])

[66.75361619  8.81435076  0.          0.         13.32781216  0.
  8.95823859  2.1459823 ]
[ 5.9248416   0.          0.          0.         80.39001317  0.
 66.75361619 22.14216292  8.95823859  2.1459823 ]


### Step 3 - Train a Neural Network:

In [6]:
# Fit the neural network on the prepared inputs (X_noisy) and targets (Y).
# The call hands back the trained model together with its training history.
model, history = analysis.train_neural_network(
    X_noisy,
    Y,
    epochs=100,
    batch_size=32,
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 82.8759 - mae: 5.0800 - val_loss: 43.1794 - val_mae: 3.3263
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 43.2522 - mae: 3.2169 - val_loss: 40.9224 - val_mae: 3.1834
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - loss: 41.2654 - mae: 3.0925 - val_loss: 40.6295 - val_mae: 3.1516
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 41.3648 - mae: 3.0766 - val_loss: 40.2354 - val_mae: 3.0545
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 41.0125 - mae: 3.0616 - val_loss: 40.0134 - val_mae: 3.0348
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 40.1907 - mae: 3.0077 - val_loss: 40.0336 - val_mae: 3.0999
Epoch 7/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

### Step 4 - Hyperparameter Tuning:

In [7]:
from metabolabpytools import isotopomerAnalysis

hsqc_vector = [0, 1, 1]  # Replace with your actual HSQC vector
analysis = isotopomerAnalysis.IsotopomerAnalysisNN()

# Tune the model, save it, and generate a summary.
# NOTE(review): X_noisy and Y are produced by the data-preparation cell, which
# must have been run first for the same HSQC vector.
best_model, X_val, Y_val, mean_pred, std_dev_pred = analysis.tune_model(X_noisy, Y, hsqc_vector)

# Predict on the validation inputs returned by tune_model.
predictions = best_model.predict(X_val)

# Compare the first few predictions with the actual Y values. The loop is
# bounded by the number of rows actually available, so a validation split with
# fewer than five samples no longer raises IndexError.
n_preview = min(5, len(predictions), len(Y_val))
for i in range(n_preview):
    print(f"Predicted: {predictions[i]}, Actual: {Y_val[i]}")

Trial 1 Complete [00h 02m 09s]
val_loss: 24.612380981445312

Best val_loss So Far: 24.612380981445312
Total elapsed time: 00h 02m 09s
Results summary
Results in tuning_dir\metabolite_tuning_0_1_1
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 0 summary
Hyperparameters:
num_layers: 6
units_0: 256
l2_lambda: 0.0005196196720783069
dropout_rate: 0.35
learning_rate: 0.0004731616841237643
units_1: 64
units_2: 64
units_3: 64
units_4: 64
units_5: 64
Score: 24.612380981445312


  saveable.load_own_variables(weights_store.get(inner_path))


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 24.9118 - mae: 3.0731 
Validation Loss: 24.953990936279297, Validation MAE: 3.0751802921295166
Model saved as model_hsqc_0_1_1.keras in saved_models
Model summary saved as model_summaries\model_summary_model_hsqc_0_1_1.keras.csv
Sample 1 - Predicted Mean: [38.756763   28.186247    0.77291363  1.3284677  23.250557    2.140741
  4.6960964   0.8683415 ], Standard Deviation: [3.2958903 4.0296903 2.271225  2.2773743 4.7236233 3.0484605 3.6987307
 2.3460479]
Sample 2 - Predicted Mean: [54.86672    5.886317  17.609383   1.9431293  5.4999166 10.570859
  2.8621068  0.7615194], Standard Deviation: [3.7529109 1.7066189 3.5601623 1.6974937 1.1273539 4.3162346 1.9361151
 1.2097173]
Sample 3 - Predicted Mean: [45.95286    7.1281343 23.394268   2.2295096  6.8449    10.910547
  2.4487073  1.0909852], Standard Deviation: [4.242777  2.0748525 4.211551  2.11601   1.2468145 4.8846188 1.9756453
 1.5751593]
Sample 4 - Predicted