# Mapping 1D representation of a Coulomb Matrix to XYZ coordinates using Bayesian Optimization

In [1]:
from generatexyz import Generatexyz
import numpy as np
from gan import load_weight
from gan import GenAdvNetwork
import tensorflow as tf
from utils import random_generator
from reformatToXyz import format_xyz_samples
import os
import pathlib
from reformatToXyz import MapCoulomb1dToXYZ
from formatData import loadData
from sklearn.model_selection import train_test_split
from coulombToTraj import getFlattenedXyz

2023-04-03 19:47:51.349153: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Generating the Sample Space

In [2]:
# NOTE: this whole cell is intentionally commented out, so nothing here runs.
# It is kept as a record of how the sample space was generated.
# Presumably it was executed once to produce the sorted trajectory / Coulomb
# files that the mapping step reads from disk -- TODO confirm before deleting.
#
# `benzene_xyz` is a flat list of 36 values, i.e. 12 (x, y, z) triples, all
# with z = 0 (planar). NOTE(review): the first six triples look like the
# carbon ring and the last six like the hydrogens -- confirm atom ordering
# against Generatexyz.
# NOTE(review): the meaning of the Generatexyz(1000000, 1000, 12) arguments is
# not visible here; the trailing 12 matches the number of coordinate triples
# above -- verify against generatexyz.py.
# benzene_xyz = [0.000000000000000000e+00,1.397000000000000020e+00,0.000000000000000000e+00,
#                1.209799999999999986e+00,6.985000000000000098e-01,0.000000000000000000e+00,
#                1.209799999999999986e+00,-6.985000000000000098e-01,0.000000000000000000e+00,
#                0.000000000000000000e+00,-1.397000000000000020e+00,0.000000000000000000e+00,
#                -1.209799999999999986e+00,-6.985000000000000098e-01,0.000000000000000000e+00,
#                -1.209799999999999986e+00,6.985000000000000098e-01,0.000000000000000000e+00,
#                0.000000000000000000e+00,2.480999999999999872e+00,0.000000000000000000e+00,
#                2.148600000000000065e+00,1.240499999999999936e+00,0.000000000000000000e+00,
#                2.148600000000000065e+00,-1.240499999999999936e+00,0.000000000000000000e+00,
#                0.000000000000000000e+00,-2.480999999999999872e+00,0.000000000000000000e+00,
#                -2.148600000000000065e+00,-1.240499999999999936e+00,0.000000000000000000e+00,
#                -2.148600000000000065e+00,1.240499999999999936e+00,0.000000000000000000e+00]
# benzene_xyz = np.array(benzene_xyz)
# gen_obj = Generatexyz(1000000, 1000, 12)
# gen_obj.generate_samples(benzene_xyz)
# gen_obj.sorting_by_coulomb_matrix()

### Using GAN to generate 1D Coulomb Matrix Representation

In [3]:
# --- Tunable settings for this cell ------------------------------------------
latent_dim_ = 78       # width of the random input vectors fed to the generator
batch_size_ = 32       # forwarded to GenAdvNetwork
num_samples_ = 20000   # how many 1D Coulomb vectors to generate
# Width of each generated vector after flattening. Kept separate from
# latent_dim_: the two values are equal here, but this one describes the
# generator's *output*, not its input.
# NOTE(review): 78 == 12 * 13 / 2, which would be the upper triangle of a
# 12x12 Coulomb matrix -- confirm against the training data.
coulomb1d_dim_ = 78
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
weights_path = "/home/panthibivek/thesis/GAN_pkg/runs/train/exp2/weights/"

# --- Build and compile the GAN ------------------------------------------------
GanModel = GenAdvNetwork(latent_dim=latent_dim_, batch_size=batch_size_)
GanModel.compile(
    generator_opt=tf.keras.optimizers.Adam(learning_rate=0.001),
    discriminator_opt=tf.keras.optimizers.Adam(learning_rate=0.001),
    disc_loss=tf.keras.losses.BinaryCrossentropy(),
    gen_loss=tf.keras.losses.MAE
)

# One random input row per sample to generate.
random_data = random_generator((num_samples_, latent_dim_))

# The generator is called once *before* load_weights and once after.
# Presumably the first call only exists to build the model's variables so the
# checkpoint can be restored into them; its output is discarded.
GanModel.generator(random_data)
GanModel.load_weights(weights_path)

# Real generation pass, now using the restored weights.
generated_output = GanModel.generator(random_data)
arr = generated_output.numpy()
# Flatten to one row per sample; raises if the generator output does not hold
# exactly num_samples_ * coulomb1d_dim_ values.
arr_new = arr.reshape((num_samples_, coulomb1d_dim_))

Model: "generator"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 13)                1027      
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 13)                0         
                                                                 
 reshape (Reshape)           (None, 13, 1)             0         
                                                                 
 conv1d_transpose (Conv1DTra  (None, 39, 128)          640       
 nspose)                                                         
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 39, 128)           0         
                                                                 
 batch_normalization (BatchN  (None, 39, 128)          512       
 ormalization)                                           

2023-04-03 19:47:52.682965: I tensorflow/compiler/xla/stream_executor/rocm/rocm_gpu_executor.cc:843] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-04-03 19:47:52.683052: I tensorflow/compiler/xla/stream_executor/rocm/rocm_gpu_executor.cc:843] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-04-03 19:47:52.739138: I tensorflow/compiler/xla/stream_executor/rocm/rocm_gpu_executor.cc:843] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-04-03 19:47:52.739206: I tensorflow/compiler/xla/stream_executor/rocm/rocm_gpu_executor.cc:843] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-04-03 19:47:52.739247: I tensorflow/compiler/xla/stream_executo

Model: "discriminator"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_1 (Conv1D)           (None, 26, 64)            320       
                                                                 
 leaky_re_lu_3 (LeakyReLU)   (None, 26, 64)            0         
                                                                 
 conv1d_2 (Conv1D)           (None, 13, 128)           32896     
                                                                 
 leaky_re_lu_4 (LeakyReLU)   (None, 13, 128)           0         
                                                                 
 batch_normalization_2 (Batc  (None, 13, 128)          512       
 hNormalization)                                                 
                                                                 
 flatten (Flatten)           (None, 1664)              0         
                                                     

2023-04-03 19:47:52.959108: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.
2023-04-03 19:47:52.959744: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.
2023-04-03 19:47:52.960587: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.
2023-04-03 19:47:52.961040: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.
2023-04-03 19:47:52.969742: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.
2023-04-03 19:47:52.970292: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.
2023-04-03 19:47:52.993378: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.
2023-04-03 19:47:52.993968: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.
2023-04-03 19:47:53.009899: I tensorflow/core/common_runtime/gpu_fusion_pass.cc:507] ROCm Fusion is enabled.
2023-04-03 19:47:53

### Using KDTree to find the closest xyz coordinates in the Sample Space
#### Time Complexity:
For building the KDTree: O(N log(N)) <br>
For finding each sample: O(log(N)) on average <br> <br>
Note: The definition of class MapCoulomb1dToXYZ is in reformatToXyz.py

In [None]:
# Output directory: <current working directory>/data/MoleculesMappedFromSampleSpace.
# The path is resolved against the working directory on purpose -- a notebook
# kernel has no usable __file__ to anchor on.
parent_dir_coulomb1D_to_xyz = pathlib.Path.cwd() / "data" / "MoleculesMappedFromSampleSpace"
parent_dir_coulomb1D_to_xyz.mkdir(parents=True, exist_ok=True)

# Mapper from 1D Coulomb vectors to xyz coordinates, backed by the pre-sorted
# sample-space files.
# NOTE(review): both input files are machine-specific absolute paths; adjust
# when running elsewhere.
mapping_obj = MapCoulomb1dToXYZ(
    abs_path_xyz_dirname=str(parent_dir_coulomb1D_to_xyz),
    sorted_xyz_file="/home/panthibivek/thesis/GAN_pkg/data/exp/totalSortedTraj.txt",
    sorted_coulomb1D_file="/home/panthibivek/thesis/GAN_pkg/data/exp/totalSortedCoulomb1D.txt",
)

# Map every GAN-generated vector (one row of arr_new each) onto the sample space.
# NOTE(review): the return value's type/shape is defined in reformatToXyz.py
# and is not visible here.
get_xyz_small_batch = mapping_obj.generateXYZ(coulomb1D_arr=arr_new)