# Mapping 1D representation of a Coulomb Matrix to XYZ coordinates using Bayesian Optimization

In [None]:
from generatexyz import Generatexyz
import numpy as np
from gan import load_weight
from gan import GenAdvNetwork
import tensorflow as tf
from utils import random_generator
from reformatToXyz import format_xyz_samples
import os
import pathlib
from reformatToXyz import MapCoulomb1dToXYZ
from formatData import loadData
from sklearn.model_selection import train_test_split
from coulombToTraj import getFlattenedXyz

### Generating the Sample Space

In [2]:
# NOTE: this whole cell is commented out, so nothing below is executed.
# It keeps a flat list of 36 reference values named benzene_xyz (12 rows of
# three numbers each) and the Generatexyz calls that would consume that list.
# NOTE(review): the meaning of the Generatexyz(1000000, 1000, 12) arguments is
# not visible from this file — confirm against generatexyz.py before re-enabling.
# benzene_xyz = [0.000000000000000000e+00,1.397000000000000020e+00,0.000000000000000000e+00,
#                1.209799999999999986e+00,6.985000000000000098e-01,0.000000000000000000e+00,
#                1.209799999999999986e+00,-6.985000000000000098e-01,0.000000000000000000e+00,
#                0.000000000000000000e+00,-1.397000000000000020e+00,0.000000000000000000e+00,
#                -1.209799999999999986e+00,-6.985000000000000098e-01,0.000000000000000000e+00,
#                -1.209799999999999986e+00,6.985000000000000098e-01,0.000000000000000000e+00,
#                0.000000000000000000e+00,2.480999999999999872e+00,0.000000000000000000e+00,
#                2.148600000000000065e+00,1.240499999999999936e+00,0.000000000000000000e+00,
#                2.148600000000000065e+00,-1.240499999999999936e+00,0.000000000000000000e+00,
#                0.000000000000000000e+00,-2.480999999999999872e+00,0.000000000000000000e+00,
#                -2.148600000000000065e+00,-1.240499999999999936e+00,0.000000000000000000e+00,
#                -2.148600000000000065e+00,1.240499999999999936e+00,0.000000000000000000e+00]
# benzene_xyz = np.array(benzene_xyz)
# gen_obj = Generatexyz(1000000, 1000, 12)
# gen_obj.generate_samples(benzene_xyz)
# gen_obj.sorting_by_coulomb_matrix()

### Using GAN to generate 1D Coulomb Matrix Representation

In [4]:
# Build the GAN, restore trained weights, and draw a batch of generated rows.
# Result: `arr_new`, a NumPy array of shape (n_samples_, coulomb1d_dim_).

latent_dim_ = 78  # length of each latent vector passed to the generator
batch_size_ = 32
# Number of latent vectors drawn. The same value sizes the final reshape, so
# it is named once here instead of being repeated as a literal.
n_samples_ = 1000
# Width of each generated row after reshaping.
# NOTE(review): presumably the flattened Coulomb-matrix length; it only
# coincidentally equals latent_dim_ — confirm before changing either value.
coulomb1d_dim_ = 78
weights_path = "/home/panthibivek/thesis/GAN_pkg/runs/train/exp2/weights/"

GanModel = GenAdvNetwork(latent_dim=latent_dim_, batch_size=batch_size_)
GanModel.compile(
    generator_opt=tf.keras.optimizers.Adam(learning_rate=0.001),
    discriminator_opt=tf.keras.optimizers.Adam(learning_rate=0.001),
    disc_loss=tf.keras.losses.BinaryCrossentropy(),
    gen_loss=tf.keras.losses.MAE,
)

random_data = random_generator((n_samples_, latent_dim_))

# The generator is called once before load_weights and its output discarded.
# NOTE(review): presumably this forces the model's variables to be created so
# the saved weights have something to restore into — confirm.
GanModel.generator(random_data)
GanModel.load_weights(weights_path)

# Second pass: the same latent batch, now through the restored weights.
generated_output = GanModel.generator(random_data)
arr = generated_output.numpy()
# An explicit target shape (rather than -1) keeps the original behaviour of
# raising if the generator output does not hold n_samples_ * coulomb1d_dim_
# values.
arr_new = arr.reshape((n_samples_, coulomb1d_dim_))

Model: "generator"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_2 (Dense)             (None, 13)                1027      
                                                                 
 leaky_re_lu_5 (LeakyReLU)   (None, 13)                0         
                                                                 
 reshape_1 (Reshape)         (None, 13, 1)             0         
                                                                 
 conv1d_transpose_2 (Conv1DT  (None, 39, 128)          640       
 ranspose)                                                       
                                                                 
 leaky_re_lu_6 (LeakyReLU)   (None, 39, 128)           0         
                                                                 
 batch_normalization_3 (Batc  (None, 39, 128)          512       
 hNormalization)                                         

### Using KDTree to find the closest xyz coordinates in the Sample Space
#### Time Complexity:
For generating the KDTree: O(N log(N)) <br>
For finding each sample: O(log(N)) on average <br> <br>
Note: The definition of class MapCoulomb1dToXYZ is in reformatToXyz.py

In [5]:
# Map every generated 1D Coulomb vector in `arr_new` to XYZ output via
# MapCoulomb1dToXYZ, using a directory under the current working directory.

# The output directory is resolved against the current working directory.
# Path.cwd() is used on purpose: this appears to be a notebook cell, where
# `__file__` is normally not defined, so there is no script directory to
# anchor on.
parent_dir_coulomb1D_to_xyz = (
    pathlib.Path.cwd() / "data" / "MoleculesMappedFromSampleSpace"
)
# Create the directory (and any missing parents); a no-op if it already exists.
parent_dir_coulomb1D_to_xyz.mkdir(parents=True, exist_ok=True)

# NOTE(review): the two sorted-sample files are absolute, machine-specific
# paths; they must exist on the machine running this cell.
mapping_obj = MapCoulomb1dToXYZ(
    abs_path_xyz_dirname=str(parent_dir_coulomb1D_to_xyz),
    sorted_xyz_file="/home/panthibivek/thesis/GAN_pkg/data/exp/totalSortedTraj.txt",
    sorted_coulomb1D_file="/home/panthibivek/thesis/GAN_pkg/data/exp/totalSortedCoulomb1D.txt",
)
get_xyz_small_batch = mapping_obj.generateXYZ(coulomb1D_arr=arr_new)