# Benchmarking:

In [1]:
import numpy as np
from formatData import loadData
from qml.kernels import gaussian_kernel
from qml.math import cho_solve
from utils import unison_shuffle
from sklearn.model_selection import train_test_split
from gan import GenAdvNetwork
import tensorflow as tf
from utils import random_generator

2023-04-04 19:51:09.471535: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


#### Comparing the performance of a KRR model trained on one trajectory and tested on:
- another trajectory

### Loading Data

In [2]:
"""
Load the reference trajectory and hold out 10% of it as a test set.
Note that only an absolute location of the raw data can be specified.
"""
# Fixed seed so the train/test partition -- and therefore every error reported
# further down -- is the same on each Restart & Run All. Without it the split
# is re-drawn on every execution.
SPLIT_SEED = 42

molRep2D, energies = loadData(12, "/home/panthibivek/thesis/GAN_pkg/data/traj.xyz")
X_train, X_test, y_train, y_test = train_test_split(
    molRep2D,
    energies,
    test_size=0.1,
    random_state=SPLIT_SEED,
)

print("Training data size:", X_train.shape)
print("Test data size:", X_test.shape)

input data size: (10000, 78)
output data size: (10000,)
Training data size: (9000, 78)
Test data size: (1000, 78)


### Regression using parameters from Baseline Experiment

In [3]:
def regression(X : np.ndarray, Y : np.ndarray, sigma : float, lambda_ : float):
    """
    Regression: solve for the kernel ridge regression coefficients.

    Builds the Gaussian kernel matrix of `X` against itself, adds `lambda_`
    to every diagonal entry, and solves the resulting linear system for `Y`
    with `cho_solve`.

    Parameters
    ----------
    X : np.ndarray
        Training representations; passed as both arguments of
        `gaussian_kernel`.
    Y : np.ndarray
        Training targets, used as the right-hand side of the solve.
    sigma : float
        Kernel width forwarded to `gaussian_kernel`.
    lambda_ : float
        Regularisation value added to the diagonal of the kernel matrix.

    Returns
    -------
    The result of `cho_solve(K, Y)`, i.e. the regression coefficients.
    """
    K = gaussian_kernel(X, X, sigma)
    # Regularise in place: only the diagonal entries are shifted by lambda_.
    K[np.diag_indices_from(K)] += lambda_
    alpha = cho_solve(K, Y)
    return alpha

def prediction(X : np.ndarray, arr_to_predict : np.ndarray, sigma : float, alpha_ : np.ndarray):
    """
    Prediction: evaluate a fitted kernel model on new inputs.

    Computes the Gaussian kernel between `arr_to_predict` and `X` and
    returns its dot product with the coefficients `alpha_`.

    Parameters
    ----------
    X : np.ndarray
        Representations the kernel is evaluated against (second argument of
        `gaussian_kernel`); must be the same set the coefficients were
        fitted on for the dot product to line up.
    arr_to_predict : np.ndarray
        Representations to predict for (first argument of
        `gaussian_kernel`).
    sigma : float
        Kernel width forwarded to `gaussian_kernel`.
    alpha_ : np.ndarray
        Regression coefficients, e.g. as returned by `regression`.

    Returns
    -------
    `np.dot(Ks, alpha_)`, one predicted value per row of the kernel matrix.
    """
    Ks = gaussian_kernel(arr_to_predict, X, sigma)
    return np.dot(Ks, alpha_)

In [4]:
# Baseline hyperparameters: kernel width and diagonal regularisation.
sigma, lambda_ = 150.0, 1e-8

# Fit on the training split, then score on the held-out split.
temp_alpha = regression(X_train, y_train, sigma, lambda_)
prediction_arr = prediction(X_train, X_test, sigma, temp_alpha)
mean_abs_errors = np.abs(prediction_arr - y_test).mean()
print(f"Mean Absolute Error : {mean_abs_errors}")

Mean Absolute Error : 0.0060216329802570525


### Loading the New Trajectory Dataset

In [5]:
# Destination file for the extracted energies (one value per line); it is
# passed to loadData in the next cell.
energyFilename = "/home/panthibivek/thesis/GAN_pkg/data/MD/traj1_energies.txt"
rawEnergyFilename = "/home/panthibivek/thesis/GAN_pkg/data/MD/properties/trajectory1/es_energies/ZINDO_verytight.dat"

# Keep the last tab-separated field of every non-blank line.
energy_values = []
with open(rawEnergyFilename, 'r') as f:
    for line in f:
        stripped = line.strip()
        # Lines read from a file keep their trailing newline, so testing
        # `line` itself never filters anything; a blank line would then hit
        # float('') and raise ValueError. Test the stripped text instead.
        if not stripped:
            continue
        energy_values.append(float(stripped.split('\t')[-1]))

energies_arr = np.array(energy_values)
np.savetxt(fname=energyFilename, X=energies_arr, delimiter='\n')

In [6]:
"""
Load the second trajectory together with the energies extracted for it.
Note that only an absolute location of the raw data can be specified.
"""
traj1XyzFilename = "/home/panthibivek/thesis/GAN_pkg/data/MD/trajectory1.xyz"
traj1XyzDirname = "/data/Traj1/AllMolecules"
traj1CoulombMtxFilename = "/data/Traj1/lower_coulomb_mtx_array.txt"

newTrajMolRep, newTrajEnergies = loadData(
    max_size=12,
    filename=traj1XyzFilename,
    energyFilename=energyFilename,
    xyzdirname=traj1XyzDirname,
    coulombMtxFilename=traj1CoulombMtxFilename,
)

input data size: (10001, 78)
output data size: (10001,)


In [7]:
# Score the model fitted on X_train against the second trajectory.
prediction_arr = prediction(X_train, newTrajMolRep, sigma, temp_alpha)
mean_abs_errors = np.abs(prediction_arr - newTrajEnergies).mean()
print(f"Mean Absolute Error : {mean_abs_errors}")

Mean Absolute Error : 0.8422718782240015


#### Comparing the performance of a KRR model trained on one trajectory plus samples from the GAN model and tested on:
- samples from the same trajectory

In [8]:
"""
Load the GAN samples and their energies.
Note that only an absolute location of the raw data can be specified.
"""
ganXyzFilename = "demo_name.xyz"
ganEnergyFilename = "/home/panthibivek/thesis/GAN_pkg/config_files_orca/calculate_energies.txt"
ganXyzDirname = "/config_files_orca/MoleculesMappedFromSampleSpace"
ganCoulombMtxFilename = "/config_files_orca/gan_lower_coulomb_mtx_array.txt"

ganTrajMolRep, ganTrajEnergies = loadData(
    max_size=12,
    filename=ganXyzFilename,
    energyFilename=ganEnergyFilename,
    xyzdirname=ganXyzDirname,
    coulombMtxFilename=ganCoulombMtxFilename,
)

input data size: (20000, 78)
output data size: (20000,)


In [9]:
# Same hyperparameters as the baseline run.
sigma, lambda_ = 150.0, 1e-8

# Augment the training split with the GAN samples (rows stacked along axis 0).
new_X_train = np.concatenate([X_train, ganTrajMolRep])
new_Y_train = np.concatenate([y_train, ganTrajEnergies])

# Refit on the augmented set and score on the original held-out split.
temp_alpha = regression(new_X_train, new_Y_train, sigma, lambda_)
prediction_arr = prediction(new_X_train, X_test, sigma, temp_alpha)
mean_abs_errors = np.abs(prediction_arr - y_test).mean()
print(f"Mean Absolute Error : {mean_abs_errors}")

Mean Absolute Error : 0.095650597553392


#### Comparing the performance of a KRR model trained on one trajectory plus samples from the GAN model and tested on:
- samples from a different trajectory

In [10]:
# Score the model fitted on the augmented set against the second trajectory.
prediction_arr = prediction(new_X_train, newTrajMolRep, sigma, temp_alpha)
mean_abs_errors = np.abs(prediction_arr - newTrajEnergies).mean()
print(f"Mean Absolute Error : {mean_abs_errors}")

Mean Absolute Error : 0.6868081691684245
