# Storage of 'good' descriptor parameter configurations for hydrogenMD

## Distance 2b parameters

### There are substantial differences when varying the covariance_type parameter, so there is a separate set of lists for each covariance_type


In [None]:
# Candidate 2b descriptor settings for covariance_type = PP (piecewise polynomial).
# NOTE(review): the original note states that theta_uniform is not needed for PP,
# yet values are listed below -- confirm whether they have any effect.
# Other covariance types (Gaussian, BOND_REAL_SPACE, dot_product) are not part of this set.
covariance_type_pp = ['PP']
cutoff_pp = [4, 5]
delta_pp = [1]
theta_uniform_pp = [1.25, 1.5]
n_sparse_pp = [15, 20]
sparse_method_pp = ['uniform', 'cur_points']
default_sigma_pp = [
    '0.0000001 0.0000001 0.0 0.0',
    '0.00000001 0.00000001 0.0 0.0',
]


In [None]:
# Candidate 2b descriptor settings for covariance_type = ARD_SE (squared exponential).
# Other covariance types (Gaussian, BOND_REAL_SPACE, dot_product) are not part of this set.
covariance_type_ardse = ['ARD_SE']
cutoff_ardse = [4, 5]
delta_ardse = [1]
theta_uniform_ardse = [1, 1.5, 1.75]
n_sparse_ardse = [15, 20, 25]
sparse_method_ardse = ['uniform']
default_sigma_ardse = [
    '0.0000001 0.0000001 0.0 0.0',
    '0.00000001 0.00000001 0.0 0.0',
]


In [None]:
# Candidate 2b descriptor settings for covariance_type = Gaussian.
# Other covariance types (BOND_REAL_SPACE, dot_product) are not part of this set.
covariance_type_gauss = ['Gaussian']
cutoff_gauss = [5]
delta_gauss = [1]
theta_uniform_gauss = [10]
n_sparse_gauss = [30]
sparse_method_gauss = ['uniform']
default_sigma_gauss = ['0.0 0.0 0.0 0.0']

## Simple test run of the 'good' parameters

In [None]:
# Importing everything we need
import sys
sys.path.append('../')

from GAP.gapmodels import GAPModel
from GAP.gapmodels import Split
from GAP.gapdescriptors import distance_2b
from GAP.gapplot import QualityPlot
import matplotlib.pyplot as plt
from textwrap import wrap

In [None]:
## Load the Hydrogen_MD data and divide it into training and test data (80%/20%)
data = '/Users/simon/simon_ml/GAP/hydrogen_md.xyz'
train_percentage = 0.8

# File names handed to the splitter for the training and the test part
train_file, test_file = 'train.xyz', 'test.xyz'

# Set up the splitter and perform the actual split
split = Split(data, train_percentage)
split.split(train_file, test_file)

# Retrieve the split data
train, test = split.get_splitted_data()

# Retrieve the names of the files holding the split data, for completeness only.
# These should be the same names that were passed to split.split() above.
train_file, test_file = split.get_splitted_data_files()

## Iterate over the list of configurations and perform training and fitting. Then see the quality of the fit using gapplot.QualityPlot.

### Pythonic way of getting all list combinations - set the parameter lists to the created parameters

In [None]:
import itertools

# Choose which of the parameter sets defined in the cells above is scanned:
# 'pp', 'ardse' or 'gauss'. The un-suffixed names (cutoff, delta, ...) used here
# before are not defined anywhere in the notebook and raised a NameError.
selected_set = 'pp'

# The order inside each entry must match the unpacking order of the iteration
# loops: cutoff, covariance_type, delta, theta_uniform, n_sparse, sparse_method,
# default_sigma.
parameter_sets = {
    'pp': [cutoff_pp, covariance_type_pp, delta_pp, theta_uniform_pp,
           n_sparse_pp, sparse_method_pp, default_sigma_pp],
    'ardse': [cutoff_ardse, covariance_type_ardse, delta_ardse, theta_uniform_ardse,
              n_sparse_ardse, sparse_method_ardse, default_sigma_ardse],
    'gauss': [cutoff_gauss, covariance_type_gauss, delta_gauss, theta_uniform_gauss,
              n_sparse_gauss, sparse_method_gauss, default_sigma_gauss],
}
li = parameter_sets[selected_set]

# Cartesian product of all lists: one tuple per parameter configuration
comb = list(itertools.product(*li))
print(comb)
print(len(comb))

### Perform iteration

In [None]:
# For every parameter combination in `comb`: train a GAP model, predict on the
# training and the test data, and plot predicted against reference energies.
# The third tuple element is unpacked as `delta_value` (not `delta`) so the loop
# does not rebind a module-level `delta` that may hold a parameter list.
for i, (cut, cov, delta_value, theta, nsparse, sparse, sigma) in enumerate(comb):
    descriptor_2b = distance_2b(cutoff=cut, covariance_type=cov, delta=delta_value,
                                theta_uniform=theta, n_sparse=nsparse,
                                sparse_method=sparse, add_species='T')
    # Make model
    model = GAPModel()

    # Select filename in which potential should be stored
    potential = f"GAP{i}.xml"

    # Get descriptor parameter string
    parameters = descriptor_2b.get_parameter_string()

    # Train model
    model.train(parameters, training_data=train_file, GAP_potential=potential,
                sigma=sigma, print_output=True)

    # Select filename in which predictions should be stored for train data
    prediction_train = f"quip_2b_train{i}.xyz"

    # Predict energies on training data
    model.predict(Test_Data=train_file, GAP_potential=potential,
                  QUIP_Prediction=prediction_train, print_output=True)

    # Select filename in which predictions should be stored for test data
    prediction_test = f"quip_2b_test{i}.xyz"

    # Predict energies on test data
    model.predict(Test_Data=test_file, GAP_potential=potential,
                  QUIP_Prediction=prediction_test)

    # Plot energies to see quality of the fit
    plot = QualityPlot()

    # Make subplots object, returning axis objects which are passed to QualityPlot
    figs, axs = plt.subplots(nrows=1, ncols=2, gridspec_kw={'wspace': 1, 'hspace': 1})

    # Parameter summary shared by both titles; every entry is separated by ", "
    # (the previous concatenation dropped some separators between entries).
    settings = (f"cutoff = {cut}, covariance_type = {cov}, delta = {delta_value}, "
                f"theta_uniform = {theta}, n_sparse = {nsparse}, "
                f"sparse_method = {sparse}, default_sigma = {sigma}")

    # Plot predicted energies of training data against real energies of training data
    plot.energies_on_energies(real_values=train_file, predicted_values=prediction_train,
                              axis=axs[0],
                              title="\n".join(wrap(f"Energy of Training Data, {settings}", 20)))

    # Plot predicted energies of test data against real energies of test data
    plot.energies_on_energies(real_values=test_file, predicted_values=prediction_test,
                              axis=axs[1],
                              title="\n".join(wrap(f"Energy of Test Data, {settings}", 20)))
    
    
   
    
    

In [None]:
 # Re-plot the stored predictions of every parameter combination in `comb` and
 # save each figure as plot_test{i}.pdf. Only the prediction files written by the
 # training loop (quip_2b_train{i}.xyz / quip_2b_test{i}.xyz) are read here.
 # Imported once here instead of on every loop iteration.
 from matplotlib.backends.backend_pdf import PdfPages

 # The third tuple element is unpacked as `delta_value` (not `delta`) so the loop
 # does not rebind a module-level `delta` that may hold a parameter list.
 for i, (cut, cov, delta_value, theta, nsparse, sparse, sigma) in enumerate(comb):
     # Plot energies to see quality of the fit
     plot = QualityPlot()

     # Make subplots object, returning axis objects which are passed to QualityPlot
     figs, axs = plt.subplots(nrows=1, ncols=2, gridspec_kw={'wspace': 1, 'hspace': 1})

     # Parameter summary shared by both titles; every entry is separated by ", "
     # (the previous concatenation dropped some separators between entries).
     settings = (f"cutoff = {cut}, covariance_type = {cov}, delta = {delta_value}, "
                 f"theta_uniform = {theta}, n_sparse = {nsparse}, "
                 f"sparse_method = {sparse}, default_sigma = {sigma}")

     # Plot predicted energies of training data against real energies of training data
     plot.energies_on_energies(real_values=train_file, predicted_values=f"quip_2b_train{i}.xyz",
                               axis=axs[0],
                               title="\n".join(wrap(f"Energy of Training Data, {settings}", 20)))

     # Plot predicted energies of test data against real energies of test data
     plot.energies_on_energies(real_values=test_file, predicted_values=f"quip_2b_test{i}.xyz",
                               axis=axs[1],
                               title="\n".join(wrap(f"Energy of Test Data, {settings}", 20)))

     # Save exactly the figure created in this iteration rather than relying on
     # whatever figure happens to be the current one.
     with PdfPages(f"plot_test{i}.pdf") as pdf:
         pdf.savefig(figs)