# Optimize parameters passed to gap_fit with a gradient descent method, but just on one data set (training or test)

## Optimizing RMSE

In [None]:
# Importing everything we need
import sys
sys.path.append('../')

from GAP.gapmodels import GAPModel
from GAP.gapmodels import Split
from GAP.gapdescriptors import distance_2b
from GAP.gapplot import QualityPlot
import matplotlib.pyplot as plt
from textwrap import wrap

## fixed parameters 

In [None]:
# Fixed (non-optimized) GAP settings, each wrapped in a one-element list.
# NOTE(review): the objective functions visible in this notebook do not read
# these lists; they hard-code their own scalar values (e.g. cutoff 4,
# n_sparse 20) -- confirm which set is intended.
cutoff = [3]
n_sparse = [15]
delta = [1]
sparse_method = ["UNIFORM"]
covariance_type = ["ARD_SE"]
# Sigma for hessian and virial stress is not used and hence won't be optimized
sigma_fixed = ["0.0 0.0"]

## variable parameters

In [None]:
# Notebook-level starting values for the three tunable quantities.
# The objective functions take their own values from the optimizer's argument
# vector, so these module-level names only act as placeholders.
theta = sigma_energy = sigma_force = 1




## Split up the Data 

In [None]:
# Hydrogen MD data set and the fraction of it used for training
# (80% training / 20% test).
data = "/Users/simon/simon_ml/tools/hydrogentuple.xyz"
train_percentage = 0.8

# File names handed to the splitter for the two partitions
train_file, test_file = "train.xyz", "test.xyz"

# Set up the splitter, then perform the actual split
split = Split(data, train_percentage)
split.split(train_file, test_file)

## Make the function that is used for optimization. RMSE takes the 3 variable parameters as input and outputs an RMSE error

In [None]:
data_file = 'train.xyz'


def RMSE_train(tef):
    """Fit a 2-body GAP model on ``train.xyz`` and return its weighted training error.

    Intended as the objective for ``scipy.optimize.minimize``: every call
    trains a new model with the given hyperparameters, predicts on the same
    file it was trained on, and draws a two-panel quality plot.

    Args:
        tef: Indexable sequence whose first three entries are, in order,
            ``theta_uniform`` for the descriptor, the energy sigma and the
            force sigma.

    Returns:
        ``energy_RMSE / 2 + force_RMSE / 6`` as reported by ``GAPModel`` for
        the training file against the prediction file.

    Side effects:
        Passes ``GAP.xml`` and ``quip_2b.xyz`` to ``GAPModel`` as output file
        names (presumably overwritten on every call) and opens a new
        matplotlib figure per call; the figure is not closed here.
    """
    theta, sigma_energy, sigma_force = tef[0], tef[1], tef[2]

    # Local binding shadows the module-level `data_file`, so the objective
    # always uses the training file.
    data_file = 'train.xyz'

    # Fixed settings (not touched by the optimizer)
    cutoff = 4
    n_sparse = 20
    delta = 1
    sparse_method = 'UNIFORM'
    covariance_type = 'ARD_SE'
    # Sigma for hessian and virial stress is not used and hence won't be optimized
    sigma_fixed = '0.0 0.0'
    sigma = f"{sigma_energy} {sigma_force} {sigma_fixed}"

    descriptor_2b = distance_2b(cutoff=cutoff, covariance_type=covariance_type,
                                delta=delta, theta_uniform=theta,
                                n_sparse=n_sparse, sparse_method=sparse_method,
                                add_species='T')

    # Get parameter string
    parameters = descriptor_2b.get_parameter_string()

    # Make model
    model = GAPModel()

    # File in which the potential should be stored
    potential = "GAP.xml"

    # Train model
    model.train(parameters, training_data=data_file, GAP_potential=potential,
                sigma=sigma, print_output=False)

    # File in which the predictions should be stored
    prediction_file = "quip_2b.xyz"

    # Predict energies on training data
    model.predict(Test_Data=data_file, GAP_potential=potential,
                  QUIP_Prediction=prediction_file, print_output=False)

    # Weighted sum of energy and force RMSE
    RMSE = (model.energy_RMSE(data_file, prediction_file) / 2
            + model.force_RMSE(data_file, prediction_file) / 6)

    # One row, two panels: energies on the left, forces on the right
    plot = QualityPlot()
    _, axs = plt.subplots(nrows=1, ncols=2, gridspec_kw={'wspace': 1, 'hspace': 1})

    # The trailing space after "Data" matters: the two adjacent literals are
    # concatenated before wrapping, and without it the title would read
    # "...Datatheta_uniform = ...".
    plot.energies_on_energies(
        real_values=data_file, predicted_values=prediction_file,
        axis=axs[0],
        title="\n".join(wrap(f"Energy of Training Data "
                             f"theta_uniform = {theta}, sigma_energy = {sigma_energy}", 20)))

    plot.forces_on_forces(
        real_values=data_file, predicted_values=prediction_file,
        axis=axs[1],
        title="\n".join(wrap(f"Force of Training Data "
                             f"theta_uniform = {theta}, sigma_force = {sigma_force}", 20)))

    return RMSE

## Use the Nelder-Mead simplex method (derivative-free, not steepest descent) to optimize the RMSE function on training data

In [None]:
import scipy.optimize

# Starting point, ordered the way RMSE_train unpacks its argument:
# theta, sigma_energy, sigma_force
initial_guess = [1, 0.004, 0.08]

# Nelder-Mead simplex search; stop after 100 iterations at the latest.
# fatol is written as 1e-4, which is the same float as 10e-5.
result = scipy.optimize.minimize(
    RMSE_train,
    initial_guess,
    method="Nelder-Mead",
    options=dict(fatol=1e-4, maxiter=100, disp=True),
)


In [None]:
# Echo the optimization result so the notebook renders it as this cell's output
result

## Minimize RMSE, add up RMSE for training and validation data

In [None]:
data_file = 'train.xyz'
validation_file = 'test.xyz'


def RMSE_train_val(tef):
    """Fit a 2-body GAP model on ``train.xyz`` and score it on training and validation data.

    Objective function for ``scipy.optimize.minimize``. Each call trains a
    new model, predicts on both ``train.xyz`` and ``test.xyz`` and draws a
    2x2 grid of quality plots (training on top, validation below).

    Args:
        tef: Indexable sequence; entries 0, 1 and 2 are ``theta_uniform``,
            the energy sigma and the force sigma.

    Returns:
        Sum over both data sets of ``energy_RMSE / 2 + force_RMSE / 6``.

    Side effects:
        Passes ``GAP.xml``, ``quip_2btrain.xyz`` and ``quip_2bvalidate.xyz``
        to ``GAPModel`` as output file names and opens a new matplotlib
        figure on every call.
    """
    theta = tef[0]
    sigma_energy = tef[1]
    sigma_force = tef[2]

    # Input files (local names shadow the module-level ones)
    train_xyz = 'train.xyz'
    val_xyz = 'test.xyz'

    # Output files: fitted potential and the two prediction files
    potential_xml = "GAP.xml"
    train_pred_xyz = "quip_2btrain.xyz"
    val_pred_xyz = "quip_2bvalidate.xyz"

    # Sigma for hessian and virial stress is not used and hence won't be optimized
    unused_sigmas = '0.0 0.0'
    sigma = f"{sigma_energy} {sigma_force} {unused_sigmas}"

    # Descriptor with the fixed settings plus the theta under optimization
    parameters = distance_2b(
        cutoff=4,
        covariance_type='ARD_SE',
        delta=1,
        theta_uniform=theta,
        n_sparse=20,
        sparse_method='UNIFORM',
        add_species='T',
    ).get_parameter_string()

    # Train once, then predict on both data sets with the same potential
    gap = GAPModel()
    gap.train(parameters, training_data=train_xyz, GAP_potential=potential_xml,
              sigma=sigma, print_output=False)
    gap.predict(Test_Data=train_xyz, GAP_potential=potential_xml,
                QUIP_Prediction=train_pred_xyz, print_output=False)
    gap.predict(Test_Data=val_xyz, GAP_potential=potential_xml,
                QUIP_Prediction=val_pred_xyz, print_output=False)

    # Individual error terms, evaluated in the same order as they are summed
    energy_err_train = gap.energy_RMSE(train_xyz, train_pred_xyz)
    force_err_train = gap.force_RMSE(train_xyz, train_pred_xyz)
    energy_err_val = gap.energy_RMSE(val_xyz, val_pred_xyz)
    force_err_val = gap.force_RMSE(val_xyz, val_pred_xyz)
    total_error = (energy_err_train / 2 + force_err_train / 6
                   + energy_err_val / 2 + force_err_val / 6)

    def wrapped(text):
        # Break the title into lines of at most 20 characters
        return "\n".join(wrap(text, 20))

    energy_caption = f"theta_uniform = {theta}, sigma_energy = {sigma_energy}"
    force_caption = f"theta_uniform = {theta}, sigma_force = {sigma_force}"

    quality = QualityPlot()
    _, axes = plt.subplots(nrows=2, ncols=2, gridspec_kw={'wspace': 1, 'hspace': 1})

    # Top row: training data
    quality.energies_on_energies(real_values=train_xyz, predicted_values=train_pred_xyz,
                                 axis=axes[0, 0],
                                 title=wrapped("Energy of Training Data " + energy_caption))
    quality.forces_on_forces(real_values=train_xyz, predicted_values=train_pred_xyz,
                             axis=axes[0, 1],
                             title=wrapped("Force of Training Data " + force_caption))

    # Bottom row: validation data
    quality.energies_on_energies(real_values=val_xyz, predicted_values=val_pred_xyz,
                                 axis=axes[1, 0],
                                 title=wrapped("Energy of Validation Data " + energy_caption))
    quality.forces_on_forces(real_values=val_xyz, predicted_values=val_pred_xyz,
                             axis=axes[1, 1],
                             title=wrapped("Force of Validation Data " + force_caption))

    return total_error

In [None]:
import scipy.optimize

# Starting point, ordered the way RMSE_train_val unpacks its argument:
# theta, sigma_energy, sigma_force
initial_guess = [1, 0.004, 0.08]

# Nelder-Mead simplex search; stop after 100 iterations at the latest.
# fatol is written as 1e-4, which is the same float as 10e-5.
result = scipy.optimize.minimize(
    RMSE_train_val,
    initial_guess,
    method="Nelder-Mead",
    options=dict(fatol=1e-4, maxiter=100, disp=True),
)

In [None]:
# Echo the optimization result so the notebook renders it as this cell's output
result