# MB-Fit tutorial (v20190924)

This notebook walks you through the different ways of obtaining many-body fits for a variety of molecules. 



### Import the python library
Remember that, in order to import the library without errors, you must first run the following commands in the bash terminal from which you launch the notebook. If you have not done so yet, please close the notebook and run this in a bash terminal:
```sh
cd HOME/DIRECTORY/OF/mbfit
source install.sh
```
Now the following command should run without any problem.

In [None]:
# Development aid only; this cell can be skipped.
# IPython's autoreload extension (mode 2) re-imports modules before each cell
# runs, so edits to the library are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# The library that will enable the fitting generation and energy calculation
import mbfit
# Some other useful libraries
import os

## Example 5. One-body MB-nrg PEF for NH4+

### 5.1. Define variables, filepaths, and folders to work in

In [None]:
# Current working directory at the time this cell is executed.
main_dir = os.getcwd()

In [None]:
# Quantum-chemistry package that performs all the calculations.
# Alternative:
# code = "qchem"
code = "psi4"

# Quantum chemistry method to use.
# Alternatives:
# method = "MP2"
# method = "wb97m-v"
method = "HF"

# Basis set. It must be pre-defined in the chosen package; custom basis sets
# are not implemented yet.
basis = "STO-3G"

# Whether to apply the counterpoise correction.
# Alternative:
# cp = True
cp = False

# Directory in which all the log files are stored.
log_path = "logs"

# Resources to request: memory and number of threads.
memory = "4GB"
num_threads = 2

In [None]:
# All lists below hold one entry per monomer (a single monomer here).

# Labels of the monomers; used for identification purposes only.
names = ["NH4+"]

# Net charge and spin multiplicity of each monomer.
charges = [1]
spin = [1]

# How many atoms each monomer has.
number_of_atoms = [5]

# MB-pol switch, only applicable to water: set to 1 to use the
# Partridge-Schwenke PEF for water, with the position-dependent charges.
use_mbpol = [0]

In [None]:
# SMILES string describing the connectivity of the molecule.
smiles = ["N123H.H1.H2.H3"]

# Symmetry label of the molecule
# (presumably one atom of type A and four equivalent atoms of type B -- confirm).
symmetry = ["A1B4"]

In [None]:
# Name of the settings file for the monomer.
mon_settings = "monomer_settings.ini"

# Text of the settings file: an INI template whose {placeholders} are filled
# with the values chosen in the previous cells (first monomer only). Lines that
# start with '#' inside the template belong to the settings file itself and are
# written out verbatim.
# NOTE(review): the template states that exactly one of geometric/linear must
# be True, yet both are False here -- confirm this is intended.
my_settings_file = """
[files]
# Local path directory to write log files in
log_path = {log_path}

[config_generator]
# what library to use for geometry optimization and normal mode generation
code = {code}
# use geometric or linear progression for T and A in config generation, exactly 1 must be True
geometric = False
linear = False

[energy_calculator]
# what library to use for energy calculations
code = {code}

[psi4]
# memory to use when doing a psi4 calculation
memory = {memory}
# number of threads to use when executing a psi4 calculation
num_threads = {num_threads}

[qchem]
# number of threads to use when executing a qchem calculation
num_threads = {num_threads}

[molecule]
# name of fragments, seperated by commas
names = {name}
# number of atoms in each fragment, seperated by commas
fragments = {fragment}
# charge of each fragment, seperated by commas
charges = {charge}
# spin multiplicity of each fragment, seperated by commas
spins = {spin}
# tag when putting geometries into database
tag = none
# Use or not MB-pol
use_mbpol = {use_mbpol}
# symmetry of each fragment, seperated by commas
symmetry = {symmetry}
SMILES = {smiles}
""".format(
    log_path=log_path,
    code=code,
    memory=memory,
    num_threads=num_threads,
    name=names[0],
    fragment=number_of_atoms[0],
    charge=charges[0],
    spin=spin[0],
    use_mbpol=use_mbpol[0],
    symmetry=symmetry[0],
    smiles=smiles[0],
)

In [None]:
# Write the settings file. The context manager closes the file (flushing its
# contents to disk) even if the write raises.
with open(mon_settings, 'w') as ff:
    ff.write(my_settings_file)

In [None]:
# XYZ file that will contain the unoptimized geometry of monomer 1
unopt_mon = "monomer.xyz"

# Contents of that file in XYZ layout: the atom count, a comment line, and one
# line per atom (element symbol followed by its x, y, z coordinates). The
# trailing empty entry makes the text end with a newline.
my_unopt_monomer = "\n".join((
    "5",
    "unoptimized nh4+",
    "N    0      0      0",
    "H    0.20  -0.28  -0.11",
    "H   -0.55  -0.81   0.36",
    "H   -0.38   0.30  -0.92",
    "H   -0.07   0.80   0.68",
    "",
))

In [None]:
# Write the unoptimized geometry to disk. The context manager closes the file
# (flushing its contents) even if the write raises.
with open(unopt_mon, 'w') as ff:
    ff.write(my_unopt_monomer)

In [None]:
# File to which the normal modes of monomer 1 will be written
normal_modes_mon = "monomer_normal_modes.dat"

# XYZ file that will contain the optimized geometry of monomer 1
opt_mon = "monomer_opt.xyz"

In [None]:
# XYZ files holding only the configurations (no energies) of the training set
# and of the test set.
training_configs = "training_configs.xyz"
test_configs = "test_configs.xyz"

# XYZ files the codes need to perform the fit. They contain the same
# configurations as training_configs / test_configs, but with the energies
# stored in the comment line of each configuration.
training_set = "training_set.xyz"
test_set = "test_set.xyz"

In [None]:
# Name under which this client identifies itself when filling the database.
client_name = "motzu the survivor"

# Settings file for the PostgreSQL database that stores structures and energies.
database_config = "local.ini"

In [None]:
# Connection settings for the database, in INI format.
# NOTE(review): a real-looking password is committed here in plain text.
# Credentials should not live in a shared notebook -- consider rotating this
# one and supplying your own values.
my_database_settings = """[database]
host = piggy.pl.ucsd.edu
port = 5432
database = potential_fitting
username = potential_fitting
password = 9t8ARDuN2Wy49VtMOrcJyHtOzyKhkiId
"""

# Write the file. Remember to update the username and password!
# The context manager closes the file even if the write raises.
with open(database_config, 'w') as ff:
    ff.write(my_database_settings)

In [None]:
# Degree of the polynomials
polynomial_order = 2

# Input file for the polynomial generation, and the directory in which the
# polynomials will be generated.
poly_in = "poly.in"
poly_directory = "polynomial_generation"

# Molecule specification handed to the polynomial input generator. This example
# has a single monomer, so it is just that monomer's symmetry label (with
# several monomers the labels would be separated by '_').
molecule_in = symmetry[0]

# Configuration file holding the system information; it is written later in
# this notebook and passed on when generating the fitting code.
config = "config.ini"

In [None]:
# Directory in which the fits themselves are prepared and run
mbnrg_fit_path = "fits_mbnrg"

# Directory where the MB-nrg fitting code will be stored
mbnrg_directory = "mb-nrg_fit"

In [None]:
# Number of configurations in the 1b training set and in the 1b test set
num_training_configs = 500
num_test_configs = 100

# Seeds for the configuration generation; they differ so that the training
# and test sets contain different configurations.
seed_training = 12345
seed_test = 54321

# Energy cut-offs: maximum energy allowed for distorted monomers (in kcal/mol)
# and maximum binding energy allowed.
mon_emax = 100.0
bind_emax = 500.0

# IDs of the monomers (should be consistent with the 1B id for each)
mon_ids = ["nh4+"]

# Number of fits to perform (passed as num_fits when the MB-nrg fits are prepared)
num_mb_fits = 5

### 5.2. Generate polynomials

#### 5.2.1. Generate polynomial input file

In [None]:
# Print the documentation of mbfit.generate_poly_input.
help(mbfit.generate_poly_input)

In [None]:
# Generate the polynomial input file (poly_in) for the molecule given by molecule_in.
mbfit.generate_poly_input(mon_settings, molecule_in, poly_in)

#### 5.2.2. Generate maple input files

In [None]:
# Print the documentation of mbfit.generate_polynomials.
help(mbfit.generate_polynomials)

In [None]:
# Generate the polynomials of degree polynomial_order from poly_in; the output
# goes to poly_directory. Direct gradients are not requested.
mbfit.generate_polynomials(
    mon_settings,
    poly_in,
    polynomial_order,
    poly_directory,
    generate_direct_gradients=False,
)

#### 5.2.3. Optimize the polynomial evaluation

In [None]:
# Print the documentation of mbfit.execute_maple.
help(mbfit.execute_maple)

In [None]:
# Optimize the polynomial evaluation for the files in poly_directory
# (presumably requires a working Maple installation -- see help above).
mbfit.execute_maple(mon_settings, poly_directory)

### 5.3. Geometry optimization and normal mode calculation

#### 5.3.1. Monomers

In [None]:
# Print the documentation of mbfit.optimize_geometry.
help(mbfit.optimize_geometry)

In [None]:
# Optimize the monomer geometry with the chosen method and basis set:
# unopt_mon is the starting geometry and opt_mon the file for the optimized one.
mbfit.optimize_geometry(mon_settings, unopt_mon, opt_mon, method, basis)

In [None]:
# Print the documentation of mbfit.generate_normal_modes.
help(mbfit.generate_normal_modes)

In [None]:
# Compute the normal modes of the optimized monomer with the same method and
# basis set; they are written to normal_modes_mon.
mbfit.generate_normal_modes(
    mon_settings,
    opt_mon,
    normal_modes_mon,
    method,
    basis,
)

### 5.4. Training and test set generation

#### 5.4.1. Generate configurations 

In [None]:
# Print the documentation of mbfit.generate_normal_mode_configurations.
help(mbfit.generate_normal_mode_configurations)

In [None]:
# Generate the training-set configurations from the optimized geometry and its
# normal modes, using the training seed; they are written to training_configs.
mbfit.generate_normal_mode_configurations(
    mon_settings,
    opt_mon,
    normal_modes_mon,
    training_configs,
    number_of_configs=num_training_configs,
    seed=seed_training,
    classical=True,
)

In [None]:
# Generate the test-set configurations in the same way, but with the test seed
# so that they differ from the training ones; they are written to test_configs.
mbfit.generate_normal_mode_configurations(
    mon_settings,
    opt_mon,
    normal_modes_mon,
    test_configs,
    number_of_configs=num_test_configs,
    seed=seed_test,
    classical=True,
)

#### 5.4.2. Add configurations to the database

In [None]:
# Print the documentation of mbfit.init_database.
help(mbfit.init_database)

In [None]:
# Add to the database, under the training tag:
#   1. the monomer training-set configurations (not optimized), and
#   2. the optimized geometry of monomer 1 (needed for the binding energy).
for _xyz_path, _is_optimized in ((training_configs, False), (opt_mon, True)):
    mbfit.init_database(
        mon_settings, database_config, _xyz_path, method, basis, cp,
        "train_example5_nh4+", optimized=_is_optimized,
    )

In [None]:
# Add to the database, under the test tag:
#   1. the monomer test-set configurations (not optimized), and
#   2. the optimized geometry of monomer 1 (needed for the binding energy).
for _xyz_path, _is_optimized in ((test_configs, False), (opt_mon, True)):
    mbfit.init_database(
        mon_settings, database_config, _xyz_path, method, basis, cp,
        "test_example5_nh4+", optimized=_is_optimized,
    )

#### 5.4.3. Calculate energy

In [None]:
# Print the documentation of mbfit.fill_database.
help(mbfit.fill_database)

In [None]:
# Calculate the energies for the entries stored under the training and test
# tags, identifying this client as client_name.
# calculation_count=None presumably means "no limit" -- confirm with help above.
mbfit.fill_database(
    mon_settings,
    database_config,
    client_name,
    "train_example5_nh4+",
    "test_example5_nh4+",
    calculation_count=None,
)

#### 5.4.4. Training set and Test set generation

In [None]:
# Print the documentation of mbfit.generate_training_set.
help(mbfit.generate_training_set)

In [None]:
# Generate the training set first and the test set second. The same function
# builds both; only the output file and the database tag differ. The energy
# cut-offs bind_emax and mon_emax apply to both.
for _set_path, _tag in (
    (training_set, "train_example5_nh4+"),
    (test_set, "test_example5_nh4+"),
):
    mbfit.generate_training_set(
        mon_settings, database_config, _set_path, method, basis, cp, _tag,
        e_bind_max=bind_emax, e_mon_max=mon_emax,
    )

### 5.5. Obtain charges, polarizabilities, and C6

In [None]:
# Print the documentation of mbfit.get_system_properties.
help(mbfit.get_system_properties)

In [None]:
# Obtain the charges (chg), polarizabilities (pol) and C6 coefficients (c6)
# of the system, using the optimized monomer geometry.
chg, pol, c6 = mbfit.get_system_properties(mon_settings, config, geo_paths = [opt_mon])

In [None]:
# Print the documentation of mbfit.write_config_file.
help(mbfit.write_config_file)

In [None]:
# Write the configuration file (config) from the properties obtained in the
# previous step and the optimized monomer geometry.
mbfit.write_config_file(mon_settings, config, chg, pol, [opt_mon], c6)

### 5.6. MB-nrg fit

#### 5.6.1. Obtain and compile the fitting code

In [None]:
# Print the documentation of mbfit.generate_mbnrg_fitting_code.
help(mbfit.generate_mbnrg_fitting_code)

In [None]:
# Generate the MB-nrg fitting code in mbnrg_directory from the configuration
# file and the polynomials generated earlier (same polynomial_order).
mbfit.generate_mbnrg_fitting_code(
    mon_settings,
    config,
    poly_in,
    poly_directory,
    polynomial_order,
    mbnrg_directory,
    use_direct=False,
)

In [None]:
# Print the documentation of mbfit.compile_fit_code.
help(mbfit.compile_fit_code)

In [None]:
# Compile the fitting code stored in mbnrg_directory.
mbfit.compile_fit_code(mon_settings, mbnrg_directory)

#### 5.6.2. Perform the fit

In [None]:
# Print the documentation of mbfit.prepare_fits.
help(mbfit.prepare_fits)

In [None]:
# Prepare num_mb_fits fits of the training set in mbnrg_fit_path, using the
# fitting code in mbnrg_directory. DE and alpha are fit parameters (see help
# above); both TTM-related options are switched off.
mbfit.prepare_fits(
    mon_settings,
    mbnrg_directory,
    training_set,
    mbnrg_fit_path,
    DE=20,
    alpha=0.0005,
    num_fits=num_mb_fits,
    ttm=False,
    over_ttm=False,
)

In [None]:
# Print the documentation of mbfit.execute_fits.
help(mbfit.execute_fits)

In [None]:
# Run the fits that were prepared in mbnrg_fit_path.
mbfit.execute_fits(
    mon_settings,
    mbnrg_fit_path,
)

In [None]:
# Print the documentation of mbfit.retrieve_best_fit.
help(mbfit.retrieve_best_fit)

In [None]:
# Retrieve the best of the fits in mbnrg_fit_path; fitted_nc_path names the
# file for the fitted parameters (presumably netCDF, judging by the extension).
mbfit.retrieve_best_fit(mon_settings, mbnrg_fit_path, fitted_nc_path = "mbnrg.nc")

### 5.7. Visualize the results

In [None]:
# Print the documentation of mbfit.get_correlation_data.
help(mbfit.get_correlation_data)

In [None]:
# Obtain the correlation data of the fit for the test set.
# split_energy is given as 5.0; its units are not shown here
# (presumably kcal/mol -- confirm with help above).
energies = mbfit.get_correlation_data(
    mon_settings,
    mbnrg_directory,
    mbnrg_fit_path,
    test_set,
    split_energy=5.0,
)

### 5.8. Add files to MBX

In [None]:
# Print the documentation of mbfit.generate_MBX_files.
help(mbfit.generate_MBX_files)

In [None]:
# Generate the files needed to add this MB-nrg fit to MBX for the monomers in
# mon_ids. TTM-nrg output is disabled and no MBX home directory is given.
mbfit.generate_MBX_files(
    mon_settings,
    config,
    mon_ids,
    do_ttmnrg=False,
    mbnrg_fits_path=mbnrg_fit_path,
    MBX_HOME=None,
    version="v1",
)