# MB-Fit tutorial (v20190924)

This notebook walks you through the different ways to obtain many-body fits for several molecules.



### Import the python library
To import the library without errors, the following commands must have been run in the bash terminal from which you launched the notebook. If you have not done so yet, please close the notebook and run them in a bash terminal:
```sh
cd HOME/DIRECTORY/OF/mbfit
source install.sh
```
Now the following command should run without any problem.

In [None]:
# This is for testing purposes. Can be ignored.
# IPython magics: load the autoreload extension and switch it to mode 2.
%load_ext autoreload
%autoreload 2

In [None]:
# The library that will enable the fitting generation and energy calculation
import mbfit
# Some other useful libraries
import os

## Example 2. Two-body TTM-nrg PEF for CO2 - CO2

### 2.1. Define variables, filepaths, and folders to work in

In [None]:
# Working directory at the moment this cell is executed.
main_dir = os.getcwd()

In [None]:
# Quantum-chemistry software that performs every calculation
# ("qchem" is the alternative to "psi4").
code = "psi4"

# Quantum chemistry method; "MP2" and "wb97m-v" are other possible choices.
method = "HF"

# Basis set. It must be pre-defined in the chosen software
# (custom basis sets are not implemented yet).
basis = "STO-3G"

# Counterpoise correction switch (set to True to enable it).
cp = False

# Resources for the calculations: number of threads and memory.
num_threads, memory = 2, "4GB"

# Path where all the log files will be stored.
log_path = "logs"

In [None]:
# Labels of the two monomers (identification purposes only).
names = ["CO2", "CO2"]

# Atom count of each monomer.
number_of_atoms = [3, 3]

# Net charge of each monomer.
charges = [0, 0]

# Spin multiplicity of each monomer.
spin = [1, 1]

# MB-pol flag per monomer (only relevant for water): a value of 1 selects the
# Partridge-Schwenke PEF for water, with the position dependent charges.
use_mbpol = [0, 0]

In [None]:
# Symmetry label of each monomer.
symmetry = ["A1B2"] * 2

# SMILES string of each monomer.
smiles = ["C(O)O"] * 2

In [None]:
# Name of the settings file for the single monomer.
mon_settings = "monomer_settings.ini"

# Contents of the monomer settings file, filled in from the variables defined
# in the previous cells (only the first entry of each per-monomer list is used).
# NOTE(review): the template states that exactly one of geometric/linear must
# be True, yet both are written as False -- confirm against mbfit.
my_settings_file_mon = f"""
[files]
# Local path directory to write log files in
log_path = {log_path}

[config_generator]
# what library to use for geometry optimization and normal mode generation
code = {code}
# use geometric or linear progression for T and A in config generation, exactly 1 must be True
geometric = False
linear = False

[energy_calculator]
# what library to use for energy calculations
code = {code}

[psi4]
# memory to use when doing a psi4 calculation
memory = {memory}
# number of threads to use when executing a psi4 calculation
num_threads = {num_threads}

[qchem]
# number of threads to use when executing a qchem calculation
num_threads = {num_threads}

[molecule]
# name of fragments, seperated by commas
names = {names[0]}
# number of atoms in each fragment, seperated by commas
fragments = {number_of_atoms[0]}
# charge of each fragment, seperated by commas
charges = {charges[0]}
# spin multiplicity of each fragment, seperated by commas
spins = {spin[0]}
# tag when putting geometries into database
tag = none
# Use or not MB-pol
use_mbpol = {use_mbpol[0]}
# symmetry of each fragment, seperated by commas
symmetry = {symmetry[0]}
SMILES = {smiles[0]}
"""

In [None]:
# Name of the settings file for the dimer.
dim_settings = "dimer_settings.ini"

# Contents of the dimer settings file, filled in from the variables defined in
# the previous cells; per-monomer values are written as "first,second".
# NOTE(review): the template states that exactly one of geometric/linear must
# be True, yet both are written as False -- confirm against mbfit.
my_settings_file_dim = f"""
[files]
# Local path directory to write log files in
log_path = {log_path}

[config_generator]
# what library to use for geometry optimization and normal mode generation
code = {code}
# use geometric or linear progression for T and A in config generation, exactly 1 must be True
geometric = False
linear = False

[energy_calculator]
# what library to use for energy calculations
code = {code}

[psi4]
# memory to use when doing a psi4 calculation
memory = {memory}
# number of threads to use when executing a psi4 calculation
num_threads = {num_threads}

[qchem]
# number of threads to use when executing a qchem calculation
num_threads = {num_threads}

[molecule]
# name of fragments, seperated by commas
names = {names[0]},{names[1]}
# number of atoms in each fragment, seperated by commas
fragments = {number_of_atoms[0]},{number_of_atoms[1]}
# charge of each fragment, seperated by commas
charges = {charges[0]},{charges[1]}
# spin multiplicity of each fragment, seperated by commas
spins = {spin[0]},{spin[1]}
# tag when putting geometries into database
tag = none
# Use or not MB-pol
use_mbpol = {use_mbpol[0]},{use_mbpol[1]}
# symmetry of each fragment, seperated by commas
symmetry = {symmetry[0]},{symmetry[1]}
SMILES = {smiles[0]},{smiles[1]}
"""

In [None]:
# Write the monomer and dimer settings files.
# The context managers close each file even if the write fails.
with open(mon_settings, 'w') as ff:
    ff.write(my_settings_file_mon)

with open(dim_settings, 'w') as ff:
    ff.write(my_settings_file_dim)

In [None]:
# Name of the XYZ file holding the unoptimized geometry of the monomer.
unopt_mon = "monomer.xyz"

# XYZ contents: atom count, comment line, then one "element x y z" line per atom.
my_unopt_monomer = (
    "3\n"
    "unoptimized co2\n"
    "C   0   0   0\n"
    "O   1.3   0   0\n"
    "O   -1.3  0   0\n"
)

In [None]:
# Write the unoptimized monomer geometry to disk.
# The context manager closes the file even if the write fails.
with open(unopt_mon, 'w') as ff:
    ff.write(my_unopt_monomer)

In [None]:
# XYZ file that contains the optimized geometry of the monomer
opt_mon = "monomer_opt.xyz"

In [None]:
# XYZ files with the configurations of the training set and of the test set.
training_configs = "training_configs.xyz"
test_configs = "test_configs.xyz"

# XYZ files the codes need to perform the fit: same configurations as the two
# files above, but with the energies in the comment line.
training_set = "training_set.xyz"
test_set = "test_set.xyz"

In [None]:
# File with the connection settings of the PostgreSQL database that stores
# structures and energies.
database_config = "local.ini"

# Client name passed to the database filling and job generation calls.
client_name = "pikachu"

In [None]:
# Connection settings written to the database configuration file.
# NOTE(review): this cell hard-codes database credentials in the notebook, and
# they are also written in plain text to the generated file. Replace them with
# your own and avoid committing either file with real credentials.
my_database_settings = """[database]
host = piggy.pl.ucsd.edu
port = 5432
database = potential_fitting
username = potential_fitting
password = 9t8ARDuN2Wy49VtMOrcJyHtOzyKhkiId
"""

# Write the file. Remember to update the username and password!
# The context manager closes the file even if the write fails.
with open(database_config, 'w') as ff:
    ff.write(my_database_settings)

In [None]:
# Symmetry labels of monomers 1 and 2 joined by '_'
molecule_in = "_".join(symmetry)

# Configuration file that contains all the monomer
# and dimer information. Will be used to generate the 2B codes.
config = "config.ini"

In [None]:
# Directory where ttm-nrg fitting code will be stored
ttmnrg_directory = "ttm-nrg_fit"
# Directory passed to the prepare/execute/retrieve fit calls below
# (presumably where the individual fits are kept -- confirm with help(mbfit.prepare_fits))
ttmnrg_fits_dir = "fits_ttmnrg"

In [None]:
# Sizes of the 2b training set and of the 2b test set (number of configurations).
num_training_configs = 300
num_test_configs = 50

# Energy caps: maximum energy allowed for distorted monomers (in kcal/mol)
# and maximum binding energy allowed.
mon_emax = 30.0
bind_emax = 500.0

# Range of distances between the two monomers (minimum, maximum).
min_d_2b, max_d_2b = 1.0, 8.0

# Smallest allowed fraction of the VdW distance between any two atoms that
# belong to different monomers.
min_inter_d = 0.5

# Different seeds for the configuration generation, so that training and test
# configurations differ.
seed_training = 12345
seed_test = 54321

# IDs of the monomers (should be consistent with the 1B id for each).
mon_ids = ["co2", "co2"]

# How many TTM-nrg fits to perform.
num_ttm_fits = 5

### 2.2. Geometry optimization

In [None]:
# Show the documentation of mbfit.optimize_geometry.
help(mbfit.optimize_geometry)

In [None]:
# Optimize the monomer: unopt_mon holds the unoptimized geometry and opt_mon
# is the file for the optimized one; uses the method and basis chosen above.
mbfit.optimize_geometry(mon_settings, unopt_mon, opt_mon, method, basis)

### 2.3. Training and Test Set generation

#### 2.3.1. Generate configurations

In [None]:
# Show the documentation of mbfit.generate_2b_configurations.
help(mbfit.generate_2b_configurations)

In [None]:
# Training set: dimer configurations built from the optimized monomer (used for
# both fragments); seed_training keeps them different from the test set.
mbfit.generate_2b_configurations(
    dim_settings,
    opt_mon,
    opt_mon,
    num_training_configs,
    training_configs,
    min_distance=min_d_2b,
    max_distance=max_d_2b,
    min_inter_distance=min_inter_d,
    progression=True,
    logarithmic=True,
    seed=seed_training,
)

In [None]:
# Test set: same procedure as for the training set, but with its own size,
# output file and seed.
mbfit.generate_2b_configurations(
    dim_settings,
    opt_mon,
    opt_mon,
    num_test_configs,
    test_configs,
    min_distance=min_d_2b,
    max_distance=max_d_2b,
    min_inter_distance=min_inter_d,
    progression=True,
    logarithmic=True,
    seed=seed_test,
)

#### 2.3.2. Add configurations to the database

In [None]:
# Show the documentation of mbfit.init_database.
help(mbfit.init_database)

In [None]:
# Training set: add its configurations to the database, labelled
# "train_example2_co2".
mbfit.init_database(
    dim_settings,
    database_config,
    training_configs,
    method,
    basis,
    cp,
    "train_example2_co2",
    optimized=False,
)

In [None]:
# Test set: add its configurations to the database, labelled
# "test_example2_co2".
mbfit.init_database(
    dim_settings,
    database_config,
    test_configs,
    method,
    basis,
    cp,
    "test_example2_co2",
    optimized=False,
)

In [None]:
# Add the optimized monomer geometry to the database under both labels
# (needed for the binding energy).
for db_tag in ("train_example2_co2", "test_example2_co2"):
    mbfit.init_database(
        mon_settings,
        database_config,
        opt_mon,
        method,
        basis,
        cp,
        db_tag,
        optimized=True,
    )

#### 2.3.3. Fill the database

In [None]:
# Show the documentation of mbfit.fill_database.
help(mbfit.fill_database)

In [None]:
# Option 1: fill the database directly with the database filler, for both the
# training and the test label.
mbfit.fill_database(
    dim_settings,
    database_config,
    client_name,
    "train_example2_co2",
    "test_example2_co2",
)

In [None]:
# Option 2: Write the jobs, run them externally, and retrieve the outputs.
# Show the documentation of the two functions used for that.
help(mbfit.make_jobs)
help(mbfit.read_jobs)

In [None]:
# Option 2, first step: generate the jobs in jobs_folder for both the training
# and the test label.
jobs_folder = "jobs"
mbfit.make_jobs(
    dim_settings,
    database_config,
    client_name,
    jobs_folder,
    "train_example2_co2",
    "test_example2_co2",
)

In [None]:
# Run the jobs (can be done externally, supercomputer...)
import glob
import subprocess

if os.path.isdir(jobs_folder):
    # Collect the job scripts (*.py directly inside the jobs folder) by name,
    # without changing the notebook's working directory: a failing or
    # interrupted job can then never leave the notebook inside the jobs folder,
    # and no "../" assumption about the folder depth is needed.
    job_files = [
        os.path.basename(job_path)
        for job_path in glob.glob(os.path.join(glob.escape(jobs_folder), "*.py"))
    ]
    for njobs, this_job in enumerate(job_files, start=1):
        print(njobs, "/", len(job_files))
        # Each job still runs with the jobs folder as its working directory.
        # Passing an argument list avoids shell interpretation of the file name.
        finished = subprocess.run(["python3", this_job], cwd=jobs_folder)
        if finished.returncode != 0:
            # Keep going (best effort, as before) but make the failure visible.
            print("WARNING: job", this_job, "exited with status", finished.returncode)

In [None]:
# Read the job outputs and store information in the database.
# Skipped when the jobs folder does not exist (e.g. Option 2 was not used).
if os.path.isdir(jobs_folder):
    mbfit.read_jobs(dim_settings, database_config, jobs_folder)

#### 2.3.4. Training set and Test set generation

In [None]:
# Show the documentation of mbfit.generate_training_set.
help(mbfit.generate_training_set)

In [None]:
# Generate the training set first and then the test set. Both use the same
# function and energy caps; only the output file and the database label differ.
for output_xyz, db_tag in (
    (training_set, "train_example2_co2"),
    (test_set, "test_example2_co2"),
):
    mbfit.generate_training_set(
        dim_settings,
        database_config,
        output_xyz,
        method,
        basis,
        cp,
        db_tag,
        e_bind_max=bind_emax,
        e_mon_max=mon_emax,
    )

### 2.4. Obtain charges, polarizabilities, and C6

In [None]:
# Show the documentation of mbfit.get_system_properties.
help(mbfit.get_system_properties)

In [None]:
# Obtain the charges, polarizabilities and C6 for the dimer, passing the
# optimized monomer geometry for both fragments.
chg, pol, c6 = mbfit.get_system_properties(dim_settings, config, geo_paths = [opt_mon, opt_mon])

In [None]:
# Show the documentation of mbfit.write_config_file.
help(mbfit.write_config_file)

In [None]:
# Write the configuration file (config) from the charges, polarizabilities and
# C6 obtained above, again passing the optimized monomer for both fragments.
mbfit.write_config_file(dim_settings, config, chg, pol, [opt_mon, opt_mon], c6)

### 2.5. Fitting the TTM-nrg PEF

#### 2.5.1. Obtain and compile the fitting code

In [None]:
# Show the documentation of mbfit.generate_ttmnrg_fitting_code.
help(mbfit.generate_ttmnrg_fitting_code)

In [None]:
# Generate the TTM-nrg fitting code from the configuration file; it is stored
# in ttmnrg_directory.
mbfit.generate_ttmnrg_fitting_code(dim_settings, config, ttmnrg_directory)

In [None]:
# Show the documentation of mbfit.compile_fit_code.
help(mbfit.compile_fit_code)

In [None]:
# Compile the fitting code stored in ttmnrg_directory.
mbfit.compile_fit_code(dim_settings, ttmnrg_directory)

#### 2.5.2. Perform the fit

In [None]:
# Show the documentation of mbfit.prepare_fits.
help(mbfit.prepare_fits)

In [None]:
# Prepare num_ttm_fits TTM-nrg fits to the training set in ttmnrg_fits_dir.
mbfit.prepare_fits(
    dim_settings,
    ttmnrg_directory,
    training_set,
    ttmnrg_fits_dir,
    DE=20,
    alpha=0.0005,
    num_fits=num_ttm_fits,
    ttm=True,
    over_ttm=False,
)

In [None]:
# Show the documentation of mbfit.execute_fits.
help(mbfit.execute_fits)

In [None]:
# Perform the fits prepared in ttmnrg_fits_dir.
mbfit.execute_fits(dim_settings, ttmnrg_fits_dir)

In [None]:
# Show the documentation of mbfit.retrieve_best_fit.
help(mbfit.retrieve_best_fit)

In [None]:
# Retrieve the best fit from ttmnrg_fits_dir (the help output of
# mbfit.retrieve_best_fit describes how "best" is decided).
mbfit.retrieve_best_fit(dim_settings, ttmnrg_fits_dir)

In [None]:
# Show the documentation of mbfit.update_config_with_ttm.
help(mbfit.update_config_with_ttm)

In [None]:
# Update the configuration file with the TTM-nrg results found in
# ttmnrg_fits_dir (presumably the best-fit parameters -- confirm in the help output).
mbfit.update_config_with_ttm(dim_settings, ttmnrg_fits_dir, config)

### 2.6. Visualize the fits

In [None]:
# Show the documentation of mbfit.get_correlation_data.
help(mbfit.get_correlation_data)

In [None]:
# Collect the correlation data of the TTM-nrg fit for the test set.
energies = mbfit.get_correlation_data(
    dim_settings,
    ttmnrg_directory,
    ttmnrg_fits_dir,
    test_set,
    min_energy_plot=-5.0,
    max_energy_plot=50.0,
    split_energy=5.0,
    ttm=True,
)

### 2.7. Calculate the energy of configurations

In [None]:
# Show the documentation of mbfit.calculate_model_energies.
help(mbfit.calculate_model_energies)

In [None]:
# Calculate the energies of the test configurations with the TTM-nrg model.
energies_from_configs = mbfit.calculate_model_energies(
    dim_settings,
    ttmnrg_directory,
    ttmnrg_fits_dir,
    test_configs,
    ttm=True,
)

### 2.8. Add potential to MBX

In [None]:
# Show the documentation of mbfit.generate_MBX_files.
help(mbfit.generate_MBX_files)

In [None]:
# Generate the files for adding this potential to MBX, with the TTM-nrg part
# enabled and no MBX home directory given.
mbfit.generate_MBX_files(
    dim_settings,
    config,
    mon_ids,
    do_ttmnrg=True,
    MBX_HOME=None,
    version="v1",
)