In [1]:
import os
import numpy as np
from tools.general_utils import work_json
from pyLAMMPS import LAMMPS_input, get_molecule_coordinates
from tools.system_setup import write_decoupling_ff, get_initial_conditions, create_composition_matrix, calculate_molecular_weight

# System setup

This notebook sets up the systems needed to compute the vapor-liquid equilibrium for a specified mixture using the solvation free energy. The solvation free energy is computed using the decoupling approach and LAMMPS. For every component, the self-solvation free energy, as well as the solvation free energy in several mixture compositions, are needed.

Essentially, the workflow can be summarized as follows:
1. Get initial molecule coordinates using SMILES and PubChem and provide a graph representation.
2. Use PLAYMOL to construct a system of any mixture.
3. Use the moleculegraph software and pyLAMMPS tools to generate LAMMPS data and input files via jinja2 templates.

## 1. Define general settings ##

1. Define paths to force field toml (in a moleculegraph understandable format), as well as to all jinja2 templates.
2. Define name, SMILES, and graph strings of the molecules under investigation. (further examples on constructing molecule graphs available at https://github.com/maxfleck/moleculegraph)
3. Define free energy settings (coupling lambdas for each species for each interaction type (van der Waals or Coulomb))

In [2]:
# 1: Locations of all input files used by this notebook: the toml force field, the SAFT files,
#    and the jinja2 templates. The mixture databank that collects the system information is defined here as well.

# Force field (toml)
force_field_path = "input_files/force-fields/forcefield_model_fluid.toml"

# SAFT related files
# (alternative parameter file noted by the author: "input_files/SAFT/SI_pcp-saft_parameters.json")
SAFT_parameter_file = "input_files/SAFT/model_fluid.json"
SAFT_binary_file = "input_files/SAFT/binary_records_kij.json"

# Template for single molecule xyz files
template_xyz = "input_files/templates/template_write_xyz.xyz"

# PLAYMOL templates (force field and input)
playmol_force_field_template = "input_files/templates/template_playmol_forcefield.playmol"
playmol_input_template = "input_files/templates/template_playmol_input.mol"

# LAMMPS templates (data file and input file)
LAMMPS_data_template = "input_files/templates/template_lammps_data.data"
LAMMPS_input_template = "input_files/templates/template_lammps_free_energy_lj.in"

# Mixture databank file and the (initially empty) dataset that is filled for this mixture
databank_json = "input_files/mixture_dataset.json"
mixture_dataset = dict()

# 2: Names, SMILES, and graphs of the molecules
#    (further examples on constructing molecule graphs: https://github.com/maxfleck/moleculegraph)

# Name of the mixture
system_name = "mixture_LJ1_LJ2"

# Unique condition of the mixture (e.g. a constant temperature or a constant pressure) together with the
# compositions of the N-1 components at each state point. The total number of molecules to simulate is set here too.
system_key = "90"
temperature = 89.88
pressure = None
compositions = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
total_no_molecules = 900

# Every component of the mixture: name, graph string, SMILES string, and whether it carries partial charges
molecule_name1, molecule_graph1, molecule_smiles1, molecule_charge1 = "LJ1", "[LJ1]", "[LJ1]", False
molecule_name2, molecule_graph2, molecule_smiles2, molecule_charge2 = "LJ2", "[LJ2]", "[LJ2]", False

# Collect the per-component entries into the mixture lists (same component order in every list)
molecule_names_list = [molecule_name1, molecule_name2]
molecule_graphs_list = [molecule_graph1, molecule_graph2]
molecule_smiles_list = [molecule_smiles1, molecule_smiles2]
molecule_charges_list = [molecule_charge1, molecule_charge2]

# Molecular weight per component, divided by 1000 and rounded to 6 decimals
# (presumably a g/mol -> kg/mol conversion; verify against calculate_molecular_weight)
molecule_weights_list = [
    np.round(calculate_molecular_weight(smiles) / 1000, 6)
    for smiles in molecule_smiles_list
]

# ********************* To do: fix that composition matrix is not only for 2 components *********************
# Composition matrix [ [x1=[0.0, ... 1.0], x2, x3, ...], [x1, x2=[0.0, ..., 1.0], x3, ... ], ... ]
# Each element holds one sublist per component with its concentrations. The diagonal entry always
# contains the concentrations of the component itself, starting at 0.0 and ranging up to 1.0.
# Note: from here on `compositions` refers to this matrix and no longer to the flat list defined above.
compositions = create_composition_matrix(len(molecule_names_list), compositions)

# 3: Free energy settings (coupling lambdas for each species and each interaction type, van der Waals or Coulomb)

def _default_vdw_lambdas():
    """Return the van der Waals coupling lambdas as a fresh list.

    The schedule consists of 60 evenly spaced points on [0, 0.2] followed by
    21 evenly spaced points on [0.21, 1.0], i.e. 81 values in ascending order.
    """
    fine_part = np.linspace(0, 0.2, 60).tolist()
    coarse_part = np.linspace(0.21, 1.0, 21).tolist()
    return fine_part + coarse_part


# Cutoff radius inserted into both coupling pair styles
rcut = 14

# Coupling pair styles for van der Waals and for Coulomb interactions
pair_style_coupling_vdw = f"hybrid/overlay lj/cut {rcut} lj/cut/soft 1.0 0.5 {rcut}"
pair_style_coupling_coulomb = f"coul/long {rcut} coul/cut/soft 1.0 0.0 {rcut}"

# Special bonds for 1-2, 1-3, and 1-4 interactions
sb_dict = dict(vdw=[0, 0, 0], coulomb=[0, 0, 0])

# Force field types that should be constrained using the SHAKE algorithm (none here)
shake_dict = {constraint_type: [] for constraint_type in ("atoms", "bonds", "angles")}

# Free energy method that should be utilized
free_energy_method = "TI"

# Coupling lambdas per component as [ vdW lambdas, Coulomb lambdas ].
# If a component is uncharged, the Coulomb list stays empty.
lambdas_coupling = [
    [_default_vdw_lambdas(), []],  # first component
    [_default_vdw_lambdas(), []],  # second component
]


# Write general information in the mixture databank (starts a fresh entry for this mixture)
mixture_dataset[system_name] = {}

# Each setting is stored exactly once under its own key
# (the former dict literal listed "pair_style_coupling_vdw" twice).
mixture_dataset[system_name]["general_information"] = {
    "molecule_names_list": molecule_names_list,
    "molecule_graphs_list": molecule_graphs_list,
    "molecule_smiles_list": molecule_smiles_list,
    "molecule_charges_list": molecule_charges_list,
    "molecule_weights_list": molecule_weights_list,
    "pair_style_coupling_vdw": pair_style_coupling_vdw,
    "pair_style_coupling_coulomb": pair_style_coupling_coulomb,
    "sb_dict": sb_dict,
    "shake_dict": shake_dict,
}

# Write free energy settings: the composition matrix and, per free energy method, the coupling lambdas
mixture_dataset[system_name][system_key] = { "compositions": compositions }
mixture_dataset[system_name][system_key]["free_energy"] = { free_energy_method: { "lambdas_coupling": lambdas_coupling } }

# Define additional functions that can be passed to the LAMMPS input class. They can operate with class attributes, if the input arguments have the same name as the class argument.
# Furthermore define possible external function inputs that are also passed to the functions (per function define a new dictionary with inputs).
# In this example, write_decoupling_ff is a function that writes the coupling van der Waals / Coulomb pair interactions. External parameters are the coupling lambda, free energy method, etc...
external_functions = [ write_decoupling_ff ]

## 2. Build each mixture system 

Build the systems to simulate the self-solvation energy of each molecule, as well as the mixture solvation free energies.

In [3]:
# Add thermodynamic properties to mixture dataset
mixture_dataset[system_name][system_key]["thermodynamic_settings"] = {}

# Decide if PLAYMOL should build system.
# When False, no PLAYMOL input is written in this cell; the system xyz path derived from the
# PLAYMOL input path is nevertheless passed to write_lammps_data further below.
build_playmol = False

# Loop through the components and build each system and write LAMMPS data and input files.
# Per component: estimate the state points, then per state point write the xyz / data files and
# one LAMMPS input per interaction type (vdW / Coulomb) and per coupling lambda.
for i, molecule_name in enumerate(molecule_names_list):

    print(f"\nInsertion of {molecule_name}\n")

    # Define system name
    sub_system = f"{molecule_name}_coupled"

    # Path to working folder
    working_folder = f"simulation_systems/{system_name}/{system_key}/{sub_system}"
    
    # Get the mixture conditions with PC-SAFT based on the mole fractions of the current molecule in the iteration. 
    # (Hence, always pass the SMILES of the current component as first entry)
    remaining_idx = [index for index, _ in enumerate(molecule_smiles_list) if index != i]
    
    # Properties of all components except the current one (same order as in the mixture lists)
    remaining_names   = np.array(molecule_names_list)[remaining_idx].tolist()
    remaining_graphs  = np.array(molecule_graphs_list)[remaining_idx].tolist()
    remaining_smiles  = np.array(molecule_smiles_list)[remaining_idx].tolist()
    remaining_charges = np.array(molecule_charges_list)[remaining_idx].tolist()

    temperatures, pressures, densities, activity_coeff = get_initial_conditions( SAFT_parameter_file = SAFT_parameter_file, molecule_smiles = [molecule_smiles_list[i], *remaining_smiles ], 
                                                                                 compositions = compositions[i][i], temperature = temperature, pressure = pressure )
    
    # Save estimated starting conditions in dataset
    mixture_dataset[system_name][system_key]["thermodynamic_settings"][molecule_name] =  { "composition": compositions[i][i], "temperatures": temperatures, 
                                                                                           "pressures": pressures, "densities": densities, "activity_coeff":activity_coeff }

    # One state point per mole fraction of the current component (diagonal entry of the composition matrix)
    for j, (xi, temp, press, dens) in enumerate( zip( compositions[i][i], temperatures, pressures, densities ) ):

        print(f"\nState point: xi = {xi}, T = {temp}, p = {press}, rho_mass = {dens}\n")


        ## Define general settings for the statepoint ##

        # Differentiate between self-solvation free energy (at xi = 1.0) and solvation free energy in mixture.
        # The current component is listed twice: once as the single coupled molecule and once as the regular species.
        mol_name_list    = [ molecule_name + "_coupled", molecule_name, *(remaining_names if xi < 1.0 else []) ]
        mol_graph_list   = [ molecule_graphs_list[i], molecule_graphs_list[i], *(remaining_graphs if xi < 1.0 else []) ]
        mol_smiles_list  = [ molecule_smiles_list[i], molecule_smiles_list[i], *(remaining_smiles if xi < 1.0 else []) ]
        mol_charges_list = [ molecule_charges_list[i], molecule_charges_list[i], *(remaining_charges if xi < 1.0 else []) ]

        # Define pair style for decoupling approach, if any component in the mixture is charged use Coulomb potential in LAMMPS.
        charged             = any( mol_charges_list )
        pair_style_coupling = f"{pair_style_coupling_vdw} {pair_style_coupling_coulomb}" if charged else f"{pair_style_coupling_vdw}"
 

        ## Get molecule xyz ##

        # Define output path for final xyz files
        xyz_destinations = [ f"{working_folder}/initial_coordinates/%s.xyz"%name for name in mol_name_list ]

        # Get the single molecule coordinates for each component (these are mixture dependent, as running atom numbers are introduced in the xyz files)
        get_molecule_coordinates( molecule_name_list = mol_name_list, molecule_graph_list = mol_graph_list, molecule_smiles_list = mol_smiles_list,
                                  xyz_destinations = xyz_destinations, template_xyz = template_xyz, verbose = False )


        ## Call the LAMMPS input class ##
        
        LAMMPS_class = LAMMPS_input( mol_str = mol_graph_list, ff_path = force_field_path )

        # Prepare LAMMPS force field with the given molecules
        LAMMPS_class.prepare_lammps_force_field()

        # Write PLAYMOL force field file using the LAMMPS_input class
        playmol_force_field_destination = f"{working_folder}/playmol_ff.playmol"

        LAMMPS_class.prepare_playmol_input( playmol_template = playmol_force_field_template, playmol_ff_path = playmol_force_field_destination )

        # Create per mixture the composition folder (if not already done)
        composition_path = f"{working_folder}/x{xi}"
        os.makedirs( composition_path, exist_ok = True )


        ## Prepare LAMMPS with molecules numbers and density of the system ##

        # Get the molecule numbers according to the mixture composition (utilize closing condition for current component) 
        # Also split the number of current molecules into 1 coupled molecule and N1 - 1 molecules.
        # Round to the nearest integer before casting: a plain integer cast truncates, so a mole fraction that
        # carries floating point error (e.g. 0.09999999999999998 * 900) would lose one molecule.
        remaining_numbers = [ np.rint( comp[j] * total_no_molecules ).astype("int") for comp in np.array(compositions[i])[remaining_idx] ]
        
        # This is fixed for binary mixtures. Need to be adapted to more complex systems
        if xi == 0:
            # Infinite dilution: only the coupled molecule of the current component is present
            molecule_numbers  = [ 1, 0, sum(remaining_numbers)-1]
        else:
            # The entries for the remaining components are dropped when none of their molecules are present
            molecule_numbers  = [ 1, total_no_molecules - sum(remaining_numbers)-1, *(remaining_numbers if sum(remaining_numbers) > 0 else []) ]
        
        LAMMPS_class.prepare_lammps_data (nmol_list = molecule_numbers, density = dens )

        # Write PLAYMOL input and execute using the LAMMPS_input class (if wanted.) --> the xyz will be in the same folder as the .mol
        # and will have the same name, just .xyz instead of .mol
        playmol_input_destination = f"{composition_path}/build/{sub_system}.mol"
        
        if build_playmol:
            # Paths inside the PLAYMOL input are given relative to the folder of the PLAYMOL input file
            playmol_relative_ff_path  = os.path.relpath(playmol_force_field_destination, os.path.dirname(playmol_input_destination))
            playmol_relative_xyz_path = [ os.path.relpath(xyz, os.path.dirname(playmol_input_destination)) for xyz in xyz_destinations ]

            LAMMPS_class.write_playmol_input( playmol_template = playmol_input_template, playmol_path = playmol_input_destination, 
                                              playmol_ff_path = playmol_relative_ff_path, xyz_paths = playmol_relative_xyz_path )

        # Write LAMMPS data file using the generated xyz file
        system_xyz              = playmol_input_destination.replace( ".mol", ".xyz" )
        LAMMPS_data_destination = f"{composition_path}/lammps.data"

        LAMMPS_class.write_lammps_data( xyz_path = system_xyz, data_template = LAMMPS_data_template, data_path = LAMMPS_data_destination )


        ## Prepare LAMMPS input files ##

        print("\nPrepare LAMMPS input files\n")
        
        # Write LAMMPS input file using the decoupling approach (perform an individual simulation for every lambda).
        # lambdas_coupling[i] holds two lists: index 0 are the van der Waals lambdas, index 1 the Coulomb lambdas.
        for k,lambda_coupling in enumerate( lambdas_coupling[i] ):
            lambda_key = "vdw" if k == 0 else "coulomb"

            for l,lamdas in enumerate(lambda_coupling):
                
                # Define lambda for van der Waals / Coulomb interaction coupling.
                # While the vdW lambdas are scanned, the Coulomb lambda is held at 1e-9;
                # while the Coulomb lambdas are scanned, the vdW lambda is held at 1.0.
                lambda_vdw     = lamdas if k == 0 else 1.0
                lambda_coulomb = lamdas if k == 1 else 1e-9

                # Define the sampling perturbation depending on the free energy method:
                # a fixed +-0.0001 for "TI", otherwise the distance to the previous / next lambda (0.0 at the ends)
                dlambda        = [-0.0001, 0.0001] if free_energy_method == "TI" else \
                                 [ 0.0 if l == 0 else lambda_coupling[l-1] - lamdas, 0.0 if l == len(lambda_coupling)-1 else lambda_coupling[l+1] - lamdas ]

                # Define sampling output files 
                # NOTE(review): the indices are concatenated without separator, so l == 0 yields "fep0-1.sampling"
                free_energy_output_files  = [ f"fep{l}{l-1}.sampling", f"fep{l}{l+1}.sampling" ]

                # Define LAMMPS input destination (the data file is referenced relative to the input file folder)
                LAMMPS_input_destination  = f"{composition_path}/{lambda_key}/{free_energy_method}/sim_{l}/lammps.in"
                relative_LAMMPS_data_path = os.path.relpath(LAMMPS_data_destination, os.path.dirname(LAMMPS_input_destination))
    
                LAMMPS_class.prepare_lammps_input( pair_style = pair_style_coupling, 
                                                   sb_dict    = sb_dict,
                                                   shake_dict = shake_dict )
                
                # Use the write coupling pair interactions function to prepare coupled pair interactions
                # (one input dictionary per entry in external_functions)
                external_function_input = [ { "free_energy_method": free_energy_method, "lambda_vdw": lambda_vdw, "lambda_coulomb": lambda_coulomb,
                                              "dlambda": dlambda, "free_energy_output_files": free_energy_output_files  } ]

                LAMMPS_class.write_lammps_input( input_path = LAMMPS_input_destination, template_path = LAMMPS_input_template, data_file = relative_LAMMPS_data_path,
                                                 temperature = temp, pressure = press, equilibration_time = 4e6, production_time = 2e6,
                                                 external_functions = external_functions, external_function_input = external_function_input )


Insertion of LJ1


State point: xi = 0.0, T = 89.88, p = 1.113, rho_mass = 1344.59


Prepare LAMMPS input files


State point: xi = 0.1, T = 89.88, p = 1.873, rho_mass = 1331.61


Prepare LAMMPS input files


State point: xi = 0.2, T = 89.88, p = 2.593, rho_mass = 1317.69


Prepare LAMMPS input files


State point: xi = 0.3, T = 89.88, p = 3.283, rho_mass = 1302.72


Prepare LAMMPS input files


State point: xi = 0.4, T = 89.88, p = 3.949, rho_mass = 1286.55


Prepare LAMMPS input files


State point: xi = 0.5, T = 89.88, p = 4.602, rho_mass = 1269.01


Prepare LAMMPS input files


State point: xi = 0.6, T = 89.88, p = 5.249, rho_mass = 1249.88


Prepare LAMMPS input files


State point: xi = 0.7, T = 89.88, p = 5.9, rho_mass = 1228.89


Prepare LAMMPS input files


State point: xi = 0.8, T = 89.88, p = 6.569, rho_mass = 1205.65


Prepare LAMMPS input files


State point: xi = 0.9, T = 89.88, p = 7.267, rho_mass = 1179.69


Prepare LAMMPS input files


State point: xi = 1.0, T = 89.88

## 3. Add reference results to dataset

In [4]:
# Experimental, simulative, ... reference results.
# The pressure / temperature lists use their own names so that they do not overwrite the module-level
# `pressure` and `temperature` system conditions that the system build cell passes to get_initial_conditions
# (re-running that cell after this one would otherwise use the reference lists as system condition).
liquid_composition    = [0.0, 0.1, 0.3, 0.5, 0.7 ,0.9, 1.0]
vapor_composition     = [0.0, 0.458, 0.741, 0.8554, 0.9237, 0.97586, 1.0]
reference_pressure    = [1.10179879, 1.80711695, 3.49320298, 4.440583  , 5.65506575, 6.52314965, 7.84196941]
reference_temperature = []
source                = "https://doi.org/10.1016/0378-3812(95)02795-G"

# Add reference results to databank (the dataset keys stay "pressure" and "temperature")
mixture_dataset[system_name][system_key]["reference"] = { "liquid_composition": liquid_composition, "vapor_composition": vapor_composition, 
                                                          "pressure": reference_pressure, "temperature": reference_temperature, "source": source }

# Save the dataset for this mixture in databank
work_json( file_path = databank_json, data = mixture_dataset, to_do = "append" )