In [5]:
import os, shutil
import copy, random, subprocess
import json, yaml, toml

from rdkit import Chem
from rdkit.Chem import Descriptors
from jinja2 import Template
from typing import List, Dict, Any, Callable
from MDSetup.tools.molecule import get_molecule_from_smiles
from MDSetup.tools.general import flatten_list, merge_nested_dicts, unique_by_key
from MDSetup.forcefield.writer import atoms_molecule, bonds_molecule, angles_molecule, dihedrals_molecule, SOFTWARE_LIST, SoftwareError, KwargsError
from MDSetup.forcefield.writer import atoms_topology, bonds_topology, angles_topology, dihedrals_topology, style_topology

In [None]:

from MDSetup.forcefield import forcefield

FOLDER_PRECISION = 1
JOB_PRECISION = 0

In [None]:
# shake_dict --> define directly via names and list
# fudgeLJ, fudgeQQ, mixing_rule --> from nonbonded 
# ["potential_kwargs", "rcut", "do_mixing", "mixing_rule"]
# tail, kspace_style, special_bonds, shake_dict

In [None]:
class MDsetup():
    """
    This class sets up structured and FAIR molecular dynamic simulations. It also has the capability to build a system based on a list of molecules.

    All settings are read from YAML files in the constructor. `write_topology` writes the molecule and
    topology files, `prepare_simulation` writes the input and job files for every state point.
    """

    def __init__( self, system_setup: str, simulation_default: str, simulation_ensemble: str, 
                  submission_command: str, simulation_sampling: str=""):
        """
        Initialize a new instance of the MDsetup class.

        Parameters:
         - system_setup (str): Path to the system setup YAML file. Containing all system settings.
         - simulation_default (str): Path to the simulation default YAML file. Containing all default MD settings.
         - simulation_ensemble (str): Path to the simulation ensemble YAML file. Containing all MD ensemble settings.
         - submission_command (str): Command to submit jobs to cluster.
         - simulation_sampling (str,optional): Path to the sampling YAML file. Containing all sampling settings. 
                                               This is only needed for LAMMPS.
        
        Returns:
            None
        """

        # Open the yaml files and extract the necessary information
        with open( system_setup ) as file: 
            self.system_setup = yaml.safe_load(file)
        
        with open( simulation_default ) as file:
            self.simulation_default = yaml.safe_load(file)

        with open( simulation_ensemble ) as file:
            self.simulation_ensemble = yaml.safe_load(file)

        if simulation_sampling:
            with open( simulation_sampling ) as file:
                self.simulation_sampling = yaml.safe_load(file)
        else:
            self.simulation_sampling = {}

        # Save molecules in the system (sort out molecules that are not present in system)
        self.system_molecules = [ mol for mol in self.system_setup["molecules"] if mol["number"] > 0 ]

        # Get the name (residue) list
        self.residues = [ mol["name"] for mol in self.system_molecules ]

        # Get molecular mass and number for each molecule.
        # The Descriptors submodule has to be imported explicitly; "from rdkit import Chem" alone does not load it.
        self.molar_masses = [ Descriptors.MolWt( Chem.MolFromSmiles( mol["smiles"] ) ) for mol in self.system_molecules ]
        self.molecule_numbers = [ mol["number"] for mol in self.system_molecules ]

        # Get conversion from AA to nm/AA (only GROMACS uses a factor different from 1)
        self.distance_conversion = 1/10 if self.system_setup["software"].lower() == "gromacs" else 1

        # Submission command for the cluster
        self.submission_command = submission_command

        # Create an analysis dictionary containing all files
        self.analysis_dictionary = {}

    def write_topology( self ):
        """
        Write the molecule files and the topology file of the system using the force field class.

        The files are written to system.folder/system.name/topology. Afterwards the paths of the
        topology file and of the coordinate files are stored in self.system_setup["paths"].

        Raises:
         - KeyError: If no force field paths are provided in the system setup.

        Returns:
            None
        """

        print("\nUtilize moleculegraph to generate molecule and topology files of every molecule in the system!\n")

        topology_folder = f'{self.system_setup["folder"]}/{self.system_setup["name"]}/topology'

        os.makedirs( topology_folder, exist_ok = True )

        if not any( self.system_setup["paths"]["force_field_paths"] ):
            raise KeyError("No force field paths provided in the system setup yaml file!")            

        # Prepare keyword arguments that are parsed to the functions 
        # Add all paths to templates and all nonbonded settings to template
        kwargs = { **self.system_setup["paths"], **self.simulation_default["nonbonded"] }

        if any( "nrexcl" in mol for mol in self.system_molecules ):
            kwargs["nrexcl"] = [ mol["nrexcl"] for mol in self.system_molecules ]

        # Call the force field class
        # NOTE(review): the guard above checks "force_field_paths" while this reads "force_field_files" -
        # confirm which key the system setup YAML really provides.
        ff_molecules = forcefield( smiles = [ mol["smiles"] for mol in self.system_molecules ],
                                   force_field_paths = self.system_setup["paths"]["force_field_files"] 
                                 ) 

        # Arguments that are passed explicitly must not be repeated inside **kwargs,
        # otherwise Python raises "got multiple values for keyword argument".
        explicit_molecule_args = ( "molecule_template", "molecule_path", "residues" )
        molecule_kwargs = { key: value for key, value in kwargs.items() if key not in explicit_molecule_args }

        # Write molecule files (including gro files in case of GROMACS)
        ff_molecules.write_molecule_files( molecule_template = self.system_setup["paths"]["molecule_template"], 
                                           molecule_path = topology_folder,
                                           residues = self.residues,
                                           **molecule_kwargs 
                                         )
        
        # Add written molecule files to kwargs
        kwargs["molecule_files"] = ff_molecules.molecule_files

        # Add residue and its numbers to kwargs
        kwargs["residue_dict"] = { mol["name"]: mol["number"] for mol in self.system_setup["molecules"] }

        explicit_topology_args = ( "topology_template", "topology_path", "system_name" )
        topology_kwargs = { key: value for key, value in kwargs.items() if key not in explicit_topology_args }

        # Write topology file
        ff_molecules.write_topology_file( topology_template = self.system_setup["paths"]["topology_template"],
                                          topology_path = topology_folder,
                                          system_name = self.system_setup["name"],
                                          **topology_kwargs
                                        )

        print("Done! Topology paths and molecule coordinates are added within the class.\n")

        # Add topology and gro files to class dictionary
        self.system_setup["paths"]["topology_file"] = ff_molecules.topology_file
        self.system_setup["paths"]["coordinates_files"] = ff_molecules.gro_files


    def prepare_simulation( self, folder_name: str, ensembles: List[str], simulation_times: List[float],
                            initial_systems: List[str]=[], copies: int=0, input_kwargs: Dict[str, Any]={}, 
                            on_cluster: bool=False, off_set: int=0, **kwargs
                            ):
        """
        Prepares the simulation by generating job files for each temperature and pressure combination specified in the simulation setup.
        The method checks if an initial configuration file is provided. 
        If not, it generates the initial configuration based on the provided molecule numbers and the build template. 
        It then generates input files for each ensemble in a separate folder and creates a job file for each copy of the simulation.
        The job files are collected in self.job_files (one list per state point).

        Parameters:
         - folder_name (str): Name of the subfolder where to perform the simulations.
                              Path structure is as follows: system.folder/system.name/folder_name
         - ensembles (List[str]): A list of ensembles to generate input files for. Definitions of each ensemble is provided in self.simulation_ensemble.
         - simulation_times (List[float]): A list of simulation times (ns) for each ensemble.
         - initial_systems (List[str], optional): A list of initial system files to be used for each temperature and pressure state.
         - copies (int, optional): Number of copies for the specified system. Defaults to 0.
         - input_kwargs (Dict[str, Any], optional): Further kwargs that are parsed to the input template. Defaults to "{}".
         - on_cluster (bool, optional): If the system build should be submitted to the cluster. Defaults to "False".
         - off_set (int, optional): First ensemble starts with 0{off_set}_ensemble. Defaults to 0.
         - **kwargs: Arbitrary keyword arguments, forwarded to the system building.

        Returns:
            None
        """
        self.job_files = []

        software = self.system_setup["software"]
        
        # Define simulation folder
        sim_folder = f'{self.system_setup["folder"]}/{self.system_setup["name"]}/{folder_name}'

        # Copy provided force field file to simulation folder
        os.makedirs( sim_folder, exist_ok = True )
        lammps_ff_file = shutil.copy( self.system_setup["paths"]["topology_file"], sim_folder )

        for i, (temperature, pressure, density) in enumerate( zip( self.system_setup["temperature"], 
                                                                   self.system_setup["pressure"], 
                                                                   self.system_setup["density"] ) ):
            
            job_files = []

            # Define folder for specific temp and pressure state
            state_folder = f"{sim_folder}/temp_{temperature:.{FOLDER_PRECISION}f}_pres_{pressure:.{FOLDER_PRECISION}f}"

            # Build system with MD software if none is provided
            if not initial_systems:

                print("\nBuilding system based on provided molecule numbers and coordinate files!\n" )

                # Get initial box lengths using density estimate
                box = get_system_volume( molar_masses = self.molar_masses, 
                                         molecule_numbers = self.molecule_numbers, 
                                         density = density,
                                         unit_conversion = self.distance_conversion,
                                         box_type = self.system_setup["box"]["type"],
                                         z_x_relation = self.system_setup["box"]["z_x_relation"], 
                                         z_y_relation= self.system_setup["box"]["z_y_relation"]
                                        )

                # Coordinates from molecule that are not present in the system are sorted out within the function.
                # Hence, parse the non filtered list of molecules and coordinates here.
                initial_coord = generate_initial_configuration( build_template = self.system_setup["paths"]["build_template"],
                                                                destination_folder = state_folder, 
                                                                software = software,
                                                                coordinate_paths = self.system_setup["paths"]["coordinates"], 
                                                                molecules_list = self.system_setup["molecules"], 
                                                                box = box,
                                                                submission_command = self.submission_command,
                                                                on_cluster = on_cluster,
                                                                **kwargs
                                                              )
            
                flag_restart = False
            else:
                initial_coord = initial_systems[i]
                print(f"\nIntial system provided for at: {initial_coord}\n")

                # Only swap the file extension; str.replace would also alter matching text elsewhere in the path
                checkpoint_file = f"{os.path.splitext( initial_coord )[0]}.cpt"
                flag_restart = ".restart" in initial_coord or os.path.exists( checkpoint_file )
                if flag_restart: 
                    print("Restart file is provided. Continue simulation from there!\n")

            # Settings forwarded to the input template. They are spread as individual keyword arguments.
            # The explicit entries are added last so they win over same-named user settings.
            input_settings = { **self.simulation_default,
                               **self.simulation_sampling, 
                               **input_kwargs,
                               "restart_flag": flag_restart,
                               "data_file": initial_coord,
                               "ff_file": lammps_ff_file }

            # Define folder for each copy
            for copy in range( copies + 1 ):
                copy_folder = f"{state_folder}/copy_{copy}"

                # Produce input files (for each ensemble an own folder 0x_ensemble)
                input_files = generate_input_files( destination_folder = copy_folder, 
                                                    input_template = self.system_setup["paths"]["input_template"],
                                                    software = software,
                                                    ensembles = ensembles, 
                                                    temperature = temperature, 
                                                    pressure = pressure,
                                                    simulation_times = simulation_times,
                                                    dt = self.simulation_default["system"]["dt"], 
                                                    ensemble_definition = self.simulation_ensemble,
                                                    off_set = off_set,
                                                    **input_settings
                                                    )
                
                # Create job file
                # NOTE(review): all other templates are read from flat keys ("input_template", "build_template", ...),
                # only the job template is read from paths["template"]["job_file"] - confirm against the YAML schema.
                job_files.append( generate_job_file( destination_folder = copy_folder, 
                                                     job_template = self.system_setup["paths"]["template"]["job_file"], 
                                                     software = software,
                                                     input_files = input_files, 
                                                     ensembles = ensembles,
                                                     job_name = f'{self.system_setup["name"]}_{temperature:.{JOB_PRECISION}f}_{pressure:.{JOB_PRECISION}f}',
                                                     job_out = f"job_{temperature:.{JOB_PRECISION}f}_{pressure:.{JOB_PRECISION}f}.sh", 
                                                     off_set = off_set 
                                                    ) 
                                )
                
            self.job_files.append( job_files )

In [None]:
# gromacs: intial_coord, initial_topo, initial_cpt, extend_sim
# lammps:

In [None]:
def generate_job_file( destination_folder: str, job_template: str, input_files: List[str],
                       ensembles: List[str], job_name: str, job_out: str="job.sh", 
                       off_set: int=0 ):
    """
    Placeholder for the job file generation.

    This cell only contained the function header without a body, which is a syntax error.
    The name is defined again in the following cells of this notebook, which supersede this placeholder.

    Raises:
     - NotImplementedError: Always, as this placeholder has no implementation.
    """
    raise NotImplementedError("generate_job_file is only a placeholder in this cell. Use the definition of the following cells.")

In [None]:
def generate_job_file( destination_folder: str, job_template: str, input_files: List[str], 
                       ensembles: List[str], job_name: str, job_out: str="job.sh",
                       off_set: int=0, **kwargs ):
    """
    Generate initial job file for a set of simulation ensemble (GROMACS pipeline with grompp/mdrun commands).

    Note: a later cell of this notebook defines a function with the same name, which replaces this one
    once it is executed.

    Parameters:
     - destination_folder (str): Path to the destination folder where the job file will be created.
     - job_template (str): Path to the job template file.
     - input_files (List[str]): List containing the paths to the MDP files for each simulation phase.
     - ensembles (List[str]): List of simulation ensembles.
     - job_name (str): Name of the job.
     - job_out (str, optional): Name of the job file. Defaults to "job.sh".
     - off_set (int, optional): First ensemble starts with 0{off_set}_ensemble. Defaults to 0.

    Keyword Args:
     - initial_topo (str): Path to the initial topology file for GROMACS. Required.
     - intial_coord (str): Path to the initial coordinate file for GROMACS. Required. 
                           The correctly spelled "initial_coord" is accepted as well.
     - initial_cpt (str): Path to the initial checkpoint file for GROMACS. Defaults to "".
     - extend_sim (bool): If a cpt file is provided, extend the first simulation of the pipeline. Defaults to False.

    Returns:
     - job_file (str): Path of job file

    Raises:
     - KeyError: If the initial topology or the initial coordinate file is not provided.
     - FileNotFoundError: If the job template file does not exist.
     - FileNotFoundError: If any of the MDP files does not exist.
     - FileNotFoundError: If the initial coordinate file does not exist.
     - FileNotFoundError: If the initial topology file does not exist.
     - FileNotFoundError: If the initial checkpoint file does not exist.
    """

    # Read the GROMACS specific settings from the keyword arguments
    initial_topo = kwargs.get( "initial_topo", "" )
    intial_coord = kwargs.get( "intial_coord", kwargs.get( "initial_coord", "" ) )
    initial_cpt  = kwargs.get( "initial_cpt", "" )
    extend_sim   = kwargs.get( "extend_sim", False )

    missing = [ key for key, value in ( ("initial_topo", initial_topo), ("intial_coord", intial_coord) ) if not value ]
    if missing:
        raise KeyError(f"Missing keyword arguments: {', '.join(missing)}")

    mdp_files = input_files

    # Check if job template file exists
    if not os.path.isfile( job_template ):
        raise FileNotFoundError(f"Job template file { job_template } not found.")

    # Check for mdp files
    for file in mdp_files:
        if not os.path.isfile( file ):
            raise FileNotFoundError(f"Mdp file { file  } not found.")
    
    # Check if topology file exists
    if not os.path.isfile( initial_topo ):
        raise FileNotFoundError(f"Topology file { initial_topo } not found.")

    # Check if coordinate file exists
    if not os.path.isfile( intial_coord ):
        raise FileNotFoundError(f"Coordinate file { intial_coord } not found.")
    
    # Check if checkpoint file exists
    if initial_cpt and not os.path.isfile( initial_cpt ):
        raise FileNotFoundError(f"Checkpoint file { initial_cpt } not found.")
    
    with open(job_template) as f:
        template = Template(f.read())

    job_file_settings = { "ensembles": { f"{'0' if (j+off_set) < 10 else ''}{j+off_set}_{step}": {} for j,step in enumerate(ensembles)} }
    ensemble_names    = list(job_file_settings["ensembles"].keys())

    # Create the simulation folder
    os.makedirs( destination_folder, exist_ok = True )

    # Relative paths for each mdp file for each simulation phase
    mdp_relative  = [ os.path.relpath( mdp_files[j], f"{destination_folder}/{step}" ) for j,step in enumerate(ensemble_names) ]

    # Relative paths for each coordinate file (the first step uses the initial coordinates, otherwise use the preceding output)
    cord_relative = [ f"../{ensemble_names[j-1]}/{ensembles[j-1]}.gro" if j > 0 else os.path.relpath( intial_coord, f"{destination_folder}/{step}" ) for j,step in enumerate(ensemble_names) ]

    # Relative paths for each checkpoint file 
    cpt_relative  = [ f"../{ensemble_names[j-1]}/{ensembles[j-1]}.cpt" if j > 0 else os.path.relpath( initial_cpt, f"{destination_folder}/{step}" ) if initial_cpt and not ensembles[j] == "em" else "" for j,step in enumerate(ensemble_names) ]

    # Relative paths for topology
    topo_relative = [ os.path.relpath( initial_topo, f"{destination_folder}/{step}" ) for step in ensemble_names ]

    # output file 
    out_relative  = [ f"{step}.tpr -maxwarn 10" for step in ensembles]

    for j,step in enumerate(ensemble_names):

        # The first step has no preceding step (index -1 would wrap around to the last ensemble)
        previous_is_em = j > 0 and ensembles[j-1] == "em"

        # If this or the preceding step is energy minimization, or if there is no cpt file to read in
        if previous_is_em or ensembles[j] == "em" or not cpt_relative[j]:
            job_file_settings["ensembles"][step]["grompp"] = f"grompp -f {mdp_relative[j]} -c {cord_relative[j]} -p {topo_relative[j]} -o {out_relative[j]}"
        else:
            job_file_settings["ensembles"][step]["grompp"] = f"grompp -f {mdp_relative[j]} -c {cord_relative[j]} -p {topo_relative[j]} -t {cpt_relative[j]} -o {out_relative[j]}"
        
        # Define mdrun command
        if j == 0 and initial_cpt and extend_sim:
            # In case extension of the first simulation in the pipeline is wanted
            job_file_settings["ensembles"][step]["grompp"] = f"grompp -f {mdp_relative[j]} -c {ensembles[j]}.gro -p {topo_relative[j]} -t {ensembles[j]}.cpt -o {out_relative[j]}"
            job_file_settings["ensembles"][step]["mdrun"] = f"mdrun -deffnm {ensembles[j]} -cpi {ensembles[j]}.cpt" 
        else: 
            job_file_settings["ensembles"][step]["mdrun"] = f"mdrun -deffnm {ensembles[j]}" 

    # Define LOG output
    log_path   = f"{destination_folder}/LOG"

    # Add to job file settings
    job_file_settings.update( { "job_name": job_name, "log_path": log_path, "working_path": destination_folder } )

    rendered = template.render( **job_file_settings )

    # Create the job folder
    job_file = f"{destination_folder}/{job_out}"

    os.makedirs( os.path.dirname( job_file ), exist_ok = True )

    # Write new job file
    with open( job_file, "w") as f:
        f.write( rendered )

    return job_file

In [None]:
def generate_job_file( destination_folder: str, job_template: str, software: str,
                       input_files: List[str], ensembles: List[str], job_name: str, 
                       job_out: str="job.sh", off_set: int=0 ):
    
    """
    Render the job template for a pipeline of simulation ensembles and write it to disk.

    Every ensemble gets an entry named "{index + off_set, zero padded to two digits}_{ensemble}", whose
    "mdrun" value is the path of the corresponding input file relative to destination_folder/entry name.
    The template is rendered with the keys "ensembles", "job_name", "log_path" and "working_path".

    Parameters:
     - destination_folder (str): Path to the destination folder where the job file will be created.
     - job_template (str): Path to the job template file.
     - software (str): The simulation software ('gromacs' or 'lammps'). Currently not used within this function.
     - input_files (List[str]): Paths to the input files, one for each simulation phase.
     - ensembles (List[str]): List of simulation ensembles
     - job_name (str): Name of the job.
     - job_out (str, optional): Name of the job file. Defaults to "job.sh".
     - off_set (int, optional): First ensemble starts with 0{off_set}_ensemble. Defaults to 0.

    Returns:
     - job_file (str): Path of job file

    Raises:
     - FileNotFoundError: If the job template file does not exist.
     - FileNotFoundError: If any of the input files does not exist.
    """

    # The template as well as every input file has to exist
    if not os.path.isfile( job_template ):
        raise FileNotFoundError(f"Job template file { job_template } not found.")

    for candidate in input_files:
        if os.path.isfile( candidate ):
            continue
        raise FileNotFoundError(f"Input file { candidate  } not found.")
    
    with open( job_template ) as handle:
        job_template_object = Template( handle.read() )

    # One (initially empty) settings entry per ensemble, keyed by its numbered folder name
    per_ensemble = {}
    for index, ensemble in enumerate( ensembles ):
        number  = index + off_set
        padding = "0" if number < 10 else ""
        per_ensemble[ f"{padding}{number}_{ensemble}" ] = {}

    # Create the simulation folder
    os.makedirs( destination_folder, exist_ok = True )

    # Input file of each phase, relative to the folder of that phase
    for index, folder_name in enumerate( list( per_ensemble ) ):
        phase_folder = f"{destination_folder}/{folder_name}"
        per_ensemble[folder_name]["mdrun"] = os.path.relpath( input_files[index], phase_folder )

    # Collect everything the template gets
    render_settings = { "ensembles": per_ensemble,
                        "job_name": job_name,
                        "log_path": f"{destination_folder}/LOG",
                        "working_path": destination_folder }

    job_text = job_template_object.render( **render_settings )

    # Make sure the folder of the job file exists and write it
    job_file = f"{destination_folder}/{job_out}"
    os.makedirs( os.path.dirname( job_file ), exist_ok = True )

    with open( job_file, "w" ) as handle:
        handle.write( job_text )

    return job_file

In [None]:
def generate_input_files( destination_folder: str, input_template: str, software: str,
                          ensembles: List[str], temperature: float, pressure: float,
                          ensemble_definition: Dict[str, Any|Dict[str, str|float]], 
                          simulation_times: List[float], dt: float, off_set: int=0, **kwargs ):
    
    """
    Generate input files for simulation pipeline.

    Parameters:
     - destination_folder (str): The destination folder where the input files will be saved. Will be saved under destination_folder/0x_ensebmle/ensemble.input
     - input_template (str): The path to the input template file.
     - software (str): The simulation software to format the output for ('gromacs' or 'lammps').
     - ensembles (List[str]): A list of ensembles to generate input files for.
     - temperature (float): The temperature for the simulation.
     - pressure (float): The pressure for the simulation.
     - ensemble_definition (Dict[str, Any|Dict[str, str|float]]): Dictionary containing the ensemble settings for each ensemble.
     - simulation_times (List[float]): A list of simulation times (ns) for each ensemble.
     - dt (float): The time step for the simulation.
     - off_set (int, optional): First ensemble starts with 0{off_set}_ensemble. Defaulst to 0.
     - **kwargs (Any): Arbitrary keyword arguments.
    
    Keyword Args:
     - data_file (str): Absolute path of LAMMPS data file for LAMMPS.
     - ff_file (str): Absolute path of LAMMPS force field file for LAMMPS.
     - compressibility (float): Compressibility of the system for GROMACS.
     - init_step (int): Initial step to continue simulation for GROMACS.

    Raises:
     - KeyError: If an invalid ensemble is specified.
     - FileNotFoundError: If any input file does not exists.

    Returns:
     - input_files (List[str]): List with paths of the input files

    """

    # Check if input template file exists
    if not os.path.isfile( input_template ):
        raise FileNotFoundError(f"Input template file { input_template } not found.")
    
    # Open template
    with open( input_template ) as f:
        template = Template( f.read() )

    if software.lower() == "gromacs":
        
        # Check necessary input kwargs
        KwargsError( ["compressibility","init_step"], kwargs.keys() )

        # nano in pico second
        time_conversion = 1e3

        # Define file suffix based on software
        suffix = "mdp"

    elif software.lower() == "lammps":

        # Check necessary input kwargs
        KwargsError( ["data_file","ff_file"], kwargs.keys() )

        # nano in femto second
        time_conversion = 1e6

        # Define file suffix based on software
        suffix = "input"

        # Check if datafile exists
        if not os.path.isfile( kwargs['data_file'] ):
            raise FileNotFoundError(f"Data file { kwargs['data_file'] } not found.")
    
        # Check if force field file exists
        if not os.path.isfile( kwargs['ff_file'] ):
            raise FileNotFoundError(f"Force field file { kwargs['ff_file'] } not found.")


    # Save ensemble names
    ensemble_names = [ f"{'0' if (j+off_set) < 10 else ''}{j+off_set}_{step}" for j,step in enumerate(ensembles) ]

    # Define template dictionary
    renderdict = { **kwargs }

    # Produce input files for simulation pipeline
    input_files = []

    for j,(ensemble,time) in enumerate( zip( ensembles, simulation_times ) ):
        
        try:
            ensemble_settings = ensemble_definition[ensemble]
        except:
            raise KeyError(f"Wrong ensemple specified: {ensemble}. Valid options are: {', '.join(ensemble_definition.keys())} ")
        
        # Ensemble name
        renderdict["ensemble_name"] = ensemble

        # Output file
        input_out = f"{destination_folder}/{'0' if (j+off_set) < 10 else ''}{j+off_set}_{ensemble}/{ensemble}.{suffix}"

        if software.lower() == "gromacs":

            # Add temperature of sim to ensemble settings
            if "t" in ensemble_settings.keys():
                ensemble_settings["t"]["ref_t"] = temperature
            
            # Add pressure and compressibility to ensemble settings
            if "p" in ensemble_settings.keys():
                ensemble_settings["p"].update( { "ref_p": pressure, "compressibility": kwargs["compressibility"] } )

            # Overwrite the ensemble settings
            renderdict["ensemble"] = ensemble_settings

            # Define if restart
            renderdict["restart_flag"] = "no" if ensemble == "em" or ensembles[j-1] == "em" else "yes"

            # Add extension to first system (if wanted)
            if j == 0 and kwargs["init_step"] > 0:
                renderdict["system"]["init_step"] = kwargs["init_step"]

        elif software.lower()() == "lammps":

            # Add ensemble variables
            values = []
            for v in ensemble_settings["variables"]:
                if v == "temperature":
                    values.append( temperature )
                elif v == "pressure":
                    values.append( round( pressure / 1.01325, 3 ) )
                else:
                    raise KeyError(f"Variable is not implemented: '{v}'. Currently implemented are 'temperature' or 'pressure'. ")

            # Overwrite the ensemble settings
            renderdict["ensemble"] = { "var_val": zip(ensemble_settings["variables"],values), 
                                       "command": ensemble_settings["command"] 
                                     }

            renderdict["force_field_file"] = os.path.relpath( kwargs['ff_file'], os.path.dirname(input_out) )

            # If its the first ensemble use provided data path, otherwise use the previous restart file. Hence set restart flag
            if j == 0:
                renderdict["data_file"] = os.path.relpath( kwargs['data_file'], os.path.dirname(input_out) )
                renderdict["restart_flag"] = False
            else:
                renderdict["data_file"] = f"../{ensemble_names[j-1]}/{ensembles[j-1]}.restart"
                renderdict["restart_flag"] = True

        # Simulation time is provided in nano seconds and dt in pico/fico seconds, hence multiply with factor 1e3/1e6
        renderdict["system"]["nsteps"] = int( time_conversion * time / dt ) if not ensemble == "em" else int(time)
        renderdict["system"]["dt"] = dt

        # Provide a seed for tempearture generating:
        renderdict["seed"] = np.random.randint(0,1e5)
        
        # Create the destination folder
        os.makedirs( os.path.dirname( input_out ), exist_ok = True )

        # Render template
        rendered = template.render( ** kwargs ) 
        
        with open( input_out, "w" ) as f:
            f.write( rendered )
            
        input_files.append( input_out )

    return input_files

In [None]:
def generate_initial_configuration( destination_folder: str, build_template: str, software: str,
                                    coordinate_paths: List[str], 
                                    molecules_list: List[Dict[str, str|int]], box: Dict[str,float], 
                                    on_cluster: bool=False, initial_system: str="",
                                    n_try: int=10000, submission_command: str="qsub",
                                    **kwargs ):
    """
    Generate initial configuration for molecular dynamics simulation with GROMACS.

    Parameters:
     - destination_folder (str): The destination folder where the initial configurations will be saved.
     - build_template (str): Template for system building.
     - software (str): The simulation software to format the output for ('gromacs' or 'lammps').
     - coordinate_paths (List[str]): List of paths to coordinate files (GRO format) for each ensemble.
     - molecules_list (List[Dict[str, str|int]]): List with dictionaries with numbers and names of the molecules.
     - box (Dict[str,float]): List of box lengths for each ensemble. Provide [] if build_intial_box is false.
     - on_cluster (bool, optional): If the GROMACS build should be submited to the cluster. Defaults to "False".
     - initial_system (str, optional): Path to initial system, if initial system should be used to add molecules rather than new box. Defaults to "".
     - n_try (int, optional): Number of attempts to insert molecules. Defaults to 10000.
     - submission_command (str, optional): Command to submit jobs for cluster,
     - **kwargs (Any): Arbitrary keyword arguments.

    Keyword Args:
     - 

    Returns:
     - intial_coord (str): Path of inital configuration

    """
    # Define box folder
    build_folder = f"{destination_folder}/build"

    # Create and the output folder of the box
    os.makedirs( build_folder, exist_ok = True )

    # Check if job template file exists
    if not os.path.isfile( build_template ):
        raise FileNotFoundError(f"Build template file { build_template } not found.")
    else:
        with open( build_template ) as f:
            template = Template( f.read() )
    
    # Define output coordinate 
    suffix = "gro" if software.lower() == "gromacs" else "data" if software.lower() == "lammps" else ""
    intial_coord = f"{build_folder}/init_conf.{suffix}"

    # Sort out molecules that are zero
    non_zero_coord_mol_no = [ (coord, value["name"], value["number"]) for coord,value in zip(coordinate_paths,molecules_list) if value["number"] > 0 ]

    # Define template settings
    template_settings = { "coord_mol_no": non_zero_coord_mol_no, 
                          "box": box,
                          "initial_system": initial_system,
                          "n_try": n_try,
                          "folder": build_folder,
                          "output_coord": intial_coord,
                          **kwargs
                        }

    # Define output file
    bash_file = f"{build_folder}/build_box.sh"

    # Write bash file
    with open( bash_file, "w" ) as f:
        f.write( template.render( template_settings ) )

    if on_cluster:
        print("\nSubmit build to cluster and wait untils it is finished.\n")
        submit_and_wait( job_files = [ bash_file ], submission_command = submission_command )
    else:
        print("\nBuild system locally! Wait until it is finished.\n")
        # Call the bash to build the box. Write output to file.
        with open(f"{build_folder}/build_output.txt", "w") as f:
            subprocess.run(["bash", f"{build_folder}/build_box.sh"], stdout=f, stderr=f)

    # Check if the system is build 
    if not os.path.isfile( intial_coord ):
        raise FileNotFoundError(f"Something went wrong during the box building! { intial_coord } not found.")
    print("Build successful\n")

    return intial_coord

In [None]:
## TODO:
# SMILES and force field matching --> topology for the atom types
# LAMMPS building template


In [16]:

# Demo: set up the force field class for a butane / hexane system.
# NOTE(review): the paths below are absolute paths of a local machine; they have to be adapted to run this cell elsewhere.

# SMILES of the molecules and the force field file(s) that parametrize them
smiles = [ "CCCC", "CCCCCC" ]
force_field_paths = [ "/Users/samir/Documents/Coding_libaries/pyLMP/force-fields/forcefield_UA_TAMie_alkanes.toml" ]

# Instantiate the force field class (prints the software the force field is provided for)
ff = forcefield( smiles = smiles, force_field_paths = force_field_paths )

# Residue names, one for each SMILES
residues = ["butane","hexane"]


# Suffix of the molecule files
files_suffix = "mol"

# Template of the LAMMPS molecule file
template = "/Users/samir/Documents/Coding_libaries/pyLMP/templates/lammps/template_lammps.mol"

# Disabled call: `kwargs` is not defined in this cell, so it has to be provided before enabling it
#ff.write_molecule_files( template, ".", residues, files_suffix, **kwargs )


Force field provided for software 'LAMMPS'
