https://mosdef.org/

In [10]:
import mbuild
import foyer 
import os
import warnings
warnings.filterwarnings("ignore")

In [11]:
# Set API Keys
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")


In [12]:
# Defien function to generate LAMMPS input file using MosdDEF

#Define inputs 
system_smiles = 'CCO'  # Ethanol for example
box_size = 4 # nano meter  
n_molecules = 0 # Number of molecules
density = 789 ## kg/m^3
forcefield_name = 'oplsaa' # OPLS-AA forcefield. Can be changed by available forcefileds in mbuild
system_name = 'ethanol_test' # Name of the system

# Load system using its SMILES strings
system_unparad = mbuild.load(system_smiles, smiles=True)

# assign name 
system_unparad.name = system_name

# build box
box = mbuild.Box(3*[box_size])

# Fill the box with the molecule of interest
# filled_box = mbuild.fill_box(compound=system_unparad, density=density, box=box, overlap=0.2)
filled_box = mbuild.fill_box(compound=system_unparad, n_compounds=1, box=box, overlap=0.2)

## apply the forcefield to the system
ff = foyer.Forcefield(name=forcefield_name)
filled_box_param = filled_box.to_parmed(infer_residues=True) # Parmed structure
filled_box_parametrized = ff.apply(filled_box_param) # ff applied


## Pass the parametrized system to a Lammps data file 

mbuild.formats.lammpsdata.write_lammpsdata(
   filled_box_parametrized, 
   str(system_name)+".data",
   atom_style="full",
   unit_style="real",
   use_rb_torsions=True,
)



No urey bradley terms detected, will use angle_style harmonic
RB Torsions detected, will use dihedral_style opls


In [13]:
## Write into function 

# Defien function to generate LAMMPS input file using MosdDEF
def MosdDEF_1_inp(smiles):
    #Define inputs 
    system_smiles = smiles     ##'CCO'  # Ethanol for example
    box_size = 4 # nano meter = 
    n_molecules = 1 # Number of molecules
    density = 789 ## kg/m^3
    forcefield_name = 'oplsaa' # OPLS-AA forcefield. Can be changed by available forcefileds in mbuild
    system_name = 'test' # Name of the system

    # Load system using its SMILES strings
    system_unparad = mbuild.load(system_smiles, smiles=True)

    # assign name 
    system_unparad.name = system_name

    # build box
    box = mbuild.Box(3*[box_size])

    # Fill the box with the molecule of interest
    # filled_box = mbuild.fill_box(compound=system_unparad, density=density, box=box, overlap=0.2)
    filled_box = mbuild.fill_box(compound=system_unparad, n_compounds=n_molecules, box=box, overlap=0.2)

    ## apply the forcefield to the system
    ff = foyer.Forcefield(name=forcefield_name)
    filled_box_param = filled_box.to_parmed(infer_residues=True) # Parmed structure
    filled_box_parametrized = ff.apply(filled_box_param) # ff applied


    ## Pass the parametrized system to a Lammps data file 
    mbuild.formats.lammpsdata.write_lammpsdata(
    filled_box_parametrized, 
    str(system_name)+".data",
    atom_style="full",
    unit_style="real",
    use_rb_torsions=True,
    )

    return print(f'\nLAMMPS data file generated for {n_molecules} {smiles} molecule(s).')

# Test the function 
test_smile = 'CCO'
MosdDEF_1_inp(smiles=test_smile)



No urey bradley terms detected, will use angle_style harmonic
RB Torsions detected, will use dihedral_style opls

LAMMPS data file generated for 1 CCO molecule(s).


Convert functio to tool 

In [18]:
from langchain.tools import BaseTool, StructuredTool, tool

@tool 
def MosdDEF_1_inp(smiles:str):
    """Function to create a data file for LAMMPS simulations. The input is a smiles string of a molecule"""
    #Define inputs 
    system_smiles = smiles     ##'CCO'  # Ethanol for example
    box_size = 4 # nano meter = 
    n_molecules = 1 # Number of molecules
    #density = 789 ## kg/m^3
    forcefield_name = 'oplsaa' # OPLS-AA forcefield. Can be changed by available forcefileds in mbuild
    system_name = 'test' # Name of the system

    # Load system using its SMILES strings
    system_unparad = mbuild.load(system_smiles, smiles=True)

    # assign name 
    system_unparad.name = system_name

    # build box
    box = mbuild.Box(3*[box_size])

    # Fill the box with the molecule of interest
    # filled_box = mbuild.fill_box(compound=system_unparad, density=density, box=box, overlap=0.2)
    filled_box = mbuild.fill_box(compound=system_unparad, n_compounds=n_molecules, box=box, overlap=0.2)

    ## apply the forcefield to the system
    ff = foyer.Forcefield(name=forcefield_name)
    filled_box_param = filled_box.to_parmed(infer_residues=True) # Parmed structure
    filled_box_parametrized = ff.apply(filled_box_param) # ff applied

    ## Pass the parametrized system to a Lammps data file 
    mbuild.formats.lammpsdata.write_lammpsdata(
    filled_box_parametrized, 
    str(system_name)+".data",
    atom_style="full",
    unit_style="real",
    use_rb_torsions=True,
    )

    return #print(f'\nLAMMPS data file generated for {n_molecules} {smiles} molecule(s).')

## Print name description and args of tool 
print(MosdDEF_1_inp.name)
print(MosdDEF_1_inp.description)
print(MosdDEF_1_inp.args)


MosdDEF_1_inp
MosdDEF_1_inp(smiles: str) - Function to create a data file for LAMMPS simulations. The input is a smiles string of a molecule
{'smiles': {'title': 'Smiles', 'type': 'string'}}


### Load LLM and give MOsdef tool so it can use it 


In [19]:
## Load default OpenAI chatbot
from langchain_openai import ChatOpenAI
from langchain.agents import AgentExecutor,create_react_agent # To load simple ReAct agent. Reason an act
from langchain import hub
import os 
import warnings
warnings. filterwarnings('ignore') 

# Define API key for OPenAI
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")

## Define LLM
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.0)

# Define list of tools the LLM is going to use 
tools = [MosdDEF_1_inp]

# Get the template prompt to use - you can modify this!
prompt = hub.pull("hwchase17/react")

## Read the prompt template 
print(prompt)

input_variables=['agent_scratchpad', 'input', 'tool_names', 'tools'] template='Answer the following questions as best you can. You have access to the following tools:\n\n{tools}\n\nUse the following format:\n\nQuestion: the input question you must answer\nThought: you should always think about what to do\nAction: the action to take, should be one of [{tool_names}]\nAction Input: the input to the action\nObservation: the result of the action\n... (this Thought/Action/Action Input/Observation can repeat N times)\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n\nBegin!\n\nQuestion: {input}\nThought:{agent_scratchpad}'


In [20]:
## Construct the ReAct agent by defining the llm, tools and prompt template
mosdef_1_Agent = create_react_agent(llm=llm,tools=tools,prompt=prompt)

# Create an agent executor by passing in the agent and tools
agent_executor = AgentExecutor(agent=mosdef_1_Agent, tools=tools, verbose=True)

In [21]:
## Run the agent executor

input = 'Create a Lammps input file for CCO'
response = agent_executor.invoke({"input":str(input)})
response



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should use the MosdDEF_1_inp function to create the Lammps input file for CCO.
Action: MosdDEF_1_inp
Action Input: "CCO"[0mNo urey bradley terms detected, will use angle_style harmonic
RB Torsions detected, will use dihedral_style opls
[36;1m[1;3mNone[0m[32;1m[1;3mThe MosdDEF_1_inp function should have created the Lammps input file for CCO.
Final Answer: The Lammps input file for CCO has been created.[0m

[1m> Finished chain.[0m


{'input': 'Create a Lammps input file for CCO',
 'output': 'The Lammps input file for CCO has been created.'}