In [1]:
import pydantic
from pydantic import BaseModel
from openff.toolkit import Molecule
from openpharmmdflow.io.load import load_file
from openpharmmdflow.bespokefit import build_bespoke_workflow_factory, run_bespokefit
from openff.models.types import FloatQuantity
import numpy as np
from openff.interchange.components._packmol import UNIT_CUBE, pack_box
from openff.toolkit import ForceField
from openff.interchange import Interchange
from openpharmmdflow.pipeline.sm import create_simulation, run_simulation



In [2]:
# Check the concrete type of UNIT_CUBE: it is the default value of the
# ``box_shape`` field, which is declared as ``np.ndarray`` in the pack config.
type(UNIT_CUBE)

numpy.ndarray

In [3]:
from pathlib import Path


class SmallMoleculePipelineInputConfig(BaseModel):
    """One input molecule: a name plus the file it is read from.

    The pipeline's ``load`` step passes ``path`` to ``load_file`` and assigns
    ``name`` to the molecule that call returns.
    """

    # TODO: support SMILES
    # Label given to the loaded molecule; also the key it is stored under.
    name: str
    # File handed to ``load_file``.
    path: Path


class BespokeWorkflowFactoryConfig(BaseModel):
    """Options handed to ``build_bespoke_workflow_factory``.

    Every field has a default, so ``BespokeWorkflowFactoryConfig()`` is valid.
    How each option is interpreted is decided by that helper, which is not
    defined in this notebook.
    """

    # Force field file name; presumably the starting point that the bespoke
    # fit refines — confirm in build_bespoke_workflow_factory.
    initial_force_field: str = "openff-2.2.0.offxml"
    # The ``qc_*`` fields presumably describe the quantum-chemistry
    # specification (method, basis, program, label, description) — TODO confirm.
    # The defaults select the "xtb" program with the "gfn2xtb" method and no basis.
    qc_method: str = "gfn2xtb"
    qc_basis: str | None = None
    qc_program: str = "xtb"
    qc_spec_name: str = "xtb"
    qc_spec_description: str = "gfn2xtb"


class BespokeExecutorConfig(BaseModel):
    """Worker counts for the bespoke-fit executor.

    All fields default to small values, so ``BespokeExecutorConfig()`` is
    valid. Nothing in this notebook reads these fields directly; they travel
    inside ``BespokefitConfig`` to ``run_bespokefit``.
    """

    n_fragmenter_workers: int = 1
    n_optimizer_workers: int = 1
    n_qc_compute_workers: int = 2
    n_bespoke_workers: int = 1
    # NOTE(review): looks like a near-duplicate of ``n_fragmenter_workers``
    # above — confirm which of the two ``run_bespokefit`` actually reads.
    n_fragment_workers: int = 1


class BespokefitConfig(BaseModel):
    """Everything needed for one bespoke-fit run on a single molecule.

    The pipeline's ``prep`` step passes ``bespoke_workflow_factory_config`` to
    ``build_bespoke_workflow_factory`` and this whole object to
    ``run_bespokefit``.
    """

    # Right now this works to optimize one small molecule.
    # We could hack together something that handles several, but it would
    # be best to run this out of band and just load in the
    # bespoke force field.
    bespoke_workflow_factory_config: BespokeWorkflowFactoryConfig
    bespoke_executor_config: BespokeExecutorConfig
    # Presumably controls whether the fitted force field is written out —
    # not read anywhere in this notebook; confirm in run_bespokefit.
    save_bespoke_ff: bool = True
    # Name of the loaded molecule to fit; ``prep`` uses it as the key into
    # the pipeline's ``loaded_mols``. Required (no default).
    mol_to_bespoke: str


class SmallMoleculePipelinePrepConfig(BaseModel):
    """Settings for the ``prep`` stage; currently only the bespoke-fit settings."""

    # Consumed by ``SmallMoleculePipeline.prep``.
    bespokefit_config: BespokefitConfig


class SmallMoleculePipelinePackConfig(BaseModel):
    """Settings for the packing stage; the fields are forwarded to ``pack_box``.

    ``molecule_names`` and ``number_of_copies`` are parallel lists: entry *i*
    of ``number_of_copies`` is the copy count for entry *i* of
    ``molecule_names``. A length mismatch is rejected when the config is built
    rather than surfacing later inside the packing call.

    Raises:
        pydantic.ValidationError: if the two lists differ in length (or any
            field fails its type validation).
    """

    # TODO: add validator for box_shape
    # Names of loaded molecules to pack (keys of the pipeline's ``loaded_mols``).
    molecule_names: list[str]
    # Copy count for each entry of ``molecule_names``, in the same order.
    number_of_copies: list[int]
    # Target density; a bare number is interpreted in g/cm**3 by FloatQuantity.
    mass_density: FloatQuantity["g/cm**3"]
    # Passed through to ``pack_box`` as ``box_shape``.
    box_shape: np.ndarray = UNIT_CUBE

    class Config:
        # np.ndarray is not a type pydantic knows how to validate natively.
        arbitrary_types_allowed = True

    @pydantic.validator("number_of_copies")
    def check_one_count_per_molecule(cls, number_of_copies, values):
        """Require exactly one copy count per molecule name."""
        # ``molecule_names`` is declared first, so it is already validated
        # here. It is absent from ``values`` only when its own validation
        # failed; that failure is reported on its own, so skip the comparison.
        names = values.get("molecule_names")
        if names is not None and len(names) != len(number_of_copies):
            raise ValueError(
                f"number_of_copies has {len(number_of_copies)} entries but "
                f"molecule_names has {len(names)}; provide one count per "
                "molecule name"
            )
        return number_of_copies


class SmallMoleculePipelineParameterizeConfig(BaseModel):
    """Settings for the ``parameterize`` stage."""

    # Force field to apply when no bespoke force field was produced by
    # ``prep``. ``parameterize`` wraps any value that is not already a
    # ``ForceField`` instance in ``ForceField(...)``.
    # NOTE(review): ``None`` is accepted by the type but would be passed to
    # ``ForceField(None)`` — confirm that is intended.
    force_field: ForceField | str | Path | None = "openff-2.2.1.offxml"

    class Config:
        # ForceField is not a type pydantic knows how to validate natively.
        arbitrary_types_allowed = True


class SmallMoleculePipelineSimulateConfig(BaseModel):
    """Settings passed to ``create_simulation`` and ``run_simulation``.

    Units are presumably those named in the field suffixes (K, fs, bar) —
    confirm against ``openpharmmdflow.pipeline.sm``, which is not shown here.
    """

    # Presumably the number of steps between trajectory frames — TODO confirm.
    pdb_stride: int = 500
    trajectory_name: str = "trajectory.pdb"
    temp_k: float = 300
    # NOTE(review): declared as int, so a fractional time step (e.g. 0.5)
    # cannot be represented faithfully — consider float if sub-fs steps matter.
    time_step_fs: int = 1
    pressure_bar: float = 1
    n_steps: int = 5000


class SmallMoleculePipelineAnalyizeConfig(BaseModel):
    """Placeholder for analysis-stage settings; it declares no fields yet."""


class SmallMoleculePipelineConfig(BaseModel):
    """Top-level configuration: one sub-config per pipeline stage.

    ``prep_config`` is optional. Its default is spelled out as ``None`` so the
    field is optional regardless of the installed pydantic major version
    (v1 infers a ``None`` default for nullable fields, v2 does not).
    """

    # Directory associated with the run. Not read by the pipeline class in
    # this notebook.
    work_dir: Path
    # A single input or a list of inputs; the pipeline normalizes to a list.
    inputs: list[SmallMoleculePipelineInputConfig] | SmallMoleculePipelineInputConfig
    # ``None`` (the default) makes the pipeline's ``prep`` stage a no-op.
    prep_config: SmallMoleculePipelinePrepConfig | None = None
    pack_config: SmallMoleculePipelinePackConfig
    parameterize_config: SmallMoleculePipelineParameterizeConfig
    simulate_config: SmallMoleculePipelineSimulateConfig
    analyize_config: SmallMoleculePipelineAnalyizeConfig


# Short aliases used when constructing the configuration.
SMPConfig = SmallMoleculePipelineConfig
SMPInputConfig = SmallMoleculePipelineInputConfig

In [4]:
# Fields for the one input molecule. The dict is deliberately not called
# ``input`` so the builtin of that name stays usable in later cells.
sm_input_fields = {"name": "ibuprofen", "path": "ibuprofen.sdf"}

sm_input = SMPInputConfig(**sm_input_fields)

settings = {
    # Relative path, so the notebook does not depend on one user's home
    # directory; it resolves against the notebook's working directory.
    "work_dir": "scratch",
    # One entry per distinct molecule: the pipeline stores molecules by name,
    # so listing the same input twice only loads the same file twice.
    "inputs": [sm_input],
    "prep_config": SmallMoleculePipelinePrepConfig(
        bespokefit_config=BespokefitConfig(
            bespoke_workflow_factory_config=BespokeWorkflowFactoryConfig(),
            bespoke_executor_config=BespokeExecutorConfig(),
            mol_to_bespoke="ibuprofen",
        )
    ),
    "pack_config": SmallMoleculePipelinePackConfig(
        # mass_density is a bare number; the field interprets it in g/cm**3.
        molecule_names=["ibuprofen"], number_of_copies=[10], mass_density=0.1
    ),
    "parameterize_config": SmallMoleculePipelineParameterizeConfig(),
    "simulate_config": SmallMoleculePipelineSimulateConfig(),
    "analyize_config": SmallMoleculePipelineAnalyizeConfig(),
}

sm_config = SMPConfig(**settings)

In [5]:
# Inspect the validated pack settings; mass_density is displayed as a
# unit-bearing Quantity and box_shape as the default array.
sm_config.pack_config

SmallMoleculePipelinePackConfig(molecule_names=['ibuprofen'], number_of_copies=[10], mass_density=<Quantity(0.1, 'gram / centimeter ** 3')>, box_shape=array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1]]))

In [6]:
# TODO: Use snakemake to manage pipeline?
# TODO: Use decorators for DAG/deps?


class SmallMoleculePipeline:
    """Run the small-molecule workflow one stage at a time.

    Intended call order: ``load`` -> ``prep`` (optional) -> ``pack`` ->
    ``parameterize`` -> ``simulate`` -> ``analyize``. Each stage stores its
    result on the instance for the next stage to read. Calling a stage before
    the one it depends on raises ``RuntimeError`` naming the missing stage.
    """

    # TODO: Track the stage
    # TODO: serialize
    # TODO: factory model?
    # TODO: if prep config, store nested configs more flat?
    def __init__(self, config: "SmallMoleculePipelineConfig"):
        """Store the stage configs and reset all stage outputs.

        Args:
            config: the top-level pipeline configuration. ``config.inputs``
                may be a single input or a list; it is normalized to a list.
        """
        self.config = config
        self.inputs = (
            config.inputs if isinstance(config.inputs, list) else [config.inputs]
        )
        self.prep_config = config.prep_config if config.prep_config else None
        self.pack_config = config.pack_config
        self.parameterize_config = config.parameterize_config
        self.simulate_config = config.simulate_config
        self.bespoke_ff = None
        # Stage outputs: None until the stage that produces them has run, so
        # out-of-order calls can be reported clearly.
        self.loaded_mols = None
        self.factory = None
        self.topology = None
        self.force_field = None
        self.interchange = None
        self.simulation = None

    def _require(self, attribute, producer):
        """Return ``self.<attribute>`` or raise if ``producer()`` has not run yet."""
        value = getattr(self, attribute)
        if value is None:
            raise RuntimeError(
                f"{attribute} is not available yet; call {producer}() first"
            )
        return value

    def load(self):
        """Read every input file into ``self.loaded_mols``, keyed by molecule name.

        If two inputs share a name, a ``UserWarning`` is emitted and the later
        input replaces the earlier one.
        """
        import warnings

        # TODO raise error if duplicate name (currently warns; last one wins)
        loaded = {}
        for mol_input in self.inputs:
            mol = load_file(mol_input.path)
            mol.name = mol_input.name
            if mol.name in loaded:
                warnings.warn(
                    f"Duplicate input name {mol.name!r}: the molecule loaded "
                    f"from {mol_input.path} replaces the earlier one",
                    stacklevel=2,
                )
            loaded[mol.name] = mol
        # Assigned only after every file loaded, so a failure part-way through
        # does not leave a partial result behind.
        self.loaded_mols = loaded

    def prep(self):
        """Run the bespoke fit when a prep config is present; otherwise do nothing.

        Raises:
            RuntimeError: if a prep config is set but ``load`` has not run.
            KeyError: if ``mol_to_bespoke`` does not name a loaded molecule.
        """
        if not self.prep_config:
            print("Nothing to prep")
            return
        bespokefit_config = self.prep_config.bespokefit_config
        molecules = self._require("loaded_mols", "load")
        self.factory = build_bespoke_workflow_factory(
            bespokefit_config.bespoke_workflow_factory_config
        )
        self.bespoke_ff = run_bespokefit(
            bespokefit_config,
            molecules[bespokefit_config.mol_to_bespoke],
            self.factory,
        )

    def pack(self):
        """Pack the configured molecules into a box and store ``self.topology``.

        Raises:
            RuntimeError: if ``load`` has not run.
        """
        # TODO: use mBuild for lattice tooling
        # Right now random packing is supported
        molecules = self._require("loaded_mols", "load")
        self.topology = pack_box(
            molecules=[
                molecules[mol_name] for mol_name in self.pack_config.molecule_names
            ],
            number_of_copies=self.pack_config.number_of_copies,
            mass_density=self.pack_config.mass_density,
            box_shape=self.pack_config.box_shape,
        )

    def parameterize(self):
        """Apply the force field to the packed topology, storing ``self.interchange``.

        The bespoke force field is used when ``prep`` produced one; otherwise
        the force field from the parameterize config is used.

        Raises:
            RuntimeError: if ``pack`` has not run.
        """
        # TODO test to make sure we use the FF we expect to use
        topology = self._require("topology", "pack")
        force_field = (
            self.bespoke_ff if self.bespoke_ff else self.parameterize_config.force_field
        )
        # A path or string has to be turned into a ForceField object first.
        if not isinstance(force_field, ForceField):
            force_field = ForceField(force_field)
        self.force_field = force_field
        self.interchange = Interchange.from_smirnoff(
            force_field=force_field, topology=topology
        )

    def simulate(self):
        """Create the simulation from ``self.interchange`` and run it.

        Raises:
            RuntimeError: if ``parameterize`` has not run.
        """
        interchange = self._require("interchange", "parameterize")
        self.simulation = create_simulation(self.simulate_config, interchange)
        run_simulation(self.simulate_config, self.simulation)

    def analyize(self):
        """Placeholder for the analysis stage; currently does nothing."""
        # run analysis here
        pass

In [7]:
# Build the pipeline from the validated configuration.
smp = SmallMoleculePipeline(sm_config)

In [8]:
# Read each input file into smp.loaded_mols.
smp.load()

In [9]:
# smp.prep()  # optional BespokeFit step (runs only when prep_config is set); left disabled here

In [10]:
# Pack the loaded molecules into a box; the result is stored on smp.topology.
smp.pack()

In [11]:
# Apply the force field to the packed topology, producing smp.interchange.
smp.parameterize()

In [12]:
# Display the parameterized system using the "nglview" backend.
smp.interchange.visualize("nglview")

NGLWidget()

In [13]:
# Create the simulation from simulate_config and run it.
smp.simulate()

Starting simulation
Step, volume (nm^3)
0 34.254
500 33.22
1000 32.831
1500 34.563
2000 32.912
2500 32.629
3000 34.303
3500 34.171
4000 30.176
4500 28.025
Elapsed time: 0.70 seconds


In [10]:
# Look the molecule up on the pipeline instead of relying on a leftover
# ``mol`` variable, which no cell in this notebook defines at top level.
smp.loaded_mols["ibuprofen"].name

'ibuprofen'

In [11]:
# ``load`` stores molecules in a dict keyed by name. The saved output below
# shows a list, so it presumably predates that change — re-run to refresh.
smp.loaded_mols

[Molecule with name 'ibuprofen' and SMILES '[H][O][C](=[O])[C@@]([H])([c]1[c]([H])[c]([H])[c]([C]([H])([H])[C]([H])([C]([H])([H])[H])[C]([H])([H])[H])[c]([H])[c]1[H])[C]([H])([H])[H]',
 Molecule with name 'ibuprofen' and SMILES '[H][O][C](=[O])[C@@]([H])([c]1[c]([H])[c]([H])[c]([C]([H])([H])[C]([H])([C]([H])([H])[H])[C]([H])([H])[H])[c]([H])[c]1[H])[C]([H])([H])[H]']

In [12]:
# Show the prep settings the pipeline took from the config. The saved output
# below lacks the required ``mol_to_bespoke`` field, so it appears to be stale.
print(smp.prep_config)

bespokefit_config=BespokefitConfig(bespoke_workflow_factory_config=BespokeWorkflowFactoryConfig(initial_force_field='openff-2.2.0.offxml', qc_method='gfn2xtb', qc_basis=None, qc_program='xtb', qc_spec_name='xtb', qc_spec_description='gfn2xtb'), bespoke_executor_config=BespokeExecutorConfig(n_fragmenter_workers=1, n_optimizer_workers=1, n_qc_compute_workers=2, n_bespoke_workers=1, n_fragment_workers=1), save_bespoke_ff=True)


In [None]:
# Standalone BespokeFit run for the loaded ibuprofen molecule, outside the
# pipeline's prep stage.
bespoke_workflow_factory_config = BespokeWorkflowFactoryConfig()
bespokefit_config = BespokefitConfig(
    # Reuse the factory config built above so both calls see the same settings.
    bespoke_workflow_factory_config=bespoke_workflow_factory_config,
    bespoke_executor_config=BespokeExecutorConfig(),
    # Required field: the name of the molecule to fit.
    mol_to_bespoke="ibuprofen",
)
factory = build_bespoke_workflow_factory(bespoke_workflow_factory_config)
# loaded_mols is keyed by molecule name, not by position.
ff = run_bespokefit(
    bespokefit_config, smp.loaded_mols[bespokefit_config.mol_to_bespoke], factory
)

Deduplication                 : 100%|████████████| 1/1 [00:00<00:00, 357.69it/s]
Building Fitting Schema: 100%|████████████████████| 1/1 [00:00<00:00,  2.52it/s]


Output()

Output()

Output()