In [44]:
import pydantic
from pydantic import BaseModel
from openff.toolkit import Molecule
from openpharmmdflow.io.load import load_file

In [45]:
# Show which pydantic version this kernel is using.
pydantic.__version__

'1.10.17'

In [46]:
from pathlib import Path


class SmallMoleculePipelineInputConfig(BaseModel):
    """Describes one small-molecule input: a label plus the file it is read from."""

    # Label for the molecule; SmallMoleculePipeline.load copies it onto mol.name.
    name: str
    # File holding the molecule; SmallMoleculePipeline.load passes it to load_file.
    path: Path


class BespokeWorkflowFactoryConfig(BaseModel):
    """Force-field and QC settings for a bespoke workflow factory.

    Every field has a default, so the model can be built with no arguments.

    NOTE(review): presumably these values are forwarded to OpenFF BespokeFit's
    workflow factory / QC specification -- confirm against the code that
    consumes this config (not visible in this notebook).
    """

    # File name of the force field used as the starting point.
    initial_force_field: str = "openff-2.2.0.offxml"
    # QC method, program, and optional basis; the defaults select "gfn2xtb"
    # run through "xtb" with no basis set given.
    qc_method: str = "gfn2xtb"
    qc_basis: str | None = None
    qc_program: str = "xtb"
    # Name and description attached to the QC specification.
    qc_spec_name: str = "xtb"
    qc_spec_description: str = "gfn2xtb"


class BespokeExecutorConfig(BaseModel):
    """Worker counts for a bespoke executor; every field has a default.

    NOTE(review): presumably these are passed to OpenFF BespokeFit's executor
    as worker-pool sizes -- confirm against the consuming code (not shown).
    """

    n_fragmenter_workers: int = 1
    n_optimizer_workers: int = 1
    n_qc_compute_workers: int = 2
    n_bespoke_workers: int = 1


class BespokefitConfig(BaseModel):
    """Groups the workflow-factory and executor settings for a bespokefit run.

    Both fields are declared without defaults, so both must be supplied.
    """

    # Right now this works to optimize one small molecule.
    # We could hack together something that handles more, but it would
    # be best to run this out of band and just load in the
    # bespoke force field.
    bespoke_workflow_factory_config: BespokeWorkflowFactoryConfig
    bespoke_executor_config: BespokeExecutorConfig


class SmallMoleculePipelinePrepConfig(BaseModel):
    """Settings for the pipeline's prep stage; currently only bespokefit options."""

    # Required: declared without a default.
    bespokefit_config: BespokefitConfig


class SmallMoleculePipelineConfig(BaseModel):
    """Top-level configuration consumed by SmallMoleculePipeline.

    Attributes:
        work_dir: Directory path for the run. (No code visible in this
            notebook reads it yet.)
        inputs: Either a single input config or a list of them;
            SmallMoleculePipeline wraps a single item in a list.
        prep_config: Optional prep-stage settings. Defaults to ``None``.
    """

    work_dir: Path
    inputs: list[SmallMoleculePipelineInputConfig] | SmallMoleculePipelineInputConfig
    # Explicit default: pydantic 1.x silently treats a nullable field without
    # a default as optional, but pydantic 2.x makes it required. Spelling out
    # ``= None`` keeps the field optional under both.
    prep_config: SmallMoleculePipelinePrepConfig | None = None


# Short aliases for the two config models used in the cells below.
SMPConfig = SmallMoleculePipelineConfig
SMPInputConfig = SmallMoleculePipelineInputConfig

In [49]:
# Field values for a single small-molecule input.
# Named ``sm_input_fields`` rather than ``input`` so the ``input()`` builtin
# is not shadowed for the rest of the kernel session.
sm_input_fields = {"name": "ibuprofen", "path": "ibuprofen.sdf"}

sm_input = SMPInputConfig(**sm_input_fields)

# NOTE(review): work_dir is a machine-specific absolute path; this cell will
# need editing (or a configurable base directory) to run elsewhere.
settings = {
    "work_dir": "/home/mmh/Projects/OpenPharmMDFlow/experiments/sm/scratch",
    # The same input is listed twice to exercise the list form of ``inputs``.
    "inputs": [sm_input, sm_input],
}

sm_config = SMPConfig(**settings)

In [48]:
# Exploratory: list the attributes and methods available on the config instance.
dir(sm_config)

['Config',
 '__abstractmethods__',
 '__annotations__',
 '__class__',
 '__class_vars__',
 '__config__',
 '__custom_root_type__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__exclude_fields__',
 '__fields__',
 '__fields_set__',
 '__format__',
 '__ge__',
 '__get_validators__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__include_fields__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__json_encoder__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__post_root_validators__',
 '__pre_root_validators__',
 '__pretty__',
 '__private_attributes__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__repr_args__',
 '__repr_name__',
 '__repr_str__',
 '__rich_repr__',
 '__schema_cache__',
 '__setattr__',
 '__setstate__',
 '__signature__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__try_update_forward_refs__',
 '__validators__',
 '_abc_impl',
 '_calculate_keys',
 '_copy_and_set_values',
 '_decompose_class',
 '_enfo

In [31]:
class SmallMoleculePipeline:
    """Staged small-molecule workflow: load, prep, pack, simulate, analyze.

    Only ``load`` is implemented; the other stages are placeholders.

    Attributes:
        config: The configuration object passed to the constructor.
        inputs: List of input configs (a single input is wrapped in a list).
        prep_config: ``config.prep_config``, or ``None`` when not provided.
        loaded_mols: Molecules produced by ``load``; empty until it is called.
    """

    # TODO: Track the stage
    # TODO: serialize
    # TODO: factory model?
    def __init__(self, config: "SmallMoleculePipelineConfig"):
        """Store the config and normalise its inputs.

        Args:
            config: A ``SmallMoleculePipelineConfig`` (or any object exposing
                ``inputs`` and ``prep_config``). The annotation is a string so
                the class can be defined before the config model exists.
        """
        self.config = config
        # Accept either one input or a list; later stages always iterate.
        self.inputs = (
            config.inputs if isinstance(config.inputs, list) else [config.inputs]
        )
        # Either a prep config or None; no extra truthiness check is needed.
        self.prep_config = config.prep_config
        # Defined up front so the attribute exists before load() is called.
        self.loaded_mols = []

    def load(self):
        """Read every configured input into ``self.loaded_mols``.

        Each input's ``path`` is passed to ``load_file`` and the result's
        ``name`` is set to the input's ``name``. Calling ``load`` again
        replaces the previous list. The list is only replaced after all files
        have loaded, so a failure part-way leaves the earlier result intact.
        """
        loaded = []
        for mol_input in self.inputs:
            mol = load_file(mol_input.path)
            mol.name = mol_input.name
            loaded.append(mol)
        self.loaded_mols = loaded

    def prep(self):
        """Placeholder for the prep stage (not implemented)."""
        # run bespokefit here
        pass

    def pack(self):
        """Placeholder for the packing stage (not implemented)."""
        # build the box here
        pass

    def simulate(self):
        """Placeholder for the simulation stage (not implemented)."""
        # run simulation here
        pass

    def analyze(self):
        """Placeholder for the analysis stage (not implemented)."""
        # run analysis here
        pass

    # Backward-compatible alias for the original, misspelled method name.
    analyize = analyze

In [32]:
# Build the pipeline from the config assembled above.
smp = SmallMoleculePipeline(sm_config)

In [33]:
# Read every configured input file into smp.loaded_mols.
smp.load()

In [34]:
# Load the same file directly with the OpenFF toolkit and name it by hand,
# for comparison with what smp.load() produced.
# NOTE(review): presumably load_file does something similar internally -- confirm.
mol = Molecule.from_file(sm_input.path)
mol.name = sm_input.name

In [35]:
# Confirm the name assigned in the previous cell.
mol.name

'ibuprofen'

In [36]:
# Inspect the molecules loaded by the pipeline (one entry per configured input).
smp.loaded_mols

[Molecule with name 'ibuprofen' and SMILES '[H][O][C](=[O])[C@@]([H])([c]1[c]([H])[c]([H])[c]([C]([H])([H])[C]([H])([C]([H])([H])[H])[C]([H])([H])[H])[c]([H])[c]1[H])[C]([H])([H])[H]',
 Molecule with name 'ibuprofen' and SMILES '[H][O][C](=[O])[C@@]([H])([c]1[c]([H])[c]([H])[c]([C]([H])([H])[C]([H])([C]([H])([H])[H])[C]([H])([H])[H])[c]([H])[c]1[H])[C]([H])([H])[H]']

In [40]:
# No prep_config was supplied in `settings`, so this is None. print() is used
# because a bare None expression would display nothing.
print(smp.prep_config)

None
