From 3a211f9ce051a22e17c93a8c84cacc56c94ee026 Mon Sep 17 00:00:00 2001 From: j-wags Date: Mon, 16 May 2022 12:04:32 -0700 Subject: [PATCH 1/4] update install and quick start guides for psi4-ambertools incompatibility --- docs/getting-started/installation.md | 43 +++++++++++++++++------- docs/getting-started/quick-start.md | 49 +++++++++++++++++++++------- docs/index.md | 2 +- docs/users/theory.md | 2 ++ 4 files changed, 73 insertions(+), 23 deletions(-) diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md index 15b7ce05..9ae10d8b 100644 --- a/docs/getting-started/installation.md +++ b/docs/getting-started/installation.md @@ -6,13 +6,38 @@ package manager. ## Using conda -The recommended way to install `openff-bespokefit` is via the `conda` package manager: +The recommended way to install `openff-bespokefit` is via the `conda` package manager. +A working installation also requires at least one package from each of the two sections below +("Fragmentation Backends" and "Reference Data Generators"). ```shell conda install -c conda-forge openff-bespokefit ``` -### Optional dependencies + + +### Fragmentation Backends + +#### AmberTools Antechamber + +AmberTools is free and open-source, and can generally be used to fragment molecules up to 40 heavy atoms in under +10 minutes. + +```shell +conda install -c conda-forge ambertools +``` + +#### OpenEye Toolkits + +If you have access to the OpenEye toolkits (namely `oechem`, `oequacpac` and `oeomega`) we recommend installing +these also as they can speed up certain operations significantly. OpenEye software requires a free-for-academics +license to run. 
+ +```shell +conda install -c openeye openeye-toolkits +``` + +### Reference Data Generators #### Psi4 @@ -23,6 +48,11 @@ recommended to be installed unless you intend to train against data generated us conda install -c conda-forge -c defaults -c psi4 psi4 ``` +:::{warning} +There is an incompatibility between the AmberTools and Psi4 conda packages on Mac, and it is not possible to +create a working conda environment containing both. +::: + #### XTB The xtb package gives access to the XTB semi-empirical models produced by the Grimme group in Bonn which may be used @@ -46,15 +76,6 @@ TorchANI potentials are only suitable for molecules with a net neutral charge an consisting of C, H, N, O, S, F and Cl ::: -#### OpenEye toolkits - -If you have access to the OpenEye toolkits (namely `oechem`, `oequacpac` and `oeomega`) we recommend installing -these also as these can speed up certain operations significantly. - -```shell -conda install -c openeye openeye-toolkits -``` - ## From source To install `openff-bespokefit` from source begin by cloning the repository from diff --git a/docs/getting-started/quick-start.md b/docs/getting-started/quick-start.md index 5ace0315..e5dd0ac7 100644 --- a/docs/getting-started/quick-start.md +++ b/docs/getting-started/quick-start.md @@ -1,6 +1,13 @@ (quick_start_chapter)= # Quick start +:::{warning} +To reduce runtime, the first sections of this "Quick start" guide use a fast semiempirical model, "GFN2-xTB", +to generate training data, +rather than the ["default" method](default_qc_method) used to train mainline OpenFF force fields. Details on how to +set up an environment for a "default" fit using Psi4 are available in the [installation guide](installation_chapter). 
+::: + BespokeFit aims to provide an automated pipeline that ingests a general molecular force field and a set of molecules of interest, and produce a new bespoke force field that has been augmented with highly specific force field parameters trained to accurately capture the important features and phenomenology of the input set. @@ -9,10 +16,12 @@ Such features may include generating bespoke torsion parameters that have been t to capture as closely as possible the torsion profiles of the rotatable bonds in the target molecule which have a large impact on conformational preferences. -The recommended way to install `openff-bespokefit` is via the `conda` package manager: +The recommended way to install `openff-bespokefit` is via the `conda` package manager. There are several optional +dependencies, and a good starting environment is: ```shell -conda install -c conda-forge openff-bespokefit +conda create -n bespokefit -c conda-forge openff-bespokefit xtb-python ambertools +conda activate bespokefit ``` although [several other methods are available](installation_chapter). @@ -49,8 +58,9 @@ openff-bespoke executor run --smiles "CC(=O)NC1=CC=C(C=C1)O" \ --workflow "default" \ --output "acetaminophen.json" \ --output-force-field "acetaminophen.offxml" \ - --n-qc-compute-workers 2 \ - --qc-compute-n-cores 8 + --n-qc-compute-workers 4 \ + --qc-compute-n-cores 1 \ + --default-qc-spec xtb gfn2xtb none ``` or the path to an SDF (or similar) file @@ -60,8 +70,9 @@ openff-bespoke executor run --file "acetaminophen.sdf" \ --workflow "default" \ --output "acetaminophen.json" \ --output-force-field "acetaminophen.offxml" \ - --n-qc-compute-workers 2 \ - --qc-compute-n-cores 8 + --n-qc-compute-workers 4 \ + --qc-compute-n-cores 1 \ + --default-qc-spec xtb gfn2xtb none ``` in addition to arguments defining how the bespoke fit should be performed and parallelized. 
@@ -88,7 +99,8 @@ openff-bespoke executor run --file "acetaminophen.sdf" \ --n-fragmenter-workers 1 \ --n-optimizer-workers 1 \ --n-qc-compute-workers 2 \ - --qc-compute-n-cores 8 + --qc-compute-n-cores 1 \ + --default-qc-spec xtb gfn2xtb none ``` See the chapter on the [bespoke executor](executor_chapter) for more information about parallelising fits. @@ -107,7 +119,7 @@ seamlessly coordinates every step of the fitting workflow from molecule fragment openff-bespoke executor launch --n-fragmenter-workers 1 \ --n-optimizer-workers 1 \ --n-qc-compute-workers 2 \ - --qc-compute-n-cores 8 + --qc-compute-n-cores 1 \ ``` The number of workers dedicated to each bespoke fitting stage can be tweaked here. In general, we recommend devoting @@ -120,14 +132,16 @@ the `submit` command either in the form of a SMILES pattern: ```shell openff-bespoke executor submit --smiles "CC(=O)NC1=CC=C(C=C1)O" \ - --workflow "default" + --workflow "default" \ + --default-qc-spec xtb gfn2xtb none ``` or loading the molecule from an SDF (or similar) file: ```shell openff-bespoke executor submit --file "acetaminophen.sdf" \ - --workflow "default" + --workflow "default" \ + --default-qc-spec xtb gfn2xtb none ``` The `submit` command will also accept a combination of the two input forms as well as multiple occurrences of either. @@ -166,6 +180,19 @@ See the [results chapter](bespoke_results_chapter) for more details on retrievin (quick_start_using_api)= ## Using the API +Instead of using the fast semiempirical method available via xTB, this section will use the +[default method](default_qc_method) available through Psi4. + +:::{warning} +This section will only work on Linux (not Mac) computers, due to an incompatibility between AmberTools and Psi4. +Mac users should proceed directly to the [next section](quick_start_config_factory), which shows how to run the Python +workflow using xTB. 
+::: + +```shell +conda install -c psi4 -c defaults psi4 +``` + For the more Python oriented user, or for users who are looking for more control over how the bespoke fit will be performed, BespokeFit exposes a full Python API. @@ -214,7 +241,7 @@ with BespokeExecutor( n_fragmenter_workers = 1, n_optimizer_workers = 1, n_qc_compute_workers = 2, - qc_compute_worker_config=BespokeWorkerConfig(n_cores=8) + qc_compute_worker_config=BespokeWorkerConfig(n_cores=2) ) as executor: # Submit our workflow to the executor task_id = executor.submit(input_schema=workflow_schema) diff --git a/docs/index.md b/docs/index.md index cc23a2db..effeccb9 100644 --- a/docs/index.md +++ b/docs/index.md @@ -18,7 +18,7 @@ It is a Python library in the [Open Force Field ecosystem] that emphasises: directly from the command line without touching a line of Python :::{warning} -Please note that BespokeFit is experimental, pre-production software. It does +Please note that BespokeFit is under continuous development. It does not promise to have a stable API and may in cases produce inaccurate results. We are always looking to improve this framwork so if you do find any undesirable or irritating behaviour, please [file an issue!] diff --git a/docs/users/theory.md b/docs/users/theory.md index 42edba42..21d92ef8 100644 --- a/docs/users/theory.md +++ b/docs/users/theory.md @@ -67,6 +67,8 @@ step. 
[`openff-fragmenter`]: https://fragmenter.readthedocs.io/en/stable/index.html [may be specified]: workflow_chapter +(default_qc_method)= + ## QC Generation The third stage in the bespoke fitting workflow is generating any reference quantum chemical data that the bespoke From b3afbead06a421e590fc8d45737df1b9d8dccbd5 Mon Sep 17 00:00:00 2001 From: j-wags Date: Mon, 16 May 2022 12:21:35 -0700 Subject: [PATCH 2/4] quick start env needs mamba --- docs/getting-started/quick-start.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/getting-started/quick-start.md b/docs/getting-started/quick-start.md index e5dd0ac7..92b16139 100644 --- a/docs/getting-started/quick-start.md +++ b/docs/getting-started/quick-start.md @@ -20,7 +20,8 @@ The recommended way to install `openff-bespokefit` is via the `conda` package ma dependencies, and a good starting environment is: ```shell -conda create -n bespokefit -c conda-forge openff-bespokefit xtb-python ambertools +conda install -y mamba +mamba create -y -n bespokefit -c conda-forge openff-bespokefit xtb-python ambertools conda activate bespokefit ``` From ed589db654b35a99b991e442a8a41623e01d9649 Mon Sep 17 00:00:00 2001 From: j-wags Date: Mon, 16 May 2022 18:39:17 -0700 Subject: [PATCH 3/4] further updates to quickstart based on testing iterations --- docs/getting-started/quick-start.md | 69 +++++++++++++++++------------ 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/docs/getting-started/quick-start.md b/docs/getting-started/quick-start.md index 92b16139..e7336b8b 100644 --- a/docs/getting-started/quick-start.md +++ b/docs/getting-started/quick-start.md @@ -2,10 +2,9 @@ # Quick start :::{warning} -To reduce runtime, the first sections of this "Quick start" guide use a fast semiempirical model, "GFN2-xTB", +To reduce runtime, this "Quick start" guide uses a fast semiempirical model, "GFN2-xTB", to generate training data, -rather than the ["default" method](default_qc_method) used to train mainline 
OpenFF force fields. Details on how to -set up an environment for a "default" fit using Psi4 are available in the [installation guide](installation_chapter). +rather than the ["default" method](default_qc_method) used to train mainline OpenFF force fields. ::: BespokeFit aims to provide an automated pipeline that ingests a general molecular force field and a set of @@ -21,7 +20,7 @@ dependencies, and a good starting environment is: ```shell conda install -y mamba -mamba create -y -n bespokefit -c conda-forge openff-bespokefit xtb-python ambertools +mamba create -y -n bespokefit -c conda-forge openff-bespokefit xtb-python ambertools python=3.9 conda activate bespokefit ``` @@ -59,7 +58,7 @@ openff-bespoke executor run --smiles "CC(=O)NC1=CC=C(C=C1)O" \ --workflow "default" \ --output "acetaminophen.json" \ --output-force-field "acetaminophen.offxml" \ - --n-qc-compute-workers 4 \ + --n-qc-compute-workers 2 \ --qc-compute-n-cores 1 \ --default-qc-spec xtb gfn2xtb none ``` @@ -71,15 +70,21 @@ openff-bespoke executor run --file "acetaminophen.sdf" \ --workflow "default" \ --output "acetaminophen.json" \ --output-force-field "acetaminophen.offxml" \ - --n-qc-compute-workers 4 \ + --n-qc-compute-workers 2 \ --qc-compute-n-cores 1 \ --default-qc-spec xtb gfn2xtb none ``` in addition to arguments defining how the bespoke fit should be performed and parallelized. +:::{note} +Sometimes bespoke commands will raise `RuntimeError: The gateway could not be reached`. This can usually be resolved +by rerunning the command a few times. +::: + Here we have specified that we wish to start the fit from the general OpenFF 2.0.0 (Sage) force field, augmenting -it with bespoke parameters generated according to the [default built-in workflow](workflow_chapter). +it with bespoke parameters generated according to the +[default built-in workflow using GFN2-xTB reference data](workflow_chapter). 
:::{note} Other available workflow can be viewed by running `openff-bespoke executor run --help`, or alternatively, the path to a @@ -97,8 +102,8 @@ extra workers can easily be requested to speed things up: ```shell openff-bespoke executor run --file "acetaminophen.sdf" \ --workflow "default" \ - --n-fragmenter-workers 1 \ - --n-optimizer-workers 1 \ + --n-fragmenter-workers 2 \ + --n-optimizer-workers 2 \ --n-qc-compute-workers 2 \ --qc-compute-n-cores 1 \ --default-qc-spec xtb gfn2xtb none @@ -118,9 +123,9 @@ seamlessly coordinates every step of the fitting workflow from molecule fragment ```shell openff-bespoke executor launch --n-fragmenter-workers 1 \ - --n-optimizer-workers 1 \ - --n-qc-compute-workers 2 \ - --qc-compute-n-cores 1 \ + --n-optimizer-workers 2 \ + --n-qc-compute-workers 4 \ + --qc-compute-n-cores 1 ``` The number of workers dedicated to each bespoke fitting stage can be tweaked here. In general, we recommend devoting @@ -181,19 +186,6 @@ See the [results chapter](bespoke_results_chapter) for more details on retrievin (quick_start_using_api)= ## Using the API -Instead of using the fast semiempirical method available via xTB, this section will use the -[default method](default_qc_method) available through Psi4. - -:::{warning} -This section will only work on Linux (not Mac) computers, due to an incompatibility between AmberTools and Psi4. -Mac users should proceed directly to the [next section](quick_start_config_factory), which shows how to run the Python -workflow using xTB. -::: - -```shell -conda install -c psi4 -c defaults psi4 -``` - For the more Python oriented user, or for users who are looking for more control over how the bespoke fit will be performed, BespokeFit exposes a full Python API. 
@@ -203,13 +195,32 @@ is used to create the workflows that fully describe how bespoke parameters will ```python from openff.bespokefit.workflows import BespokeWorkflowFactory +from openff.qcsubmit.common_structures import QCSpec -factory = BespokeWorkflowFactory() +factory = BespokeWorkflowFactory( + # Define the starting force field that will be augmented with bespoke + # parameters. + initial_force_field="openff-2.0.0.offxml", + # Change the level of theory that the reference QC data is generated at + default_qc_specs=[ + QCSpec( + method="gfn2xtb", + basis=None, + program="xtb", + spec_name="xtb", + spec_description="gfn2xtb", + ) + ] +) ``` -The default factory will produce [workflows](workflow_chapter) that will augment the OpenFF 2.0.0 force field +Similar to the previous steps, here we override the default +["default" QC specification](default_qc_method) to use GFN2-xTB. If we had Psi4 +installed, we could remove the `default_qc_specs` argument and the factory would instead use our mainline +[fitting QC method]](default_qc_method). +The default factory will produce [workflows](workflow_chapter) that augment the OpenFF 2.0.0 force field with bespoke torsion parameters for all non-terminal *rotatable* bonds in the molecule that have been trained -to quantum chemical torsion scan data generated for said molecule using the [Psi4] quantum chemistry package. +to quantum chemical torsion scan data generated for said molecule. :::{note} See the [configuration section](quick_start_config_factory) for more info on customising the workflow factory. 
@@ -242,7 +253,7 @@ with BespokeExecutor( n_fragmenter_workers = 1, n_optimizer_workers = 1, n_qc_compute_workers = 2, - qc_compute_worker_config=BespokeWorkerConfig(n_cores=2) + qc_compute_worker_config=BespokeWorkerConfig(n_cores=1) ) as executor: # Submit our workflow to the executor task_id = executor.submit(input_schema=workflow_schema) From 2ae4ad37070f0e5bd7a2692a3f96ccc8c2fef738 Mon Sep 17 00:00:00 2001 From: j-wags Date: Mon, 16 May 2022 18:56:20 -0700 Subject: [PATCH 4/4] fix typo and rejigger install instructions --- docs/getting-started/quick-start.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/getting-started/quick-start.md b/docs/getting-started/quick-start.md index e7336b8b..80f461bf 100644 --- a/docs/getting-started/quick-start.md +++ b/docs/getting-started/quick-start.md @@ -19,9 +19,9 @@ The recommended way to install `openff-bespokefit` is via the `conda` package ma dependencies, and a good starting environment is: ```shell -conda install -y mamba -mamba create -y -n bespokefit -c conda-forge openff-bespokefit xtb-python ambertools python=3.9 -conda activate bespokefit +conda create -n bespokefit -y -c conda-forge mamba python=3.9 +conda activate bespokefit +mamba install -y -c conda-forge openff-bespokefit xtb-python ambertools ``` although [several other methods are available](installation_chapter). @@ -217,7 +217,7 @@ factory = BespokeWorkflowFactory( Similar to the previous steps, here we override the default ["default" QC specification](default_qc_method) to use GFN2-xTB. If we had Psi4 installed, we could remove the `default_qc_specs` argument and the factory would instead use our mainline -[fitting QC method]](default_qc_method). +[fitting QC method](default_qc_method). 
The default factory will produce [workflows](workflow_chapter) that augment the OpenFF 2.0.0 force field with bespoke torsion parameters for all non-terminal *rotatable* bonds in the molecule that have been trained to quantum chemical torsion scan data generated for said molecule.