# Estimating datasets with a pre-equilibration step

In this tutorial we will estimate a data set using a pre-equilibration step. This is especially useful when the same data set will be estimated multiple times, for example while optimizing force field parameters or benchmarking the same data set with many force fields. This tutorial will cover:

- creating a toy dataset
- defining custom calculation schemas for the properties in our data set
- executing an equilibration phase to a specified kJ/mol error tolerance
- executing a pre-equilibrated simulation phase that uses the equilibrated boxes as input

In [1]:
from openff.units import unit
from openff.evaluator.backends import ComputeResources
from openff.evaluator.backends.dask import DaskLocalCluster
from openff.evaluator.datasets import (
    MeasurementSource,
    PhysicalPropertyDataSet,
    PropertyPhase,
)
from openff.evaluator.client import EvaluatorClient, RequestOptions, Request
from openff.evaluator.server.server import Batch, EvaluatorServer

from openff.evaluator.forcefield import (
    LigParGenForceFieldSource,
    SmirnoffForceFieldSource,
    TLeapForceFieldSource,
)

from openff.evaluator.properties import Density, EnthalpyOfMixing
from openff.evaluator.substances import Substance
from openff.evaluator.thermodynamics import ThermodynamicState

## Create a toy dataset

Properties can be downloaded from the internet, loaded from a file, or created dynamically. Here we quickly create a small dataset.

In [2]:
# Both toy measurements are defined at the same temperature and pressure.
thermodynamic_state = ThermodynamicState(
    temperature=298.15 * unit.kelvin,
    pressure=101.325 * unit.kilopascal,
)

# A liquid density entry for the single-component substance "CCCO".
# The value, uncertainty and DOI are placeholders, not real measurements.
toy_density = Density(
    thermodynamic_state=thermodynamic_state,
    phase=PropertyPhase.Liquid,
    value=1.0 * Density.default_unit(),
    uncertainty=1.0 * Density.default_unit(),
    source=MeasurementSource(doi=" "),
    substance=Substance.from_components("CCCO"),
)

# A liquid enthalpy-of-mixing entry for the two-component substance
# built from "CCCO" and "O", again with placeholder values.
toy_enthalpy_of_mixing = EnthalpyOfMixing(
    thermodynamic_state=thermodynamic_state,
    phase=PropertyPhase.Liquid,
    value=1.0 * EnthalpyOfMixing.default_unit(),
    uncertainty=1.0 * EnthalpyOfMixing.default_unit(),
    source=MeasurementSource(doi=" "),
    substance=Substance.from_components("CCCO", "O"),
)

dataset = PhysicalPropertyDataSet()
dataset.add_properties(toy_density, toy_enthalpy_of_mixing)

# Give every property a short, 1-based string identifier.
for index, physical_property in enumerate(dataset.properties, start=1):
    physical_property.id = str(index)

We now need to define options for the equilibration phase.

Here, we set an absolute tolerance of 100 kJ/mol on the potential energy -- note that the tolerance is given in units of energy, even for the density.
A relative error tolerance is currently unsupported.

In [3]:
# Request options that run only the equilibration layer.
equilibration_options = RequestOptions()
equilibration_options.calculation_layers = ["EquilibrationLayer"]

# Each property type gets its own equilibration schema; both use 256
# molecules and an absolute tolerance of 100 kJ/mol.
density_equilibration_schema = Density.default_equilibration_schema(
    n_molecules=256,
    absolute_tolerance=100 * unit.kilojoules / unit.mole,
)
dhmix_equilibration_schema = EnthalpyOfMixing.default_equilibration_schema(
    n_molecules=256,
    absolute_tolerance=100 * unit.kilojoules / unit.mole,
)

# Register each schema against the equilibration layer under the name of
# the property type it applies to.
for property_name, schema in (
    ("Density", density_equilibration_schema),
    ("EnthalpyOfMixing", dhmix_equilibration_schema),
):
    equilibration_options.add_schema(
        "EquilibrationLayer",
        property_name,
        schema,
    )


We then specify a PreequilibratedSimulationLayer to actually compute the properties after equilibrating.

In [4]:
# Request options that run only the pre-equilibrated simulation layer.
preequilibrated_simulation_options = RequestOptions()
preequilibrated_simulation_options.calculation_layers = [
    "PreequilibratedSimulationLayer"
]

# Toy tolerances (half of each property's default unit) for quick computation!
density_preequilibration_schema = (
    Density.default_preequilibrated_simulation_schema(
        n_molecules=256,
        absolute_tolerance=0.5 * Density.default_unit(),
    )
)
dhmix_preequilibration_schema = (
    EnthalpyOfMixing.default_preequilibrated_simulation_schema(
        n_molecules=256,
        absolute_tolerance=0.5 * EnthalpyOfMixing.default_unit(),
    )
)

# Register each schema against the pre-equilibrated simulation layer under
# the name of the property type it applies to.
for property_name, schema in (
    ("Density", density_preequilibration_schema),
    ("EnthalpyOfMixing", dhmix_preequilibration_schema),
):
    preequilibrated_simulation_options.add_schema(
        "PreequilibratedSimulationLayer",
        property_name,
        schema,
    )

We load a SMIRNOFF force field from an `.offxml` file (here, OpenFF 2.1.0).

In [5]:
# File name of the SMIRNOFF force field used for both requests below.
force_field_path = "openff-2.1.0.offxml"
force_field_source = SmirnoffForceFieldSource.from_path(force_field_path)

And now to compute.

In [None]:
# Run both phases against a local Dask cluster: first equilibrate the
# boxes, then estimate the properties starting from the equilibrated boxes.
with DaskLocalCluster(
    # uncomment options below to use a GPU to compute (much faster than CPU-only).

    # number_of_workers=1,
    # resources_per_worker=ComputeResources(
    #     number_of_threads=1,
    #     number_of_gpus=1,
    #     preferred_gpu_toolkit=ComputeResources.GPUToolkit.CUDA,
    # ),
) as calculation_backend:
    server = EvaluatorServer(
        calculation_backend=calculation_backend,
        working_directory=".",
        # keep the working files around so they can be inspected afterwards
        delete_working_files=False,
    )
    with server:
        client = EvaluatorClient()

        # we first request the equilibration data
        # this can be copied between different runs to avoid re-running
        # the data is saved in a directory called "stored_data"

        request, error = client.request_estimate(
            dataset,
            force_field_source,
            equilibration_options,
        )
        # fail immediately if the request could not be submitted, rather
        # than with an unrelated error when polling for its results
        assert error is None, f"equilibration request failed: {error}"

        # block until computation finished
        results, exception = request.results(synchronous=True, polling_interval=30)
        assert exception is None, f"equilibration failed: {exception}"

        # now we request the actual properties to compute
        # this reads equilibration data from the stored_data directory

        request, error = client.request_estimate(
            dataset,
            force_field_source,
            preequilibrated_simulation_options,
        )
        assert error is None, f"pre-equilibrated simulation request failed: {error}"

        # block until computation finished
        results, exception = request.results(synchronous=True, polling_interval=30)
        assert exception is None, f"pre-equilibrated simulation failed: {exception}"
        

In [None]:
# Count of entries in the final results' `queued_properties`.
n_queued = len(results.queued_properties)
print(n_queued)

In [None]:
# Count of entries in the final results' `estimated_properties`.
n_estimated = len(results.estimated_properties)
print(n_estimated)

In [None]:
# Count of entries in the final results' `unsuccessful_properties`.
n_unsuccessful = len(results.unsuccessful_properties)
print(n_unsuccessful)

In [None]:
# Count of entries in the final results' `exceptions`.
n_exceptions = len(results.exceptions)
print(n_exceptions)