# Adding DDX to existing dataset

In [1]:
from qcportal import PortalClient
from qcelemental.models.results import WavefunctionProtocolEnum
from openff.qcsubmit.results import OptimizationResultCollection,BasicResultCollection
from openff.qcsubmit.datasets import BasicDataset
from openff.qcsubmit.results.filters import ConnectivityFilter, ConformerRMSDFilter
from openff.qcsubmit.factories import BasicDatasetFactory
from openff.qcsubmit.common_structures import Metadata, QCSpec

In [2]:
# Create a QCPortal client pointed at the QCArchive server at api.qcarchive.molssi.org.
# NOTE(review): `client` is not referenced by any later cell shown in this notebook —
# confirm whether it is still needed.
client = PortalClient("https://api.qcarchive.molssi.org:443/")

# Set up the name, metadata, etc., which are the same as in the original dataset

In [3]:
from openff.qcsubmit.common_structures import SCFProperties, Metadata

# Request a finer DFT integration grid.
# NOTE(review): this choice was originally motivated by wB97 functionals, but the
# specs in this notebook use PBE0 — confirm the finer grid is still wanted.
keywords = dict(dft_spherical_points=590, dft_radial_points=99)

# Extra SCF properties to request, in case any of them are later used for training.
# They are grouped by kind below; the concatenated order is the order in which they
# are passed to the QC specs.
multipole_properties = [SCFProperties.Dipole, SCFProperties.Quadrupole]
charge_properties = [
    SCFProperties.LowdinCharges,
    SCFProperties.MullikenCharges,
    SCFProperties.MBISCharges,
]
bond_index_properties = [
    SCFProperties.MayerIndices,
    SCFProperties.WibergLowdinIndices,
]
polarizability_properties = [SCFProperties.DipolePolarizabilities]

properties = [
    *multipole_properties,
    *charge_properties,
    *bond_index_properties,
    *polarizability_properties,
]


In [4]:
from qcelemental.models import DriverEnum

# PBE0/def2-TZVPPD spec with no implicit solvent; the wavefunction (orbitals and
# eigenvalues) is stored for this spec.
vacuum_spec_name = "pbe0/def2-TZVPPD"
vacuum_spec = QCSpec(
    program="psi4",
    method="pbe0",
    basis="def2-TZVPPD",
    spec_name=vacuum_spec_name,
    spec_description="PBE0 functional with def2-TZVPPD basis set",
    store_wavefunction=WavefunctionProtocolEnum.orbitals_and_eigenvalues,
    keywords=keywords,
    scf_properties=properties,
)

# Submission metadata: submitter and a link to the submission folder.
dataset_metadata = Metadata(
    submitter="amcisaac",
    long_description_url="https://github.com/openforcefield/qca-dataset-submission/tree/master/submissions/2024-09-18-OpenFF-NAGL2-ESP-Timing-Benchmark-v1.1",
)

# Assemble the dataset description from the pieces above; no molecules are added
# in this cell.
dataset = BasicDataset(
    dataset_name="OpenFF NAGL2 ESP Timing Benchmark v1.1",
    dataset_tagline="PBE0/def2-TZVPPD single point calculations of ~1000 diverse molecules.",
    description="PBE0/def2-TZVPPD/vacuum single point calculations of ~1000 diverse molecules sub-sampled from the ESP50k, multi-BR ESP, and I fragment datasets, to benchmark computational cost.",
    # The other ESP datasets used DriverEnum.energy; the author believes the
    # `properties` driver is needed so that dipoles etc. are calculated.
    driver=DriverEnum.properties,
    metadata=dataset_metadata,
    qc_specifications={vacuum_spec_name: vacuum_spec},
)

In [5]:
# Display the specifications generated from the dataset's QCSpec entries; at this
# point only 'pbe0/def2-TZVPPD' has been defined.
dataset._get_specifications()

{'pbe0/def2-TZVPPD': QCSpecification(program='psi4', driver=<SinglepointDriver.properties: 'properties'>, method='pbe0', basis='def2-tzvppd', keywords={'maxiter': 200, 'function_kwargs': {'properties': [<SCFProperties.Dipole: 'dipole'>, <SCFProperties.Quadrupole: 'quadrupole'>, <SCFProperties.LowdinCharges: 'lowdin_charges'>, <SCFProperties.MullikenCharges: 'mulliken_charges'>, <SCFProperties.MBISCharges: 'mbis_charges'>, <SCFProperties.MayerIndices: 'mayer_indices'>, <SCFProperties.WibergLowdinIndices: 'wiberg_lowdin_indices'>, <SCFProperties.DipolePolarizabilities: 'dipole_polarizabilities'>]}, 'dft_spherical_points': 590, 'dft_radial_points': 99}, protocols=AtomicResultProtocols(wavefunction=<WavefunctionProtocolEnum.orbitals_and_eigenvalues: 'orbitals_and_eigenvalues'>, stdout=True, error_correction=ErrorCorrectionProtocol(default_policy=True, policies=None), native_files=<NativeFilesProtocolEnum.none: 'none'>))}

# Add new QCSpec for DDX

In [6]:
from openff.qcsubmit.common_structures import DDXSettings
# Implicit-solvent settings used by the DDX spec.
ddx_spec = DDXSettings(
    ddx_model="pcm",
    ddx_solvent_epsilon=78.4,
    # The radii set and scaling below are the defaults (per the original author's
    # note); they are spelled out so the choice is explicit.
    ddx_radii_set="uff",
    ddx_radii_scaling=1.1,
)


In [7]:
# Register a second spec on the dataset: the same program, method, basis, keywords
# and SCF properties as 'pbe0/def2-TZVPPD', plus the DDX implicit-solvent settings.
dataset.add_qc_spec(
    program="psi4",
    method="pbe0",
    basis="def2-TZVPPD",
    spec_name="pbe0/def2-TZVPPD/ddx-water",
    spec_description="PBE0 functional with def2-TZVPPD basis set and DDX water (eps = 78.4) using UFF radii and 1.1x scaling factor",
    # Wavefunctions are intentionally not requested for this spec, in case there
    # are a lot of errors:
    # store_wavefunction=WavefunctionProtocolEnum.orbitals_and_eigenvalues,
    keywords=keywords,
    scf_properties=properties,
    implicit_solvent=ddx_spec,
)

In [8]:
# Display the generated specifications again; the 'pbe0/def2-TZVPPD/ddx-water'
# entry should now be listed alongside 'pbe0/def2-TZVPPD'.
dataset._get_specifications()

{'pbe0/def2-TZVPPD': QCSpecification(program='psi4', driver=<SinglepointDriver.properties: 'properties'>, method='pbe0', basis='def2-tzvppd', keywords={'maxiter': 200, 'function_kwargs': {'properties': [<SCFProperties.Dipole: 'dipole'>, <SCFProperties.Quadrupole: 'quadrupole'>, <SCFProperties.LowdinCharges: 'lowdin_charges'>, <SCFProperties.MullikenCharges: 'mulliken_charges'>, <SCFProperties.MBISCharges: 'mbis_charges'>, <SCFProperties.MayerIndices: 'mayer_indices'>, <SCFProperties.WibergLowdinIndices: 'wiberg_lowdin_indices'>, <SCFProperties.DipolePolarizabilities: 'dipole_polarizabilities'>]}, 'dft_spherical_points': 590, 'dft_radial_points': 99}, protocols=AtomicResultProtocols(wavefunction=<WavefunctionProtocolEnum.orbitals_and_eigenvalues: 'orbitals_and_eigenvalues'>, stdout=True, error_correction=ErrorCorrectionProtocol(default_policy=True, policies=None), native_files=<NativeFilesProtocolEnum.none: 'none'>)),
 'pbe0/def2-TZVPPD/ddx-water': QCSpecification(program='psi4', driver

# Exporting dataset

In [9]:
# Export the dataset to "compute.json" (a relative path, so it is resolved against
# the current working directory).
dataset.export_dataset("compute.json")

# Show the QCSpec entries held by the dataset.
print(dataset.qc_specifications)

{'pbe0/def2-TZVPPD': QCSpec(method='pbe0', basis='def2-TZVPPD', program='psi4', spec_name='pbe0/def2-TZVPPD', spec_description='PBE0 functional with def2-TZVPPD basis set', store_wavefunction=<WavefunctionProtocolEnum.orbitals_and_eigenvalues: 'orbitals_and_eigenvalues'>, implicit_solvent=None, maxiter=200, scf_properties=[<SCFProperties.Dipole: 'dipole'>, <SCFProperties.Quadrupole: 'quadrupole'>, <SCFProperties.LowdinCharges: 'lowdin_charges'>, <SCFProperties.MullikenCharges: 'mulliken_charges'>, <SCFProperties.MBISCharges: 'mbis_charges'>, <SCFProperties.MayerIndices: 'mayer_indices'>, <SCFProperties.WibergLowdinIndices: 'wiberg_lowdin_indices'>, <SCFProperties.DipolePolarizabilities: 'dipole_polarizabilities'>], keywords={'dft_spherical_points': 590, 'dft_radial_points': 99}), 'pbe0/def2-TZVPPD/ddx-water': QCSpec(method='pbe0', basis='def2-TZVPPD', program='psi4', spec_name='pbe0/def2-TZVPPD/ddx-water', spec_description='PBE0 functional with def2-TZVPPD basis set and DDX water (eps 

# Dataset information

In [10]:
def print_field(od, field):
    """Print one ``field: value`` pair from ``od`` as a tab-indented markdown bullet.

    Parameters
    ----------
    od : dict
        Mapping to read from (here, the ``.dict()`` form of a QC spec).
    field : str
        Key to look up in ``od``; a missing key raises ``KeyError``.
    """
    print(f"\t* {field}: {od[field]}")


print("## Metadata")
# Sort the elements so the printed listing is reproducible from run to run.
# NOTE(review): `elements` appears to be a set of element symbols, whose iteration
# order is not guaranteed to be stable between interpreter sessions — confirm.
elements = sorted(dataset.metadata.dict()["elements"])
print(f"* Elements: {{{', '.join(elements)}}}")

# Fields reported for every spec, in the order they are printed.
fields = ["basis", "implicit_solvent", "keywords", "maxiter", "method", "program"]
for spec, obj in dataset.qc_specifications.items():
    od = obj.dict()
    print("* Spec:", spec)
    for field in fields:
        print_field(od, field)
    # SCF properties are listed one per line, nested one level deeper.
    print("\t* SCF properties:")
    for field in od["scf_properties"]:
        print(f"\t\t* {field}")

## Metadata
* Elements: {}
* Spec: pbe0/def2-TZVPPD
	* basis: def2-TZVPPD
	* implicit_solvent: None
	* keywords: {'dft_spherical_points': 590, 'dft_radial_points': 99}
	* maxiter: 200
	* method: pbe0
	* program: psi4
	* SCF properties:
		* dipole
		* quadrupole
		* lowdin_charges
		* mulliken_charges
		* mbis_charges
		* mayer_indices
		* wiberg_lowdin_indices
		* dipole_polarizabilities
* Spec: pbe0/def2-TZVPPD/ddx-water
	* basis: def2-TZVPPD
	* implicit_solvent: {'ddx_model': 'pcm', 'ddx_radii_scaling': 1.1, 'ddx_radii_set': 'uff', 'ddx_solvent_epsilon': 78.4, 'ddx_solvent': 'water'}
	* keywords: {'dft_spherical_points': 590, 'dft_radial_points': 99}
	* maxiter: 200
	* method: pbe0
	* program: psi4
	* SCF properties:
		* dipole
		* quadrupole
		* lowdin_charges
		* mulliken_charges
		* mbis_charges
		* mayer_indices
		* wiberg_lowdin_indices
		* dipole_polarizabilities
