# PDB-CCD TM

## Import Packages and Datasets

In [1]:
import os
import datetime

import qcportal
from qcportal.singlepoint import SinglepointDriver
from qcportal.singlepoint import QCSpecification
from qcportal.optimization import OptimizationSpecification

from qcportal.external import scaffold


In [2]:
# Public QCArchive endpoint that the portal client connects to.
ADDRESS = "https://api.qcarchive.molssi.org:443"

# Credentials are read from the environment so they never appear in the
# notebook; a missing variable raises KeyError before any connection attempt.
env = os.environ
client = qcportal.PortalClient(
    ADDRESS,
    username=env['QCARCHIVE_USER'],
    password=env['QCARCHIVE_PASSWORD'],
    cache_dir=".",  # cache directory is the current working directory
)

## Get Dataset, Update Description

In [3]:
# Name of the existing optimization dataset whose metadata is refreshed below.
dataset_name = "TM Benchmark Optimization Dataset Step 1 v0.0"

# One-line summary; stored both as the tagline and as extras["short_description"].
tagline = "Diverse set of conformers for single metal complexes with Pd, Fe, Zn, Cu, Mg, Li and charge of {-1,0,+1}, with some organic molecules undergoing step 1, initial optimization for benchmarking purposes"

# Long-form description; stored both as the description and as
# extras["long_description"].
new_description = """
This dataset includes single metal complexes with: {'Pd', 'Fe', 'Zn', 'Mg', 'Cu', 'Li'}, and the non-metals:
 {'C', 'H', 'P', 'S', 'O', 'N', 'F', 'Cl', 'Br'}, with a complex charge of {-1,0,+1}. Additionally, there are some organic
 molecules for benchmarking purposes. These complexes are optimized using UKS BP86/def2-TZVP and also wB97M-D3BJ/def2-TZVPPD with 
 frozen core when computationally feasible as a first optimization step before they are optimized at a higher level of theory. The
 molecular weight min, mean, and max are 81, 445, and 1026, respectively. There are 81 unique molecules, each tmc is submitted with
 3 different multiplicities to assess the spin state.

Geometries were sourced from the PDB CCD and multiple sources in the literature, the DOIs include: 10.1134/S0022476620090103, 
 10.1021/acs.inorgchem.7b03000,  10.1016/j.molstruc.2022.132506, 10.1107/S2053229619001396, 10.1021/om0492045, 10.1107/S0108270113021148,
 10.1016/j.inoche.2013.06.007, and 10.1016/j.ijbiomac.2023.125847.
"""

# Look the dataset up by the single `dataset_name` constant so the name is
# defined in exactly one place.
dataset = client.get_dataset("optimization", dataset_name)

# Overwrite the descriptive metadata on the dataset object.
dataset.tagline = tagline
dataset.description = new_description
dataset.tags = ["openff"]
dataset.extras["short_description"] = tagline
dataset.extras["long_description"] = new_description
dataset.extras["submitter"] = "jaclark5"
# NOTE(review): this replaces any existing creation_date with today's date each
# time the cell runs -- confirm that is intended for an already-existing dataset.
dataset.extras["creation_date"] = str(datetime.date.today())


## Inspect Existing Specifications

In [4]:
# Report how many specifications are attached to the dataset, then display
# their names as the cell output.
n_specifications = len(dataset.specifications)
print(f"There are {n_specifications} specifications")
dataset.specification_names

There are 2 specifications


['hf/sto-3g', 'wb97m-d3bj/def2-tzvppd FC']

## Add New Specification

In [5]:
# Keywords handed to psi4 for each underlying calculation (UKS reference).
psi4_keywords = {
    "maxiter": 500,
    "scf_properties": [
        "dipole",
        "quadrupole",
        "wiberg_lowdin_indices",
        "mayer_indices",
        "lowdin_charges",
        "mulliken_charges",
    ],
    "function_kwargs": {"properties": ["dipole_polarizabilities"]},
    "reference": "uks",
    "print": 3,
}

# BP86/def2-TZVP level of theory, run with psi4. The driver is left as
# `deferred` here -- presumably the optimizer chooses the actual driver; confirm.
qc_spec = QCSpecification(
    program="psi4",
    driver=SinglepointDriver.deferred,
    method="BP86",
    basis="def2-TZVP",
    keywords=psi4_keywords,
)

# Keywords for geometric (the optimization program).
geometric_keywords = {
    "tmax": 0.3,
    "check": 0,
    "qccnv": False,
    "reset": True,
    "trust": 0.1,
    "molcnv": False,
    "enforce": 0.0,
    "epsilon": 1e-05,
    "maxiter": 500,
    "coordsys": "dlc",
    "constraints": {},
    "convergence_set": "GAU",
}

spec = OptimizationSpecification(
    program="geometric",
    qc_specification=qc_spec,
    keywords=geometric_keywords,
)

# Register the specification on the dataset; the returned metadata is the
# cell's displayed output.
dataset.add_specification(name="BP86/def2-TZVP", specification=spec)

InsertMetadata(error_description=None, errors=[], inserted_idx=[0], existing_idx=[])

In [6]:
# Submit the dataset, then report the record count read after submission.
dataset.submit()
n_records = dataset.record_count
print(f"There are {n_records} records")

# Export the dataset as a compressed JSON scaffold file.
scaffold.to_json(
    dataset,
    filename="scaffold_wb97m-d3bj_def2-tzvppd_fc_and_bp86_def2-tzvp.json",
    compress=True,
)

There are 633 records
