# PDB-CCD TM

## Import Packages and Datasets

In [1]:
import os
import datetime

import qcportal
from qcportal.singlepoint import SinglepointDriver
from qcportal.singlepoint import QCSpecification
from qcportal.optimization import OptimizationSpecification

from qcportal.external import scaffold


In [2]:
# Open an authenticated connection to the QCArchive server.
# Credentials are read from the environment so they never appear in the
# notebook; a missing variable raises KeyError before any connection attempt.
ADDRESS = "https://api.qcarchive.molssi.org:443"
_credentials = {
    "username": os.environ["QCARCHIVE_USER"],
    "password": os.environ["QCARCHIVE_PASSWORD"],
}
# cache_dir="." places the client's cache in the current working directory.
client = qcportal.PortalClient(ADDRESS, cache_dir=".", **_credentials)

## Get Dataset, Update Description

In [4]:
# Fetch the existing optimization dataset and refresh its descriptive metadata.
dataset_name = "TM Benchmark Optimization Dataset Step 1 v0.0"

# NOTE(review): "and  ROHF" (double space) and "in suit with" look like typos.
# The text is left untouched here because it is published dataset metadata and
# may have to match a README elsewhere -- confirm before editing.
new_description = (
    "This dataset includes single metal complexes with: {'Pd', 'Fe', 'Zn', 'Mg', 'Cu', 'Li'}, and the non-metals: "
    "{'C', 'H', 'P', 'S', 'O', 'N', 'F', 'Cl', 'Br'}, with a complex charge of {-1,0,+1}. Additionally, there are some organic "
    "molecules for benchmarking purposes. These complexes are optimized using ROHF SOS-MP2 / def2-mSVP and  ROHF / STO-3G in suit "
    "with the literature. There are two ROHF SOS-MP2 / def2-mSVP specifications with and without frozen core. The molecular weight "
    "min, mean, and max are 81, 445, and 1026, respectively. There are 81 unique molecules, each tmc is submitted with 3 different "
    "multiplicities to assess the spin state."
)

# Alternative kept for reference: load the dataset from a local scaffold file
# instead of fetching it from the server.
#dataset = scaffold.from_json("scaffold_sos-mp2_hf.json.bz2", client)

# Use the variable defined above rather than repeating the literal, so the
# name is declared in exactly one place.
dataset = client.get_dataset("optimization", dataset_name)

# NOTE(review): these are plain attribute assignments on the dataset object.
# Confirm whether qcportal pushes them to the server or whether dedicated
# setters are needed for the change to persist.
dataset.description = new_description
dataset.tags = ["openff"]
dataset.extras["long_description"] = new_description
dataset.extras["submitter"] = "jaclark5"
dataset.extras["creation_date"] = str(datetime.date.today())


## Delete Old Specifications

In [5]:
# Remove the superseded specifications together with every record computed
# under them (delete_records=True), then report how many specifications remain.
# Plain string literals are used: the names contain no placeholders, so an
# f-string prefix would be misleading.
for old_spec_name in ("sos-mp2/def2-svp", "hf/sto-3g"):
    dataset.delete_specification(old_spec_name, delete_records=True)
print(f"There are {len(dataset.specifications)} specifications")

There are 0 specifications


## Add New Specifications

In [6]:
# Specification "sos-mp2/def2-msvp": geometric optimization driven by psi4
# ROHF SOS-MP2 / def2-mSVP calculations. No "freeze_core" keyword is set here.

# Keywords passed to the psi4 QC specification.
psi4_keywords = {
    "maxiter": 500,
    "scf_properties": [
        "dipole",
        "quadrupole",
        "wiberg_lowdin_indices",
        "mayer_indices",
        "lowdin_charges",
        "mulliken_charges",
    ],
    "function_kwargs": {"properties": ["dipole_polarizabilities"]},
    "reference": "rohf",
    "scf_type": "df",
    "mp2_type": "df",
    "opt_coordinates": "both",
    "print": 3,
}

# Keywords for geometric (the optimizer program).
geometric_keywords = {
    "tmax": 0.3,
    "check": 0,
    "qccnv": False,
    "reset": True,
    "trust": 0.1,
    "molcnv": False,
    "enforce": 0.0,
    "epsilon": 1e-05,
    "maxiter": 500,
    "coordsys": "dlc",
    "constraints": {},
    "convergence_set": "GAU",
}

spec = OptimizationSpecification(
    program="geometric",
    qc_specification=QCSpecification(
        program="psi4",
        driver=SinglepointDriver.deferred,
        method="sos-mp2",
        basis="def2-msvp",
        keywords=psi4_keywords,
    ),
    keywords=geometric_keywords,
)
# Plain literal: the name has no placeholders, so no f-string is needed.
name = "sos-mp2/def2-msvp"
dataset.add_specification(name=name, specification=spec)

InsertMetadata(error_description=None, errors=[], inserted_idx=[0], existing_idx=[])

In [7]:
# Specification "sos-mp2/def2-msvp FC": geometric optimization driven by psi4
# ROHF SOS-MP2 / def2-mSVP calculations, with the psi4 "freeze_core" keyword
# set to True.

# Keywords passed to the psi4 QC specification.
psi4_keywords = {
    "maxiter": 500,
    "scf_properties": [
        "dipole",
        "quadrupole",
        "wiberg_lowdin_indices",
        "mayer_indices",
        "lowdin_charges",
        "mulliken_charges",
    ],
    "function_kwargs": {"properties": ["dipole_polarizabilities"]},
    "reference": "rohf",
    "scf_type": "df",
    "mp2_type": "df",
    "opt_coordinates": "both",
    "print": 3,
    "freeze_core": True,
}

# Keywords for geometric (the optimizer program).
geometric_keywords = {
    "tmax": 0.3,
    "check": 0,
    "qccnv": False,
    "reset": True,
    "trust": 0.1,
    "molcnv": False,
    "enforce": 0.0,
    "epsilon": 1e-05,
    "maxiter": 500,
    "coordsys": "dlc",
    "constraints": {},
    "convergence_set": "GAU",
}

spec = OptimizationSpecification(
    program="geometric",
    qc_specification=QCSpecification(
        program="psi4",
        driver=SinglepointDriver.deferred,
        method="sos-mp2",
        basis="def2-msvp",
        keywords=psi4_keywords,
    ),
    keywords=geometric_keywords,
)
# Plain literal: the name has no placeholders, so no f-string is needed.
name = "sos-mp2/def2-msvp FC"
dataset.add_specification(name=name, specification=spec)

InsertMetadata(error_description=None, errors=[], inserted_idx=[0], existing_idx=[])

In [8]:
# Specification "hf/sto-3g": geometric optimization driven by psi4
# ROHF / STO-3G calculations. This keyword set does not include "scf_type",
# "mp2_type", "print" or "freeze_core".

# Keywords passed to the psi4 QC specification.
psi4_keywords = {
    "maxiter": 500,
    "scf_properties": [
        "dipole",
        "quadrupole",
        "wiberg_lowdin_indices",
        "mayer_indices",
        "lowdin_charges",
        "mulliken_charges",
    ],
    "function_kwargs": {"properties": ["dipole_polarizabilities"]},
    "reference": "rohf",
    "opt_coordinates": "both",
}

# Keywords for geometric (the optimizer program).
geometric_keywords = {
    "tmax": 0.3,
    "check": 0,
    "qccnv": False,
    "reset": True,
    "trust": 0.1,
    "molcnv": False,
    "enforce": 0.0,
    "epsilon": 1e-05,
    "maxiter": 500,
    "coordsys": "dlc",
    "constraints": {},
    "convergence_set": "GAU",
}

spec = OptimizationSpecification(
    program="geometric",
    qc_specification=QCSpecification(
        program="psi4",
        driver=SinglepointDriver.deferred,
        method="hf",
        basis="sto-3g",
        keywords=psi4_keywords,
    ),
    keywords=geometric_keywords,
)
# Plain literal: the name has no placeholders, so no f-string is needed.
name = "hf/sto-3g"
dataset.add_specification(name=name, specification=spec)

InsertMetadata(error_description=None, errors=[], inserted_idx=[0], existing_idx=[])

In [9]:
# Submit the dataset, report the resulting record count, and export the
# dataset to a scaffold file with compression enabled.
dataset.submit()
n_records = dataset.record_count
print(f"There are {n_records} records")
# NOTE(review): with compress=True the file on disk presumably gains a
# compression suffix (the earlier scaffold was read as ".json.bz2") -- confirm.
scaffold.to_json(
    dataset,
    filename="scaffold_sos-mp2_hf_fc.json",
    compress=True,
)

There are 633 records
