In [135]:
import time
from pathlib import Path

import numpy as np
import yaml
from coulson.interface import get_pyscf_mf, mol_from_xyz, process_rdkit_mol
from coulson.ppp import (
    PPPCalculator,
    calculate_dsp,
    calculate_exchange,
    homo_lumo_overlap,
)
from joblib import Parallel, delayed, parallel_config
from pyscf.cc import CCSD
from utils import format_dictionary_for_yaml

In [113]:
def run_compound(xyz):
    """Time every stage of the Coulson PPP and PySCF EOM-CCSD workflows for one molecule.

    Each stage is bracketed by ``time.perf_counter()`` readings, so all reported
    durations are wall-clock seconds. The timing is written inline on purpose
    (no helper or context manager) so that no extra call overhead ends up inside
    the measured intervals.

    Args:
        xyz: Path to the XYZ file, passed unchanged to ``mol_from_xyz``.

    Returns:
        dict: Durations in seconds. Per-stage keys: ``scf``, ``cis_s1``,
        ``cis_t1``, ``exchange``, ``overlap``, ``dsp_scf``, ``dsp_cis``,
        ``scf_pyscf``, ``ccsd_pyscf``, ``eom_s1_pyscf``, ``eom_t1_pyscf``.
        Aggregate keys: ``tot_scf``, ``part_scf``, ``tot_cis``, ``part_cis``,
        ``tot_eom_ccsd_coulson_scf``, ``tot_eom``, ``part_eom``.

    Raises:
        ValueError: If no entry of the mask returned by ``process_rdkit_mol``
            is truthy.
    """
    # Process filename with RDKit mol detection from XYZ
    mol = mol_from_xyz(xyz)
    input_data, mask = process_rdkit_mol(mol)
    if not any(mask):
        raise ValueError("Molecule does not have pi system.")

    # Perform SCF calculation (calculator construction is part of the timing)
    start = time.perf_counter()
    ppp = PPPCalculator(input_data)
    ppp.scf(max_iter=500)
    time_scf = time.perf_counter() - start

    # Calculate exchange integral (value discarded, only the timing is kept)
    start = time.perf_counter()
    calculate_exchange(ppp)
    time_exchange = time.perf_counter() - start

    # Calculate HOMO-LUMO overlap (value discarded, only the timing is kept)
    start = time.perf_counter()
    homo_lumo_overlap(ppp)
    time_overlap = time.perf_counter() - start

    # Calculate S1 energy with singlet CI
    start = time.perf_counter()
    ppp.ci(n_states=3)
    time_cis_s1 = time.perf_counter() - start
    # Read the S1 energy now: the triplet CI below is run on the same calculator
    energy_s1_cis = ppp.ci_energies[0] - ppp.electronic_energy

    # Calculate T1 energy with triplet CI
    start = time.perf_counter()
    ppp.ci(n_states=3, multiplicity="triplet")
    time_cis_t1 = time.perf_counter() - start
    energy_t1_cis = ppp.ci_energies[0] - ppp.electronic_energy

    # Calculate DSP contribution at the SCF level
    start = time.perf_counter()
    _, _ = calculate_dsp(ppp)
    time_dsp_scf = time.perf_counter() - start

    # Calculate DSP contribution using the CIS S1/T1 energies
    start = time.perf_counter()
    _, _ = calculate_dsp(
        ppp, ci=True, energy_s_1=energy_s1_cis, energy_t_1=energy_t1_cis
    )
    time_dsp_cis = time.perf_counter() - start

    # PySCF mean-field calculation built from a fresh PPP calculator
    start = time.perf_counter()
    ppp = PPPCalculator(input_data)
    mf = get_pyscf_mf(ppp)
    mf.verbose = 0
    mf.kernel()
    time_scf_pyscf = time.perf_counter() - start

    # PySCF ground-state CCSD
    start = time.perf_counter()
    ccsd = CCSD(mf)
    ccsd.verbose = 0
    ccsd.kernel()
    time_ccsd_pyscf = time.perf_counter() - start

    # PySCF EOM-EE-CCSD, lowest singlet root
    start = time.perf_counter()
    _ = ccsd.eomee_ccsd_singlet(nroots=1)[0]
    time_eom_s1_pyscf = time.perf_counter() - start

    # PySCF EOM-EE-CCSD, lowest triplet root
    start = time.perf_counter()
    _ = ccsd.eomee_ccsd_triplet(nroots=1)[0]
    time_eom_t1_pyscf = time.perf_counter() - start

    # "tot_*" entries include the preceding SCF step, "part_*" entries do not.
    return {
        "scf": time_scf,
        "cis_s1": time_cis_s1,
        "cis_t1": time_cis_t1,
        "exchange": time_exchange,
        "overlap": time_overlap,
        "dsp_scf": time_dsp_scf,
        "dsp_cis": time_dsp_cis,
        "scf_pyscf": time_scf_pyscf,
        "ccsd_pyscf": time_ccsd_pyscf,
        "eom_s1_pyscf": time_eom_s1_pyscf,
        "eom_t1_pyscf": time_eom_t1_pyscf,
        "tot_scf": time_scf + time_dsp_scf,
        "part_scf": time_dsp_scf,
        "tot_cis": time_scf + time_cis_s1 + time_cis_t1 + time_dsp_cis,
        "part_cis": time_cis_s1 + time_cis_t1 + time_dsp_cis,
        "tot_eom_ccsd_coulson_scf": (
            time_scf + time_ccsd_pyscf + time_eom_s1_pyscf + time_eom_t1_pyscf
        ),
        "tot_eom": (
            time_scf_pyscf + time_ccsd_pyscf + time_eom_s1_pyscf + time_eom_t1_pyscf
        ),
        "part_eom": time_eom_s1_pyscf + time_eom_t1_pyscf,
    }


def calc_stats(results):
    """Return the mean and standard deviation of a sequence of numbers.

    Args:
        results: Values to summarise, forwarded to ``np.mean`` and ``np.std``.

    Returns:
        tuple: ``(mean, std)`` as computed by NumPy with its default arguments.
    """
    return np.mean(results), np.std(results)

Load parent azaphenalene

In [114]:
# Directory with the input geometries, provided by the Snakemake rule
xyz_dir = snakemake.input.xyz_dir
# Geometry file of the molecule used for the benchmark
xyz = Path(xyz_dir).joinpath("1.xyz")

Run the timing benchmark 100 times in parallel

In [116]:
# Number of independent repetitions used for the timing statistics
N_REPEATS = 100

# Run the repetitions in loky worker processes, one job per Snakemake thread;
# inner_max_num_threads=1 restricts nested thread pools inside each worker.
with parallel_config(backend="loky", inner_max_num_threads=1):
    results = Parallel(n_jobs=snakemake.threads)(
        delayed(run_compound)(str(xyz)) for _ in range(N_REPEATS)
    )

Compute the mean and standard deviation of each timing over all runs

In [137]:
def _timing_stats(key):
    """Return (mean, std) of the timing stored under ``key`` across all runs."""
    return calc_stats([run[key] for run in results])


# Coulson SCF step on its own
mean_scf_only, std_scf_only = _timing_stats("scf")

# Totals, i.e. including the SCF step that precedes each method
mean_scf, std_scf = _timing_stats("tot_scf")
mean_cis, std_cis = _timing_stats("tot_cis")
mean_eom_ccsd, std_eom_ccsd = _timing_stats("tot_eom")

# Partial timings, i.e. excluding the SCF step
mean_part_scf, std_part_scf = _timing_stats("part_scf")
mean_part_cis, std_part_cis = _timing_stats("part_cis")
mean_part_eom, std_part_eom = _timing_stats("part_eom")

# Partial timings relative to the SCF-level partial timing (the reference)
ratio_part_scf = mean_part_scf / mean_part_scf
ratio_part_cis = mean_part_cis / mean_part_scf
ratio_part_eom = mean_part_eom / mean_part_scf

# Cheap descriptors
mean_overlap, std_overlap = _timing_stats("overlap")
mean_exchange, std_exchange = _timing_stats("exchange")

Save results to file

In [144]:
# Timings are measured in seconds: a factor of 1e3 converts to milliseconds,
# 1e6 to microseconds. Ratios are dimensionless and stored unscaled.
variables = {
    "timing_mean_scf_only": mean_scf_only * 1e3,
    "timing_std_scf_only": std_scf_only * 1e3,
    "timing_mean_scf": mean_scf * 1e3,
    "timing_std_scf": std_scf * 1e3,
    "timing_mean_cis": mean_cis * 1e3,
    "timing_std_cis": std_cis * 1e3,
    "timing_mean_eom_ccsd": mean_eom_ccsd * 1e3,
    "timing_std_eom_ccsd": std_eom_ccsd * 1e3,
    "timing_mean_part_scf": mean_part_scf * 1e3,
    "timing_std_part_scf": std_part_scf * 1e3,
    "timing_mean_part_cis": mean_part_cis * 1e3,
    "timing_std_part_cis": std_part_cis * 1e3,
    "timing_mean_part_eom": mean_part_eom * 1e3,
    "timing_std_part_eom": std_part_eom * 1e3,
    "timing_ratio_part_scf": ratio_part_scf,
    "timing_ratio_part_cis": ratio_part_cis,
    "timing_ratio_part_eom": ratio_part_eom,
    "timing_mean_overlap": mean_overlap * 1e6,
    "timing_std_overlap": std_overlap * 1e6,
    "timing_mean_exchange": mean_exchange * 1e6,
    "timing_std_exchange": std_exchange * 1e6,
}

In [145]:
path_variables = snakemake.output.variables
# Format before opening the file, so a formatting error cannot leave a
# truncated output file behind.
formatted_variables = format_dictionary_for_yaml(variables, n_dec=2)
# Explicit encoding keeps the output independent of the platform locale.
with open(path_variables, "w", encoding="utf-8") as stream:
    yaml.dump(formatted_variables, stream)