In [None]:
from pathlib import Path

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yaml
from utils import (
    add_to_variables,
    format_dictionary_for_yaml,
    load_data,
    plot_zero_zero,
)

# Global matplotlib appearance for every figure produced by this notebook.
plt.style.use("default")
mpl.rcParams.update({"font.size": 14})
# Extra keyword arguments forwarded unchanged to each plot_zero_zero call.
plot_kwargs = dict(edgecolors="black", linewidths=0.5)

Define variables and parameters

In [None]:
# Accumulators filled in by the cells below and written out as YAML at the end:
# `params` receives the table path; `variables` receives the results returned by
# plot_zero_zero (via add_to_variables), the figure paths and the mean timings.
params, variables = {}, {}

Read data

In [None]:
# Input locations are read from the `snakemake` object, which is not defined in
# this notebook (presumably injected by Snakemake's notebook integration).
path_dft = snakemake.input.data_dft
path_mmff = snakemake.input.data_mmff
path_gfnff = snakemake.input.data_gfnff
path_gfn2 = snakemake.input.data_gfn2
path_ani1ccx = snakemake.input.data_ani1ccx
path_ref = snakemake.input.data_ref


def _load_complete_rows(path_data, path_reference):
    """Load one data set with `load_data` and drop every row that contains a NaN."""
    return load_data(path_data, path_reference).dropna()


def _join_on_index(df_left, df_right, suffix_right):
    """Join two frames on their index (pandas' default inner join).

    Columns present in both frames are disambiguated with "_dft" for the left
    frame and `suffix_right` for the right frame.
    """
    return df_left.merge(
        df_right,
        left_index=True,
        right_index=True,
        suffixes=("_dft", suffix_right),
    )


# One frame per geometry-optimisation method, each loaded against the same reference file.
df_dft = _load_complete_rows(path_dft, path_ref)
df_mmff = _load_complete_rows(path_mmff, path_ref)
df_gfnff = _load_complete_rows(path_gfnff, path_ref)
df_gfn2 = _load_complete_rows(path_gfn2, path_ref)
df_ani1ccx = _load_complete_rows(path_ani1ccx, path_ref)

# Pair every cheaper method with DFT so the two can be plotted against each other.
df_merged_gfnff = _join_on_index(df_dft, df_gfnff, "_gfnff")
df_merged_mmff = _join_on_index(df_dft, df_mmff, "_mmff")
df_merged_gfn2 = _join_on_index(df_dft, df_gfn2, "_gfn2")
df_merged_ani1ccx = _join_on_index(df_dft, df_ani1ccx, "_ani1ccx")

Make zero-zero plots

In [None]:
# Each job: (key used in saved variable names, label for panel title and table row, data).
jobs = (
    ("mmff", "MMFF", df_mmff),
    # Spelled "GFN-FF" to match the label used in the DFT-correlation figure.
    ("gfnff", "GFN-FF", df_gfnff),
    ("gfn2", "GFN2-xTB", df_gfn2),
    ("ani1ccx", "ANI-1ccx", df_ani1ccx),
    ("dft", "DFT", df_dft),
)
fig, ax = plt.subplots(2, 3, figsize=plt.rcParams["figure.figsize"] * np.array([3, 2]))
ax = ax.flatten()
# The grid has more panels than there are jobs; hide the ones left over.
for ax_unused in ax[len(jobs):]:
    ax_unused.set_axis_off()

all_results = {}
for i, (name, label, df_method) in enumerate(jobs):
    # Plot gap at CIS + DSP level against the reference gap
    # NOTE(review): the DFT-correlation cell also passes `fig=fig` to
    # plot_zero_zero; confirm whether it should be passed here as well.
    _, _, results = plot_zero_zero(
        df_method,
        x_name="t1_s1_ref",
        y_name="t1_s1_dsp_cis",
        plot_type="scatter",
        **plot_kwargs,
        ax=ax[i],
        legend_loc="lower right",
    )
    ax[i].set_title(label)
    variables = add_to_variables(
        variables,
        results,
        label_calculation="opt",
        label_compound="gap",
        label_method=name,
    )
    all_results[label] = results

fig.tight_layout()
path_figure = snakemake.output.figure_gap
fig.savefig(path_figure)
# Stored without the file extension and relative to the parent directory.
variables["fig_opt_gap"] = "../" + str(Path(path_figure).with_suffix(""))

# Add parameters to be saved
path_table = snakemake.output.table
params["tab_opt"] = "../" + path_table

# Print tables for the paper: one row per method, one column per result entry.
df = pd.DataFrame(all_results).T
# The first entry covers the index column; the trailing columns are printed
# without decimals and all others with two decimals. The width is taken from
# the table itself rather than from whichever `results` the loop saw last.
# NOTE(review): assumes the last four result entries are integer-like — confirm
# against plot_zero_zero.
n_integer_columns = 4
floatfmt = (
    [None]
    + [".2f"] * (df.shape[1] - n_integer_columns)
    + [".0f"] * n_integer_columns
)
df.to_markdown(path_table, floatfmt=floatfmt)

Plot against DFT

In [None]:
# Each job: (key used in column suffixes and saved variable names, panel title,
# frame merged with DFT on the index).
jobs = (
    ("mmff", "MMFF", df_merged_mmff),
    ("gfnff", "GFN-FF", df_merged_gfnff),
    ("gfn2", "GFN2-xTB", df_merged_gfn2),
    ("ani1ccx", "ANI-1ccx", df_merged_ani1ccx),
)
fig, ax = plt.subplots(2, 2, figsize=plt.rcParams["figure.figsize"] * np.array([2, 2]))
ax = ax.flatten()
for i, (name, label, df) in enumerate(jobs):
    # Plot the CIS + DSP gap from each method's data (y) against the same
    # quantity from the DFT data (x); the merge suffixes select the columns.
    _, _, results = plot_zero_zero(
        df,
        x_name="t1_s1_dsp_cis_dft",
        y_name=f"t1_s1_dsp_cis_{name}",
        plot_type="scatter",
        zero_zero=False,
        **plot_kwargs,
        fig=fig,
        ax=ax[i],
        legend_loc="lower right",
    )
    ax[i].set_title(label)
    variables = add_to_variables(
        variables,
        results,
        label_calculation="opt",
        label_compound="corr",
        label_method=name,
    )
fig.tight_layout()

path_figure = snakemake.output.figure_corr
fig.savefig(path_figure)
# Stored without the file extension and relative to the parent directory.
variables["fig_opt_corr"] = "../" + str(Path(path_figure).with_suffix(""))

Extract mean optimization and run times

In [None]:
# Frames whose timing columns are summarised, keyed by the suffix used in the
# saved variable names.
timing_frames = {
    "mmff": df_mmff,
    "gfnff": df_gfnff,
    "gfn2": df_gfn2,
    "ani1ccx": df_ani1ccx,
}
# Mean of each timing column per method, scaled by 1000
# (presumably seconds -> milliseconds; confirm the unit against load_data).
results = {
    f"{column}_{method}": frame[column].mean() * 1000
    for column in ("opt_time", "run_time")
    for method, frame in timing_frames.items()
}
variables.update(results)

Save variables and parameters to file

In [None]:
path_variables = snakemake.output.variables
path_params = snakemake.output.params
# Write each dictionary to its own YAML file after passing it through
# format_dictionary_for_yaml with n_dec=2; params first, then variables.
for path_out, payload in ((path_params, params), (path_variables, variables)):
    with open(path_out, "w") as f:
        yaml.dump(format_dictionary_for_yaml(payload, n_dec=2), f)