In [34]:
from pathlib import Path

import numpy as np
import pandas as pd
import yaml
from coulson.interface import mol_from_xyz, process_rdkit_mol
from coulson.parameters import EA_BH, IP_BH
from utils import format_dictionary_for_yaml

Loop over all xyz files and collect the atom types that occur in them

In [33]:
# Every sub-directory directly below the Snakemake input path is searched for xyz files.
parent_path = Path(snakemake.input.path)
xyz_paths = [entry for entry in parent_path.glob("*") if entry.is_dir()]

# Union of the atom types reported for every structure that could be processed.
atom_types_all = set()
for xyz_dir in xyz_paths:
    for xyz_file in xyz_dir.glob("*.xyz"):
        try:
            mol = mol_from_xyz(str(xyz_file))
            input_data, mask = process_rdkit_mol(mol)
        except (ValueError, NotImplementedError, IndexError):
            # Structures that raise one of these errors are skipped silently.
            pass
        else:
            atom_types_all |= set(input_data.atom_types)

Create dataframe

In [37]:
# One record per collected atom type, pairing it with its entries in the
# IP_BH and EA_BH parameter tables (values labelled in eV).
rows = [
    {
        "Atom type": atom_type,
        "IP (eV)": IP_BH[atom_type],
        "EA (eV)": EA_BH[atom_type],
    }
    for atom_type in atom_types_all
]

In [57]:
# Build the parameter table: one row per atom type, indexed and sorted by atom type.
# The column names are passed explicitly so the frame stays well-formed when `rows`
# is empty; without them the "Atom type" column would not exist and `set_index`
# would raise a KeyError. Chaining (instead of `inplace=True`) builds `df` in a
# single expression, so it is never left in a partially transformed state.
df = (
    pd.DataFrame(rows, columns=["Atom type", "IP (eV)", "EA (eV)"])
    .set_index("Atom type")
    .sort_index()
)

Save Markdown table

In [64]:
# Destination of the rendered Markdown table, taken from the Snakemake outputs.
path_table = snakemake.output.table
# One format entry per rendered column: none for the leading (index) column,
# two decimals for each of the two value columns.
floatfmt = [None, ".2f", ".2f"]
# NaN entries are swapped for None before rendering and `missingval="-"` is passed
# so that missing parameters appear as "-" in the table.
# NOTE(review): how `replace(np.nan, None)` treats a None value has reportedly
# differed between pandas versions — confirm against the version pinned here.
(
    df
    .replace(np.nan, None)
    .to_markdown(path_table, floatfmt=floatfmt, missingval="-")
)

<IPython.core.display.Markdown object>

Add parameters to be saved

In [None]:
# Record the location of the Markdown table, prefixed with "../", under the key
# "tab_parameters" and write the mapping to the Snakemake params output as YAML.
params = {"tab_parameters": "../" + path_table}
formatted_params = format_dictionary_for_yaml(params, n_dec=2)
with open(snakemake.output.params, "w") as f:
    yaml.dump(formatted_params, f)