In [None]:
from os import PathLike
from pathlib import Path

import numpy as np
import pandas as pd
from calculate import calculate_compound
from joblib import Parallel, delayed
from tqdm import tqdm

In [None]:
def process_compound(input_file: str | PathLike) -> pd.DataFrame:
    try:
        results = calculate_compound(str(input_file))
    except (ValueError, NotImplementedError, IndexError):
        results = {
            "s1_cis": [np.nan],
            "t1_cis": [np.nan],
            "exchange_integral": [np.nan],
            "dsp_scf": [np.nan],
            "dsp_cis": [np.nan],
            "homo_lumo_overlap": [np.nan],
            "oscillator_strength": [np.nan],
        }

    # Write output
    idx = input_file.stem
    data = {"id": [idx], **results}
    df = pd.DataFrame(data).set_index("id")
    return df

In [None]:
# Read the run configuration from the `snakemake` object available to this notebook
n_jobs = snakemake.threads  # worker count handed to joblib
log_path = Path(snakemake.log.progress)  # file that receives the progress bar
output_path = snakemake.output.combined  # destination of the combined CSV
input_path = Path(snakemake.input.path)  # directory searched for *.xyz files

In [None]:
# Run the calculation in parallel with joblib.
# Sort the matches first: the order in which a directory is listed depends on the
# filesystem, and the order of `results` follows the order of this list, so an
# unsorted glob makes the row order of the output differ between runs.
input_files = sorted(input_path.glob("*.xyz"))
with open(log_path, "w") as f:
    # tqdm wraps the task generator and writes its progress bar to the log file
    results = Parallel(n_jobs=n_jobs)(
        delayed(process_compound)(input_file)
        for input_file in tqdm(input_files, file=f, mininterval=5, maxinterval=5)
    )

In [None]:
# Stack the per-compound DataFrames into one table and write it as a single CSV
combined = pd.concat(results)
combined.to_csv(output_path)