In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [None]:
# Directory names (resolved against the current working directory) that are
# scanned for "*.csv" files to compare.
COMPARE_SETS = ("tmp_compas", "tmp_qmugs")

# Conversion factors used throughout the comparison tables.
# NOTE(review): the values match the usual Hartree -> eV, Hartree -> kJ/mol and
# Hartree -> cal/mol factors; the names below assume that reading -- confirm
# against whatever produced the matched CSV files.
EV_PER_HARTREE = 27.2113961317875
KJ_MOL_PER_HARTREE = 2625.4996394799
CAL_MOL_PER_HARTREE = 627509.474


def _pair(x_column, y_column, public_unit, public_convert=1, y_multiplier=1):
    """Build one comparison entry.

    Args:
        x_column: Name of the first CSV column (always used with multiplier 1).
        y_column: Name of the second CSV column it is compared against.
        public_unit: Unit label printed next to the maximum absolute difference.
        public_convert: Factor applied to the difference before it is printed.
        y_multiplier: Factor applied to ``y_column`` before subtracting, so
            that both columns are expressed in the same unit.

    Returns:
        A dict with the keys ``"x"``, ``"y"``, ``"public_unit"`` and
        ``"public_convert"``.
    """
    return {
        "x": {"column": x_column, "multiplier": 1},
        "y": {"column": y_column, "multiplier": y_multiplier},
        "public_unit": public_unit,
        "public_convert": public_convert,
    }


def _compas_pairs():
    """Return a fresh comparison table shared by the COMPAS-1X and COMPAS-3X files."""
    return {
        "single_point_energy": _pair(
            "E",
            "Etot_eV",
            "kJ/mol",
            public_convert=KJ_MOL_PER_HARTREE,
            y_multiplier=1 / EV_PER_HARTREE,
        ),
        "homo": _pair("homo", "HOMO_eV", "eV"),
        "lumo": _pair("lumo", "LUMO_eV", "eV"),
        "homo_lumo_gap": _pair("gap", "GAP_eV", "eV"),
        # The compared column is "Dipmom_Debye", so the difference is reported
        # in Debye ("D", as for QMugs) rather than the previous "eV" label.
        "total_dipole": _pair("Mu", "Dipmom_Debye", "D"),
        "zero_point_energy": _pair(
            "zpe",
            "ZPE_eV",
            "kJ/mol",
            public_convert=KJ_MOL_PER_HARTREE,
            y_multiplier=1 / EV_PER_HARTREE,
        ),
    }


# For every public dataset name: which pairs of columns are compared, how the
# "y" column is rescaled to match "x", and in which unit the maximum absolute
# difference is reported.
COMPARE_PAIRS = {
    "QMugs": {
        "single_point_energy": _pair(
            "E", "GFN2_TOTAL_ENERGY", "kJ/mol", public_convert=KJ_MOL_PER_HARTREE
        ),
        "total_enthalpy": _pair(
            "H", "GFN2_TOTAL_ENTHALPY", "kJ/mol", public_convert=KJ_MOL_PER_HARTREE
        ),
        # NOTE(review): an entropy difference is presumably per kelvin, i.e.
        # J/(mol K); the "J/mol" label is kept as-is until that is confirmed.
        "total_entropy": _pair(
            "S",
            "GFN2_ENTROPY_TOT",
            "J/mol",
            public_convert=KJ_MOL_PER_HARTREE * 1000,
            y_multiplier=1 / CAL_MOL_PER_HARTREE,
        ),
        "total_gibbs": _pair(
            "G", "GFN2_TOTAL_FREE_ENERGY", "kJ/mol", public_convert=KJ_MOL_PER_HARTREE
        ),
        "homo": _pair("homo", "GFN2_HOMO_ENERGY", "eV", y_multiplier=EV_PER_HARTREE),
        "lumo": _pair("lumo", "GFN2_LUMO_ENERGY", "eV", y_multiplier=EV_PER_HARTREE),
        "homo_lumo_gap": _pair(
            "gap", "GFN2_HOMO_LUMO_GAP", "eV", y_multiplier=EV_PER_HARTREE
        ),
        "total_dipole": _pair("Mu", "GFN2_DIPOLE_TOT", "D"),
        "polarizability": _pair("alpha", "GFN2_POLARIZABILITY_MOLECULAR", "a.u."),
    },
    # Separate calls so the two datasets do not share mutable dict objects.
    "COMPAS-3X": _compas_pairs(),
    "COMPAS-1X": _compas_pairs(),
}

# Per-dataset values of the "key" column whose rows are dropped before any
# comparison is made.
# NOTE(review): the reason these three entries are excluded is not recorded
# in this notebook.
IGNORE_KEYS = {
    "COMPAS-1X": {
        "SOIXGBGHVSXNKH-UHFFFAOYSA-N",
        "RJXPMUHZQGSTIM-UHFFFAOYSA-N",
        "VEPXTABLWURNHE-UHFFFAOYSA-N",
    },
}

In [None]:
# Maps the matched-CSV file names to the public dataset names used as keys in
# COMPARE_PAIRS and IGNORE_KEYS. Files not listed here are skipped.
PUBLIC_NAMES = {
    "compas-1x.csv_matched.csv": "COMPAS-1X",
    "compas-3x.csv_matched.csv": "COMPAS-3X",
    "summary.csv_matched.csv": "QMugs",
}


def _max_abs_diff(data, pair):
    """Return the largest |x - y| of one comparison pair, scaled for display.

    Args:
        data: DataFrame holding both columns named in ``pair``.
        pair: One entry of COMPARE_PAIRS (keys "x", "y", "public_convert").

    Returns:
        The maximum absolute difference between the two rescaled columns,
        multiplied by ``pair["public_convert"]``.

    Raises:
        KeyError: If one of the configured columns is missing from ``data``.
    """
    x, y = pair["x"], pair["y"]
    x_values = data[x["column"]] * x["multiplier"]
    y_values = data[y["column"]] * y["multiplier"]
    # Subtract the Series directly instead of going through an intermediate
    # DataFrame keyed by column name.
    return (x_values - y_values).abs().max() * pair["public_convert"]


for compare_set in COMPARE_SETS:
    print(f"Processing set {compare_set}")

    set_dir = Path(compare_set).absolute()

    # glob() yields entries in filesystem order; sort so the printed report is
    # the same on every machine and every run.
    for file in sorted(set_dir.glob("*.csv")):
        filename_public = PUBLIC_NAMES.get(file.name)
        if filename_public is None:
            continue
        print(f"Processing file {file.name}")
        data = pd.read_csv(file)

        # Drop the rows explicitly excluded for this dataset, if any.
        ignored_keys = IGNORE_KEYS.get(filename_public)
        if ignored_keys:
            data = data[~data["key"].isin(ignored_keys)]

        print("Max abs diffs:")
        for pair_name, pair in COMPARE_PAIRS[filename_public].items():
            max_abs_diff = _max_abs_diff(data, pair)
            print(f" {pair_name}", f"{max_abs_diff:.2e}", pair["public_unit"])

Processing set tmp_compas
Processing file compas-1x.csv_matched.csv
Max abs diffs:
 single_point_energy 1.17e-01 kJ/mol
 homo 1.10e-03 eV
 lumo 1.20e-03 eV
 homo_lumo_gap 1.97e-03 eV
 total_dipole 4.00e-03 eV
 zero_point_energy 7.18e-02 kJ/mol
Processing file compas-3x.csv_matched.csv
Max abs diffs:
 single_point_energy 1.03e-01 kJ/mol
 homo 2.60e-03 eV
 lumo 5.40e-03 eV
 homo_lumo_gap 8.08e-03 eV
 total_dipole 4.00e-03 eV
 zero_point_energy 5.72e-02 kJ/mol
Processing set tmp_qmugs
Processing file summary.csv_matched.csv
Max abs diffs:
 single_point_energy 1.13e-01 kJ/mol
 total_enthalpy 1.09e-01 kJ/mol
 total_entropy 5.29e-01 J/mol
 total_gibbs 1.43e-01 kJ/mol
 homo 6.43e-04 eV
 lumo 2.30e-03 eV
 homo_lumo_gap 1.74e-03 eV
 total_dipole 2.30e-02 D
 polarizability 4.63e-04 a.u.
