From c146775d61aa9f1e1badc73997654c22544e7b92 Mon Sep 17 00:00:00 2001 From: Toon Verstraelen Date: Sat, 22 Jun 2024 09:19:02 +0200 Subject: [PATCH 1/5] Replace lit.error by raise LoadError --- CONTRIBUTING.rst | 14 ++++- iodata/api.py | 8 +-- iodata/formats/charmm.py | 10 ++-- iodata/formats/cp2klog.py | 8 +-- iodata/formats/fchk.py | 20 +++---- iodata/formats/fcidump.py | 8 ++- iodata/formats/gamess.py | 8 ++- iodata/formats/gaussianinput.py | 4 +- iodata/formats/gromacs.py | 39 ++++++------- iodata/formats/json.py | 35 +++++------ iodata/formats/mol2.py | 2 +- iodata/formats/molden.py | 29 +++++---- iodata/formats/molekel.py | 21 ++++--- iodata/formats/mwfn.py | 40 ++++++++----- iodata/formats/pdb.py | 2 +- iodata/formats/sdf.py | 8 +-- iodata/formats/wfn.py | 20 ++++--- iodata/formats/wfx.py | 24 ++++---- iodata/test/test_gaussianinput.py | 4 +- iodata/test/test_wfx.py | 10 +--- iodata/utils.py | 97 ++++++++++++++++++------------- 21 files changed, 231 insertions(+), 180 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index e70be6d1..b06cf546 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -153,8 +153,18 @@ If necessary, you can *push back* the line for later reading with ``lit.back(lin lit.back(line) break -When you encounter a file format error while reading the file, call ``lit.error(msg)``, -where ``msg`` is a short message describing the problem. +When you encounter a file format error while reading the file, raise a ``LoadError`` exception: + +.. code-block:: python + + from ..utils import LoadError + + @document_load_one(...) + def load_one(lit: LineIterator) -> dict: + ... + if something_wrong: + raise LoadError("Describe problem in a sentence.", lit) + The error that appears in the terminal will automatically include the file name and line number. diff --git a/iodata/api.py b/iodata/api.py index 26f3c22d..eaa2d855 100644 --- a/iodata/api.py +++ b/iodata/api.py @@ -154,10 +154,10 @@ def load_one(filename: str, fmt: Optional[str] = None, **kwargs) -> IOData: iodata = IOData(**format_module.load_one(lit, **kwargs)) except LoadError: raise - except StopIteration: - lit.error("File ended before all data was read.") + except StopIteration as exc: + raise LoadError("File ended before all data was read.", lit) from exc except Exception as exc: - raise LoadError(f"{filename}: Uncaught exception while loading file.") from exc + raise LoadError("Uncaught exception while loading file.", lit) from exc return iodata @@ -194,7 +194,7 @@ def load_many(filename: str, fmt: Optional[str] = None, **kwargs) -> Iterator[IO except LoadError: raise except Exception as exc: - raise LoadError(f"{filename}: Uncaught exception while loading file.") from exc + raise LoadError("Uncaught exception while loading file.", lit) from exc def _check_required(iodata: IOData, dump_func: Callable): diff --git a/iodata/formats/charmm.py b/iodata/formats/charmm.py index ae9f8149..7e51766f 100644 --- a/iodata/formats/charmm.py +++ b/iodata/formats/charmm.py @@ -32,7 +32,7 @@ import numpy as np from ..docstrings import document_load_one -from ..utils import LineIterator, amu, angstrom +from ..utils import LineIterator, LoadError, amu, angstrom __all__ = [] @@ -48,8 +48,10 @@ def load_one(lit: LineIterator) -> dict: while True: try: line = next(lit) - except StopIteration: - lit.error("Title section of CRD has no ending marker (missing bare *).") + except StopIteration as exc: + raise LoadError( + "Title section of CRD has no ending marker (missing bare *).", lit + ) from exc # Get title from crd file. if line.startswith("*"): text = line[1:] @@ -84,7 +86,7 @@ def _helper_read_crd(lit: LineIterator) -> tuple: # Read the line for number of atoms. natom = next(lit) if natom is None or not natom.strip().isdigit(): - lit.error("The number of atoms must be an integer.") + raise LoadError("The number of atoms must be an integer.", lit) natom = int(natom) # Read the atom lines resnums = [] diff --git a/iodata/formats/cp2klog.py b/iodata/formats/cp2klog.py index 38a33fc1..bcd8492c 100644 --- a/iodata/formats/cp2klog.py +++ b/iodata/formats/cp2klog.py @@ -27,7 +27,7 @@ from ..docstrings import document_load_one from ..orbitals import MolecularOrbitals from ..overlap import factorial2 -from ..utils import LineIterator +from ..utils import LineIterator, LoadError __all__ = [] @@ -185,7 +185,7 @@ def _read_cp2k_obasis(lit: LineIterator) -> dict: " ********************* Uncontracted Gaussian Type Orbitals *********************\n" ): return _read_cp2k_uncontracted_obasis(lit) - lit.error("Could not find basis set in CP2K ATOM output.") + raise LoadError("Could not find basis set in CP2K ATOM output.", lit) return None @@ -436,13 +436,13 @@ def load_one(lit: LineIterator) -> dict: " Atomic orbital expansion coefficients [Alpha]\n", " Atomic orbital expansion coefficients []\n", ]: - lit.error("Could not find orbital coefficients in CP2K ATOM output.") + raise LoadError("Could not find orbital coefficients in CP2K ATOM output.", lit) coeffs_alpha = _read_cp2k_orbital_coeffs(lit, oe_alpha) if not restricted: line = next(lit) if line != " Atomic orbital expansion coefficients [Beta]\n": - lit.error("Could not find beta orbital coefficient in CP2K ATOM output.") + raise LoadError("Could not find beta orbital coefficient in CP2K ATOM output.", lit) coeffs_beta = _read_cp2k_orbital_coeffs(lit, oe_beta) # Turn orbital data into a MolecularOrbitals object. diff --git a/iodata/formats/fchk.py b/iodata/formats/fchk.py index 63c70338..eef58110 100644 --- a/iodata/formats/fchk.py +++ b/iodata/formats/fchk.py @@ -29,7 +29,7 @@ from ..docstrings import document_dump_one, document_load_many, document_load_one from ..iodata import IOData from ..orbitals import MolecularOrbitals -from ..utils import DumpError, LineIterator, PrepareDumpError, amu +from ..utils import DumpError, LineIterator, LoadError, PrepareDumpError, amu __all__ = [] @@ -219,9 +219,9 @@ def load_one(lit: LineIterator) -> dict: nalpha = fchk["Number of alpha electrons"] nbeta = fchk["Number of beta electrons"] if nalpha < 0 or nbeta < 0 or nalpha + nbeta <= 0: - lit.error("The number of electrons is not positive.") + raise LoadError("The number of electrons is not positive.", lit) if nalpha < nbeta: - lit.error(f"n_alpha={nalpha} < n_beta={nbeta} is not valid!") + raise LoadError(f"n_alpha={nalpha} < n_beta={nbeta} is invalid.", lit) norba = fchk["Alpha Orbital Energies"].shape[0] mo_coeffs = np.copy(fchk["Alpha MO coefficients"].reshape(norba, nbasis).T) @@ -323,7 +323,7 @@ def load_many(lit: LineIterator) -> Iterator[dict]: prefix = "Opt point" nsteps = fchk["Optimization Number of geometries"] else: - lit.error("Could not find IRC or Optimization trajectory in FCHK file.") + raise LoadError("Cannot find IRC or Optimization trajectory in FCHK file.", lit) natom = fchk["Atomic numbers"].size for ipoint, nstep in enumerate(nsteps): @@ -382,7 +382,7 @@ def _load_fchk_low(lit: LineIterator, label_patterns: Optional[list[str]] = None elif len(words) == 2: result["command"], result["lot"] = words else: - lit.error("The second line of the FCHK file should contain two or three words.") + raise LoadError("The second line of the FCHK file should contain two or three words.", lit) while True: try: @@ -434,11 +434,11 @@ def _load_fchk_field(lit: LineIterator, label_patterns: list[str]) -> tuple[str, if len(words) == 2: try: return label, datatype(words[1]) - except ValueError: - lit.error(f"Could not interpret: {words[1]}") + except ValueError as exc: + raise LoadError(f"Could not interpret as {datatype}: {words[1]}", lit) from exc elif len(words) == 3: if words[1] != "N=": - lit.error("Expected N= not found.") + raise LoadError("Expected N= not found.", lit) length = int(words[2]) value = np.zeros(length, datatype) counter = 0 @@ -449,8 +449,8 @@ def _load_fchk_field(lit: LineIterator, label_patterns: list[str]) -> tuple[str, word = words.pop(0) try: value[counter] = datatype(word) - except (ValueError, OverflowError): - lit.error(f"Could not interpret: {word}") + except (ValueError, OverflowError) as exc: + raise LoadError(f"Could not interpret as {datatype}: {word}", lit) from exc counter += 1 return label, value diff --git a/iodata/formats/fcidump.py b/iodata/formats/fcidump.py index 9c17c386..ed1e0fc1 100644 --- a/iodata/formats/fcidump.py +++ b/iodata/formats/fcidump.py @@ -34,7 +34,7 @@ from ..docstrings import document_dump_one, document_load_one from ..iodata import IOData -from ..utils import LineIterator, set_four_index_element +from ..utils import LineIterator, LoadError, set_four_index_element __all__ = [] @@ -50,7 +50,7 @@ def load_one(lit: LineIterator) -> dict: # check header line = next(lit) if not line.startswith(" &FCI NORB="): - lit.error("Incorrect file header") + raise LoadError(f"Incorrect file header: {line.strip()}", lit) # read info from header words = line[5:].split(",") @@ -77,7 +77,9 @@ def load_one(lit: LineIterator) -> dict: for line in lit: words = line.split() if len(words) != 5: - lit.error("Expecting 5 fields on each data line in FCIDUMP") + raise LoadError( + f"Expecting 5 fields on each data line in FCIDUMP, got {len(words)}.", lit + ) value = float(words[0]) if words[3] != "0": ii = int(words[1]) - 1 diff --git a/iodata/formats/gamess.py b/iodata/formats/gamess.py index 820da5e7..30de23eb 100644 --- a/iodata/formats/gamess.py +++ b/iodata/formats/gamess.py @@ -22,7 +22,7 @@ from numpy.typing import NDArray from ..docstrings import document_load_one -from ..utils import LineIterator, angstrom +from ..utils import LineIterator, LoadError, angstrom __all__ = [] @@ -37,7 +37,7 @@ def _read_data(lit: LineIterator) -> tuple[str, str, list[str]]: # The dat file only contains symmetry-unique atoms, so we would be incapable of # supporting non-C1 symmetry without significant additional coding. if symmetry != "C1": - lit.error(f"Only C1 symmetry is supported. Got {symmetry}") + raise LoadError(f"Only C1 symmetry is supported, got {symmetry}.", lit) symbols = [] line = True while line != " $END \n": @@ -86,7 +86,9 @@ def _read_hessian(lit: LineIterator, result: dict[str]) -> NDArray[float]: """Extract ``hessian`` from the punch file.""" # check that $HESS is not already parsed if "athessian" in result: - lit.error("Cannot parse $HESS twice! Make sure approximate hessian is not being parsed!") + raise LoadError( + "Cannot parse $HESS twice. Make sure approximate hessian is not being parsed." + ) next(lit) natom = len(result["symbols"]) hessian = np.zeros((3 * natom, 3 * natom), float) diff --git a/iodata/formats/gaussianinput.py b/iodata/formats/gaussianinput.py index e2200e6a..2c8f29b8 100644 --- a/iodata/formats/gaussianinput.py +++ b/iodata/formats/gaussianinput.py @@ -22,7 +22,7 @@ from ..docstrings import document_load_one from ..periodic import sym2num -from ..utils import LineIterator, angstrom +from ..utils import LineIterator, LoadError, angstrom __all__ = [] @@ -68,7 +68,7 @@ def load_one(lit: LineIterator): if not contents: break if len(contents) != 4: - lit.error("No Cartesian Structure is detected") + raise LoadError("No Cartesian Structure is detected.", lit) numbers.append(sym2num[contents[0]]) coor = list(map(float, contents[1:])) coordinates.append(coor) diff --git a/iodata/formats/gromacs.py b/iodata/formats/gromacs.py index d7080bf4..9604d5ef 100644 --- a/iodata/formats/gromacs.py +++ b/iodata/formats/gromacs.py @@ -41,27 +41,24 @@ @document_load_one("GRO", ["atcoords", "atffparams", "cellvecs", "extra", "title"]) def load_one(lit: LineIterator) -> dict: """Do not edit this docstring. It will be overwritten.""" - while True: - data = _helper_read_frame(lit) - title = data[0] - time = data[1] - resnums = np.array(data[2]) - resnames = np.array(data[3]) - attypes = np.array(data[4]) - atcoords = data[5] - velocities = data[6] - cellvecs = data[7] - atffparams = {"attypes": attypes, "resnames": resnames, "resnums": resnums} - extra = {"time": time, "velocities": velocities} - return { - "atcoords": atcoords, - "atffparams": atffparams, - "cellvecs": cellvecs, - "extra": extra, - "title": title, - } - lit.error("Gromacs gro file could not be read.") - return None + data = _helper_read_frame(lit) + title = data[0] + time = data[1] + resnums = np.array(data[2]) + resnames = np.array(data[3]) + attypes = np.array(data[4]) + atcoords = data[5] + velocities = data[6] + cellvecs = data[7] + atffparams = {"attypes": attypes, "resnames": resnames, "resnums": resnums} + extra = {"time": time, "velocities": velocities} + return { + "atcoords": atcoords, + "atffparams": atffparams, + "cellvecs": cellvecs, + "extra": extra, + "title": title, + } @document_load_many("GRO", ["atcoords", "atffparams", "cellvecs", "extra", "title"]) diff --git a/iodata/formats/json.py b/iodata/formats/json.py index c3b03447..eca7e644 100644 --- a/iodata/formats/json.py +++ b/iodata/formats/json.py @@ -651,7 +651,7 @@ def _parse_json(json_in: dict, lit: LineIterator) -> dict: # Check if BSE file, which is too different elif "molssi_bse_schema" in result: raise LoadError( - f"{lit.filename}: IOData does not currently support MolSSI BSE Basis JSON." + "IOData does not currently support MolSSI BSE Basis JSON.", lit.filename ) # Center_data is required in any basis schema elif "center_data" in result: @@ -659,7 +659,7 @@ def _parse_json(json_in: dict, lit: LineIterator) -> dict: elif "driver" in result: schema_name = "qcschema_output" if "return_result" in result else "qcschema_input" else: - raise LoadError(f"{lit.filename}: Could not determine `schema_name`.") + raise LoadError("Could not determine `schema_name`.", lit.filename) if "schema_version" not in result: warn( f"{lit.filename}: QCSchema files should have a `schema_version` key." @@ -677,8 +677,9 @@ def _parse_json(json_in: dict, lit: LineIterator) -> dict: if schema_name == "qcschema_output": return _load_qcschema_output(result, lit) raise LoadError( - f"{lit.filename}: Invalid QCSchema type {result['schema_name']}, should be one of " - "`qcschema_molecule`, `qcschema_basis`, `qcschema_input`, or `qcschema_output`." + f"Invalid QCSchema type {result['schema_name']}, should be one of " + "`qcschema_molecule`, `qcschema_basis`, `qcschema_input`, or `qcschema_output`.", + lit.filename, ) @@ -759,7 +760,7 @@ def _parse_topology_keys(mol: dict, lit: LineIterator) -> dict: ) for key in topology_keys: if key not in mol: - raise LoadError(f"{lit.filename}: QCSchema topology requires '{key}' key") + raise LoadError(f"QCSchema topology requires '{key}' key", lit.filename) topology_dict = {} extra_dict = {} @@ -1038,7 +1039,7 @@ def _load_qcschema_input(result: dict, lit: LineIterator) -> dict: extra_dict["input"] = input_dict["extra"] if "molecule" not in result: - raise LoadError(f"{lit.filename}: QCSchema Input requires 'molecule' key") + raise LoadError("QCSchema Input requires 'molecule' key", lit.filename) molecule_dict = _parse_topology_keys(result["molecule"], lit) input_dict.update(molecule_dict) extra_dict["molecule"] = molecule_dict["extra"] @@ -1078,7 +1079,7 @@ def _parse_input_keys(result: dict, lit: LineIterator) -> dict: ) for key in input_keys: if key not in result: - raise LoadError(f"{lit.filename}: QCSchema `qcschema_input` file requires '{key}' key") + raise LoadError(f"QCSchema `qcschema_input` file requires '{key}' key", lit.filename) # Store all extra keys in extra_dict and gather at end input_dict = {} extra_dict = {} @@ -1173,8 +1174,8 @@ def _parse_driver(driver: str, lit: LineIterator) -> str: """ if driver not in ["energy", "gradient", "hessian", "properties"]: raise LoadError( - f"{lit.filename}: QCSchema driver must be one of `energy`, `gradient`, `hessian`, " - "or `properties`" + "QCSchema driver must be one of `energy`, `gradient`, `hessian`, or `properties`", + lit.filename, ) return driver @@ -1200,7 +1201,7 @@ def _parse_model(model: dict, lit: LineIterator) -> dict: extra_dict = {} if "method" not in model: - raise LoadError(f"{lit.filename}: QCSchema `model` requires a `method`") + raise LoadError("QCSchema `model` requires a `method`", lit.filename) model_dict["lot"] = model["method"] # QCEngineRecords doesn't give an empty string for basis-free methods, omits req'd key instead if "basis" not in model: @@ -1264,10 +1265,10 @@ def _parse_protocols(protocols: dict, lit: LineIterator) -> dict: keep_stdout = protocols["stdout"] protocols_dict = {} if wavefunction not in {"all", "orbitals_and_eigenvalues", "return_results", "none"}: - raise LoadError(f"{lit.filename}: Invalid `protocols` `wavefunction` keyword.") + raise LoadError("Invalid `protocols` `wavefunction` keyword.", lit.filename) protocols_dict["keep_wavefunction"] = wavefunction if not isinstance(keep_stdout, bool): - raise LoadError("{}: `protocols` `stdout` option must be a boolean.") + raise LoadError("`protocols` `stdout` option must be a boolean.", lit.filename) protocols_dict["keep_stdout"] = keep_stdout return protocols_dict @@ -1296,7 +1297,7 @@ def _load_qcschema_output(result: dict, lit: LineIterator) -> dict: extra_dict["output"] = output_dict["extra"] if "molecule" not in result: - raise LoadError(f"{lit.filename}: QCSchema Input requires 'molecule' key") + raise LoadError("QCSchema Input requires 'molecule' key", lit.filename) molecule_dict = _parse_topology_keys(result["molecule"], lit) output_dict.update(molecule_dict) extra_dict["molecule"] = molecule_dict["extra"] @@ -1338,7 +1339,7 @@ def _parse_output_keys(result: dict, lit: LineIterator) -> dict: ) for key in output_keys: if key not in result: - raise LoadError(f"{lit.filename}: QCSchema `qcschema_output` file requires '{key}' key") + raise LoadError(f"QCSchema `qcschema_output` file requires '{key}' key", lit.filenam) # Store all extra keys in extra_dict and gather at end output_dict = {} @@ -1413,7 +1414,7 @@ def _parse_provenance( """ if isinstance(provenance, dict): if "creator" not in provenance: - raise LoadError(f"{lit.filename}: `{source}` provenance requires `creator` key") + raise LoadError(f"`{source}` provenance requires `creator` key", lit.filename) if append: base_provenance = [provenance] else: @@ -1421,10 +1422,10 @@ def _parse_provenance( elif isinstance(provenance, list): for prov in provenance: if "creator" not in prov: - raise LoadError("{}: `{}` provenance requires `creator` key") + raise LoadError(f"`{source}` provenance requires `creator` key", lit.filename) base_provenance = provenance else: - raise LoadError(f"{lit.filename}: Invalid `{source}` provenance type") + raise LoadError(f"Invalid `{source}` provenance type", lit.filename) if append: base_provenance.append( {"creator": "IOData", "version": __version__, "routine": "iodata.formats.json.load_one"} diff --git a/iodata/formats/mol2.py b/iodata/formats/mol2.py index 814e7a85..defd4d70 100644 --- a/iodata/formats/mol2.py +++ b/iodata/formats/mol2.py @@ -80,7 +80,7 @@ def load_one(lit: LineIterator) -> dict: bonds = _load_helper_bonds(lit, nbonds) result["bonds"] = bonds if not molecule_found: - lit.error("Molecule could not be read") + raise LoadError("Molecule could not be read.", lit) return result diff --git a/iodata/formats/molden.py b/iodata/formats/molden.py index 2cfc48bd..e3cc02ab 100644 --- a/iodata/formats/molden.py +++ b/iodata/formats/molden.py @@ -45,7 +45,7 @@ from ..orbitals import MolecularOrbitals from ..overlap import compute_overlap, gob_cart_normalization from ..periodic import num2sym, sym2num -from ..utils import DumpError, LineIterator, PrepareDumpError, angstrom +from ..utils import DumpError, LineIterator, LoadError, PrepareDumpError, angstrom __all__ = [] @@ -141,7 +141,7 @@ def _load_low(lit: LineIterator) -> dict: line = next(lit) if line.strip() != "[Molden Format]": - lit.error("Molden header not found") + raise LoadError("Molden header not found.", lit) # The order of sections, denoted by "[...]", is not fixed in the Molden # format, so we need a loop that checks for all possible sections at # each iteration. If needed, the contents of the section is read. @@ -181,7 +181,7 @@ def _load_low(lit: LineIterator) -> dict: elif line == "[gto]": obasis = _load_helper_obasis(lit) elif line == "[sto]": - lit.error("Slater-type orbitals are not supported by IODATA.") + raise LoadError("Slater-type orbitals are not supported by IODATA.", lit) # molecular-orbital coefficients. elif line == "[mo]": data_alpha, data_beta = _load_helper_coeffs(lit) @@ -197,13 +197,17 @@ def _load_low(lit: LineIterator) -> dict: if coeffsb is None: if coeffsa.shape[0] != obasis.nbasis: - lit.error("Number of alpha orbital coefficients does not match the size of the basis.") + raise LoadError( + "Number of alpha orbital coefficients does not match the size of the basis.", lit + ) mo = MolecularOrbitals( "restricted", coeffsa.shape[1], coeffsa.shape[1], occsa, coeffsa, energiesa, irrepsa ) else: if coeffsb.shape[0] != obasis.nbasis: - lit.error("Number of beta orbital coefficients does not match the size of the basis.") + raise LoadError( + "Number of beta orbital coefficients does not match the size of the basis.", lit + ) mo = MolecularOrbitals( "unrestricted", coeffsa.shape[1], @@ -663,7 +667,7 @@ def _fix_molden_from_buggy_codes(result: dict, lit: LineIterator, norm_threshold coeffsa = result["mo"].coeffsa coeffsb = result["mo"].coeffsb else: - lit.error(f"Molecular orbital kind={result['mo'].kind} not recognized") + raise LoadError(f"Molecular orbital kind={result['mo'].kind} not recognized.", lit) if _is_normalized_properly(obasis, atcoords, coeffsa, coeffsb, norm_threshold): # The file is good. No need to change obasis. @@ -733,12 +737,13 @@ def _fix_molden_from_buggy_codes(result: dict, lit: LineIterator, norm_threshold result["mo"].coeffsb[:] = coeffsb_psi4 return - lit.error( - "Could not correct the data read from {}. The molden or mkl file " - "you are trying to load contains errors. Please make an issue " - "here: https://github.com/theochem/iodata/issues, and attach " - "this file. Please provide one or more small files causing this " - "error. Thanks!" + raise LoadError( + "The molden or mkl file you are trying to load contains errors. " + "Please make an issue here: https://github.com/theochem/iodata/issues, " + "and attach this file and explain which program you used to create it. " + "Please provide one or more small files causing this error. " + "Thanks!", + lit, ) diff --git a/iodata/formats/molekel.py b/iodata/formats/molekel.py index 5ddce78a..b1552e43 100644 --- a/iodata/formats/molekel.py +++ b/iodata/formats/molekel.py @@ -32,7 +32,7 @@ from ..docstrings import document_dump_one, document_load_one from ..iodata import IOData from ..orbitals import MolecularOrbitals -from ..utils import DumpError, LineIterator, PrepareDumpError, angstrom +from ..utils import DumpError, LineIterator, LoadError, PrepareDumpError, angstrom from .molden import CONVENTIONS, _fix_molden_from_buggy_codes __all__ = [] @@ -92,7 +92,9 @@ def _load_helper_obasis(lit: LineIterator) -> MolecularBasis: elif nbasis_shell == len(CONVENTIONS[(angmom, "p")]): kind = "p" else: - lit.error(f"Cannot interpret angmom={angmom} with nbasis_shell={nbasis_shell}") + raise LoadError( + f"Cannot interpret angmom={angmom} with nbasis_shell={nbasis_shell}.", lit + ) exponents = [] coeffs = [] for line in lit: @@ -207,15 +209,15 @@ def load_one(lit: LineIterator, norm_threshold: float = 1e-4) -> dict: occsb = _load_helper_occ(lit) if charge is None: - lit.error("Charge and spin polarization not found.") + raise LoadError("Charge and spin polarization not found.", lit) if atcoords is None: - lit.error("Coordinates not found.") + raise LoadError("Coordinates not found.", lit) if obasis is None: - lit.error("Orbital basis not found.") + raise LoadError("Orbital basis not found.", lit) if coeffsa is None: - lit.error("Alpha orbitals not found.") + raise LoadError("Alpha orbitals not found.", lit) if occsa is None: - lit.error("Alpha occupation numbers not found.") + raise LoadError("Alpha occupation numbers not found.", lit) nelec = atnums.sum() - charge if coeffsb is None: @@ -227,8 +229,9 @@ def load_one(lit: LineIterator, norm_threshold: float = 1e-4) -> dict: ) else: if occsb is None: - lit.error( - "Beta occupation numbers not found in mkl file while beta orbitals were present." + raise LoadError( + "Beta occupation numbers not found in mkl file while beta orbitals were present.", + lit, ) nalpha = int(np.round(occsa.sum())) nbeta = int(np.round(occsb.sum())) diff --git a/iodata/formats/mwfn.py b/iodata/formats/mwfn.py index 7b276859..e41166f1 100644 --- a/iodata/formats/mwfn.py +++ b/iodata/formats/mwfn.py @@ -90,7 +90,9 @@ def _load_helper_opener(lit: LineIterator) -> dict: # check values parsed if data["Ncenter"] <= 0: - lit.error(f"Ncenter should be a positive integer! Read Ncenter= {data['Ncenter']}") + raise LoadError( + f"Ncenter should be a positive integer. Read Ncenter= {data['Ncenter']}.", lit + ) # Possible values of Wfntype (wavefunction type): # 0: Restricted closed - shell single - determinant wavefunction(e.g.RHF, RKS) # 1: Unrestricted open - shell single - determinant wavefunction(e.g.UHF, UKS) @@ -104,7 +106,9 @@ def _load_helper_opener(lit: LineIterator) -> dict: # unrestricted wavefunction data["mo_kind"] = "unrestricted" else: - lit.error(f"Wavefunction type cannot be determined. Read Wfntype= {data['Wfntype']}") + raise LoadError( + f"Wavefunction type cannot be determined. Read Wfntype= {data['Wfntype']}.", lit + ) return data @@ -149,7 +153,9 @@ def _load_helper_atoms(lit: LineIterator, natom: int) -> dict: # check atomic numbers if min(data["atnums"]) <= 0: - lit.error(f"Atomic numbers should be positive integers! Read atnums= {data['atnums']}") + raise LoadError( + f"Atomic numbers should be positive integers. Read atnums= {data['atnums']}.", lit + ) return data @@ -167,7 +173,7 @@ def _load_helper_shells(lit: LineIterator, nshell: int) -> dict: data = {} for section, name in zip(sections, var_name): if not line.startswith(section): - lit.error(f"Expected line to start with {section}, but got line={line}.") + raise LoadError(f"Expected line to start with {section}, but got line={line}.", lit) data[name] = _load_helper_section(lit, nshell, " ", 0, int) line = next(lit) lit.back(line) @@ -182,7 +188,7 @@ def _load_helper_section( while len(section) < nprim: line = next(lit) if not line.startswith(start): - lit.error(f"Expected line to start with {start}! Got line={line}") + raise LoadError(f"Expected line to start with {start}. Got line={line}.", lit) section.extend(line.split()[skip:]) return np.array(section, dtype) @@ -250,10 +256,10 @@ def _load_mwfn_low(lit: LineIterator) -> dict: # load primitive exponents & coefficients if not next(lit).startswith("$Primitive exponents"): - lit.error("Expected '$Primitive exponents' section!") + raise LoadError("Expected '$Primitive exponents' section.", lit) data["exponents"] = _load_helper_section(lit, data["Nprimshell"], "", 0, float) if not next(lit).startswith("$Contraction coefficients"): - lit.error("Expected '$Contraction coefficients' section!") + raise LoadError("Expected '$Contraction coefficients' section.", lit) data["coeffs"] = _load_helper_section(lit, data["Nprimshell"], "", 0, float) # get number of basis & molecular orbitals (MO) @@ -302,8 +308,9 @@ def load_one(lit: LineIterator) -> dict: # check number of basis functions if obasis.nbasis != inp["Nbasis"]: raise LoadError( - f"{lit.filename}: Number of basis in MolecularBasis is not equal to the 'Nbasis'. " - f"{obasis.nbasis} != {inp['Nbasis']}" + f"Number of basis functions in MolecularBasis ({obasis.nbasis}) " + f"is not equal to the 'Nbasis' ({inp['Nbasis']}).", + lit.filename, ) # Determine number of orbitals of each kind. @@ -325,18 +332,21 @@ def load_one(lit: LineIterator) -> dict: # check number of electrons if mo.nelec != inp["Naelec"] + inp["Nbelec"]: raise LoadError( - f"{lit.filename}: Number of electrons in MolecularOrbitals is not equal to the sum of " - f"'Naelec' and 'Nbelec'. {mo.nelec} != {inp['Naelec']} + {inp['Nbelec']}" + f"Number of electrons in MolecularOrbitals ({mo.nelec}) is not equal to " + f"the sum of 'Naelec' and 'Nbelec' ({inp['Naelec']} + {inp['Nbelec']}).", + lit.filename, ) if mo.occsa.sum() != inp["Naelec"]: raise LoadError( - f"{lit.filename}: Number of alpha electrons in MolecularOrbitals is not equal to the " - f"'Naelec'. {mo.occsa.sum()} != {inp['Naelec']}" + f"Number of alpha electrons in MolecularOrbitals ({mo.occsa.sum()}) " + f"is not equal to the 'Naelec' ({inp['Naelec']}).", + lit.filename, ) if mo.occsb.sum() != inp["Nbelec"]: raise LoadError( - f"{lit.filename}: Number of beta electrons in MolecularOrbitals is not equal to the " - f"'Nbelec'. {mo.occsb.sum()} != {inp['Nbelec']}" + f"Number of beta electrons in MolecularOrbitals ({mo.occsb.sum()})" + f"is not equal to the 'Nbelec' ({inp['Nbelec']}).", + lit.filename, ) return { diff --git a/iodata/formats/pdb.py b/iodata/formats/pdb.py index aec22ef1..4dd3954b 100644 --- a/iodata/formats/pdb.py +++ b/iodata/formats/pdb.py @@ -188,7 +188,7 @@ def load_one(lit: LineIterator) -> dict: end_reached = True break if not molecule_found: - lit.error("Molecule could not be read.") + raise LoadError("Molecule could not be read.", lit) if not end_reached: lit.warn("The END is not found, but the parsed data is returned.") diff --git a/iodata/formats/sdf.py b/iodata/formats/sdf.py index baa6f442..fd90064a 100644 --- a/iodata/formats/sdf.py +++ b/iodata/formats/sdf.py @@ -42,7 +42,7 @@ ) from ..iodata import IOData from ..periodic import num2sym, sym2num -from ..utils import LineIterator, angstrom +from ..utils import LineIterator, LoadError, angstrom __all__ = [] @@ -61,7 +61,7 @@ def load_one(lit: LineIterator) -> dict: natom = int(words[0]) nbond = int(words[1]) if words[-1].upper() != "V2000": - lit.error("Only V2000 SDF files are supported.") + raise LoadError("Only V2000 SDF files are supported.", lit) atcoords = np.empty((natom, 3), float) atnums = np.empty(natom, int) for iatom in range(natom): @@ -82,8 +82,8 @@ def load_one(lit: LineIterator) -> dict: while True: try: words = next(lit) - except StopIteration: - lit.error("Molecule specification did not end properly with $$$$") + except StopIteration as exc: + raise LoadError("Molecule specification did not end properly with $$$$.", lit) from exc if words == "$$$$\n": break return { diff --git a/iodata/formats/wfn.py b/iodata/formats/wfn.py index e8bd19cd..9b2b5188 100644 --- a/iodata/formats/wfn.py +++ b/iodata/formats/wfn.py @@ -38,7 +38,7 @@ from ..orbitals import MolecularOrbitals from ..overlap import gob_cart_normalization from ..periodic import num2sym, sym2num -from ..utils import LineIterator, PrepareDumpError +from ..utils import LineIterator, LoadError, PrepareDumpError __all__ = [] @@ -130,7 +130,7 @@ def _load_helper_num(lit: LineIterator) -> list[int]: """Read number of orbitals, primitives and atoms.""" line = next(lit) if not line.startswith("G"): - lit.error("Expecting line to start with 'G'") + raise LoadError("Expecting line to start with 'G'.", lit) # FORMAT (16X,I7,13X,I7,11X,I9) num_mo = int(line[16:23]) nprim = int(line[36:43]) @@ -165,13 +165,13 @@ def _load_helper_section( while len(section) < n: line = next(lit) if not line.startswith(start): - lit.error(f"Expecting line to start with '{start}'") + raise LoadError(f"Expecting line to start with '{start}'.", lit) line = line[skip:] while len(line) >= step: section.append(dtype(line[:step].replace("D", "E"))) line = line[step:] if len(section) != n: - lit.error("Number of elements in section do not match 'n'") + raise LoadError("Number of elements in section do not match 'n'.", lit) return np.array(section, dtype=dtype) @@ -179,7 +179,7 @@ def _load_helper_mo(lit: LineIterator, nprim: int) -> tuple[int, float, float, N """Read one section of MO information.""" line = next(lit) if not line.startswith("MO"): - lit.error("Expecting line to start with 'MO'") + raise LoadError("Expecting line to start with 'MO'.", lit) # FORMAT (2X,I5,27X,F13.7,15X,F12.6) number = int(line[2:7]) occ = float(line[34:47]) @@ -319,19 +319,21 @@ def build_obasis( type_assignments[ibasis + ncon * ifn : ibasis + ncon * (ifn + 1)] == type_assignments[ibasis + ncon * ifn] ).all(): - lit.error("Inconcsistent type assignments in current batch of shells.") + raise LoadError("Inconcsistent type assignments in current batch of shells.", lit) # Check if all basis functions in the current batch sit on # the same center. If not, IOData cannot read this file. icenter = icenters[ibasis] if not (icenters[ibasis : ibasis + ncon * ncart] == icenter).all(): - lit.error("Incomplete shells in WFN file not supported by IOData.") + raise LoadError("Incomplete shells in WFN file not supported by IOData.", lit) # Check if the same exponent is used for corresponding basis functions. batch_exponents = exponents[ibasis : ibasis + ncon] for ifn in range(ncart): if not ( exponents[ibasis + ncon * ifn : ibasis + ncon * (ifn + 1)] == batch_exponents ).all(): - lit.error("Exponents must be the same for corresponding basis functions.") + raise LoadError( + "Exponents must be the same for corresponding basis functions.", lit + ) # A permutation is needed because we need to regroup basis functions # into shells. batch_primitive_names = [ @@ -418,7 +420,7 @@ def load_one(lit: LineIterator) -> dict: norb_b = np.sum(mo_spin == 2) norb_ab = np.sum(mo_spin == 3) if norb_a + norb_b + norb_ab != norb or (norb_b and norb_ab): - lit.error("Invalid orbital spin types.") + raise LoadError("Invalid orbital spin types.", lit) extra["mo_spin"] = mo_spin # Determine norb_a,norb_b,norb_ab for restricted wave function by heuristic. elif mo_occs.max() > 1.0: diff --git a/iodata/formats/wfx.py b/iodata/formats/wfx.py index a3359111..4fbbf67a 100644 --- a/iodata/formats/wfx.py +++ b/iodata/formats/wfx.py @@ -32,7 +32,7 @@ from ..iodata import IOData from ..orbitals import MolecularOrbitals from ..periodic import num2sym -from ..utils import LineIterator, PrepareDumpError +from ..utils import LineIterator, LoadError, PrepareDumpError from .wfn import CONVENTIONS, build_obasis, get_mocoeff_scales __all__ = [] @@ -159,9 +159,11 @@ def load_data_wfx(lit: LineIterator) -> dict: key = result["keywords"] num = result["num_perturbations"] if key not in perturbation_check: - lit.error(f"The keywords is {key}, but it should be either GTO, GIAO or CGST") + raise LoadError(f"The keywords is {key}, but it should be either GTO, GIAO or CGST.", lit) if num != perturbation_check[key]: - lit.error(f"Number of perturbations of {key} is {num}, expected {perturbation_check[key]}") + raise LoadError( + f"Number of perturbations of {key} is {num}, expected {perturbation_check[key]}.", lit + ) return result @@ -182,7 +184,7 @@ def parse_wfx(lit: LineIterator, required_tags: Optional[list] = None) -> dict: # set start & end of the section and add it to data dictionary section_start = line if section_start in data: - lit.error(f"Section with tag={section_start} is repeated!") + raise LoadError(f"Section with tag={section_start} is repeated.", lit) data[section_start] = [] section_end = line[:1] + "/" + line[1:] # special handling of section @@ -192,7 +194,7 @@ def parse_wfx(lit: LineIterator, required_tags: Optional[list] = None) -> dict: elif section_start is not None and line.startswith(" line under section @@ -207,12 +209,12 @@ def parse_wfx(lit: LineIterator, required_tags: Optional[list] = None) -> dict: # check if last section was closed if section_start is not None: - lit.error(f"Section {section_start} is not closed at end of file.") + raise LoadError(f"Section {section_start} is not closed at end of file.", lit) # check required section tags if required_tags is not None: for section_tag in required_tags: if section_tag not in data: - lit.error(f"Section {section_tag} is missing from loaded WFX data.") + raise LoadError(f"Section {section_tag} is missing from loaded WFX data.", lit) return data @@ -262,9 +264,9 @@ def load_one(lit: LineIterator) -> dict: norba = norbb + data["mo_spins"].count("Alpha") # check that mo_spin list contains no surprises if data["mo_spins"] != ["Alpha and Beta"] * norbb + ["Alpha"] * (norba - norbb): - lit.error("Unsupported values.") + raise LoadError("Unsupported values.", lit) if norba != data["mo_coeffs"].shape[1]: - lit.error("Number of orbitals inconsistent with orbital spin types.") + raise LoadError("Number of orbitals inconsistent with orbital spin types.", lit) # create molecular orbitals, which requires knowing the number of alpha and beta molecular # orbitals. These are expected to be the same for 'restricted' case, however, the number of # Alpha/Beta counts might not be the same for the restricted WFX (e.g., restricted @@ -289,10 +291,10 @@ def load_one(lit: LineIterator) -> dict: norbb = data["mo_spins"].count("Beta") # check that mo_spin list contains no surprises if data["mo_spins"] != ["Alpha"] * norba + ["Beta"] * norbb: - lit.error("Unsupported molecular orbital spin types.") + raise LoadError("Unsupported molecular orbital spin types.", lit) # check that number of orbitals match number of MO coefficients if norba + norbb != data["mo_coeffs"].shape[1]: - lit.error("Number of orbitals inconsistent with orbital spin types.") + raise LoadError("Number of orbitals inconsistent with orbital spin types.", lit) # Create orbitals. For unrestricted wavefunctions, IOData uses the same # conventions as WFX. mo = MolecularOrbitals( diff --git a/iodata/test/test_gaussianinput.py b/iodata/test/test_gaussianinput.py index 5ddaa5aa..f7e903b1 100644 --- a/iodata/test/test_gaussianinput.py +++ b/iodata/test/test_gaussianinput.py @@ -24,7 +24,7 @@ from numpy.testing import assert_allclose, assert_equal, assert_raises from ..api import load_one -from ..utils import angstrom +from ..utils import LoadError, angstrom def test_load_water_com(): @@ -65,7 +65,7 @@ def test_load_multi_title(): def test_load_error(): # test error raises when loading .com with z-matrix with ( - assert_raises(ValueError), + assert_raises(LoadError), as_file(files("iodata.test.data").joinpath("water_z.com")) as fn, ): load_one(str(fn)) diff --git a/iodata/test/test_wfx.py b/iodata/test/test_wfx.py index bc52dab6..0fb0f394 100644 --- a/iodata/test/test_wfx.py +++ b/iodata/test/test_wfx.py @@ -585,7 +585,7 @@ def test_parse_wfx_missing_tag_h2o(): pytest.raises(LoadError) as error, ): parse_wfx(lit, required_tags=[""]) - assert str(error.value).endswith("Section is missing from loaded WFX data.") + assert "Section is missing from loaded WFX data." in str(error) def test_load_data_wfx_h2o_error(): @@ -595,9 +595,7 @@ def test_load_data_wfx_h2o_error(): pytest.raises(LoadError) as error, ): load_one(str(fn_wfx)) - assert str(error.value).endswith( - "Expecting line but got ." - ) + assert "Expecting line but got ." in str(error) def test_load_truncated_h2o(tmpdir): @@ -608,9 +606,7 @@ def test_load_truncated_h2o(tmpdir): pytest.raises(LoadError) as error, ): load_one(str(fn_truncated)) - assert str(error.value).endswith( - "Section is not closed at end of file." - ) + assert "Section is not closed at end of file." in str(error) def test_load_one_h2o(): diff --git a/iodata/utils.py b/iodata/utils.py index bfdaf2e2..99f78a53 100644 --- a/iodata/utils.py +++ b/iodata/utils.py @@ -19,6 +19,7 @@ """Utility functions module.""" import warnings +from typing import TextIO import attrs import numpy as np @@ -62,34 +63,6 @@ kjmol: float = 1e3 / spc.value("Avogadro constant") / spc.value("Hartree energy") -class FileFormatError(ValueError): - """Raise when a file or input format cannot be identified.""" - - -class LoadError(ValueError): - """Raised when an error is encountered while loading from a file.""" - - -class LoadWarning(Warning): - """Raised when incorrect content is encountered and fixed when loading from a file.""" - - -class DumpError(ValueError): - """Raised when an error is encountered while dumping to a file.""" - - -class DumpWarning(Warning): - """Raised when an IOData object is made compatible with a format when dumping to a file.""" - - -class PrepareDumpError(ValueError): - """Raised when an IOData object is incompatible with a format before dumping to a file.""" - - -class WriteInputError(ValueError): - """Raised when an error is encountered while writing an input file.""" - - class LineIterator: """Iterator class for looping over lines and keeping track of the line number. @@ -132,17 +105,6 @@ def __next__(self): self.lineno += 1 return self.stack.pop() if self.stack else next(self.fh) - def error(self, msg: str): - """Raise an error while reading a file. - - Parameters - ---------- - msg - Message to raise alongside filename and line number. - - """ - raise LoadError(f"{self.filename}:{self.lineno} {msg}") - def warn(self, msg: str): """Raise a warning while reading a file. @@ -160,6 +122,63 @@ def back(self, line): self.lineno -= 1 +class FileFormatError(ValueError): + """Raise when a file or input format cannot be identified.""" + + +class LoadError(Exception): + """Raised when an error is encountered while loading from a file.""" + + def __init__( + self, + message, + file: str | LineIterator | TextIO | None = None, + lineno: int | None = None, + ): + super().__init__(message) + # Get the extra info + self.filename = None + self.lineno = None + if isinstance(file, str): + self.filename = file + elif isinstance(file, LineIterator): + self.filename = file.filename + if lineno is None: + self.lineno = file.lineno + elif isinstance(file, TextIO): + self.filename = file.name + + def __str__(self): + if self.filename is None: + location = "" + elif self.lineno is None: + location = f" ({self.filename})" + else: + location = f" ({self.filename}:{self.lineno})" + message = super().__str__() + return f"{message}{location}" + + +class LoadWarning(Warning): + """Raised when incorrect content is encountered and fixed when loading from a file.""" + + +class DumpError(ValueError): + """Raised when an error is encountered while dumping to a file.""" + + +class DumpWarning(Warning): + """Raised when an IOData object is made compatible with a format when dumping to a file.""" + + +class PrepareDumpError(ValueError): + """Raised when an IOData object is incompatible with a format before dumping to a file.""" + + +class WriteInputError(ValueError): + """Raised when an error is encountered while writing an input file.""" + + @attrs.define class Cube: """The volumetric data from a cube (or similar) file.""" From 5132a2dab91715d1faf659b08ab9f422657546da Mon Sep 17 00:00:00 2001 From: Toon Verstraelen Date: Sat, 22 Jun 2024 09:32:06 +0200 Subject: [PATCH 2/5] Remove useless return, now properly detected by static analysis --- iodata/formats/cp2klog.py | 1 - 1 file changed, 1 deletion(-) diff --git a/iodata/formats/cp2klog.py b/iodata/formats/cp2klog.py index bcd8492c..1c2f75ac 100644 --- a/iodata/formats/cp2klog.py +++ b/iodata/formats/cp2klog.py @@ -186,7 +186,6 @@ def _read_cp2k_obasis(lit: LineIterator) -> dict: ): return _read_cp2k_uncontracted_obasis(lit) raise LoadError("Could not find basis set in CP2K ATOM output.", lit) - return None def _read_cp2k_occupations_energies( From bd81359e6cf32d5627b944836a71d5fd5573a22c Mon Sep 17 00:00:00 2001 From: Toon Verstraelen Date: Sat, 22 Jun 2024 09:35:58 +0200 Subject: [PATCH 3/5] Update CONTRIBUTING.rst Co-authored-by: sourcery-ai[bot] <58596630+sourcery-ai[bot]@users.noreply.github.com> --- CONTRIBUTING.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index b06cf546..d9f10643 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -163,7 +163,7 @@ When you encounter a file format error while reading the file, raise a ``LoadErr def load_one(lit: LineIterator) -> dict: ... if something_wrong: - raise LoadError("Describe problem in a sentence.", lit) + raise LoadError("Describe the problem in a sentence.", lit) The error that appears in the terminal will automatically include the file name and line number. From d40b8e0e8992a0bec73205f5bf1b1806d4f236d4 Mon Sep 17 00:00:00 2001 From: Toon Verstraelen Date: Sat, 22 Jun 2024 09:37:55 +0200 Subject: [PATCH 4/5] Improve contributor guide --- CONTRIBUTING.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index d9f10643..ae802f3c 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -166,6 +166,9 @@ When you encounter a file format error while reading the file, raise a ``LoadErr raise LoadError("Describe the problem in a sentence.", lit) The error that appears in the terminal will automatically include the file name and line number. +If your code has already read the full file and encounters an error when processing the data, +you can use ``raise LoadError("Describe problem in a sentence.", lit.filename)`` instead. +This way, no line number is included in the error message. ``dump_one`` functions: writing a single IOData object to a file From 84e8f8fa8267daae350fe2008fbb0d85aa7cdcfb Mon Sep 17 00:00:00 2001 From: Toon Verstraelen Date: Sat, 22 Jun 2024 09:39:26 +0200 Subject: [PATCH 5/5] Python 3.9 compatibility --- iodata/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/iodata/utils.py b/iodata/utils.py index 99f78a53..c9ed1ca3 100644 --- a/iodata/utils.py +++ b/iodata/utils.py @@ -19,7 +19,7 @@ """Utility functions module.""" import warnings -from typing import TextIO +from typing import Optional, TextIO, Union import attrs import numpy as np @@ -132,8 +132,8 @@ class LoadError(Exception): def __init__( self, message, - file: str | LineIterator | TextIO | None = None, - lineno: int | None = None, + file: Optional[Union[str, LineIterator, TextIO]] = None, + lineno: Optional[int] = None, ): super().__init__(message) # Get the extra info