Merge a749466 into 463e4f1

materialsproject · Jun 3, 2021 · e1617b7 · e1617b7
2 parents 463e4f1 + a749466
commit e1617b7
Show file tree

Hide file tree

Showing 63 changed files with 50,047 additions and 0 deletions.
diff --git a/pymatgen/io/xtb/__init__.py b/pymatgen/io/xtb/__init__.py
@@ -0,0 +1,8 @@
+# coding: utf-8
+# Copyright (c) Pymatgen Development Team.
+# Distributed under the terms of the MIT License.
+
+
+"""
+This package implements modules for input and output to and from CREST
+"""
diff --git a/pymatgen/io/xtb/outputs.py b/pymatgen/io/xtb/outputs.py
@@ -0,0 +1,178 @@
+# coding: utf-8
+# Copyright (c) Pymatgen Development Team.
+# Distributed under the terms of the MIT License.
+"""
+Parsers for XTB output files and directories
+"""
+import logging
+import os
+import re
+
+from monty.json import MSONable
+from pymatgen.core import Molecule
+from pymatgen.io.xyz import XYZ
+
+__author__ = "Alex Epstein"
+__copyright__ = "Copyright 2020, The Materials Project"
+__version__ = "0.1"
+__maintainer__ = "Alex Epstein"
+__email__ = "aepstein@lbl.gov"
+__credits__ = "Evan Spotte-Smith"
+
+logger = logging.getLogger(__name__)
+
+
+class CRESTOutput(MSONable):
+    """
+    Class to parse CREST output files
+    """
+
+    def __init__(self, output_filename, path="."):
+        """
+        Assumes runtype is iMTD-GC [default]
+        Args:
+            output_filename (str): Filename to parse
+            path (str): Path to directory including output_filename and all
+                other xtb output files (crest_best.xyz, etc.)
+        """
+
+        self.path = path
+        self.filename = output_filename
+
+        self.cmd_options = dict()
+        self.sorted_structures_energies = []
+        self.properly_terminated = False
+        self._parse_crest_output()
+
+    def _parse_crest_output(self):
+        """
+        Parse output file and directory to extract all command line inputs
+            and output files.
+        Sets the attributes:
+            cmd_options: Dict of type {flag: value}
+            sorted_structrues_energies: n x m x 2 list, for n conformers,
+                m rotamers per conformer, and tuple of
+                [Molecule, energy]
+            properly_terminated: True or False if run properly terminated
+        """
+        output_filepath = os.path.join(self.path, self.filename)
+
+        # Get CREST command
+        crest_cmd = None
+        with open(output_filepath, "r") as xtbout_file:
+            for line in xtbout_file:
+                if "> crest" in line:
+                    crest_cmd = line.strip()[8:]
+                    break
+
+        split_cmd = crest_cmd.split(" ")
+
+        # Get input file if present
+        try:
+            self.coord_file = os.path.join(self.path, split_cmd[0])
+            self.input_structure = Molecule.from_file(filename=self.coord_file)
+        except FileNotFoundError:
+            print("Input file {} not found".format(split_cmd[0]))
+
+        # Get CREST input flags
+        for i, entry in enumerate(split_cmd):
+            value = None
+            if entry:
+                if "-" in entry:
+                    option = entry[1:]
+                    if i + 1 < len(split_cmd):
+                        if "-" not in split_cmd[i + 1]:
+                            value = split_cmd[i + 1]
+                    self.cmd_options[option] = value
+        # Get input charge for decorating parsed molecules
+        chg = 0
+        if "chrg" in self.cmd_options.keys():
+            str_chg = self.cmd_options["chrg"]
+            if "-" in str_chg:
+                chg = int(str_chg)
+            else:
+                chg = int(str_chg[-1])
+        elif "c" in self.cmd_options.keys():
+            str_chg = self.cmd_options["c"]
+            if "-" in str_chg:
+                chg = int(str_chg)
+            else:
+                chg = int(str_chg[-1])
+
+        # Check for proper termination
+        with open(output_filepath, "rb+") as xtbout_file:
+            xtbout_file.seek(-2, 2)
+            while xtbout_file.read(1) != b"\n":
+                xtbout_file.seek(-2, 1)
+            end_bstring = xtbout_file.read()
+            if b"CREST terminated normally." in end_bstring:
+                self.properly_terminated = True
+
+        if self.properly_terminated:
+            # Parse for number of conformers and rotamers
+            conformer_pattern = re.compile(
+                r"\s+\d+\s+(?P<Erel>\d*\.\d*)\s+(?P<Etot>-*\d+\.\d+)\s+"
+                r"(?P<weight>-*\d+\.\d+)\s+"
+                r"(?P<conformer>-*\d+\.\d+)\s+(?P<set>\d+)\s+(?P<degen>\d+)\s+"
+                r"(?P<origin>\w+)\n"
+            )
+            rotamer_pattern = re.compile(
+                r"\s+\d+\s+(?P<Erel>\d*\.\d*)\s+(?P<Etot>-*\d+\.\d+)\s+"
+                r"(?P<weight>-*\d+\.\d+)\s+"
+                r"(?P<origin>\w+)\n"
+            )
+            conformer_degeneracies = []
+            energies = []
+            with open(output_filepath, "r") as xtbout_file:
+                for line in xtbout_file:
+                    conformer_match = conformer_pattern.match(line)
+                    rotamer_match = rotamer_pattern.match(line)
+                    if conformer_match:
+                        conformer_degeneracies.append(int(conformer_match["degen"]))
+                        energies.append(conformer_match["Etot"])
+                    elif rotamer_match:
+                        energies.append(rotamer_match["Etot"])
+            # Get final rotamers file and read in all molecules,
+            # sorted by conformer type and energy
+            if "crest_rotamers.xyz" in os.listdir(self.path):
+                final_rotamer_filename = "crest_rotamers.xyz"
+            else:
+                n_rot_files = []
+                for f in os.listdir(self.path):
+                    if "crest_rotamers" in f:
+                        n_rot_file = int(os.path.splitext(f)[0].split("_")[2])
+                        n_rot_files.append(n_rot_file)
+                if len(n_rot_files) > 0:
+                    final_rotamer_filename = "crest_rotamers_{}.xyz".format(max(n_rot_files))
+            try:
+                rotamers_path = os.path.join(self.path, final_rotamer_filename)
+                rotamer_structures = XYZ.from_file(rotamers_path).all_molecules
+                for r in rotamer_structures:
+                    r.set_charge_and_spin(charge=chg)
+                start = 0
+                for n, d in enumerate(conformer_degeneracies):
+                    self.sorted_structures_energies.append([])
+                    i = 0
+                    for i in range(start, start + d):
+                        self.sorted_structures_energies[n].append([rotamer_structures[i], energies[i]])
+                    start = i + 1
+            except FileNotFoundError:
+                print("{} not found, no rotamer list processed".format(final_rotamer_filename))
+
+            # Get lowest energy conformer from 'crest_best.xyz'
+            crestbest_path = os.path.join(self.path, "crest_best.xyz")
+            try:
+                lowest_e_struct = Molecule.from_file(crestbest_path)
+                lowest_e_struct.set_charge_and_spin(charge=chg)
+                self.lowest_energy_structure = lowest_e_struct
+            except FileNotFoundError:
+                print("{} not found".format(crestbest_path))
+
+        else:
+            crestbest_path = os.path.join(self.path, "crest_best.xyz")
+            try:
+                lowest_e_struct = Molecule.from_file(crestbest_path)
+                lowest_e_struct.set_charge_and_spin(charge=chg)
+                self.lowest_energy_structure = lowest_e_struct
+            except FileNotFoundError:
+                print("{} not found".format(crestbest_path))
diff --git a/pymatgen/io/xtb/tests/__init__.py b/pymatgen/io/xtb/tests/__init__.py
@@ -0,0 +1,8 @@
+# coding: utf-8
+# Copyright (c) Pymatgen Development Team.
+# Distributed under the terms of the MIT License.
+
+
+"""
+This package implements tests for CREST outputs.
+"""
diff --git a/pymatgen/io/xtb/tests/test_outputs.py b/pymatgen/io/xtb/tests/test_outputs.py
@@ -0,0 +1,103 @@
+# coding: utf-8
+# Copyright (c) Pymatgen Development Team.
+# Distributed under the terms of the MIT License.
+
+
+import os
+import unittest
+
+from pymatgen.core.structure import Molecule
+from pymatgen.io.qchem.outputs import check_for_structure_changes
+from pymatgen.io.xtb.outputs import CRESTOutput
+from pymatgen.util.testing import PymatgenTest
+
+try:
+    from openbabel import openbabel as ob
+
+    have_babel = True
+except ImportError:
+    have_babel = False
+    print("OpenBabel not found, parsed molecules structures will not be  " "checked")
+
+__author__ = "Alex Epstein"
+__copyright__ = "Copyright 2020, The Materials Project"
+__version__ = "0.1"
+
+test_dir = os.path.join(os.path.dirname(__file__), PymatgenTest.TEST_FILES_DIR, "xtb", "sample_CREST_output")
+expected_output_dir = os.path.join(os.path.dirname(__file__), PymatgenTest.TEST_FILES_DIR, "xtb", "expected_output")
+
+
+class TestCRESTOutput(PymatgenTest):
+    """
+    Checks that all attributes of CRESTOutput match the expected values for a
+    sample CREST output directory.
+    """
+
+    def test_all(self):
+        expected_cmd_options = {"g": "H2O", "c": "2"}
+        expected_energies = [
+            [
+                "-13.66580",
+                "-13.66580",
+                "-13.66580",
+                "-13.66580",
+                "-13.66580",
+                "-13.66580",
+                "-13.66580",
+                "-13.66580",
+                "-13.66580",
+                "-13.66580",
+            ],
+            [
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+                "-13.66479",
+            ],
+        ]
+        expected_sorted_structures = [[], []]
+        for f in os.listdir(expected_output_dir):
+            if f.endswith("xyz") and "_r" in f:
+                n_conf = int(f.split("_")[0][-1])
+                n_rot = int(f.split("_")[1].split(".")[0][-1])
+                m = Molecule.from_file(os.path.join(expected_output_dir, f))
+                expected_sorted_structures[n_conf].insert(n_rot, m)
+
+        cout = CRESTOutput(output_filename="crest_out.out", path=test_dir)
+        exp_best = Molecule.from_file(os.path.join(expected_output_dir, "expected_crest_best.xyz"))
+        for i, c in enumerate(cout.sorted_structures_energies):
+            for j, r in enumerate(c):
+                if have_babel:
+                    self.assertEqual(check_for_structure_changes(r[0], expected_sorted_structures[i][j]), "no_change")
+                self.assertAlmostEqual(float(r[1]), float(expected_energies[i][j]), 4)
+
+        self.assertEqual(cout.properly_terminated, True)
+        self.assertEqual(check_for_structure_changes(cout.lowest_energy_structure, exp_best), "no_change")
+        self.assertDictEqual(cout.cmd_options, expected_cmd_options)
+
+
+if __name__ == "__main__":
+    unittest.main()