-
Notifications
You must be signed in to change notification settings - Fork 838
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
63 changed files
with
50,047 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# coding: utf-8 | ||
# Copyright (c) Pymatgen Development Team. | ||
# Distributed under the terms of the MIT License. | ||
|
||
|
||
""" | ||
This package implements modules for input and output to and from CREST | ||
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,178 @@ | ||
# coding: utf-8 | ||
# Copyright (c) Pymatgen Development Team. | ||
# Distributed under the terms of the MIT License. | ||
""" | ||
Parsers for XTB output files and directories | ||
""" | ||
import logging | ||
import os | ||
import re | ||
|
||
from monty.json import MSONable | ||
from pymatgen.core import Molecule | ||
from pymatgen.io.xyz import XYZ | ||
|
||
__author__ = "Alex Epstein" | ||
__copyright__ = "Copyright 2020, The Materials Project" | ||
__version__ = "0.1" | ||
__maintainer__ = "Alex Epstein" | ||
__email__ = "aepstein@lbl.gov" | ||
__credits__ = "Evan Spotte-Smith" | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class CRESTOutput(MSONable): | ||
""" | ||
Class to parse CREST output files | ||
""" | ||
|
||
def __init__(self, output_filename, path="."): | ||
""" | ||
Assumes runtype is iMTD-GC [default] | ||
Args: | ||
output_filename (str): Filename to parse | ||
path (str): Path to directory including output_filename and all | ||
other xtb output files (crest_best.xyz, etc.) | ||
""" | ||
|
||
self.path = path | ||
self.filename = output_filename | ||
|
||
self.cmd_options = dict() | ||
self.sorted_structures_energies = [] | ||
self.properly_terminated = False | ||
self._parse_crest_output() | ||
|
||
def _parse_crest_output(self): | ||
""" | ||
Parse output file and directory to extract all command line inputs | ||
and output files. | ||
Sets the attributes: | ||
cmd_options: Dict of type {flag: value} | ||
sorted_structrues_energies: n x m x 2 list, for n conformers, | ||
m rotamers per conformer, and tuple of | ||
[Molecule, energy] | ||
properly_terminated: True or False if run properly terminated | ||
""" | ||
output_filepath = os.path.join(self.path, self.filename) | ||
|
||
# Get CREST command | ||
crest_cmd = None | ||
with open(output_filepath, "r") as xtbout_file: | ||
for line in xtbout_file: | ||
if "> crest" in line: | ||
crest_cmd = line.strip()[8:] | ||
break | ||
|
||
split_cmd = crest_cmd.split(" ") | ||
|
||
# Get input file if present | ||
try: | ||
self.coord_file = os.path.join(self.path, split_cmd[0]) | ||
self.input_structure = Molecule.from_file(filename=self.coord_file) | ||
except FileNotFoundError: | ||
print("Input file {} not found".format(split_cmd[0])) | ||
|
||
# Get CREST input flags | ||
for i, entry in enumerate(split_cmd): | ||
value = None | ||
if entry: | ||
if "-" in entry: | ||
option = entry[1:] | ||
if i + 1 < len(split_cmd): | ||
if "-" not in split_cmd[i + 1]: | ||
value = split_cmd[i + 1] | ||
self.cmd_options[option] = value | ||
# Get input charge for decorating parsed molecules | ||
chg = 0 | ||
if "chrg" in self.cmd_options.keys(): | ||
str_chg = self.cmd_options["chrg"] | ||
if "-" in str_chg: | ||
chg = int(str_chg) | ||
else: | ||
chg = int(str_chg[-1]) | ||
elif "c" in self.cmd_options.keys(): | ||
str_chg = self.cmd_options["c"] | ||
if "-" in str_chg: | ||
chg = int(str_chg) | ||
else: | ||
chg = int(str_chg[-1]) | ||
|
||
# Check for proper termination | ||
with open(output_filepath, "rb+") as xtbout_file: | ||
xtbout_file.seek(-2, 2) | ||
while xtbout_file.read(1) != b"\n": | ||
xtbout_file.seek(-2, 1) | ||
end_bstring = xtbout_file.read() | ||
if b"CREST terminated normally." in end_bstring: | ||
self.properly_terminated = True | ||
|
||
if self.properly_terminated: | ||
# Parse for number of conformers and rotamers | ||
conformer_pattern = re.compile( | ||
r"\s+\d+\s+(?P<Erel>\d*\.\d*)\s+(?P<Etot>-*\d+\.\d+)\s+" | ||
r"(?P<weight>-*\d+\.\d+)\s+" | ||
r"(?P<conformer>-*\d+\.\d+)\s+(?P<set>\d+)\s+(?P<degen>\d+)\s+" | ||
r"(?P<origin>\w+)\n" | ||
) | ||
rotamer_pattern = re.compile( | ||
r"\s+\d+\s+(?P<Erel>\d*\.\d*)\s+(?P<Etot>-*\d+\.\d+)\s+" | ||
r"(?P<weight>-*\d+\.\d+)\s+" | ||
r"(?P<origin>\w+)\n" | ||
) | ||
conformer_degeneracies = [] | ||
energies = [] | ||
with open(output_filepath, "r") as xtbout_file: | ||
for line in xtbout_file: | ||
conformer_match = conformer_pattern.match(line) | ||
rotamer_match = rotamer_pattern.match(line) | ||
if conformer_match: | ||
conformer_degeneracies.append(int(conformer_match["degen"])) | ||
energies.append(conformer_match["Etot"]) | ||
elif rotamer_match: | ||
energies.append(rotamer_match["Etot"]) | ||
# Get final rotamers file and read in all molecules, | ||
# sorted by conformer type and energy | ||
if "crest_rotamers.xyz" in os.listdir(self.path): | ||
final_rotamer_filename = "crest_rotamers.xyz" | ||
else: | ||
n_rot_files = [] | ||
for f in os.listdir(self.path): | ||
if "crest_rotamers" in f: | ||
n_rot_file = int(os.path.splitext(f)[0].split("_")[2]) | ||
n_rot_files.append(n_rot_file) | ||
if len(n_rot_files) > 0: | ||
final_rotamer_filename = "crest_rotamers_{}.xyz".format(max(n_rot_files)) | ||
try: | ||
rotamers_path = os.path.join(self.path, final_rotamer_filename) | ||
rotamer_structures = XYZ.from_file(rotamers_path).all_molecules | ||
for r in rotamer_structures: | ||
r.set_charge_and_spin(charge=chg) | ||
start = 0 | ||
for n, d in enumerate(conformer_degeneracies): | ||
self.sorted_structures_energies.append([]) | ||
i = 0 | ||
for i in range(start, start + d): | ||
self.sorted_structures_energies[n].append([rotamer_structures[i], energies[i]]) | ||
start = i + 1 | ||
except FileNotFoundError: | ||
print("{} not found, no rotamer list processed".format(final_rotamer_filename)) | ||
|
||
# Get lowest energy conformer from 'crest_best.xyz' | ||
crestbest_path = os.path.join(self.path, "crest_best.xyz") | ||
try: | ||
lowest_e_struct = Molecule.from_file(crestbest_path) | ||
lowest_e_struct.set_charge_and_spin(charge=chg) | ||
self.lowest_energy_structure = lowest_e_struct | ||
except FileNotFoundError: | ||
print("{} not found".format(crestbest_path)) | ||
|
||
else: | ||
crestbest_path = os.path.join(self.path, "crest_best.xyz") | ||
try: | ||
lowest_e_struct = Molecule.from_file(crestbest_path) | ||
lowest_e_struct.set_charge_and_spin(charge=chg) | ||
self.lowest_energy_structure = lowest_e_struct | ||
except FileNotFoundError: | ||
print("{} not found".format(crestbest_path)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# coding: utf-8 | ||
# Copyright (c) Pymatgen Development Team. | ||
# Distributed under the terms of the MIT License. | ||
|
||
|
||
""" | ||
This package implements tests for CREST outputs. | ||
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
# coding: utf-8 | ||
# Copyright (c) Pymatgen Development Team. | ||
# Distributed under the terms of the MIT License. | ||
|
||
|
||
import os | ||
import unittest | ||
|
||
from pymatgen.core.structure import Molecule | ||
from pymatgen.io.qchem.outputs import check_for_structure_changes | ||
from pymatgen.io.xtb.outputs import CRESTOutput | ||
from pymatgen.util.testing import PymatgenTest | ||
|
||
try: | ||
from openbabel import openbabel as ob | ||
|
||
have_babel = True | ||
except ImportError: | ||
have_babel = False | ||
print("OpenBabel not found, parsed molecules structures will not be " "checked") | ||
|
||
__author__ = "Alex Epstein" | ||
__copyright__ = "Copyright 2020, The Materials Project" | ||
__version__ = "0.1" | ||
|
||
test_dir = os.path.join(os.path.dirname(__file__), PymatgenTest.TEST_FILES_DIR, "xtb", "sample_CREST_output") | ||
expected_output_dir = os.path.join(os.path.dirname(__file__), PymatgenTest.TEST_FILES_DIR, "xtb", "expected_output") | ||
|
||
|
||
class TestCRESTOutput(PymatgenTest): | ||
""" | ||
Checks that all attributes of CRESTOutput match the expected values for a | ||
sample CREST output directory. | ||
""" | ||
|
||
def test_all(self): | ||
expected_cmd_options = {"g": "H2O", "c": "2"} | ||
expected_energies = [ | ||
[ | ||
"-13.66580", | ||
"-13.66580", | ||
"-13.66580", | ||
"-13.66580", | ||
"-13.66580", | ||
"-13.66580", | ||
"-13.66580", | ||
"-13.66580", | ||
"-13.66580", | ||
"-13.66580", | ||
], | ||
[ | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
"-13.66479", | ||
], | ||
] | ||
expected_sorted_structures = [[], []] | ||
for f in os.listdir(expected_output_dir): | ||
if f.endswith("xyz") and "_r" in f: | ||
n_conf = int(f.split("_")[0][-1]) | ||
n_rot = int(f.split("_")[1].split(".")[0][-1]) | ||
m = Molecule.from_file(os.path.join(expected_output_dir, f)) | ||
expected_sorted_structures[n_conf].insert(n_rot, m) | ||
|
||
cout = CRESTOutput(output_filename="crest_out.out", path=test_dir) | ||
exp_best = Molecule.from_file(os.path.join(expected_output_dir, "expected_crest_best.xyz")) | ||
for i, c in enumerate(cout.sorted_structures_energies): | ||
for j, r in enumerate(c): | ||
if have_babel: | ||
self.assertEqual(check_for_structure_changes(r[0], expected_sorted_structures[i][j]), "no_change") | ||
self.assertAlmostEqual(float(r[1]), float(expected_energies[i][j]), 4) | ||
|
||
self.assertEqual(cout.properly_terminated, True) | ||
self.assertEqual(check_for_structure_changes(cout.lowest_energy_structure, exp_best), "no_change") | ||
self.assertDictEqual(cout.cmd_options, expected_cmd_options) | ||
|
||
|
||
if __name__ == "__main__": | ||
unittest.main() |
Oops, something went wrong.