-
Notifications
You must be signed in to change notification settings - Fork 867
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve unphysical (greater than 1) occupancy handling in CifParser
and add missing site label if not check_occu
#3819
Changes from all commits
d9e6187
1b7071e
7cf5ba9
2a8a5dd
5236e70
28961a8
a15d53b
a525c24
09ddaa4
d674708
6bc6071
05c11d4
81cb4b8
1f3c5e5
c76726a
188b2d6
8abe428
d4703a1
d122b71
812bb8f
082e1f9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -323,9 +323,9 @@ def __init__( | |||||||||||||||||
""" | ||||||||||||||||||
Args: | ||||||||||||||||||
filename (PathLike): CIF file, gzipped or bzipped CIF files are fine too. | ||||||||||||||||||
occupancy_tolerance (float): If total occupancy of a site is between 1 and occupancy_tolerance, the | ||||||||||||||||||
occupancies will be scaled down to 1. | ||||||||||||||||||
site_tolerance (float): This tolerance is used to determine if two sites are in the same position, | ||||||||||||||||||
occupancy_tolerance (float): If total occupancy of a site is between | ||||||||||||||||||
1 and occupancy_tolerance, it will be scaled down to 1. | ||||||||||||||||||
site_tolerance (float): This tolerance is used to determine if two sites are at the same position, | ||||||||||||||||||
in which case they will be combined to a single disordered site. Defaults to 1e-4. | ||||||||||||||||||
frac_tolerance (float): This tolerance is used to determine if a coordinate should be rounded to an ideal | ||||||||||||||||||
value. e.g. 0.6667 is rounded to 2/3. This is desired if symmetry operations are going to be applied. | ||||||||||||||||||
|
@@ -1027,11 +1027,11 @@ def get_matching_coord( | |||||||||||||||||
|
||||||||||||||||||
# Get occupancy | ||||||||||||||||||
try: | ||||||||||||||||||
occu = str2float(data["_atom_site_occupancy"][idx]) | ||||||||||||||||||
occu: float = str2float(data["_atom_site_occupancy"][idx]) | ||||||||||||||||||
except (KeyError, ValueError): | ||||||||||||||||||
occu = 1 | ||||||||||||||||||
|
||||||||||||||||||
# If check_occu is True or the occupancy is greater than 0, create comp_d | ||||||||||||||||||
# If don't check_occu or the occupancy is greater than 0, create comp_dict | ||||||||||||||||||
if not check_occu or occu > 0: | ||||||||||||||||||
# Create site coordinate | ||||||||||||||||||
coord: Vector3D = ( | ||||||||||||||||||
|
@@ -1073,7 +1073,7 @@ def get_matching_coord( | |||||||||||||||||
|
||||||||||||||||||
if any(occu > 1 for occu in _sum_occupancies): | ||||||||||||||||||
msg = ( | ||||||||||||||||||
f"Some occupancies ({_sum_occupancies}) sum to > 1! If they are within " | ||||||||||||||||||
f"Some occupancies ({filter(lambda x: x<=1, _sum_occupancies)}) sum to > 1! If they are within " | ||||||||||||||||||
"the occupancy_tolerance, they will be rescaled. " | ||||||||||||||||||
f"The current occupancy_tolerance is set to: {self._occupancy_tolerance}" | ||||||||||||||||||
) | ||||||||||||||||||
|
@@ -1149,7 +1149,10 @@ def get_matching_coord( | |||||||||||||||||
all_species_noedit = all_species.copy() # save copy before scaling in case of check_occu=False, used below | ||||||||||||||||||
for idx, species in enumerate(all_species): | ||||||||||||||||||
total_occu = sum(species.values()) | ||||||||||||||||||
if 1 < total_occu <= self._occupancy_tolerance: | ||||||||||||||||||
if check_occu and total_occu > self._occupancy_tolerance: | ||||||||||||||||||
raise ValueError(f"Occupancy {total_occu} exceeded tolerance.") | ||||||||||||||||||
|
||||||||||||||||||
if total_occu > 1: | ||||||||||||||||||
all_species[idx] = species / total_occu | ||||||||||||||||||
|
||||||||||||||||||
if all_species and len(all_species) == len(all_coords) and len(all_species) == len(all_magmoms): | ||||||||||||||||||
|
@@ -1198,6 +1201,7 @@ def get_matching_coord( | |||||||||||||||||
all_coords[idx], | ||||||||||||||||||
lattice, | ||||||||||||||||||
properties=site_properties, | ||||||||||||||||||
label=all_labels[idx], | ||||||||||||||||||
skip_checks=True, | ||||||||||||||||||
) | ||||||||||||||||||
|
||||||||||||||||||
|
@@ -1278,8 +1282,6 @@ def parse_structures( | |||||||||||||||||
"in the CIF file as is. If you want the primitive cell, please set primitive=True explicitly.", | ||||||||||||||||||
UserWarning, | ||||||||||||||||||
) | ||||||||||||||||||
if not check_occu: # added in https://github.com/materialsproject/pymatgen/pull/2836 | ||||||||||||||||||
warnings.warn("Structures with unphysical site occupancies are not compatible with many pymatgen features.") | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This warning should not be raised just because `check_occu` is False. The following code has already done this, so I would suggest removing it: Lines 1011 to 1018 in 2e1c301
|
||||||||||||||||||
|
||||||||||||||||||
if primitive and symmetrized: | ||||||||||||||||||
raise ValueError( | ||||||||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -731,17 +731,28 @@ def test_empty(self): | |
cb2 = CifBlock.from_str(str(cb)) | ||
assert cb == cb2 | ||
|
||
def test_bad_cif(self): | ||
def test_bad_occu(self): | ||
filepath = f"{TEST_FILES_DIR}/cif/bad_occu.cif" | ||
parser = CifParser(filepath) | ||
with pytest.raises( | ||
ValueError, match="No structure parsed for section 1 in CIF.\nSpecies occupancies sum to more than 1!" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The previous error message might be misleading, as this fails because the occupancy is greater than `occupancy_tolerance`. |
||
ValueError, match="No structure parsed for section 1 in CIF.\nOccupancy 1.556 exceeded tolerance." | ||
): | ||
parser.parse_structures(on_error="raise") | ||
parser = CifParser(filepath, occupancy_tolerance=2) | ||
struct = parser.parse_structures()[0] | ||
assert struct[0].species["Al3+"] == approx(0.778) | ||
|
||
def test_not_check_occu(self): | ||
# Test large occupancy with check_occu turned off | ||
with open(f"{TEST_FILES_DIR}/cif/site_type_symbol_test.cif") as cif_file: | ||
cif_str = cif_file.read() | ||
cif_str = cif_str.replace("Te Te 1.0000", "Te_label Te 10.0", 1) | ||
|
||
structs = CifParser.from_str(cif_str).parse_structures(check_occu=False) | ||
|
||
assert len(structs) > 0 | ||
assert set(structs[0].labels) == {"Te_label", "Ge"} | ||
|
||
def test_one_line_symm(self): | ||
cif_file = f"{TEST_FILES_DIR}/cif/OneLineSymmP1.cif" | ||
parser = CifParser(cif_file) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This would help capture the `occupancy > tolerance` error. Without this, if an occupancy is greater than `tolerance`, it would not be scaled, and would be passed directly into `Structure`. The `Exception` raised by `Structure` (because of unphysical occupancy) would be suppressed and replaced with a general message which doesn't show the reason for failure:
pymatgen/pymatgen/io/cif.py
Lines 1291 to 1301 in bb68c78
For example the following error message provided in #3816: