Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve unphysical (greater than 1) occupancy handling in CifParser and add missing site label if not check_occu #3819

Merged
merged 21 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
16 changes: 11 additions & 5 deletions pymatgen/io/cif.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ class CifParser:
def __init__(
self,
filename: str | StringIO,
occupancy_tolerance: float = 1.0,
occupancy_tolerance: float = 0.0,
DanielYang59 marked this conversation as resolved.
Show resolved Hide resolved
site_tolerance: float = 1e-4,
frac_tolerance: float = 1e-4,
check_cif: bool = True,
Expand All @@ -296,7 +296,7 @@ def __init__(
"""
Args:
filename (str): CIF filename, gzipped or bzipped CIF files are fine too.
occupancy_tolerance (float): If total occupancy of a site is between 1 and occupancy_tolerance, the
occupancy_tolerance (float): If total occupancy of a site is between 1 and 1 + occupancy_tolerance, the
occupancies will be scaled down to 1.
site_tolerance (float): This tolerance is used to determine if two sites are sitting in the same position,
in which case they will be combined to a single disordered site. Defaults to 1e-4.
Expand Down Expand Up @@ -1082,8 +1082,15 @@ def get_matching_coord(coord):
all_species_noedit = all_species.copy() # save copy before scaling in case of check_occu=False, used below
for idx, species in enumerate(all_species):
total_occu = sum(species.values())
if 1 < total_occu <= self._occupancy_tolerance:
if check_occu:
if 1 < total_occu <= 1 + self._occupancy_tolerance:
all_species[idx] = species / total_occu
elif total_occu > 1 + self._occupancy_tolerance:
raise ValueError(f"Occupancy {total_occu} exceeded tolerance.")

elif total_occu > 1:
all_species[idx] = species / total_occu
self.warnings.append(f"Occupancy {total_occu} greater than 1.")

if all_species and len(all_species) == len(all_coords) and len(all_species) == len(all_magmoms):
site_properties = {}
Expand Down Expand Up @@ -1196,8 +1203,7 @@ def parse_structures(
"in the CIF file as is. If you want the primitive cell, please set primitive=True explicitly.",
UserWarning,
)
if not check_occu: # added in https://github.com/materialsproject/pymatgen/pull/2836
warnings.warn("Structures with unphysical site occupancies are not compatible with many pymatgen features.")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This warning should not be raised merely because check_occu is False; it should be raised only after actually checking the occupancies, and only if an occupancy is "unphysical".

The following code already performs this check, so I suggest removing the warning above:

pymatgen/pymatgen/io/cif.py

Lines 1011 to 1018 in 2e1c301

if any(occu > 1 for occu in sum_occu):
msg = (
f"Some occupancies ({sum_occu}) sum to > 1! If they are within "
"the occupancy_tolerance, they will be rescaled. "
f"The current occupancy_tolerance is set to: {self._occupancy_tolerance}"
)
warnings.warn(msg)
self.warnings.append(msg)


if primitive and symmetrized:
raise ValueError(
"Using both 'primitive' and 'symmetrized' arguments is not currently supported "
Expand Down
6 changes: 3 additions & 3 deletions tests/io/test_cif.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,10 +735,10 @@ def test_bad_cif(self):
filepath = f"{TEST_FILES_DIR}/cif/bad_occu.cif"
parser = CifParser(filepath)
with pytest.raises(
ValueError, match="No structure parsed for section 1 in CIF.\nSpecies occupancies sum to more than 1!"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The previous error message might be misleading, as parsing fails here because the occupancy exceeds the tolerance, not merely because it is greater than 1.

ValueError, match="No structure parsed for section 1 in CIF.\nOccupancy 1.556 exceeded tolerance."
):
parser.parse_structures(on_error="raise")
parser = CifParser(filepath, occupancy_tolerance=2)
parser = CifParser(filepath, occupancy_tolerance=1.0)
struct = parser.parse_structures()[0]
assert struct[0].species["Al3+"] == approx(0.778)

Expand Down Expand Up @@ -851,7 +851,7 @@ def test_no_check_occu(self):
# should fail without setting custom occupancy tolerance
CifParser.from_str(cif_str).parse_structures()

for tol in (1.5, 10):
for tol in (0.5, 9):
parser = CifParser.from_str(cif_str, occupancy_tolerance=tol)
structs = parser.parse_structures(primitive=False, check_occu=False)[0]
assert structs[0].species.as_dict()["Te"] == 1.5
Expand Down