Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX: ChemSage DAT: Case sensitivity of compound names in endmembers #426

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
25 changes: 20 additions & 5 deletions pycalphad/io/cs_dat.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ def _parse_species_postfix_charge(formula) -> v.Species:
else:
charge = 0
# assumes that the remaining formula is a pure element
constituents = dict(parse_chemical_formula(formula)[0])
return v.Species(name, constituents=constituents, charge=charge)
constituents = dict(parse_chemical_formula(formula.upper())[0])
return v.Species(name.upper(), constituents=constituents, charge=charge)

class TokenParserError(Exception):
"""Exception raised when the TokenParser hits a parsing error."""
Expand Down Expand Up @@ -643,6 +643,7 @@ def insert(self, dbf: Database, phase_name: str, As: List[str], Xs: List[str], e

def _species(el_chg):
    """Build a ``v.Species`` from an ``(element, charge)`` pair."""
    element, charge = el_chg
    # Normalize to upper case before deriving the name and parsing the formula
    element = element.upper()
    return v.Species(
        rename_element_charge(element, charge),
        constituents=dict(parse_chemical_formula(element)[0]),
        charge=charge,
    )
Expand Down Expand Up @@ -794,7 +795,7 @@ def parse_header(toks: TokenParser) -> Header:
num_soln_phases = toks.parse(int)
list_soln_species_count = toks.parseN(num_soln_phases, int)
num_stoich_phases = toks.parse(int)
pure_elements = toks.parseN(num_pure_elements, str)
pure_elements = [x.upper() for x in toks.parseN(num_pure_elements, str)]
pure_elements_mass = toks.parseN(num_pure_elements, float)
num_gibbs_coeffs = toks.parse(int)
gibbs_coefficient_idxs = toks.parseN(num_gibbs_coeffs, int)
Expand Down Expand Up @@ -838,6 +839,20 @@ def parse_interval_heat_capacity(toks: TokenParser, num_gibbs_coeffs, H298, S298

def parse_endmember(toks: TokenParser, num_pure_elements, num_gibbs_coeffs, is_stoichiometric=False):
species_name = toks.parse(str)
# We are case-sensitive here, e.g., Co (cobalt) != CO (carbon monoxide)
# pycalphad's Species parser is not. We need to help that parser by leaving Co alone while converting CO to C1O1.
# We split the name on every capital letter. If there is more than one substring, add a '1' to each substring that does not end in a digit or a closing ')' or ']'.
# Then we rejoin the string together and convert the species name to uppercase (the canonical form for Species).
possible_species = re.findall('.[^A-Z]*', species_name)
if len(possible_species) > 1:
rebuilt_species_name = []
for ps in possible_species:
if not (ps[-1].isdigit() or (ps[-1] in (')', ']'))):
rebuilt_species_name.append(ps+'1')
else:
rebuilt_species_name.append(ps)
species_name = ''.join(rebuilt_species_name)
species_name = species_name.upper()
if toks[0] == '#':
# special case for stoichiometric phases, this is a dummy species, skip it
_ = toks.parse(str)
Expand Down Expand Up @@ -1116,7 +1131,7 @@ def parse_phase_aqueous(toks, phase_name, phase_type, num_pure_elements, num_gib

def parse_phase(toks, num_pure_elements, num_gibbs_coeffs, num_excess_coeffs, num_const):
"""Dispatches to the correct parser depending on the phase type"""
phase_name = toks.parse(str)
phase_name = toks.parse(str).upper()
phase_type = toks.parse(str)
if phase_type in ('SUBQ', 'SUBG'):
phase = parse_phase_subq(toks, phase_name, phase_type, num_pure_elements, num_gibbs_coeffs, num_excess_coeffs)
Expand Down Expand Up @@ -1170,7 +1185,7 @@ def read_cs_dat(dbf: Database, fd):
fd : file-like
File descriptor.
"""
header, solution_phases, stoichiometric_phases = parse_cs_dat(fd.read().upper())
header, solution_phases, stoichiometric_phases = parse_cs_dat(fd.read())
# add elements and their reference states
for el, mass in zip(header.pure_elements, header.pure_elements_mass):
if 'E(' not in str(el):
Expand Down
36 changes: 36 additions & 0 deletions pycalphad/tests/databases/issue425.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
System Cu-Ni-Co-Fe-Mn-Sc-Ca-K-Cl-S-P-Si-Mg-O-N-C-B-H
18 1 2 0
Cu Ni Co
Fe Mn Sc
Ca K Cl
S P Si
Mg O N
C B H
63.54600000 58.69340000 58.93319500
55.84500000 54.93804500 44.95591200
40.07800000 39.09830000 35.45300000
32.06500000 30.97376200 28.08550000
24.30500000 15.99940000 14.00670000
12.01070000 10.81100000 1.00794000
6 1 2 3 4 5 6
6 1 2 3 4 5 6
gas_ideal
IDMX
CO
4 3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0
1700.0000 -203680.54 609.35468 -90.753584 0.31175713E-02
0.00000000 770627.67
2 32063.316 99.00 -10357.020 0.50
6000.0000 -224220.54 142.21490 -44.086320 0.00000000
0.00000000 5816543.2
2 20893.908 99.00 -2749.1074 0.50
6001.0000 -133600.03 72.371771 -38.372791 0.00000000
0.00000000 0.00000000
1 0.00000000 0.00
Co
4 1 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
6000.0000 421421.87 -3.5737662 -26.233680 0.00000000
0.00000000 0.00000000
1 0.00000000 0.00
6 changes: 5 additions & 1 deletion pycalphad/tests/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -860,4 +860,8 @@ def test_database_symmetry_options_are_generated(load_database):
# The parameters should be filtered out when writing such that a
# read/write is a no-op
read_dbf = Database.from_string(dbf.to_string(fmt="tdb"), fmt="tdb")
assert len(read_dbf._parameters) == 375
assert len(read_dbf._parameters) == 375

def test_dat_ambiguous_compound_names():
    """Compound names in DAT files are case sensitive (gh-425)."""
    # Loading the fixture is the whole test: it passes if no exception is raised.
    dat_path = files(pycalphad.tests.databases).joinpath("issue425.dat")
    Database.from_file(dat_path, fmt='dat')