Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix CIF parser for awkward linebreaks #61

Merged
merged 4 commits into from
Jun 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
86 changes: 60 additions & 26 deletions matador/scrapers/cif_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ def cif2dict(seed, **kwargs):
doc['stoichiometry'] = _cif_disordered_stoichiometry(doc)
doc['num_atoms'] = len(doc['positions_frac'])

if '_space_group_symop_operation_xyz' in doc['_cif'] and '_symmetry_equiv_pos_as_xyz' not in doc['_cif']:
doc["_cif"]["_symmetry_equiv_pos_as_xyz"] = doc["_cif"]["_space_group_symop_operation_xyz"]
if '_symmetry_equiv_pos_as_xyz' in doc['_cif']:
_cif_set_unreduced_sites(doc)

Expand Down Expand Up @@ -153,8 +155,6 @@ def _cif_parse_raw(flines):
ind = 0
cif_dict = dict()
cif_dict['loops'] = list()
for line in flines:
line = line.strip()
while ind < len(flines):
jnd = 1
line = flines[ind].strip()
Expand All @@ -179,38 +179,72 @@ def _cif_parse_raw(flines):
for key in keys:
cif_dict[key] = []
cif_dict['loops'].append(keys)
while ind + jnd < len(flines) and _cif_line_contains_data(flines[ind+jnd]):
data = []
while ind + jnd < len(flines) and _cif_line_contains_data(flines[ind+jnd].strip()):
data = ''
# loop over line and next lines
while len(data) < len(keys) and ind + jnd < len(flines) and _cif_line_contains_data(flines[ind+jnd]):
# parse '' blocks out of strings
raw = flines[ind+jnd].split()
valid = False
while not valid:
valid = True
for i, entry in enumerate(raw):
if entry.startswith('\''):
start = i
valid = False
elif entry.endswith('\''):
end = i
valid = False
if not valid:
raw = raw[:start] + [' '.join(raw[start:end+1]).replace('\'', '')] + raw[end+1:]
data.extend(raw)
while ind + jnd < len(flines) and _cif_line_contains_data(flines[ind+jnd]):
data += flines[ind+jnd]
jnd += 1
try:
for index, datum in enumerate(data):
cif_dict[keys[index]].append(datum)
except Exception:
print('Failed to scrape one of {}'.format(keys))
pass

loop_dict = _cif_parse_loop(keys, data)
cif_dict.update(loop_dict)

ind += jnd

return cif_dict


def _cif_parse_loop(keys, data_block):
""" A hacky way to parse CIF data loops that can be split by quotes
or spaces. There must be a better way...

Parameters:
keys (list of str): list of keys for the loop.
data_block (str): raw string of the entire data block.

Returns:
Dict[str, str]: a dictionary with keys from ``keys``, containing the
data split by quotes and spaces. All data is left as
strings for further processing.

"""

from collections import deque, defaultdict

dq = deque(data_block)
data_list = []
entry = None
in_quotes = False
while dq:
char = dq.popleft()
if not char.strip() and entry is None:
continue
elif (not char.strip() or char in [" ", ";"]) and entry is not None and not in_quotes:
data_list.append(entry.strip())
entry = None
elif not char.strip() and entry is not None and in_quotes:
entry += " "
elif char == "'" and entry and entry is not None:
in_quotes = False
data_list.append(entry.strip())
entry = None
elif char == "'" and entry is None:
entry = ''
in_quotes = True
else:
if entry is None:
entry = char
else:
entry += char

loop_dict = defaultdict(list)
for ind, entry in enumerate(data_list):
ind = ind % len(keys)
loop_dict[keys[ind]].append(entry)

return loop_dict


def _cif_set_unreduced_sites(doc):
""" Expands sites by symmetry operations found under the key
`_symmetry_equiv_pos_as_xyz` in the cif_dict.
Expand Down
1 change: 1 addition & 0 deletions matador/scrapers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"bands2dict": ElectronicDispersion,
"castep2dict": Crystal,
"res2dict": Crystal,
"cif2dict": Crystal,
}


Expand Down
41 changes: 30 additions & 11 deletions scripts/pxrd_calculator
Original file line number Diff line number Diff line change
Expand Up @@ -78,23 +78,42 @@ def compute_pxrd(**kwargs):
for doc in strucs:
doc.pxrd.save_peaks(doc.root_source + '_pxrd_peaks.dat')

if kwargs.get('save_res'):
from matador.export import doc2res
for doc in strucs:
doc2res(doc, doc.root_source + '.res', info=False)


if __name__ == '__main__':
parser = argparse.ArgumentParser(
description="Compute, plot and export PXRD patterns from CIF file inputs.",
epilog=script_epilog
)
parser.add_argument('-l', '--wavelength', type=float, default=1.5406)
parser.add_argument('-bw', '--broadening_width', type=float, default=0.03)
parser.add_argument('-tm', '--theta_m', type=float, default=0.0)
parser.add_argument('--plot', action='store_true', help='show a plot of the PXRD patterns')
parser.add_argument('--savefig', type=str, help='save a plot to this file, e.g. "pxrd.pdf"')
parser.add_argument('-t', '--two_theta_range', nargs=2, type=float)
parser.add_argument('--spg_labels', action='store_true', help='label with spacegroup-formula instead of filename')
parser.add_argument('--save_patterns', action='store_true', help='save a .dat file with the xy pattern for each structure')
parser.add_argument('--save_peaks', action='store_true', help='save a .txt file per structure with a list of peaks')
parser.add_argument('--rugplot', action='store_true')
parser.add_argument('seeds', nargs='+', type=str, help='list of structures to compute')
parser.add_argument('-l', '--wavelength', type=float, default=1.5406,
help='the incident X-ray wavelength in Angstrom (DEFAULT: 1.5406, i.e. CuKa)')
parser.add_argument('-bw', '--broadening_width', type=float, default=0.03,
help='the width of broadening to apply to each peak')
parser.add_argument('-tm', '--theta_m', type=float, default=0.0,
help='the monochromator angle in degrees (DEFAULT: 0 degrees)')
parser.add_argument('--plot', action='store_true',
help='show a plot of the PXRD patterns')
parser.add_argument('--savefig', type=str,
help='save a plot to this file, e.g. "pxrd.pdf"')
parser.add_argument('-t', '--two_theta_range', nargs=2, type=float,
help="the two theta range to use for plotting/calculating the pattern (DEFAULT: 10 80)")
parser.add_argument('--spg_labels', action='store_true',
help='label with computed spacegroup-formula instead of filename')
parser.add_argument('--save_res', action='store_true',
help='save a res file with a closer interpretation of the structure used')
parser.add_argument('--save_patterns', action='store_true',
help='save a .dat file with the xy pattern for each structure')
parser.add_argument('--save_peaks', action='store_true',
help='save a .txt file per structure with a list of peaks')
parser.add_argument('--rugplot', action='store_true',
help="additionally plot peak positions as a rug plot")
parser.add_argument('seeds', nargs='+', type=str,
help='list of structures to compute')

parsed_kwargs = vars(parser.parse_args())
compute_pxrd(**parsed_kwargs)
print('Done!')