ml-evs · ml-evs · Jun 7, 2020 · Jun 7, 2020 · Jun 7, 2020 · Jun 7, 2020
@@ -88,6 +88,8 @@ def cif2dict(seed, **kwargs):
     doc['stoichiometry'] = _cif_disordered_stoichiometry(doc)
     doc['num_atoms'] = len(doc['positions_frac'])
 
+    if '_space_group_symop_operation_xyz' in doc['_cif'] and '_symmetry_equiv_pos_as_xyz' not in doc['_cif']:
+        doc["_cif"]["_symmetry_equiv_pos_as_xyz"] = doc["_cif"]["_space_group_symop_operation_xyz"]
     if '_symmetry_equiv_pos_as_xyz' in doc['_cif']:
         _cif_set_unreduced_sites(doc)
 
@@ -153,8 +155,6 @@ def _cif_parse_raw(flines):
     ind = 0
     cif_dict = dict()
     cif_dict['loops'] = list()
-    for line in flines:
-        line = line.strip()
     while ind < len(flines):
         jnd = 1
         line = flines[ind].strip()
@@ -179,38 +179,72 @@ def _cif_parse_raw(flines):
             for key in keys:
                 cif_dict[key] = []
             cif_dict['loops'].append(keys)
-            while ind + jnd < len(flines) and _cif_line_contains_data(flines[ind+jnd]):
-                data = []
+            while ind + jnd < len(flines) and _cif_line_contains_data(flines[ind+jnd].strip()):
+                data = ''
                 # loop over line and next lines
-                while len(data) < len(keys) and ind + jnd < len(flines) and _cif_line_contains_data(flines[ind+jnd]):
-                    # parse '' blocks out of strings
-                    raw = flines[ind+jnd].split()
-                    valid = False
-                    while not valid:
-                        valid = True
-                        for i, entry in enumerate(raw):
-                            if entry.startswith('\''):
-                                start = i
-                                valid = False
-                            elif entry.endswith('\''):
-                                end = i
-                                valid = False
-                        if not valid:
-                            raw = raw[:start] + [' '.join(raw[start:end+1]).replace('\'', '')] + raw[end+1:]
-                    data.extend(raw)
+                while ind + jnd < len(flines) and _cif_line_contains_data(flines[ind+jnd]):
+                    data += flines[ind+jnd]
                     jnd += 1
-                try:
-                    for index, datum in enumerate(data):
-                        cif_dict[keys[index]].append(datum)
-                except Exception:
-                    print('Failed to scrape one of {}'.format(keys))
-                    pass
+
+            loop_dict = _cif_parse_loop(keys, data)
+            cif_dict.update(loop_dict)
 
         ind += jnd
 
     return cif_dict
 
 
+def _cif_parse_loop(keys, data_block):
+    """ Split the raw text of a CIF data loop into per-key columns.
+
+    Values are separated by whitespace (or ``;``) unless wrapped in single
+    quotes, in which case inner whitespace is kept as single spaces.
+
+    Parameters:
+        keys (list of str): list of keys for the loop.
+        data_block (str): raw string of the entire data block.
+
+    Returns:
+        Dict[str, List[str]]: maps each key from ``keys`` to its column of
+            values, assigned round-robin in reading order. All data is left
+            as strings for further processing.
+
+    """
+
+    from collections import defaultdict
+
+    data_list = []
+    entry = None
+    in_quotes = False
+    for char in data_block:
+        is_space = not char.strip()
+        if entry is None:
+            if is_space:
+                continue
+            # an opening quote starts an (initially empty) quoted entry
+            in_quotes = char == "'"
+            entry = '' if in_quotes else char
+        elif char == "'" or (not in_quotes and (is_space or char == ";")):
+            # NOTE: any quote ends the current entry, even mid-token; not
+            # requiring a non-empty entry lets an empty ``''`` close too.
+            in_quotes = False
+            data_list.append(entry.strip())
+            entry = None
+        else:
+            entry += " " if is_space else char
+
+    # flush a final entry that is not followed by whitespace (e.g. no
+    # trailing newline at the end of the file)
+    if entry is not None:
+        data_list.append(entry.strip())
+
+    loop_dict = defaultdict(list)
+    if keys:  # an empty key list would otherwise divide by zero below
+        for ind, datum in enumerate(data_list):
+            loop_dict[keys[ind % len(keys)]].append(datum)
+    return loop_dict
+
+
 def _cif_set_unreduced_sites(doc):
     """ Expands sites by symmetry operations found under the key
     `symemtry_equiv_pos_as_xyz` in the cif_dict.

@@ -20,6 +20,7 @@
     "bands2dict": ElectronicDispersion,
     "castep2dict": Crystal,
     "res2dict": Crystal,
+    "cif2dict": Crystal,
 }
 
 

@@ -78,23 +78,42 @@ def compute_pxrd(**kwargs):
         for doc in strucs:
             doc.pxrd.save_peaks(doc.root_source + '_pxrd_peaks.dat')
 
+    if kwargs.get('save_res'):
+        from matador.export import doc2res
+        for doc in strucs:
+            doc2res(doc, doc.root_source + '.res', info=False)
+
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(
         description="Compute, plot and export PXRD patterns from CIF file inputs.",
         epilog=script_epilog
     )
-    parser.add_argument('-l', '--wavelength', type=float, default=1.5406)
-    parser.add_argument('-bw', '--broadening_width', type=float, default=0.03)
-    parser.add_argument('-tm', '--theta_m', type=float, default=0.0)
-    parser.add_argument('--plot', action='store_true', help='show a plot of the PXRD patterns')
-    parser.add_argument('--savefig', type=str, help='save a plot to this file, e.g. "pxrd.pdf"')
-    parser.add_argument('-t', '--two_theta_range', nargs=2, type=float)
-    parser.add_argument('--spg_labels', action='store_true', help='label with spacegroup-formula instead of filename')
-    parser.add_argument('--save_patterns', action='store_true', help='save a .dat file with the xy pattern for each structure')
-    parser.add_argument('--save_peaks', action='store_true', help='save a .txt file per structure with a list of peaks')
-    parser.add_argument('--rugplot', action='store_true')
-    parser.add_argument('seeds', nargs='+', type=str, help='list of structures to compute')
+    parser.add_argument('-l', '--wavelength', type=float, default=1.5406,
+                        help='the incident X-ray wavelength in Angstrom (DEFAULT: 1.5406, i.e. CuKa)')
+    parser.add_argument('-bw', '--broadening_width', type=float, default=0.03,
+                        help='the width of broadening to apply to each peak')
+    parser.add_argument('-tm', '--theta_m', type=float, default=0.0,
+                        help='the monochromator angle in degrees (DEFAULT: 0 degrees)')
+    parser.add_argument('--plot', action='store_true',
+                        help='show a plot of the PXRD patterns')
+    parser.add_argument('--savefig', type=str,
+                        help='save a plot to this file, e.g. "pxrd.pdf"')
+    parser.add_argument('-t', '--two_theta_range', nargs=2, type=float,
+                        help="the two theta range to use for plotting/calculating the pattern (DEFAULT: 10 80)")
+    parser.add_argument('--spg_labels', action='store_true',
+                        help='label with computed spacegroup-formula instead of filename')
+    parser.add_argument('--save_res', action='store_true',
+                        help='save a res file with a closer interpretation of the structure used')
+    parser.add_argument('--save_patterns', action='store_true',
+                        help='save a .dat file with the xy pattern for each structure')
+    parser.add_argument('--save_peaks', action='store_true',
+                        help='save a .txt file per structure with a list of peaks')
+    parser.add_argument('--rugplot', action='store_true',
+                        help="additionally plot peak positions as a rug plot")
+    parser.add_argument('seeds', nargs='+', type=str,
+                        help='list of structures to compute')
+
     parsed_kwargs = vars(parser.parse_args())
     compute_pxrd(**parsed_kwargs)
     print('Done!')