pyiron · jan-janssen · Jan 8, 2026 · Jan 8, 2026 · Jan 8, 2026 · Jan 8, 2026
@@ -2,34 +2,21 @@
 
 import os
 import warnings
-from dataclasses import asdict, dataclass, field
-from io import StringIO
 from typing import Dict, List, Optional, Tuple, Union
 
 import numpy as np
 import pandas as pd
 from ase.atoms import Atoms
 
+from pyiron_lammps.output_raw import (
+    parse_raw_dump_from_h5md,
+    parse_raw_dump_from_text,
+    parse_raw_lammps_log,
+)
 from pyiron_lammps.structure import UnfoldingPrism
 from pyiron_lammps.units import UnitConverter
 
 
-@dataclass
-class DumpData:
-    steps: List = field(default_factory=lambda: [])
-    natoms: List = field(default_factory=lambda: [])
-    cells: List = field(default_factory=lambda: [])
-    indices: List = field(default_factory=lambda: [])
-    forces: List = field(default_factory=lambda: [])
-    mean_forces: List = field(default_factory=lambda: [])
-    velocities: List = field(default_factory=lambda: [])
-    mean_velocities: List = field(default_factory=lambda: [])
-    unwrapped_positions: List = field(default_factory=lambda: [])
-    mean_unwrapped_positions: List = field(default_factory=lambda: [])
-    positions: List = field(default_factory=lambda: [])
-    computes: Dict = field(default_factory=lambda: {})
-
-
 def remap_indices_ase(
     lammps_indices: Union[np.ndarray, List],
     potential_elements: Union[np.ndarray, List],
@@ -135,45 +122,6 @@ def parse_lammps_output(
     return hdf_output
 
 
-def to_amat(l_list: Union[np.ndarray, List]) -> List:
-    lst = np.reshape(l_list, -1)
-    if len(lst) == 9:
-        (
-            xlo_bound,
-            xhi_bound,
-            xy,
-            ylo_bound,
-            yhi_bound,
-            xz,
-            zlo_bound,
-            zhi_bound,
-            yz,
-        ) = lst
-
-    elif len(lst) == 6:
-        xlo_bound, xhi_bound, ylo_bound, yhi_bound, zlo_bound, zhi_bound = lst
-        xy, xz, yz = 0.0, 0.0, 0.0
-    else:
-        raise ValueError("This format for amat not yet implemented: " + str(len(lst)))
-
-    # > xhi_bound - xlo_bound = xhi -xlo  + MAX(0.0, xy, xz, xy + xz) - MIN(0.0, xy, xz, xy + xz)
-    # > xhili = xhi -xlo   = xhi_bound - xlo_bound - MAX(0.0, xy, xz, xy + xz) + MIN(0.0, xy, xz, xy + xz)
-    xhilo = (
-        (xhi_bound - xlo_bound)
-        - max([0.0, xy, xz, xy + xz])
-        + min([0.0, xy, xz, xy + xz])
-    )
-
-    # > yhilo = yhi -ylo = yhi_bound -ylo_bound - MAX(0.0, yz) + MIN(0.0, yz)
-    yhilo = (yhi_bound - ylo_bound) - max([0.0, yz]) + min([0.0, yz])
-
-    # > zhi - zlo = zhi_bound- zlo_bound
-    zhilo = zhi_bound - zlo_bound
-
-    cell = [[xhilo, 0, 0], [xy, yhilo, 0], [xz, yz, zhilo]]
-    return cell
-
-
 def _parse_dump(
     dump_h5_full_file_name: str,
     dump_out_full_file_name: str,
@@ -183,9 +131,12 @@ def _parse_dump(
     remap_indices_funct: callable = remap_indices_ase,
 ) -> Dict:
     if os.path.isfile(dump_h5_full_file_name):
-        return _collect_dump_from_h5md(
+        if not _check_ortho_prism(prism=prism):
+            raise RuntimeError(
+                "The Lammps output will not be mapped back to pyiron correctly."
+            )
+        return parse_raw_dump_from_h5md(
             file_name=dump_h5_full_file_name,
-            prism=prism,
         )
     elif os.path.exists(dump_out_full_file_name):
         return _collect_dump_from_text(
@@ -201,31 +152,6 @@ def _parse_dump(
         )
 
 
-def _collect_dump_from_h5md(file_name: str, prism: UnfoldingPrism) -> Dict:
-    import h5py
-
-    if not _check_ortho_prism(prism=prism):
-        raise RuntimeError(
-            "The Lammps output will not be mapped back to pyiron correctly."
-        )
-
-    with h5py.File(file_name, mode="r", libver="latest", swmr=True) as h5md:
-        positions = [pos_i.tolist() for pos_i in h5md["/particles/all/position/value"]]
-        steps = [steps_i.tolist() for steps_i in h5md["/particles/all/position/step"]]
-        forces = [for_i.tolist() for for_i in h5md["/particles/all/force/value"]]
-        # following the explanation at: http://nongnu.org/h5md/h5md.html
-        cell = [
-            np.eye(3) * np.array(cell_i.tolist())
-            for cell_i in h5md["/particles/all/box/edges/value"]
-        ]
-    return {
-        "forces": forces,
-        "positions": positions,
-        "steps": steps,
-        "cells": cell,
-    }
-
-
 def _collect_dump_from_text(
     file_name: str,
     prism: UnfoldingPrism,
@@ -237,136 +163,36 @@ def _collect_dump_from_text(
     general purpose routine to extract static from a lammps dump file
     """
     rotation_lammps2orig = prism.R.T
-    with open(file_name, "r") as f:
-        dump = DumpData()
-
-        for line in f:
-            if "ITEM: TIMESTEP" in line:
-                dump.steps.append(int(f.readline()))
-
-            elif "ITEM: BOX BOUNDS" in line:
-                c1 = np.fromstring(f.readline(), dtype=float, sep=" ")
-                c2 = np.fromstring(f.readline(), dtype=float, sep=" ")
-                c3 = np.fromstring(f.readline(), dtype=float, sep=" ")
-                cell = np.concatenate([c1, c2, c3])
-                lammps_cell = to_amat(cell)
-                unfolded_cell = prism.unfold_cell(lammps_cell)
-                dump.cells.append(unfolded_cell)
-
-            elif "ITEM: NUMBER OF ATOMS" in line:
-                n = int(f.readline())
-                dump.natoms.append(n)
-
-            elif "ITEM: ATOMS" in line:
-                # get column names from line
-                columns = line.lstrip("ITEM: ATOMS").split()
-
-                # Read line by line of snapshot into a string buffer
-                # Than parse using pandas for speed and column acces
-                buf = StringIO()
-                for _ in range(n):
-                    buf.write(f.readline())
-                buf.seek(0)
-                df = pd.read_csv(
-                    buf,
-                    nrows=n,
-                    sep="\\s+",
-                    header=None,
-                    names=columns,
-                    engine="c",
-                )
-                df.sort_values(by="id", ignore_index=True, inplace=True)
-                # Coordinate transform lammps->pyiron
-                dump.indices.append(
-                    remap_indices_funct(
-                        lammps_indices=df["type"].array.astype(int),
-                        potential_elements=potential_elements,
-                        structure=structure,
-                    )
-                )
-
-                force = np.stack(
-                    [df["fx"].array, df["fy"].array, df["fz"].array], axis=1
+    dump_lammps_dict = parse_raw_dump_from_text(file_name=file_name)
+    dump_dict = {}
+    for key, val in dump_lammps_dict.items():
+        if key in ["cells"]:
+            dump_dict[key] = [prism.unfold_cell(cell=cell) for cell in val]
+        elif key in ["indices"]:
+            dump_dict[key] = [
+                remap_indices_funct(
+                    lammps_indices=indices,
+                    potential_elements=potential_elements,
+                    structure=structure,
                 )
-                dump.forces.append(np.matmul(force, rotation_lammps2orig))
-                if "f_mean_forces[1]" in columns:
-                    force = np.stack(
-                        [
-                            df["f_mean_forces[1]"].array,
-                            df["f_mean_forces[2]"].array,
-                            df["f_mean_forces[3]"].array,
-                        ],
-                        axis=1,
-                    )
-                    dump.mean_forces.append(np.matmul(force, rotation_lammps2orig))
-                if "vx" in columns and "vy" in columns and "vz" in columns:
-                    v = np.stack(
-                        [
-                            df["vx"].array,
-                            df["vy"].array,
-                            df["vz"].array,
-                        ],
-                        axis=1,
-                    )
-                    dump.velocities.append(np.matmul(v, rotation_lammps2orig))
-
-                if "f_mean_velocities[1]" in columns:
-                    v = np.stack(
-                        [
-                            df["f_mean_velocities[1]"].array,
-                            df["f_mean_velocities[2]"].array,
-                            df["f_mean_velocities[3]"].array,
-                        ],
-                        axis=1,
-                    )
-                    dump.mean_velocities.append(np.matmul(v, rotation_lammps2orig))
-
-                if "xsu" in columns:
-                    direct_unwrapped_positions = np.stack(
-                        [
-                            df["xsu"].array,
-                            df["ysu"].array,
-                            df["zsu"].array,
-                        ],
-                        axis=1,
-                    )
-                    dump.unwrapped_positions.append(
-                        np.matmul(
-                            np.matmul(direct_unwrapped_positions, lammps_cell),
-                            rotation_lammps2orig,
-                        )
-                    )
-
-                    direct_positions = direct_unwrapped_positions - np.floor(
-                        direct_unwrapped_positions
-                    )
-                    dump.positions.append(
-                        np.matmul(
-                            np.matmul(direct_positions, lammps_cell),
-                            rotation_lammps2orig,
-                        )
-                    )
-
-                if "f_mean_positions[1]" in columns:
-                    pos = np.stack(
-                        [
-                            df["f_mean_positions[1]"].array,
-                            df["f_mean_positions[2]"].array,
-                            df["f_mean_positions[3]"].array,
-                        ],
-                        axis=1,
-                    )
-                    dump.mean_unwrapped_positions.append(
-                        np.matmul(pos, rotation_lammps2orig)
-                    )
-                for k in columns:
-                    if k.startswith("c_"):
-                        kk = k.replace("c_", "")
-                        if kk not in dump.computes.keys():
-                            dump.computes[kk] = []
-                        dump.computes[kk].append(df[k].array)
-
-        return asdict(dump)
+                for indices in val
+            ]
+        elif key in [
+            "forces",
+            "mean_forces",
+            "velocities",
+            "mean_velocities",
+            "mean_unwrapped_positions",
+        ]:
+            dump_dict[key] = [np.matmul(v, rotation_lammps2orig) for v in val]
+        elif key in ["positions", "unwrapped_positions"]:
+            dump_dict[key] = [
+                np.matmul(np.matmul(v, lammps_cell), rotation_lammps2orig)
+                for v, lammps_cell in zip(val, dump_lammps_dict["cells"])
+            ]
+        else:
+            dump_dict[key] = val
+    return dump_dict
 
 
 def _parse_log(
@@ -405,44 +231,18 @@ def _collect_output_log(
     """
     general purpose routine to extract static from a lammps log file
     """
-    with open(file_name, "r") as f:
-        dfs = []
-        read_thermo = False
-        for l in f:
-            l = l.lstrip()
-
-            if l.startswith("Step"):
-                thermo_lines = ""
-                read_thermo = True
-
-            if read_thermo:
-                if l.startswith("Loop") or l.startswith("ERROR"):
-                    read_thermo = False
-                    dfs.append(
-                        pd.read_csv(StringIO(thermo_lines), sep="\\s+", engine="c")
-                    )
-
-                elif l.startswith("WARNING:"):
-                    warnings.warn(f"A warning was found in the log:\n{l}")
-
-                else:
-                    thermo_lines += l
+    df = parse_raw_lammps_log(file_name=file_name)
 
     h5_dict = {
         "Step": "steps",
         "Temp": "temperature",
         "PotEng": "energy_pot",
         "TotEng": "energy_tot",
         "Volume": "volume",
+        "LogStep": "LogStep",
     }
-    if len(dfs) == 1:
-        df = dfs[0]
-    else:
-        h5_dict["LogStep"] = "LogStep"
-        for i in range(len(dfs)):
-            df = dfs[i]
-            df["LogStep"] = np.ones(len(df)) * i
-        df = pd.concat(dfs, ignore_index=True)
+    if "LogStep" not in df.columns:
+        del h5_dict["LogStep"]
 
     for key in df.columns[df.columns.str.startswith("f_mean")]:
         h5_dict[key] = key.replace("f_", "")