Merge pull request #2157 from htz1992213/master

Support combining data with multiple mol-id
materialsproject · Jun 1, 2021 · 662a6e3 · 662a6e3
2 parents 37da271 + 3a90f50
commit 662a6e3
Show file tree

Hide file tree

Showing 4 changed files with 49 additions and 17 deletions.
diff --git a/pymatgen/analysis/elasticity/stress.py b/pymatgen/analysis/elasticity/stress.py
@@ -9,7 +9,6 @@
 """
 
 import math
-import warnings
 
 import numpy as np
 
@@ -80,7 +79,7 @@ def deviator_stress(self):
         returns the deviatoric component of the stress
         """
         if not self.is_symmetric:
-            raise warnings.warn("The stress tensor is not symmetric, " "so deviator stress will not be either")
+            raise ValueError("The stress tensor is not symmetric, so deviator stress will not be either")
         return self - self.mean_stress * np.eye(3)
 
     def piola_kirchoff_1(self, def_grad):

diff --git a/pymatgen/entries/compatibility.py b/pymatgen/entries/compatibility.py
@@ -565,7 +565,7 @@ def process_entries(self, entries: Union[ComputedEntry, list], clean: bool = Tru
             # get the energy adjustments
             try:
                 adjustments = self.get_adjustments(entry)
-            except CompatibilityError as exc:
+            except CompatibilityError:
                 ignore_entry = True
                 continue
 

diff --git a/pymatgen/io/lammps/data.py b/pymatgen/io/lammps/data.py
@@ -45,10 +45,10 @@
 
 __author__ = "Kiran Mathew, Zhi Deng, Tingzheng Hou"
 __copyright__ = "Copyright 2018, The Materials Virtual Lab"
-__version__ = "1.0"
-__maintainer__ = "Zhi Deng"
-__email__ = "z4deng@eng.ucsd.edu"
-__date__ = "Aug 1, 2018"
+__version__ = "2.0"
+__maintainer__ = "Tingzheng Hou"
+__email__ = "tingzheng_hou@berkeley.edu"
+__date__ = "May 29, 2021"
 
 MODULE_DIR = Path(__file__).resolve().parent
 
@@ -1290,9 +1290,12 @@ def __init__(
     ):
         """
         Args:
-            list_of_molecules: a list of LammpsData of a single cluster.
-            list_of_names: a list of name for each cluster.
-            list_of_numbers: a list of Integer for counts of each molecule
+            list_of_molecules: A list of LammpsData objects, each describing a chemical cluster.
+                Each LammpsData object (cluster) may contain one or more molecule IDs.
+            list_of_names: A list of names (strings), one for each cluster. The characters in each name are
+                restricted to word characters ([a-zA-Z0-9_]). If names with any non-word characters
+                are passed in, the special characters will be substituted by '_'.
+            list_of_numbers: A list of Integer for counts of each molecule
                 coordinates (pandas.DataFrame): DataFrame with with four
                 columns ["atom", "x", "y", "z"] for coordinates of atoms.
             atom_style (str): Output atom_style. Default to "full".
@@ -1304,7 +1307,9 @@ def __init__(
         self.box = LammpsBox(np.array(3 * [[min_xyz - 0.5, max_xyz + 0.5]]))
         self.atom_style = atom_style
         self.n = sum(list_of_numbers)
-        self.names = list_of_names
+        self.names = list()
+        for name in list_of_names:
+            self.names.append("_".join(re.findall(r"\w+", name)))
         self.mols = list_of_molecules
         self.nums = list_of_numbers
         self.masses = pd.concat([mol.masses.copy() for mol in self.mols], ignore_index=True)
@@ -1322,15 +1327,18 @@ def __init__(
         self.atoms = pd.DataFrame()
         mol_count = 0
         type_count = 0
+        self.mols_per_data = list()
         for i, mol in enumerate(self.mols):
             atoms_df = mol.atoms.copy()
             atoms_df["molecule-ID"] += mol_count
             atoms_df["type"] += type_count
+            mols_in_data = len(atoms_df["molecule-ID"].unique())
+            self.mols_per_data.append(mols_in_data)
             for j in range(self.nums[i]):
                 self.atoms = self.atoms.append(atoms_df, ignore_index=True)
-                atoms_df["molecule-ID"] += 1
+                atoms_df["molecule-ID"] += mols_in_data
             type_count += len(mol.masses)
-            mol_count += self.nums[i]
+            mol_count += self.nums[i] * mols_in_data
         self.atoms.index += 1
         assert len(self.atoms) == len(coordinates), "Wrong number of coordinates."
         self.atoms.update(coordinates)
@@ -1392,7 +1400,7 @@ def from_files(cls, coordinate_file, list_of_numbers, *filenames):
             coordinate_file (str): The filename of xyz coordinates.
             list_of_numbers (list): A list of numbers specifying counts for each
                 clusters parsed from files.
-            filenames (str): A series of filenames in string format.
+            filenames (str): A series of LAMMPS data filenames in string format.
         """
         names = []
         mols = []
@@ -1414,7 +1422,8 @@ def from_lammpsdata(cls, mols, names, list_of_numbers, coordinates, atom_style=N
         The input LammpsData objects are used non-destructively.
 
         Args:
-            mols: a list of LammpsData of a single cluster.
+            mols: a list of LammpsData of a chemical cluster. Each LammpsData object (cluster)
+                may contain one or more molecule IDs.
             names: a list of name for each cluster.
             list_of_numbers: a list of Integer for counts of each molecule
                 coordinates (pandas.DataFrame): DataFrame with with four
@@ -1434,7 +1443,11 @@ def from_lammpsdata(cls, mols, names, list_of_numbers, coordinates, atom_style=N
     def get_string(self, distance=6, velocity=8, charge=4):
         """
         Returns the string representation of CombinedData, essentially
-        the string to be written to a file. Combination info is included.
+        the string to be written to a file. Combination info is included
+        as a comment. For single molecule ID data, the info format is:
+            num name
+        For data with multiple molecule IDs, the format is:
+            num(mols_per_data) name
 
         Args:
             distance (int): No. of significant figures to output for
@@ -1449,7 +1462,10 @@ def get_string(self, distance=6, velocity=8, charge=4):
             String representation
         """
         lines = LammpsData.get_string(self, distance, velocity, charge).splitlines()
-        info = "# " + " + ".join(str(a) + " " + b for a, b in zip(self.nums, self.names))
+        info = "# " + " + ".join(
+            (str(a) + " " + b) if c == 1 else (str(a) + "(" + str(c) + ") " + b)
+            for a, b, c in zip(self.nums, self.names, self.mols_per_data)
+        )
         lines.insert(1, info)
         return "\n".join(lines)
 

diff --git a/pymatgen/io/lammps/tests/test_data.py b/pymatgen/io/lammps/tests/test_data.py
@@ -867,6 +867,9 @@ def setUpClass(cls):
         )
         cls.ec_fec2 = CombinedData.from_lammpsdata([cls.ec, cls.fec], ["EC", "FEC"], [1200, 300], cls.coord)
         cls.ec_fec_ld = cls.ec_fec1.as_lammpsdata()
+        cls.double_coord = pd.concat([cls.coord, cls.coord], ignore_index=True)
+        cls.double_coord.index += 1
+        cls.ec_fec3 = CombinedData.from_lammpsdata([cls.ec_fec_ld], ["EC FEC"], [2], cls.double_coord)
 
     def test_from_files(self):
         # general tests
@@ -1004,8 +1007,10 @@ def test_from_lammpsdata(self):
     def test_get_string(self):
         # general tests
         ec_fec_lines = self.ec_fec1.get_string().splitlines()
+        ec_fec_double_lines = self.ec_fec3.get_string().splitlines()
         # header information
         self.assertEqual(ec_fec_lines[1], "# 1200 cluster1 + 300 cluster2")
+        self.assertEqual(ec_fec_double_lines[1], "# 2(1500) EC_FEC")
         # data type consistency tests
         self.assertEqual(ec_fec_lines[98], "1  harmonic 3.200000000 -1 2")
         self.assertEqual(ec_fec_lines[109], "12  charmm 2.700000000 2 180 0.0")
@@ -1014,7 +1019,19 @@ def test_get_string(self):
             "16  multi/harmonic 0.382999522 -1.148998570 0.000000000 1.531998090 0.000000000",
         )
         self.assertEqual(ec_fec_lines[141], "1  10.5 -1  2")
+        self.assertEqual(ec_fec_double_lines[98], "1  harmonic 3.200000000 -1 2")
+        self.assertEqual(ec_fec_double_lines[109], "12  charmm 2.700000000 2 180 0.0")
+        self.assertEqual(
+            ec_fec_double_lines[113],
+            "16  multi/harmonic 0.382999522 -1.148998570 0.000000000 1.531998090 0.000000000",
+        )
+        self.assertEqual(
+            ec_fec_double_lines[30146],
+            "30000  3000  12 -0.2329  4.630985  7.328547 51.604678",
+        )
+        self.assertEqual(ec_fec_double_lines[141], "1  10.5 -1  2")
         self.assertEqual(len(ec_fec_lines), 99159)
+        self.assertEqual(len(ec_fec_double_lines), 198159)
 
     def test_as_lammpsdata(self):
         ec_fec = self.ec_fec_ld