Skip to content

Commit

Permalink
Merge pull request #2157 from htz1992213/master
Browse files Browse the repository at this point in the history
Support combining data with multiple mol-id
  • Loading branch information
mkhorton committed Jun 1, 2021
2 parents 37da271 + 3a90f50 commit 662a6e3
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 17 deletions.
3 changes: 1 addition & 2 deletions pymatgen/analysis/elasticity/stress.py
Expand Up @@ -9,7 +9,6 @@
"""

import math
import warnings

import numpy as np

Expand Down Expand Up @@ -80,7 +79,7 @@ def deviator_stress(self):
returns the deviatoric component of the stress
"""
if not self.is_symmetric:
raise warnings.warn("The stress tensor is not symmetric, " "so deviator stress will not be either")
raise ValueError("The stress tensor is not symmetric, so deviator stress will not be either")
return self - self.mean_stress * np.eye(3)

def piola_kirchoff_1(self, def_grad):
Expand Down
2 changes: 1 addition & 1 deletion pymatgen/entries/compatibility.py
Expand Up @@ -565,7 +565,7 @@ def process_entries(self, entries: Union[ComputedEntry, list], clean: bool = Tru
# get the energy adjustments
try:
adjustments = self.get_adjustments(entry)
except CompatibilityError as exc:
except CompatibilityError:
ignore_entry = True
continue

Expand Down
44 changes: 30 additions & 14 deletions pymatgen/io/lammps/data.py
Expand Up @@ -45,10 +45,10 @@

__author__ = "Kiran Mathew, Zhi Deng, Tingzheng Hou"
__copyright__ = "Copyright 2018, The Materials Virtual Lab"
__version__ = "1.0"
__maintainer__ = "Zhi Deng"
__email__ = "z4deng@eng.ucsd.edu"
__date__ = "Aug 1, 2018"
__version__ = "2.0"
__maintainer__ = "Tingzheng Hou"
__email__ = "tingzheng_hou@berkeley.edu"
__date__ = "May 29, 2021"

MODULE_DIR = Path(__file__).resolve().parent

Expand Down Expand Up @@ -1290,9 +1290,12 @@ def __init__(
):
"""
Args:
list_of_molecules: a list of LammpsData of a single cluster.
list_of_names: a list of name for each cluster.
list_of_numbers: a list of Integer for counts of each molecule
list_of_molecules: A list of LammpsData objects of a chemical cluster.
Each LammpsData object (cluster) may contain one or more molecule IDs.
list_of_names: A list of names (strings), one for each cluster. The characters in each name are
restricted to word characters ([a-zA-Z0-9_]). If names with any non-word characters
are passed in, the special characters will be substituted by '_'.
list_of_numbers: A list of Integer for counts of each molecule
coordinates (pandas.DataFrame): DataFrame with four
columns ["atom", "x", "y", "z"] for coordinates of atoms.
atom_style (str): Output atom_style. Default to "full".
Expand All @@ -1304,7 +1307,9 @@ def __init__(
self.box = LammpsBox(np.array(3 * [[min_xyz - 0.5, max_xyz + 0.5]]))
self.atom_style = atom_style
self.n = sum(list_of_numbers)
self.names = list_of_names
self.names = list()
for name in list_of_names:
self.names.append("_".join(re.findall(r"\w+", name)))
self.mols = list_of_molecules
self.nums = list_of_numbers
self.masses = pd.concat([mol.masses.copy() for mol in self.mols], ignore_index=True)
Expand All @@ -1322,15 +1327,18 @@ def __init__(
self.atoms = pd.DataFrame()
mol_count = 0
type_count = 0
self.mols_per_data = list()
for i, mol in enumerate(self.mols):
atoms_df = mol.atoms.copy()
atoms_df["molecule-ID"] += mol_count
atoms_df["type"] += type_count
mols_in_data = len(atoms_df["molecule-ID"].unique())
self.mols_per_data.append(mols_in_data)
for j in range(self.nums[i]):
self.atoms = self.atoms.append(atoms_df, ignore_index=True)
atoms_df["molecule-ID"] += 1
atoms_df["molecule-ID"] += mols_in_data
type_count += len(mol.masses)
mol_count += self.nums[i]
mol_count += self.nums[i] * mols_in_data
self.atoms.index += 1
assert len(self.atoms) == len(coordinates), "Wrong number of coordinates."
self.atoms.update(coordinates)
Expand Down Expand Up @@ -1392,7 +1400,7 @@ def from_files(cls, coordinate_file, list_of_numbers, *filenames):
coordinate_file (str): The filename of xyz coordinates.
list_of_numbers (list): A list of numbers specifying counts for each
cluster parsed from files.
filenames (str): A series of filenames in string format.
filenames (str): A series of LAMMPS data filenames in string format.
"""
names = []
mols = []
Expand All @@ -1414,7 +1422,8 @@ def from_lammpsdata(cls, mols, names, list_of_numbers, coordinates, atom_style=N
The input LammpsData objects are used non-destructively.
Args:
mols: a list of LammpsData of a single cluster.
mols: a list of LammpsData of a chemical cluster. Each LammpsData object (cluster)
may contain one or more molecule IDs.
names: a list of name for each cluster.
list_of_numbers: a list of Integer for counts of each molecule
coordinates (pandas.DataFrame): DataFrame with four
Expand All @@ -1434,7 +1443,11 @@ def from_lammpsdata(cls, mols, names, list_of_numbers, coordinates, atom_style=N
def get_string(self, distance=6, velocity=8, charge=4):
"""
Returns the string representation of CombinedData, essentially
the string to be written to a file. Combination info is included.
the string to be written to a file. Combination info is included
as a comment. For single molecule ID data, the info format is:
num name
For data with multiple molecule ID, the format is:
num(mols_per_data) name
Args:
distance (int): No. of significant figures to output for
Expand All @@ -1449,7 +1462,10 @@ def get_string(self, distance=6, velocity=8, charge=4):
String representation
"""
lines = LammpsData.get_string(self, distance, velocity, charge).splitlines()
info = "# " + " + ".join(str(a) + " " + b for a, b in zip(self.nums, self.names))
info = "# " + " + ".join(
(str(a) + " " + b) if c == 1 else (str(a) + "(" + str(c) + ") " + b)
for a, b, c in zip(self.nums, self.names, self.mols_per_data)
)
lines.insert(1, info)
return "\n".join(lines)

Expand Down
17 changes: 17 additions & 0 deletions pymatgen/io/lammps/tests/test_data.py
Expand Up @@ -867,6 +867,9 @@ def setUpClass(cls):
)
cls.ec_fec2 = CombinedData.from_lammpsdata([cls.ec, cls.fec], ["EC", "FEC"], [1200, 300], cls.coord)
cls.ec_fec_ld = cls.ec_fec1.as_lammpsdata()
cls.double_coord = pd.concat([cls.coord, cls.coord], ignore_index=True)
cls.double_coord.index += 1
cls.ec_fec3 = CombinedData.from_lammpsdata([cls.ec_fec_ld], ["EC FEC"], [2], cls.double_coord)

def test_from_files(self):
# general tests
Expand Down Expand Up @@ -1004,8 +1007,10 @@ def test_from_lammpsdata(self):
def test_get_string(self):
# general tests
ec_fec_lines = self.ec_fec1.get_string().splitlines()
ec_fec_double_lines = self.ec_fec3.get_string().splitlines()
# header information
self.assertEqual(ec_fec_lines[1], "# 1200 cluster1 + 300 cluster2")
self.assertEqual(ec_fec_double_lines[1], "# 2(1500) EC_FEC")
# data type consistency tests
self.assertEqual(ec_fec_lines[98], "1 harmonic 3.200000000 -1 2")
self.assertEqual(ec_fec_lines[109], "12 charmm 2.700000000 2 180 0.0")
Expand All @@ -1014,7 +1019,19 @@ def test_get_string(self):
"16 multi/harmonic 0.382999522 -1.148998570 0.000000000 1.531998090 0.000000000",
)
self.assertEqual(ec_fec_lines[141], "1 10.5 -1 2")
self.assertEqual(ec_fec_double_lines[98], "1 harmonic 3.200000000 -1 2")
self.assertEqual(ec_fec_double_lines[109], "12 charmm 2.700000000 2 180 0.0")
self.assertEqual(
ec_fec_double_lines[113],
"16 multi/harmonic 0.382999522 -1.148998570 0.000000000 1.531998090 0.000000000",
)
self.assertEqual(
ec_fec_double_lines[30146],
"30000 3000 12 -0.2329 4.630985 7.328547 51.604678",
)
self.assertEqual(ec_fec_double_lines[141], "1 10.5 -1 2")
self.assertEqual(len(ec_fec_lines), 99159)
self.assertEqual(len(ec_fec_double_lines), 198159)

def test_as_lammpsdata(self):
ec_fec = self.ec_fec_ld
Expand Down

0 comments on commit 662a6e3

Please sign in to comment.