diff --git a/.ci_support/environment.yml b/.ci_support/environment.yml
index 89c5f922a..2426ce692 100644
--- a/.ci_support/environment.yml
+++ b/.ci_support/environment.yml
@@ -6,9 +6,9 @@ dependencies:
   - coverage
   - codacy-coverage
   - matplotlib =3.3.4
-  - numpy =1.20
-  - pyiron_base =0.1.46
-  - pyiron_atomistics =0.2.3
+  - numpy =1.20.1
+  - pyiron_base =0.1.48
+  - pyiron_atomistics =0.2.4
   - scipy =1.6.0
   - seaborn =0.11.1
   - scikit-image =0.18.1
diff --git a/pyiron_contrib/__init__.py b/pyiron_contrib/__init__.py
index ced7138f2..72c9ef781 100644
--- a/pyiron_contrib/__init__.py
+++ b/pyiron_contrib/__init__.py
@@ -34,6 +34,12 @@
 JOB_CLASS_DICT['Fenics'] = 'pyiron_contrib.continuum.fenics.job.generic'
 JOB_CLASS_DICT['FenicsLinearElastic'] = 'pyiron_contrib.continuum.fenics.job.elastic'
 JOB_CLASS_DICT['TrainingContainer'] = 'pyiron_contrib.atomistic.atomistics.job.trainingcontainer'
+JOB_CLASS_DICT['RandomDisMaster'] = 'pyiron_contrib.atomistic.mlip.masters'
+JOB_CLASS_DICT['RandomMDMaster'] = 'pyiron_contrib.atomistic.mlip.masters'
+JOB_CLASS_DICT['MlipSelect'] = 'pyiron_contrib.atomistic.mlip.mlipselect'
+JOB_CLASS_DICT['Mlip'] = 'pyiron_contrib.atomistic.mlip.mlip'
+JOB_CLASS_DICT['LammpsMlip'] = 'pyiron_contrib.atomistic.mlip.lammps'
+JOB_CLASS_DICT['MlipJob'] = 'pyiron_contrib.atomistic.mlip.mlipjob'
 
 from ._version import get_versions
 
diff --git a/pyiron_contrib/atomistic/atomistics/job/trainingcontainer.py b/pyiron_contrib/atomistic/atomistics/job/trainingcontainer.py
index 3e87a9c0d..58b00b546 100644
--- a/pyiron_contrib/atomistic/atomistics/job/trainingcontainer.py
+++ b/pyiron_contrib/atomistic/atomistics/job/trainingcontainer.py
@@ -22,6 +22,7 @@
 >>> container.include_dataset(df)
 
 You can retrieve the full database with :method:`~.TrainingContainer.to_pandas()` like this
+
 >>> container.to_pandas()
     name    atoms    energy    forces    number_of_atoms
   Fe_bcc    ...
@@ -40,6 +41,7 @@ class TrainingContainer(GenericJob):
 
     def __init__(self, project, job_name):
         super().__init__(project=project, job_name=job_name)
+        self.__name__ = "TrainingContainer"
         self._table = pd.DataFrame({
             "name": [],
             "atoms": [],
@@ -85,6 +87,7 @@ def to_pandas(self):
         Export list of structure to pandas table for external fitting codes.
The table contains the following columns: + - 'name': human-readable name of the structure - 'ase_atoms': the structure as a :class:`.Atoms` object - 'energy': the energy of the full structure - 'forces': the per atom forces as a :class:`numpy.ndarray`, shape Nx3 diff --git a/pyiron_contrib/atomistic/mlip/__init__.py b/pyiron_contrib/atomistic/mlip/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pyiron_contrib/atomistic/mlip/cfgs.py b/pyiron_contrib/atomistic/mlip/cfgs.py new file mode 100644 index 000000000..e3786b551 --- /dev/null +++ b/pyiron_contrib/atomistic/mlip/cfgs.py @@ -0,0 +1,148 @@ +# coding: utf-8 +# http://gitlab.skoltech.ru/shapeev/mlip-dev/blob/master/src/external/python/mlippy/cfgs.py + +from __future__ import print_function +import numpy as np + + +class Cfg: + pos = None + lat = None + types = None + energy = None + forces = None + stresses = None + desc = None + grade = None + + +def readcfg(f): + cfg = Cfg() + cfg.lat = np.zeros((3, 3)) + size = -1 + mode = -1 + line = f.readline() + while line: + line = line.upper() + line = line.strip() + if mode == 0: + if line.startswith('SIZE'): + line = f.readline() + size = int(line.strip()) + cfg.types = np.zeros(size) + cfg.pos = np.zeros((size, 3)) + elif line.startswith('SUPERCELL'): + line = f.readline() + vals = line.strip().split() + cfg.lat[0, :] = vals[0:3] + line = f.readline() + vals = line.strip().split() + cfg.lat[1, :] = vals[0:3] + line = f.readline() + vals = line.strip().split() + cfg.lat[2, :] = vals[0:3] + elif line.startswith('ATOMDATA'): + if line.endswith('FZ'): + cfg.forces = np.zeros((size, 3)) + for i in range(size): + line = f.readline() + vals = line.strip().split() + cfg.types[i] = vals[1] + cfg.pos[i, :] = vals[2:5] + if cfg.forces is not None: + cfg.forces[i, :] = vals[5:8] + elif line.startswith('ENERGY'): + line = f.readline() + cfg.energy = float(line.strip()) + elif line.startswith('PLUSSTRESS'): + line = f.readline() + vals = line.strip().split() + cfg.stresses = np.zeros(6) + cfg.stresses[:] = vals[0:6] + elif line.startswith('FEATURE MV_GRADE'): + cfg.grade = float(line.split()[-1]) + elif line.startswith('FEATURE PYIRON'): + cfg.desc = line.split()[-1] + if line.startswith('BEGIN_CFG'): + mode = 0 + elif line.startswith('END_CFG'): + break + line = f.readline() + return cfg + + +def savecfg(f, cfg, desc=None): + atstr1 = 'AtomData: id type cartes_x cartes_y cartes_z fx fy fz' + atstr2 = 'AtomData: id type cartes_x cartes_y cartes_z' + size = len(cfg.types) + print('BEGIN_CFG', file=f) + print('Size', file=f) + print(' %-d' % size, file=f) + if cfg.lat is not None: + print('SuperCell', file=f) + for i in range(3): + print(' %14f%14f%14f' + % (cfg.lat[i, 0], cfg.lat[i, 1], cfg.lat[i, 2]), file=f) + if cfg.forces is not None: + print(atstr1, file=f) + else: + print(atstr2, file=f) + for i in range(size): + if cfg.forces is not None: + print(' %4d %4d %14f%14f%14f %16.8e %16.8e %16.8e' % + (i+1, cfg.types[i], cfg.pos[i, 0], cfg.pos[i, 1], cfg.pos[i, 2], + cfg.forces[i, 0], cfg.forces[i, 1], cfg.forces[i, 2]), file=f) + else: + print(' %4d %4d %14f%14f%14f' % + (i+1, cfg.types[i], cfg.pos[i, 0], cfg.pos[i, 1], cfg.pos[i, 2]), + file=f) + if cfg.energy is not None: + print('Energy\t%14f' % cfg.energy, file=f) + if cfg.stresses is not None: + print('PlusStress: xx yy zz yz xz xy', file=f) + print(' %14f%14f%14f%14f%14f%14f' % + (cfg.stresses[0], cfg.stresses[1], cfg.stresses[2], + cfg.stresses[3], cfg.stresses[4], cfg.stresses[5]), file=f) + if desc is not None: + 
print('Feature from %s' % desc, file=f) + if cfg.desc is not None: + print('Feature %s' % cfg.desc, file=f) + print('END_CFG', file=f) + + +class cfgparser: + def __init__(self, file, max_cfgs=None): + self.cfgs = [] + self.file = file + self.max_cfgs = max_cfgs + + def __enter__(self): + while True: + if self.max_cfgs is not None and len(self.cfgs) == self.max_cfgs: + break + cfg = readcfg(self.file) + if cfg.types is not None: + self.cfgs.append(cfg) + else: + break + return self.cfgs + + def __exit__(self, *args): + self.cfgs = [] + + +def printcfg(cfg): + savecfg(None, cfg) + + +def loadcfgs(filename, max_cfgs=None): + with open(filename, 'r') as file: + with cfgparser(file, max_cfgs) as cfgs: + return cfgs + + +def savecfgs(filename, cfgs, desc=None): + with open(filename, 'w') as file: + for cfg in cfgs: + savecfg(file, cfg, desc) + print("", file=file) diff --git a/pyiron_contrib/atomistic/mlip/lammps.py b/pyiron_contrib/atomistic/mlip/lammps.py new file mode 100644 index 000000000..94ea25779 --- /dev/null +++ b/pyiron_contrib/atomistic/mlip/lammps.py @@ -0,0 +1,126 @@ +# coding: utf-8 +# Copyright (c) Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department +# Distributed under the terms of "New BSD License", see the LICENSE file. + +import os +from pyiron.lammps.base import Input +from pyiron.lammps.interactive import LammpsInteractive +from pyiron_contrib.atomistic.mlip.mlip import read_cgfs +from pyiron_base import GenericParameters + +__author__ = "Jan Janssen" +__copyright__ = "Copyright 2020, Max-Planck-Institut für Eisenforschung GmbH - " \ + "Computational Materials Design (CM) Department" +__version__ = "1.0" +__maintainer__ = "Jan Janssen" +__email__ = "janssen@mpie.de" +__status__ = "development" +__date__ = "Sep 1, 2018" + + +class LammpsMlip(LammpsInteractive): + def __init__(self, project, job_name): + super(LammpsMlip, self).__init__(project, job_name) + self.input = MlipInput() + self.__name__ = "LammpsMlip" + self.__version__ = None # Reset the version number to the executable is set automatically + self._executable = None + self._executable_activate() + + def set_input_to_read_only(self): + """ + This function enforces read-only mode for the input classes, but it has to be implement in the individual + classes. 
+ """ + super(LammpsMlip, self).set_input_to_read_only() + self.input.mlip.read_only = True + + def write_input(self): + super(LammpsMlip, self).write_input() + if self.input.mlip['mlip:load-from'] == 'auto': + self.input.mlip['mlip:load-from'] = os.path.basename(self.potential['Filename'][0][0]) + self.input.mlip.write_file(file_name="mlip.ini", cwd=self.working_directory) + + def enable_active_learning(self): + self.input.mlip.load_string("""\ +abinitio void +mlip mtpr +mlip:load-from Trained.mtp_ +calculate-efs TRUE +fit FALSE +select TRUE +select:site-en-weight 0.0 +select:energy-weight 1.0 +select:force-weight 0.0 +select:stress-weight 0.0 +select:threshold-init 1e-5 +select:threshold 2.0 +select:threshold-swap 1.000001 +select:threshold-break 5.0 +select:save-selected selected.cfg +select:save-state selection.mvs +select:load-state state.mvs +select:efs-ignored FALSE +select:log selection.log +write-cfgs:skip 0 +log lotf.log""") + + def collect_output(self): + super(LammpsMlip, self).collect_output() + if 'select:save-selected' in self.input.mlip._dataset['Parameter']: + file_name = os.path.join(self.working_directory, self.input.mlip['select:save-selected']) + if os.path.exists(file_name): + cell, positions, forces, stress, energy, indicies, grades, jobids, timesteps = read_cgfs(file_name=file_name) + with self.project_hdf5.open("output/mlip") as hdf5_output: + hdf5_output['forces'] = forces + hdf5_output['energy_tot'] = energy + hdf5_output['pressures'] = stress + hdf5_output['cells'] = cell + hdf5_output['positions'] = positions + hdf5_output['indicies'] = indicies + + +class MlipInput(Input): + def __init__(self): + self.mlip = MlipParameter() + super(MlipInput, self).__init__() + + def to_hdf(self, hdf5): + """ + + Args: + hdf5: + Returns: + """ + with hdf5.open("input") as hdf5_input: + self.mlip.to_hdf(hdf5_input) + super(MlipInput, self).to_hdf(hdf5) + + def from_hdf(self, hdf5): + """ + + Args: + hdf5: + Returns: + """ + with hdf5.open("input") as hdf5_input: + self.mlip.from_hdf(hdf5_input) + super(MlipInput, self).from_hdf(hdf5) + + +class MlipParameter(GenericParameters): + def __init__(self, separator_char=' ', comment_char='#', table_name="mlip_inp"): + super(MlipParameter, self).__init__(separator_char=separator_char, comment_char=comment_char, table_name=table_name) + + def load_default(self, file_content=None): + if file_content is None: + file_content = '''\ +abinitio void +mlip mtpr +mlip:load-from auto +calculate-efs TRUE +fit FALSE +select FALSE +''' + self.load_string(file_content) + diff --git a/pyiron_contrib/atomistic/mlip/masters.py b/pyiron_contrib/atomistic/mlip/masters.py new file mode 100644 index 000000000..8de00480c --- /dev/null +++ b/pyiron_contrib/atomistic/mlip/masters.py @@ -0,0 +1,126 @@ +# coding: utf-8 +# Copyright (c) Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department +# Distributed under the terms of "New BSD License", see the LICENSE file. 
+ +import numpy as np +from pyiron_atomistics.atomistics.master.parallel import ParallelMaster +from pyiron_base import JobGenerator + +__author__ = "Jan Janssen" +__copyright__ = "Copyright 2020, Max-Planck-Institut für Eisenforschung GmbH - " \ + "Computational Materials Design (CM) Department" +__version__ = "1.0" +__maintainer__ = "Jan Janssen" +__email__ = "janssen@mpie.de" +__status__ = "development" +__date__ = "Sep 1, 2018" + + +def random_displacement(basis, displacement=0.05): + basis_copy = basis.copy() + random_vec = (np.random.random([len(basis_copy), 3]) - 0.5) * displacement + basis_copy.positions += random_vec + return basis_copy + + +class DisplacementJobGenerator(JobGenerator): + @property + def parameter_list(self): + """ + + Returns: + (list) + """ + return list(range(self._job.input['num_points'])) + + @staticmethod + def job_name(parameter): + return "rsdis_job_" + str(parameter) + + def modify_job(self, job, parameter): + job.structure = random_displacement(basis=job.structure, displacement=float(self._job.input['displacement'])) + return job + + +class RandomDisMaster(ParallelMaster): + """ + + Args: + project: + job_name: + """ + + def __init__(self, project, job_name): + super(RandomDisMaster, self).__init__(project, job_name) + self.__name__ = 'RandomDisMaster' + self.__version__ = '0.0.1' + self.input['num_points'] = (100, 'number of points') + self.input['displacement'] = (0.05, 'displacement') + self._job_generator = DisplacementJobGenerator(self) + + def collect_output(self): + """ + + Returns: + + """ + pass + + +class RandomMDJobGenerator(JobGenerator): + @property + def parameter_list(self): + """ + + Returns: + (list) + """ + return list(enumerate([int(job_id) for job_id in self._job.input['job_ids'].split()])) + + @staticmethod + def job_name(parameter): + return "md_job_" + str(parameter[1]) + '_' + str(parameter[0]) + + def modify_job(self, job, parameter): + job.structure = self._job.project.load(parameter[1]).structure + job.server.accept_crash = True + return job + + +class RandomMDMaster(ParallelMaster): + """ + + Args: + project: + job_name: + """ + + def __init__(self, project, job_name): + super(RandomMDMaster, self).__init__(project, job_name) + self.__name__ = 'RandomMDMaster' + self.__version__ = '0.0.1' + self.input['num_points'] = (100, 'number of points') + self._job_generator = RandomMDJobGenerator(self) + + @property + def structure_job_id_lst(self): + return [int(job_id) for job_id in self.input['job_ids'].split()] + + @structure_job_id_lst.setter + def structure_job_id_lst(self, job_id_lst): + self.input['job_ids'] = ' '.join([str(job_id) for job_id in job_id_lst]) + + def run_static(self): + if 'job_ids' not in self.input._dataset['Parameter']: + self.input['job_ids'] = ' '.join([str(job_id) for job_id in np.random.choice(self.structure_job_id_lst, + self.input['num_points'])]) + self.to_hdf() + super(RandomMDMaster, self).run_static() + + def collect_output(self): + """ + + Returns: + + """ + pass diff --git a/pyiron_contrib/atomistic/mlip/mlip.py b/pyiron_contrib/atomistic/mlip/mlip.py new file mode 100644 index 000000000..7af871f8c --- /dev/null +++ b/pyiron_contrib/atomistic/mlip/mlip.py @@ -0,0 +1,477 @@ +# coding: utf-8 +# Copyright (c) Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department +# Distributed under the terms of "New BSD License", see the LICENSE file. 
+ +from __future__ import print_function +from itertools import islice +import numpy as np +import os +import pandas as pd +import posixpath +import scipy.constants +from pyiron_base import Settings, GenericParameters, GenericJob, Executable +from pyiron_atomistics import ase_to_pyiron +from pyiron_contrib.atomistic.mlip.cfgs import savecfgs, loadcfgs, Cfg + +__author__ = "Jan Janssen" +__copyright__ = "Copyright 2020, Max-Planck-Institut für Eisenforschung GmbH - " \ + "Computational Materials Design (CM) Department" +__version__ = "1.0" +__maintainer__ = "Jan Janssen" +__email__ = "janssen@mpie.de" +__status__ = "development" +__date__ = "Sep 1, 2017" + +s = Settings() + + +gpa_to_ev_ang = 1e22 / scipy.constants.physical_constants['joule-electron volt relationship'][0] + + +class Mlip(GenericJob): + def __init__(self, project, job_name): + super(Mlip, self).__init__(project, job_name) + self.__version__ = None + self.__name__ = "Mlip" + self._executable_activate() + self._job_dict = {} + self.input = MlipParameter() + self._command_line = CommandLine() + + def _executable_activate(self, enforce=False): + if self._executable is None or enforce: + if len(self.__module__.split(".")) > 1: + self._executable = Executable( + codename=self.__name__, + module=self.__module__.split(".")[-2], + path_binary_codes=s.resource_paths, + ) + else: + self._executable = Executable( + codename=self.__name__, path_binary_codes=s.resource_paths + ) + @property + def calculation_dataframe(self): + job_id_lst, time_step_start_lst, time_step_end_lst, time_step_delta_lst = self._job_dict_lst(self._job_dict) + return pd.DataFrame({'Job id': job_id_lst, + 'Start config': time_step_start_lst, + 'End config': time_step_end_lst, + 'Step': time_step_delta_lst}) + + @property + def potential_files(self): + pot = os.path.join(self.working_directory, 'Trained.mtp_') + states = os.path.join(self.working_directory, 'state.mvs') + if os.path.exists(pot) and os.path.exists(states): + return [pot, states] + + def set_input_to_read_only(self): + """ + This function enforces read-only mode for the input classes, but it has to be implement in the individual + classes. 
+ """ + self.input.read_only = True + self._command_line.read_only = True + + def add_job_to_fitting(self, job_id, time_step_start=0, time_step_end=-1, time_step_delta=10): + if time_step_end == -1: + time_step_end = np.shape(self.project.inspect(int(job_id))['output/generic/cells'])[0]-1 + self._job_dict[job_id] = {'time_step_start': time_step_start, + 'time_step_end': time_step_end, + 'time_step_delta': time_step_delta} + + def write_input(self): + restart_file_lst = [ + os.path.basename(f) + for f in self._restart_file_list + ] + list(self._restart_file_dict.values()) + if 'testing.cfg' not in restart_file_lst: + species_count = self._write_test_set(file_name='testing.cfg', cwd=self.working_directory) + elif self.input['filepath'] != 'auto': + species_count = 0 + else: + raise ValueError('speciescount not set') + if 'training.cfg' not in restart_file_lst: + self._write_training_set(file_name='training.cfg', cwd=self.working_directory) + self._copy_potential(species_count=species_count, file_name='start.mtp', cwd=self.working_directory) + if self.version != '1.0.0': + self._command_line.activate_postprocessing() + self._command_line[0] = self._command_line[0].replace('energy_auto', str(self.input['energy-weight'])) + self._command_line[0] = self._command_line[0].replace('force_auto', str(self.input['force-weight'])) + self._command_line[0] = self._command_line[0].replace('stress_auto', str(self.input['stress-weight'])) + self._command_line[0] = self._command_line[0].replace('iteration_auto', str(int(self.input['iteration']))) + self._command_line.write_file(file_name='mlip.sh', cwd=self.working_directory) + + def collect_logfiles(self): + pass + + def collect_output(self): + file_name = os.path.join(self.working_directory, 'diff.cfg') + if os.path.exists(file_name): + _, _, _, _, _, _, _, job_id_diff_lst, timestep_diff_lst = read_cgfs(file_name) + else: + job_id_diff_lst, timestep_diff_lst = [], [] + file_name = os.path.join(self.working_directory, 'selected.cfg') + if os.path.exists(file_name): + _, _, _, _, _, _, _, job_id_new_training_lst, timestep_new_training_lst = read_cgfs(file_name) + else: + job_id_new_training_lst, timestep_new_training_lst = [], [] + file_name = os.path.join(self.working_directory, 'grades.cfg') + if os.path.exists(file_name): + _, _, _, _, _, _, grades_lst, job_id_grades_lst, timestep_grades_lst = read_cgfs(file_name) + else: + grades_lst, job_id_grades_lst, timestep_grades_lst = [], [], [] + with self.project_hdf5.open('output') as hdf5_output: + hdf5_output['grades'] = grades_lst + hdf5_output['job_id'] = job_id_grades_lst + hdf5_output['timestep'] = timestep_grades_lst + hdf5_output['job_id_diff'] = job_id_diff_lst + hdf5_output['timestep_diff'] = timestep_diff_lst + hdf5_output['job_id_new'] = job_id_new_training_lst + hdf5_output['timestep_new'] = timestep_new_training_lst + + def get_structure(self, iteration_step=-1): + job = self.project.load(self['output/job_id_diff'][iteration_step]) + return job.get_structure(self['output/timestep_diff'][iteration_step]) + + def to_hdf(self, hdf=None, group_name=None): + super(Mlip, self).to_hdf(hdf=hdf, group_name=group_name) + self.input.to_hdf(self._hdf5) + with self._hdf5.open('input') as hdf_input: + job_id_lst, time_step_start_lst, time_step_end_lst, time_step_delta_lst = self._job_dict_lst(self._job_dict) + hdf_input["job_id_list"] = job_id_lst + hdf_input["time_step_start_lst"] = time_step_start_lst + hdf_input["time_step_end_lst"] = time_step_end_lst + hdf_input["time_step_delta_lst"] = time_step_delta_lst 
+ + def from_hdf(self, hdf=None, group_name=None): + super(Mlip, self).from_hdf(hdf=hdf, group_name=group_name) + self.input.from_hdf(self._hdf5) + with self._hdf5.open('input') as hdf_input: + for job_id, start, end, delta in zip( + hdf_input["job_id_list"], + hdf_input["time_step_start_lst"], + hdf_input["time_step_end_lst"], + hdf_input["time_step_delta_lst"] + ): + self.add_job_to_fitting( + job_id=job_id, + time_step_start=start, + time_step_end=end, + time_step_delta=delta + ) + + def get_suggested_number_of_configuration(self, species_count=None, multiplication_factor=2.0): + if self.input['filepath'] == 'auto': + source = self._find_potential(self.input['potential']) + else: + source = self.input['filepath'] + with open(source) as f: + lines = f.readlines() + radial_basis_size, radial_funcs_count, alpha_scalar_moments = 0.0, 0.0, 0.0 + for line in lines: + if 'radial_basis_size' in line: + radial_basis_size = int(line.split()[2]) + if 'radial_funcs_count' in line: + radial_funcs_count = int(line.split()[2]) + if 'species_count' in line and species_count is None: + species_count = int(line.split()[2]) + if 'alpha_scalar_moments' in line: + alpha_scalar_moments = int(line.split()[2]) + return int(multiplication_factor * \ + (radial_basis_size * radial_funcs_count * species_count ** 2 + alpha_scalar_moments)) + + @staticmethod + def _update_species_count(lines, species_count): + ind = 0 + for ind, line in enumerate(lines): + if 'species_count' in line: + break + species_line = lines[ind].split() + species_line[-1] = str(species_count) + lines[ind] = ' '.join(species_line) + '\n' + return lines + + @staticmethod + def _update_min_max_distance(lines, min_distance, max_distance): + min_dis_updated, max_dis_updated = False, False + for ind, line in enumerate(lines): + if 'min_dist' in line and min_distance is not None: + min_dist_line = lines[ind].split() + min_dist_line[2] = str(min_distance) + lines[ind] = ' '.join(min_dist_line) + '\n' + min_dis_updated = True + elif 'max_dist' in line and max_distance is not None: + max_dist_line = lines[ind].split() + max_dist_line[2] = str(max_distance) + lines[ind] = ' '.join(max_dist_line) + '\n' + max_dis_updated = True + elif min_dis_updated and max_dis_updated: + break + return lines + + def _copy_potential(self, species_count, file_name, cwd=None): + if cwd is not None: + file_name = posixpath.join(cwd, file_name) + if self.input['filepath'] == 'auto': + source = self._find_potential(self.input['potential']) + else: + source = self.input['filepath'] + with open(source) as f: + lines = f.readlines() + if self.input['filepath'] == 'auto': + lines_modified = self._update_species_count(lines, species_count) + lines_modified = self._update_min_max_distance(lines=lines_modified, + min_distance=self.input['min_dist'], + max_distance=self.input['max_dist']) + else: + lines_modified = lines + with open(file_name, 'w') as f: + f.writelines(lines_modified) + + @staticmethod + def stress_tensor_components(stresses): + return np.array([stresses[0][0], stresses[1][1], stresses[2][2], + stresses[1][2], stresses[0][1], stresses[0][2]]) * gpa_to_ev_ang + + def _write_configurations(self, file_name='training.cfg', cwd=None, respect_step=True, return_max_index=False): + if cwd is not None: + file_name = posixpath.join(cwd, file_name) + indices_lst, position_lst, forces_lst, cell_lst, energy_lst, track_lst, stress_lst = [], [], [], [], [], [], [] + for job_id, value in self._job_dict.items(): + ham = self.project.inspect(job_id) + if respect_step: + start = 
value['time_step_start'] + end = value['time_step_end']+1 + delta = value['time_step_delta'] + else: + start, end, delta = 0, None, 1 + time_step = start + # HACK: until the training container has a proper HDF5 interface + if ham.__name__ == "TrainingContainer": + job = ham.to_object() + pd = job.to_pandas() + for time_step, (name, atoms, energy, forces, _) in islice(enumerate(pd.itertuples(index=False)), + start, end, delta): + atoms = ase_to_pyiron(atoms) + indices_lst.append(atoms.indices) + position_lst.append(atoms.positions) + forces_lst.append(forces) + cell_lst.append(atoms.cell) + energy_lst.append(energy) + track_lst.append(str(ham.job_id) + "_" + str(time_step)) + continue + original_dict = {el: ind for ind, el in enumerate(sorted(ham['input/structure/species']))} + species_dict = {ind: original_dict[el] for ind, el in enumerate(ham['input/structure/species'])} + if ham.__name__ in ['Vasp', 'ThermoIntDftEam', 'ThermoIntDftMtp', 'ThermoIntVasp']: + if len(ham['output/outcar/stresses']) != 0: + for position, forces, cell, energy, stresses, volume in zip(ham['output/generic/positions'][start:end:delta], + ham['output/generic/forces'][start:end:delta], + ham['output/generic/cells'][start:end:delta], + ham['output/generic/dft/energy_free'][start:end:delta], + ham['output/outcar/stresses'][start:end:delta], + ham['output/generic/volume'][start:end:delta]): + indices_lst.append([species_dict[el] for el in ham['input/structure/indices']]) + position_lst.append(position) + forces_lst.append(forces) + cell_lst.append(cell) + energy_lst.append(energy) + stress_lst.append(stresses * volume / gpa_to_ev_ang) + track_lst.append(str(ham.job_id) + '_' + str(time_step)) + time_step += delta + else: + for position, forces, cell, energy in zip(ham['output/generic/positions'][start:end:delta], + ham['output/generic/forces'][start:end:delta], + ham['output/generic/cells'][start:end:delta], + ham['output/generic/dft/energy_free'][start:end:delta]): + indices_lst.append([species_dict[el] for el in ham['input/structure/indices']]) + position_lst.append(position) + forces_lst.append(forces) + cell_lst.append(cell) + energy_lst.append(energy) + track_lst.append(str(ham.job_id) + '_' + str(time_step)) + time_step += delta + elif ham.__name__ in ['Lammps', 'LammpsInt2', 'LammpsMlip']: + for position, forces, cell, energy, stresses, volume in zip(ham['output/generic/positions'][start:end:delta], + ham['output/generic/forces'][start:end:delta], + ham['output/generic/cells'][start:end:delta], + ham['output/generic/energy_pot'][start:end:delta], + ham['output/generic/pressures'][start:end:delta], + ham['output/generic/volume'][start:end:delta]): + indices_lst.append([species_dict[el] for el in ham['input/structure/indices']]) + position_lst.append(position) + forces_lst.append(forces) + cell_lst.append(cell) + energy_lst.append(energy) + stress_lst.append(self.stress_tensor_components(stresses * volume)) + track_lst.append(str(ham.job_id) + '_' + str(time_step)) + time_step += delta + else: + for position, forces, cell, energy, stresses, volume in zip(ham['output/generic/positions'][start:end:delta], + ham['output/generic/forces'][start:end:delta], + ham['output/generic/cells'][start:end:delta], + ham['output/generic/energy_pot'][start:end:delta], + ham['output/generic/pressures'][start:end:delta], + ham['output/generic/volume'][start:end:delta]): + indices_lst.append([species_dict[el] for el in ham['input/structure/indices']]) + position_lst.append(position) + forces_lst.append(forces) + cell_lst.append(cell) 
+ energy_lst.append(energy) + stress_lst.append(self.stress_tensor_components(stresses * volume)) + track_lst.append(str(ham.job_id) + '_' + str(time_step)) + time_step += delta + write_cfg(file_name=file_name, + indices_lst=indices_lst, + position_lst=position_lst, + cell_lst=cell_lst, + forces_lst=forces_lst, + energy_lst=energy_lst, + track_lst=track_lst, + stress_lst=stress_lst) + if return_max_index: + total_lst = [] + for ind_lst in indices_lst: + if isinstance(ind_lst, list): + total_lst += ind_lst + elif isinstance(ind_lst, np.ndarray): + total_lst += ind_lst.tolist() + else: + raise TypeError() + return np.max(total_lst) + 1 + + def _write_test_set(self, file_name='testing.cfg', cwd=None): + return self._write_configurations(file_name=file_name, cwd=cwd, respect_step=False, return_max_index=True) + + def _write_training_set(self, file_name='training.cfg', cwd=None): + self._write_configurations(file_name=file_name, cwd=cwd, respect_step=True) + + + @staticmethod + def _job_dict_lst(job_dict): + job_id_lst, time_step_start_lst, time_step_end_lst, time_step_delta_lst = [], [], [], [] + for key, value in job_dict.items(): + job_id_lst.append(key) + time_step_start_lst.append(value['time_step_start']) + time_step_end_lst.append(value['time_step_end']) + time_step_delta_lst.append(value['time_step_delta']) + return job_id_lst, time_step_start_lst, time_step_end_lst, time_step_delta_lst + + @staticmethod + def _find_potential(potential_name): + for resource_path in s.resource_paths: + if os.path.exists(os.path.join(resource_path, 'mlip', 'potentials', 'templates')): + resource_path = os.path.join(resource_path, 'mlip', 'potentials', 'templates') + if 'potentials' in resource_path and \ + os.path.exists(os.path.join(resource_path, potential_name)): + return os.path.join(resource_path, potential_name) + raise ValueError('Potential not found!') + + +class MlipParameter(GenericParameters): + def __init__(self, separator_char=' ', comment_char='#', table_name="mlip_inp"): + super(MlipParameter, self).__init__(separator_char=separator_char, comment_char=comment_char, + table_name=table_name) + + def load_default(self, file_content=None): + if file_content is None: + file_content = '''\ +potential 16g.mtp +filepath auto +energy-weight 1 +force-weight 0.01 +stress-weight 0.001 +iteration 1000 +min_dist 2.0 +max_dist 5.0 +''' + self.load_string(file_content) + + +class CommandLine(GenericParameters): + def __init__(self, input_file_name=None, **qwargs): + super(CommandLine, self).__init__(input_file_name=input_file_name, + table_name="cmd", + comment_char="#", + val_only=True) + + def load_default(self, file_content=None): + if file_content is None: + file_content = '''\ +$MLP_COMMAND_PARALLEL train --energy-weight=energy_auto --force-weight=force_auto --stress-weight=stress_auto --max-iter=iteration_auto start.mtp training.cfg > training.log +''' + self.load_string(file_content) + + def activate_postprocessing(self, file_content=None): + if file_content is None: + file_content = '''\ +$MLP_COMMAND_PARALLEL train --energy-weight=energy_auto --force-weight=force_auto --stress-weight=stress_auto --max-iter=iteration_auto start.mtp training.cfg > training.log +$MLP_COMMAND_SERIAL calc-grade Trained.mtp_ training.cfg testing.cfg grades.cfg --mvs-filename=state.mvs > grading.log +$MLP_COMMAND_SERIAL select-add Trained.mtp_ training.cfg testing.cfg diff.cfg > select.log +cp training.cfg training_new.cfg +cat diff.cfg >> training_new.cfg +''' + self.load_string(file_content) + + +def 
write_cfg(file_name, indices_lst, position_lst, cell_lst, forces_lst=None, energy_lst=None, track_lst=None, + stress_lst=None): + if stress_lst is None or len(stress_lst) == 0: + stress_lst = [None] * len(position_lst) + if forces_lst is None or len(forces_lst) == 0: + forces_lst = [None] * len(position_lst) + if track_lst is None or len(track_lst) == 0: + track_lst = [None] * len(position_lst) + if energy_lst is None or len(energy_lst) == 0: + energy_lst = [None] * len(position_lst) + cfg_lst = [] + for indices, position, forces, cell, energy, stress, track_str in zip( + indices_lst, + position_lst, + forces_lst, + cell_lst, + energy_lst, + stress_lst, + track_lst + ): + cfg_object = Cfg() + cfg_object.pos = position + cfg_object.lat = cell + cfg_object.types = indices + cfg_object.energy = energy + cfg_object.forces = forces + cfg_object.stresses = stress + if track_str is not None: + cfg_object.desc = 'pyiron\t' + track_str + cfg_lst.append(cfg_object) + savecfgs(filename=file_name, cfgs=cfg_lst, desc=None) + + +def read_cgfs(file_name): + cgfs_lst = loadcfgs(filename=file_name, max_cfgs=None) + cell, positions, forces, stress, energy, indicies, grades, jobids, timesteps = [], [], [], [], [], [], [], [], [] + for cgf in cgfs_lst: + if cgf.pos is not None: + positions.append(cgf.pos) + if cgf.lat is not None: + cell.append(cgf.lat) + if cgf.types is not None: + indicies.append(cgf.types) + if cgf.energy is not None: + energy.append(cgf.energy) + if cgf.forces is not None: + forces.append(cgf.forces) + if cgf.stresses is not None: + stress.append([ + [cgf.stresses[0], cgf.stresses[5], cgf.stresses[4]], + [cgf.stresses[5], cgf.stresses[1], cgf.stresses[3]], + [cgf.stresses[4], cgf.stresses[3], cgf.stresses[2]] + ]) + if cgf.grade is not None: + grades.append(cgf.grade) + if cgf.desc is not None: + job_id, timestep = cgf.desc.split('_') + jobids.append(int(job_id)) + timesteps.append(int(timestep)) + return [np.array(cell), np.array(positions), np.array(forces), np.array(stress), np.array(energy), + np.array(indicies), np.array(grades), np.array(jobids), np.array(timesteps)] diff --git a/pyiron_contrib/atomistic/mlip/mlipjob.py b/pyiron_contrib/atomistic/mlip/mlipjob.py new file mode 100644 index 000000000..21448c157 --- /dev/null +++ b/pyiron_contrib/atomistic/mlip/mlipjob.py @@ -0,0 +1,92 @@ +# coding: utf-8 +# Copyright (c) Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department +# Distributed under the terms of "New BSD License", see the LICENSE file. 
+ +import os +import shutil +from pyiron_atomistics.atomistics.job.atomistic import AtomisticGenericJob +from pyiron_contrib.atomistic.mlip.mlip import write_cfg, read_cgfs +from pyiron_base import GenericParameters + +__author__ = "Jan Janssen" +__copyright__ = "Copyright 2020, Max-Planck-Institut für Eisenforschung GmbH - " \ + "Computational Materials Design (CM) Department" +__version__ = "1.0" +__maintainer__ = "Jan Janssen" +__email__ = "janssen@mpie.de" +__status__ = "development" +__date__ = "Sep 1, 2018" + + +class MlipJob(AtomisticGenericJob): + def __init__(self, project, job_name): + super(MlipJob, self).__init__(project, job_name) + self.__name__ = "MlipJob" + self.__version__ = None + self._executable_activate() + self.input = MlipParameter() + + @property + def potential(self): + return self.input['mlip:load-from'] + + @potential.setter + def potential(self, potential_filename): + self.input['mlip:load-from'] = potential_filename + + def set_input_to_read_only(self): + """ + This function enforces read-only mode for the input classes, but it has to be implement in the individual + classes. + """ + super(MlipJob, self).set_input_to_read_only() + self.input.read_only = True + + def to_hdf(self, hdf=None, group_name=None): + super(MlipJob, self).to_hdf(hdf=hdf, group_name=group_name) + with self.project_hdf5.open("input") as hdf5_input: + self.input.to_hdf(hdf5_input) + + def from_hdf(self, hdf=None, group_name=None): + super(MlipJob, self).from_hdf(hdf=hdf, group_name=group_name) + with self.project_hdf5.open("input") as hdf5_input: + self.input.from_hdf(hdf5_input) + + def write_input(self): + write_cfg(file_name=os.path.join(self.working_directory, 'structure.cfg'), + indices_lst=[self.structure.indices], + position_lst=[self.structure.positions], + cell_lst=[self.structure.cell], + forces_lst=None, energy_lst=None, track_lst=None, stress_lst=None) + shutil.copyfile(self.input['mlip:load-from'], os.path.join(self.working_directory, 'potential.mtp')) + self.input['mlip:load-from'] = 'potential.mtp' + self.input.write_file(file_name="mlip.ini", cwd=self.working_directory) + + def collect_output(self): + file_name = os.path.join(self.working_directory, 'structurebyMTP.cfg') + cell, positions, forces, stress, energy, indicies, grades, jobids, timesteps = read_cgfs(file_name=file_name) + with self.project_hdf5.open('output') as hdf5_output: + hdf5_output['forces'] = forces + hdf5_output['energy_tot'] = energy + hdf5_output['pressures'] = stress + hdf5_output['cells'] = cell + hdf5_output['positions'] = positions + hdf5_output['indicies'] = indicies + + +class MlipParameter(GenericParameters): + def __init__(self, separator_char=' ', comment_char='#', table_name="mlip_inp"): + super(MlipParameter, self).__init__(separator_char=separator_char, comment_char=comment_char, + table_name=table_name) + self._bool_dict = {True: "TRUE", False: "FALSE"} + + def load_default(self, file_content=None): + if file_content is None: + file_content = '''\ +abinitio void +mlip mtpr +mlip:load-from auto +calculate-efs TRUE +write-cfgs structurebyMTP.cfg +''' + self.load_string(file_content) diff --git a/pyiron_contrib/atomistic/mlip/mlipselect.py b/pyiron_contrib/atomistic/mlip/mlipselect.py new file mode 100644 index 000000000..51c1b88d3 --- /dev/null +++ b/pyiron_contrib/atomistic/mlip/mlipselect.py @@ -0,0 +1,52 @@ +# coding: utf-8 +# Copyright (c) Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department +# Distributed under the terms of "New BSD License", see 
the LICENSE file. + +import os +from pyiron_contrib.atomistic.mlip.mlip import Mlip, read_cgfs + +__author__ = "Jan Janssen" +__copyright__ = "Copyright 2020, Max-Planck-Institut für Eisenforschung GmbH - " \ + "Computational Materials Design (CM) Department" +__version__ = "1.0" +__maintainer__ = "Jan Janssen" +__email__ = "janssen@mpie.de" +__status__ = "development" +__date__ = "Sep 1, 2018" + + +class MlipSelect(Mlip): + def __init__(self, project, job_name): + super(MlipSelect, self).__init__(project, job_name) + self.__version__ = None + self.__name__ = "MlipSelect" + self._executable_activate() + del self.input['min_dist'] + del self.input['max_dist'] + del self.input['iteration'] + del self.input['energy-weight'] + del self.input['force-weight'] + del self.input['stress-weight'] + self._command_line._delete_line(0) + self._command_line._delete_line(0) + + def validate_ready_to_run(self): + if len(self.restart_file_list) == 0: + raise ValueError() + + def write_input(self): + species_count = self._write_test_set(file_name='testing.cfg', cwd=self.working_directory) + self._copy_potential(species_count, file_name='start.mtp', cwd=self.working_directory) + self._command_line[0] = self._command_line[0].replace('Trained.mtp_', 'start.mtp') + self._command_line.write_file(file_name='mlip.sh', cwd=self.working_directory) + + def collect_output(self): + file_name = os.path.join(self.working_directory, 'diff.cfg') + cell, positions, forces, stress, energy, indicies, grades, jobids, timesteps = read_cgfs(file_name=file_name) + with self.project_hdf5.open('output') as hdf5_output: + hdf5_output['forces'] = forces + hdf5_output['energy_tot'] = energy + hdf5_output['pressures'] = stress + hdf5_output['cells'] = cell + hdf5_output['positions'] = positions + hdf5_output['indicies'] = indicies diff --git a/setup.py b/setup.py index e278a1e33..d670e42d3 100644 --- a/setup.py +++ b/setup.py @@ -22,27 +22,32 @@ 'License :: OSI Approved :: BSD License', 'Intended Audience :: Science/Research', 'Operating System :: OS Independent', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8' + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9' ], keywords='pyiron', packages=find_packages(exclude=["*tests*"]), - install_requires=[ - 'ase==3.21.1', + install_requires=[ 'matplotlib==3.3.4', - 'numpy==1.20.0', - 'pyiron_atomistics==0.2.3', + 'numpy==1.20.1', + 'pyiron_base==0.1.48', 'scipy==1.6.0', 'seaborn==0.11.1', - 'scikit-image==0.18.1', - 'fenics==2019.1.0', - 'mshr==2019.1.0', ], + extras_require={ + 'atomistic': [ + 'ase==3.21.1', + 'pyiron_atomistics==0.2.4', + ], + 'fenics': [ + 'fenics==2019.1.0', + 'mshr==2019.1.0', + ], + 'image': ['scikit-image==0.18.1'], + }, cmdclass=versioneer.get_cmdclass(), )
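Usage sketch (reviewer note, not part of the diff): the snippet below outlines how the newly registered job classes could be chained once this change is merged. It is a minimal sketch under several assumptions: a configured pyiron installation with the MLIP and LAMMPS executables on the resource path, a finished reference job named 'ref_md' that provides energies and forces, and the custom-potential DataFrame layout ('Name', 'Filename', 'Model', 'Species', 'Config') used by pyiron's LAMMPS jobs. The project and job names, the element 'Fe', and the pair_style/pair_coeff lines are illustrative placeholders; only add_job_to_fitting(), potential_files, enable_active_learning() and the '16g.mtp' default come from the code in this diff.

# Sketch: fit an MTP from an existing reference job, then run LAMMPS
# with the fitted potential and on-the-fly configuration selection.
import pandas as pd
from pyiron_atomistics import Project

pr = Project("mlip_demo")

# 1) Fitting job (class 'Mlip' registered in pyiron_contrib/__init__.py).
#    'ref_md' is an assumed, already finished reference job; '16g.mtp' is the
#    default template set in MlipParameter.load_default().
fit = pr.create_job("Mlip", "mtp_fit")
fit.input["iteration"] = 1000
ref = pr.load("ref_md")
fit.add_job_to_fitting(
    job_id=ref.job_id, time_step_start=0, time_step_end=-1, time_step_delta=10
)
fit.run()  # the fitted potential ends up in Trained.mtp_ (see Mlip.potential_files)

# 2) Molecular dynamics with the fitted potential (class 'LammpsMlip').
#    The DataFrame below uses the assumed custom-potential layout of pyiron's
#    LAMMPS jobs; LammpsMlip.write_input() reads potential['Filename'][0][0].
md = pr.create_job("LammpsMlip", "mtp_md")
md.structure = ref.get_structure(-1)
md.potential = pd.DataFrame({
    "Name": ["mtp_fit"],
    "Filename": [fit.potential_files],  # ['.../Trained.mtp_', '.../state.mvs']
    "Model": ["Custom"],
    "Species": [["Fe"]],
    "Config": [["pair_style mlip mlip.ini\n", "pair_coeff * *\n"]],
})
md.enable_active_learning()  # selection settings are written to mlip.ini
md.calc_md(temperature=800, n_ionic_steps=10000)
md.run()  # extrapolative configurations are collected in 'output/mlip'

The same fitting job can alternatively be fed from a TrainingContainer (handled explicitly in Mlip._write_configurations), and RandomDisMaster / RandomMDMaster can be used to generate the reference trajectories in the first place.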