Commit 33575b5

Merge remote-tracking branch 'origin/main' into conda_ex

jan-janssen committed Mar 26, 2024
2 parents abb3688 + 7303661

Showing 26 changed files with 461 additions and 273 deletions.
2 changes: 1 addition & 1 deletion .ci_support/environment-docs.yml
@@ -20,6 +20,6 @@ dependencies:
- pympipool =0.7.17
- pysqa =0.1.17
- pytables =3.9.2
-  - sqlalchemy =2.0.28
+  - sqlalchemy =2.0.29
- tqdm =4.66.2
- traitlets =5.14.2
2 changes: 1 addition & 1 deletion .ci_support/environment.yml
@@ -19,6 +19,6 @@ dependencies:
- pympipool =0.7.17
- pysqa =0.1.17
- pytables =3.9.2
-  - sqlalchemy =2.0.28
+  - sqlalchemy =2.0.29
- tqdm =4.66.2
- traitlets =5.14.2
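
Both environment files bump the SQLAlchemy pin from 2.0.28 to 2.0.29, keeping the docs and CI environments in lockstep. As a quick illustration (not part of the commit), a runtime guard can confirm that a resolved environment actually matches the pin:

# Illustrative guard only; the pinned version comes from the diff above.
import sqlalchemy

EXPECTED_VERSION = "2.0.29"
if sqlalchemy.__version__ != EXPECTED_VERSION:
    raise RuntimeError(
        f"Expected SQLAlchemy {EXPECTED_VERSION}, got {sqlalchemy.__version__}"
    )
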
3 changes: 2 additions & 1 deletion pyiron_base/database/generic.py
@@ -33,6 +33,7 @@
from pyiron_base.database.tables import get_historical_table
from pyiron_base.utils.error import retry
from pyiron_base.database.interface import IsDatabase
+from pyiron_base.database.sqlcolumnlength import CHEMICALFORMULA_STR_LENGTH

__author__ = "Murat Han Celik"
__copyright__ = (
@@ -215,7 +216,7 @@ def __init__(self, connection_string, table_name, timeout=60):
except Exception as except_msg:
raise ValueError("Connection to database failed: " + str(except_msg))

-        self._chem_formula_lim_length = 50
+        self._chem_formula_lim_length = CHEMICALFORMULA_STR_LENGTH

def _create_table():
self.__reload_db()
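
The hard-coded limit of 50 characters in DatabaseAccess is replaced by the shared CHEMICALFORMULA_STR_LENGTH constant, so the Python-side length check can no longer drift from the column width declared in tables.py. A hedged sketch of how such a limit is typically applied before an insert; the helper below is hypothetical, not the actual pyiron_base code path:

# Hypothetical helper: clip a formula to the shared column width before
# it is written to the VARCHAR column defined in tables.py.
from pyiron_base.database.sqlcolumnlength import CHEMICALFORMULA_STR_LENGTH

def clip_chemical_formula(formula: str) -> str:
    if len(formula) > CHEMICALFORMULA_STR_LENGTH:
        return formula[:CHEMICALFORMULA_STR_LENGTH]
    return formula
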
11 changes: 11 additions & 0 deletions pyiron_base/database/sqlcolumnlength.py
@@ -0,0 +1,11 @@
+DEFAULT_STR_LENGTH = 255
+PROJECT_PATH_STR_LENGTH = DEFAULT_STR_LENGTH
+PROJECT_STR_LENGTH = DEFAULT_STR_LENGTH
+JOB_STR_LENGTH = DEFAULT_STR_LENGTH
+SUBJOB_STR_LENGTH = DEFAULT_STR_LENGTH
+CHEMICALFORMULA_STR_LENGTH = 50
+STATUS_STR_LENGTH = 20
+HAMILTON_STR_LENGTH = 20
+HAMVERSION_STR_LENGTH = 50
+USERNAME_STR_LENGTH = 20
+COMPUTER_STR_LENGTH = 100
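
The new module centralizes every string-column width in one place; 255 matches the common VARCHAR(255) default, while the tighter limits (for example 20 for status and username) mirror the values previously hard-coded in tables.py. One way these constants could be consumed generically, sketched under the assumption that rows arrive as plain dicts; this validator is illustrative, not part of the commit:

# Sketch only: a lookup table from column name to width, used to flag
# oversized values before they reach the database.
from pyiron_base.database import sqlcolumnlength as limits

COLUMN_LIMITS = {
    "projectpath": limits.PROJECT_PATH_STR_LENGTH,
    "project": limits.PROJECT_STR_LENGTH,
    "job": limits.JOB_STR_LENGTH,
    "subjob": limits.SUBJOB_STR_LENGTH,
    "chemicalformula": limits.CHEMICALFORMULA_STR_LENGTH,
    "status": limits.STATUS_STR_LENGTH,
    "hamilton": limits.HAMILTON_STR_LENGTH,
    "hamversion": limits.HAMVERSION_STR_LENGTH,
    "username": limits.USERNAME_STR_LENGTH,
    "computer": limits.COMPUTER_STR_LENGTH,
}

def oversized_fields(row: dict) -> list:
    # Return the names of string fields that exceed their column width.
    return [
        name
        for name, limit in COLUMN_LIMITS.items()
        if isinstance(row.get(name), str) and len(row[name]) > limit
    ]
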
32 changes: 22 additions & 10 deletions pyiron_base/database/tables.py
@@ -13,6 +13,18 @@
String,
Table,
)
+from pyiron_base.database.sqlcolumnlength import (
+    PROJECT_PATH_STR_LENGTH,
+    PROJECT_STR_LENGTH,
+    JOB_STR_LENGTH,
+    SUBJOB_STR_LENGTH,
+    CHEMICALFORMULA_STR_LENGTH,
+    STATUS_STR_LENGTH,
+    HAMILTON_STR_LENGTH,
+    HAMVERSION_STR_LENGTH,
+    USERNAME_STR_LENGTH,
+    COMPUTER_STR_LENGTH,
+)

__author__ = "Murat Han Celik, Liam Huber"
__copyright__ = (
@@ -34,16 +46,16 @@ def get_historical_table(table_name, metadata, extend_existing=True):
Column("id", Integer, primary_key=True, autoincrement=True),
Column("parentid", Integer),
Column("masterid", Integer),
Column("projectpath", String(50)),
Column("project", String(255)),
Column("job", String(50)),
Column("subjob", String(255)),
Column("chemicalformula", String(50)),
Column("status", String(20)),
Column("hamilton", String(20)),
Column("hamversion", String(50)),
Column("username", String(20)),
Column("computer", String(100)),
Column("projectpath", String(PROJECT_PATH_STR_LENGTH)),
Column("project", String(PROJECT_STR_LENGTH)),
Column("job", String(JOB_STR_LENGTH)),
Column("subjob", String(SUBJOB_STR_LENGTH)),
Column("chemicalformula", String(CHEMICALFORMULA_STR_LENGTH)),
Column("status", String(STATUS_STR_LENGTH)),
Column("hamilton", String(HAMILTON_STR_LENGTH)),
Column("hamversion", String(HAMVERSION_STR_LENGTH)),
Column("username", String(USERNAME_STR_LENGTH)),
Column("computer", String(COMPUTER_STR_LENGTH)),
Column("timestart", DateTime),
Column("timestop", DateTime),
Column("totalcputime", Float),
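
With the literals replaced by named constants, the schema definition and the access layer now share a single source of truth. A minimal sketch of exercising the schema against an in-memory SQLite engine, assuming only what the diff shows (get_historical_table returns a sqlalchemy Table bound to the passed MetaData):

# Sanity-check sketch: build the historical table and create it in memory.
from sqlalchemy import MetaData, create_engine
from pyiron_base.database.tables import get_historical_table

metadata = MetaData()
table = get_historical_table("jobs_test", metadata)
engine = create_engine("sqlite:///:memory:")
metadata.create_all(engine)
print([column.name for column in table.columns])
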
116 changes: 56 additions & 60 deletions pyiron_base/jobs/datamining.py
@@ -658,6 +658,62 @@ def _save_output(self):
hdf5_output.file_name, key=hdf5_output.h5_path + "/table"
)

+    def to_dict(self):
+        job_dict = super().to_dict()
+        job_dict["input/bool_dict"] = {
+            "enforce_update": self._enforce_update,
+            "convert_to_object": self._pyiron_table.convert_to_object,
+        }
+        if self._analysis_project is not None:
+            job_dict["input/project"] = {
+                "path": self._analysis_project.path,
+                "user": self._analysis_project.user,
+                "sql_query": self._analysis_project.sql_query,
+                "filter": self._analysis_project._filter,
+                "inspect_mode": self._analysis_project._inspect_mode,
+            }
+        add_dict = {}
+        self._pyiron_table.add._to_hdf(add_dict)
+        for k, v in add_dict.items():
+            job_dict["input/" + k] = v
+        if self.pyiron_table._filter_function is not None:
+            _to_pickle(job_dict, "input/filter", self.pyiron_table._filter_function)
+        if self.pyiron_table._db_filter_function is not None:
+            _to_pickle(
+                job_dict, "input/db_filter", self.pyiron_table._db_filter_function
+            )
+        return job_dict

+    def from_dict(self, job_dict):
+        super().from_dict(job_dict=job_dict)
+        if "project" in job_dict["input"].keys():
+            project_dict = job_dict["input"]["project"]
+            if os.path.exists(project_dict["path"]):
+                project = self.project.__class__(
+                    path=project_dict["path"],
+                    user=project_dict["user"],
+                    sql_query=project_dict["sql_query"],
+                )
+                project._filter = project_dict["filter"]
+                project._inspect_mode = project_dict["inspect_mode"]
+                self.analysis_project = project
+            else:
+                self._logger.warning(
+                    f"Could not instantiate analysis_project, no such path {project_dict['path']}."
+                )
+        if "filter" in job_dict["input"].keys():
+            self.pyiron_table.filter_function = _from_pickle(
+                job_dict["input"], "filter"
+            )
+        if "db_filter" in job_dict["input"].keys():
+            self.pyiron_table.db_filter_function = _from_pickle(
+                job_dict["input"], "db_filter"
+            )
+        bool_dict = job_dict["input"]["bool_dict"]
+        self._enforce_update = bool_dict["enforce_update"]
+        self._pyiron_table.convert_to_object = bool_dict["convert_to_object"]
+        self._pyiron_table.add._from_hdf(job_dict["input"])

def to_hdf(self, hdf=None, group_name=None):
"""
Store pyiron table job in HDF5
@@ -668,26 +724,6 @@ def to_hdf(self, hdf=None, group_name=None):
"""
super(TableJob, self).to_hdf(hdf=hdf, group_name=group_name)
with self.project_hdf5.open("input") as hdf5_input:
hdf5_input["bool_dict"] = {
"enforce_update": self._enforce_update,
"convert_to_object": self._pyiron_table.convert_to_object,
}
self._pyiron_table.add._to_hdf(hdf5_input)
if self._analysis_project is not None:
hdf5_input["project"] = {
"path": self._analysis_project.path,
"user": self._analysis_project.user,
"sql_query": self._analysis_project.sql_query,
"filter": self._analysis_project._filter,
"inspect_mode": self._analysis_project._inspect_mode,
}
if self.pyiron_table._filter_function is not None:
_to_pickle(hdf5_input, "filter", self.pyiron_table._filter_function)
if self.pyiron_table._db_filter_function is not None:
_to_pickle(
hdf5_input, "db_filter", self.pyiron_table._db_filter_function
)
if len(self.pyiron_table._df) != 0:
self._save_output()

@@ -701,46 +737,6 @@ def from_hdf(self, hdf=None, group_name=None):
"""
super(TableJob, self).from_hdf(hdf=hdf, group_name=group_name)
hdf_version = self.project_hdf5.get("HDF_VERSION", "0.1.0")
with self.project_hdf5.open("input") as hdf5_input:
if "project" in hdf5_input.list_nodes():
project_dict = hdf5_input["project"]
if os.path.exists(project_dict["path"]):
project = self.project.__class__(
path=project_dict["path"],
user=project_dict["user"],
sql_query=project_dict["sql_query"],
)
project._filter = project_dict["filter"]
project._inspect_mode = project_dict["inspect_mode"]
self.analysis_project = project
else:
self._logger.warning(
f"Could not instantiate analysis_project, no such path {project_dict['path']}."
)
if "filter" in hdf5_input.list_nodes():
if hdf_version == "0.1.0":
self.pyiron_table._filter_function_str = hdf5_input["filter"]
self.pyiron_table.filter_function = get_function_from_string(
hdf5_input["filter"]
)
else:
self.pyiron_table.filter_function = _from_pickle(
hdf5_input, "filter"
)
if "db_filter" in hdf5_input.list_nodes():
if hdf_version == "0.1.0":
self.pyiron_table._db_filter_function_str = hdf5_input["db_filter"]
self.pyiron_table.db_filter_function = get_function_from_string(
hdf5_input["db_filter"]
)
else:
self.pyiron_table.db_filter_function = _from_pickle(
hdf5_input, "db_filter"
)
bool_dict = hdf5_input["bool_dict"]
self._enforce_update = bool_dict["enforce_update"]
self._pyiron_table.convert_to_object = bool_dict["convert_to_object"]
self._pyiron_table.add._from_hdf(hdf5_input)
if hdf_version == "0.3.0":
with self.project_hdf5.open("output") as hdf5_output:
if "table" in hdf5_output.list_groups():
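
The bulk of this file's change moves TableJob serialization from the HDF5-specific to_hdf/from_hdf pair to a storage-agnostic to_dict/from_dict pair, leaving to_hdf and from_hdf as thin wrappers. Note the asymmetry: to_dict writes flat keys such as "input/bool_dict", while from_dict reads a nested job_dict["input"] mapping, so the storage layer in between evidently expands the flat paths into groups. A self-contained sketch of the round-trip pattern; the class below is illustrative, not TableJob, and uses the nested form throughout:

# Illustrative round-trip, not the TableJob implementation.
class DictBackedJob:
    def __init__(self):
        self.enforce_update = False
        self.convert_to_object = True

    def to_dict(self) -> dict:
        return {
            "input": {
                "bool_dict": {
                    "enforce_update": self.enforce_update,
                    "convert_to_object": self.convert_to_object,
                }
            }
        }

    def from_dict(self, job_dict: dict) -> None:
        bool_dict = job_dict["input"]["bool_dict"]
        self.enforce_update = bool_dict["enforce_update"]
        self.convert_to_object = bool_dict["convert_to_object"]

restored = DictBackedJob()
restored.from_dict(DictBackedJob().to_dict())
assert restored.convert_to_object is True
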
24 changes: 14 additions & 10 deletions pyiron_base/jobs/flex/executablecontainer.py
@@ -79,31 +79,35 @@ def write_input(self):
working_directory=self.working_directory,
)

+    def run_static(self):
+        self.storage.output.stdout = super().run_static()

def collect_output(self):
if self._collect_output_funct is not None:
self.output.update(
self._collect_output_funct(working_directory=self.working_directory)
)
self.to_hdf()

-    def to_hdf(self, hdf=None, group_name=None):
-        super().to_hdf(hdf=hdf, group_name=group_name)
+    def to_dict(self):
+        job_dict = super().to_dict()
if self._write_input_funct is not None:
self.project_hdf5["write_input_function"] = np.void(
job_dict["write_input_function"] = np.void(
cloudpickle.dumps(self._write_input_funct)
)
if self._collect_output_funct is not None:
self.project_hdf5["collect_output_function"] = np.void(
job_dict["collect_output_function"] = np.void(
cloudpickle.dumps(self._collect_output_funct)
)
+        return job_dict

-    def from_hdf(self, hdf=None, group_name=None):
-        super().from_hdf(hdf=hdf, group_name=group_name)
-        if "write_input_function" in self.project_hdf5.list_nodes():
+    def from_dict(self, job_dict):
+        super().from_dict(job_dict=job_dict)
+        if "write_input_function" in job_dict.keys():
self._write_input_funct = cloudpickle.loads(
self.project_hdf5["write_input_function"]
job_dict["write_input_function"]
)
if "write_input_function" in self.project_hdf5.list_nodes():
if "write_input_function" in job_dict.keys():
self._collect_output_funct = cloudpickle.loads(
self.project_hdf5["collect_output_function"]
job_dict["collect_output_function"]
)
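
Both container jobs persist user-supplied callables with cloudpickle, wrapping the pickled bytes in np.void so an HDF5-backed store can hold them as an opaque scalar. A round-trip sketch of that pattern; the write_input function below is a stand-in, not pyiron_base code:

import cloudpickle
import numpy as np

def write_input(input_dict, working_directory="."):
    # Stand-in for a user-supplied input writer.
    print("writing", input_dict, "to", working_directory)

stored = np.void(cloudpickle.dumps(write_input))   # as in to_dict()
restored = cloudpickle.loads(stored.tobytes())     # as in from_dict()
restored({"energy_cutoff": 320.0})
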
17 changes: 9 additions & 8 deletions pyiron_base/jobs/flex/pythonfunctioncontainer.py
@@ -67,18 +67,19 @@ def __call__(self, *args, **kwargs):
self.run()
return self.output["result"]

-    def to_hdf(self, hdf=None, group_name=None):
-        super().to_hdf(hdf=hdf, group_name=group_name)
-        self.project_hdf5["function"] = np.void(cloudpickle.dumps(self._function))
-        self.project_hdf5["_automatically_rename_on_save_using_input"] = (
+    def to_dict(self):
+        job_dict = super().to_dict()
+        job_dict["function"] = np.void(cloudpickle.dumps(self._function))
+        job_dict["_automatically_rename_on_save_using_input"] = (
self._automatically_rename_on_save_using_input
)
+        return job_dict

-    def from_hdf(self, hdf=None, group_name=None):
-        super().from_hdf(hdf=hdf, group_name=group_name)
-        self._function = cloudpickle.loads(self.project_hdf5["function"])
+    def from_dict(self, job_dict):
+        super().from_dict(job_dict=job_dict)
+        self._function = cloudpickle.loads(job_dict["function"])
self._automatically_rename_on_save_using_input = bool(
self.project_hdf5["_automatically_rename_on_save_using_input"]
job_dict["_automatically_rename_on_save_using_input"]
)

def save(self):
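
PythonFunctionContainerJob gets the same treatment: the wrapped function itself travels through job_dict via cloudpickle, so a saved job can be reloaded and re-executed without importing the module that defined it. A hedged usage sketch; the wrap_python_function entry point is taken from pyiron_base's public API, so confirm it exists in your installed version:

# Usage sketch under the assumption that Project.wrap_python_function is
# available in this pyiron_base version.
from pyiron_base import Project

def add(a, b):
    return a + b

pr = Project("demo")
job = pr.wrap_python_function(add)
job.input["a"], job.input["b"] = 1, 2
job.run()
print(job.output["result"])  # expected: 3
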
