# coding: utf-8
# Copyright (c) Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department
# Distributed under the terms of "New BSD License", see the LICENSE file.
"""
The project object is the central import point of pyiron - all other objects can be created from this one
"""
from __future__ import annotations
import os
import posixpath
import shutil
import stat
from tqdm.auto import tqdm
import pandas
import pint
import math
import numpy as np
from pyiron_base.project.jobloader import JobLoader, JobInspector
from pyiron_base.project.maintenance import Maintenance
from pyiron_base.project.path import ProjectPath
from pyiron_base.database.filetable import FileTable
from pyiron_base.state import state
from pyiron_base.database.jobtable import (
get_job_ids,
get_job_id,
get_jobs,
set_job_status,
get_child_ids,
get_job_working_directory,
get_job_status,
)
from pyiron_base.storage.hdfio import ProjectHDFio
from pyiron_base.storage.filedata import load_file
from pyiron_base.utils.deprecate import deprecate
from pyiron_base.interfaces.has_groups import HasGroups
from pyiron_base.jobs.flex.factory import create_job_factory
from pyiron_base.jobs.job.util import _special_symbol_replacements, _get_safe_job_name
from pyiron_base.jobs.job.jobtype import (
JobType,
JobTypeChoice,
JobFactory,
JOB_CLASS_DICT,
)
from pyiron_base.jobs.job.extension.server.queuestatus import (
queue_delete_job,
queue_is_empty,
queue_table,
wait_for_job,
wait_for_jobs,
update_from_remote,
queue_enable_reservation,
queue_check_job_is_waiting_or_running,
)
from pyiron_base.project.external import Notebook
from pyiron_base.project.data import ProjectData
from pyiron_base.project.archiving import export_archive, import_archive
from typing import Generator, Union, Dict, TYPE_CHECKING, Literal
if TYPE_CHECKING:
from pyiron_base.jobs.job.generic import GenericJob
__author__ = "Joerg Neugebauer, Jan Janssen"
__copyright__ = (
"Copyright 2020, Max-Planck-Institut für Eisenforschung GmbH - "
"Computational Materials Design (CM) Department"
)
__version__ = "1.0"
__maintainer__ = "Jan Janssen"
__email__ = "janssen@mpie.de"
__status__ = "production"
__date__ = "Sep 1, 2017"
class Project(ProjectPath, HasGroups):
"""
The project is the central class in pyiron, all other objects can be created from the project object.
Implements :class:`.HasGroups`. Groups are sub directories in the project, nodes are jobs inside the project.
Args:
path (GenericPath, str): path of the project defined by GenericPath, absolute or relative (with respect to
current working directory) path
user (str): current pyiron user
sql_query (str): SQL query to only select a subset of the existing jobs within the current project
default_working_directory (bool): Access the default working directory; for ScriptJobs this equals the
project directory of the ScriptJob, for regular projects it falls back
to the current directory.
Attributes:
root_path (): The pyiron user directory, defined in the .pyiron configuration.
project_path (): The relative path of the current project / folder starting from the root path of the pyiron
user directory
path (): The absolute path of the current project / folder.
base_name (): The name of the current project / folder.
history (): Previously opened projects / folders.
parent_group (): Parent project - one level above the current project.
user (): Current unix/linux/windows user who is running pyiron
sql_query (): An SQL query to limit the jobs within the project to a subset which matches the SQL query.
db (): Connection to the SQL database.
job_type (): Job Type object with all the available job types: ['ExampleJob', 'SerialMaster', 'ParallelMaster',
'ScriptJob', 'ListMaster'].
view_mode (): If view_mode is enabled, pyiron has read-only access to the database.
data (pyiron_base.project.data.ProjectData): A storage container for project-level data.
Examples:
Storing data:
>>> pr = Project('example')
>>> pr.data.foo = 42
>>> pr.data.write()
Some time later or in a different notebook, but in the same file location...
>>> other_pr_instance = Project('example')
>>> print(other_pr_instance.data)
{'foo': 42}
"""
def __init__(
self, path="", user=None, sql_query=None, default_working_directory=False
):
if default_working_directory and path == "":
inputdict = Notebook.get_custom_dict()
if inputdict is not None and "project_dir" in inputdict:
path = inputdict["project_dir"]
else:
path = "."
super(Project, self).__init__(path=path)
self.user = user
self.sql_query = sql_query
self._filter = ["groups", "nodes", "objects"]
self._inspect_mode = False
self._data = None
self._creator = Creator(project=self)
self._loader = JobLoader(project=self)
self._inspector = JobInspector(project=self)
self.job_type = JobTypeChoice()
self._maintenance = None
@property
def state(self):
return state
@property
def db(self):
if not state.database.database_is_disabled:
return state.database.database
else:
return FileTable(index_from=self.path)
@property
def maintenance(self):
if self._maintenance is None:
self._maintenance = Maintenance(self)
return self._maintenance
@property
def parent_group(self):
"""
Get the parent group of the current project
Returns:
Project: parent project
"""
return self.create_group("..")
@property
@deprecate("use db.view_mode")
def view_mode(self):
"""
Get viewer_mode - if viewer_mode is enabled, pyiron has read-only access to the database.
Change it via
`Project('my_project').switch_to_viewer_mode()`
and
`Project('my_project').switch_to_user_mode()`
Returns:
bool: True when viewer_mode is enabled
"""
return self.db.view_mode
@property
def name(self):
"""
The name of the current project folder
Returns:
str: name of the current project folder
"""
return self.base_name
@property
def create(self):
return self._creator
@property
def data(self):
if self._data is None:
self._data = ProjectData(project=self, table_name="data")
try:
self._data.read()
except KeyError:
pass
return self._data
@property
def size(self):
"""
Get the size of the project
"""
size = (
sum(
[
sum([os.path.getsize(os.path.join(path, f)) for f in files])
for path, dirs, files in os.walk(self.path)
]
)
* pint.UnitRegistry().byte
)
return self._size_conversion(size)
@staticmethod
def _size_conversion(size: pint.Quantity):
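"""
Convert a pint byte quantity to the largest binary prefix (Kibyte, Mibyte, ...)
for which the magnitude stays >= 1: floor(log2(size) / 10) - 1 selects the
1024-based prefix index; negative sizes are handled via sign_prefactor.
"""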
sign_prefactor = 1
if size < 0:
sign_prefactor = -1
size *= -1
elif size == 0:
return size
prefix_index = math.floor(math.log2(size) / 10) - 1
prefix = ["Ki", "Mi", "Gi", "Ti", "Pi"]
size *= sign_prefactor
if prefix_index < 0:
return size
elif prefix_index < 5:
return size.to(f"{prefix[prefix_index]}byte")
else:
return size.to(f"{prefix[-1]}byte")
def copy(self):
"""
Copy the project object - copying just the Python object but maintaining the same pyiron path
Returns:
Project: copy of the project object
"""
new = self.__class__(path=self.path, user=self.user, sql_query=self.sql_query)
new._filter = self._filter
new._inspect_mode = self._inspect_mode
return new
def copy_to(self, destination):
"""
Copy the project object to a different pyiron path - including the content of the project (all jobs).
In order to move individual jobs, use `copy_to` from the job objects.
Args:
destination (Project): project path to copy the project content to
Returns:
Project: pointing to the new project path
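Example:
A minimal sketch; the project names are illustrative:
>>> pr = Project("source_project")
>>> pr.copy_to(Project("target_project"))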
"""
if not self.view_mode:
if not isinstance(destination, Project):
raise TypeError("A project can only be copied to another project.")
for sub_project_name in tqdm(
self.list_groups(), desc="Copying sub-projects"
):
if "_hdf5" not in sub_project_name:
sub_project = self.open(sub_project_name)
destination_sub_project = destination.open(sub_project_name)
sub_project.copy_to(destination_sub_project)
for job_id in tqdm(self.get_job_ids(recursive=False), desc="Copying jobs"):
ham = self.load(job_id)
ham.copy_to(project=destination)
for file in tqdm(self.list_files(), desc="Copying files"):
if ".h5" not in file:
shutil.copy(os.path.join(self.path, file), destination.path)
return destination
else:
raise EnvironmentError("copy_to: is not available in Viewermode !")
def create_from_job(self, job_old, new_job_name):
"""
Create a new job from an existing pyiron job
Args:
job_old (GenericJob): Job to copy
new_job_name (str): New job name
Returns:
GenericJob: New job with the new job name.
"""
job_id = self.get_job_id(new_job_name)
if job_id is not None:
state.logger.info(
f"create_from_job: {new_job_name} has already job_id {job_id}!"
)
return None
print("job_old: ", job_old.status)
job_new = job_old.copy_to(
project=self,
new_job_name=new_job_name,
input_only=False,
new_database_entry=True,
)
state.logger.debug(
"create_job:: {} {} from id {}".format(
self.path, new_job_name, job_old.job_id
)
)
return job_new
def create_group(self, group):
"""
Create a new subproject/ group/ folder
Args:
group (str): name of the new project
Returns:
Project: New subproject
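Example:
>>> sub_pr = pr.create_group("sub_project")  # opens ./sub_project, creating it if needed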
"""
new = self.copy()
return new.open(group, history=False)
@staticmethod
def create_job_class(
class_name,
write_input_funct,
collect_output_funct,
default_input_dict,
executable_str,
):
"""
Create a new job class based on pre-defined write_input() and collect_output() function plus a dictionary of
default inputs and an executable string.
Args:
class_name (str): A name for the newly created job class, so it is accessible via pr.create.job.<class_name>
write_input_funct (callable): The write input function write_input(input_dict, working_directory)
collect_output_funct (callable): The collect output function collect_output(working_directory)
default_input_dict (dict): Default input for the newly created job class
executable_str (str): Call to an external executable
Example:
>>> def write_input(input_dict, working_directory="."):
>>> with open(os.path.join(working_directory, "input_file"), "w") as f:
>>> f.write(str(input_dict["energy"]))
>>>
>>>
>>> def collect_output(working_directory="."):
>>> with open(os.path.join(working_directory, "output_file"), "r") as f:
>>> return {"energy": float(f.readline())}
>>>
>>>
>>> from pyiron_base import Project
>>> pr = Project("test")
>>> pr.create_job_class(
>>> class_name="CatJob",
>>> write_input_funct=write_input,
>>> collect_output_funct=collect_output,
>>> default_input_dict={"energy": 1.0},
>>> executable_str="cat input_file > output_file",
>>> )
>>> job = pr.create.job.CatJob(job_name="job_test")
>>> job.input["energy"] = 2.0
>>> job.run()
>>> job.output
"""
JOB_CLASS_DICT[class_name] = create_job_factory(
write_input_funct=write_input_funct,
collect_output_funct=collect_output_funct,
default_input_dict=default_input_dict,
executable_str=executable_str,
)
def create_job(self, job_type, job_name, delete_existing_job=False):
"""
Create one of the following jobs:
- 'ExampleJob': example job that just generates random numbers
- 'SerialMaster': series of jobs run in serial
- 'ParallelMaster': series of jobs run in parallel
- 'ScriptJob': Python script or jupyter notebook job container
- 'ListMaster': list of jobs
Args:
job_type (str): job type can be ['ExampleJob', 'SerialMaster', 'ParallelMaster', 'ScriptJob', 'ListMaster']
job_name (str): name of the job
delete_existing_job (bool): delete an existing job - default false
Returns:
GenericJob: job object depending on the job_type selected
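Example:
A minimal sketch using the built-in 'ExampleJob' type; the job name is illustrative:
>>> pr = Project("test")
>>> job = pr.create_job(job_type="ExampleJob", job_name="example_test")
>>> job.run()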
"""
job_name = _get_safe_job_name(name=job_name)
job = JobType(
job_type,
project=ProjectHDFio(project=self.copy(), file_name=job_name),
job_name=job_name,
job_class_dict=self.job_type.job_class_dict,
delete_existing_job=delete_existing_job,
)
if self.user is not None:
job.user = self.user
return job
def create_table(self, job_name="table", delete_existing_job=False):
"""
Create pyiron table
Args:
job_name (str): job name of the pyiron table job
delete_existing_job (bool): Delete the existing table and run the analysis again.
Returns:
pyiron.table.datamining.TableJob
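Example:
A hedged sketch of the typical workflow; the add["job_name"] assignment assumes the
function container of the table job supports item assignment:
>>> table = pr.create_table(job_name="table")
>>> table.add["job_name"] = lambda job: job.job_name
>>> table.run()
>>> table.get_dataframe()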
"""
table = self.create_job(
job_type=self.job_type.TableJob,
job_name=job_name,
delete_existing_job=delete_existing_job,
)
table.analysis_project = self
return table
def wrap_python_function(self, python_function):
"""
Create a pyiron job object from any python function
Args:
python_function (callable): python function to create a job object from
Returns:
pyiron_base.jobs.flex.pythonfunctioncontainer.PythonFunctionContainerJob: pyiron job object
Example:
>>> def test_function(a, b=8):
>>> return a+b
>>>
>>> from pyiron_base import Project
>>> pr = Project("test")
>>> job = pr.wrap_python_function(test_function)
>>> job.input["a"] = 4
>>> job.input["b"] = 5
>>> job.run()
>>> job.output
>>>
>>> test_function_wrapped = pr.wrap_python_function(test_function)
>>> test_function_wrapped(4, b=6)
"""
job = self.create.job.PythonFunctionContainerJob(
job_name=python_function.__name__
)
job.python_function = python_function
return job
def get_child_ids(self, job_specifier, project=None):
"""
Get the child jobs of a specific job
Args:
job_specifier (str, int): name of the job or job ID
project (Project): Project the job is located in - optional
Returns:
list: list of child IDs
"""
if project is None:
project = self.project_path
return get_child_ids(
database=self.db,
sql_query=self.sql_query,
user=self.user,
project_path=project,
job_specifier=job_specifier,
)
def get_db_columns(self):
"""
Get column names
Returns:
list: list of column names like:
['id',
'parentid',
'masterid',
'projectpath',
'project',
'job',
'subjob',
'chemicalformula',
'status',
'hamilton',
'hamversion',
'username',
'computer',
'timestart',
'timestop',
'totalcputime']
"""
return self.db.get_table_headings()
def get_jobs(self, recursive=True, columns=None):
"""
Internal function to return the jobs as a dictionary rather than a pandas.DataFrame
Args:
recursive (bool): search subprojects [True/False]
columns (list): by default only the columns ['id', 'project'] are selected, but the user can select a subset
of ['id', 'status', 'chemicalformula', 'job', 'subjob', 'project', 'projectpath',
'timestart', 'timestop', 'totalcputime', 'computer', 'hamilton', 'hamversion', 'parentid',
'masterid']
Returns:
dict: columns are used as keys and point to a list of the corresponding values
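Example:
An illustrative sketch; the actual values depend on the jobs in the project:
>>> pr.get_jobs(recursive=True, columns=["id", "status"])
{'id': [0, 1], 'status': ['finished', 'running']}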
"""
return get_jobs(
database=self.db,
sql_query=self.sql_query,
user=self.user,
project_path=self.project_path,
recursive=recursive,
columns=columns,
)
def get_job_ids(self, recursive=True):
"""
Return the job IDs matching a specific query
Args:
recursive (bool): search subprojects [True/False]
Returns:
list: a list of job IDs
"""
return get_job_ids(
database=self.db,
sql_query=self.sql_query,
user=self.user,
project_path=self.project_path,
recursive=recursive,
)
def get_job_id(self, job_specifier):
"""
Get the job_id for the job matching job_specifier in the local project path from the database
Args:
job_specifier (str, int): name of the job or job ID
Returns:
int: job ID of the job
"""
return get_job_id(
database=self.db,
sql_query=self.sql_query,
user=self.user,
project_path=self.project_path,
job_specifier=job_specifier,
)
def get_job_status(self, job_specifier, project=None):
"""
Get the status of a particular job
Args:
job_specifier (str, int): name of the job or job ID
project (Project): Project the job is located in - optional
Returns:
str: job status can be one of the following ['initialized', 'appended', 'created', 'submitted', 'running',
'aborted', 'collect', 'suspended', 'refresh', 'busy', 'finished']
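Example:
An illustrative sketch; the job name and the returned status are hypothetical:
>>> pr.get_job_status("example_test")
'finished'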
"""
if project is None:
project = self.project_path
return get_job_status(
database=self.db,
sql_query=self.sql_query,
user=self.user,
project_path=project,
job_specifier=job_specifier,
)
def get_job_working_directory(self, job_specifier, project=None):
"""
Get the working directory of a particular job
Args:
job_specifier (str, int): name of the job or job ID
project (Project): Project the job is located in - optional
Returns:
str: working directory as absolute path
"""
if project is None:
project = self.project_path
return get_job_working_directory(
sql_query=self.sql_query,
user=self.user,
project_path=project,
database=self.db,
job_specifier=job_specifier,
)
@deprecate("use self.size instead.")
def get_project_size(self):
"""
Get the size of the project.
Returns:
pint.Quantity: project size
"""
return self.size
@deprecate("use maintenance.get_repository_status() instead.")
def get_repository_status(self):
return self.maintenance.get_repository_status()
def groups(self):
"""
Filter project by groups
Returns:
Project: a project which is filtered by groups
"""
new = self.copy()
new._filter = ["groups"]
return new
@property
def inspect(self):
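"""
Inspect a job without loading it fully: `pr.inspect(job_specifier)` accepts a job
name or job id and returns the lightweight HDF5-backed representation of the job
instead of the full object.
"""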
return self._inspector
def iter_jobs(
self,
path: str = None,
recursive: bool = True,
convert_to_object: bool = True,
progress: bool = True,
**kwargs: dict,
) -> Generator:
"""
Iterate over the jobs within the current project and its sub projects
Args:
path (str): HDF5 path inside each job object. (Default is None, which just uses the top level of the job's
HDF5 path.)
recursive (bool): search subprojects. (Default is True.)
convert_to_object (bool): load the full GenericJob object, else just return the HDF5 / JobCore object.
(Default is True, convert everything to the full python object.)
progress (bool): add an interactive progress bar to the iteration. (Default is True, show the bar.)
**kwargs (dict): Optional arguments for filtering with keys matching the project database column name
(eg. status="finished"). Asterisk can be used to denote a wildcard, for zero or more
instances of any character
Returns:
yield: Yield of GenericJob or JobCore
Note:
The default behavior of converting to object can cause **significant** slowdown in larger projects. In this
case, you may seriously wish to consider setting `convert_to_object=False` and access only the HDF5/JobCore
representation of the jobs instead.
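Example:
A minimal sketch; filtering by status assumes a 'status' column in the job table:
>>> for job in pr.iter_jobs(convert_to_object=False, status="finished"):
>>>     print(job.job_name)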
"""
job_table = self.job_table(recursive=recursive, **kwargs)
if not isinstance(self.db, FileTable):
job_lst = [[job_id, None] for job_id in job_table["id"]]
else:
# From all the possible database columns, the following ones are removed:
# ["id", "chemicalformula", "timestart", "computer", "parentid",
# "username", "timestop", "totalcputime", "masterid"]
# because they are not used when running without a database and can lead to errors.
table_columns = [
"job",
"subjob",
"projectpath",
"project",
"status",
"hamilton",
"hamversion",
]
job_lst = [
[None, {column: db_entry[column] for column in table_columns}]
for db_entry in [row[1].to_dict() for row in job_table.iterrows()]
]
if progress:
job_lst = tqdm(job_lst)
for job_id, db_entry in job_lst:
if path is not None:
yield self.load_from_jobpath(
job_id=job_id,
db_entry=db_entry,
convert_to_object=False,
)[path]
else: # Backwards compatibility - in future the option convert_to_object should be removed
yield self.load_from_jobpath(
job_id=job_id,
db_entry=db_entry,
convert_to_object=convert_to_object,
)
def iter_output(self, recursive=True):
"""
Iterate over the output of jobs within the current project and its sub projects
Args:
recursive (bool): search subprojects [True/False] - True by default
Returns:
yield: Yield of GenericJob or JobCore
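Example:
A minimal sketch; each yielded item is the job's "output" HDF5 group:
>>> for output in pr.iter_output():
>>>     print(output)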
"""
return self.iter_jobs(path="output", recursive=recursive)
def iter_groups(self, progress: bool = True) -> Generator:
"""
Iterate over the groups within the current project
Args:
progress (bool): Display a progress bar during the iteration
Yields:
:class:`.Project`: sub projects/ groups/ folders
"""
groups = self.list_groups()
if progress:
groups = tqdm(groups)
for group in groups:
if progress:
groups.set_postfix(group=group)
yield self[group]
def items(self):
"""
All items in the current project - this includes jobs, sub projects/ groups/ folders and any kind of files
Returns:
list: items in the project
"""
return [(key, self[key]) for key in self.keys()]
def update_from_remote(
self, recursive=True, ignore_exceptions=False, try_collecting=False
):
"""
Update jobs from the remote server
Args:
recursive (bool): search subprojects [True/False] - default=True
ignore_exceptions (bool): ignore eventual exceptions when retrieving jobs - default=False
try_collecting (bool): if a job finished on the remote machine, try to collect its output - default=False
Returns:
None if ignore_exceptions is False or when no error occurred.
A list of job ids for which errors occurred but were ignored.
"""
return update_from_remote(
project=self,
recursive=recursive,
ignore_exceptions=ignore_exceptions,
try_collecting=try_collecting,
)
def job_table(
self,
recursive=True,
columns=None,
all_columns=True,
sort_by="id",
full_table=False,
element_lst=None,
job_name_contains="",
auto_refresh_job_status=False,
mode: Literal["regex", "glob"] = "glob",
**kwargs: dict,
):
"""
auto_refresh_job_status (bool): will automatically reload job status by calling refresh_job_status() upon calling job_table
"""
if not isinstance(self.db, FileTable) and auto_refresh_job_status:
self.refresh_job_status()
job_table = self.db.job_table(
sql_query=self.sql_query,
user=self.user,
project_path=self.project_path,
recursive=recursive,
columns=columns,
all_columns=all_columns,
sort_by=sort_by,
full_table=full_table,
element_lst=element_lst,
mode=mode,
**kwargs,
)
if not isinstance(self.db, FileTable) or not auto_refresh_job_status:
return job_table
else:
return self._refresh_job_status_file_table(df=job_table)
job_table.__doc__ = "\n".join(
[
ll
for ll in FileTable.job_table.__doc__.split("\n")
if not any(
[
item in ll
for item in ["sql_query (str)", "user (str)", "project_path (str)"]
]
)
]
)
def get_jobs_status(self, recursive=True, **kwargs):
"""
Gives an overview of the status of all jobs.
Args:
recursive (bool): search subprojects [True/False] - default=True
kwargs: passed directly to :meth:`.job_table` and can be used to filter the jobs for which the status
is reported
Returns:
pandas.Series: number of jobs per status.
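Example:
An illustrative sketch; the counts depend on the jobs in the project:
>>> pr.get_jobs_status()
finished    3
running     1
Name: status, dtype: int64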
"""
df = self.job_table(recursive=recursive, all_columns=True, **kwargs)
return df["status"].value_counts()
def keys(self):
"""
List of file, folder and object names
Returns:
list: list of the names of project directories and project nodes
"""
return self.list_dirs() + self.list_nodes()
def _list_all(self):
"""
Combination of list_groups(), list_nodes() and list_files() all in one dictionary with the corresponding keys:
- 'groups': Subprojects/ -folder/ -groups.
- 'nodes': Jobs or pyiron objects
- 'files': Files inside a project which do not belong to any pyiron object
Returns:
dict: dictionary with all items in the project
"""
return {
"groups": self.list_groups(),
"nodes": self.list_nodes(),
"files": self.list_files(),
}
def list_dirs(self, skip_hdf5=True):
"""
List directories inside the project
Args:
skip_hdf5 (bool): Skip directories which belong to a pyiron object/ pyiron job - default=True
Returns:
list: list of directory names
"""
if "groups" not in self._filter:
return []
files = set(next(os.walk(self.path))[2])
dirs = set(os.listdir(self.path)) - files
dirs = sorted([direct for direct in dirs if not (direct[0] == ".")])
if skip_hdf5:
return [d for d in dirs if not self._is_hdf5_dir(d)]
return dirs
def list_files(self, extension=None):
"""
List files inside the project
Args:
extension (str): filter by a specific extension
Returns:
list: list of file names
"""
if "nodes" not in self._filter:
return []
try:
files = next(os.walk(self.path))[2]
if extension is None:
return files
return [
".".join(f.split(".")[:-1])
for f in files
if f.split(".")[-1] in extension
]
except StopIteration:
return []
_list_groups = list_dirs
def _list_nodes(self, recursive=False):
"""
List nodes/ jobs/ pyiron objects inside the project
Args:
recursive (bool): search subprojects [True/False] - default=False
Returns:
list: list of nodes/ jobs/ pyiron objects inside the project
"""
if "nodes" not in self._filter:
return []
return self.get_jobs(recursive=recursive, columns=["job"])["job"]
@property
def load(self):
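"""
Load a job from this project into memory: `pr.load(job_specifier)` accepts a job
name or job id and returns the full GenericJob object.
"""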
return self._loader
def load_from_jobpath(self, job_id=None, db_entry=None, convert_to_object=True):
"""
Internal function to load an existing job either based on the job ID or based on the database entry dictionary.
Args:
job_id (int/ None): Job ID - optional, but either the job_id or the db_entry is required.
db_entry (dict): database entry dictionary - optional, but either the job_id or the db_entry is required.
convert_to_object (bool): convert the object to a pyiron object or only access the HDF5 file - default=True
accessing only the HDF5 file is about an order of magnitude faster, but only
provides limited functionality. Compare the GenericJob object to JobCore object.
Returns:
GenericJob, JobCore: Either the full GenericJob object or just a reduced JobCore object
"""
from pyiron_base.jobs.job.path import JobPath
if job_id is not None:
job = JobPath.from_job_id(db=self.db, job_id=job_id)
if convert_to_object:
job = job.to_object()
job.reset_job_id(job_id=job_id)
job.set_input_to_read_only()
return job
elif db_entry is not None:
job = JobPath.from_db_entry(db_entry)
if convert_to_object:
job = job.to_object()
job.set_input_to_read_only()
return job
else:
raise ValueError("Either a job ID or an database entry has to be provided.")
def move_to(self, destination):
"""
Similar to the copy_to() function, move the project object to a different pyiron path - including the content
of the project (all jobs). In order to move individual jobs, use `move_to` from the job objects.
Args:
destination (Project): project path to move the project content to
Returns:
Project: pointing to the new project path
"""
if not self.view_mode:
if not isinstance(destination, Project):
raise TypeError("A project can only be copied to another project.")
for sub_project_name in tqdm(
self.list_groups(), desc="Moving sub-projects"
):
if "_hdf5" not in sub_project_name:
sub_project = self.open(sub_project_name)
destination_sub_project = destination.open(sub_project_name)
sub_project.move_to(destination_sub_project)
for job_id in tqdm(self.get_job_ids(recursive=False), desc="Moving jobs"):
ham = self.load(job_id)
ham.move_to(destination)
for file in tqdm(self.list_files(), desc="Moving files"):
shutil.move(os.path.join(self.path, file), destination.path)
return destination
else:
raise EnvironmentError("move_to: is not available in Viewermode !")
def nodes(self):
"""
Filter project by nodes
Returns:
Project: a project which is filtered by nodes
"""
new = self.copy()
new._filter = ["nodes"]
return new
def queue_table(self, project_only=True, recursive=True, full_table=False):
"""
Display the queuing system table as pandas.DataFrame