-
Notifications
You must be signed in to change notification settings - Fork 13
/
generic.py
1515 lines (1333 loc) · 56.3 KB
/
generic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# coding: utf-8
# Copyright (c) Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department
# Distributed under the terms of "New BSD License", see the LICENSE file.
"""
Generic Job class extends the JobCore class with all the functionality to run the job object.
"""
from concurrent.futures import Future
from datetime import datetime
import os
import posixpath
import signal
import warnings
from pyiron_base.state import state
from pyiron_base.state.signal import catch_signals
from pyiron_base.jobs.job.extension.executable import Executable
from pyiron_base.jobs.job.extension.jobstatus import JobStatus
from pyiron_base.jobs.job.core import (
JobCore,
_doc_str_job_core_args,
_doc_str_job_core_attr,
)
from pyiron_base.jobs.job.runfunction import (
run_job_with_parameter_repair,
run_job_with_status_initialized,
run_job_with_status_created,
run_job_with_status_submitted,
run_job_with_status_running,
run_job_with_status_refresh,
run_job_with_status_busy,
run_job_with_status_collect,
run_job_with_status_suspended,
run_job_with_status_finished,
run_job_with_runmode_modal,
run_job_with_runmode_queue,
execute_job_with_external_executable,
)
from pyiron_base.jobs.job.util import (
_copy_restart_files,
_kill_child,
_job_store_before_copy,
_job_reload_after_copy,
)
from pyiron_base.utils.instance import static_isinstance
from pyiron_base.utils.deprecate import deprecate
from pyiron_base.jobs.job.extension.server.generic import Server
from pyiron_base.database.filetable import FileTable
from pyiron_base.storage.helper_functions import write_hdf5, read_hdf5
from pyiron_base.interfaces.has_dict import HasDict
# Module metadata.
__author__ = "Joerg Neugebauer, Jan Janssen"
__copyright__ = (
"Copyright 2020, Max-Planck-Institut für Eisenforschung GmbH - "
"Computational Materials Design (CM) Department"
)
__version__ = "1.0"
__maintainer__ = "Jan Janssen"
__email__ = "janssen@mpie.de"
__status__ = "production"
__date__ = "Sep 1, 2017"
# Modular docstrings: the attribute documentation imported from the JobCore module is extended with the attributes
# documented for GenericJob; the combined text is appended to the GenericJob class docstring.
_doc_str_generic_job_attr = (
_doc_str_job_core_attr
+ "\n"
+ """\
.. attribute:: version
Version of the hamiltonian, which is also the version of the executable unless a custom executable is used.
.. attribute:: executable
Executable used to run the job - usually the path to an external executable.
.. attribute:: library_activated
For job types which offer a Python library pyiron can use the python library instead of an external
executable.
.. attribute:: server
Server object to handle the execution environment for the job.
.. attribute:: queue_id
the ID returned from the queuing system - it is most likely not the same as the job ID.
.. attribute:: logger
logger object to monitor the external execution and internal pyiron warnings.
.. attribute:: restart_file_list
list of files which are used to restart the calculation from these files.
.. attribute:: exclude_nodes_hdf
list of nodes which are excluded from storing in the hdf5 file.
.. attribute:: exclude_groups_hdf
list of groups which are excluded from storing in the hdf5 file.
.. attribute:: job_type
Job type object with all the available job types: ['ExampleJob', 'SerialMaster', 'ParallelMaster',
'ScriptJob', 'ListMaster']
"""
)
class GenericJob(JobCore, HasDict):
# The class docstring is assembled explicitly (instead of a plain literal) so that the shared argument description
# from JobCore and the modular attribute description can be appended to the class specific text.
__doc__ = (
"""
Generic Job class extends the JobCore class with all the functionality to run the job object. From this class
all specific job types are derived. Therefore it should contain the properties/routines common to all jobs.
The functions in this module should be as generic as possible.
Sub classes that need to add special behavior after :method:`.copy_to()` can override
:method:`._after_generic_copy_to()`.
"""
+ "\n"
+ _doc_str_job_core_args
+ "\n"
+ _doc_str_generic_job_attr
)
def __init__(self, project, job_name):
    """
    Initialise the job: server, logger, job status and the bookkeeping containers.

    The job status is taken from the database when the database is enabled, otherwise from the HDF5 file of the
    job if that file already exists, and otherwise a fresh JobStatus object is created.

    Args:
        project: project object - forwarded to the JobCore constructor
        job_name (str): name of the job - forwarded to the JobCore constructor
    """
    super().__init__(project, job_name)
    self.__name__ = type(self).__name__
    self.__version__ = "0.4"
    self.__hdf_version__ = "0.1.0"
    self._server = Server()
    self._logger = state.logger
    self._executable = None

    if not state.database.database_is_disabled:
        # Database available: bind the status to the database entry and synchronise it.
        self._status = JobStatus(db=project.db, job_id=self.job_id)
        self.refresh_job_status()
    else:
        if os.path.exists(self.project_hdf5.file_name):
            # in most cases self.project_hdf5.h5_path == / + self.job_name but not for child jobs of
            # GenericMasters
            stored_status = read_hdf5(
                self.project_hdf5.file_name,
                self.project_hdf5.h5_path + "/status",
            )
            self._status = JobStatus(initial_status=stored_status)
            if "job_id" in self.list_nodes():
                # same path convention as for the status node above
                self._job_id = read_hdf5(
                    self.project_hdf5.file_name,
                    self.project_hdf5.h5_path + "/job_id",
                )
        else:
            # Neither database nor HDF5 file: start with a default status object.
            self._status = JobStatus()

    self._restart_file_list = []
    self._restart_file_dict = {}
    self._exclude_nodes_hdf = []
    self._exclude_groups_hdf = []
    self._process = None
    self._compress_by_default = False
    self._python_only_job = False
    self._write_work_dir_warnings = True
    self.interactive_cache = None
    self.error = GenericError(job=self)
@property
def version(self):
    """
    Version of the hamiltonian.

    The value stored in ``__version__`` is returned when it is set (truthy); otherwise the executable is
    activated and its version is reported.

    Returns:
        str: version number - None if neither a job version nor an executable is available
    """
    own_version = self.__version__
    if own_version:
        return own_version
    self._executable_activate()
    if self._executable is None:
        return None
    return self._executable.version

@version.setter
def version(self, new_version):
    """
    Assign the version to the executable of the job, activating the executable first.

    Args:
        new_version (str): version
    """
    self._executable_activate()
    setattr(self._executable, "version", new_version)
@property
def executable(self):
    """
    Executable used to run the job - usually the path to an external executable. The executable is activated
    on access.

    Returns:
        (str/pyiron_base.job.executable.Executable): executable path
    """
    self._executable_activate()
    activated = self._executable
    return activated

@executable.setter
def executable(self, exe):
    """
    Define the executable used to run the job by assigning its path, activating the executable first.

    Args:
        exe (str): executable path, if no valid path is provided an executable is chosen based on version.
    """
    self._executable_activate()
    setattr(self._executable, "executable_path", exe)
@property
def server(self):
    """
    Server object which handles the execution environment of the job.

    Returns:
        Server: server object
    """
    current_server = self._server
    return current_server

@server.setter
def server(self, server):
    """
    Replace the server object which handles the execution environment of the job.

    Args:
        server (Server): server object
    """
    setattr(self, "_server", server)
@property
def queue_id(self):
    """
    ID returned from the queuing system, as stored on the server object - it is most likely not the same as the
    job ID.

    Returns:
        int: queue ID
    """
    job_server = self.server
    return job_server.queue_id

@queue_id.setter
def queue_id(self, qid):
    """
    Store the ID returned from the queuing system on the server object.

    Args:
        qid (int): queue ID
    """
    job_server = self.server
    job_server.queue_id = qid
@property
def logger(self):
    """
    Logger object used to monitor the external execution and internal pyiron warnings.

    Returns:
        logging.getLogger(): logger object
    """
    job_logger = self._logger
    return job_logger
@property
def restart_file_list(self):
    """
    Get the list of files which are used to restart the calculation from these files.

    Returns:
        list: list of files
    """
    return self._restart_file_list

@restart_file_list.setter
def restart_file_list(self, filenames):
    """
    Append new files to the restart file list - the list of files which are used to restart the calculation from.

    Note that assigning to this property extends the existing list, it does not replace it.

    Args:
        filenames (list/str): paths of existing files; a single path is treated as a list with one entry

    Raises:
        IOError: if one of the files does not exist - in this case the restart file list is left unchanged
    """
    # A bare path would otherwise be iterated character by character.
    if isinstance(filenames, (str, os.PathLike)):
        filenames = [filenames]
    else:
        filenames = list(filenames)
    # Validate everything before touching the list, so a missing file cannot leave it half-updated.
    for f in filenames:
        if not os.path.isfile(f):
            raise IOError("File: {} does not exist".format(f))
    self.restart_file_list.extend(filenames)
@property
def restart_file_dict(self):
    """
    Dictionary mapping the base name of each restart file to the name of the copied restart file.

    Base names of files in the restart file list which have no entry yet are added on access and map to
    themselves.

    Returns:
        dict: the (updated) restart file dictionary
    """
    base_names = [os.path.basename(path) for path in self._restart_file_list]
    for base_name in base_names:
        self._restart_file_dict.setdefault(base_name, base_name)
    return self._restart_file_dict

@restart_file_dict.setter
def restart_file_dict(self, val):
    """
    Replace the restart file dictionary.

    Args:
        val (dict): new dictionary

    Raises:
        ValueError: if val is not a dictionary
    """
    if isinstance(val, dict):
        self._restart_file_dict = val
        return
    raise ValueError("restart_file_dict should be a dictionary!")
@property
def exclude_nodes_hdf(self):
    """
    Nodes which are excluded from storing in the hdf5 file.

    Returns:
        nodes(list)
    """
    excluded_nodes = self._exclude_nodes_hdf
    return excluded_nodes

@exclude_nodes_hdf.setter
def exclude_nodes_hdf(self, val):
    """
    Define the nodes which are excluded from storing in the hdf5 file.

    Args:
        val (str/list): a single node name is wrapped in a list, any other object with a length is stored as is

    Raises:
        ValueError: if val is neither a string nor an object with a length
    """
    if isinstance(val, str):
        self._exclude_nodes_hdf = [val]
        return
    if not hasattr(val, "__len__"):
        raise ValueError("Wrong type of variable.")
    self._exclude_nodes_hdf = val
@property
def exclude_groups_hdf(self):
    """
    Groups which are excluded from storing in the hdf5 file.

    Returns:
        groups(list)
    """
    excluded_groups = self._exclude_groups_hdf
    return excluded_groups

@exclude_groups_hdf.setter
def exclude_groups_hdf(self, val):
    """
    Define the groups which are excluded from storing in the hdf5 file.

    Args:
        val (str/list): a single group name is wrapped in a list, any other object with a length is stored as is

    Raises:
        ValueError: if val is neither a string nor an object with a length
    """
    if isinstance(val, str):
        self._exclude_groups_hdf = [val]
        return
    if not hasattr(val, "__len__"):
        raise ValueError("Wrong type of variable.")
    self._exclude_groups_hdf = val
@property
def job_type(self):
    """
    Job type object of the project, which provides the available job types, e.g. ['ExampleJob', 'SerialMaster',
    'ParallelMaster', 'ScriptJob', 'ListMaster'].

    Returns:
        JobTypeChoice: Job type object
    """
    owning_project = self.project
    return owning_project.job_type
@property
def working_directory(self):
    """
    Directory the job is executed in - outside the HDF5 file. The working directory equals the path inside the
    HDF5 file, but it is represented by the filesystem:
        /absolute/path/to/the/file.h5/path/inside/the/hdf5/file
    becomes:
        /absolute/path/to/the/file_hdf5/path/inside/the/hdf5/file

    An import directory, if set, takes precedence. Otherwise the working directory is created on first access
    when the HDF5 project does not report one yet.

    Returns:
        str: absolute path to the working directory
    """
    import_directory = self._import_directory
    if import_directory is not None:
        return import_directory
    if not self.project_hdf5.working_directory:
        self._create_working_directory()
    return self.project_hdf5.working_directory
def collect_logfiles(self):
    """
    Hook to collect the log files of the external executable and store the information in the HDF5 file.

    The generic implementation does nothing - it has to be implemented in the individual hamiltonians.
    """
    return None
def write_input(self):
    """
    Write the input files for the external executable. The input itself has to be written by the individual
    hamiltonians.

    The generic implementation only places the file "WARNING_pyiron_modified_content" in the working directory,
    provided the warnings are enabled in the settings and on the job and the job is not a python only job.
    """
    warning_requested = (
        state.settings.configuration["write_work_dir_warnings"]
        and self._write_work_dir_warnings
        and not self._python_only_job
    )
    if not warning_requested:
        return
    warning_path = os.path.join(
        self.working_directory, "WARNING_pyiron_modified_content"
    )
    with open(warning_path, "w") as warning_file:
        warning_file.write(
            "Files in this directory are intended to be written and read by pyiron. \n\n"
            "pyiron may transform user input to enhance performance, thus, use these files with care!\n"
            "Consult the log and/or the documentation to gain further information.\n\n"
            "To disable writing these warning files, specify \n"
            "WRITE_WORK_DIR_WARNINGS=False in the .pyiron configuration file (or set the "
            "PYIRONWRITEWORKDIRWARNINGS environment variable accordingly)."
        )
def collect_output(self):
    """
    Hook to collect the output files of the external executable and store the information in the HDF5 file.

    Raises:
        NotImplementedError: always - the method has to be implemented in the individual hamiltonians.
    """
    message = "read procedure must be defined for derived Hamilton!"
    raise NotImplementedError(message)
def suspend(self):
    """
    Suspend the job: store the object in the HDF5 file, mark the status as suspended, log the event and finally
    clear the job information from the Python object.
    """
    self.to_hdf()
    self.status.suspended = True
    log_message = "{}, status: {}, job has been suspended".format(
        self.job_info_str, self.status
    )
    self._logger.info(log_message)
    self.clear_job()
def refresh_job_status(self):
    """
    Refresh the job status.

    If a job ID is available the status is updated from the database. Without a job ID and with the database
    disabled, the status is read from the HDF5 file of the job. Afterwards, if the server holds a
    concurrent.futures.Future which is done while the job is not marked as finished, the status is set to
    aborted (future cancelled) or finished.
    """
    if self.job_id:
        self._status = JobStatus(
            initial_status=self.project.db.get_job_status(self.job_id),
            db=self.project.db,
            job_id=self.job_id,
        )
    elif state.database.database_is_disabled:
        self._status = JobStatus(
            initial_status=read_hdf5(
                # Use the HDF5 path of the job rather than its name, as __init__ does: in most cases
                # self.project_hdf5.h5_path == / + self.job_name but not for child jobs of GenericMasters
                self.project_hdf5.file_name,
                self.project_hdf5.h5_path + "/status",
            )
        )
    if (
        isinstance(self.server.future, Future)
        and not self.status.finished
        and self.server.future.done()
    ):
        # The future completed without the status being updated - derive the status from the future.
        if self.server.future.cancelled():
            self.status.aborted = True
        else:
            self.status.finished = True
def clear_job(self):
    """
    Convenience function to clear job info after suspend. Mimics deletion of all the job info after suspend in a
    local test environment by deleting the corresponding attributes from the Python object.
    """
    attributes_to_drop = (
        "__name__",
        "__version__",
        "_executable",
        "_server",
        "_logger",
        "_import_directory",
        "_status",
        "_restart_file_list",
        "_restart_file_dict",
    )
    for attribute in attributes_to_drop:
        delattr(self, attribute)
def copy(self):
    """
    Create a second GenericJob object which links to the same job and its HDF5 file.

    Returns:
        GenericJob: New GenericJob object pointing to the same job

    Raises:
        RuntimeError: if the server holds a future which is not done yet
    """
    # Persist all job arguments in the HDF5 file before duplicating
    remove_file_afterwards = _job_store_before_copy(job=self)

    # Build the new Python object via the constructor - super().copy() causes recursion error for serial master
    job_class = self.__class__
    duplicate = job_class(
        job_name=self.job_name, project=self.project_hdf5.open("..")
    )
    duplicate.reset_job_id()

    # Restore the content of the new object from the HDF5 file
    _job_reload_after_copy(
        job=duplicate, delete_file_after_copy=remove_file_afterwards
    )

    # The executor cannot be copied and is just linked instead
    if self.server.executor is not None:
        duplicate.server.executor = self.server.executor
    if not (self.server.future is None or self.server.future.done()):
        raise RuntimeError(
            "Jobs whose server has executor and future attributes cannot be copied unless the future is `done()`"
        )
    return duplicate
def _internal_copy_to(
    self,
    project=None,
    new_job_name=None,
    new_database_entry=True,
    copy_files=True,
    delete_existing_job=False,
):
    """
    Store the job in its HDF5 file, delegate the copy to JobCore and reload the copy from the HDF5 file.

    Args:
        project (JobCore/ProjectHDFio/Project/None): forwarded to the JobCore implementation
        new_job_name (str): forwarded to the JobCore implementation
        new_database_entry (bool): forwarded to the JobCore implementation
        copy_files (bool): forwarded to the JobCore implementation
        delete_existing_job (bool): forwarded to the JobCore implementation

    Returns:
        tuple: new job object, file project, HDF5 project and the reloaded flag as returned by JobCore
    """
    # Persist all job arguments in the HDF5 file before copying
    remove_file_afterwards = _job_store_before_copy(job=self)

    # Delegate the actual copy to the implementation defined in the JobCore
    new_job_core, file_project, hdf5_project, reloaded = super()._internal_copy_to(
        project=project,
        new_job_name=new_job_name,
        new_database_entry=new_database_entry,
        copy_files=copy_files,
        delete_existing_job=delete_existing_job,
    )
    copy_result = (new_job_core, file_project, hdf5_project, reloaded)
    if reloaded:
        return copy_result

    # Reload the copy from the HDF5 file unless the target is itself a job
    target_is_job = static_isinstance(
        obj=project.__class__, obj_type="pyiron_base.jobs.job.core.JobCore"
    )
    if not target_is_job:
        _job_reload_after_copy(
            job=new_job_core, delete_file_after_copy=remove_file_afterwards
        )
    if remove_file_afterwards:
        self.project_hdf5.remove_file()
    return copy_result
def copy_to(
    self,
    project=None,
    new_job_name=None,
    input_only=False,
    new_database_entry=True,
    delete_existing_job=False,
    copy_files=True,
):
    """
    Copy the content of the job including the HDF5 file to a new location.

    Args:
        project (JobCore/ProjectHDFio/Project/None): The project to copy the job to.
            (Default is None, use the same project.)
        new_job_name (str): The new name to assign the duplicate job. Required if the project is `None` or the same
            project as the copied job. (Default is None, try to keep the same name.)
        input_only (bool): [True/False] Whether to copy only the input. (Default is False.)
        new_database_entry (bool): [True/False] Whether to create a new database entry. If input_only is True then
            new_database_entry is set to False and a warning is issued. (Default is True.)
        delete_existing_job (bool): [True/False] Delete existing job in case it exists already (Default is False.)
        copy_files (bool): If True copy all files the working directory of the job, too

    Returns:
        GenericJob: GenericJob object pointing to the new location.
    """
    # Resolve conflicting flags
    if input_only and new_database_entry:
        warnings.warn(
            "input_only conflicts new_database_entry; setting new_database_entry=False"
        )
        new_database_entry = False

    # Perform the actual copy
    new_job_core, file_project, hdf5_project, reloaded = self._internal_copy_to(
        project=project,
        new_job_name=new_job_name,
        new_database_entry=new_database_entry,
        copy_files=copy_files,
        delete_existing_job=delete_existing_job,
    )

    # Drop every group whose name contains "output" if only the input was requested
    if input_only:
        for group_name in new_job_core.project_hdf5.list_groups():
            if "output" not in group_name:
                continue
            del new_job_core.project_hdf5[
                posixpath.join(new_job_core.project_hdf5.h5_path, group_name)
            ]
        new_job_core.status.initialized = True

    # Give sub classes the chance to adjust the copy
    new_job_core._after_generic_copy_to(
        self, new_database_entry=new_database_entry, reloaded=reloaded
    )
    return new_job_core
def _after_generic_copy_to(self, original, new_database_entry, reloaded):
"""
Called in :method:`.copy_to()` after :method`._internal_copy_to()` to allow sub classes to modify copy behavior.
Args:
original (:class:`.GenericJob`): job that this job was copied from
new_database_entry (bool): Whether to create a new database entry was created.
reloaded (bool): True if this job was reloaded instead of copied.
"""
pass
def copy_file_to_working_directory(self, file):
    """
    Register a file to be copied to the working directory before the job is executed, by appending it to the
    restart file list. Relative paths are converted to absolute paths, absolute paths are stored unchanged.

    Args:
        file (str): path of the file to be copied.
    """
    registered_path = file if os.path.isabs(file) else os.path.abspath(file)
    self.restart_file_list.append(registered_path)
def copy_template(self, project=None, new_job_name=None):
    """
    Copy the job including the HDF5 file but without the output data to a new location. This is a shortcut for
    copy_to() with input_only=True and new_database_entry=False.

    Args:
        project (JobCore/ProjectHDFio/Project/None): The project to copy the job to.
            (Default is None, use the same project.)
        new_job_name (str): The new name to assign the duplicate job. Required if the project is `None` or the same
            project as the copied job. (Default is None, try to keep the same name.)

    Returns:
        GenericJob: GenericJob object pointing to the new location.
    """
    template = self.copy_to(
        project=project,
        new_job_name=new_job_name,
        input_only=True,
        new_database_entry=False,
    )
    return template
def remove(self, _protect_childs=True):
    """
    Remove the job - this removes the HDF5 file, all data stored in the HDF5 file and the corresponding database
    entry. A future on the server which is not done yet is cancelled before the removal is delegated to the
    parent class.

    Args:
        _protect_childs (bool): [True/False] by default child jobs can not be deleted, to maintain the consistency
            - default=True
    """
    future_is_pending = (
        isinstance(self.server.future, Future) and not self.server.future.done()
    )
    if future_is_pending:
        self.server.future.cancel()
    super().remove(_protect_childs=_protect_childs)
def remove_child(self):
    """
    Internal removal routine which also removes child jobs: _kill_child() is applied to the job first, then the
    removal is delegated to the parent class.

    Do never use this command, since it will destroy the integrity of your project.
    """
    _kill_child(job=self)
    super().remove_child()
def remove_and_reset_id(self, _protect_childs=True):
    """
    Remove the job and - if it has a job ID - reset the job ID afterwards while keeping the master ID and the
    parent ID of the job.

    Args:
        _protect_childs (bool): forwarded to remove()
    """
    has_job_id = self.job_id is not None
    if has_job_id:
        # remember the links before the removal, they are restored after the ID reset
        preserved_ids = (self.master_id, self.parent_id)
    self.remove(_protect_childs=_protect_childs)
    if has_job_id:
        self.reset_job_id()
        self.master_id, self.parent_id = preserved_ids
def kill(self):
    """
    Kill a job which is in status running or submitted by removing it and resetting its job ID.

    Raises:
        ValueError: if the job is neither running nor submitted
    """
    if not (self.status.running or self.status.submitted):
        raise ValueError(
            "The kill() function is only available during the execution of the job."
        )
    self.remove_and_reset_id()
def validate_ready_to_run(self):
    """
    Hook to validate that the calculation is ready to be executed. The generic implementation performs no checks;
    derived classes can verify here that the input information is complete and consistent.

    Raises:
        ValueError: if ready check is unsuccessful
    """
    return None
def check_setup(self):
    """
    Hook to check whether certain parameters (such as plane wave cutoff radius in DFT) were changed from the pyiron
    standard values to allow for physically meaningful results. According to the original documentation this
    function is called manually or only when the job is submitted to the queueing system. The generic
    implementation does nothing.
    """
    return None
def reset_job_id(self, job_id=None):
    """
    Reset the job ID in the GenericJob as well as in connected modules like JobStatus: the reset is delegated to
    the parent class and a new JobStatus object is created for the resulting job ID.

    Args:
        job_id (int/None): job ID forwarded to the parent class (Default is None.)
    """
    super().reset_job_id(job_id=job_id)
    database = self.project.db
    self._status = JobStatus(db=database, job_id=self._job_id)
@deprecate(
    run_again="Either delete the job via job.remove() or use delete_existing_job=True.",
    version="0.4.0",
)
def run(
    self,
    delete_existing_job=False,
    repair=False,
    debug=False,
    run_mode=None,
    run_again=False,
):
    """
    This is the main run function, depending on the job status ['initialized', 'created', 'submitted', 'running',
    'collect', 'suspended', 'refresh', 'busy', 'finished'] the corresponding run mode is chosen.

    Any exception raised while the job is dispatched drops the job status to aborted before it is re-raised.

    Args:
        delete_existing_job (bool): Delete the existing job and run the simulation again.
        repair (bool): Set the job status to created and run the simulation again.
        debug (bool): Debug Mode - defines the log level of the subprocess the job is executed in.
        run_mode (str): ['modal', 'non_modal', 'queue', 'manual'] overwrites self.server.run_mode
        run_again (bool): Same as delete_existing_job (deprecated)

    Raises:
        ValueError: if the job is in status aborted and delete_existing_job is False
    """
    with catch_signals(self.signal_intercept):
        if run_again:
            delete_existing_job = True
        try:
            self._logger.info(
                "run {}, status: {}".format(self.job_info_str, self.status)
            )
            status = self.status.string
            if run_mode is not None:
                self.server.run_mode = run_mode
            if delete_existing_job:
                # A removed job starts over, independent of its previous status.
                status = "initialized"
                self.remove_and_reset_id(_protect_childs=False)
            if repair and self.job_id and not self.status.finished:
                self._run_if_repair()
            elif status == "initialized":
                self._run_if_new(debug=debug)
            elif status == "created":
                self._run_if_created()
            elif status == "submitted":
                run_job_with_status_submitted(job=self)
            elif status == "running":
                self._run_if_running()
            elif status == "collect":
                self._run_if_collect()
            elif status in ("suspended", "suspend"):
                # suspend() sets status.suspended, so the status string is expected to be "suspended";
                # the previously checked spelling "suspend" is still accepted.
                self._run_if_suspended()
            elif status == "refresh":
                self.run_if_refresh()
            elif status == "busy":
                self._run_if_busy()
            elif status == "finished":
                run_job_with_status_finished(job=self)
            elif status == "aborted":
                raise ValueError(
                    "Running an aborted job with `delete_existing_job=False` is meaningless."
                )
        except Exception:
            self.drop_status_to_aborted()
            raise
def run_if_modal(self):
    """
    Execute the simulation in modal mode, i.e. while waiting for the output. The execution is delegated to
    run_job_with_runmode_modal(); according to the original documentation subprocess.check_output() is used.
    """
    run_job_with_runmode_modal(
        job=self,
    )
def run_static(self):
    """
    Execute the simulation by delegating to execute_job_with_external_executable().
    """
    execute_job_with_external_executable(
        job=self,
    )
def run_if_scheduler(self):
    """
    Submit the job to a queuing system. The submission is delegated to run_job_with_runmode_queue(); according to
    the original documentation subprocess.Popen() is used.

    Returns:
        int: Returns the queue ID for the job.
    """
    queue_id = run_job_with_runmode_queue(job=self)
    return queue_id
def transfer_from_remote(self):
    """
    Fetch the job from the remote location via the queue adapter and synchronise the local bookkeeping.

    The parent of the working directory and the HDF5 file are transferred back. With the database disabled the
    project database object is updated; otherwise the jobs found by a FileTable indexed on the "_hdf5/" directory
    of the job are added to the database. Finally the status is taken from the HDF5 file and the master job - if
    any - is updated.
    """
    state.queue_adapter.get_job_from_remote(
        working_directory="/".join(self.working_directory.split("/")[:-1]),
    )
    state.queue_adapter.transfer_file_to_remote(
        file=self.project_hdf5.file_name,
        transfer_back=True,
    )
    if state.database.database_is_disabled:
        self.project.db.update()
    else:
        file_table = FileTable(index_from=self.project_hdf5.path + "_hdf5/")
        job_df = file_table.job_table(
            sql_query=None,
            user=state.settings.login_user,
            project_path=None,
            all_columns=True,
        )
        # First collect all database entries, then add them - nothing is added if building an entry fails.
        db_entries = []
        for (
            job_name,
            job_status,
            subjob,
            project_str,
            hamilton,
            hamversion,
            computer,
            timestart,
            timestop,
            totalcputime,
        ) in zip(
            job_df.job.values,
            job_df.status.values,
            job_df.subjob.values,
            job_df.project.values,
            job_df.hamilton.values,
            job_df.hamversion.values,
            job_df.computer.values,
            job_df.timestart.values,
            job_df.timestop.values,
            job_df.totalcputime.values,
        ):
            generic_path = self.project._convert_str_to_generic_path(project_str)
            db_entries.append(
                {
                    "username": state.settings.login_user,
                    "projectpath": generic_path.root_path,
                    "project": generic_path.project_path,
                    "job": job_name,
                    "subjob": subjob,
                    "hamversion": hamversion,
                    "hamilton": hamilton,
                    "status": job_status,
                    "computer": computer,
                    # the time stamps are divided by 1e9 before the conversion
                    "timestart": datetime.utcfromtimestamp(timestart.tolist() / 1e9),
                    "timestop": datetime.utcfromtimestamp(timestop.tolist() / 1e9),
                    "totalcputime": totalcputime,
                    "masterid": self.master_id,
                    "parentid": None,
                }
            )
        for db_entry in db_entries:
            self.project.db.add_item_dict(db_entry)
    self.status.string = self.project_hdf5["status"]
    if self.master_id is not None:
        self._reload_update_master(project=self.project, master_id=self.master_id)
def run_if_interactive(self):
    """
    Hook for the interactive run mode: jobs whose executable is available as Python library can be executed with
    a library call instead of calling an external executable.

    Raises:
        NotImplementedError: always - the method has to be implemented in the specific class.
    """
    message = "This function needs to be implemented in the specific class."
    raise NotImplementedError(message)
def run_if_interactive_non_modal(self):
    """
    Hook for the non modal interactive run mode: jobs whose executable is available as Python library can be
    executed with a library call instead of calling an external executable.

    Raises:
        NotImplementedError: always - the method has to be implemented in the specific class.
    """
    message = "This function needs to be implemented in the specific class."
    raise NotImplementedError(message)
def interactive_close(self):
    """
    Hook to close a job after the interactive execution, i.e. after it was executed with a library call instead
    of calling an external executable.

    Raises:
        NotImplementedError: always - the method has to be implemented in the specific class.
    """
    message = "This function needs to be implemented in the specific class."
    raise NotImplementedError(message)
def interactive_fetch(self):
    """
    Hook to access the output data during the interactive execution, i.e. while the job is executed with a
    library call instead of calling an external executable.

    Raises:
        NotImplementedError: always - the method has to be implemented in the specific class.
    """
    message = "This function needs to be implemented in the specific class."
    raise NotImplementedError(message)
def interactive_flush(self, path="generic", include_last_step=True):
    """
    Hook to write the interactive cache to the HDF5 file for jobs which are executed with a library call instead
    of calling an external executable.

    Args:
        path (str): not used by the generic implementation (Default is "generic".)
        include_last_step (bool): not used by the generic implementation (Default is True.)

    Raises:
        NotImplementedError: always - the method has to be implemented in the specific class.
    """
    message = "This function needs to be implemented in the specific class."
    raise NotImplementedError(message)
def send_to_database(self):
    """
    Placeholder: storing the job in the external/public database could be implemented here. Currently the
    send_to_db flag of the server is only inspected and nothing is sent.
    """
    if not self.server.send_to_db:
        return
    # sending requested - intentionally not implemented yet
    return
def _init_child_job(self, parent):
"""
Finalize job initialization when job instance is created as a child from another one.
Master jobs use this to set their own reference job, when created from that reference job.
Args:
parent (:class:`.GenericJob`): job instance that this job was created from
"""
pass
def create_job(self, job_type, job_name, delete_existing_job=False):
    """
    Create a new job in the project of this job and initialize it as a child of this job.

    The original documentation lists the following job types:
        'StructureContainer', 'StructurePipeline',
        'AtomisticExampleJob' (example job just generating random number),
        'ExampleJob' (example job just generating random number),
        'Lammps', 'KMC', 'Sphinx', 'Vasp', 'GenericMaster',
        'SerialMaster' (series of jobs run in serial), 'AtomisticSerialMaster',
        'ParallelMaster' (series of jobs run in parallel),
        'KmcMaster', 'ThermoLambdaMaster', 'RandomSeedMaster', 'MeamFit', 'Murnaghan', 'MinimizeMurnaghan',
        'ElasticMatrix', 'ConvergenceVolume', 'ConvergenceEncutParallel', 'ConvergenceKpointParallel',
        'PhonopyMaster', 'DefectFormationEnergy', 'LammpsASE', 'PipelineMaster', 'TransformationPath',
        'ThermoIntEamQh', 'ThermoIntDftEam',
        'ScriptJob' (Python script or jupyter notebook job container),
        'ListMaster' (list of jobs)

    Args:
        job_type (str): one of the job types listed above
        job_name (str): name of the job
        delete_existing_job (bool): delete an existing job - default false

    Returns:
        GenericJob: job object depending on the job_type selected
    """
    child_job = self.project.create_job(
        job_type=job_type,
        job_name=job_name,
        delete_existing_job=delete_existing_job,
    )
    # let the new job know which job it was created from
    child_job._init_child_job(self)
    return child_job
def update_master(self, force_update=False):
    """
    After a job is finished it checks whether it is linked to any metajob - meaning a master ID is set. According
    to the original documentation, a master job in status suspended is woken up by the child: its status is set to
    refresh and run is executed on the master job. If another child calls update_master while the master is in
    refresh, the status of the master is set to busy, and if the master is in status busy at the end of the
    update_master process another update is triggered.

    Nothing is done when the database is disabled. Otherwise the master is updated if a master ID is set and either
    force_update is True or the run mode of the server is none of thread, modal, interactive or worker.

    Args:
        force_update (bool): Whether to check run mode for updating master
    """
    if state.database.database_is_disabled:
        return
    master_id = self.master_id
    project = self.project
    self._logger.info(
        "update master: {} {} {}".format(
            master_id, self.get_job_id(), self.server.run_mode
        )
    )
    if master_id is None:
        return
    if force_update or not (
        self.server.run_mode.thread
        or self.server.run_mode.modal
        or self.server.run_mode.interactive
        or self.server.run_mode.worker
    ):
        self._reload_update_master(project=project, master_id=master_id)
def job_file_name(self, file_name, cwd=None):
    """
    Join the file name with the path of the current working directory.

    Args:
        file_name (str): name of the file
        cwd (str): current working directory - this overwrites self.project_hdf5.working_directory - optional

    Returns:
        str: path to the file in the current working directory
    """
    directory = self.project_hdf5.working_directory if cwd is None else cwd
    return posixpath.join(directory, file_name)