// Test_CheckpointAndExitAfterWallclock.cpp
// Distributed under the MIT License.
// See LICENSE.txt for details.
#include "Framework/TestingFramework.hpp"
#include <optional>
#include <utility>
#include "DataStructures/DataBox/DataBox.hpp"
#include "Framework/TestCreation.hpp"
#include "Options/Protocols/FactoryCreation.hpp"
#include "Parallel/ExitCode.hpp"
#include "Parallel/GlobalCache.hpp"
#include "Parallel/Phase.hpp"
#include "Parallel/PhaseControl/CheckpointAndExitAfterWallclock.hpp"
#include "Parallel/PhaseControl/PhaseControlTags.hpp"
#include "ParallelAlgorithms/EventsAndTriggers/LogicalTriggers.hpp"
#include "ParallelAlgorithms/EventsAndTriggers/Trigger.hpp"
#include "Utilities/Gsl.hpp"
#include "Utilities/ProtocolHelpers.hpp"
#include "Utilities/TMPL.hpp"
// Metavariables for this test: an empty component list plus the factory map
// used when creating `PhaseChange` and `Trigger` objects from options.
struct Metavariables {
  using component_list = tmpl::list<>;

  struct factory_creation
      : tt::ConformsTo<Options::protocols::FactoryCreation> {
    // Classes registered for each base-class key of the factory map.
    using creatable_phase_changes =
        tmpl::list<PhaseControl::CheckpointAndExitAfterWallclock>;
    using creatable_triggers = tmpl::list<Triggers::Always>;

    using factory_classes =
        tmpl::map<tmpl::pair<PhaseChange, creatable_phase_changes>,
                  tmpl::pair<Trigger, creatable_triggers>>;
  };
};
// Exercises PhaseControl::CheckpointAndExitAfterWallclock: option parsing,
// `initialize_phase_data`, and the results of `arbitrate_phase_change` when
// called from the Execute phase and from the WriteCheckpoint phase.
//
// The decision-data tuple is brace-initialized positionally throughout. Judging
// from the values used below, the first three slots hold the restart phase
// (optional), the wallclock hours at checkpoint (optional), and the
// checkpoint-and-exit-requested flag; the last two are a bool and an exit
// code. NOTE(review): slot meanings are inferred from this file only --
// confirm against `PhaseControl::get_phase_change_tags`.
SPECTRE_TEST_CASE("Unit.Parallel.PhaseControl.CheckpointAndExitAfterWallclock",
                  "[Unit][Parallel]") {
  // note that the `contribute_phase_data_impl` function is currently untested
  // in this unit test, because we do not have good support for reductions in
  // the action testing framework.

  // The indentation inside the YAML text is significant: `WallclockHours` must
  // be nested below `CheckpointAndExitAfterWallclock`, which in turn sits in
  // the list that follows the `Always` trigger.
  // NOTE(review): nesting reconstructed from the option names -- confirm it
  // matches the format expected by `PhaseChangeAndTriggers`.
  const auto created_phase_changes = TestHelpers::test_option_tag<
      PhaseControl::OptionTags::PhaseChangeAndTriggers, Metavariables>(
      " - - Always:\n"
      "   - - CheckpointAndExitAfterWallclock:\n"
      "         WallclockHours: 0.0");

  Parallel::GlobalCache<Metavariables> cache{};

  using PhaseChangeDecisionData = tuples::tagged_tuple_from_typelist<
      PhaseControl::get_phase_change_tags<Metavariables>>;

  // phase_change0 is constructed with 0.0 hours and phase_change1 with 1.0
  // hours, giving a "tiny" and a "big" trigger time for the checks below.
  const PhaseControl::CheckpointAndExitAfterWallclock phase_change0(0.0);
  const PhaseControl::CheckpointAndExitAfterWallclock phase_change1(1.0);
  {
    INFO("Test initialize phase change decision data");
    // Start from non-default values so the check below shows that
    // initialization resets the first three slots. The literals are written in
    // slot order (double for the hours, bool for the flag) rather than relying
    // on implicit bool <-> double conversions.
    PhaseChangeDecisionData phase_change_decision_data{
        Parallel::Phase::Execute, 1.0, true, true,
        Parallel::ExitCode::Complete};
    phase_change0.initialize_phase_data<Metavariables>(
        make_not_null(&phase_change_decision_data));
    // extra parens in the check prevent Catch from trying to stream the tuple
    CHECK((phase_change_decision_data ==
           PhaseChangeDecisionData{std::nullopt, std::nullopt, false, true,
                                   Parallel::ExitCode::Complete}));
  }
  {
    INFO("Wallclock time < big trigger time");
    // Check behavior when a checkpoint-and-exit has been requested
    // First check case where wallclock time < trigger wallclock time, using
    // the PhaseChange with a big trigger time.
    // (this assumes the test doesn't take 1h to get here)
    PhaseChangeDecisionData phase_change_decision_data{
        std::nullopt, std::nullopt, true, true, Parallel::ExitCode::Complete};
    const auto decision_result = phase_change1.arbitrate_phase_change(
        make_not_null(&phase_change_decision_data), Parallel::Phase::Execute,
        cache);
    CHECK((decision_result == std::nullopt));
    CHECK((phase_change_decision_data ==
           PhaseChangeDecisionData{std::nullopt, std::nullopt, false, true,
                                   Parallel::ExitCode::Complete}));
  }
  {
    INFO("Wallclock time > small trigger time");
    // Now check case where wallclock time > trigger wallclock time, using
    // the PhaseChange with a tiny trigger time.
    // (this assumes the test takes at least a few cycles to get here)
    PhaseChangeDecisionData phase_change_decision_data{
        std::nullopt, std::nullopt, true, true, Parallel::ExitCode::Complete};
    const auto decision_result = phase_change0.arbitrate_phase_change(
        make_not_null(&phase_change_decision_data), Parallel::Phase::Execute,
        cache);
    CHECK((decision_result ==
           std::make_pair(
               Parallel::Phase::WriteCheckpoint,
               PhaseControl::ArbitrationStrategy::RunPhaseImmediately)));
    // It's impossible to know what the elapsed wallclock time will be, so we
    // check the tags one by one...
    CHECK((tuples::get<PhaseControl::Tags::RestartPhase>(
               phase_change_decision_data) == Parallel::Phase::Execute));
    // Check recorded time in range: 0 second < time < 1 second
    // (this assumes test run duration falls in this time window)
    CHECK(tuples::get<PhaseControl::Tags::WallclockHoursAtCheckpoint>(
              phase_change_decision_data) > 0.0);
    const double one_second = 1.0 / 3600.0;
    CHECK(tuples::get<PhaseControl::Tags::WallclockHoursAtCheckpoint>(
              phase_change_decision_data) < one_second);
    CHECK(tuples::get<PhaseControl::Tags::CheckpointAndExitRequested>(
              phase_change_decision_data) == false);
  }
  {
    INFO("Restarting from checkpoint");
    // Check behavior following the checkpoint phase
    // First check case where wallclock time < recorded time, which corresponds
    // to restarting from a checkpoint.
    // (this assumes the test doesn't take 1h to get here)
    PhaseChangeDecisionData phase_change_decision_data{
        Parallel::Phase::Execute, 1.0, false, true,
        Parallel::ExitCode::Complete};
    const auto decision_result = phase_change0.arbitrate_phase_change(
        make_not_null(&phase_change_decision_data),
        Parallel::Phase::WriteCheckpoint, cache);
    CHECK((decision_result ==
           std::make_pair(
               Parallel::Phase::Execute,
               PhaseControl::ArbitrationStrategy::PermitAdditionalJumps)));
    CHECK((phase_change_decision_data ==
           PhaseChangeDecisionData{std::nullopt, std::nullopt, false, true,
                                   Parallel::ExitCode::Complete}));
  }
  {
    INFO("Exiting after checkpoint");
    // Now check case where wallclock time > recorded time, which corresponds to
    // having just written a checkpoint. We want to exit with exit code 2 now.
    // (this assumes the test takes at least a few cycles to get here)
    PhaseChangeDecisionData phase_change_decision_data{
        Parallel::Phase::Execute, 1e-15, false, true,
        Parallel::ExitCode::Complete};
    const auto decision_result = phase_change0.arbitrate_phase_change(
        make_not_null(&phase_change_decision_data),
        Parallel::Phase::WriteCheckpoint, cache);
    CHECK((decision_result ==
           std::make_pair(
               Parallel::Phase::Exit,
               PhaseControl::ArbitrationStrategy::RunPhaseImmediately)));
    CHECK(
        (phase_change_decision_data ==
         PhaseChangeDecisionData{Parallel::Phase::Execute, 1e-15, false, true,
                                 Parallel::ExitCode::ContinueFromCheckpoint}));
  }
}