-
Notifications
You must be signed in to change notification settings - Fork 122
/
SubmitRemoteJob.cpp
123 lines (97 loc) · 4.85 KB
/
SubmitRemoteJob.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
// Mantid Repository : https://github.com/mantidproject/mantid
//
// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
// NScD Oak Ridge National Laboratory, European Spallation Source,
// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
// SPDX - License - Identifier: GPL - 3.0 +
#include "MantidRemoteAlgorithms/SubmitRemoteJob.h"
#include "MantidKernel/BoundedValidator.h"
#include "MantidKernel/ConfigService.h"
#include "MantidKernel/Exception.h"
#include "MantidKernel/FacilityInfo.h"
#include "MantidKernel/ListValidator.h"
#include "MantidKernel/MandatoryValidator.h"
#include "MantidKernel/RemoteJobManager.h"
#include "MantidKernel/SimpleJSON.h"
#include <memory>
namespace Mantid {
namespace RemoteAlgorithms {
// Register the algorithm into the AlgorithmFactory
DECLARE_ALGORITHM(SubmitRemoteJob)
// Lets the code below refer to Mantid::Kernel names (such as ConfigService)
// without qualification.
using namespace Mantid::Kernel;
// using namespace Mantid::API;
// using namespace Mantid::Geometry;
// The base class provides a reference to the logger; it is called g_log.
// Declares the algorithm's properties: the target compute resource, the
// node/core request, the job and script details (inputs), and the resulting
// job ID (output).
void SubmitRemoteJob::init() {
  // This algorithm does not operate on workspaces; every property is a plain
  // value.

  // Only the compute resources listed by the current facility are accepted.
  std::vector<std::string> resourceNames =
      Mantid::Kernel::ConfigService::Instance().getFacility().computeResources();
  declareProperty("ComputeResource", "", std::make_shared<StringListValidator>(resourceNames),
                  "The name of the remote computer to submit the job to", Direction::Input);

  // One validator shared by both count properties: lower bound of 0, no upper
  // bound set.
  auto nonNegativeInt = std::make_shared<BoundedValidator<int>>();
  nonNegativeInt->setLower(0);

  // NOTE: NumNodes and CoresPerNode are 'implementation specific'. Fermi is
  // known to need them, but ideally the information URL would be queried
  // before requiring them.
  // The number of MPI processes actually started is NumNodes * CoresPerNode.
  declareProperty("NumNodes", 0, nonNegativeInt, "The number of compute nodes the job requires", Direction::Input);
  declareProperty("CoresPerNode", 0, nonNegativeInt, "The number of processes to start on each compute node",
                  Direction::Input);

  // Optional label: an easy way to refer to a remote job, e.g. when listing
  // the jobs the user has submitted recently.
  declareProperty("TaskName", std::string(""), "A short name for the job.", Direction::Input);

  // The remaining inputs must all be supplied.
  auto mandatoryString = std::make_shared<MandatoryValidator<std::string>>();

  // The transaction ID is produced by the StartRemoteTransaction algorithm.
  declareProperty("TransactionID", "", mandatoryString, "The transaction ID to associate with this job",
                  Direction::Input);

  // Name under which the python script is submitted.
  declareProperty("ScriptName", "", mandatoryString, "A name for the python script that will be executed",
                  Direction::Input);

  // The python source itself.
  declareProperty("PythonScript", "", mandatoryString, "The actual python code to execute", Direction::Input);

  // Set after a successful submission; the value can be used to track the job.
  declareProperty("JobID", std::string(""), "An ID string for this job", Direction::Output);
}
// Submits the python script to the selected compute resource with an HTTP
// POST to "/submit" and stores the job ID from the JSON reply in the JobID
// output property.
//
// Throws std::runtime_error if no job manager is available for the requested
// compute resource, or if the server replies with any status other than
// HTTP 201 (Created). In the latter case the exception text is the reply's
// "Err_Msg" field when present, otherwise a generic message carrying the
// HTTP status code.
void SubmitRemoteJob::exec() {
  std::shared_ptr<RemoteJobManager> jobManager =
      ConfigService::Instance().getFacility().getRemoteJobManager(getPropertyValue("ComputeResource"));
  if (!jobManager) {
    // Requested compute resource doesn't exist
    // TODO: should we create our own exception class for this??
    throw std::runtime_error("Unable to create a compute resource named " + getPropertyValue("ComputeResource"));
  }

  // Assemble the form fields for the submission request.
  RemoteJobManager::PostDataMap postData;
  postData["TransID"] = getPropertyValue("TransactionID");
  postData["NumNodes"] = getPropertyValue("NumNodes");
  postData["CoresPerNode"] = getPropertyValue("CoresPerNode");
  postData["ScriptName"] = getPropertyValue("ScriptName");
  // The script source is posted under a key equal to the script's own name.
  postData[getPropertyValue("ScriptName")] = getPropertyValue("PythonScript");

  // Job name is optional
  const std::string jobName = getPropertyValue("TaskName");
  if (!jobName.empty()) {
    postData["JobName"] = jobName;
  }

  std::istream &respStream = jobManager->httpPost("/submit", postData);
  JSONObject resp;
  initFromStream(resp, respStream);

  if (jobManager->lastStatus() != Poco::Net::HTTPResponse::HTTP_CREATED) {
    std::string errMsg;
    // getValue() reports whether the field held a string. Without this check
    // a reply lacking "Err_Msg" would surface as an exception with an empty
    // message, leaving the user with nothing to act on.
    if (!resp["Err_Msg"].getValue(errMsg) || errMsg.empty()) {
      errMsg = "Job submission failed with HTTP status " +
               std::to_string(static_cast<int>(jobManager->lastStatus())) +
               " and the server response contained no error message";
    }
    throw std::runtime_error(errMsg);
  }

  std::string jobId;
  if (!resp["JobID"].getValue(jobId)) {
    // The server accepted the job, so do not fail the algorithm; but make it
    // visible that the output property will not identify the job.
    g_log.warning() << "Job was accepted but the server response contained no JobID string\n";
  }
  setPropertyValue("JobID", jobId);
  g_log.information() << "Job submitted. Job ID = " << getPropertyValue("JobID") << '\n';
}
} // end namespace RemoteAlgorithms
} // end namespace Mantid