Skip to content

Commit

Permalink
Allow distance matrix script to run across nodes and simplify SLURM interface
Browse files Browse the repository at this point in the history
  • Loading branch information
srbdev committed Jan 13, 2016
1 parent 689bb60 commit d8501a3
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 30 deletions.
8 changes: 5 additions & 3 deletions agent/slycat-agent-create-image-distance-matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ def convert_gray(image):
parser.add_argument("input", help="Input CSV file containing image paths")
parser.add_argument("output", help="Output CSV file")
parser.add_argument("--distance-measure", default="jaccard", help="Distance metric to be used. Options: jaccard, jaccard2 (for very light-colored images), one-norm, correlation, cosine, or hamming. For most image data, correlation or jaccard will yield best results.")
parser.add_argument("--profile", default=None, help="Name of the IPython profile to use")
arguments = parser.parse_args()

###########################################################################################
Expand Down Expand Up @@ -210,9 +211,10 @@ def convert_gray(image):
# for use by the parallel code.

try:
workers = IPython.parallel.Client()[:]
except:
raise Exception("A running IPython paralle cluster is required.")
workers = IPython.parallel.Client(profile=arguments.profile)[:]
except Exception, e:
print str(e)
raise Exception("A running IPython parallel cluster is required.")

workers.use_dill()
with workers.sync_imports():
Expand Down
6 changes: 3 additions & 3 deletions packages/slycat/web/server/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1420,16 +1420,16 @@ def run_agent_function():
nnodes = cherrypy.request.json["nnodes"]
partition = cherrypy.request.json["partition"]
ntasks_per_node = cherrypy.request.json["ntasks_per_node"]
ntasks = cherrypy.request.json["ntasks"]
ncpu_per_task = cherrypy.request.json["ncpu_per_task"]
# ntasks = cherrypy.request.json["ntasks"]
# ncpu_per_task = cherrypy.request.json["ncpu_per_task"]
time_hours = cherrypy.request.json["time_hours"]
time_minutes = cherrypy.request.json["time_minutes"]
time_seconds = cherrypy.request.json["time_seconds"]
fn = cherrypy.request.json["fn"]
fn_params = cherrypy.request.json["fn_params"]
uid = cherrypy.request.json["uid"]
with slycat.web.server.remote.get_session(sid) as session:
return session.run_agent_function(wckey, nnodes, partition, ntasks_per_node, ntasks, ncpu_per_task, time_hours, time_minutes, time_seconds, fn, fn_params, uid)
return session.run_agent_function(wckey, nnodes, partition, ntasks_per_node, time_hours, time_minutes, time_seconds, fn, fn_params, uid)

@cherrypy.tools.json_in(on = True)
@cherrypy.tools.json_out(on = True)
Expand Down
26 changes: 21 additions & 5 deletions packages/slycat/web/server/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ def get_job_output(self, jid, path):
slycat.email.send_error("slycat.web.server.remote.py get_job_output", "cherrypy.HTTPError 500 no Slycat agent present on remote host.")
raise cherrypy.HTTPError(500)

def run_agent_function(self, wckey, nnodes, partition, ntasks_per_node, ntasks, ncpu_per_task, time_hours, time_minutes, time_seconds, fn, fn_params, uid):
def run_agent_function(self, wckey, nnodes, partition, ntasks_per_node, time_hours, time_minutes, time_seconds, fn, fn_params, uid):
"""Submits a command to the slycat-agent to run a predefined function on a cluster running SLURM.
Parameters
Expand Down Expand Up @@ -327,10 +327,28 @@ def create_distance_matrix(fn_id, params):
else:
module_name = "slycat"

arr = ["source /etc/profile.d/modules.sh", "module load %s" % module_name, "ipcluster start -n %s &" % ncpu_per_task, "sleep 2m"]
arr = [
"source /etc/profile.d/modules.sh",
"module load %s" % module_name,

"profile=slurm_${SLURM_JOB_ID}_$(hostname)",
"echo \"Creating profile ${profile}\"",
"ipython profile create --parallel --profile=${profile}",

"echo \"Launching controller\"",
"ipcontroller --ip='*' --profile=${profile} &",
"sleep 1m",

"echo \"Launching engines\"",
"srun ipengine --profile=${profile} --location=$(hostname) &",
"sleep 1m",

"echo \"Launching job\""
]

for c in params["image_columns_names"]:
arr.append("python $SLYCAT_HOME/agent/slycat-agent-create-image-distance-matrix.py --distance-measure %s --distance-column %s %s ~/slycat_%s_%s_%s_distance_matrix.csv" % (f, c, params["input"], c, uid, f))
# arr.append("python $SLYCAT_HOME/agent/slycat-agent-create-image-distance-matrix.py --distance-measure %s --distance-column %s %s ~/slycat_%s_%s_%s_distance_matrix.csv --profile ${profile}" % (f, c, params["input"], c, uid, f))
arr.append("python slycat-agent-create-image-distance-matrix.py --distance-measure %s --distance-column %s %s ~/slycat_%s_%s_%s_distance_matrix.csv --profile ${profile}" % (f, c, params["input"], c, uid, f))

return arr

Expand All @@ -353,8 +371,6 @@ def agent_functions(fn_id, params):
"nnodes": nnodes,
"partition": partition,
"ntasks_per_node": ntasks_per_node,
"ntasks": ntasks,
"ncpu_per_task": ncpu_per_task,
"time_hours": time_hours,
"time_minutes": time_minutes,
"time_seconds": time_seconds,
Expand Down
26 changes: 13 additions & 13 deletions web-server/js/slycat-remote-interface.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ define('slycat-remote-interface', ['knockout', 'knockout-mapping', 'slycat-serve
vm.batch = ko.observable('');

vm.wckey = ko.observable('');
vm.nnodes = ko.observable(1);
vm.nnodes = ko.observable(4);
vm.partition = ko.observable('');
vm.ntasks_per_node = ko.observable(1);
vm.ntasks = ko.observable(1);
vm.ncpu_per_task = ko.observable(4);
// vm.ntasks = ko.observable(1);
// vm.ncpu_per_task = ko.observable(4);
vm.time_hours = ko.observable();
vm.time_minutes = ko.observable(5);
vm.time_seconds = ko.observable();
Expand Down Expand Up @@ -105,15 +105,15 @@ define('slycat-remote-interface', ['knockout', 'knockout-mapping', 'slycat-serve
invalid = true;
}

if (vm.ntasks() === undefined || parseInt(vm.ntasks(), 10) < 1) {
out += '\n' + 'Invalid input for the number of task(s): ' + vm.ntasks() + '.';
invalid = true;
}
// if (vm.ntasks() === undefined || parseInt(vm.ntasks(), 10) < 1) {
// out += '\n' + 'Invalid input for the number of task(s): ' + vm.ntasks() + '.';
// invalid = true;
// }

if (vm.ncpu_per_task() === undefined || parseInt(vm.ncpu_per_task(), 10) < 1) {
out += '\n' + 'Invalid input for the number of CPU(s) per task: ' + vm.ncpu_per_task() + '.';
invalid = true;
}
// if (vm.ncpu_per_task() === undefined || parseInt(vm.ncpu_per_task(), 10) < 1) {
// out += '\n' + 'Invalid input for the number of CPU(s) per task: ' + vm.ncpu_per_task() + '.';
// invalid = true;
// }


var hr = vm.time_hours() === undefined ? 0 : parseInt(vm.time_hours(), 10);
Expand Down Expand Up @@ -258,8 +258,8 @@ define('slycat-remote-interface', ['knockout', 'knockout-mapping', 'slycat-serve
nnodes: vm.nnodes(),
partition: vm.partition(),
ntasks_per_node: vm.ntasks_per_node(),
ntasks: vm.ntasks(),
ncpu_per_task: vm.ncpu_per_task(),
// ntasks: vm.ntasks(),
// ncpu_per_task: vm.ncpu_per_task(),
time_hours: vm.time_hours() === undefined ? 0 : vm.time_hours(),
time_minutes: vm.time_minutes() === undefined ? 0 : vm.time_minutes(),
time_seconds: vm.time_seconds() === undefined ? 0 : vm.time_seconds(),
Expand Down
18 changes: 12 additions & 6 deletions web-server/templates/slycat-remote-interface.html
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,21 @@
</div>
</div>
<div class="form-group">
<label for="slycat-remote-interface-command" class="col-sm-4 control-label">Number of nodes</label>
<label for="slycat-remote-interface-command" class="col-sm-4 control-label">Partition</label>
<div class="col-sm-4">
<input class="form-control slycat-remote-interface-prebuilt-field" type="number" data-bind="value: nnodes, attr: {'disabled' : disabled}">
<input class="form-control slycat-remote-interface-prebuilt-field" type="text" data-bind="value: partition, attr: {'disabled' : disabled}">
</div>
</div>
<div class="form-group">
<label for="slycat-remote-interface-command" class="col-sm-4 control-label">Partition</label>
<label class="col-sm-4 control-label"></label>
<div class="col-sm-8">
Note: increasing the number of tasks per node will increase the memory usage for the node.
</div>
</div>
<div class="form-group">
<label for="slycat-remote-interface-command" class="col-sm-4 control-label">Number of nodes</label>
<div class="col-sm-4">
<input class="form-control slycat-remote-interface-prebuilt-field" type="text" data-bind="value: partition, attr: {'disabled' : disabled}">
<input class="form-control slycat-remote-interface-prebuilt-field" type="number" data-bind="value: nnodes, attr: {'disabled' : disabled}">
</div>
</div>
<div class="form-group">
Expand All @@ -46,7 +52,7 @@
<input class="form-control slycat-remote-interface-prebuilt-field" type="number" data-bind="value: ntasks_per_node, attr: {'disabled' : disabled}">
</div>
</div>
<div class="form-group">
<!-- <div class="form-group">
<label for="slycat-remote-interface-command" class="col-sm-4 control-label">Number of tasks</label>
<div class="col-sm-4">
<input class="form-control slycat-remote-interface-prebuilt-field" type="number" data-bind="value: ntasks, attr: {'disabled' : disabled}">
Expand All @@ -57,7 +63,7 @@
<div class="col-sm-4">
<input class="form-control slycat-remote-interface-prebuilt-field" type="number" data-bind="value: ncpu_per_task, attr: {'disabled' : disabled}">
</div>
</div>
</div> -->
<div class="form-group">
<label for="slycat-remote-interface-command" class="col-sm-4 control-label">Time</label>
<div class="col-sm-2">
Expand Down

0 comments on commit d8501a3

Please sign in to comment.