Skip to content

Commit

Permalink
added option to run task based on dependency exit code
Browse files Browse the repository at this point in the history
  • Loading branch information
justanhduc committed Feb 4, 2021
1 parent 1488f71 commit 1916f1c
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 26 deletions.
58 changes: 33 additions & 25 deletions client.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <signal.h>
#include <time.h>

Expand Down Expand Up @@ -136,33 +134,43 @@ int c_wait_server_commands() {
if (m.type == RUNJOB) {
struct Result result;
result.skipped = 0;
if (command_line.gpus) {
int numFree;
int *freeList = getFreeGpuList(&numFree);
if (command_line.gpus > numFree) {
result.errorlevel = -1;
result.user_ms = 0.;
result.system_ms = 0.;
result.real_ms = 0.;
result.skipped = 1;
c_send_runjob_ok(0, -1);
} else {
char tmp[50];
strcpy(tmp, "CUDA_VISIBLE_DEVICES=");
shuffle(freeList, numFree);
for (int i = 0; i < command_line.gpus; i++) {
char tmp2[5];
sprintf(tmp2, "%d", freeList[i]);
strcat(tmp, tmp2);
if (i < command_line.gpus - 1)
strcat(tmp, ",");
if (command_line.do_depend && command_line.require_elevel && m.u.last_errorlevel != 0) {
result.errorlevel = -1;
result.user_ms = 0.;
result.system_ms = 0.;
result.real_ms = 0.;
result.skipped = 1;
c_send_runjob_ok(0, -1);
} else {
if (command_line.gpus) {
int numFree;
int *freeList = getFreeGpuList(&numFree);
if ((command_line.gpus > numFree)) {
result.errorlevel = -1;
result.user_ms = 0.;
result.system_ms = 0.;
result.real_ms = 0.;
result.skipped = 1;
c_send_runjob_ok(0, -1);
} else {
char tmp[50];
strcpy(tmp, "CUDA_VISIBLE_DEVICES=");
shuffle(freeList, numFree);
for (int i = 0; i < command_line.gpus; i++) {
char tmp2[5];
sprintf(tmp2, "%d", freeList[i]);
strcat(tmp, tmp2);
if (i < command_line.gpus - 1)
strcat(tmp, ",");
}
putenv(tmp);
}
putenv(tmp);
free(freeList);
}
free(freeList);

run_job(&result);
}

run_job(&result);
c_end_of_job(&result);
return result.errorlevel;
} else if (m.type == REMINDER) {
Expand Down
7 changes: 7 additions & 0 deletions jobs.c
Original file line number Diff line number Diff line change
Expand Up @@ -854,6 +854,13 @@ void s_send_runjob(int s, int jobid) {
error("Job %i was expected to run", jobid);

m.type = RUNJOB;

/* TODO
* We should make the dependencies update the jobids they're depending on.
* Then, on finish, these could set the errorlevel to send to their dependency children.
* We cannot assume that the jobs will leave traces in the finished job list (-nf?). */

m.u.last_errorlevel = p->dependency_errorlevel;
send_msg(s, &m);
}

Expand Down
9 changes: 8 additions & 1 deletion main.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ static void default_command_line() {
command_line.wait_enqueuing = 1;
command_line.stderr_apart = 0;
command_line.num_slots = 1;
command_line.require_elevel = 0;
command_line.gpus = 0;
}

Expand Down Expand Up @@ -93,7 +94,7 @@ void parse_opts(int argc, char **argv) {

/* Parse options */
while (1) {
c = getopt_long(argc, argv, ":RTVhKgClnfmBEr:a:F:t:c:o:p:w:k:u:s:U:qi:N:L:dS:D:G:",
c = getopt_long(argc, argv, ":RTVhKgClnfmBEr:a:F:t:c:o:p:w:k:u:s:U:qi:N:L:dS:D:G:W:",
longOptions, &optionIdx);

if (c == -1)
Expand Down Expand Up @@ -225,6 +226,11 @@ void parse_opts(int argc, char **argv) {
command_line.do_depend = 1;
command_line.depend_on = atoi(optarg);
break;
case 'W':
command_line.do_depend = 1;
command_line.depend_on = atoi(optarg);
command_line.require_elevel = 1;
break;
case 'U':
command_line.request = c_SWAP_JOBS;
res = get_two_jobs(optarg, &command_line.jobid,
Expand Down Expand Up @@ -426,6 +432,7 @@ static void print_help(const char *cmd) {
printf(" -m send the output by e-mail (uses sendmail).\n");
printf(" -d the job will be run after the last job ends.\n");
printf(" -D <id> the job will be run after the job of given id ends.\n");
printf(" -W <id> the job will be run after the job of given id ends well (exit code 0).\n");
printf(" -L <lab> name this task with a label, to be distinguished on listing.\n");
printf(" -N <num> number of slots required by the job (1 default).\n");
}
Expand Down
1 change: 1 addition & 0 deletions main.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ struct CommandLine {
} command;
char *label;
int num_slots; /* Slots for the job to use. Default 1 */
int require_elevel; /* whether requires error level of dependencies or not */
int gpus;
int gpu_wait_time;
};
Expand Down

0 comments on commit 1916f1c

Please sign in to comment.