Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Fix the getManagerId() error messages logged by sbatchd. Untabify mail.c and sbd.misc.c. Add lsaddhost and lsrmhost to the RPM spec file.

  • Loading branch information...
commit 2aec3353d8ff03c34c56a77d5537d5467fc68d3f 1 parent b33aec1
@openlava authored
Showing with 204 additions and 203 deletions.
  1. +42 −42 lsbatch/daemons/mail.c
  2. +158 −161 lsbatch/daemons/sbd.misc.c
  3. +4 −0 spec/openlava.spec
View
84 lsbatch/daemons/mail.c
@@ -33,7 +33,7 @@
# include <stdarg.h>
-#define NL_SETN 10
+#define NL_SETN 10
#ifdef NO_MAIL
void lsb_mperr (char *msg) {}
@@ -52,7 +52,7 @@ lsb_mperr (char *msg)
char err[MAXLINELEN];
if (lsb_CheckMode)
- return;
+ return;
p=strchr(msg,'\n');
if(p != NULL )
@@ -75,7 +75,7 @@ lsb_merr (char *s)
char *myhostnm;
if (lsb_CheckMode)
- return;
+ return;
if ((myhostnm = ls_getmyhostname()) == NULL) {
ls_syslog(LOG_ERR, I18N_FUNC_FAIL_MM, fname, "ls_getmyhostname");
@@ -87,13 +87,13 @@ lsb_merr (char *s)
if (lsbManager == NULL || (getpwlsfuser_(lsbManager)) == NULL) {
if (lsbManager == NULL)
ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 8601,
- "%s: LSF administrator name is NULL"), /* catgets 8601 */
- fname);
+ "%s: LSF administrator name is NULL"), /* catgets 8601 */
+ fname);
else
ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 8602,
- "%s: Bad LSF administrator name <%s>"), /* catgets 8602 */
- fname,
- lsbManager);
+ "%s: Bad LSF administrator name <%s>"), /* catgets 8602 */
+ fname,
+ lsbManager);
if (masterme)
die (MASTER_FATAL);
else
@@ -103,12 +103,12 @@ lsb_merr (char *s)
if (masterme)
fprintf(mail, _i18n_msg_get(ls_catd, NL_SETN, 3201,
- "Subject: mbatchd on %s: %s\n"), /* catgets 3201 */
- myhostnm, s);
+ "Subject: mbatchd on %s: %s\n"), /* catgets 3201 */
+ myhostnm, s);
else
fprintf(mail, _i18n_msg_get(ls_catd, NL_SETN, 3202,
- "Subject: sbatchd on %s: %s\n"), /* catgets 3202 */
- myhostnm, s);
+ "Subject: sbatchd on %s: %s\n"), /* catgets 3202 */
+ myhostnm, s);
mclose(mail);
}
@@ -122,18 +122,18 @@ merr_user (char *user, char *host, char *msg, char *type)
if ((myhostnm = ls_getmyhostname()) == NULL) {
ls_syslog(LOG_ERR, I18N_FUNC_FAIL_MM, fname, "ls_getmyhostname");
- die(MASTER_FATAL);
+ die(MASTER_FATAL);
}
mail = smail(user, host);
fprintf(mail, _i18n_msg_get(ls_catd, NL_SETN, 3203,
- "Subject: job %s report from %s\n"), /* catgets 3203 */
- type,
- myhostnm);
+ "Subject: job %s report from %s\n"), /* catgets 3203 */
+ type,
+ myhostnm);
fprintf(mail, _i18n_msg_get(ls_catd, NL_SETN, 3204,
- "\n\nDear %s,\n\n%s\n\n"), /* catgets 3204 */
- user,
- msg);
+ "\n\nDear %s,\n\n%s\n\n"), /* catgets 3204 */
+ user,
+ msg);
mclose(mail);
}
@@ -150,24 +150,24 @@ addr_process (char *adbuf, char *user, char *tohost, char *spec)
bp = adbuf;
for (sp = spec ; *sp ; sp++)
{
- if ((*sp == '^') || (*sp == '!'))
- {
- switch (*++sp)
- {
- case 'U':
- for (up = user ; *up ; )
- *bp++ = *up++;
- continue;
- case 'H':
- for (up = tohost ; *up ; )
- *bp++ = *up++;
- continue;
- default:
- sp -= 1;
-
- }
- }
- *bp++ = *sp;
+ if ((*sp == '^') || (*sp == '!'))
+ {
+ switch (*++sp)
+ {
+ case 'U':
+ for (up = user ; *up ; )
+ *bp++ = *up++;
+ continue;
+ case 'H':
+ for (up = tohost ; *up ; )
+ *bp++ = *up++;
+ continue;
+ default:
+ sp -= 1;
+
+ }
+ }
+ *bp++ = *sp;
}
*bp = 0;
}
@@ -186,7 +186,7 @@ smail (char *to, char *tohost)
uid_t userid;
if (lsb_CheckMode)
- return stderr;
+ return stderr;
if ((to == NULL) || (tohost == NULL)) {
ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 8604,
@@ -202,14 +202,14 @@ smail (char *to, char *tohost)
if (debug > 2)
- return(stderr);
+ return(stderr);
if(pipe(maild) < 0) {
ls_syslog(LOG_ERR, I18N_FUNC_S_FAIL_M, fname, "pipe", osUserName);
return stderr;
}
addr_process(toaddr, osUserName, tohost,
- daemonParams[LSB_MAILTO].paramValue);
+ daemonParams[LSB_MAILTO].paramValue);
if (logclass & (LC_TRACE | LC_EXEC))
ls_syslog(LOG_DEBUG1, "%s: user=%s host=%s toaddr=%s spec=%s", fname,
osUserName, tohost, toaddr, daemonParams[LSB_MAILTO].paramValue);
@@ -222,7 +222,7 @@ smail (char *to, char *tohost)
}
close(maild[1]);
- sendmailp = daemonParams[LSB_MAILPROG].paramValue;
+ sendmailp = daemonParams[LSB_MAILPROG].paramValue;
userid = geteuid();
chuser(getuid());
@@ -258,7 +258,7 @@ smail (char *to, char *tohost)
if (ferror(fmail)) {
fclose(fmail);
ls_syslog(LOG_ERR, I18N_FUNC_S_S_FAIL_M, fname, "fprintf", "header",
- osUserName);
+ osUserName);
return stderr;
}
return fmail;
View
319 lsbatch/daemons/sbd.misc.c
@@ -21,19 +21,19 @@
#include "sbd.h"
#include "../../lsf/lib/lsi18n.h"
-#define NL_SETN 11
+#define NL_SETN 11
extern short mbdExitVal;
extern int mbdExitCnt;
-#define NL_SETN 11
+#define NL_SETN 11
void
milliSleep( int msec )
{
struct timeval dtime;
-
+
if (msec < 1)
- return;
+ return;
dtime.tv_sec = msec/1000;
dtime.tv_usec = (msec - dtime.tv_sec * 1000) * 1000;
@@ -43,7 +43,7 @@ milliSleep( int msec )
-char
+char
window_ok (struct jobCard *jobPtr)
{
windows_t *wp;
@@ -53,63 +53,61 @@ window_ok (struct jobCard *jobPtr)
time_t now;
now = time(0);
- active = jobPtr->active;
+ active = jobPtr->active;
if (active && (jobPtr->jobSpecs.options & SUB_WINDOW_SIG))
- ckTime = now + WARN_TIME;
+ ckTime = now + WARN_TIME;
else
ckTime = now;
if (jobPtr->windEdge > ckTime || jobPtr->windEdge == 0)
return (jobPtr->active);
- getDayHour (&dayhour, ckTime);
- if (jobPtr->week[dayhour.day] == NULL) {
+ getDayHour (&dayhour, ckTime);
+ if (jobPtr->week[dayhour.day] == NULL) {
jobPtr->active = TRUE;
jobPtr->windEdge = now + (24.0 - dayhour.hour) * 3600.0;
return (jobPtr->active);
}
-
+
jobPtr->active = FALSE;
jobPtr->windEdge = now + (24.0 - dayhour.hour) * 3600.0;
- for (wp = jobPtr->week[dayhour.day]; wp; wp=wp->nextwind)
+ for (wp = jobPtr->week[dayhour.day]; wp; wp=wp->nextwind)
checkWindow(&dayhour, &jobPtr->active, &jobPtr->windEdge, wp, now);
if (active && !jobPtr->active && now - jobPtr->windWarnTime >= WARN_TIME
&& (jobPtr->jobSpecs.options & SUB_WINDOW_SIG)) {
-
- if (!(jobPtr->jobSpecs.jStatus & JOB_STAT_RUN))
- job_resume(jobPtr);
- jobsig (jobPtr, sig_decode (jobPtr->jobSpecs.sigValue), TRUE);
- jobPtr->windWarnTime = now;
+
+ if (!(jobPtr->jobSpecs.jStatus & JOB_STAT_RUN))
+ job_resume(jobPtr);
+ jobsig (jobPtr, sig_decode (jobPtr->jobSpecs.sigValue), TRUE);
+ jobPtr->windWarnTime = now;
}
-
+
return (jobPtr->active);
-}
+}
void
shout_err (struct jobCard *jobPtr, char *msg)
{
char buf[MSGSIZE];
- sprintf(buf, _i18n_msg_get(ls_catd, NL_SETN, 600,
- "We are unable to run your job %s:<%s>. The error is:\n%s."), /* catgets 600 */
- lsb_jobid2str(jobPtr->jobSpecs.jobId),
- jobPtr->jobSpecs.command, msg);
-
-
- if (jobPtr->jobSpecs.options & SUB_MAIL_USER)
- merr_user (jobPtr->jobSpecs.mailUser, jobPtr->jobSpecs.fromHost,
- buf, I18N_error);
- else
- merr_user (jobPtr->jobSpecs.userName, jobPtr->jobSpecs.fromHost,
- buf, I18N_error);
-
-}
-
+ sprintf(buf, \
+"We are unable to run your job %s:<%s>. The error is:\n%s.",
+ lsb_jobid2str(jobPtr->jobSpecs.jobId),
+ jobPtr->jobSpecs.command, msg);
+
+ if (jobPtr->jobSpecs.options & SUB_MAIL_USER) {
+ merr_user(jobPtr->jobSpecs.mailUser, jobPtr->jobSpecs.fromHost,
+ buf, I18N_error);
+ } else {
+ merr_user(jobPtr->jobSpecs.userName, jobPtr->jobSpecs.fromHost,
+ buf, I18N_error);
+ }
+}
-void
-child_handler (int sig)
+void
+child_handler(int sig)
{
int pid;
LS_WAIT_T status;
@@ -124,17 +122,17 @@ child_handler (int sig)
now = time(0);
while ((pid=wait3(&status, WNOHANG, &rusage)) > 0) {
if (pid == mbdPid) {
- int sig = WTERMSIG(status);
+ int sig = WTERMSIG(status);
if (mbdExitCnt > 150)
- mbdExitCnt = 150;
- mbdExitVal = WIFSIGNALED(status);
- if (mbdExitVal) {
- ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5600,
- "mbatchd died with signal <%d> termination"), /* catgets 5600 */
- sig);
+ mbdExitCnt = 150;
+ mbdExitVal = WIFSIGNALED(status);
+ if (mbdExitVal) {
+ ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5600,
+ "mbatchd died with signal <%d> termination"), /* catgets 5600 */
+ sig);
if (WCOREDUMP(status))
- ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5601,
- "mbatchd core dumped")); /* catgets 5601 */
+ ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5601,
+ "mbatchd core dumped")); /* catgets 5601 */
mbdExitVal = sig;
if (mbdExitVal == lastMbdExitVal)
mbdExitCnt++;
@@ -142,86 +140,86 @@ child_handler (int sig)
mbdExitCnt = 0;
lastMbdExitVal = mbdExitVal;
}
- continue;
- } else {
- mbdExitVal = WEXITSTATUS(status);
+ continue;
+ } else {
+ mbdExitVal = WEXITSTATUS(status);
- if (mbdExitVal == lastMbdExitVal)
+ if (mbdExitVal == lastMbdExitVal)
mbdExitCnt++;
else {
mbdExitCnt = 0;
lastMbdExitVal = mbdExitVal;
}
- if (mbdExitVal == MASTER_RECONFIG) {
- ls_syslog(LOG_NOTICE, _i18n_msg_get(ls_catd , NL_SETN, 5602,
- "mbatchd resigned for reconfiguration")); /* catgets 5602 */
- start_master();
- } else
- ls_syslog(LOG_NOTICE, _i18n_msg_get(ls_catd , NL_SETN, 5603,
- "mbatchd exited with value <%d>"), /* catgets 5603 */
- mbdExitVal);
- continue;
- }
+ if (mbdExitVal == MASTER_RECONFIG) {
+ ls_syslog(LOG_NOTICE, _i18n_msg_get(ls_catd , NL_SETN, 5602,
+ "mbatchd resigned for reconfiguration")); /* catgets 5602 */
+ start_master();
+ } else
+ ls_syslog(LOG_NOTICE, _i18n_msg_get(ls_catd , NL_SETN, 5603,
+ "mbatchd exited with value <%d>"), /* catgets 5603 */
+ mbdExitVal);
+ continue;
+ }
}
- ls_ruunix2lsf (&rusage, &lsfRusage);
- cpuTime = lsfRusage.ru_utime + lsfRusage.ru_stime;
-
- for (jobCard = jobQueHead->forw; (jobCard != jobQueHead);
- jobCard = jobCard->forw) {
-
- if (jobCard->exitPid == pid) {
- jobCard->w_status = LS_STATUS(status);
- jobCard->exitPid = -1;
- if (logclass & LC_EXEC) {
- ls_syslog(LOG_DEBUG, I18N(5604,
- "child_handler: Job <%s> exitPid <%d> status <%d> exitcode <%d>"),/*catgets 5604*/
- lsb_jobid2str(jobCard->jobSpecs.jobId),
- pid, jobCard->w_status,
- WEXITSTATUS(status));
- }
- }
-
- if (jobCard->jobSpecs.jobPid == pid) {
- jobCard->collectedChild = TRUE;
- jobCard->cpuTime = cpuTime;
- jobCard->w_status = LS_STATUS(status);
- jobCard->exitPid = -1;
- memcpy ((char *) &jobCard->lsfRusage, (char *) &lsfRusage,
- sizeof (struct lsfRusage));
+ ls_ruunix2lsf (&rusage, &lsfRusage);
+ cpuTime = lsfRusage.ru_utime + lsfRusage.ru_stime;
+
+ for (jobCard = jobQueHead->forw; (jobCard != jobQueHead);
+ jobCard = jobCard->forw) {
+
+ if (jobCard->exitPid == pid) {
+ jobCard->w_status = LS_STATUS(status);
+ jobCard->exitPid = -1;
+ if (logclass & LC_EXEC) {
+ ls_syslog(LOG_DEBUG, I18N(5604,
+ "child_handler: Job <%s> exitPid <%d> status <%d> exitcode <%d>"),/*catgets 5604*/
+ lsb_jobid2str(jobCard->jobSpecs.jobId),
+ pid, jobCard->w_status,
+ WEXITSTATUS(status));
+ }
+ }
+
+ if (jobCard->jobSpecs.jobPid == pid) {
+ jobCard->collectedChild = TRUE;
+ jobCard->cpuTime = cpuTime;
+ jobCard->w_status = LS_STATUS(status);
+ jobCard->exitPid = -1;
+ memcpy ((char *) &jobCard->lsfRusage, (char *) &lsfRusage,
+ sizeof (struct lsfRusage));
jobCard->notReported++;
-
-
-
- if (sbd_finish_sleep < 0) {
- if (daemonParams[LSB_SBD_FINISH_SLEEP].paramValue) {
- errno = 0;
- sbd_finish_sleep = atoi(daemonParams[LSB_SBD_FINISH_SLEEP].paramValue);
- if (errno)
- sbd_finish_sleep = 1000;
- } else {
- sbd_finish_sleep=1000;
- }
- }
- if (sbd_finish_sleep > 0) {
- millisleep_(sbd_finish_sleep);
- }
-
- if (logclass & LC_EXEC) {
- ls_syslog(LOG_DEBUG, I18N(5605,
- "child_handler: Job <%s> Pid <%d> status <%d> exitcode <%d>"), /*catgets 5605*/
- lsb_jobid2str(jobCard->jobSpecs.jobId), pid,
- jobCard->w_status, WEXITSTATUS(status));
- }
- need_checkfinish = TRUE;
-
- break;
- }
- }
- }
-
-
-}
+
+
+
+ if (sbd_finish_sleep < 0) {
+ if (daemonParams[LSB_SBD_FINISH_SLEEP].paramValue) {
+ errno = 0;
+ sbd_finish_sleep = atoi(daemonParams[LSB_SBD_FINISH_SLEEP].paramValue);
+ if (errno)
+ sbd_finish_sleep = 1000;
+ } else {
+ sbd_finish_sleep=1000;
+ }
+ }
+ if (sbd_finish_sleep > 0) {
+ millisleep_(sbd_finish_sleep);
+ }
+
+ if (logclass & LC_EXEC) {
+ ls_syslog(LOG_DEBUG, I18N(5605,
+ "child_handler: Job <%s> Pid <%d> status <%d> exitcode <%d>"), /*catgets 5605*/
+ lsb_jobid2str(jobCard->jobSpecs.jobId), pid,
+ jobCard->w_status, WEXITSTATUS(status));
+ }
+ need_checkfinish = TRUE;
+
+ break;
+ }
+ }
+ }
+
+
+}
#ifndef BSIZE
#define BSIZE 1024
@@ -238,41 +236,41 @@ fcp(char *file1, char *file2, struct hostent *hp)
fd1 = myopen_(file1, O_RDONLY, 0, hp);
if (fd1 < 0)
- return -1;
+ return -1;
if (fstat(fd1, &sbuf) < 0) {
- ls_syslog(LOG_ERR, I18N_FUNC_S_FAIL_M, fname, "fstat", file1);
- close(fd1);
- return -1;
+ ls_syslog(LOG_ERR, I18N_FUNC_S_FAIL_M, fname, "fstat", file1);
+ close(fd1);
+ return -1;
}
fd2 = myopen_(file2, O_CREAT | O_TRUNC | O_WRONLY, (int) sbuf.st_mode, hp);
if (fd2 < 0) {
- ls_syslog(LOG_ERR, I18N_FUNC_S_FAIL_M, fname, "creat", file1);
- close(fd1);
- return -1;
+ ls_syslog(LOG_ERR, I18N_FUNC_S_FAIL_M, fname, "creat", file1);
+ close(fd1);
+ return -1;
}
for (;;) {
- cc = read(fd1, buf, BSIZE);
- if (cc == 0)
- break;
- if (cc < 0) {
- close(fd1);
- close(fd2);
- return -1;
- }
- if (write(fd2, buf, cc) != cc) {
- close(fd1);
- close(fd2);
- return -1;
- }
+ cc = read(fd1, buf, BSIZE);
+ if (cc == 0)
+ break;
+ if (cc < 0) {
+ close(fd1);
+ close(fd2);
+ return -1;
+ }
+ if (write(fd2, buf, cc) != cc) {
+ close(fd1);
+ close(fd2);
+ return -1;
+ }
}
close(fd1);
close(fd2);
return (0);
-}
+}
#include <sys/dir.h>
@@ -284,57 +282,56 @@ rmDir(char *dir)
char path[MAXPATHLEN];
if ((dirp = opendir(dir)) == NULL)
- return -1;
+ return -1;
readdir(dirp); readdir(dirp);
for (dp = readdir(dirp); dp != NULL; dp = readdir(dirp)) {
- sprintf (path, "%s/%s", dir, dp->d_name);
- rmdir (path);
+ sprintf (path, "%s/%s", dir, dp->d_name);
+ rmdir (path);
unlink (path);
}
closedir (dirp);
- return (rmdir(dir));
-}
+ return rmdir(dir);
+}
-void closeBatchSocket (void)
+void closeBatchSocket (void)
{
- if (batchSock > 0) {
+ if (batchSock > 0) {
chanClose_(batchSock);
batchSock = -1;
- }
-}
+ }
+}
void
getManagerId(struct sbdPackage *sbdPackage)
{
- static char fname[]="getManagerId";
- struct passwd *pw;
+ struct passwd *pw;
int i;
FREEUP(lsbManager);
if (sbdPackage->nAdmins <= 0) {
- ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5609,
- "%s: No LSF administrator defined in sbdPackage"),
- fname); /* catgets 5609 */
- die(FATAL_ERR);
+ ls_syslog(LOG_ERR, "\
+%s: No LSF administrator defined in sbdPackage from MBD.", __func__);
+ die(FATAL_ERR);
}
for (i = 0; i < sbdPackage->nAdmins; i++) {
- if ((pw = getpwlsfuser_(sbdPackage->admins[i])) != NULL) {
- lsbManager = safeSave(sbdPackage->admins[i]);
- managerId = pw->pw_uid;
- break;
- }
+ if ((pw = getpwlsfuser_(sbdPackage->admins[i])) != NULL) {
+ lsbManager = safeSave(sbdPackage->admins[i]);
+ managerId = pw->pw_uid;
+ break;
+ }
}
if (lsbManager == NULL) {
- ls_syslog(LOG_ERR, _i18n_msg_get(ls_catd , NL_SETN, 5609,
- "%s: No LSF administrator defined in sbdPackage"),
- fname); /* catgets 5609 */
- die(FATAL_ERR);
+ ls_syslog(LOG_ERR, "\
+%s: getpwlsfuser_() failed for LSF administrator defined in sbdPackage.\
+ Non uniform userid space?",
+ __func__);
+ die(FATAL_ERR);
}
-}
+}
View
4 spec/openlava.spec
@@ -138,6 +138,8 @@ install -m 755 $RPM_BUILD_DIR/%{name}-%{version}/lsf/lstools/lsmon $RPM_BUI
install -m 755 $RPM_BUILD_DIR/%{name}-%{version}/lsf/lstools/lsplace $RPM_BUILD_ROOT%{_openlavatop}/bin
install -m 755 $RPM_BUILD_DIR/%{name}-%{version}/lsf/lstools/lsrcp $RPM_BUILD_ROOT%{_openlavatop}/bin
install -m 755 $RPM_BUILD_DIR/%{name}-%{version}/lsf/lstools/lsrun $RPM_BUILD_ROOT%{_openlavatop}/bin
+install -m 755 $RPM_BUILD_DIR/%{name}-%{version}/lsf/lstools/lsaddhost $RPM_BUILD_ROOT%{_openlavatop}/bin
+install -m 755 $RPM_BUILD_DIR/%{name}-%{version}/lsf/lstools/lsrmhost $RPM_BUILD_ROOT%{_openlavatop}/bin
install -m 755 $RPM_BUILD_DIR/%{name}-%{version}/scripts/mpich2-mpiexec $RPM_BUILD_ROOT%{_openlavatop}/bin
install -m 755 $RPM_BUILD_DIR/%{name}-%{version}/scripts/mpich-mpirun $RPM_BUILD_ROOT%{_openlavatop}/bin
install -m 755 $RPM_BUILD_DIR/%{name}-%{version}/scripts/openmpi-mpirun $RPM_BUILD_ROOT%{_openlavatop}/bin
@@ -341,6 +343,8 @@ exit 0
%{_bindir}/lsplace
%{_bindir}/lsrcp
%{_bindir}/lsrun
+%{_bindir}/lsaddhost
+%{_bindir}/lsrmhost
# Man pages
%{_mandir}/man1/bbot.1
Please sign in to comment.
Something went wrong with that request. Please try again.