Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion opal/mca/pmix/pmix120/pmix/src/server/pmix_server.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
{
int debug_level;
char *tdir, *evar;
char * pmix_pid;
pid_t pid;

/* initialize the output system */
Expand Down Expand Up @@ -219,7 +220,14 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
/* now set the address - we use the pid here to reduce collisions */
memset(&myaddress, 0, sizeof(struct sockaddr_un));
myaddress.sun_family = AF_UNIX;
snprintf(myaddress.sun_path, sizeof(myaddress.sun_path)-1, "%s/pmix-%d", tdir, pid);
asprintf(&pmix_pid, "pmix-%d", pid);
// If the above set temporary directory name plus the pmix-PID string
// plus the '/' separator are too long, just fail, so the caller
// may provide the user with a proper help... *Cough*, *Cough* OSX...
if ((strlen(tdir) + strlen(pmix_pid) + 1) > sizeof(myaddress.sun_path)-1) {
return PMIX_ERR_INVALID_LENGTH;
}
snprintf(myaddress.sun_path, sizeof(myaddress.sun_path)-1, "%s/%s", tdir, pmix_pid);
asprintf(&myuri, "%s:%lu:%s", pmix_globals.myid.nspace, (unsigned long)pmix_globals.myid.rank, myaddress.sun_path);


Expand Down
13 changes: 7 additions & 6 deletions opal/mca/pmix/pmix120/pmix/src/server/pmix_server_listener.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,19 +72,20 @@ pmix_status_t pmix_start_listening(struct sockaddr_un *address)
{
int flags;
pmix_status_t rc;
unsigned int addrlen;
socklen_t addrlen;
char *ptr;

/* create a listen socket for incoming connection attempts */
pmix_server_globals.listen_socket = socket(PF_UNIX, SOCK_STREAM, 0);
if (pmix_server_globals.listen_socket < 0) {
printf("%s:%d socket() failed", __FILE__, __LINE__);
printf("%s:%d socket() failed\n", __FILE__, __LINE__);
return PMIX_ERROR;
}

addrlen = sizeof(struct sockaddr_un);
if (bind(pmix_server_globals.listen_socket, (struct sockaddr*)address, addrlen) < 0) {
printf("%s:%d bind() failed", __FILE__, __LINE__);
printf("%s:%d bind() failed error:%s\n", __FILE__, __LINE__,
strerror(errno));
return PMIX_ERROR;
}
/* set the mode as required */
Expand All @@ -95,18 +96,18 @@ pmix_status_t pmix_start_listening(struct sockaddr_un *address)

/* setup listen backlog to maximum allowed by kernel */
if (listen(pmix_server_globals.listen_socket, SOMAXCONN) < 0) {
printf("%s:%d listen() failed", __FILE__, __LINE__);
printf("%s:%d listen() failed\n", __FILE__, __LINE__);
return PMIX_ERROR;
}

/* set socket up to be non-blocking, otherwise accept could block */
if ((flags = fcntl(pmix_server_globals.listen_socket, F_GETFL, 0)) < 0) {
printf("%s:%d fcntl(F_GETFL) failed", __FILE__, __LINE__);
printf("%s:%d fcntl(F_GETFL) failed\n", __FILE__, __LINE__);
return PMIX_ERROR;
}
flags |= O_NONBLOCK;
if (fcntl(pmix_server_globals.listen_socket, F_SETFL, flags) < 0) {
printf("%s:%d fcntl(F_SETFL) failed", __FILE__, __LINE__);
printf("%s:%d fcntl(F_SETFL) failed\n", __FILE__, __LINE__);
return PMIX_ERROR;
}

Expand Down
2 changes: 1 addition & 1 deletion orte/mca/ess/base/ess_base_std_orted.c
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,7 @@ int orte_ess_base_orted_setup(char **hosts)
/* setup the PMIx server */
if (ORTE_SUCCESS != (ret = pmix_server_init())) {
ORTE_ERROR_LOG(ret);
error = "pmix server init";
error = "Try a shorter TMPDIR var. or change your computer's name (see uname -n), since pmix_server_init";
goto error;
}

Expand Down
2 changes: 1 addition & 1 deletion orte/mca/ess/hnp/ess_hnp_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -634,7 +634,7 @@ static int rte_init(void)
/* setup the PMIx server */
if (ORTE_SUCCESS != (ret = pmix_server_init())) {
ORTE_ERROR_LOG(ret);
error = "pmix server init";
error = "Try a shorter TMPDIR var. or change your computer's name (see uname -n), since pmix_server_init";
goto error;
}

Expand Down
3 changes: 3 additions & 0 deletions orte/orted/pmix/pmix_server.c
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,9 @@ int pmix_server_init(void)
if (ORTE_SUCCESS != (rc = opal_pmix.server_init(&pmix_server, &info))) {
ORTE_ERROR_LOG(rc);
/* memory cleanup will occur when finalize is called */
orte_show_help("help-orterun.txt", "orterun:pmix-failed", true,
orte_process_info.proc_session_dir);
return rc;
}
OPAL_LIST_DESTRUCT(&info);

Expand Down
9 changes: 9 additions & 0 deletions orte/tools/orterun/help-orterun.txt
Original file line number Diff line number Diff line change
Expand Up @@ -660,3 +660,12 @@ method and try launching your job again.

Your job will now abort.
#
[orterun:pmix-failed]
The call to pmix_server_init() failed. This may be due to your
system's limit on the path length of Unix domain sockets.

orte_proc_session_dir: %s

Please try setting TMPDIR to something short (like /tmp) or change
your computer's name (see uname -n).
#