@@ -460,6 +460,7 @@ void orte_state_base_report_progress(int fd, short argc, void *cbdata)
460460}
461461
462462static void _send_notification (int status ,
463+ orte_proc_state_t state ,
463464 orte_process_name_t * proc ,
464465 orte_process_name_t * target )
465466{
@@ -485,19 +486,43 @@ static void _send_notification(int status,
485486 return ;
486487 }
487488
488- /* the source is me */
489- if (ORTE_SUCCESS != (rc = opal_dss .pack (buf , ORTE_PROC_MY_NAME , 1 , ORTE_NAME ))) {
489+ /* the source is the proc passed in by the caller, rather than the local process (ORTE_PROC_MY_NAME) */
490+ if (ORTE_SUCCESS != (rc = opal_dss .pack (buf , proc , 1 , ORTE_NAME ))) {
490491 ORTE_ERROR_LOG (rc );
491492 OBJ_RELEASE (buf );
492493 return ;
493494 }
494495
495- /* we are going to pass three opal_value_t's */
496- rc = 3 ;
497- if (ORTE_SUCCESS != (rc = opal_dss .pack (buf , & rc , 1 , OPAL_INT ))) {
498- ORTE_ERROR_LOG (rc );
499- OBJ_RELEASE (buf );
500- return ;
496+ if (OPAL_ERR_PROC_ABORTED == status ) {
497+ /* we will pass four opal_value_t's */
498+ rc = 4 ;
499+ if (ORTE_SUCCESS != (rc = opal_dss .pack (buf , & rc , 1 , OPAL_INT ))) {
500+ ORTE_ERROR_LOG (rc );
501+ OBJ_RELEASE (buf );
502+ return ;
503+ }
504+ /* pass along the affected proc(s) */
505+ OBJ_CONSTRUCT (& kv , opal_value_t );
506+ kv .key = strdup (OPAL_PMIX_EVENT_AFFECTED_PROC );
507+ kv .type = OPAL_NAME ;
508+ kv .data .name .jobid = proc -> jobid ;
509+ kv .data .name .vpid = proc -> vpid ;
510+ kvptr = & kv ;
511+ if (ORTE_SUCCESS != (rc = opal_dss .pack (buf , & kvptr , 1 , OPAL_VALUE ))) {
512+ ORTE_ERROR_LOG (rc );
513+ OBJ_DESTRUCT (& kv );
514+ OBJ_RELEASE (buf );
515+ return ;
516+ }
517+ OBJ_DESTRUCT (& kv );
518+ } else {
519+ /* we are going to pass three opal_value_t's */
520+ rc = 3 ;
521+ if (ORTE_SUCCESS != (rc = opal_dss .pack (buf , & rc , 1 , OPAL_INT ))) {
522+ ORTE_ERROR_LOG (rc );
523+ OBJ_RELEASE (buf );
524+ return ;
525+ }
501526 }
502527
503528 /* pass along the affected proc(s) */
@@ -699,19 +724,19 @@ void orte_state_base_track_procs(int fd, short argc, void *cbdata)
699724 /* notify everyone who asked for it */
700725 target .jobid = jdata -> jobid ;
701726 target .vpid = ORTE_VPID_WILDCARD ;
702- _send_notification (OPAL_ERR_JOB_TERMINATED , & target , ORTE_NAME_WILDCARD );
727+ _send_notification (OPAL_ERR_JOB_TERMINATED , pdata -> state , & target , ORTE_NAME_WILDCARD );
703728 } else {
704729 target .jobid = jdata -> jobid ;
705730 target .vpid = ORTE_VPID_WILDCARD ;
706- _send_notification (OPAL_ERR_JOB_TERMINATED , & target , & parent );
731+ _send_notification (OPAL_ERR_JOB_TERMINATED , pdata -> state , & target , & parent );
707732 }
708733 }
709734 } else if (ORTE_PROC_STATE_TERMINATED < pdata -> state &&
710735 !orte_job_term_ordered ) {
711736 /* if this was an abnormal term, notify the other procs of the termination */
712737 parent .jobid = jdata -> jobid ;
713738 parent .vpid = ORTE_VPID_WILDCARD ;
714- _send_notification (OPAL_ERR_PROC_ABORTED , & pdata -> name , & parent );
739+ _send_notification (OPAL_ERR_PROC_ABORTED , pdata -> state , & pdata -> name , & parent );
715740 }
716741 }
717742
0 commit comments