quattor · jrha · Sep 4, 2014 · Sep 3, 2014 · Sep 3, 2014 · Sep 3, 2014
diff --git a/src/main/scripts/ncm-cdispd b/src/main/scripts/ncm-cdispd
@@ -268,8 +268,10 @@ use LC::Exception qw ( throw_error);
 use EDG::WP4::CCM::CacheManager;
 use CDISPD::Application;
 use CDISPD::Utils qw(COMP_CONFIG_PATH compare_profiles add_component clean_ICList);
+use CAF::FileEditor;
 
 use constant CONFIG_ROOT => "/";
+use constant NCD_EXECUTABLE => "/usr/sbin/ncm-ncd";
 
 our $this_app;
 our %SIG;
@@ -563,7 +565,7 @@ sub launch_ncd {
 
     my $result = 0;    # Assume success
 
-    my @cmd = ( '/usr/sbin/ncm-ncd', '--configure' );
+    my $p = CAF::Process->new([NCD_EXECUTABLE, '--configure'] , log => $this_app );
     if (   defined( $this_app->{ICLIST} ) && scalar(@{$this_app->{ICLIST}}) ) {
         # At this point, ICLIST should contain only components present in the last profile received.
         # The only case where a component may be in the list without being part of the configuration 
@@ -574,69 +576,104 @@ sub launch_ncd {
         #     X failed with profile N+1) and thus X removal is not detected.
         # As a result, X remains on the list of component to run. This should be harmless as ncm-ncd will ignore it.
         # This is probably rare enough to avoid complex processing to handle this in ncm-dispd.
-        push @cmd, @{ $this_app->{ICLIST} };
+        $p->pushargs(@{$this_app->{ICLIST}});
     } else {
         $this_app->info("no components to be run by NCM - ncm-ncd won't be called");
         return (0);
     }
 
     # ncd options
     if ( $this_app->option('state') ) {
-        push @cmd, "--state", $this_app->option('state');
+        $p->pushargs("--state", $this_app->option('state'));
     }
     if ( $this_app->option('ncd-retries') ) {
-        push @cmd, "--retries", $this_app->option('ncd-retries');
+        $p->pushargs("--retries", $this_app->option('ncd-retries'));
     }
     if ( $this_app->option('ncd-timeout') ) {
-        push @cmd, "--timeout", $this_app->option('ncd-timeout');
+        $p->pushargs("--timeout", $this_app->option('ncd-timeout'));
     }
     if ( $this_app->option('ncd-useprofile') ) {
-        push @cmd, "--useprofile", $this_app->option('ncd-useprofile');
+        $p->pushargs("--useprofile", $this_app->option('ncd-useprofile'));
     }
 
-    my $cmd_line = join( " ", @cmd );
     if ( $this_app->option('noaction') ) {
-        $this_app->info( "would run (noaction mode): " . $cmd_line );
+        $this_app->info( "would run (noaction mode): $p");
     } else {
-        $this_app->info( "about to run: " . $cmd_line );
-        my $verb = $cmd[0];
-        if ( -x $verb ) {
+        $this_app->info( "about to run: $p");
+        if ( $p->is_executable() ) {
             # Delay processing of some signals
             delay_signals();
 
             # Execute ncm-ncd and report exit status
-            my $p = CAF::Process->new( \@cmd, log => $this_app );
             sub act {
                 my ($logger, $message) = @_;
-                $logger->debug(1, $message);
+                $logger->verbose($message);
             }
             my $errormsg = $p->stream_output(\&act, mode => 'line', arguments => [$this_app]);
 
             my $ec = $?;
-            $this_app->debug(3, "ncm-ncd finished with full message $errormsg");
-            my $msg = "ncm-ncd finished with status: exitcode ". ($ec >> 8) . " (ec $ec)";
+            my $msg = "ncm-ncd finished with status: ". ($ec >> 8) . " (ec $ec";
+            my $log_level = 'info';
             if ( $ec ) {
-                $msg .= " (some configuration modules failed to run successfully)";
+                log_failed_components();
+                $log_level = 'warn';
+                $msg .= ", some configuration modules failed to run successfully)";
                 $result = 1;
             } else {
-                $msg .= " (all configuration modules ran successfully)";
+                $msg .= ", all configuration modules ran successfully)";
             }
-            $this_app->info($msg);
+            $this_app->$log_level($msg);
 
             # Process delayed signal if any and reestablish
             # immediate processing of signals
             process_signal();
             immediate_signals();
 
         } else {
-            $this_app->error("Command $verb not found");
+            $this_app->error("Command ". ${$p->get_command()}[0] . " not found or not executable");
             $result = 1;
         }
     }
     return $result;
 }
 
 
+# log_failed_components()
+#
+# Scan the ncm-ncd component state directory (the 'state' option) and, for each
+# regular file found there (an entry is expected to denote a failed component),
+# log a warning with the component name and the file contents (failure reason).
+#
+# Arguments: none (options and logging go through the global $this_app)
+#
+# Return value: none
+#
+sub log_failed_components {
+    my $comp_state_dir = $this_app->option('state');
+    unless ( $comp_state_dir ) {
+        $this_app->debug(1,"No component state file defined: cannot list failed components");
+        return;
+    }
+
+    # Report the system error too, so the cause (missing directory, permissions...) is visible
+    opendir(my $dh, $comp_state_dir) or do {
+        $this_app->error("Failed to open component state directory ($comp_state_dir): $!");
+        return;
+    };
+    my @comps = grep { -f "$comp_state_dir/$_" } readdir($dh);
+    closedir $dh;    # all names are collected: no need to keep the handle while reading files
+
+    $this_app->warn("No failed component found in the component state directory ($comp_state_dir)") if ( @comps == 0 );
+    foreach my $component (sort(@comps)) {
+        my $fh = CAF::FileEditor->new("$comp_state_dir/$component");
+        my $comp_msg = "$fh";    # stringifying the editor object yields the file contents
+        chomp $comp_msg;
+        $this_app->warn("Component $component failed with message: $comp_msg");
+        $fh->close();
+    }
+}
+
+
 #------------------------------------------------------------
 # main loop
 #------------------------------------------------------------