Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

introduce new chart for process states metrics #12305

Merged
merged 11 commits into from
Mar 8, 2022
1 change: 1 addition & 0 deletions collectors/all.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#define NETDATA_CHART_PRIO_SYSTEM_IP 501
#define NETDATA_CHART_PRIO_SYSTEM_IPV6 502
#define NETDATA_CHART_PRIO_SYSTEM_PROCESSES 600
#define NETDATA_CHART_PRIO_SYSTEM_PROCESS_STATES 601
#define NETDATA_CHART_PRIO_SYSTEM_FORKS 700
#define NETDATA_CHART_PRIO_SYSTEM_ACTIVE_PROCESSES 750
#define NETDATA_CHART_PRIO_SYSTEM_CTXT 800
Expand Down
84 changes: 75 additions & 9 deletions collectors/apps.plugin/apps_plugin.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* Released under GPL v3+
*/

#include "../all.h"
ilyam8 marked this conversation as resolved.
Show resolved Hide resolved
#include "libnetdata/libnetdata.h"
#include "libnetdata/required_dummies.h"

Expand Down Expand Up @@ -107,6 +108,25 @@ static int
static char *user_config_dir = CONFIG_DIR;
static char *stock_config_dir = LIBCONFIG_DIR;

// Bookkeeping for the number of processes found in each process state.
//
// proc_state lists the states that are tracked. Every enumerator except the
// PROC_STATUS_END sentinel is used as an index into both proc_state_count[]
// and proc_states[], so the three declarations below must stay in sync.
typedef enum {
    PROC_STATUS_RUNNING = 0,
    PROC_STATUS_SLEEPING_D, // uninterruptible sleep
    PROC_STATUS_SLEEPING,   // interruptible sleep
    PROC_STATUS_ZOMBIE,
    PROC_STATUS_STOPPED,
    PROC_STATUS_END,        // sentinel: number of tracked states, not a state itself
} proc_state;

// Number of processes seen in each state, indexed by proc_state.
// The elements are counts, not states, so they use an unsigned integer type
// instead of the enum type (whose range is only guaranteed to cover its own
// enumerators).
static size_t proc_state_count[PROC_STATUS_END];

// Name reported for each state, indexed by proc_state.
// The explicit size keeps this table the same length as proc_state_count[];
// the pointers are const because the table is never modified.
static const char *const proc_states[PROC_STATUS_END] = {
    [PROC_STATUS_RUNNING]    = "running",
    [PROC_STATUS_SLEEPING]   = "sleeping_interruptible",
    [PROC_STATUS_SLEEPING_D] = "sleeping_uninterruptible",
    [PROC_STATUS_ZOMBIE]     = "zombie",
    [PROC_STATUS_STOPPED]    = "stopped",
};

// ----------------------------------------------------------------------------
// internal flags
// handled in code (automatically set)
Expand Down Expand Up @@ -286,7 +306,7 @@ struct pid_stat {

uint32_t log_thrown;

// char state;
char state;
int32_t ppid;
// int32_t pgrp;
// int32_t session;
Expand Down Expand Up @@ -1234,6 +1254,28 @@ void arl_callback_status_rssshmem(const char *name, uint32_t hash, const char *v
}
#endif // !__FreeBSD__

// Account one process in proc_state_count[] according to its state letter.
//
// proc_state is a single state character: 'R' (running), 'S' (interruptible
// sleep), 'D' (uninterruptible sleep), 'Z' (zombie) or 'T' (stopped).
// Any other character is ignored and no counter is changed.
static void update_proc_state_count(char proc_state) {
    // NOTE: the parameter name hides the proc_state typedef inside this
    // function, so the selected slot is kept in a plain int.
    int slot;

    if (proc_state == 'S')
        slot = PROC_STATUS_SLEEPING;
    else if (proc_state == 'R')
        slot = PROC_STATUS_RUNNING;
    else if (proc_state == 'D')
        slot = PROC_STATUS_SLEEPING_D;
    else if (proc_state == 'Z')
        slot = PROC_STATUS_ZOMBIE;
    else if (proc_state == 'T')
        slot = PROC_STATUS_STOPPED;
    else
        return; // untracked state letter: nothing to count

    proc_state_count[slot] += 1;
}

static inline int read_proc_pid_status(struct pid_stat *p, void *ptr) {
p->status_vmsize = 0;
p->status_vmrss = 0;
Expand Down Expand Up @@ -1268,6 +1310,7 @@ static inline int read_proc_pid_status(struct pid_stat *p, void *ptr) {
arl_expect_custom(p->status_arl, "VmSwap", arl_callback_status_vmswap, &arl_ptr);
}


if(unlikely(!p->status_filename)) {
char filename[FILENAME_MAX + 1];
snprintfz(filename, FILENAME_MAX, "%s/proc/%d/status", netdata_configured_host_prefix, p->pid);
Expand Down Expand Up @@ -1313,7 +1356,6 @@ static inline int read_proc_pid_stat(struct pid_stat *p, void *ptr) {

#ifdef __FreeBSD__
struct kinfo_proc *proc_info = (struct kinfo_proc *)ptr;

if (unlikely(proc_info->ki_tdflags & TDF_IDLETD))
goto cleanup;
#else
Expand Down Expand Up @@ -1348,15 +1390,14 @@ static inline int read_proc_pid_stat(struct pid_stat *p, void *ptr) {
#else
// p->pid = str2pid_t(procfile_lineword(ff, 0, 0));
char *comm = procfile_lineword(ff, 0, 1);
// p->state = *(procfile_lineword(ff, 0, 2));
p->state = *(procfile_lineword(ff, 0, 2));
p->ppid = (int32_t)str2pid_t(procfile_lineword(ff, 0, 3));
// p->pgrp = (int32_t)str2pid_t(procfile_lineword(ff, 0, 4));
// p->session = (int32_t)str2pid_t(procfile_lineword(ff, 0, 5));
// p->tty_nr = (int32_t)str2pid_t(procfile_lineword(ff, 0, 6));
// p->tpgid = (int32_t)str2pid_t(procfile_lineword(ff, 0, 7));
// p->flags = str2uint64_t(procfile_lineword(ff, 0, 8));
#endif

if(strcmp(p->comm, comm) != 0) {
if(unlikely(debug_enabled)) {
if(p->comm[0])
Expand Down Expand Up @@ -1454,7 +1495,7 @@ static inline int read_proc_pid_stat(struct pid_stat *p, void *ptr) {
p->cstime = 0;
p->cgtime = 0;
}

update_proc_state_count(p->state);
return 1;

cleanup:
Expand Down Expand Up @@ -2534,6 +2575,8 @@ static inline int collect_data_for_pid(pid_t pid, void *ptr) {
static int collect_data_for_all_processes(void) {
struct pid_stat *p = NULL;

// clear process state counter
memset(proc_state_count, 0, sizeof proc_state_count);
#ifdef __FreeBSD__
int i, procnum;

Expand Down Expand Up @@ -2608,8 +2651,9 @@ static int collect_data_for_all_processes(void) {
// we forward read all running processes
// collect_data_for_pid() is smart enough,
// not to read the same pid twice per iteration
for(slc = 0; slc < all_pids_count; slc++)
for(slc = 0; slc < all_pids_count; slc++) {
collect_data_for_pid(all_pids_sortlist[slc], NULL);
}
}
#endif
}
Expand Down Expand Up @@ -2666,7 +2710,6 @@ static int collect_data_for_all_processes(void) {
// we do this by collecting the ownership of process
// if we manage to get the ownership, the process still runs
process_exited_processes();

return 1;
}

Expand Down Expand Up @@ -3640,7 +3683,7 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type
debug_log_int("%s just added - regenerating charts.", w->name);
}
}

// nothing more to show
if(!newly_added && show_guest_time == show_guest_time_old) return;

Expand Down Expand Up @@ -3806,6 +3849,29 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type
}
}

// Write the system.processes_state chart to stdout.
//
// On the first call the CHART line and one DIMENSION line per tracked state
// are emitted; on every call (including the first) the current values of
// proc_state_count[] are sent between send_BEGIN() and send_END().
//
// dt is handed to send_BEGIN() unchanged.
static void send_proc_states_count(usec_t dt)
{
    // remembers across calls that the chart definition was already written
    static bool definition_sent = false;

    if (!definition_sent) {
        fprintf(
            stdout,
            "CHART system.processes_state '' 'System Processes State' 'processes' processes system.processes_state line %d %d\n",
            NETDATA_CHART_PRIO_SYSTEM_PROCESS_STATES,
            update_every);

        // one dimension per tracked state, named after proc_states[]
        proc_state dim = PROC_STATUS_RUNNING;
        while (dim < PROC_STATUS_END) {
            fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", proc_states[dim]);
            dim++;
        }

        definition_sent = true;
    }

    // values collected for this iteration
    send_BEGIN("system", "processes_state", dt);

    proc_state state = PROC_STATUS_RUNNING;
    while (state < PROC_STATUS_END) {
        send_SET(proc_states[state], proc_state_count[state]);
        state++;
    }

    send_END();
}

// ----------------------------------------------------------------------------
// parse command line arguments
Expand Down Expand Up @@ -4181,10 +4247,10 @@ int main(int argc, char **argv) {
normalize_utilization(apps_groups_root_target);

send_resource_usage_to_netdata(dt);
send_proc_states_count(dt);

// this is smart enough to show only newly added apps, when needed
send_charts_updates_to_netdata(apps_groups_root_target, "apps", "Apps");

if(likely(enable_users_charts))
send_charts_updates_to_netdata(users_root_target, "users", "Users");

Expand Down
13 changes: 13 additions & 0 deletions web/gui/dashboard_info.js
Original file line number Diff line number Diff line change
Expand Up @@ -1218,6 +1218,19 @@ netdataDashboard.context = {
'<b>Blocked</b> - currently blocked, waiting for I/O to complete.</p>'
},

'system.processes_state': {
info: '<p>The number of processes in different states. </p> '+
'<p><b>Running</b> - executing and using the CPU at a particular moment. '+
ilyam8 marked this conversation as resolved.
Show resolved Hide resolved
'<b>Sleeping(uninterruptible)</b> - will wake only as a result of a waited-upon resource becoming available or after a time-out occurs during that wait. '+
ilyam8 marked this conversation as resolved.
Show resolved Hide resolved
'Mostly used by device drivers waiting for disk or network I/O. '+
'<b>Sleeping(interruptible)</b> - waiting either for a particular time slot or for a particular event to occur. '+
ilyam8 marked this conversation as resolved.
Show resolved Hide resolved
'<b>Zombie</b> - have completed their execution, released the system resources, but their entries are not removed from the process table. '+
'Usually occur for child processes, as the parent process still needs to read its child’s exit status. '+
'Processes that stay zombies for a long time are generally an error and cause the system PID space leak. '+
ilyam8 marked this conversation as resolved.
Show resolved Hide resolved
'<b>Stopped</b> - suspended from proceeding further (STOP/TSTP signals). ' +
'In this state a process will not do anything until it receives a CONT signal, not even terminate.</p>'
ilyam8 marked this conversation as resolved.
Show resolved Hide resolved
},

'system.active_processes': {
info: 'The total number of processes in the system.'
},
Expand Down