Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update ML-related charts #12574

Merged
merged 5 commits into from Apr 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion database/rrd.h
Expand Up @@ -478,8 +478,8 @@ typedef enum rrdset_flags {
// least rrdset_free_obsolete_time seconds ago.
RRDSET_FLAG_ARCHIVED = 1 << 15,
RRDSET_FLAG_ACLK = 1 << 16,

RRDSET_FLAG_PENDING_FOREACH_ALARMS = 1 << 17, // contains dims with uninitialized foreach alarms
RRDSET_FLAG_ANOMALY_DETECTION = 1 << 18 // flag to identify anomaly detection charts.
} RRDSET_FLAGS;

#ifdef HAVE_C___ATOMIC
Expand Down
2 changes: 2 additions & 0 deletions ml/Config.cc
Expand Up @@ -133,4 +133,6 @@ void Config::readMLConfig(void) {
Cfg.ChartsToSkip = "anomaly_detection.* ";
Cfg.ChartsToSkip += config_get(ConfigSectionML, "charts to skip from training", "netdata.*");
Cfg.SP_ChartsToSkip = simple_pattern_create(ChartsToSkip.c_str(), NULL, SIMPLE_PATTERN_EXACT);

Cfg.StreamADCharts = config_get_boolean(ConfigSectionML, "stream anomaly detection charts", false);
}
2 changes: 2 additions & 0 deletions ml/Config.h
Expand Up @@ -33,6 +33,8 @@ class Config {
double ADWindowRateThreshold;
double ADDimensionRateThreshold;

bool StreamADCharts;

std::string HostsToSkip;
SIMPLE_PATTERN *SP_HostsToSkip;

Expand Down
127 changes: 68 additions & 59 deletions ml/Host.cc
Expand Up @@ -9,17 +9,6 @@

using namespace ml;

// Builds the per-host chart identifier and chart title.
//
// RH          - host whose machine_guid and hostname are appended.
// IdPrefix    - leading part of the id; joined to the machine GUID with "_".
// TitlePrefix - leading part of the title; joined to the hostname with " ".
//
// Returns {id, title} as a pair of strings.
static std::pair<std::string, std::string>
getHostSpecificIdAndTitle(RRDHOST *RH, const std::string &IdPrefix,
                          const std::string &TitlePrefix) {
    // "<IdPrefix>_<machine_guid>"
    std::ostringstream Id;
    Id << IdPrefix << "_" << RH->machine_guid;

    // "<TitlePrefix> <hostname>"
    std::ostringstream Title;
    Title << TitlePrefix << " " << RH->hostname;

    return std::make_pair(Id.str(), Title.str());
}

static void updateDimensionsChart(RRDHOST *RH,
collected_number NumTrainedDimensions,
collected_number NumNormalDimensions,
Expand All @@ -31,24 +20,28 @@ static void updateDimensionsChart(RRDHOST *RH,
static thread_local RRDDIM *NumAnomalousDimensionsRD = nullptr;

if (!RS) {
std::string IdPrefix = "dimensions";
std::string TitlePrefix = "Anomaly detection dimensions for host";
auto IdTitlePair = getHostSpecificIdAndTitle(RH, IdPrefix, TitlePrefix);
std::stringstream IdSS, NameSS, TitleSS;

RS = rrdset_create_localhost(
IdSS << "dimensions_on_" << localhost->machine_guid;
NameSS << "dimensions_on_" << localhost->hostname;
TitleSS << "Anomaly detection dimensions for host " << RH->hostname;

RS = rrdset_create(
RH,
"anomaly_detection", // type
IdTitlePair.first.c_str(), // id
NULL, // name
IdSS.str().c_str(), // id
NameSS.str().c_str(), // name
"dimensions", // family
"anomaly_detection.dimensions", // ctx
IdTitlePair.second.c_str(), // title
TitleSS.str().c_str(), // title
"dimensions", // units
"netdata", // plugin
"ml", // module
39183, // priority
RH->rrd_update_every, // update_every
RRDSET_TYPE_LINE // chart_type
);
rrdset_flag_set(RS, RRDSET_FLAG_ANOMALY_DETECTION);

NumTotalDimensionsRD = rrddim_add(RS, "total", NULL,
1, 1, RRD_ALGORITHM_ABSOLUTE);
Expand All @@ -74,24 +67,28 @@ static void updateRateChart(RRDHOST *RH, collected_number AnomalyRate) {
static thread_local RRDDIM *AnomalyRateRD = nullptr;

if (!RS) {
std::string IdPrefix = "anomaly_rate";
std::string TitlePrefix = "Percentage of anomalous dimensions for host";
auto IdTitlePair = getHostSpecificIdAndTitle(RH, IdPrefix, TitlePrefix);
std::stringstream IdSS, NameSS, TitleSS;

RS = rrdset_create_localhost(
IdSS << "anomaly_rate_on_" << localhost->machine_guid;
NameSS << "anomaly_rate_on_" << localhost->hostname;
TitleSS << "Percentage of anomalous dimensions for host " << RH->hostname;

RS = rrdset_create(
RH,
"anomaly_detection", // type
IdTitlePair.first.c_str(), // id
NULL, // name
IdSS.str().c_str(), // id
NameSS.str().c_str(), // name
"anomaly_rate", // family
"anomaly_detection.anomaly_rate", // ctx
IdTitlePair.second.c_str(), // title
TitleSS.str().c_str(), // title
"percentage", // units
"netdata", // plugin
"ml", // module
39184, // priority
RH->rrd_update_every, // update_every
RRDSET_TYPE_LINE // chart_type
);
rrdset_flag_set(RS, RRDSET_FLAG_ANOMALY_DETECTION);

AnomalyRateRD = rrddim_add(RS, "anomaly_rate", NULL,
1, 100, RRD_ALGORITHM_ABSOLUTE);
Expand All @@ -108,24 +105,28 @@ static void updateWindowLengthChart(RRDHOST *RH, collected_number WindowLength)
static thread_local RRDDIM *WindowLengthRD = nullptr;

if (!RS) {
std::string IdPrefix = "detector_window";
std::string TitlePrefix = "Anomaly detector window length for host";
auto IdTitlePair = getHostSpecificIdAndTitle(RH, IdPrefix, TitlePrefix);
std::stringstream IdSS, NameSS, TitleSS;

RS = rrdset_create_localhost(
IdSS << "detector_window_on_" << localhost->machine_guid;
NameSS << "detector_window_on_" << localhost->hostname;
TitleSS << "Anomaly detector window length for host " << RH->hostname;

RS = rrdset_create(
RH,
"anomaly_detection", // type
IdTitlePair.first.c_str(), // id
NULL, // name
IdSS.str().c_str(), // id
NameSS.str().c_str(), // name
"detector_window", // family
"anomaly_detection.detector_window", // ctx
IdTitlePair.second.c_str(), // title
TitleSS.str().c_str(), // title
"seconds", // units
"netdata", // plugin
"ml", // module
39185, // priority
RH->rrd_update_every, // update_every
RRDSET_TYPE_LINE // chart_type
);
rrdset_flag_set(RS, RRDSET_FLAG_ANOMALY_DETECTION);

WindowLengthRD = rrddim_add(RS, "duration", NULL,
1, 1, RRD_ALGORITHM_ABSOLUTE);
Expand All @@ -146,24 +147,28 @@ static void updateEventsChart(RRDHOST *RH,
static thread_local RRDDIM *NewAnomalyEventRD = nullptr;

if (!RS) {
std::string IdPrefix = "detector_events";
std::string TitlePrefix = "Anomaly events triggered for host";
auto IdTitlePair = getHostSpecificIdAndTitle(RH, IdPrefix, TitlePrefix);
std::stringstream IdSS, NameSS, TitleSS;

RS = rrdset_create_localhost(
IdSS << "detector_events_on_" << localhost->machine_guid;
NameSS << "detector_events_on_" << localhost->hostname;
TitleSS << "Anomaly events triggered for host " << RH->hostname;

RS = rrdset_create(
RH,
"anomaly_detection", // type
IdTitlePair.first.c_str(), // id
NULL, // name
IdSS.str().c_str(), // id
NameSS.str().c_str(), // name
"detector_events", // family
"anomaly_detection.detector_events", // ctx
IdTitlePair.second.c_str(), // title
TitleSS.str().c_str(), // title
"boolean", // units
"netdata", // plugin
"ml", // module
39186, // priority
RH->rrd_update_every, // update_every
RRDSET_TYPE_LINE // chart_type
);
rrdset_flag_set(RS, RRDSET_FLAG_ANOMALY_DETECTION);

AboveThresholdRD = rrddim_add(RS, "above_threshold", NULL,
1, 1, RRD_ALGORITHM_ABSOLUTE);
Expand All @@ -189,21 +194,23 @@ static void updateDetectionChart(RRDHOST *RH) {
static thread_local RRDDIM *UserRD, *SystemRD = nullptr;

if (!RS) {
std::string IdPrefix = "prediction_stats";
std::string TitlePrefix = "Prediction thread CPU usage for host";
auto IdTitlePair = getHostSpecificIdAndTitle(RH, IdPrefix, TitlePrefix);
std::stringstream IdSS, NameSS, TitleSS;

IdSS << "prediction_stats_" << RH->machine_guid;
NameSS << "prediction_stats_for_" << RH->hostname;
TitleSS << "Prediction thread CPU usage for host " << RH->hostname;

RS = rrdset_create_localhost(
"anomaly_detection", // type
IdTitlePair.first.c_str(), // id
NULL, // name
"prediction_stats", // family
"anomaly_detection.prediction_stats", // ctx
IdTitlePair.second.c_str(), // title
"netdata", // type
IdSS.str().c_str(), // id
NameSS.str().c_str(), // name
"ml", // family
"prediction_stats", // ctx
TitleSS.str().c_str(), // title
"milliseconds/s", // units
"netdata", // plugin
"ml", // module
39187, // priority
136000, // priority
RH->rrd_update_every, // update_every
RRDSET_TYPE_STACKED // chart_type
);
Expand All @@ -228,21 +235,23 @@ static void updateTrainingChart(RRDHOST *RH, struct rusage *TRU)
static thread_local RRDDIM *SystemRD = nullptr;

if (!RS) {
std::string IdPrefix = "training_stats";
std::string TitlePrefix = "Training thread CPU usage for host";
auto IdTitlePair = getHostSpecificIdAndTitle(RH, IdPrefix, TitlePrefix);
std::stringstream IdSS, NameSS, TitleSS;

IdSS << "training_stats_" << RH->machine_guid;
NameSS << "training_stats_for_" << RH->hostname;
TitleSS << "Training thread CPU usage for host " << RH->hostname;

RS = rrdset_create_localhost(
"anomaly_detection", // type
IdTitlePair.first.c_str(), // id
NULL, // name
"training_stats", // family
"anomaly_detection.training_stats", // ctx
IdTitlePair.second.c_str(), // title
"netdata", // type
IdSS.str().c_str(), // id
NameSS.str().c_str(), // name
"ml", // family
"training_stats", // ctx
TitleSS.str().c_str(), // title
"milliseconds/s", // units
"netdata", // plugin
"ml", // module
39188, // priority
136001, // priority
RH->rrd_update_every, // update_every
RRDSET_TYPE_STACKED // chart_type
);
Expand Down
4 changes: 4 additions & 0 deletions ml/ml-dummy.c
Expand Up @@ -58,4 +58,8 @@ void ml_dimension_update_name(RRDSET *RS, RRDDIM *RD, const char *name) {
(void) name;
}

/*
 * Stub implementation of ml_streaming_enabled() for ml-dummy.c.
 *
 * Always reports that anomaly detection charts are not to be streamed.
 * NOTE(review): presumably this file is only built when ML support is
 * compiled out - confirm against the surrounding #if guard.
 *
 * Declared with an explicit (void) parameter list so that, in a C
 * translation unit, the definition provides a real prototype.
 */
bool ml_streaming_enabled(void) {
    return false;
}

#endif
4 changes: 4 additions & 0 deletions ml/ml.cc
Expand Up @@ -214,6 +214,10 @@ void ml_dimension_update_name(RRDSET *RS, RRDDIM *RD, const char *Name) {
D->setAnomalyRateRDName(Name);
}

// Reports whether anomaly detection charts should be streamed, as recorded
// in the ML configuration flag Cfg.StreamADCharts.
bool ml_streaming_enabled() {
    const bool StreamingEnabled = Cfg.StreamADCharts;
    return StreamingEnabled;
}

#if defined(ENABLE_ML_TESTS)

#include "gtest/gtest.h"
Expand Down
2 changes: 2 additions & 0 deletions ml/ml.h
Expand Up @@ -44,6 +44,8 @@ void ml_process_rrdr(RRDR *R, int MaxAnomalyRates);

void ml_dimension_update_name(RRDSET *RS, RRDDIM *RD, const char *name);

bool ml_streaming_enabled();

#define ML_ANOMALY_RATES_CHART_ID "anomaly_detection.anomaly_rates"

#if defined(ENABLE_ML_TESTS)
Expand Down
3 changes: 3 additions & 0 deletions streaming/rrdpush.c
Expand Up @@ -133,6 +133,9 @@ static inline int should_send_chart_matching(RRDSET *st) {
if (unlikely(st->state->is_ar_chart))
return false;

if (rrdset_flag_check(st, RRDSET_FLAG_ANOMALY_DETECTION))
return ml_streaming_enabled();

if(unlikely(!rrdset_flag_check(st, RRDSET_FLAG_ENABLED))) {
rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_SEND);
rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_IGNORE);
Expand Down