Skip to content

Commit

Permalink
API: support metrics for prometheus.(#2899) (#3189)
Browse files Browse the repository at this point in the history
* API: support metrics for prometheus.

* Metrics: optimize metrics statistics info.

* Refine: remove redundant code.

* Refine: fix metrics srs_streams param.

* Metrics: add major param.

* Metrics: refine params and metric comments.

* For #2899: API: Support exporter for Prometheus. v5.0.67

Co-authored-by: winlin <winlin@vip.126.com>
  • Loading branch information
chundonglinlin and winlinvip committed Sep 27, 2022
1 parent e31f3b0 commit 981cab4
Show file tree
Hide file tree
Showing 17 changed files with 236 additions and 10 deletions.
15 changes: 15 additions & 0 deletions trunk/conf/full.conf
Expand Up @@ -657,6 +657,21 @@ tencentcloud_apm {
debug_logging off;
}

# Prometheus exporter config.
# See https://prometheus.io/docs/instrumenting/exporters
exporter {
# Whether exporter is enabled.
# Overwrite by env SRS_EXPORTER_ENABLED
# Default: off
enabled off;
# The logging label to category the cluster servers.
# Overwrite by env SRS_EXPORTER_LABEL
label cn-beijing;
# The logging tag to category the cluster servers.
# Overwrite by env SRS_EXPORTER_TAG
tag cn-edge;
}

#############################################################################################
# heartbeat/stats sections
#############################################################################################
Expand Down
1 change: 1 addition & 0 deletions trunk/doc/CHANGELOG.md
Expand Up @@ -7,6 +7,7 @@ The changelog for SRS.

## SRS 5.0 Changelog

* v5.0, 2022-09-27, For [#2899](https://github.com/ossrs/srs/issues/2899): API: Support exporter for Prometheus. v5.0.67
* v5.0, 2022-09-27, For [#3167](https://github.com/ossrs/srs/issues/3167): WebRTC: Refine sequence jitter algorithm. v5.0.66
* v5.0, 2022-09-22, Fix [#3164](https://github.com/ossrs/srs/issues/3164): SRT: Choppy when audio ts gap is too large. v5.0.65
* v5.0, 2022-09-16, APM: Support distributed tracing by Tencent Cloud APM. v5.0.64
Expand Down
58 changes: 58 additions & 0 deletions trunk/src/app/srs_app_config.cpp
Expand Up @@ -2227,6 +2227,7 @@ srs_error_t SrsConfig::check_normal_config()
&& n != "inotify_auto_reload" && n != "auto_reload_for_docker" && n != "tcmalloc_release_rate"
&& n != "query_latest_version" && n != "first_wait_for_qlv" && n != "threads"
&& n != "circuit_breaker" && n != "is_full" && n != "in_docker" && n != "tencentcloud_cls"
&& n != "exporter"
) {
return srs_error_new(ERROR_SYSTEM_CONFIG_INVALID, "illegal directive %s", n.c_str());
}
Expand Down Expand Up @@ -3563,6 +3564,63 @@ bool SrsConfig::get_tencentcloud_apm_debug_logging()
return SRS_CONF_PERFER_FALSE(conf->arg0());
}

bool SrsConfig::get_exporter_enabled()
{
SRS_OVERWRITE_BY_ENV_BOOL("SRS_EXPORTER_ENABLED");

static bool DEFAULT = false;

SrsConfDirective* conf = root->get("exporter");
if (!conf) {
return DEFAULT;
}

conf = conf->get("enabled");
if (!conf) {
return DEFAULT;
}

return SRS_CONF_PERFER_FALSE(conf->arg0());
}

string SrsConfig::get_exporter_label()
{
SRS_OVERWRITE_BY_ENV_STRING("SRS_EXPORTER_LABEL");

static string DEFAULT = "";

SrsConfDirective* conf = root->get("exporter");
if (!conf) {
return DEFAULT;
}

conf = conf->get("label");
if (!conf) {
return DEFAULT;
}

return conf->arg0();
}

string SrsConfig::get_exporter_tag()
{
SRS_OVERWRITE_BY_ENV_STRING("SRS_EXPORTER_TAG");

static string DEFAULT = "";

SrsConfDirective* conf = root->get("exporter");
if (!conf) {
return DEFAULT;
}

conf = conf->get("tag");
if (!conf) {
return DEFAULT;
}

return conf->arg0();
}

vector<SrsConfDirective*> SrsConfig::get_stream_casters()
{
srs_assert(root);
Expand Down
5 changes: 5 additions & 0 deletions trunk/src/app/srs_app_config.hpp
Expand Up @@ -1086,6 +1086,11 @@ class SrsConfig
// The device name configed in args of directive.
// @return the disk device name to stat. NULL if not configed.
virtual SrsConfDirective* get_stats_disk_device();
public:
// Get Prometheus exporter config.
virtual bool get_exporter_enabled();
virtual std::string get_exporter_label();
virtual std::string get_exporter_tag();
};

#endif
Expand Down
93 changes: 93 additions & 0 deletions trunk/src/app/srs_app_http_api.cpp
Expand Up @@ -1062,3 +1062,96 @@ srs_error_t SrsGoApiTcmalloc::serve_http(ISrsHttpResponseWriter* w, ISrsHttpMess
}
#endif


SrsGoApiMetrics::SrsGoApiMetrics()
{
enabled_ = _srs_config->get_exporter_enabled();
label_ = _srs_config->get_exporter_label();
tag_ = _srs_config->get_exporter_tag();
}

SrsGoApiMetrics::~SrsGoApiMetrics()
{
}

srs_error_t SrsGoApiMetrics::serve_http(ISrsHttpResponseWriter* w, ISrsHttpMessage* r)
{
// whether enabled the HTTP Metrics API.
if (!enabled_) {
return srs_api_response_code(w, r, ERROR_EXPORTER_DISABLED);
}

/*
* build_info gauge
* send_bytes_total counter
* receive_bytes_total counter
* streams gauge
* clients gauge
* clients_total counter
* error counter
*/

SrsStatistic* stat = SrsStatistic::instance();

std::stringstream ss;

// Build info from Config.
ss << "# HELP srs_build_info A metric with a constant '1' value labeled by build_date, version from which SRS was built.\n"
<< "# TYPE srs_build_info gauge\n"
<< "srs_build_info{"
<< "build_date=\"" << SRS_BUILD_DATE << "\","
<< "major=\"" << VERSION_MAJOR << "\","
<< "version=\"" << RTMP_SIG_SRS_VERSION << "\","
<< "code=\"" << RTMP_SIG_SRS_CODE<< "\"";
if (!label_.empty()) ss << ",label=\"" << label_ << "\"";
if (!tag_.empty()) ss << ",tag=\"" << tag_ << "\"";
ss << "} 1\n";

// Dump metrics by statistic.
int64_t send_bytes, recv_bytes, nstreams, nclients, total_nclients, nerrs;
stat->dumps_metrics(send_bytes, recv_bytes, nstreams, nclients, total_nclients, nerrs);

// The total of bytes sent.
ss << "# HELP srs_send_bytes_total SRS total sent bytes.\n"
<< "# TYPE srs_send_bytes_total counter\n"
<< "srs_send_bytes_total "
<< send_bytes
<< "\n";

// The total of bytes received.
ss << "# HELP srs_receive_bytes_total SRS total received bytes.\n"
<< "# TYPE srs_receive_bytes_total counter\n"
<< "srs_receive_bytes_total "
<< recv_bytes
<< "\n";

// Current number of online streams.
ss << "# HELP srs_streams The number of SRS concurrent streams.\n"
<< "# TYPE srs_streams gauge\n"
<< "srs_streams "
<< nstreams
<< "\n";

// Current number of online clients.
ss << "# HELP srs_clients The number of SRS concurrent clients.\n"
<< "# TYPE srs_clients gauge\n"
<< "srs_clients "
<< nclients
<< "\n";

// The total of clients connections.
ss << "# HELP srs_clients_total The total counts of SRS clients.\n"
<< "# TYPE srs_clients_total counter\n"
<< "srs_clients_total "
<< total_nclients
<< "\n";

// The total of clients errors.
ss << "# HELP srs_clients_errs_total The total errors of SRS clients.\n"
<< "# TYPE srs_clients_errs_total counter\n"
<< "srs_clients_errs_total "
<< nerrs
<< "\n";

return srs_api_response(w, r, ss.str());
}
13 changes: 13 additions & 0 deletions trunk/src/app/srs_app_http_api.hpp
Expand Up @@ -216,5 +216,18 @@ class SrsGoApiTcmalloc : public ISrsHttpHandler
};
#endif

class SrsGoApiMetrics : public ISrsHttpHandler
{
private:
bool enabled_;
std::string label_;
std::string tag_;
public:
SrsGoApiMetrics();
virtual ~SrsGoApiMetrics();
public:
virtual srs_error_t serve_http(ISrsHttpResponseWriter* w, ISrsHttpMessage* r);
};

#endif

2 changes: 1 addition & 1 deletion trunk/src/app/srs_app_http_conn.cpp
Expand Up @@ -406,7 +406,7 @@ srs_error_t SrsHttpxConn::on_conn_done(srs_error_t r0)
{
// Only stat the HTTP streaming clients, ignore all API clients.
if (enable_stat_) {
SrsStatistic::instance()->on_disconnect(get_id().c_str());
SrsStatistic::instance()->on_disconnect(get_id().c_str(), r0);
SrsStatistic::instance()->kbps_add_delta(get_id().c_str(), conn->delta());
}

Expand Down
3 changes: 2 additions & 1 deletion trunk/src/app/srs_app_http_static.cpp
Expand Up @@ -331,7 +331,8 @@ srs_error_t SrsHlsStream::on_timer(srs_utime_t interval)
http_hooks_on_stop(info->req);

SrsStatistic* stat = SrsStatistic::instance();
stat->on_disconnect(ctx);
// TODO: FIXME: Should finger out the err.
stat->on_disconnect(ctx, srs_success);

map_ctx_info_.erase(it);
srs_freep(info);
Expand Down
6 changes: 4 additions & 2 deletions trunk/src/app/srs_app_rtc_conn.cpp
Expand Up @@ -458,7 +458,8 @@ SrsRtcPlayStream::~SrsRtcPlayStream()

// update the statistic when client coveried.
SrsStatistic* stat = SrsStatistic::instance();
stat->on_disconnect(cid_.c_str());
// TODO: FIXME: Should finger out the err.
stat->on_disconnect(cid_.c_str(), srs_success);
}

srs_error_t SrsRtcPlayStream::initialize(SrsRequest* req, std::map<uint32_t, SrsRtcTrackDescription*> sub_relations)
Expand Down Expand Up @@ -1108,7 +1109,8 @@ SrsRtcPublishStream::~SrsRtcPublishStream()

// update the statistic when client coveried.
SrsStatistic* stat = SrsStatistic::instance();
stat->on_disconnect(cid_.c_str());
// TODO: FIXME: Should finger out the err.
stat->on_disconnect(cid_.c_str(), srs_success);
}

srs_error_t SrsRtcPublishStream::initialize(SrsRequest* r, SrsRtcSourceDescription* stream_desc)
Expand Down
2 changes: 1 addition & 1 deletion trunk/src/app/srs_app_rtc_network.cpp
Expand Up @@ -749,7 +749,7 @@ srs_error_t SrsRtcTcpConn::cycle()
srs_error_t err = do_cycle();

// Only stat the HTTP streaming clients, ignore all API clients.
SrsStatistic::instance()->on_disconnect(get_id().c_str());
SrsStatistic::instance()->on_disconnect(get_id().c_str(), err);
SrsStatistic::instance()->kbps_add_delta(get_id().c_str(), delta_);

// Because we use manager to manage this object, not the http connection object, so we must remove it here.
Expand Down
2 changes: 1 addition & 1 deletion trunk/src/app/srs_app_rtmp_conn.cpp
Expand Up @@ -1551,7 +1551,7 @@ srs_error_t SrsRtmpConn::cycle()
// Update statistic when done.
SrsStatistic* stat = SrsStatistic::instance();
stat->kbps_add_delta(get_id().c_str(), delta_);
stat->on_disconnect(get_id().c_str());
stat->on_disconnect(get_id().c_str(), err);

// Notify manager to remove it.
// Note that we create this object, so we use manager to remove it.
Expand Down
4 changes: 4 additions & 0 deletions trunk/src/app/srs_app_server.cpp
Expand Up @@ -887,6 +887,10 @@ srs_error_t SrsServer::http_handle()
return srs_error_wrap(err, "handle tests errors");
}
#endif
// metrics by prometheus
if ((err = http_api_mux->handle("/metrics", new SrsGoApiMetrics())) != srs_success) {
return srs_error_wrap(err, "handle tests errors");
}

// TODO: FIXME: for console.
// TODO: FIXME: support reload.
Expand Down
2 changes: 1 addition & 1 deletion trunk/src/app/srs_app_srt_conn.cpp
Expand Up @@ -228,7 +228,7 @@ srs_error_t SrsMpegtsSrtConn::cycle()
// Update statistic when done.
SrsStatistic* stat = SrsStatistic::instance();
stat->kbps_add_delta(get_id().c_str(), delta_);
stat->on_disconnect(get_id().c_str());
stat->on_disconnect(get_id().c_str(), err);

// Notify manager to remove it.
// Note that we create this object, so we use manager to remove it.
Expand Down
27 changes: 26 additions & 1 deletion trunk/src/app/srs_app_statistic.cpp
Expand Up @@ -241,6 +241,9 @@ SrsStatistic* SrsStatistic::_instance = NULL;
SrsStatistic::SrsStatistic()
{
kbps = new SrsKbps();

nb_clients_ = 0;
nb_errs_ = 0;
}

SrsStatistic::~SrsStatistic()
Expand Down Expand Up @@ -421,11 +424,13 @@ srs_error_t SrsStatistic::on_client(std::string id, SrsRequest* req, ISrsExpire*
// @see https://github.com/ossrs/srs/issues/2311
srs_freep(client->req);
client->req = req->copy();

nb_clients_++;

return err;
}

void SrsStatistic::on_disconnect(std::string id)
void SrsStatistic::on_disconnect(std::string id, srs_error_t err)
{
std::map<std::string, SrsStatisticClient*>::iterator it = clients.find(id);
if (it == clients.end()) return;
Expand All @@ -440,6 +445,10 @@ void SrsStatistic::on_disconnect(std::string id)
stream->nb_clients--;
vhost->nb_clients--;

if (srs_error_code(err) != ERROR_SUCCESS) {
nb_errs_++;
}

cleanup_stream(stream);
}

Expand Down Expand Up @@ -721,3 +730,19 @@ SrsStatisticStream* SrsStatistic::create_stream(SrsStatisticVhost* vhost, SrsReq
return stream;
}

srs_error_t SrsStatistic::dumps_metrics(int64_t& send_bytes, int64_t& recv_bytes, int64_t& nstreams, int64_t& nclients, int64_t& total_nclients, int64_t& nerrs)
{
srs_error_t err = srs_success;

send_bytes = kbps->get_send_bytes();
recv_bytes = kbps->get_recv_bytes();

nstreams = streams.size();
nclients = clients.size();

total_nclients = nb_clients_;
nerrs = nb_errs_;

return err;
}

10 changes: 9 additions & 1 deletion trunk/src/app/srs_app_statistic.hpp
Expand Up @@ -139,6 +139,11 @@ class SrsStatistic
std::map<std::string, SrsStatisticClient*> clients;
// The server total kbps.
SrsKbps* kbps;
private:
// The total of clients connections.
int64_t nb_clients_;
// The total of clients errors.
int64_t nb_errs_;
private:
SrsStatistic();
virtual ~SrsStatistic();
Expand Down Expand Up @@ -177,7 +182,7 @@ class SrsStatistic
// @remark the on_disconnect always call, while the on_client is call when
// only got the request object, so the client specified by id maybe not
// exists in stat.
virtual void on_disconnect(std::string id);
virtual void on_disconnect(std::string id, srs_error_t err);
private:
// Cleanup the stream if stream is not active and for the last client.
void cleanup_stream(SrsStatisticStream* stream);
Expand Down Expand Up @@ -210,6 +215,9 @@ class SrsStatistic
private:
virtual SrsStatisticVhost* create_vhost(SrsRequest* req);
virtual SrsStatisticStream* create_stream(SrsStatisticVhost* vhost, SrsRequest* req);
public:
// Dumps exporter metrics.
virtual srs_error_t dumps_metrics(int64_t& send_bytes, int64_t& recv_bytes, int64_t& nstreams, int64_t& nclients, int64_t& total_nclients, int64_t& nerrs);
};

// Generate a random string id, with constant prefix.
Expand Down

0 comments on commit 981cab4

Please sign in to comment.