Skip to content

Commit

Permalink
sflow: Export OVS datapath performance counters via sFlow.
Browse files Browse the repository at this point in the history
The OVS cache hit/miss counters and memory/CPU usage statistics have been
identified as important metrics when managing large deployments. This patch
allows them to be pushed periodically as part of the sFlow feed, and
represents a more efficient and scalable alternative to polling via
ovs-dpctl(1).

Signed-off-by: Neil McKee <neil.mckee@inmon.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
  • Loading branch information
sflow authored and blp committed Jan 1, 2015
1 parent 849222d commit 3d2912f
Show file tree
Hide file tree
Showing 4 changed files with 188 additions and 3 deletions.
2 changes: 1 addition & 1 deletion NEWS
Expand Up @@ -12,7 +12,7 @@ Post-v2.3.0
http://tools.ietf.org/html/draft-gross-geneve-00
- The OVS database now reports controller rate limiting statistics.
- sflow now exports information about LACP-based bonds, port names, and
OpenFlow port numbers.
OpenFlow port numbers, as well as datapath performance counters.
- ovs-dpctl functionality is now available for datapaths integrated
into ovs-vswitchd, via ovs-appctl. Some existing ovs-appctl
commands are now redundant and will be removed in a future
Expand Down
34 changes: 33 additions & 1 deletion lib/sflow.h
Expand Up @@ -543,6 +543,34 @@ typedef struct _SFLLACP_counters {

#define SFL_CTR_LACP_XDR_SIZE 56

/* Application resource counters.
 *
 * Carried in a counters sample element tagged SFLCOUNTERS_APP_RESOURCES.
 * sfl_receiver_writeCountersSample() encodes the members in declaration
 * order, using putNet32() for the 32-bit members and putNet64() for the
 * 64-bit ones. */

typedef struct _SFLAPPResources_counters {
uint32_t user_time; /* in milliseconds */
uint32_t system_time; /* in milliseconds */
uint64_t mem_used; /* presumably bytes in use -- TODO confirm against spec */
uint64_t mem_max; /* presumably memory limit in bytes -- TODO confirm */
uint32_t fd_open; /* presumably open file descriptors -- TODO confirm */
uint32_t fd_max; /* presumably file descriptor limit -- TODO confirm */
uint32_t conn_open; /* presumably open connections -- TODO confirm */
uint32_t conn_max; /* presumably connection limit -- TODO confirm */
} SFLAPPResources_counters;

/* Encoded size in bytes: six 32-bit members plus two 64-bit members
 * (6 * 4 + 2 * 8). */
#define SFL_CTR_APP_RESOURCES_XDR_SIZE 40

/* OVS datapath stats.
 *
 * Carried in a counters sample element tagged SFLCOUNTERS_OVSDP.
 * sfl_receiver_writeCountersSample() encodes the six members with putNet32()
 * in declaration order.  The OVS sFlow agent fills each member from the
 * member of the same name in its summed 'struct dpif_dp_stats'. */

typedef struct _SFLOVSDP_counters {
uint32_t n_hit; /* presumably flow table hits -- TODO confirm */
uint32_t n_missed; /* presumably flow table misses -- TODO confirm */
uint32_t n_lost; /* presumably misses never delivered -- TODO confirm */
uint32_t n_mask_hit; /* presumably masks visited on lookup -- TODO confirm */
uint32_t n_flows; /* presumably flows currently present -- TODO confirm */
uint32_t n_masks; /* presumably masks currently present -- TODO confirm */
} SFLOVSDP_counters;

/* Encoded size in bytes: six 32-bit members (6 * 4). */
#define SFL_CTR_OVSDP_XDR_SIZE 24

/* Counters data */

enum SFLCounters_type_tag {
Expand All @@ -554,7 +582,9 @@ enum SFLCounters_type_tag {
SFLCOUNTERS_VLAN = 5,
SFLCOUNTERS_LACP = 7,
SFLCOUNTERS_OPENFLOWPORT = 1004,
SFLCOUNTERS_PORTNAME = 1005
SFLCOUNTERS_PORTNAME = 1005,
SFLCOUNTERS_APP_RESOURCES = 2203,
SFLCOUNTERS_OVSDP = 2207
};

typedef union _SFLCounters_type {
Expand All @@ -566,6 +596,8 @@ typedef union _SFLCounters_type {
SFLLACP_counters lacp;
SFLOpenFlowPort ofPort;
SFLPortName portName;
SFLAPPResources_counters appResources;
SFLOVSDP_counters ovsdp;
} SFLCounters_type;

typedef struct _SFLCounters_sample_element {
Expand Down
20 changes: 20 additions & 0 deletions lib/sflow_receiver.c
Expand Up @@ -652,6 +652,8 @@ static int computeCountersSampleSize(SFLReceiver *receiver, SFL_COUNTERS_SAMPLE_
case SFLCOUNTERS_LACP: elemSiz = SFL_CTR_LACP_XDR_SIZE; break;
case SFLCOUNTERS_OPENFLOWPORT: elemSiz = SFL_CTR_OPENFLOWPORT_XDR_SIZE; break;
case SFLCOUNTERS_PORTNAME: elemSiz = stringEncodingLength(&elem->counterBlock.portName.portName); break;
case SFLCOUNTERS_APP_RESOURCES: elemSiz = SFL_CTR_APP_RESOURCES_XDR_SIZE; break;
case SFLCOUNTERS_OVSDP: elemSiz = SFL_CTR_OVSDP_XDR_SIZE; break;
default:
sflError(receiver, "unexpected counters_tag");
return -1;
Expand Down Expand Up @@ -774,6 +776,24 @@ int sfl_receiver_writeCountersSample(SFLReceiver *receiver, SFL_COUNTERS_SAMPLE_
case SFLCOUNTERS_PORTNAME:
putString(receiver, &elem->counterBlock.portName.portName);
break;
case SFLCOUNTERS_APP_RESOURCES:
putNet32(receiver, elem->counterBlock.appResources.user_time);
putNet32(receiver, elem->counterBlock.appResources.system_time);
putNet64(receiver, elem->counterBlock.appResources.mem_used);
putNet64(receiver, elem->counterBlock.appResources.mem_max);
putNet32(receiver, elem->counterBlock.appResources.fd_open);
putNet32(receiver, elem->counterBlock.appResources.fd_max);
putNet32(receiver, elem->counterBlock.appResources.conn_open);
putNet32(receiver, elem->counterBlock.appResources.conn_max);
break;
case SFLCOUNTERS_OVSDP:
putNet32(receiver, elem->counterBlock.ovsdp.n_hit);
putNet32(receiver, elem->counterBlock.ovsdp.n_missed);
putNet32(receiver, elem->counterBlock.ovsdp.n_lost);
putNet32(receiver, elem->counterBlock.ovsdp.n_mask_hit);
putNet32(receiver, elem->counterBlock.ovsdp.n_flows);
putNet32(receiver, elem->counterBlock.ovsdp.n_masks);
break;
default:
sflError(receiver, "unexpected counters_tag");
return -1;
Expand Down
135 changes: 134 additions & 1 deletion ofproto/ofproto-dpif-sflow.c
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
* Copyright (c) 2009 InMon Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -18,6 +18,7 @@
#include <config.h>
#include "ofproto-dpif-sflow.h"
#include <inttypes.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <net/if.h>
#include <stdlib.h>
Expand Down Expand Up @@ -46,6 +47,11 @@ VLOG_DEFINE_THIS_MODULE(sflow);

static struct ovs_mutex mutex;

/* Sub-agent ID of the sFlow sub-agent that is currently responsible for
 * sending the global (datapath and resource) counters, or
 * SFLOW_GC_SUBID_UNCLAIMED when no sub-agent holds that role.
 *
 * The macro expansion is fully parenthesized so that it behaves as a single
 * operand in any expression (e.g. 'sizeof SFLOW_GC_SUBID_UNCLAIMED'). */
#define SFLOW_GC_SUBID_UNCLAIMED ((uint32_t) -1)
static uint32_t sflow_global_counters_subid = SFLOW_GC_SUBID_UNCLAIMED;

struct dpif_sflow_port {
struct hmap_node hmap_node; /* In struct dpif_sflow's "ports" hmap. */
SFLDataSource_instance dsi; /* sFlow library's notion of port number. */
Expand Down Expand Up @@ -161,6 +167,123 @@ dpif_sflow_find_port(const struct dpif_sflow *ds, odp_port_t odp_port)
return NULL;
}

/* Sums the statistics of every datapath that can be enumerated, opened and
 * queried into '*dp_totals', which is zeroed first.  Modeled after the dpctl
 * utility.
 *
 * It might be more efficient for this module to be given a handle it can use
 * to get these stats directly, but this is only going to be called once every
 * 20-30 seconds.
 *
 * 'ds' is currently unused.
 *
 * Returns the number of datapaths whose stats were read successfully
 * (normally expect 1).  Datapath types that fail to enumerate, and datapaths
 * that fail to open or to report stats, are skipped silently.
 *
 * NOTE(review): some datapaths may report "unsupported" sentinel values for
 * the mask statistics; confirm whether those should be left out of the
 * sums. */
static int
sflow_get_dp_stats(struct dpif_sflow *ds OVS_UNUSED,
                   struct dpif_dp_stats *dp_totals)
{
    struct sset types;
    const char *type;
    int count = 0;

    memset(dp_totals, 0, sizeof *dp_totals);
    sset_init(&types);
    dp_enumerate_types(&types);
    SSET_FOR_EACH (type, &types) {
        struct sset names;
        const char *name;

        sset_init(&names);
        if (dp_enumerate_names(type, &names) == 0) {
            SSET_FOR_EACH (name, &names) {
                struct dpif *dpif;

                if (dpif_open(name, type, &dpif) == 0) {
                    struct dpif_dp_stats dp_stats;

                    if (dpif_get_dp_stats(dpif, &dp_stats) == 0) {
                        count++;
                        dp_totals->n_hit += dp_stats.n_hit;
                        dp_totals->n_missed += dp_stats.n_missed;
                        dp_totals->n_lost += dp_stats.n_lost;
                        dp_totals->n_flows += dp_stats.n_flows;
                        dp_totals->n_mask_hit += dp_stats.n_mask_hit;
                        dp_totals->n_masks += dp_stats.n_masks;
                    }
                    dpif_close(dpif);
                }
            }
        }
        /* Destroy 'names' whether or not enumeration succeeded, so that any
         * entries added before a failure are not leaked. */
        sset_destroy(&names);
    }
    sset_destroy(&types);
    return count;
}

/* With several bridges configured, exactly one sFlow sub-agent should send
 * the global counters.  Each sub-agent calls this with its own 'subid' to
 * find out whether that duty is its own.
 *
 * If nobody holds the role yet, 'subid' claims it.  Returns true if 'subid'
 * holds the role once the call completes, false if another sub-agent does. */
static bool
sflow_global_counters_subid_test(uint32_t subid)
    OVS_REQUIRES(mutex)
{
    bool unclaimed = sflow_global_counters_subid == SFLOW_GC_SUBID_UNCLAIMED;

    if (unclaimed) {
        /* Nobody owns the role yet, so the caller takes it. */
        sflow_global_counters_subid = subid;
        return true;
    }
    return subid == sflow_global_counters_subid;
}

/* Gives up the global-counters role if it is currently held by 'subid', so
 * that another sub-agent can claim it later.  Does nothing when a different
 * sub-agent (or nobody) holds the role. */
static void
sflow_global_counters_subid_clear(uint32_t subid)
    OVS_REQUIRES(mutex)
{
    if (subid != sflow_global_counters_subid) {
        /* Some other sub-agent, or none, owns the role: leave it alone. */
        return;
    }

    /* The owning sub-agent is going away; put the role back up for grabs. */
    sflow_global_counters_subid = SFLOW_GC_SUBID_UNCLAIMED;
}

/* Poller callback that sends the process-wide ("global") counters: the
 * summed datapath statistics and this process's resource usage.
 *
 * 'ds_' is the 'struct dpif_sflow' that was registered with the poller,
 * 'poller' is the poller being serviced and 'cs' is the counters sample to
 * which the elements are added before it is written out.
 *
 * Only the sub-agent that currently holds the global-counters role sends
 * anything; for every other sub-agent this function returns without touching
 * 'cs'. */
static void
sflow_agent_get_global_counters(void *ds_, SFLPoller *poller,
                                SFL_COUNTERS_SAMPLE_TYPE *cs)
    OVS_REQUIRES(mutex)
{
    struct dpif_sflow *ds = ds_;
    SFLCounters_sample_element dp_elem, res_elem;
    struct dpif_dp_stats dp_totals;
    struct rusage usage;

    if (!sflow_global_counters_subid_test(poller->agent->subId)) {
        /* Another sub-agent is currently responsible for this. */
        return;
    }

    /* Datapath stats.  The element is added only if the stats of at least one
     * datapath could be read.  The totals are narrowed to the 32-bit fields
     * of the sFlow structure. */
    if (sflow_get_dp_stats(ds, &dp_totals)) {
        dp_elem.tag = SFLCOUNTERS_OVSDP;
        dp_elem.counterBlock.ovsdp.n_hit = dp_totals.n_hit;
        dp_elem.counterBlock.ovsdp.n_missed = dp_totals.n_missed;
        dp_elem.counterBlock.ovsdp.n_lost = dp_totals.n_lost;
        dp_elem.counterBlock.ovsdp.n_mask_hit = dp_totals.n_mask_hit;
        dp_elem.counterBlock.ovsdp.n_flows = dp_totals.n_flows;
        dp_elem.counterBlock.ovsdp.n_masks = dp_totals.n_masks;
        SFLADD_ELEMENT(cs, &dp_elem);
    }

    /* Resource usage. */
    getrusage(RUSAGE_SELF, &usage);
    res_elem.tag = SFLCOUNTERS_APP_RESOURCES;
    res_elem.counterBlock.appResources.user_time
        = timeval_to_msec(&usage.ru_utime);
    res_elem.counterBlock.appResources.system_time
        = timeval_to_msec(&usage.ru_stime);
    /* Widen before multiplying: 'ru_maxrss' is a 'long', so on platforms
     * where 'long' is 32 bits the product could otherwise overflow.
     *
     * NOTE(review): assumes 'ru_maxrss' is expressed in kilobytes, and note
     * that it is the maximum resident set size rather than current usage --
     * confirm for each supported platform. */
    res_elem.counterBlock.appResources.mem_used
        = (uint64_t) usage.ru_maxrss * 1024;
    /* The remaining gauges are not measured here; mark them undefined. */
    SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.mem_max);
    SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.fd_open);
    SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.fd_max);
    SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.conn_open);
    SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.conn_max);

    SFLADD_ELEMENT(cs, &res_elem);
    /* 'dp_elem' and 'res_elem' are locals, so the sample is written out
     * before this function returns. */
    sfl_poller_writeCountersSample(poller, cs);
}

static void
sflow_agent_get_counters(void *ds_, SFLPoller *poller,
SFL_COUNTERS_SAMPLE_TYPE *cs)
Expand Down Expand Up @@ -343,6 +466,7 @@ static void
dpif_sflow_clear__(struct dpif_sflow *ds) OVS_REQUIRES(mutex)
{
if (ds->sflow_agent) {
sflow_global_counters_subid_clear(ds->sflow_agent->subId);
sfl_agent_release(ds->sflow_agent);
free(ds->sflow_agent);
ds->sflow_agent = NULL;
Expand Down Expand Up @@ -516,6 +640,7 @@ dpif_sflow_set_options(struct dpif_sflow *ds,
SFLDataSource_instance dsi;
uint32_t dsIndex;
SFLSampler *sampler;
SFLPoller *poller;

ovs_mutex_lock(&mutex);
if (sset_is_empty(&options->targets) || !options->sampling_rate) {
Expand Down Expand Up @@ -562,6 +687,7 @@ dpif_sflow_set_options(struct dpif_sflow *ds,
/* Create agent. */
VLOG_INFO("creating sFlow agent %d", options->sub_id);
if (ds->sflow_agent) {
sflow_global_counters_subid_clear(ds->sflow_agent->subId);
sfl_agent_release(ds->sflow_agent);
}
ds->sflow_agent = xcalloc(1, sizeof *ds->sflow_agent);
Expand Down Expand Up @@ -595,6 +721,13 @@ dpif_sflow_set_options(struct dpif_sflow *ds,
sfl_sampler_set_sFlowFsMaximumHeaderSize(sampler, ds->options->header_len);
sfl_sampler_set_sFlowFsReceiver(sampler, RECEIVER_INDEX);

/* Add a counter poller for the bridge so we can use it to send
global counters such as datapath cache hit/miss stats. */
poller = sfl_agent_addPoller(ds->sflow_agent, &dsi, ds,
sflow_agent_get_global_counters);
sfl_poller_set_sFlowCpInterval(poller, ds->options->polling_interval);
sfl_poller_set_sFlowCpReceiver(poller, RECEIVER_INDEX);

/* Add pollers for the currently known ifindex-ports */
HMAP_FOR_EACH (dsp, hmap_node, &ds->ports) {
dpif_sflow_add_poller(ds, dsp);
Expand Down

0 comments on commit 3d2912f

Please sign in to comment.