Skip to content

Commit

Permalink
app/mldev: enable reporting stats in mldev app
Browse files Browse the repository at this point in the history
Enable reporting driver xstats and inference end-to-end
latency and throughput in mldev inference tests. Reporting
of stats can be enabled using the "--stats" option.

Signed-off-by: Srikanth Yalavarthi <syalavarthi@marvell.com>
Acked-by: Anup Prabhu <aprabhu@marvell.com>
Signed-off-by: 0-day Robot <robot@bytheb.org>
  • Loading branch information
syalavarthi authored and ovsrobot committed Mar 16, 2023
1 parent 0de7cfa commit 3c2ab5e
Show file tree
Hide file tree
Showing 7 changed files with 171 additions and 1 deletion.
10 changes: 9 additions & 1 deletion app/test-mldev/ml_options.c
Expand Up @@ -30,6 +30,7 @@ ml_options_default(struct ml_options *opt)
opt->queue_size = 1;
opt->batches = 0;
opt->tolerance = 0.0;
opt->stats = false;
opt->debug = false;
}

Expand Down Expand Up @@ -216,7 +217,8 @@ ml_dump_test_options(const char *testname)
"\t\t--queue_pairs : number of queue pairs to create\n"
"\t\t--queue_size : size fo queue-pair\n"
"\t\t--batches : number of batches of input\n"
"\t\t--tolerance : maximum tolerance (%%) for output validation\n");
"\t\t--tolerance : maximum tolerance (%%) for output validation\n"
"\t\t--stats : enable reporting performance statistics\n");
printf("\n");
}
}
Expand Down Expand Up @@ -248,6 +250,7 @@ static struct option lgopts[] = {
{ML_QUEUE_SIZE, 1, 0, 0},
{ML_BATCHES, 1, 0, 0},
{ML_TOLERANCE, 1, 0, 0},
{ML_STATS, 0, 0, 0},
{ML_DEBUG, 0, 0, 0},
{ML_HELP, 0, 0, 0},
{NULL, 0, 0, 0}};
Expand Down Expand Up @@ -290,6 +293,11 @@ ml_options_parse(struct ml_options *opt, int argc, char **argv)
while ((opts = getopt_long(argc, argv, "", lgopts, &opt_idx)) != EOF) {
switch (opts) {
case 0: /* parse long options */
if (!strcmp(lgopts[opt_idx].name, "stats")) {
opt->stats = true;
break;
}

if (!strcmp(lgopts[opt_idx].name, "debug")) {
opt->debug = true;
break;
Expand Down
2 changes: 2 additions & 0 deletions app/test-mldev/ml_options.h
Expand Up @@ -23,6 +23,7 @@
#define ML_QUEUE_SIZE ("queue_size")
#define ML_BATCHES ("batches")
#define ML_TOLERANCE ("tolerance")
#define ML_STATS ("stats")
#define ML_DEBUG ("debug")
#define ML_HELP ("help")

Expand All @@ -45,6 +46,7 @@ struct ml_options {
uint16_t queue_size;
uint16_t batches;
float tolerance;
bool stats;
bool debug;
};

Expand Down
140 changes: 140 additions & 0 deletions app/test-mldev/test_inference_common.c
Expand Up @@ -6,6 +6,7 @@
#include <unistd.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_hash_crc.h>
#include <rte_launch.h>
#include <rte_lcore.h>
Expand Down Expand Up @@ -37,6 +38,17 @@
} \
} while (0)

/* Print a horizontal separator of 'len' dash characters followed by a newline. */
static void
print_line(uint16_t len)
{
	uint16_t pos;

	for (pos = 0; pos < len; pos++)
		putchar('-');

	putchar('\n');
}

/* Enqueue inference requests with burst size equal to 1 */
static int
ml_enqueue_single(void *arg)
Expand All @@ -46,13 +58,15 @@ ml_enqueue_single(void *arg)
struct rte_ml_op *op = NULL;
struct ml_core_args *args;
uint64_t model_enq = 0;
uint64_t start_cycle;
uint32_t burst_enq;
uint32_t lcore_id;
uint16_t fid;
int ret;

lcore_id = rte_lcore_id();
args = &t->args[lcore_id];
args->start_cycles = 0;
model_enq = 0;

if (args->nb_reqs == 0)
Expand Down Expand Up @@ -88,10 +102,12 @@ ml_enqueue_single(void *arg)
req->fid = fid;

enqueue_req:
start_cycle = rte_get_tsc_cycles();
burst_enq = rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1);
if (burst_enq == 0)
goto enqueue_req;

args->start_cycles += start_cycle;
fid++;
if (likely(fid <= args->end_fid))
goto next_model;
Expand All @@ -115,20 +131,24 @@ ml_dequeue_single(void *arg)
uint64_t total_deq = 0;
uint8_t nb_filelist;
uint32_t burst_deq;
uint64_t end_cycle;
uint32_t lcore_id;

lcore_id = rte_lcore_id();
args = &t->args[lcore_id];
args->end_cycles = 0;
nb_filelist = args->end_fid - args->start_fid + 1;

if (args->nb_reqs == 0)
return 0;

dequeue_req:
burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1);
end_cycle = rte_get_tsc_cycles();

if (likely(burst_deq == 1)) {
total_deq += burst_deq;
args->end_cycles += end_cycle;
if (unlikely(op->status == RTE_ML_OP_STATUS_ERROR)) {
rte_ml_op_error_get(t->cmn.opt->dev_id, op, &error);
ml_err("error_code = 0x%" PRIx64 ", error_message = %s\n", error.errcode,
Expand All @@ -152,6 +172,7 @@ ml_enqueue_burst(void *arg)
{
struct test_inference *t = ml_test_priv((struct ml_test *)arg);
struct ml_core_args *args;
uint64_t start_cycle;
uint16_t ops_count;
uint64_t model_enq;
uint16_t burst_enq;
Expand All @@ -164,6 +185,7 @@ ml_enqueue_burst(void *arg)

lcore_id = rte_lcore_id();
args = &t->args[lcore_id];
args->start_cycles = 0;
model_enq = 0;

if (args->nb_reqs == 0)
Expand Down Expand Up @@ -205,8 +227,10 @@ ml_enqueue_burst(void *arg)
pending = ops_count;

enqueue_reqs:
start_cycle = rte_get_tsc_cycles();
burst_enq =
rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &args->enq_ops[idx], pending);
args->start_cycles += burst_enq * start_cycle;
pending = pending - burst_enq;

if (pending > 0) {
Expand Down Expand Up @@ -236,11 +260,13 @@ ml_dequeue_burst(void *arg)
uint64_t total_deq = 0;
uint16_t burst_deq = 0;
uint8_t nb_filelist;
uint64_t end_cycle;
uint32_t lcore_id;
uint32_t i;

lcore_id = rte_lcore_id();
args = &t->args[lcore_id];
args->end_cycles = 0;
nb_filelist = args->end_fid - args->start_fid + 1;

if (args->nb_reqs == 0)
Expand All @@ -249,9 +275,11 @@ ml_dequeue_burst(void *arg)
dequeue_burst:
burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, args->deq_ops,
t->cmn.opt->burst_size);
end_cycle = rte_get_tsc_cycles();

if (likely(burst_deq > 0)) {
total_deq += burst_deq;
args->end_cycles += burst_deq * end_cycle;

for (i = 0; i < burst_deq; i++) {
if (unlikely(args->deq_ops[i]->status == RTE_ML_OP_STATUS_ERROR)) {
Expand Down Expand Up @@ -381,6 +409,7 @@ test_inference_opt_dump(struct ml_options *opt)
ml_dump("queue_pairs", "%u", opt->queue_pairs);
ml_dump("queue_size", "%u", opt->queue_size);
ml_dump("tolerance", "%-7.3f", opt->tolerance);
ml_dump("stats", "%s", (opt->stats ? "true" : "false"));

if (opt->batches == 0)
ml_dump("batches", "%u (default)", opt->batches);
Expand Down Expand Up @@ -454,6 +483,11 @@ test_inference_setup(struct ml_test *test, struct ml_options *opt)
RTE_CACHE_LINE_SIZE, opt->socket_id);
}

for (i = 0; i < RTE_MAX_LCORE; i++) {
t->args[i].start_cycles = 0;
t->args[i].end_cycles = 0;
}

return 0;

error:
Expand Down Expand Up @@ -986,3 +1020,109 @@ ml_inference_launch_cores(struct ml_test *test, struct ml_options *opt, uint16_t

return 0;
}

/*
 * Report performance statistics for an inference test run.
 *
 * When opt->stats is enabled, this fetches and prints the driver's extended
 * stats (xstats), then the average end-to-end latency and the inference
 * throughput derived from the per-lcore enqueue/dequeue TSC cycle counters
 * accumulated in t->args[]. When the TSC frequency is unknown, values are
 * reported in raw cycles instead of ns / inferences-per-second.
 *
 * @param test	ml test handle; private data holds cycle counters and xstats buffers.
 * @param opt	test options; stats reporting is skipped unless opt->stats is set.
 *
 * @return 0 on success (or when stats are disabled), negative errno-style
 *	   value on allocation or xstats query failure.
 */
int
ml_inference_stats_get(struct ml_test *test, struct ml_options *opt)
{
	struct test_inference *t = ml_test_priv(test);
	uint64_t total_cycles = 0;
	uint32_t nb_filelist;
	uint64_t throughput;
	uint64_t avg_e2e;
	uint32_t qp_id;
	uint64_t freq;
	int ret;
	int i;

	if (!opt->stats)
		return 0;

	/* get xstats size; negative return means xstats are unsupported and
	 * the xstats section below prints only the (empty) table frame.
	 */
	t->xstats_size = rte_ml_dev_xstats_names_get(opt->dev_id, NULL, 0);
	if (t->xstats_size >= 0) {
		/* allocate for xstats_map and values */
		t->xstats_map = rte_malloc(
			"ml_xstats_map", t->xstats_size * sizeof(struct rte_ml_dev_xstats_map), 0);
		if (t->xstats_map == NULL) {
			ret = -ENOMEM;
			goto error;
		}

		t->xstats_values =
			rte_malloc("ml_xstats_values", t->xstats_size * sizeof(uint64_t), 0);
		if (t->xstats_values == NULL) {
			ret = -ENOMEM;
			goto error;
		}

		ret = rte_ml_dev_xstats_names_get(opt->dev_id, t->xstats_map, t->xstats_size);
		if (ret != t->xstats_size) {
			printf("Unable to get xstats names, ret = %d\n", ret);
			ret = -1;
			goto error;
		}

		/* NOTE(review): per-id fetch return values are ignored; a
		 * failed fetch leaves that slot's value unset — confirm the
		 * driver guarantees success for ids it just reported.
		 */
		for (i = 0; i < t->xstats_size; i++)
			rte_ml_dev_xstats_get(opt->dev_id, &t->xstats_map[i].id,
					      &t->xstats_values[i], 1);
	}

	/* print xstats */
	printf("\n");
	print_line(80);
	printf(" ML Device Extended Statistics\n");
	print_line(80);
	for (i = 0; i < t->xstats_size; i++)
		printf(" %-64s = %" PRIu64 "\n", t->xstats_map[i].name, t->xstats_values[i]);
	print_line(80);

	/* release buffers; rte_free(NULL) is a no-op, and the pointers are
	 * reset so a later error path cannot double-free them.
	 */
	rte_free(t->xstats_map);
	t->xstats_map = NULL;
	rte_free(t->xstats_values);
	t->xstats_values = NULL;

	/* accumulate enqueue-to-dequeue cycle deltas across all lcores */
	freq = rte_get_tsc_hz();
	for (qp_id = 0; qp_id < RTE_MAX_LCORE; qp_id++)
		total_cycles += t->args[qp_id].end_cycles - t->args[qp_id].start_cycles;

	/* guard the divisions below: no recorded cycles or zero repetitions
	 * would otherwise be undefined behavior (integer division by zero).
	 */
	if (total_cycles == 0 || opt->repetitions == 0) {
		printf(" %-64s\n", "No inference cycles recorded, skipping latency / throughput");
		print_line(80);
		return 0;
	}

	/* print end-to-end latency; report raw cycles when TSC freq is unknown */
	if (freq == 0) {
		avg_e2e = total_cycles / opt->repetitions;
		printf(" %-64s = %" PRIu64 "\n", "Average End-to-End Latency (cycles)", avg_e2e);
	} else {
		avg_e2e = (total_cycles * NS_PER_S) / (opt->repetitions * freq);
		printf(" %-64s = %" PRIu64 "\n", "Average End-to-End Latency (ns)", avg_e2e);
	}

	/* inference_ordered runs one model at a time, so only one filelist
	 * contributes to the measured window.
	 */
	if (strcmp(opt->test_name, "inference_ordered") == 0)
		nb_filelist = 1;
	else
		nb_filelist = opt->nb_filelist;

	if (freq == 0) {
		throughput = (nb_filelist * t->cmn.opt->repetitions * 1000000) / total_cycles;
		printf(" %-64s = %" PRIu64 "\n", "Average Throughput (inferences / million cycles)",
		       throughput);
	} else {
		throughput = (nb_filelist * t->cmn.opt->repetitions * freq) / total_cycles;
		printf(" %-64s = %" PRIu64 "\n", "Average Throughput (inferences / second)",
		       throughput);
	}

	print_line(80);

	return 0;

error:
	rte_free(t->xstats_map);
	t->xstats_map = NULL;
	rte_free(t->xstats_values);
	t->xstats_values = NULL;

	return ret;
}
8 changes: 8 additions & 0 deletions app/test-mldev/test_inference_common.h
Expand Up @@ -27,6 +27,9 @@ struct ml_core_args {
struct rte_ml_op **enq_ops;
struct rte_ml_op **deq_ops;
struct ml_request **reqs;

uint64_t start_cycles;
uint64_t end_cycles;
};

struct test_inference {
Expand All @@ -46,6 +49,10 @@ struct test_inference {

struct ml_core_args args[RTE_MAX_LCORE];
uint64_t error_count[RTE_MAX_LCORE];

struct rte_ml_dev_xstats_map *xstats_map;
uint64_t *xstats_values;
int xstats_size;
} __rte_cache_aligned;

bool test_inference_cap_check(struct ml_options *opt);
Expand All @@ -63,5 +70,6 @@ void ml_inference_mem_destroy(struct ml_test *test, struct ml_options *opt);
int ml_inference_result(struct ml_test *test, struct ml_options *opt, uint16_t fid);
int ml_inference_launch_cores(struct ml_test *test, struct ml_options *opt, uint16_t start_fid,
uint16_t end_fid);
int ml_inference_stats_get(struct ml_test *test, struct ml_options *opt);

#endif /* _ML_TEST_INFERENCE_COMMON_ */
4 changes: 4 additions & 0 deletions app/test-mldev/test_inference_interleave.c
Expand Up @@ -56,7 +56,11 @@ test_inference_interleave_driver(struct ml_test *test, struct ml_options *opt)
goto error;

ml_inference_iomem_destroy(test, opt, fid);
}

ml_inference_stats_get(test, opt);

for (fid = 0; fid < opt->nb_filelist; fid++) {
ret = ml_model_stop(test, opt, &t->model[fid], fid);
if (ret != 0)
goto error;
Expand Down
1 change: 1 addition & 0 deletions app/test-mldev/test_inference_ordered.c
Expand Up @@ -54,6 +54,7 @@ test_inference_ordered_driver(struct ml_test *test, struct ml_options *opt)
goto error;

ml_inference_iomem_destroy(test, opt, fid);
ml_inference_stats_get(test, opt);

/* stop model */
ret = ml_model_stop(test, opt, &t->model[fid], fid);
Expand Down
7 changes: 7 additions & 0 deletions doc/guides/tools/testmldev.rst
Expand Up @@ -116,6 +116,10 @@ The following are the command-line options supported by the test application.
Set the tolerance value in percentage to be used for output validation. Default value
is `0`.

* ``--stats``

Enable reporting device extended stats.

* ``--debug``

Enable the tests to run in debug mode.
Expand Down Expand Up @@ -279,6 +283,7 @@ Supported command line options for inference tests are following::
--queue_size
--batches
--tolerance
--stats


List of files to be used for the inference tests can be specified through the option
Expand All @@ -300,6 +305,8 @@ inference output and reference output are compared. When the tolerance is non-ze
comparison of output is performed. Validation is considered as successful only when all the
elements of the output tensor are within the specified tolerance range.

Enabling ``--stats`` prints the extended statistics supported by the driver.

.. Note::

* The ``--filelist <file_list>`` is a mandatory option for running inference tests.
Expand Down

0 comments on commit 3c2ab5e

Please sign in to comment.