Add support for aggpercpu option

The aggpercpu option appears in the documentation. Further, the current implementation has vestigial code that suggests the option was once supported. On the other hand, the documentation does not describe its behavior in any detail, and the option seems to have no effect on Solaris or with the legacy Linux implementation. An easy workaround is to add cpu as a key to an aggregation; this workaround makes the feature superfluous. Another challenge is that it's hard to know what behavior makes most sense if aggpercpu is combined with other features. For quantize() output, should the same row values be used for all CPUs as for the overall aggregation? What should the output format look like if aggpercpu is combined with a printa() that has multiple aggregations? Just implement some reasonable version of aggpercpu support and leave intricate scenarios for the user to handle.

Signed-off-by: Eugene Loh <eugene.loh@oracle.com>
Reviewed-by: Kris Van Hees <kris.van.hees@oracle.com>
oracle · Jan 26, 2024 · b189765 · b189765
1 parent a20cc60
commit b189765
Show file tree

Hide file tree

Showing 3 changed files with 206 additions and 14 deletions.
diff --git a/libdtrace/dt_aggregate.c b/libdtrace/dt_aggregate.c
@@ -1,6 +1,6 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2008, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2024, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -438,6 +438,32 @@ dt_agg_one_agg(dt_ident_t *aid, dtrace_recdesc_t *rec, char *dst,
 	}
 }
 
+/*
+ * Reset the value of one aggregation record in each of the max_cpus per-CPU
+ * data buffers of agd.  DT_AGG_MIN values are set to INT64_MAX, DT_AGG_MAX
+ * values to INT64_MIN, and every other action has dtrd_size bytes zeroed.
+ */
+static void
+dt_aggregate_clear_one_percpu(const dtrace_aggdata_t *agd,
+			      dtrace_recdesc_t *rec, int max_cpus)
+{
+	const dtrace_actkind_t	kind = rec->dtrd_action;
+	int			cpu;
+
+	for (cpu = 0; cpu < max_cpus; cpu++) {
+		/* Start of this record's value within the buffer for cpu. */
+		int64_t	*val = (int64_t *)
+				&agd->dtada_percpu[cpu][rec->dtrd_offset];
+
+		/* min/max reset a single 64-bit word; the rest zero-fill. */
+		if (kind == DT_AGG_MIN)
+			*val = INT64_MAX;
+		else if (kind == DT_AGG_MAX)
+			*val = INT64_MIN;
+		else
+			memset(val, 0, rec->dtrd_size);
+	}
+}
+
 int
 dt_aggregate_clear_one(const dtrace_aggdata_t *agd, void *arg)
 {
@@ -447,7 +473,7 @@ dt_aggregate_clear_one(const dtrace_aggdata_t *agd, void *arg)
 	int64_t			*vals = (int64_t *)
 					&agd->dtada_data[rec->dtrd_offset];
 	uint64_t		agen;
-	int			i, max_cpus = dtp->dt_conf.max_cpuid + 1;
+	int			max_cpus = dtp->dt_conf.max_cpuid + 1;
 
 	/*
 	 * We can pass the entire key because we know that the first uint32_t
@@ -460,24 +486,18 @@ dt_aggregate_clear_one(const dtrace_aggdata_t *agd, void *arg)
 	switch (rec->dtrd_action) {
 	case DT_AGG_MIN:
 		*vals = INT64_MAX;
-		if (agd->dtada_percpu)
-			for (i = 0; i < max_cpus; i++)
-				*((uint64_t*)agd->dtada_percpu[i]) = INT64_MAX;
 		break;
 	case DT_AGG_MAX:
 		*vals = INT64_MIN;
-		if (agd->dtada_percpu)
-			for (i = 0; i < max_cpus; i++)
-				*((uint64_t*)agd->dtada_percpu[i]) = INT64_MIN;
 		break;
 	default:
 		memset(vals, 0, rec->dtrd_size);
-		if (agd->dtada_percpu)
-			for (i = 0; i < max_cpus; i++)
-				memset(agd->dtada_percpu[i], 0, rec->dtrd_size);
 		break;
 	}
 
+	if (agd->dtada_percpu)
+		dt_aggregate_clear_one_percpu(agd, rec, max_cpus);
+
 	return DTRACE_AGGWALK_NEXT;
 }
 
@@ -543,6 +563,9 @@ dt_aggregate_snap_one(dtrace_hdl_t *dtp, int aggid, int cpu, const char *key,
 		assert(aid != NULL);
 		dt_agg_one_agg(aid, &agg->dtagd_drecs[DT_AGGDATA_RECORD],
 			       agd->dtada_data, data);
+		if (agd->dtada_percpu != NULL)
+			dt_agg_one_agg(aid, &agg->dtagd_drecs[DT_AGGDATA_RECORD],
+				       agd->dtada_percpu[cpu], data);
 
 		return 0;
 
@@ -578,8 +601,25 @@ dt_aggregate_snap_one(dtrace_hdl_t *dtp, int aggid, int cpu, const char *key,
 
 	memcpy(ptr, data, size);
 	agd->dtada_data = ptr;
+	if (dtp->dt_aggregate.dtat_flags & DTRACE_A_PERCPU) {
+		int i, max_cpus = dtp->dt_conf.max_cpuid + 1;
+		dtrace_recdesc_t	*rec = &agg->dtagd_drecs[DT_AGGDATA_RECORD];
+
+		agd->dtada_percpu = dt_alloc(dtp, max_cpus * sizeof(caddr_t));
+		if (agd->dtada_percpu == NULL)
+			return dt_set_errno(dtp, EDT_NOMEM);
+
+		for (i = 0; i < max_cpus; i++) {
+			agd->dtada_percpu[i] = dt_alloc(dtp, size);
+			if (agd->dtada_percpu[i] == NULL)
+				return dt_set_errno(dtp, EDT_NOMEM);
+		}
+
+		dt_aggregate_clear_one_percpu(agd, rec, max_cpus);
+		memcpy(agd->dtada_percpu[cpu], data, size);
+	}
 
-	/* Add the new entru to the hashtable. */
+	/* Add the new entry to the hashtable. */
 	if (agh->dtah_hash[ndx] != NULL)
 		agh->dtah_hash[ndx]->dtahe_prev = h;
 
@@ -1564,7 +1604,7 @@ dtrace_aggregate_walk_joined(dtrace_hdl_t *dtp, dtrace_aggid_t *aggvars,
 
 		if ((zdata = dt_zalloc(dtp, zsize)) == NULL) {
 			/*
-			 * If we failed to allocated some zero-filled data, we
+			 * If we failed to allocate some zero-filled data, we
 			 * need to zero out the remaining dtada_data pointers
 			 * to prevent the wrong data from being freed below.
 			 */

diff --git a/libdtrace/dt_consume.c b/libdtrace/dt_consume.c
@@ -1,6 +1,6 @@
 /*
  * Oracle Linux DTrace.
- * Copyright (c) 2009, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2009, 2024, Oracle and/or its affiliates. All rights reserved.
  * Licensed under the Universal Permissive License v 1.0 as shown at
  * http://oss.oracle.com/licenses/upl.
  */
@@ -1760,6 +1760,15 @@ dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
 		if (dt_print_datum(dtp, fp, rec, aggdata->dtada_data, normal,
 				   agg->dtagd_sig) < 0)
 			return DTRACE_AGGWALK_ERROR;
+		if (aggdata->dtada_percpu != NULL) {
+			int j, max_cpus = aggdata->dtada_hdl->dt_conf.max_cpuid + 1;
+			for (j = 0; j < max_cpus; j++) {
+				if (dt_printf(dtp, fp, "\n    [CPU %d]", aggdata->dtada_hdl->dt_conf.cpus[j].cpu_id) < 0)
+					return DTRACE_AGGWALK_ERROR;
+				if (dt_print_datum(dtp, fp, rec, aggdata->dtada_percpu[j], normal, agg->dtagd_sig) < 0)
+					return DTRACE_AGGWALK_ERROR;
+			}
+		}
 
 		if (dt_buffered_flush(dtp, NULL, rec, aggdata,
 				      DTRACE_BUFDATA_AGGVAL) < 0)

diff --git a/test/unittest/aggs/tst.aggpercpu.sh b/test/unittest/aggs/tst.aggpercpu.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+#
+# Oracle Linux DTrace.
+# Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at
+# http://oss.oracle.com/licenses/upl.
+#
+
+dtrace=$1
+
+DIRNAME="$tmpdir/aggpercpu.$$.$RANDOM"
+mkdir -p "$DIRNAME"
+cd "$DIRNAME" || exit 1
+
+# Check -xaggpercpu output.  Run a D script that fires on every CPU,
+# forcing DTrace to aggregate results over all CPUs.  Each firing prints
+# "cpu N" and feeds six aggregations whose inputs depend only on the CPU
+# id, so that the expected per-CPU results can be recomputed below.
+
+$dtrace -xaggpercpu -qn '
+    profile-600ms
+    {
+        printf("cpu %d\n", cpu);
+        @xcnt = count();
+        @xavg = avg(10 * cpu + 3);
+        @xstd = stddev(20 * cpu + 8);
+        @xmin = min(30 * cpu - 10);
+        @xmax = max(40 * cpu - 15);
+        @xsum = sum(50 * cpu);
+    }
+    tick-900ms
+    {
+        exit(0)
+    }
+' > dtrace.out
+if [ $? -ne 0 ]; then
+    echo DTrace failed
+    cat dtrace.out
+    exit 1
+fi
+
+# Examine the results: recompute each aggregation from the "cpu N" lines and
+# compare with the values DTrace printed, both overall and per CPU.  The awk
+# program relies on the gawk extensions strtonum() and asort().
+
+awk '
+    # The expected value for the aggregation is aggval.
+    # The expected value on a CPU is (m * cpu + b).
+    function check(label, aggval, m, b) {
+        # Check the aggregation over all CPUs.
+        getline;
+        print "check:", $0;
+        if ($1 != aggval) { printf("ERROR: %s, expect %d got %d\n", label, aggval, $1) };
+
+        # Check the per-CPU values, expected in sorted CPU order.
+        for (i = 1; i <= ncpu; i++) {
+            getline;
+            print "check:", $0;
+            if (match($0, "^    \\[CPU ") != 1 ||
+                strtonum($2) != cpu[i] ||
+                strtonum($3) != m * cpu[i] + b)
+                printf("ERROR: %s, agg per cpu %d, line: %s\n", label, cpu[i], $0);
+        }
+    }
+
+    BEGIN {
+        xcnt = xavg = xstm = xstd = xsum = 0;
+        xmin = +1000000000;
+        xmax = -1000000000;
+        ncpu = 0;
+    }
+
+    # The first "cpu" lines provide the inputs to the aggregations.
+    /^cpu [0-9]*$/ {
+        cpu[++ncpu] = strtonum($NF);
+
+        xcnt += 1;
+
+        x = 10 * $2 + 3;
+        xavg += x;
+
+        x = 20 * $2 + 8;
+        xstm += x;
+        xstd += x * x;
+
+        x = 30 * $2 - 10;
+        if (xmin > x) { xmin = x };
+
+        x = 40 * $2 - 15;
+        if (xmax < x) { xmax = x };
+
+        x = 50 * $2;
+        xsum += x;
+
+        next;
+    }
+
+    # The remaining lines are the aggregation results.
+    {
+        # First we finish computing our estimates for avg and stddev.
+        # (The other results require no further action.)
+        # Both are truncated to integers, matching the printed values.
+        xavg = int(xavg / xcnt);
+
+        xstm /= xcnt;
+        xstd /= xcnt;
+        xstd -= xstm * xstm;
+        xstd = int(sqrt(xstd));
+
+        # Sort the cpus.
+
+        asort(cpu);
+
+        # Now read the results and compare.
+
+        check("cnt", xcnt,  0,   1);
+        check("avg", xavg, 10,   3);
+        check("std", xstd,  0,   0);
+        check("min", xmin, 30, -10);
+        check("max", xmax, 40, -15);
+        check("sum", xsum, 50,   0);
+
+        printf("done\n");
+    }
+' dtrace.out > awk.out
+if [ $? -ne 0 ]; then
+    echo awk failed
+    cat dtrace.out
+    exit 1
+fi
+
+if grep -q ERROR awk.out ; then
+    echo ERROR found
+    echo "=================================================="
+    cat dtrace.out
+    echo "=================================================="
+    cat awk.out
+    echo "=================================================="
+    exit 1
+fi
+
+echo success
+exit 0