Skip to content

Commit ef368b3

Browse files
Hao Tang, Shaojun Wang, jerboaa
committed
8265836: OperatingSystemImpl.getCpuLoad() returns incorrect CPU load inside a container
Co-authored-by: Shaojun Wang <jeffery.wsj@alibaba-inc.com>
Co-authored-by: Severin Gehwolf <sgehwolf@openjdk.org>
Reviewed-by: sgehwolf, ysuenaga
1 parent 10a6f5d commit ef368b3

File tree

4 files changed

+88
-12
lines changed

4 files changed

+88
-12
lines changed

src/jdk.management/aix/native/libmanagement_ext/UnixOperatingSystem.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,13 @@ Java_com_sun_management_internal_OperatingSystemImpl_getSingleCpuLoad0
5151
return -1.0;
5252
}
5353

54+
JNIEXPORT jlong JNICALL
55+
Java_com_sun_management_internal_OperatingSystemImpl_getHostTotalCpuTicks0
56+
(JNIEnv *env, jobject mbean)
57+
{
58+
return -1.0;
59+
}
60+
5461
JNIEXPORT jint JNICALL
5562
Java_com_sun_management_internal_OperatingSystemImpl_getHostConfiguredCpuCount0
5663
(JNIEnv *env, jobject mbean)

src/jdk.management/linux/native/libmanagement_ext/UnixOperatingSystem.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ static struct perfbuf {
6161
} counters;
6262

6363
#define DEC_64 "%"SCNd64
64+
#define NS_PER_SEC 1000000000
6465

6566
static void next_line(FILE *f) {
6667
while (fgetc(f) != '\n');
@@ -363,6 +364,31 @@ Java_com_sun_management_internal_OperatingSystemImpl_getHostConfiguredCpuCount0
363364
}
364365
}
365366

367+
// Return the host cpu ticks since boot in nanoseconds
368+
JNIEXPORT jlong JNICALL
369+
Java_com_sun_management_internal_OperatingSystemImpl_getHostTotalCpuTicks0
370+
(JNIEnv *env, jobject mbean)
371+
{
372+
if (perfInit() == 0) {
373+
if (get_totalticks(-1, &counters.cpuTicks) < 0) {
374+
return -1;
375+
} else {
376+
long ticks_per_sec = sysconf(_SC_CLK_TCK);
377+
jlong result = (jlong)counters.cpuTicks.total;
378+
if (ticks_per_sec <= NS_PER_SEC) {
379+
long scale_factor = NS_PER_SEC/ticks_per_sec;
380+
result = result * scale_factor;
381+
} else {
382+
long scale_factor = ticks_per_sec/NS_PER_SEC;
383+
result = result / scale_factor;
384+
}
385+
return result;
386+
}
387+
} else {
388+
return -1;
389+
}
390+
}
391+
366392
JNIEXPORT jint JNICALL
367393
Java_com_sun_management_internal_OperatingSystemImpl_getHostOnlineCpuCount0
368394
(JNIEnv *env, jobject mbean)

src/jdk.management/macosx/native/libmanagement_ext/UnixOperatingSystem.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,13 @@ Java_com_sun_management_internal_OperatingSystemImpl_getSingleCpuLoad0
167167
return -1.0;
168168
}
169169

170+
JNIEXPORT jlong JNICALL
171+
Java_com_sun_management_internal_OperatingSystemImpl_getHostTotalCpuTicks0
172+
(JNIEnv *env, jobject mbean)
173+
{
174+
return -1.0;
175+
}
176+
170177
JNIEXPORT jint JNICALL
171178
Java_com_sun_management_internal_OperatingSystemImpl_getHostConfiguredCpuCount0
172179
(JNIEnv *env, jobject mbean)

src/jdk.management/unix/classes/com/sun/management/internal/OperatingSystemImpl.java

Lines changed: 48 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ class OperatingSystemImpl extends BaseOperatingSystemImpl
4242

4343
private static final int MAX_ATTEMPTS_NUMBER = 10;
4444
private final Metrics containerMetrics;
45+
private long usageTicks = 0; // used for cpu load calculation
46+
private long totalTicks = 0; // used for cpu load calculation
4547

4648
OperatingSystemImpl(VMManagement vm) {
4749
super(vm);
@@ -132,24 +134,56 @@ public long getMaxFileDescriptorCount() {
132134
return getMaxFileDescriptorCount0();
133135
}
134136

137+
private double getUsageDividesTotal(long usageTicks, long totalTicks) {
138+
// If cpu quota or cpu shares are in effect calculate the cpu load
139+
// based on the following formula (similar to how
140+
// getCpuLoad0() is being calculated):
141+
//
142+
// | usageTicks - usageTicks' |
143+
// ------------------------------
144+
// | totalTicks - totalTicks' |
145+
//
146+
// where usageTicks' and totalTicks' are historical values
147+
// retrieved via an earlier call of this method.
148+
//
149+
// Total ticks should be scaled to the container effective number
150+
// of cpus, if cpu shares are in effect.
151+
if (usageTicks < 0 || totalTicks <= 0) {
152+
return -1;
153+
}
154+
long distance = usageTicks - this.usageTicks;
155+
this.usageTicks = usageTicks;
156+
long totalDistance = totalTicks - this.totalTicks;
157+
this.totalTicks = totalTicks;
158+
159+
double systemLoad = 0.0;
160+
if (distance > 0 && totalDistance > 0) {
161+
systemLoad = ((double)distance) / totalDistance;
162+
}
163+
// Ensure the return value is in the range 0.0 -> 1.0
164+
systemLoad = Math.max(0.0, systemLoad);
165+
systemLoad = Math.min(1.0, systemLoad);
166+
return systemLoad;
167+
}
168+
135169
public double getCpuLoad() {
136170
if (containerMetrics != null) {
137171
long quota = containerMetrics.getCpuQuota();
172+
long share = containerMetrics.getCpuShares();
173+
long usageNanos = containerMetrics.getCpuUsage();
138174
if (quota > 0) {
139-
long periodLength = containerMetrics.getCpuPeriod();
140175
long numPeriods = containerMetrics.getCpuNumPeriods();
141-
long usageNanos = containerMetrics.getCpuUsage();
142-
if (periodLength > 0 && numPeriods > 0 && usageNanos > 0) {
143-
long elapsedNanos = TimeUnit.MICROSECONDS.toNanos(periodLength * numPeriods);
144-
double systemLoad = (double) usageNanos / elapsedNanos;
145-
// Ensure the return value is in the range 0.0 -> 1.0
146-
systemLoad = Math.max(0.0, systemLoad);
147-
systemLoad = Math.min(1.0, systemLoad);
148-
return systemLoad;
149-
}
150-
return -1;
176+
long quotaNanos = TimeUnit.MICROSECONDS.toNanos(quota * numPeriods);
177+
return getUsageDividesTotal(usageNanos, quotaNanos);
178+
} else if (share > 0) {
179+
long hostTicks = getHostTotalCpuTicks0();
180+
int totalCPUs = getHostOnlineCpuCount0();
181+
int containerCPUs = getAvailableProcessors();
182+
// scale the total host load to the actual container cpus
183+
hostTicks = hostTicks * containerCPUs / totalCPUs;
184+
return getUsageDividesTotal(usageNanos, hostTicks);
151185
} else {
152-
// If CPU quotas are not active then find the average system load for
186+
// If CPU quotas and shares are not active then find the average system load for
153187
// all online CPUs that are allowed to run this container.
154188

155189
// If the cpuset is the same as the host's one there is no need to iterate over each CPU
@@ -208,6 +242,8 @@ private boolean isCpuSetSameAsHostCpuSet() {
208242
private native double getSingleCpuLoad0(int cpuNum);
209243
private native int getHostConfiguredCpuCount0();
210244
private native int getHostOnlineCpuCount0();
245+
// CPU ticks since boot in nanoseconds
246+
private native long getHostTotalCpuTicks0();
211247

212248
static {
213249
initialize0();

0 commit comments

Comments
 (0)