Skip to content

Conversation

simonwallis2
Copy link
Contributor

Explicitly round up the reciprocal calculation,
so that .125 is displayed as 0.13 consistently across all hosts.

Fix buildbot failure https://lab.llvm.org/buildbot/#/builders/193/builds/10666
since #154495

Explicitly round up the reciprocal calculation,
so that .125 is displayed as 0.13 consistently across all hosts.

Fix buildbot failure https://lab.llvm.org/buildbot/#/builders/193/builds/10666
since llvm#154495

Related to https://jira.arm.com/browse/LLVMAENG-4393 "Update issueWidth for Neoverse V1, N1, N3"

Change-Id: Ia82308a18ff36cf2b85dfd01bf82df93886ca0e9
@llvmbot
Copy link
Member

llvmbot commented Sep 18, 2025

@llvm/pr-subscribers-tools-llvm-mca

Author: Simon Wallis (simonwallis2)

Changes

Explicitly round up the reciprocal calculation,
so that .125 is displayed as 0.13 consistently across all hosts.

Fix buildbot failure https://lab.llvm.org/buildbot/#/builders/193/builds/10666
since #154495


Full diff: https://github.com/llvm/llvm-project/pull/159544.diff

4 Files Affected:

  • (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s (+1-1)
  • (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-misc-instructions.s (+15-15)
  • (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s (+9-9)
  • (modified) llvm/tools/llvm-mca/Views/InstructionInfoView.cpp (+1-1)
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
index 72ae67e3bea71..eddc3e565c353 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
@@ -2688,7 +2688,7 @@ drps
 # CHECK-NEXT:  1      1     0.25                        movk	x7, #0, lsl #32
 # CHECK-NEXT:  1      1     0.25                        movz	x8, #0, lsl #48
 # CHECK-NEXT:  1      1     0.25                        movk	x9, #0, lsl #48
-# CHECK-NEXT:  1      1     0.12                  U     msr	DAIFSet, #0
+# CHECK-NEXT:  1      1     0.13                  U     msr	DAIFSet, #0
 # CHECK-NEXT:  1      1     0.25                        adr	x2, #1600
 # CHECK-NEXT:  1      1     0.25                        adrp	x21, #6553600
 # CHECK-NEXT:  1      1     0.25                        adr	x0, #262144
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-misc-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-misc-instructions.s
index 2af85a87c51af..53ade6eeb77a8 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-misc-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-misc-instructions.s
@@ -30,21 +30,21 @@ sysl x16, #5, c11, c8, #5
 # CHECK-NEXT: [6]: HasSideEffects (U)
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  1      1     0.12                  U     at	s12e1r, x28
-# CHECK-NEXT:  1      1     0.12                  U     brk	#0x8415
-# CHECK-NEXT:  1      1     0.12    *      *      U     clrex
-# CHECK-NEXT:  1      1     0.12    *      *      U     csdb
-# CHECK-NEXT:  1      1     0.12                  U     dcps1
-# CHECK-NEXT:  1      1     0.12                  U     dcps2
-# CHECK-NEXT:  1      1     0.12                  U     dcps3
-# CHECK-NEXT:  1      1     0.12    *      *      U     dmb	sy
-# CHECK-NEXT:  1      1     0.12                  U     hlt	#0x7a67
-# CHECK-NEXT:  1      1     0.12                  U     hvc	#0xecb9
-# CHECK-NEXT:  1      1     0.12    *      *      U     isb
-# CHECK-NEXT:  1      1     0.12    *      *      U     pssbb
-# CHECK-NEXT:  1      1     0.12                  U     smc	#0x7e57
-# CHECK-NEXT:  1      1     0.12                  U     svc	#0x89cb
-# CHECK-NEXT:  1      1     0.12                  U     sysl	x16, #5, c11, c8, #5
+# CHECK-NEXT:  1      1     0.13                  U     at	s12e1r, x28
+# CHECK-NEXT:  1      1     0.13                  U     brk	#0x8415
+# CHECK-NEXT:  1      1     0.13    *      *      U     clrex
+# CHECK-NEXT:  1      1     0.13    *      *      U     csdb
+# CHECK-NEXT:  1      1     0.13                  U     dcps1
+# CHECK-NEXT:  1      1     0.13                  U     dcps2
+# CHECK-NEXT:  1      1     0.13                  U     dcps3
+# CHECK-NEXT:  1      1     0.13    *      *      U     dmb	sy
+# CHECK-NEXT:  1      1     0.13                  U     hlt	#0x7a67
+# CHECK-NEXT:  1      1     0.13                  U     hvc	#0xecb9
+# CHECK-NEXT:  1      1     0.13    *      *      U     isb
+# CHECK-NEXT:  1      1     0.13    *      *      U     pssbb
+# CHECK-NEXT:  1      1     0.13                  U     smc	#0x7e57
+# CHECK-NEXT:  1      1     0.13                  U     svc	#0x89cb
+# CHECK-NEXT:  1      1     0.13                  U     sysl	x16, #5, c11, c8, #5
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0.0] - V1UnitB
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s
index 3c0f0b3ddcb15..911ad1900195c 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s
@@ -3991,19 +3991,19 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  2      2     2.00                         2     V1UnitI[2],V1UnitM[2],V1UnitM0[2]          ANDS_PPzPP                 movs	p0.b, p0/z, p0.b
 # CHECK-NEXT:  2      2     2.00                         2     V1UnitI[2],V1UnitM[2],V1UnitM0[2]          ORRS_PPzPP                 movs	p15.b, p15.b
 # CHECK-NEXT:  2      2     2.00                         2     V1UnitI[2],V1UnitM[2],V1UnitM0[2]          ANDS_PPzPP                 movs	p15.b, p15/z, p15.b
-# CHECK-NEXT:  1      1     0.12                  U      1                                                MRS                        mrs	x3, ID_AA64ZFR0_EL1
-# CHECK-NEXT:  1      1     0.12                  U      1                                                MRS                        mrs	x3, ZCR_EL1
-# CHECK-NEXT:  1      1     0.12                  U      1                                                MRS                        mrs	x3, ZCR_EL12
-# CHECK-NEXT:  1      1     0.12                  U      1                                                MRS                        mrs	x3, ZCR_EL2
-# CHECK-NEXT:  1      1     0.12                  U      1                                                MRS                        mrs	x3, ZCR_EL3
-# CHECK-NEXT:  1      1     0.12                  U      1                                                MSR                        msr	ZCR_EL1, x3
+# CHECK-NEXT:  1      1     0.13                  U      1                                                MRS                        mrs	x3, ID_AA64ZFR0_EL1
+# CHECK-NEXT:  1      1     0.13                  U      1                                                MRS                        mrs	x3, ZCR_EL1
+# CHECK-NEXT:  1      1     0.13                  U      1                                                MRS                        mrs	x3, ZCR_EL12
+# CHECK-NEXT:  1      1     0.13                  U      1                                                MRS                        mrs	x3, ZCR_EL2
+# CHECK-NEXT:  1      1     0.13                  U      1                                                MRS                        mrs	x3, ZCR_EL3
+# CHECK-NEXT:  1      1     0.13                  U      1                                                MSR                        msr	ZCR_EL1, x3
 # CHECK-NEXT:  2      5     2.00                         2     V1UnitV[2],V1UnitV0[2],V1UnitV01[2],V1UnitV02[2] MSB_ZPmZZ_D          msb	z0.d, p0/m, z0.d, z0.d
 # CHECK-NEXT:  1      4     1.00                         4     V1UnitV,V1UnitV0,V1UnitV01,V1UnitV02       MSB_ZPmZZ_B                msb	z18.b, p1/m, z27.b, z0.b
 # CHECK-NEXT:  1      4     1.00                         4     V1UnitV,V1UnitV0,V1UnitV01,V1UnitV02       MSB_ZPmZZ_H                msb	z27.h, p5/m, z23.h, z1.h
 # CHECK-NEXT:  1      4     1.00                         4     V1UnitV,V1UnitV0,V1UnitV01,V1UnitV02       MSB_ZPmZZ_S                msb	z26.s, p2/m, z0.s, z2.s
-# CHECK-NEXT:  1      1     0.12                  U      1                                                MSR                        msr	ZCR_EL12, x3
-# CHECK-NEXT:  1      1     0.12                  U      1                                                MSR                        msr	ZCR_EL2, x3
-# CHECK-NEXT:  1      1     0.12                  U      1                                                MSR                        msr	ZCR_EL3, x3
+# CHECK-NEXT:  1      1     0.13                  U      1                                                MSR                        msr	ZCR_EL12, x3
+# CHECK-NEXT:  1      1     0.13                  U      1                                                MSR                        msr	ZCR_EL2, x3
+# CHECK-NEXT:  1      1     0.13                  U      1                                                MSR                        msr	ZCR_EL3, x3
 # CHECK-NEXT:  1      4     1.00                         4     V1UnitV,V1UnitV0,V1UnitV01,V1UnitV02       MUL_ZPmZ_B                 mul	z0.b, p7/m, z0.b, z31.b
 # CHECK-NEXT:  2      5     2.00                         5     V1UnitV[2],V1UnitV0[2],V1UnitV01[2],V1UnitV02[2] MUL_ZPmZ_D           mul	z0.d, p7/m, z0.d, z31.d
 # CHECK-NEXT:  1      4     1.00                         4     V1UnitV,V1UnitV0,V1UnitV01,V1UnitV02       MUL_ZPmZ_H                 mul	z0.h, p7/m, z0.h, z31.h
diff --git a/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp b/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
index c708ab8647734..f616dea02303b 100644
--- a/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
+++ b/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
@@ -136,7 +136,7 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
     FOS.PadToColumn(Paddings[2]);
     if (IIVDEntry.RThroughput) {
       double RT = *IIVDEntry.RThroughput;
-      FOS << format("%.2f", RT);
+      FOS << format("%.2f", floor((RT * 100) + 0.5) / 100);
     } else {
       FOS << " -";
     }

Copy link
Contributor

@boomanaiden154 boomanaiden154 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a hack and not a principled solution to the underlying problem.

I don't think we're using anything other than the basic arithmetic operators inside of MCA to compute the value, so it should be bitwise identical between platforms assuming they are IEEE754 compliant and implement correct rounding on basic arithmetic operators. It would be good to understand why this platform difference exists (different default rounding modes? format implementation differences?) before papering over the issue with more arithmetic.

@simonwallis2
Copy link
Contributor Author

This is a hack and not a principled solution to the underlying problem.

I don't think we're using anything other than the basic arithmetic operators inside of MCA to compute the value, so it should be bitwise identical between platforms assuming they are IEEE754 compliant and implement correct rounding on basic arithmetic operators. It would be good to understand why this platform difference exists (different default rounding modes? format implementation differences?) before papering over the issue with more arithmetic.

In llvm/tools/llvm-mca, there are 10 instances of
format("%.2f", …)

9 of these instances explicitly round up, using
floor((X * 100) + 0.5) / 100)

This RT output is the 10th instance. This PR brings llvm-mca/InstructionInfoView.cpp into line with the other 9.

Copy link
Contributor

@boomanaiden154 boomanaiden154 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM if this improves consistency.

Would still be nice if someone had a better idea of why this behavior is occurring.

Copy link
Contributor

@vvereschaka vvereschaka left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. Thank you @simonwallis2.

Copy link
Member

@mshockwave mshockwave left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM thanks

if (IIVDEntry.RThroughput) {
double RT = *IIVDEntry.RThroughput;
FOS << format("%.2f", RT);
FOS << format("%.2f", floor((RT * 100) + 0.5) / 100);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: there will be an implicit promotion but perhaps we could just spell out 100.0?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: there will be an implicit promotion but perhaps we could just spell out 100.0?

I see what you mean, but in the spirit of "invisible mending", 100 matches the 30 or so other uses of implicitly promoted integer 100 in llvm-mca/Views.

@simonwallis2 simonwallis2 merged commit ae1e909 into llvm:main Sep 19, 2025
11 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Sep 19, 2025

LLVM Buildbot has detected a new failure on builder mlir-nvidia running on mlir-nvidia while building llvm at step 7 "test-build-check-mlir-build-only-check-mlir".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/138/builds/19309

Here is the relevant piece of the build log for the reference
Step 7 (test-build-check-mlir-build-only-check-mlir) failure: test (failure)
******************** TEST 'MLIR :: Integration/GPU/CUDA/async.mlir' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 1
/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-kernel-outlining  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)'  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-async-region -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary="format=fatbin"  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -async-to-async-runtime -async-runtime-ref-counting  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -convert-async-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -convert-cf-to-llvm -reconcile-unrealized-casts  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-runner    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_cuda_runtime.so    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_async_runtime.so    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_runner_utils.so    --entry-point-result=void -O0  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/FileCheck /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-kernel-outlining
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt '-pass-pipeline=builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)'
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-async-region -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary=format=fatbin
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -async-to-async-runtime -async-runtime-ref-counting
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -convert-async-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -convert-cf-to-llvm -reconcile-unrealized-casts
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-runner --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_cuda_runtime.so --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_async_runtime.so --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_runner_utils.so --entry-point-result=void -O0
# .---command stderr------------
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventSynchronize(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# `-----------------------------
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/FileCheck /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# .---command stderr------------
# | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir:68:12: error: CHECK: expected string not found in input
# |  // CHECK: [84, 84]
# |            ^
# | <stdin>:1:1: note: scanning from here
# | Unranked Memref base@ = 0x5c122c39ecf0 rank = 1 offset = 0 sizes = [2] strides = [1] data = 
# | ^
# | <stdin>:2:1: note: possible intended match here
# | [42, 42]
# | ^
# | 
# | Input file: <stdin>
# | Check file: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |             1: Unranked Memref base@ = 0x5c122c39ecf0 rank = 1 offset = 0 sizes = [2] strides = [1] data =  
# | check:68'0     X~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ error: no match found
# |             2: [42, 42] 
# | check:68'0     ~~~~~~~~~
# | check:68'1     ?         possible intended match
...

@simonwallis2
Copy link
Contributor Author

LLVM Buildbot has detected a new failure on builder mlir-nvidia running on mlir-nvidia while building llvm at step 7 "test-build-check-mlir-build-only-check-mlir".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/138/builds/19309

I took a look at the buildbot failure reported at
https://lab.llvm.org/buildbot/#/builders/138/builds/19309/steps/7/logs/FAIL__MLIR__async_mlir
But I don't see how it was related to the recent PR #159544

I note that builder 138 reported a similar failure twice in the previous 100 or so builds:
https://lab.llvm.org/buildbot/#/builders/138/builds/19224/steps/7/logs/FAIL__MLIR__async_mlir
https://lab.llvm.org/buildbot/#/builders/138/builds/19212/steps/7/logs/FAIL__MLIR__async_mlir

And I note that the latest build
https://lab.llvm.org/buildbot/#/builders/138/builds/19310 has just succeeded.
I suspect there is something inconsistent about the MLIR async test run by buildbot 138, and this is not a fault in PR159544.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

6 participants