Skip to content

Conversation

PeddleSpam
Copy link
Contributor

Fix bug in #140188 where incoming vectors are rotated in the wrong direction.

@llvmbot
Copy link
Member

llvmbot commented Sep 24, 2025

@llvm/pr-subscribers-vectorizers
@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-amdgpu

Author: Leon Clark (PeddleSpam)

Changes

Fix bug in #140188 where incoming vectors are rotated in the wrong direction.


Full diff: https://github.com/llvm/llvm-project/pull/160465.diff

3 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+1-1)
  • (modified) llvm/test/Transforms/VectorCombine/AMDGPU/narrow-phi-of-shuffles.ll (+9-9)
  • (modified) llvm/test/Transforms/VectorCombine/X86/narrow-phi-of-shuffles.ll (+15-15)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 0ef933f596604..cbdc621f1878b 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -4433,7 +4433,7 @@ bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
 
   // Create new mask using difference of the two incoming masks.
   int MaskOffset = NewMask[0u];
-  unsigned Index = (InputNumElements - MaskOffset) % InputNumElements;
+  unsigned Index = (InputNumElements + MaskOffset) % InputNumElements;
   NewMask.clear();
 
   for (unsigned I = 0u; I < InputNumElements; ++I) {
diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/narrow-phi-of-shuffles.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/narrow-phi-of-shuffles.ll
index 8c504843d87d8..b293976974bf5 100644
--- a/llvm/test/Transforms/VectorCombine/AMDGPU/narrow-phi-of-shuffles.ll
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/narrow-phi-of-shuffles.ll
@@ -392,7 +392,7 @@ define <4 x i32> @shuffle_v4i32(<3 x i32> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
@@ -427,7 +427,7 @@ define <8 x i32> @shuffle_v8i32(<3 x i32> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
@@ -462,7 +462,7 @@ define <16 x i32> @shuffle_v16i32(<3 x i32> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
@@ -497,7 +497,7 @@ define <32 x i32> @shuffle_v32i32(<3 x i32> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
@@ -1092,7 +1092,7 @@ define <4 x float> @shuffle_v4f32(<3 x float> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
@@ -1127,7 +1127,7 @@ define <6 x float> @shuffle_v6f32(<3 x float> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
@@ -1162,7 +1162,7 @@ define <8 x float> @shuffle_v8f32(<3 x float> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
@@ -1197,7 +1197,7 @@ define <16 x float> @shuffle_v16f32(<3 x float> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
@@ -1232,7 +1232,7 @@ define <32 x float> @shuffle_v32f32(<3 x float> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
diff --git a/llvm/test/Transforms/VectorCombine/X86/narrow-phi-of-shuffles.ll b/llvm/test/Transforms/VectorCombine/X86/narrow-phi-of-shuffles.ll
index 59422e98cbcc6..594017ecf84c3 100644
--- a/llvm/test/Transforms/VectorCombine/X86/narrow-phi-of-shuffles.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/narrow-phi-of-shuffles.ll
@@ -605,7 +605,7 @@ define <4 x bfloat> @shuffle_v4bf16(<3 x bfloat> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x bfloat> [[ARG0]], <3 x bfloat> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x bfloat> [[ARG0]], <3 x bfloat> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
@@ -640,7 +640,7 @@ define <6 x bfloat> @shuffle_v6bf16(<3 x bfloat> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x bfloat> [[ARG0]], <3 x bfloat> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x bfloat> [[ARG0]], <3 x bfloat> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
@@ -675,7 +675,7 @@ define <8 x bfloat> @shuffle_v8bf16(<3 x bfloat> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x bfloat> [[ARG0]], <3 x bfloat> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x bfloat> [[ARG0]], <3 x bfloat> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
@@ -710,7 +710,7 @@ define <16 x bfloat> @shuffle_v16bf16(<3 x bfloat> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x bfloat> [[ARG0]], <3 x bfloat> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x bfloat> [[ARG0]], <3 x bfloat> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
@@ -745,7 +745,7 @@ define <32 x bfloat> @shuffle_v32bf16(<3 x bfloat> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x bfloat> [[ARG0]], <3 x bfloat> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x bfloat> [[ARG0]], <3 x bfloat> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
@@ -850,7 +850,7 @@ define <4 x half> @shuffle_v4f16(<3 x half> %arg0, i1 %cond) {
 ; CHECK-V1-NEXT:  [[ENTRY:.*:]]
 ; CHECK-V1-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK-V1:       [[THEN]]:
-; CHECK-V1-NEXT:    [[TMP0:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-V1-NEXT:    [[TMP0:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-V1-NEXT:    tail call void @func0()
 ; CHECK-V1-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK-V1:       [[ELSE]]:
@@ -866,7 +866,7 @@ define <4 x half> @shuffle_v4f16(<3 x half> %arg0, i1 %cond) {
 ; CHECK-V2-NEXT:  [[ENTRY:.*:]]
 ; CHECK-V2-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK-V2:       [[THEN]]:
-; CHECK-V2-NEXT:    [[TMP0:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-V2-NEXT:    [[TMP0:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-V2-NEXT:    tail call void @func0()
 ; CHECK-V2-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK-V2:       [[ELSE]]:
@@ -933,7 +933,7 @@ define <6 x half> @shuffle_v6f16(<3 x half> %arg0, i1 %cond) {
 ; CHECK-V1-NEXT:  [[ENTRY:.*:]]
 ; CHECK-V1-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK-V1:       [[THEN]]:
-; CHECK-V1-NEXT:    [[TMP0:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-V1-NEXT:    [[TMP0:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-V1-NEXT:    tail call void @func0()
 ; CHECK-V1-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK-V1:       [[ELSE]]:
@@ -949,7 +949,7 @@ define <6 x half> @shuffle_v6f16(<3 x half> %arg0, i1 %cond) {
 ; CHECK-V2-NEXT:  [[ENTRY:.*:]]
 ; CHECK-V2-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK-V2:       [[THEN]]:
-; CHECK-V2-NEXT:    [[TMP0:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-V2-NEXT:    [[TMP0:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-V2-NEXT:    tail call void @func0()
 ; CHECK-V2-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK-V2:       [[ELSE]]:
@@ -1016,7 +1016,7 @@ define <8 x half> @shuffle_v8f16(<3 x half> %arg0, i1 %cond) {
 ; CHECK-V1-NEXT:  [[ENTRY:.*:]]
 ; CHECK-V1-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK-V1:       [[THEN]]:
-; CHECK-V1-NEXT:    [[TMP0:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-V1-NEXT:    [[TMP0:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-V1-NEXT:    tail call void @func0()
 ; CHECK-V1-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK-V1:       [[ELSE]]:
@@ -1032,7 +1032,7 @@ define <8 x half> @shuffle_v8f16(<3 x half> %arg0, i1 %cond) {
 ; CHECK-V2-NEXT:  [[ENTRY:.*:]]
 ; CHECK-V2-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK-V2:       [[THEN]]:
-; CHECK-V2-NEXT:    [[TMP0:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-V2-NEXT:    [[TMP0:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-V2-NEXT:    tail call void @func0()
 ; CHECK-V2-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK-V2:       [[ELSE]]:
@@ -1099,7 +1099,7 @@ define <16 x half> @shuffle_v16f16(<3 x half> %arg0, i1 %cond) {
 ; CHECK-V1-NEXT:  [[ENTRY:.*:]]
 ; CHECK-V1-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK-V1:       [[THEN]]:
-; CHECK-V1-NEXT:    [[TMP0:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-V1-NEXT:    [[TMP0:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-V1-NEXT:    tail call void @func0()
 ; CHECK-V1-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK-V1:       [[ELSE]]:
@@ -1115,7 +1115,7 @@ define <16 x half> @shuffle_v16f16(<3 x half> %arg0, i1 %cond) {
 ; CHECK-V2-NEXT:  [[ENTRY:.*:]]
 ; CHECK-V2-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK-V2:       [[THEN]]:
-; CHECK-V2-NEXT:    [[TMP0:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-V2-NEXT:    [[TMP0:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-V2-NEXT:    tail call void @func0()
 ; CHECK-V2-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK-V2:       [[ELSE]]:
@@ -1182,7 +1182,7 @@ define <32 x half> @shuffle_v32f16(<3 x half> %arg0, i1 %cond) {
 ; CHECK-V1-NEXT:  [[ENTRY:.*:]]
 ; CHECK-V1-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK-V1:       [[THEN]]:
-; CHECK-V1-NEXT:    [[TMP0:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-V1-NEXT:    [[TMP0:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-V1-NEXT:    tail call void @func0()
 ; CHECK-V1-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK-V1:       [[ELSE]]:
@@ -1198,7 +1198,7 @@ define <32 x half> @shuffle_v32f16(<3 x half> %arg0, i1 %cond) {
 ; CHECK-V2-NEXT:  [[ENTRY:.*:]]
 ; CHECK-V2-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK-V2:       [[THEN]]:
-; CHECK-V2-NEXT:    [[TMP0:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <3 x i32> <i32 1, i32 2, i32 0>
+; CHECK-V2-NEXT:    [[TMP0:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <3 x i32> <i32 2, i32 0, i32 1>
 ; CHECK-V2-NEXT:    tail call void @func0()
 ; CHECK-V2-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK-V2:       [[ELSE]]:

@PeddleSpam PeddleSpam requested a review from nikic September 24, 2025 08:29
@RKSimon
Copy link
Collaborator

RKSimon commented Sep 24, 2025

@PeddleSpam Please can you raise an issue as we'll need to backport this

@nikic
Copy link
Contributor

nikic commented Sep 24, 2025

@PeddleSpam Please can you raise an issue as we'll need to backport this

It's possible to backport directly from the PR, and an issue isn't required for that.

@RKSimon RKSimon added this to the LLVM 21.x Release milestone Sep 29, 2025
@github-project-automation github-project-automation bot moved this to Needs Triage in LLVM Release Status Sep 29, 2025
@PeddleSpam PeddleSpam merged commit 8df643f into llvm:main Sep 29, 2025
14 checks passed
@github-project-automation github-project-automation bot moved this from Needs Triage to Done in LLVM Release Status Sep 29, 2025
@nikic
Copy link
Contributor

nikic commented Sep 29, 2025

/cherry-pick 8df643f

@llvmbot
Copy link
Member

llvmbot commented Sep 29, 2025

Failed to cherry-pick: 8df643f

https://github.com/llvm/llvm-project/actions/runs/18096921518

Please manually backport the fix and push it to your GitHub fork. Once this is done, please create a pull request.

@llvm-ci
Copy link
Collaborator

llvm-ci commented Sep 29, 2025

LLVM Buildbot has detected a new failure on builder openmp-offload-amdgpu-runtime-2 running on rocm-worker-hw-02 while building llvm at step 6 "test-openmp".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/10/builds/14416

Here is the relevant piece of the build log for the reference
Step 6 (test-openmp) failure: test (failure)
******************** TEST 'libarcher :: races/parallel-simple.c' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 13
/home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/./bin/clang -fopenmp  -gdwarf-4 -O1 -fsanitize=thread  -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -L /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,/home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src   /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/races/parallel-simple.c -o /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/parallel-simple.c.tmp -latomic && env TSAN_OPTIONS='ignore_noninstrumented_modules=0:ignore_noninstrumented_modules=1' /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/deflake.bash /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/parallel-simple.c.tmp 2>&1 | tee /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/parallel-simple.c.tmp.log | /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/./bin/FileCheck /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/races/parallel-simple.c
# executed command: /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/./bin/clang -fopenmp -gdwarf-4 -O1 -fsanitize=thread -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -L /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,/home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/races/parallel-simple.c -o /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/parallel-simple.c.tmp -latomic
# note: command had no output on stdout or stderr
# executed command: env TSAN_OPTIONS=ignore_noninstrumented_modules=0:ignore_noninstrumented_modules=1 /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/deflake.bash /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/parallel-simple.c.tmp
# note: command had no output on stdout or stderr
# executed command: tee /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/parallel-simple.c.tmp.log
# note: command had no output on stdout or stderr
# executed command: /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/./bin/FileCheck /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/races/parallel-simple.c
# note: command had no output on stdout or stderr
# RUN: at line 14
/home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/./bin/clang -fopenmp  -gdwarf-4 -O1 -fsanitize=thread  -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -L /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,/home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src   /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/races/parallel-simple.c -o /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/parallel-simple.c.tmp -latomic && env ARCHER_OPTIONS="ignore_serial=1 report_data_leak=1" env TSAN_OPTIONS='ignore_noninstrumented_modules=0:ignore_noninstrumented_modules=1' /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/deflake.bash /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/parallel-simple.c.tmp 2>&1 | tee /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/parallel-simple.c.tmp.log | /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/./bin/FileCheck /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/races/parallel-simple.c
# executed command: /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/./bin/clang -fopenmp -gdwarf-4 -O1 -fsanitize=thread -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests -I /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -L /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -Wl,-rpath,/home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/runtime/src /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/races/parallel-simple.c -o /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/parallel-simple.c.tmp -latomic
# note: command had no output on stdout or stderr
# executed command: env 'ARCHER_OPTIONS=ignore_serial=1 report_data_leak=1' env TSAN_OPTIONS=ignore_noninstrumented_modules=0:ignore_noninstrumented_modules=1 /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/deflake.bash /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/parallel-simple.c.tmp
# note: command had no output on stdout or stderr
# executed command: tee /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/runtimes/runtimes-bins/openmp/tools/archer/tests/races/Output/parallel-simple.c.tmp.log
# note: command had no output on stdout or stderr
# executed command: /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.build/./bin/FileCheck /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/races/parallel-simple.c
# .---command stderr------------
# | /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/races/parallel-simple.c:36:11: error: CHECK: expected string not found in input
# | // CHECK: ThreadSanitizer: reported {{[1-7]}} warnings
# |           ^
# | <stdin>:23:5: note: scanning from here
# | DONE
# |     ^
# | <stdin>:24:1: note: possible intended match here
# | ThreadSanitizer: thread T4 finished with ignores enabled, created at:
# | ^
# | 
# | Input file: <stdin>
# | Check file: /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/races/parallel-simple.c
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |             .
# |             .
# |             .
# |            18:  #0 pthread_create /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp:1075:3 (parallel-simple.c.tmp+0xa3eca) 
# |            19:  #1 __kmp_create_worker z_Linux_util.cpp (libomp.so+0xcbc22) 
# |            20:  
# |            21: SUMMARY: ThreadSanitizer: data race /home/botworker/builds/openmp-offload-amdgpu-runtime-2/llvm.src/openmp/tools/archer/tests/races/parallel-simple.c:23:8 in main.omp_outlined_debug__ 
# |            22: ================== 
...

@RKSimon
Copy link
Collaborator

RKSimon commented Sep 29, 2025

@PeddleSpam please can you create a branch off 21.x and cherry pick the fix (or a revert if necessary).

@PeddleSpam
Copy link
Contributor Author

@PeddleSpam please can you create a branch off 21.x and cherry pick the fix (or a revert if necessary).

The phi narrowing PR isn't in 21.x so it doesn't need the fix.

@RKSimon
Copy link
Collaborator

RKSimon commented Sep 30, 2025

Sorry about that - I saw the PR was May 2025 and assumed the worst! My brain is entirely frazzled at this point from the house build :(

@PeddleSpam
Copy link
Contributor Author

Sorry about that - I saw the PR was May 2025 and assumed the worst! My brain is entirely frazzled at this point from the house build :(

No worries. Better to check just in case 😊

RiverDave pushed a commit that referenced this pull request Oct 1, 2025
Fix bug in #140188 where incoming vectors are rotated in the wrong
direction.

Co-authored-by: Leon Clark <leoclark@amd.com>
mahesh-attarde pushed a commit to mahesh-attarde/llvm-project that referenced this pull request Oct 3, 2025
Fix bug in llvm#140188 where incoming vectors are rotated in the wrong
direction.

Co-authored-by: Leon Clark <leoclark@amd.com>
@llvm-ci
Copy link
Collaborator

llvm-ci commented Oct 5, 2025

LLVM Buildbot has detected a new failure on builder ppc64le-flang-rhel-clang running on ppc64le-flang-rhel-test while building llvm at step 6 "test-build-unified-tree-check-flang".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/157/builds/40716

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-flang) failure: 1200 seconds without output running [b'ninja', b'check-flang'], attempting to kill
...
PASS: Flang :: Semantics/synchronization01b.f90 (3837 of 3847)
PASS: Flang :: Semantics/synchronization03a.f90 (3838 of 3847)
PASS: Flang :: Semantics/structconst05.f90 (3839 of 3847)
PASS: Flang :: Semantics/modfile71.F90 (3840 of 3847)
PASS: Flang :: Transforms/debug-dwarf-version.fir (3841 of 3847)
PASS: Flang :: Transforms/debug-line-table.fir (3842 of 3847)
PASS: Flang :: Transforms/debug-omp-target-op-1.fir (3843 of 3847)
PASS: Flang :: Semantics/mod-file-rewriter.f90 (3844 of 3847)
PASS: Flang :: Driver/omp-driver-offload.f90 (3845 of 3847)
PASS: Flang :: Intrinsics/math-codegen.fir (3846 of 3847)
command timed out: 1200 seconds without output running [b'ninja', b'check-flang'], attempting to kill
process killed by signal 9
program finished with exit code -1
elapsedTime=2179.619939

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants