diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 596a3a48b1004b..01bc936060b7f5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7494,13 +7494,17 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { } // fold (rot x, c) -> (rot x, c % BitSize) - // TODO - support non-uniform vector amounts. - if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) { - if (Cst->getAPIntValue().uge(Bitsize)) { - uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize); - return DAG.getNode(N->getOpcode(), dl, VT, N0, - DAG.getConstant(RotAmt, dl, N1.getValueType())); - } + bool OutOfRange = false; + auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) { + OutOfRange |= C->getAPIntValue().uge(Bitsize); + return true; + }; + if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) { + EVT AmtVT = N1.getValueType(); + SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT); + if (SDValue Amt = + DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits})) + return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt); } // Simplify the operands using demanded-bits information. diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll index ba4279f8d0c899..f300eee47d66d5 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll @@ -1272,7 +1272,7 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind { ; XOPAVX1-LABEL: constant_funnnel_v32i8: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1] +; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,0,7,6,5,4,3,2,1] ; XOPAVX1-NEXT: vprotb %xmm2, %xmm1, %xmm1 ; XOPAVX1-NEXT: vprotb %xmm2, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 @@ -1281,7 +1281,7 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind { ; XOPAVX2-LABEL: constant_funnnel_v32i8: ; XOPAVX2: # %bb.0: ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1] +; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,0,7,6,5,4,3,2,1] ; XOPAVX2-NEXT: vprotb %xmm2, %xmm1, %xmm1 ; XOPAVX2-NEXT: vprotb %xmm2, %xmm0, %xmm0 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll index 7d6a5eef9ebcc0..bc898ad1196043 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -1350,7 +1350,7 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind { ; XOPAVX1-LABEL: constant_funnnel_v32i8: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,254,253,252,251,250,249,248,249,250,251,252,253,254,255] +; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7] ; XOPAVX1-NEXT: vprotb %xmm2, %xmm1, %xmm1 ; XOPAVX1-NEXT: vprotb %xmm2, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 @@ -1359,7 +1359,7 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind { ; XOPAVX2-LABEL: constant_funnnel_v32i8: ; XOPAVX2: # %bb.0: ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,254,253,252,251,250,249,248,249,250,251,252,253,254,255] +; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7] ; XOPAVX2-NEXT: vprotb %xmm2, %xmm1, %xmm1 ; XOPAVX2-NEXT: vprotb %xmm2, %xmm0, %xmm0 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll index 33f14d7e2b6e3d..c0e59451b973dc 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-256.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll @@ -1265,7 +1265,7 @@ define <32 x i8> @constant_rotate_v32i8(<32 x i8> %a) nounwind { ; XOPAVX1-LABEL: constant_rotate_v32i8: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1] +; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,0,7,6,5,4,3,2,1] ; XOPAVX1-NEXT: vprotb %xmm2, %xmm1, %xmm1 ; XOPAVX1-NEXT: vprotb %xmm2, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 @@ -1274,7 +1274,7 @@ define <32 x i8> @constant_rotate_v32i8(<32 x i8> %a) nounwind { ; XOPAVX2-LABEL: constant_rotate_v32i8: ; XOPAVX2: # %bb.0: ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1] +; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,0,7,6,5,4,3,2,1] ; XOPAVX2-NEXT: vprotb %xmm2, %xmm1, %xmm1 ; XOPAVX2-NEXT: vprotb %xmm2, %xmm0, %xmm0 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0