81 changes: 32 additions & 49 deletions llvm/test/CodeGen/RISCV/shuffle-reverse.ll
@@ -117,7 +117,6 @@ define <32 x i8> @v16i8_2(<16 x i8> %a, <16 x i8> %b) {
; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RV32-NEXT: vle8.v v12, (a0)
; RV32-NEXT: vmv1r.v v14, v9
; RV32-NEXT: # kill: def $v8 killed $v8 def $v8m2
; RV32-NEXT: vrgather.vv v10, v8, v12
; RV32-NEXT: vid.v v8
; RV32-NEXT: vrsub.vi v8, v8, 15
@@ -138,7 +137,6 @@ define <32 x i8> @v16i8_2(<16 x i8> %a, <16 x i8> %b) {
; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RV64-NEXT: vle8.v v12, (a0)
; RV64-NEXT: vmv1r.v v14, v9
; RV64-NEXT: # kill: def $v8 killed $v8 def $v8m2
; RV64-NEXT: vrgather.vv v10, v8, v12
; RV64-NEXT: vid.v v8
; RV64-NEXT: vrsub.vi v8, v8, 15
@@ -232,7 +230,6 @@ define <16 x i16> @v8i16_2(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: v8i16_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v9
; CHECK-NEXT: # kill: def $v8 killed $v8 def $v8m2
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
; CHECK-NEXT: vid.v v14
; CHECK-NEXT: vrsub.vi v16, v14, 15
@@ -267,19 +264,18 @@ define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) {
; RV32-NEXT: addi a0, a0, %lo(.LCPI15_0)
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; RV32-NEXT: vle16.v v16, (a0)
; RV32-NEXT: vmv2r.v v20, v10
; RV32-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m4
; RV32-NEXT: vrgather.vv v12, v8, v16
; RV32-NEXT: vid.v v8
; RV32-NEXT: vrsub.vi v8, v8, 15
; RV32-NEXT: vle16.v v20, (a0)
; RV32-NEXT: vmv2r.v v16, v10
; RV32-NEXT: vmv2r.v v12, v8
; RV32-NEXT: vrgather.vv v8, v12, v20
; RV32-NEXT: vid.v v12
; RV32-NEXT: vrsub.vi v12, v12, 15
; RV32-NEXT: lui a0, 16
; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, mu
; RV32-NEXT: vrgather.vv v12, v20, v8, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: vrgather.vv v8, v16, v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: v16i16_2:
@@ -288,19 +284,18 @@ define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) {
; RV64-NEXT: addi a0, a0, %lo(.LCPI15_0)
; RV64-NEXT: li a1, 32
; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; RV64-NEXT: vle16.v v16, (a0)
; RV64-NEXT: vmv2r.v v20, v10
; RV64-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m4
; RV64-NEXT: vrgather.vv v12, v8, v16
; RV64-NEXT: vid.v v8
; RV64-NEXT: vrsub.vi v8, v8, 15
; RV64-NEXT: vle16.v v20, (a0)
; RV64-NEXT: vmv2r.v v16, v10
; RV64-NEXT: vmv2r.v v12, v8
; RV64-NEXT: vrgather.vv v8, v12, v20
; RV64-NEXT: vid.v v12
; RV64-NEXT: vrsub.vi v12, v12, 15
; RV64-NEXT: lui a0, 16
; RV64-NEXT: addiw a0, a0, -1
; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV64-NEXT: vmv.s.x v0, a0
; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, mu
; RV64-NEXT: vrgather.vv v12, v20, v8, v0.t
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: vrgather.vv v8, v16, v12, v0.t
; RV64-NEXT: ret
%v32i16 = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <32 x i16> %v32i16
@@ -354,7 +349,6 @@ define <8 x i32> @v4i32_2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: v4i32_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v9
; CHECK-NEXT: # kill: def $v8 killed $v8 def $v8m2
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT: vid.v v14
; CHECK-NEXT: vrsub.vi v16, v14, 7
@@ -386,7 +380,6 @@ define <16 x i32> @v8i32_2(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: v8i32_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv2r.v v16, v10
; CHECK-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m4
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT: vid.v v20
; CHECK-NEXT: vrsub.vi v24, v20, 15
@@ -475,16 +468,15 @@ define <4 x i64> @v2i64_2(<2 x i64> %a, < 2 x i64> %b) {
; CHECK-LABEL: v2i64_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v12, v8, 1
; CHECK-NEXT: vslidedown.vi v10, v8, 1
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vslideup.vi v12, v8, 1
; CHECK-NEXT: vslideup.vi v10, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v9, 1
; CHECK-NEXT: vslidedown.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vslideup.vi v10, v9, 1
; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma
; CHECK-NEXT: vslideup.vi v10, v12, 2
; CHECK-NEXT: vmv2r.v v8, v10
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: ret
%v4i64 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x i64> %v4i64
@@ -517,16 +509,15 @@ define <8 x i64> @v4i64_2(<4 x i64> %a, <4 x i64> %b) {
; RV32-LABEL: v4i64_2:
; RV32: # %bb.0:
; RV32-NEXT: vmv2r.v v16, v10
; RV32-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m4
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vid.v v20
; RV32-NEXT: vrsub.vi v21, v20, 7
; RV32-NEXT: vid.v v18
; RV32-NEXT: vrsub.vi v19, v18, 7
; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32-NEXT: vrgatherei16.vv v12, v8, v21
; RV32-NEXT: vrgatherei16.vv v12, v8, v19
; RV32-NEXT: li a0, 15
; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT: vrsub.vi v8, v20, 3
; RV32-NEXT: vrsub.vi v8, v18, 3
; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t
; RV32-NEXT: vmv.v.v v8, v12
@@ -535,7 +526,6 @@ define <8 x i64> @v4i64_2(<4 x i64> %a, <4 x i64> %b) {
; RV64-LABEL: v4i64_2:
; RV64: # %bb.0:
; RV64-NEXT: vmv2r.v v16, v10
; RV64-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m4
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vid.v v20
; RV64-NEXT: vrsub.vi v24, v20, 7
@@ -628,7 +618,6 @@ define <16 x half> @v8f16_2(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: v8f16_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v9
; CHECK-NEXT: # kill: def $v8 killed $v8 def $v8m2
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
; CHECK-NEXT: vid.v v14
; CHECK-NEXT: vrsub.vi v16, v14, 15
@@ -659,7 +648,6 @@ define <16 x half> @v16f16(<16 x half> %a) {
define <32 x half> @v16f16_2(<16 x half> %a) {
; CHECK-LABEL: v16f16_2:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m4
; CHECK-NEXT: lui a0, %hi(.LCPI35_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI35_0)
; CHECK-NEXT: li a1, 32
@@ -723,7 +711,6 @@ define <8 x float> @v4f32_2(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: v4f32_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v9
; CHECK-NEXT: # kill: def $v8 killed $v8 def $v8m2
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT: vid.v v14
; CHECK-NEXT: vrsub.vi v16, v14, 7
@@ -755,7 +742,6 @@ define <16 x float> @v8f32_2(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: v8f32_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv2r.v v16, v10
; CHECK-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m4
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT: vid.v v20
; CHECK-NEXT: vrsub.vi v24, v20, 15
@@ -787,16 +773,15 @@ define <4 x double> @v2f64_2(<2 x double> %a, < 2 x double> %b) {
; CHECK-LABEL: v2f64_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v12, v8, 1
; CHECK-NEXT: vslidedown.vi v10, v8, 1
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vslideup.vi v12, v8, 1
; CHECK-NEXT: vslideup.vi v10, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v9, 1
; CHECK-NEXT: vslidedown.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vslideup.vi v10, v9, 1
; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma
; CHECK-NEXT: vslideup.vi v10, v12, 2
; CHECK-NEXT: vmv2r.v v8, v10
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: ret
%v4f64 = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x double> %v4f64
@@ -829,16 +814,15 @@ define <8 x double> @v4f64_2(<4 x double> %a, <4 x double> %b) {
; RV32-LABEL: v4f64_2:
; RV32: # %bb.0:
; RV32-NEXT: vmv2r.v v16, v10
; RV32-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m4
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vid.v v20
; RV32-NEXT: vrsub.vi v21, v20, 7
; RV32-NEXT: vid.v v18
; RV32-NEXT: vrsub.vi v19, v18, 7
; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32-NEXT: vrgatherei16.vv v12, v8, v21
; RV32-NEXT: vrgatherei16.vv v12, v8, v19
; RV32-NEXT: li a0, 15
; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT: vrsub.vi v8, v20, 3
; RV32-NEXT: vrsub.vi v8, v18, 3
; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t
; RV32-NEXT: vmv.v.v v8, v12
@@ -847,7 +831,6 @@ define <8 x double> @v4f64_2(<4 x double> %a, <4 x double> %b) {
; RV64-LABEL: v4f64_2:
; RV64: # %bb.0:
; RV64-NEXT: vmv2r.v v16, v10
; RV64-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m4
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vid.v v20
; RV64-NEXT: vrsub.vi v24, v20, 7