48 changes: 26 additions & 22 deletions llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ define <vscale x 1 x i8> @insertelt_nxv1i8_imm(<vscale x 1 x i8> %v, i8 signext
ret <vscale x 1 x i8> %r
}

define <vscale x 1 x i8> @insertelt_nxv1i8_idx(<vscale x 1 x i8> %v, i8 signext %elt, i32 signext %idx) {
define <vscale x 1 x i8> @insertelt_nxv1i8_idx(<vscale x 1 x i8> %v, i8 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv1i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma
Expand Down Expand Up @@ -59,7 +59,7 @@ define <vscale x 2 x i8> @insertelt_nxv2i8_imm(<vscale x 2 x i8> %v, i8 signext
ret <vscale x 2 x i8> %r
}

define <vscale x 2 x i8> @insertelt_nxv2i8_idx(<vscale x 2 x i8> %v, i8 signext %elt, i32 signext %idx) {
define <vscale x 2 x i8> @insertelt_nxv2i8_idx(<vscale x 2 x i8> %v, i8 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv2i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
Expand Down Expand Up @@ -94,7 +94,7 @@ define <vscale x 4 x i8> @insertelt_nxv4i8_imm(<vscale x 4 x i8> %v, i8 signext
ret <vscale x 4 x i8> %r
}

define <vscale x 4 x i8> @insertelt_nxv4i8_idx(<vscale x 4 x i8> %v, i8 signext %elt, i32 signext %idx) {
define <vscale x 4 x i8> @insertelt_nxv4i8_idx(<vscale x 4 x i8> %v, i8 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv4i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
Expand Down Expand Up @@ -129,7 +129,7 @@ define <vscale x 8 x i8> @insertelt_nxv8i8_imm(<vscale x 8 x i8> %v, i8 signext
ret <vscale x 8 x i8> %r
}

define <vscale x 8 x i8> @insertelt_nxv8i8_idx(<vscale x 8 x i8> %v, i8 signext %elt, i32 signext %idx) {
define <vscale x 8 x i8> @insertelt_nxv8i8_idx(<vscale x 8 x i8> %v, i8 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv8i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
Expand Down Expand Up @@ -164,7 +164,7 @@ define <vscale x 16 x i8> @insertelt_nxv16i8_imm(<vscale x 16 x i8> %v, i8 signe
ret <vscale x 16 x i8> %r
}

define <vscale x 16 x i8> @insertelt_nxv16i8_idx(<vscale x 16 x i8> %v, i8 signext %elt, i32 signext %idx) {
define <vscale x 16 x i8> @insertelt_nxv16i8_idx(<vscale x 16 x i8> %v, i8 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv16i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
Expand Down Expand Up @@ -199,7 +199,7 @@ define <vscale x 32 x i8> @insertelt_nxv32i8_imm(<vscale x 32 x i8> %v, i8 signe
ret <vscale x 32 x i8> %r
}

define <vscale x 32 x i8> @insertelt_nxv32i8_idx(<vscale x 32 x i8> %v, i8 signext %elt, i32 signext %idx) {
define <vscale x 32 x i8> @insertelt_nxv32i8_idx(<vscale x 32 x i8> %v, i8 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv32i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, ma
Expand Down Expand Up @@ -234,7 +234,7 @@ define <vscale x 64 x i8> @insertelt_nxv64i8_imm(<vscale x 64 x i8> %v, i8 signe
ret <vscale x 64 x i8> %r
}

define <vscale x 64 x i8> @insertelt_nxv64i8_idx(<vscale x 64 x i8> %v, i8 signext %elt, i32 signext %idx) {
define <vscale x 64 x i8> @insertelt_nxv64i8_idx(<vscale x 64 x i8> %v, i8 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv64i8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma
Expand Down Expand Up @@ -269,7 +269,7 @@ define <vscale x 1 x i16> @insertelt_nxv1i16_imm(<vscale x 1 x i16> %v, i16 sign
ret <vscale x 1 x i16> %r
}

define <vscale x 1 x i16> @insertelt_nxv1i16_idx(<vscale x 1 x i16> %v, i16 signext %elt, i32 signext %idx) {
define <vscale x 1 x i16> @insertelt_nxv1i16_idx(<vscale x 1 x i16> %v, i16 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv1i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
Expand Down Expand Up @@ -304,7 +304,7 @@ define <vscale x 2 x i16> @insertelt_nxv2i16_imm(<vscale x 2 x i16> %v, i16 sign
ret <vscale x 2 x i16> %r
}

define <vscale x 2 x i16> @insertelt_nxv2i16_idx(<vscale x 2 x i16> %v, i16 signext %elt, i32 signext %idx) {
define <vscale x 2 x i16> @insertelt_nxv2i16_idx(<vscale x 2 x i16> %v, i16 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv2i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
Expand Down Expand Up @@ -339,7 +339,7 @@ define <vscale x 4 x i16> @insertelt_nxv4i16_imm(<vscale x 4 x i16> %v, i16 sign
ret <vscale x 4 x i16> %r
}

define <vscale x 4 x i16> @insertelt_nxv4i16_idx(<vscale x 4 x i16> %v, i16 signext %elt, i32 signext %idx) {
define <vscale x 4 x i16> @insertelt_nxv4i16_idx(<vscale x 4 x i16> %v, i16 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv4i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
Expand Down Expand Up @@ -374,7 +374,7 @@ define <vscale x 8 x i16> @insertelt_nxv8i16_imm(<vscale x 8 x i16> %v, i16 sign
ret <vscale x 8 x i16> %r
}

define <vscale x 8 x i16> @insertelt_nxv8i16_idx(<vscale x 8 x i16> %v, i16 signext %elt, i32 signext %idx) {
define <vscale x 8 x i16> @insertelt_nxv8i16_idx(<vscale x 8 x i16> %v, i16 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv8i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
Expand Down Expand Up @@ -409,7 +409,7 @@ define <vscale x 16 x i16> @insertelt_nxv16i16_imm(<vscale x 16 x i16> %v, i16 s
ret <vscale x 16 x i16> %r
}

define <vscale x 16 x i16> @insertelt_nxv16i16_idx(<vscale x 16 x i16> %v, i16 signext %elt, i32 signext %idx) {
define <vscale x 16 x i16> @insertelt_nxv16i16_idx(<vscale x 16 x i16> %v, i16 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv16i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
Expand Down Expand Up @@ -444,7 +444,7 @@ define <vscale x 32 x i16> @insertelt_nxv32i16_imm(<vscale x 32 x i16> %v, i16 s
ret <vscale x 32 x i16> %r
}

define <vscale x 32 x i16> @insertelt_nxv32i16_idx(<vscale x 32 x i16> %v, i16 signext %elt, i32 signext %idx) {
define <vscale x 32 x i16> @insertelt_nxv32i16_idx(<vscale x 32 x i16> %v, i16 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv32i16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma
Expand Down Expand Up @@ -479,7 +479,7 @@ define <vscale x 1 x i32> @insertelt_nxv1i32_imm(<vscale x 1 x i32> %v, i32 sign
ret <vscale x 1 x i32> %r
}

define <vscale x 1 x i32> @insertelt_nxv1i32_idx(<vscale x 1 x i32> %v, i32 signext %elt, i32 signext %idx) {
define <vscale x 1 x i32> @insertelt_nxv1i32_idx(<vscale x 1 x i32> %v, i32 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv1i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
Expand Down Expand Up @@ -514,7 +514,7 @@ define <vscale x 2 x i32> @insertelt_nxv2i32_imm(<vscale x 2 x i32> %v, i32 sign
ret <vscale x 2 x i32> %r
}

define <vscale x 2 x i32> @insertelt_nxv2i32_idx(<vscale x 2 x i32> %v, i32 signext %elt, i32 signext %idx) {
define <vscale x 2 x i32> @insertelt_nxv2i32_idx(<vscale x 2 x i32> %v, i32 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv2i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
Expand Down Expand Up @@ -549,7 +549,7 @@ define <vscale x 4 x i32> @insertelt_nxv4i32_imm(<vscale x 4 x i32> %v, i32 sign
ret <vscale x 4 x i32> %r
}

define <vscale x 4 x i32> @insertelt_nxv4i32_idx(<vscale x 4 x i32> %v, i32 signext %elt, i32 signext %idx) {
define <vscale x 4 x i32> @insertelt_nxv4i32_idx(<vscale x 4 x i32> %v, i32 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv4i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
Expand Down Expand Up @@ -584,7 +584,7 @@ define <vscale x 8 x i32> @insertelt_nxv8i32_imm(<vscale x 8 x i32> %v, i32 sign
ret <vscale x 8 x i32> %r
}

define <vscale x 8 x i32> @insertelt_nxv8i32_idx(<vscale x 8 x i32> %v, i32 signext %elt, i32 signext %idx) {
define <vscale x 8 x i32> @insertelt_nxv8i32_idx(<vscale x 8 x i32> %v, i32 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv8i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, ma
Expand Down Expand Up @@ -619,7 +619,7 @@ define <vscale x 16 x i32> @insertelt_nxv16i32_imm(<vscale x 16 x i32> %v, i32 s
ret <vscale x 16 x i32> %r
}

define <vscale x 16 x i32> @insertelt_nxv16i32_idx(<vscale x 16 x i32> %v, i32 signext %elt, i32 signext %idx) {
define <vscale x 16 x i32> @insertelt_nxv16i32_idx(<vscale x 16 x i32> %v, i32 signext %elt, i32 zeroext %idx) {
; CHECK-LABEL: insertelt_nxv16i32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma
Expand Down Expand Up @@ -659,7 +659,8 @@ define <vscale x 1 x i64> @insertelt_nxv1i64_idx(<vscale x 1 x i64> %v, i64 %elt
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: sext.w a0, a1
; CHECK-NEXT: slli a0, a1, 32
; CHECK-NEXT: srli a0, a0, 32
; CHECK-NEXT: addi a1, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, ma
; CHECK-NEXT: vslideup.vx v8, v9, a0
Expand Down Expand Up @@ -695,7 +696,8 @@ define <vscale x 2 x i64> @insertelt_nxv2i64_idx(<vscale x 2 x i64> %v, i64 %elt
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e64, m2, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: sext.w a0, a1
; CHECK-NEXT: slli a0, a1, 32
; CHECK-NEXT: srli a0, a0, 32
; CHECK-NEXT: addi a1, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, ma
; CHECK-NEXT: vslideup.vx v8, v10, a0
Expand Down Expand Up @@ -731,7 +733,8 @@ define <vscale x 4 x i64> @insertelt_nxv4i64_idx(<vscale x 4 x i64> %v, i64 %elt
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma
; CHECK-NEXT: vmv.s.x v12, a0
; CHECK-NEXT: sext.w a0, a1
; CHECK-NEXT: slli a0, a1, 32
; CHECK-NEXT: srli a0, a0, 32
; CHECK-NEXT: addi a1, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, ma
; CHECK-NEXT: vslideup.vx v8, v12, a0
Expand Down Expand Up @@ -767,7 +770,8 @@ define <vscale x 8 x i64> @insertelt_nxv8i64_idx(<vscale x 8 x i64> %v, i64 %elt
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv.s.x v16, a0
; CHECK-NEXT: sext.w a0, a1
; CHECK-NEXT: slli a0, a1, 32
; CHECK-NEXT: srli a0, a0, 32
; CHECK-NEXT: addi a1, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma
; CHECK-NEXT: vslideup.vx v8, v16, a0
Expand Down
14 changes: 12 additions & 2 deletions llvm/test/CodeGen/VE/Vector/extract_elt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
define fastcc i64 @extract_rr_v256i64(i32 signext %idx, <256 x i64> %v) {
; CHECK-LABEL: extract_rr_v256i64:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lvs %s0, %v0(%s0)
; CHECK-NEXT: b.l.t (, %s10)
%ret = extractelement <256 x i64> %v, i32 %idx
Expand Down Expand Up @@ -45,6 +46,7 @@ define fastcc i64 @extract_ri_v512i64(<512 x i64> %v) {
define fastcc i32 @extract_rr_v256i32(i32 signext %idx, <256 x i32> %v) {
; CHECK-LABEL: extract_rr_v256i32:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lvs %s0, %v0(%s0)
; CHECK-NEXT: b.l.t (, %s10)
%ret = extractelement <256 x i32> %v, i32 %idx
Expand Down Expand Up @@ -84,7 +86,10 @@ define fastcc i32 @extract_ri_v512i32(<512 x i32> %v) {
define fastcc i32 @extract_rr_v512i32(<512 x i32> %v, i32 signext %idx) {
; CHECK-LABEL: extract_rr_v512i32:
; CHECK: # %bb.0:
; CHECK-NEXT: srl %s1, %s0, 1
; CHECK-NEXT: lea %s1, -2
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: and %s1, %s0, %s1
; CHECK-NEXT: srl %s1, %s1, 1
; CHECK-NEXT: lvs %s1, %v0(%s1)
; CHECK-NEXT: nnd %s0, %s0, (63)0
; CHECK-NEXT: sla.w.sx %s0, %s0, 5
Expand All @@ -100,6 +105,7 @@ define fastcc i32 @extract_rr_v512i32(<512 x i32> %v, i32 signext %idx) {
define fastcc double @extract_rr_v256f64(i32 signext %idx, <256 x double> %v) {
; CHECK-LABEL: extract_rr_v256f64:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lvs %s0, %v0(%s0)
; CHECK-NEXT: b.l.t (, %s10)
%ret = extractelement <256 x double> %v, i32 %idx
Expand Down Expand Up @@ -139,6 +145,7 @@ define fastcc double @extract_ri_v512f64(<512 x double> %v) {
define fastcc float @extract_rr_v256f32(i32 signext %idx, <256 x float> %v) {
; CHECK-LABEL: extract_rr_v256f32:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lvs %s0, %v0(%s0)
; CHECK-NEXT: b.l.t (, %s10)
%ret = extractelement <256 x float> %v, i32 %idx
Expand Down Expand Up @@ -179,7 +186,10 @@ define fastcc float @extract_ri_v512f32(<512 x float> %v) {
define fastcc float @extract_rr_v512f32(<512 x float> %v, i32 signext %idx) {
; CHECK-LABEL: extract_rr_v512f32:
; CHECK: # %bb.0:
; CHECK-NEXT: srl %s1, %s0, 1
; CHECK-NEXT: lea %s1, -2
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: and %s1, %s0, %s1
; CHECK-NEXT: srl %s1, %s1, 1
; CHECK-NEXT: lvs %s1, %v0(%s1)
; CHECK-NEXT: nnd %s0, %s0, (63)0
; CHECK-NEXT: sla.w.sx %s0, %s0, 5
Expand Down
12 changes: 11 additions & 1 deletion llvm/test/CodeGen/VE/Vector/insert_elt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
define fastcc <256 x i64> @insert_rr_v256i64(i32 signext %idx, i64 %s) {
; CHECK-LABEL: insert_rr_v256i64:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lsv %v0(%s0), %s1
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <256 x i64> undef, i64 %s, i32 %idx
Expand Down Expand Up @@ -46,6 +47,7 @@ define fastcc <256 x i32> @insert_rr_v256i32(i32 signext %idx, i32 signext %s) {
; CHECK-LABEL: insert_rr_v256i32:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lsv %v0(%s0), %s1
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <256 x i32> undef, i32 %s, i32 %idx
Expand Down Expand Up @@ -94,6 +96,9 @@ define fastcc <512 x i32> @insert_rr_v512i32(i32 signext %idx, i32 signext %s) {
; CHECK-NEXT: nnd %s2, %s0, (63)0
; CHECK-NEXT: sla.w.sx %s2, %s2, 5
; CHECK-NEXT: sll %s1, %s1, %s2
; CHECK-NEXT: lea %s3, -2
; CHECK-NEXT: and %s3, %s3, (32)0
; CHECK-NEXT: and %s0, %s0, %s3
; CHECK-NEXT: srl %s0, %s0, 1
; CHECK-NEXT: lvs %s3, %v0(%s0)
; CHECK-NEXT: srl %s2, (32)1, %s2
Expand All @@ -110,6 +115,7 @@ define fastcc <512 x i32> @insert_rr_v512i32(i32 signext %idx, i32 signext %s) {
define fastcc <256 x double> @insert_rr_v256f64(i32 signext %idx, double %s) {
; CHECK-LABEL: insert_rr_v256f64:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lsv %v0(%s0), %s1
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <256 x double> undef, double %s, i32 %idx
Expand Down Expand Up @@ -149,6 +155,7 @@ define fastcc <512 x double> @insert_ri_v512f64(double %s) {
define fastcc <256 x float> @insert_rr_v256f32(i32 signext %idx, float %s) {
; CHECK-LABEL: insert_rr_v256f32:
; CHECK: # %bb.0:
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lsv %v0(%s0), %s1
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <256 x float> undef, float %s, i32 %idx
Expand Down Expand Up @@ -193,7 +200,10 @@ define fastcc <512 x float> @insert_rr_v512f32(i32 signext %idx, float %s) {
; CHECK-LABEL: insert_rr_v512f32:
; CHECK: # %bb.0:
; CHECK-NEXT: sra.l %s1, %s1, 32
; CHECK-NEXT: srl %s2, %s0, 1
; CHECK-NEXT: lea %s2, -2
; CHECK-NEXT: and %s2, %s2, (32)0
; CHECK-NEXT: and %s2, %s0, %s2
; CHECK-NEXT: srl %s2, %s2, 1
; CHECK-NEXT: lvs %s3, %v0(%s2)
; CHECK-NEXT: nnd %s0, %s0, (63)0
; CHECK-NEXT: sla.w.sx %s0, %s0, 5
Expand Down
143 changes: 131 additions & 12 deletions llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,17 @@ define <16 x i8> @swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) {
; CHECK-LABEL: swizzle_one_i8x16:
; CHECK: .functype swizzle_one_i8x16 (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i8x16.swizzle $push0=, $0, $1
; CHECK-NEXT: return $pop0
; CHECK-NEXT: global.get $push5=, __stack_pointer
; CHECK-NEXT: i32.const $push6=, 16
; CHECK-NEXT: i32.sub $push8=, $pop5, $pop6
; CHECK-NEXT: local.tee $push7=, $2=, $pop8
; CHECK-NEXT: v128.store 0($pop7), $0
; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 0
; CHECK-NEXT: i32.const $push1=, 15
; CHECK-NEXT: i32.and $push2=, $pop0, $pop1
; CHECK-NEXT: i32.or $push3=, $2, $pop2
; CHECK-NEXT: v128.load8_splat $push4=, 0($pop3)
; CHECK-NEXT: return $pop4
%m0 = extractelement <16 x i8> %mask, i32 0
%s0 = extractelement <16 x i8> %src, i8 %m0
%v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
Expand All @@ -109,8 +118,107 @@ define <16 x i8> @swizzle_all_i8x16(<16 x i8> %src, <16 x i8> %mask) {
; CHECK-LABEL: swizzle_all_i8x16:
; CHECK: .functype swizzle_all_i8x16 (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i8x16.swizzle $push0=, $0, $1
; CHECK-NEXT: return $pop0
; CHECK-NEXT: global.get $push80=, __stack_pointer
; CHECK-NEXT: i32.const $push81=, 16
; CHECK-NEXT: i32.sub $push98=, $pop80, $pop81
; CHECK-NEXT: local.tee $push97=, $2=, $pop98
; CHECK-NEXT: v128.store 0($pop97), $0
; CHECK-NEXT: i8x16.extract_lane_u $push61=, $1, 0
; CHECK-NEXT: i32.const $push1=, 15
; CHECK-NEXT: i32.and $push62=, $pop61, $pop1
; CHECK-NEXT: i32.or $push63=, $2, $pop62
; CHECK-NEXT: v128.load8_splat $push64=, 0($pop63)
; CHECK-NEXT: i8x16.extract_lane_u $push57=, $1, 1
; CHECK-NEXT: i32.const $push96=, 15
; CHECK-NEXT: i32.and $push58=, $pop57, $pop96
; CHECK-NEXT: i32.or $push59=, $2, $pop58
; CHECK-NEXT: i32.load8_u $push60=, 0($pop59)
; CHECK-NEXT: i8x16.replace_lane $push65=, $pop64, 1, $pop60
; CHECK-NEXT: i8x16.extract_lane_u $push53=, $1, 2
; CHECK-NEXT: i32.const $push95=, 15
; CHECK-NEXT: i32.and $push54=, $pop53, $pop95
; CHECK-NEXT: i32.or $push55=, $2, $pop54
; CHECK-NEXT: i32.load8_u $push56=, 0($pop55)
; CHECK-NEXT: i8x16.replace_lane $push66=, $pop65, 2, $pop56
; CHECK-NEXT: i8x16.extract_lane_u $push49=, $1, 3
; CHECK-NEXT: i32.const $push94=, 15
; CHECK-NEXT: i32.and $push50=, $pop49, $pop94
; CHECK-NEXT: i32.or $push51=, $2, $pop50
; CHECK-NEXT: i32.load8_u $push52=, 0($pop51)
; CHECK-NEXT: i8x16.replace_lane $push67=, $pop66, 3, $pop52
; CHECK-NEXT: i8x16.extract_lane_u $push45=, $1, 4
; CHECK-NEXT: i32.const $push93=, 15
; CHECK-NEXT: i32.and $push46=, $pop45, $pop93
; CHECK-NEXT: i32.or $push47=, $2, $pop46
; CHECK-NEXT: i32.load8_u $push48=, 0($pop47)
; CHECK-NEXT: i8x16.replace_lane $push68=, $pop67, 4, $pop48
; CHECK-NEXT: i8x16.extract_lane_u $push41=, $1, 5
; CHECK-NEXT: i32.const $push92=, 15
; CHECK-NEXT: i32.and $push42=, $pop41, $pop92
; CHECK-NEXT: i32.or $push43=, $2, $pop42
; CHECK-NEXT: i32.load8_u $push44=, 0($pop43)
; CHECK-NEXT: i8x16.replace_lane $push69=, $pop68, 5, $pop44
; CHECK-NEXT: i8x16.extract_lane_u $push37=, $1, 6
; CHECK-NEXT: i32.const $push91=, 15
; CHECK-NEXT: i32.and $push38=, $pop37, $pop91
; CHECK-NEXT: i32.or $push39=, $2, $pop38
; CHECK-NEXT: i32.load8_u $push40=, 0($pop39)
; CHECK-NEXT: i8x16.replace_lane $push70=, $pop69, 6, $pop40
; CHECK-NEXT: i8x16.extract_lane_u $push33=, $1, 7
; CHECK-NEXT: i32.const $push90=, 15
; CHECK-NEXT: i32.and $push34=, $pop33, $pop90
; CHECK-NEXT: i32.or $push35=, $2, $pop34
; CHECK-NEXT: i32.load8_u $push36=, 0($pop35)
; CHECK-NEXT: i8x16.replace_lane $push71=, $pop70, 7, $pop36
; CHECK-NEXT: i8x16.extract_lane_u $push29=, $1, 8
; CHECK-NEXT: i32.const $push89=, 15
; CHECK-NEXT: i32.and $push30=, $pop29, $pop89
; CHECK-NEXT: i32.or $push31=, $2, $pop30
; CHECK-NEXT: i32.load8_u $push32=, 0($pop31)
; CHECK-NEXT: i8x16.replace_lane $push72=, $pop71, 8, $pop32
; CHECK-NEXT: i8x16.extract_lane_u $push25=, $1, 9
; CHECK-NEXT: i32.const $push88=, 15
; CHECK-NEXT: i32.and $push26=, $pop25, $pop88
; CHECK-NEXT: i32.or $push27=, $2, $pop26
; CHECK-NEXT: i32.load8_u $push28=, 0($pop27)
; CHECK-NEXT: i8x16.replace_lane $push73=, $pop72, 9, $pop28
; CHECK-NEXT: i8x16.extract_lane_u $push21=, $1, 10
; CHECK-NEXT: i32.const $push87=, 15
; CHECK-NEXT: i32.and $push22=, $pop21, $pop87
; CHECK-NEXT: i32.or $push23=, $2, $pop22
; CHECK-NEXT: i32.load8_u $push24=, 0($pop23)
; CHECK-NEXT: i8x16.replace_lane $push74=, $pop73, 10, $pop24
; CHECK-NEXT: i8x16.extract_lane_u $push17=, $1, 11
; CHECK-NEXT: i32.const $push86=, 15
; CHECK-NEXT: i32.and $push18=, $pop17, $pop86
; CHECK-NEXT: i32.or $push19=, $2, $pop18
; CHECK-NEXT: i32.load8_u $push20=, 0($pop19)
; CHECK-NEXT: i8x16.replace_lane $push75=, $pop74, 11, $pop20
; CHECK-NEXT: i8x16.extract_lane_u $push13=, $1, 12
; CHECK-NEXT: i32.const $push85=, 15
; CHECK-NEXT: i32.and $push14=, $pop13, $pop85
; CHECK-NEXT: i32.or $push15=, $2, $pop14
; CHECK-NEXT: i32.load8_u $push16=, 0($pop15)
; CHECK-NEXT: i8x16.replace_lane $push76=, $pop75, 12, $pop16
; CHECK-NEXT: i8x16.extract_lane_u $push9=, $1, 13
; CHECK-NEXT: i32.const $push84=, 15
; CHECK-NEXT: i32.and $push10=, $pop9, $pop84
; CHECK-NEXT: i32.or $push11=, $2, $pop10
; CHECK-NEXT: i32.load8_u $push12=, 0($pop11)
; CHECK-NEXT: i8x16.replace_lane $push77=, $pop76, 13, $pop12
; CHECK-NEXT: i8x16.extract_lane_u $push5=, $1, 14
; CHECK-NEXT: i32.const $push83=, 15
; CHECK-NEXT: i32.and $push6=, $pop5, $pop83
; CHECK-NEXT: i32.or $push7=, $2, $pop6
; CHECK-NEXT: i32.load8_u $push8=, 0($pop7)
; CHECK-NEXT: i8x16.replace_lane $push78=, $pop77, 14, $pop8
; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 15
; CHECK-NEXT: i32.const $push82=, 15
; CHECK-NEXT: i32.and $push2=, $pop0, $pop82
; CHECK-NEXT: i32.or $push3=, $2, $pop2
; CHECK-NEXT: i32.load8_u $push4=, 0($pop3)
; CHECK-NEXT: i8x16.replace_lane $push79=, $pop78, 15, $pop4
; CHECK-NEXT: return $pop79
%m0 = extractelement <16 x i8> %mask, i32 0
%s0 = extractelement <16 x i8> %src, i8 %m0
%v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
Expand Down Expand Up @@ -210,14 +318,25 @@ define <16 x i8> @mashup_swizzle_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %spla
; CHECK-LABEL: mashup_swizzle_i8x16:
; CHECK: .functype mashup_swizzle_i8x16 (v128, v128, i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i8x16.swizzle $push0=, $0, $1
; CHECK-NEXT: i8x16.replace_lane $push1=, $pop0, 3, $2
; CHECK-NEXT: i32.const $push2=, 42
; CHECK-NEXT: i8x16.replace_lane $push3=, $pop1, 4, $pop2
; CHECK-NEXT: i8x16.replace_lane $push4=, $pop3, 12, $2
; CHECK-NEXT: i32.const $push6=, 42
; CHECK-NEXT: i8x16.replace_lane $push5=, $pop4, 14, $pop6
; CHECK-NEXT: return $pop5
; CHECK-NEXT: global.get $push12=, __stack_pointer
; CHECK-NEXT: i32.const $push13=, 16
; CHECK-NEXT: i32.sub $push16=, $pop12, $pop13
; CHECK-NEXT: local.tee $push15=, $3=, $pop16
; CHECK-NEXT: v128.store 0($pop15), $0
; CHECK-NEXT: i8x16.extract_lane_u $push7=, $1, 7
; CHECK-NEXT: i32.const $push1=, 15
; CHECK-NEXT: i32.and $push8=, $pop7, $pop1
; CHECK-NEXT: i32.or $push9=, $3, $pop8
; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 0
; CHECK-NEXT: i32.const $push14=, 15
; CHECK-NEXT: i32.and $push2=, $pop0, $pop14
; CHECK-NEXT: i32.or $push3=, $3, $pop2
; CHECK-NEXT: v128.const $push4=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0
; CHECK-NEXT: v128.load8_lane $push5=, 0($pop3), $pop4, 0
; CHECK-NEXT: i8x16.replace_lane $push6=, $pop5, 3, $2
; CHECK-NEXT: v128.load8_lane $push10=, 0($pop9), $pop6, 7
; CHECK-NEXT: i8x16.replace_lane $push11=, $pop10, 12, $2
; CHECK-NEXT: return $pop11
%m0 = extractelement <16 x i8> %mask, i32 0
%s0 = extractelement <16 x i8> %src, i8 %m0
%v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/extract-insert.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
define i32 @extractelt_undef_insertelt(i32 %x, i32 %y) {
; CHECK-LABEL: extractelt_undef_insertelt:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ret{{[l|q]}}
%b = insertelement <4 x i32> zeroinitializer, i32 %x, i64 3
%c = icmp uge i32 %y, %y
Expand Down
102 changes: 70 additions & 32 deletions llvm/test/CodeGen/X86/insertelement-var-index.ll
Original file line number Diff line number Diff line change
Expand Up @@ -996,7 +996,7 @@ define <2 x i64> @arg_i64_v2i64(<2 x i64> %v, i64 %x, i32 %y) nounwind {
;
; AVX512-LABEL: arg_i64_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: movslq %esi, %rax
; AVX512-NEXT: movl %esi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %xmm1
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512-NEXT: vpbroadcastq %rdi, %xmm0 {%k1}
Expand Down Expand Up @@ -1101,7 +1101,7 @@ define <2 x double> @arg_f64_v2f64(<2 x double> %v, double %x, i32 %y) nounwind
; SSE41: # %bb.0:
; SSE41-NEXT: movapd %xmm0, %xmm2
; SSE41-NEXT: movddup {{.*#+}} xmm1 = xmm1[0,0]
; SSE41-NEXT: movslq %edi, %rax
; SSE41-NEXT: movl %edi, %eax
; SSE41-NEXT: movq %rax, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
Expand All @@ -1112,7 +1112,7 @@ define <2 x double> @arg_f64_v2f64(<2 x double> %v, double %x, i32 %y) nounwind
; AVX1-LABEL: arg_f64_v2f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX1-NEXT: movslq %edi, %rax
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
Expand All @@ -1122,7 +1122,7 @@ define <2 x double> @arg_f64_v2f64(<2 x double> %v, double %x, i32 %y) nounwind
; AVX2-LABEL: arg_f64_v2f64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX2-NEXT: movslq %edi, %rax
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
Expand All @@ -1131,7 +1131,7 @@ define <2 x double> @arg_f64_v2f64(<2 x double> %v, double %x, i32 %y) nounwind
;
; AVX512-LABEL: arg_f64_v2f64:
; AVX512: # %bb.0:
; AVX512-NEXT: movslq %edi, %rax
; AVX512-NEXT: movl %edi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %xmm2
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %k1
; AVX512-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
Expand Down Expand Up @@ -1346,7 +1346,7 @@ define <2 x i64> @load_i64_v2i64(<2 x i64> %v, ptr %p, i32 %y) nounwind {
;
; AVX512-LABEL: load_i64_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: movslq %esi, %rax
; AVX512-NEXT: movl %esi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %xmm1
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512-NEXT: vpbroadcastq (%rdi), %xmm0 {%k1}
Expand Down Expand Up @@ -1458,7 +1458,7 @@ define <2 x double> @load_f64_v2f64(<2 x double> %v, ptr %p, i32 %y) nounwind {
; SSE41: # %bb.0:
; SSE41-NEXT: movapd %xmm0, %xmm1
; SSE41-NEXT: movddup {{.*#+}} xmm2 = mem[0,0]
; SSE41-NEXT: movslq %esi, %rax
; SSE41-NEXT: movl %esi, %eax
; SSE41-NEXT: movq %rax, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
Expand All @@ -1469,7 +1469,7 @@ define <2 x double> @load_f64_v2f64(<2 x double> %v, ptr %p, i32 %y) nounwind {
; AVX1-LABEL: load_f64_v2f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX1-NEXT: movslq %esi, %rax
; AVX1-NEXT: movl %esi, %eax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
Expand All @@ -1479,7 +1479,7 @@ define <2 x double> @load_f64_v2f64(<2 x double> %v, ptr %p, i32 %y) nounwind {
; AVX2-LABEL: load_f64_v2f64:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX2-NEXT: movslq %esi, %rax
; AVX2-NEXT: movl %esi, %eax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
Expand All @@ -1488,7 +1488,7 @@ define <2 x double> @load_f64_v2f64(<2 x double> %v, ptr %p, i32 %y) nounwind {
;
; AVX512-LABEL: load_f64_v2f64:
; AVX512: # %bb.0:
; AVX512-NEXT: movslq %esi, %rax
; AVX512-NEXT: movl %esi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %xmm1
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0]
Expand Down Expand Up @@ -1733,7 +1733,7 @@ define <4 x i64> @arg_i64_v4i64(<4 x i64> %v, i64 %x, i32 %y) nounwind {
;
; AVX512-LABEL: arg_i64_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: movslq %esi, %rax
; AVX512-NEXT: movl %esi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %ymm1
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512-NEXT: vpbroadcastq %rdi, %ymm0 {%k1}
Expand Down Expand Up @@ -1834,7 +1834,7 @@ define <4 x double> @arg_f64_v4f64(<4 x double> %v, double %x, i32 %y) nounwind
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT: movslq %edi, %rax
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
Expand All @@ -1846,7 +1846,7 @@ define <4 x double> @arg_f64_v4f64(<4 x double> %v, double %x, i32 %y) nounwind
; AVX2-LABEL: arg_f64_v4f64:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1
; AVX2-NEXT: movslq %edi, %rax
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2
; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
Expand All @@ -1855,7 +1855,7 @@ define <4 x double> @arg_f64_v4f64(<4 x double> %v, double %x, i32 %y) nounwind
;
; AVX512-LABEL: arg_f64_v4f64:
; AVX512: # %bb.0:
; AVX512-NEXT: movslq %edi, %rax
; AVX512-NEXT: movl %edi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %ymm2
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %k1
; AVX512-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1}
Expand Down Expand Up @@ -2114,7 +2114,7 @@ define <4 x i64> @load_i64_v4i64(<4 x i64> %v, ptr %p, i32 %y) nounwind {
;
; AVX512-LABEL: load_i64_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: movslq %esi, %rax
; AVX512-NEXT: movl %esi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %ymm1
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1}
Expand Down Expand Up @@ -2218,7 +2218,7 @@ define <4 x double> @load_f64_v4f64(<4 x double> %v, ptr %p, i32 %y) nounwind {
;
; AVX1-LABEL: load_f64_v4f64:
; AVX1: # %bb.0:
; AVX1-NEXT: movslq %esi, %rax
; AVX1-NEXT: movl %esi, %eax
; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
Expand All @@ -2231,7 +2231,7 @@ define <4 x double> @load_f64_v4f64(<4 x double> %v, ptr %p, i32 %y) nounwind {
; AVX2-LABEL: load_f64_v4f64:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcastsd (%rdi), %ymm1
; AVX2-NEXT: movslq %esi, %rax
; AVX2-NEXT: movl %esi, %eax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2
; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
Expand All @@ -2240,7 +2240,7 @@ define <4 x double> @load_f64_v4f64(<4 x double> %v, ptr %p, i32 %y) nounwind {
;
; AVX512-LABEL: load_f64_v4f64:
; AVX512: # %bb.0:
; AVX512-NEXT: movslq %esi, %rax
; AVX512-NEXT: movl %esi, %eax
; AVX512-NEXT: vpbroadcastq %rax, %ymm1
; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1}
Expand Down Expand Up @@ -2273,6 +2273,15 @@ define i32 @PR44139(ptr %p) {
; SSE-LABEL: PR44139:
; SSE: # %bb.0:
; SSE-NEXT: movl (%rdi), %eax
; SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT: movdqa %xmm0, 96(%rdi)
; SSE-NEXT: movdqa %xmm0, 112(%rdi)
; SSE-NEXT: movdqa %xmm0, 64(%rdi)
; SSE-NEXT: movdqa %xmm0, 80(%rdi)
; SSE-NEXT: movdqa %xmm0, 32(%rdi)
; SSE-NEXT: movdqa %xmm0, 48(%rdi)
; SSE-NEXT: movdqa %xmm0, (%rdi)
; SSE-NEXT: movdqa %xmm0, 16(%rdi)
; SSE-NEXT: leal 2147483647(%rax), %ecx
; SSE-NEXT: testl %eax, %eax
; SSE-NEXT: cmovnsl %eax, %ecx
Expand All @@ -2283,30 +2292,59 @@ define i32 @PR44139(ptr %p) {
; SSE-NEXT: divl %ecx
; SSE-NEXT: retq
;
; AVX-LABEL: PR44139:
; AVX: # %bb.0:
; AVX-NEXT: movl (%rdi), %eax
; AVX-NEXT: leal 2147483647(%rax), %ecx
; AVX-NEXT: testl %eax, %eax
; AVX-NEXT: cmovnsl %eax, %ecx
; AVX-NEXT: andl $-2147483648, %ecx # imm = 0x80000000
; AVX-NEXT: addl %eax, %ecx
; AVX-NEXT: # kill: def $eax killed $eax killed $rax
; AVX-NEXT: xorl %edx, %edx
; AVX-NEXT: divl %ecx
; AVX-NEXT: retq
; AVX1OR2-LABEL: PR44139:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vbroadcastsd (%rdi), %ymm0
; AVX1OR2-NEXT: movl (%rdi), %eax
; AVX1OR2-NEXT: vmovaps %ymm0, 64(%rdi)
; AVX1OR2-NEXT: vmovaps %ymm0, 96(%rdi)
; AVX1OR2-NEXT: vmovaps %ymm0, (%rdi)
; AVX1OR2-NEXT: vmovaps %ymm0, 32(%rdi)
; AVX1OR2-NEXT: leal 2147483647(%rax), %ecx
; AVX1OR2-NEXT: testl %eax, %eax
; AVX1OR2-NEXT: cmovnsl %eax, %ecx
; AVX1OR2-NEXT: andl $-2147483648, %ecx # imm = 0x80000000
; AVX1OR2-NEXT: addl %eax, %ecx
; AVX1OR2-NEXT: # kill: def $eax killed $eax killed $rax
; AVX1OR2-NEXT: xorl %edx, %edx
; AVX1OR2-NEXT: divl %ecx
; AVX1OR2-NEXT: vzeroupper
; AVX1OR2-NEXT: retq
;
; AVX512-LABEL: PR44139:
; AVX512: # %bb.0:
; AVX512-NEXT: vbroadcastsd (%rdi), %zmm0
; AVX512-NEXT: movl (%rdi), %eax
; AVX512-NEXT: vmovaps %zmm0, (%rdi)
; AVX512-NEXT: vmovaps %zmm0, 64(%rdi)
; AVX512-NEXT: leal 2147483647(%rax), %ecx
; AVX512-NEXT: testl %eax, %eax
; AVX512-NEXT: cmovnsl %eax, %ecx
; AVX512-NEXT: andl $-2147483648, %ecx # imm = 0x80000000
; AVX512-NEXT: addl %eax, %ecx
; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
; AVX512-NEXT: xorl %edx, %edx
; AVX512-NEXT: divl %ecx
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; X86AVX2-LABEL: PR44139:
; X86AVX2: # %bb.0:
; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT: movl (%eax), %eax
; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86AVX2-NEXT: movl (%ecx), %eax
; X86AVX2-NEXT: vbroadcastsd (%ecx), %ymm0
; X86AVX2-NEXT: vmovaps %ymm0, 64(%ecx)
; X86AVX2-NEXT: vmovaps %ymm0, 96(%ecx)
; X86AVX2-NEXT: vmovaps %ymm0, (%ecx)
; X86AVX2-NEXT: vmovaps %ymm0, 32(%ecx)
; X86AVX2-NEXT: leal 2147483647(%eax), %ecx
; X86AVX2-NEXT: testl %eax, %eax
; X86AVX2-NEXT: cmovnsl %eax, %ecx
; X86AVX2-NEXT: andl $-2147483648, %ecx # imm = 0x80000000
; X86AVX2-NEXT: addl %eax, %ecx
; X86AVX2-NEXT: xorl %edx, %edx
; X86AVX2-NEXT: divl %ecx
; X86AVX2-NEXT: vzeroupper
; X86AVX2-NEXT: retl
%L = load <16 x i64>, ptr %p
%E1 = extractelement <16 x i64> %L, i64 0
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/var-permute-128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ define <4 x i32> @var_shuffle_v4i32(<4 x i32> %v, <4 x i32> %indices) nounwind {
define <8 x i16> @var_shuffle_v8i16(<8 x i16> %v, <8 x i16> %indices) nounwind {
; SSE3-LABEL: var_shuffle_v8i16:
; SSE3: # %bb.0:
; SSE3-NEXT: movd %xmm1, %eax
; SSE3-NEXT: pextrw $0, %xmm1, %eax
; SSE3-NEXT: pextrw $1, %xmm1, %ecx
; SSE3-NEXT: pextrw $2, %xmm1, %edx
; SSE3-NEXT: pextrw $3, %xmm1, %esi
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/X86/var-permute-512.ll
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ define <32 x i16> @var_shuffle_v32i16(<32 x i16> %v, <32 x i16> %indices) nounwi
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm3
; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm4
; AVX512F-NEXT: vmovd %xmm4, %eax
; AVX512F-NEXT: vpextrw $0, %xmm4, %eax
; AVX512F-NEXT: vmovaps %zmm0, (%rsp)
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax
Expand All @@ -127,7 +127,7 @@ define <32 x i16> @var_shuffle_v32i16(<32 x i16> %v, <32 x i16> %indices) nounwi
; AVX512F-NEXT: vpextrw $7, %xmm4, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm3, %eax
; AVX512F-NEXT: vpextrw $0, %xmm3, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
Expand All @@ -152,7 +152,7 @@ define <32 x i16> @var_shuffle_v32i16(<32 x i16> %v, <32 x i16> %indices) nounwi
; AVX512F-NEXT: vpextrw $7, %xmm3, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm4, %xmm3
; AVX512F-NEXT: vmovd %xmm2, %eax
; AVX512F-NEXT: vpextrw $0, %xmm2, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
Expand Down Expand Up @@ -180,7 +180,7 @@ define <32 x i16> @var_shuffle_v32i16(<32 x i16> %v, <32 x i16> %indices) nounwi
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX512F-NEXT: vpinsrw $7, %eax, %xmm4, %xmm2
; AVX512F-NEXT: vmovd %xmm1, %eax
; AVX512F-NEXT: vpextrw $0, %xmm1, %eax
; AVX512F-NEXT: andl $31, %eax
; AVX512F-NEXT: movzwl (%rsp,%rax,2), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
Expand Down Expand Up @@ -330,7 +330,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind {
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm3
; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm4
; AVX512F-NEXT: vmovd %xmm4, %eax
; AVX512F-NEXT: vpextrb $0, %xmm4, %eax
; AVX512F-NEXT: vmovaps %zmm0, (%rsp)
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
Expand Down Expand Up @@ -380,7 +380,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind {
; AVX512F-NEXT: vpextrb $15, %xmm4, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm3, %eax
; AVX512F-NEXT: vpextrb $0, %xmm3, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
Expand Down Expand Up @@ -432,7 +432,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind {
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm4, %xmm3
; AVX512F-NEXT: vmovd %xmm2, %eax
; AVX512F-NEXT: vpextrb $0, %xmm2, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
Expand Down Expand Up @@ -485,7 +485,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind {
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vpinsrb $15, %eax, %xmm4, %xmm2
; AVX512F-NEXT: vmovd %xmm1, %eax
; AVX512F-NEXT: vpextrb $0, %xmm1, %eax
; AVX512F-NEXT: andl $63, %eax
; AVX512F-NEXT: movzbl (%rsp,%rax), %eax
; AVX512F-NEXT: vmovd %eax, %xmm4
Expand Down Expand Up @@ -555,7 +555,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind {
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3
; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm4
; AVX512BW-NEXT: vmovd %xmm4, %eax
; AVX512BW-NEXT: vpextrb $0, %xmm4, %eax
; AVX512BW-NEXT: vmovaps %zmm0, (%rsp)
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
Expand Down Expand Up @@ -605,7 +605,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind {
; AVX512BW-NEXT: vpextrb $15, %xmm4, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm3, %eax
; AVX512BW-NEXT: vpextrb $0, %xmm3, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vmovd %eax, %xmm4
Expand Down Expand Up @@ -657,7 +657,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind {
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm3
; AVX512BW-NEXT: vmovd %xmm2, %eax
; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vmovd %eax, %xmm4
Expand Down Expand Up @@ -710,7 +710,7 @@ define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind {
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm2
; AVX512BW-NEXT: vmovd %xmm1, %eax
; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax
; AVX512BW-NEXT: andl $63, %eax
; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax
; AVX512BW-NEXT: vmovd %eax, %xmm4
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/CodeGen/X86/vec_extract.ll
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,15 @@ define <4 x i32> @ossfuzz15662(ptr %in) {
; X32-LABEL: ossfuzz15662:
; X32: # %bb.0:
; X32-NEXT: xorps %xmm0, %xmm0
; X32-NEXT: movaps %xmm0, (%eax)
; X32-NEXT: xorps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: ossfuzz15662:
; X64: # %bb.0:
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: movaps %xmm0, (%rax)
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: retq
%C10 = icmp ule i1 false, false
%C3 = icmp ule i1 true, undef
Expand Down