diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 57b219343c3e3..a2bd862e2ce14 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1394,3 +1394,77 @@ define <2 x double> @vid_step2_v2f64() {
 ; CHECK-NEXT:    ret
   ret <2 x double>
 }
+
+
+define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float %e3, float %e4, float %e5, float %e6, float %e7) vscale_range(4, 128) {
+; CHECK-LABEL: buildvec_v8f32_zvl256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, fa0
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa2
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa6
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa7
+; CHECK-NEXT:    ret
+  %v0 = insertelement <8 x float> poison, float %e0, i64 0
+  %v1 = insertelement <8 x float> %v0, float %e1, i64 1
+  %v2 = insertelement <8 x float> %v1, float %e2, i64 2
+  %v3 = insertelement <8 x float> %v2, float %e3, i64 3
+  %v4 = insertelement <8 x float> %v3, float %e4, i64 4
+  %v5 = insertelement <8 x float> %v4, float %e5, i64 5
+  %v6 = insertelement <8 x float> %v5, float %e6, i64 6
+  %v7 = insertelement <8 x float> %v6, float %e7, i64 7
+  ret <8 x float> %v7
+}
+
+
+define <8 x double> @buildvec_v8f64_zvl256(double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6, double %e7) vscale_range(4, 128) {
+; CHECK-LABEL: buildvec_v8f64_zvl256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e64, m2, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, fa0
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa2
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa6
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa7
+; CHECK-NEXT:    ret
+  %v0 = insertelement <8 x double> poison, double %e0, i64 0
+  %v1 = insertelement <8 x double> %v0, double %e1, i64 1
+  %v2 = insertelement <8 x double> %v1, double %e2, i64 2
+  %v3 = insertelement <8 x double> %v2, double %e3, i64 3
+  %v4 = insertelement <8 x double> %v3, double %e4, i64 4
+  %v5 = insertelement <8 x double> %v4, double %e5, i64 5
+  %v6 = insertelement <8 x double> %v5, double %e6, i64 6
+  %v7 = insertelement <8 x double> %v6, double %e7, i64 7
+  ret <8 x double> %v7
+}
+
+define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6, double %e7) vscale_range(8, 128) {
+; CHECK-LABEL: buildvec_v8f64_zvl512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e64, m1, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, fa0
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa2
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa6
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa7
+; CHECK-NEXT:    ret
+  %v0 = insertelement <8 x double> poison, double %e0, i64 0
+  %v1 = insertelement <8 x double> %v0, double %e1, i64 1
+  %v2 = insertelement <8 x double> %v1, double %e2, i64 2
+  %v3 = insertelement <8 x double> %v2, double %e3, i64 3
+  %v4 = insertelement <8 x double> %v3, double %e4, i64 4
+  %v5 = insertelement <8 x double> %v4, double %e5, i64 5
+  %v6 = insertelement <8 x double> %v5, double %e6, i64 6
+  %v7 = insertelement <8 x double> %v6, double %e7, i64 7
+  ret <8 x double> %v7
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index dfafbfb97284c..e691e63581154 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -1178,3 +1178,512 @@ define <8 x i64> @v8xi64_exact_undef_prefix(i64 %a, i64 %b, i64 %c, i64 %d) vsca
   %v4 = insertelement <8 x i64> %v3, i64 %d, i32 7
   ret <8 x i64> %v4
 }
+
+
+define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) {
+; RV32-LABEL: buildvec_v16i8_loads_contigous:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset s0, -4
+; RV32-NEXT:    lbu a1, 1(a0)
+; RV32-NEXT:    lbu a2, 2(a0)
+; RV32-NEXT:    lbu a3, 3(a0)
+; RV32-NEXT:    lbu a4, 4(a0)
+; RV32-NEXT:    lbu a5, 5(a0)
+; RV32-NEXT:    lbu a6, 6(a0)
+; RV32-NEXT:    lbu a7, 7(a0)
+; RV32-NEXT:    lbu t0, 8(a0)
+; RV32-NEXT:    lbu t1, 9(a0)
+; RV32-NEXT:    lbu t2, 10(a0)
+; RV32-NEXT:    lbu t3, 11(a0)
+; RV32-NEXT:    lbu t4, 12(a0)
+; RV32-NEXT:    lbu t5, 13(a0)
+; RV32-NEXT:    lbu t6, 14(a0)
+; RV32-NEXT:    lbu s0, 15(a0)
+; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT:    vlse8.v v8, (a0), zero
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    vslide1down.vx v8, v8, a3
+; RV32-NEXT:    vslide1down.vx v8, v8, a4
+; RV32-NEXT:    vslide1down.vx v8, v8, a5
+; RV32-NEXT:    vslide1down.vx v8, v8, a6
+; RV32-NEXT:    vslide1down.vx v8, v8, a7
+; RV32-NEXT:    vslide1down.vx v8, v8, t0
+; RV32-NEXT:    vslide1down.vx v8, v8, t1
+; RV32-NEXT:    vslide1down.vx v8, v8, t2
+; RV32-NEXT:    vslide1down.vx v8, v8, t3
+; RV32-NEXT:    vslide1down.vx v8, v8, t4
+; RV32-NEXT:    vslide1down.vx v8, v8, t5
+; RV32-NEXT:    vslide1down.vx v8, v8, t6
+; RV32-NEXT:    vslide1down.vx v8, v8, s0
+; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: buildvec_v16i8_loads_contigous:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset s0, -8
+; RV64-NEXT:    lbu a1, 1(a0)
+; RV64-NEXT:    lbu a2, 2(a0)
+; RV64-NEXT:    lbu a3, 3(a0)
+; RV64-NEXT:    lbu a4, 4(a0)
+; RV64-NEXT:    lbu a5, 5(a0)
+; RV64-NEXT:    lbu a6, 6(a0)
+; RV64-NEXT:    lbu a7, 7(a0)
+; RV64-NEXT:    lbu t0, 8(a0)
+; RV64-NEXT:    lbu t1, 9(a0)
+; RV64-NEXT:    lbu t2, 10(a0)
+; RV64-NEXT:    lbu t3, 11(a0)
+; RV64-NEXT:    lbu t4, 12(a0)
+; RV64-NEXT:    lbu t5, 13(a0)
+; RV64-NEXT:    lbu t6, 14(a0)
+; RV64-NEXT:    lbu s0, 15(a0)
+; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT:    vlse8.v v8, (a0), zero
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    vslide1down.vx v8, v8, a3
+; RV64-NEXT:    vslide1down.vx v8, v8, a4
+; RV64-NEXT:    vslide1down.vx v8, v8, a5
+; RV64-NEXT:    vslide1down.vx v8, v8, a6
+; RV64-NEXT:    vslide1down.vx v8, v8, a7
+; RV64-NEXT:    vslide1down.vx v8, v8, t0
+; RV64-NEXT:    vslide1down.vx v8, v8, t1
+; RV64-NEXT:    vslide1down.vx v8, v8, t2
+; RV64-NEXT:    vslide1down.vx v8, v8, t3
+; RV64-NEXT:    vslide1down.vx v8, v8, t4
+; RV64-NEXT:    vslide1down.vx v8, v8, t5
+; RV64-NEXT:    vslide1down.vx v8, v8, t6
+; RV64-NEXT:    vslide1down.vx v8, v8, s0
+; RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %p2 = getelementptr i8, ptr %p, i32 1
+  %p3 = getelementptr i8, ptr %p, i32 2
+  %p4 = getelementptr i8, ptr %p, i32 3
+  %p5 = getelementptr i8, ptr %p, i32 4
+  %p6 = getelementptr i8, ptr %p, i32 5
+  %p7 = getelementptr i8, ptr %p, i32 6
+  %p8 = getelementptr i8, ptr %p, i32 7
+  %p9 = getelementptr i8, ptr %p, i32 8
+  %p10 = getelementptr i8, ptr %p, i32 9
+  %p11 = getelementptr i8, ptr %p, i32 10
+  %p12 = getelementptr i8, ptr %p, i32 11
+  %p13 = getelementptr i8, ptr %p, i32 12
+  %p14 = getelementptr i8, ptr %p, i32 13
+  %p15 = getelementptr i8, ptr %p, i32 14
+  %p16 = getelementptr i8, ptr %p, i32 15
+
+  %ld1 = load i8, ptr %p
+  %ld2 = load i8, ptr %p2
+  %ld3 = load i8, ptr %p3
+  %ld4 = load i8, ptr %p4
+  %ld5 = load i8, ptr %p5
+  %ld6 = load i8, ptr %p6
+  %ld7 = load i8, ptr %p7
+  %ld8 = load i8, ptr %p8
+  %ld9 = load i8, ptr %p9
+  %ld10 = load i8, ptr %p10
+  %ld11 = load i8, ptr %p11
+  %ld12 = load i8, ptr %p12
+  %ld13 = load i8, ptr %p13
+  %ld14 = load i8, ptr %p14
+  %ld15 = load i8, ptr %p15
+  %ld16 = load i8, ptr %p16
+
+  %v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
+  %v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
+  %v3 = insertelement <16 x i8> %v2, i8 %ld3, i32 2
+  %v4 = insertelement <16 x i8> %v3, i8 %ld4, i32 3
+  %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
+  %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
+  %v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
+  %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
+  %v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
+  %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
+  %v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
+  %v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
+  %v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
+  %v14 = insertelement <16 x i8> %v13, i8 %ld14, i32 13
+  %v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
+  %v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
+  ret <16 x i8> %v16
+}
+
+
+define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) {
+; RV32-LABEL: buildvec_v16i8_loads_gather:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset s0, -4
+; RV32-NEXT:    lbu a1, 1(a0)
+; RV32-NEXT:    lbu a2, 22(a0)
+; RV32-NEXT:    lbu a3, 31(a0)
+; RV32-NEXT:    lbu a4, 44(a0)
+; RV32-NEXT:    lbu a5, 55(a0)
+; RV32-NEXT:    lbu a6, 623(a0)
+; RV32-NEXT:    lbu a7, 75(a0)
+; RV32-NEXT:    lbu t0, 82(a0)
+; RV32-NEXT:    lbu t1, 93(a0)
+; RV32-NEXT:    lbu t2, 105(a0)
+; RV32-NEXT:    lbu t3, 161(a0)
+; RV32-NEXT:    lbu t4, 124(a0)
+; RV32-NEXT:    lbu t5, 163(a0)
+; RV32-NEXT:    lbu t6, 144(a0)
+; RV32-NEXT:    lbu s0, 154(a0)
+; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT:    vlse8.v v8, (a0), zero
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    vslide1down.vx v8, v8, a3
+; RV32-NEXT:    vslide1down.vx v8, v8, a4
+; RV32-NEXT:    vslide1down.vx v8, v8, a5
+; RV32-NEXT:    vslide1down.vx v8, v8, a6
+; RV32-NEXT:    vslide1down.vx v8, v8, a7
+; RV32-NEXT:    vslide1down.vx v8, v8, t0
+; RV32-NEXT:    vslide1down.vx v8, v8, t1
+; RV32-NEXT:    vslide1down.vx v8, v8, t2
+; RV32-NEXT:    vslide1down.vx v8, v8, t3
+; RV32-NEXT:    vslide1down.vx v8, v8, t4
+; RV32-NEXT:    vslide1down.vx v8, v8, t5
+; RV32-NEXT:    vslide1down.vx v8, v8, t6
+; RV32-NEXT:    vslide1down.vx v8, v8, s0
+; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: buildvec_v16i8_loads_gather:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset s0, -8
+; RV64-NEXT:    lbu a1, 1(a0)
+; RV64-NEXT:    lbu a2, 22(a0)
+; RV64-NEXT:    lbu a3, 31(a0)
+; RV64-NEXT:    lbu a4, 44(a0)
+; RV64-NEXT:    lbu a5, 55(a0)
+; RV64-NEXT:    lbu a6, 623(a0)
+; RV64-NEXT:    lbu a7, 75(a0)
+; RV64-NEXT:    lbu t0, 82(a0)
+; RV64-NEXT:    lbu t1, 93(a0)
+; RV64-NEXT:    lbu t2, 105(a0)
+; RV64-NEXT:    lbu t3, 161(a0)
+; RV64-NEXT:    lbu t4, 124(a0)
+; RV64-NEXT:    lbu t5, 163(a0)
+; RV64-NEXT:    lbu t6, 144(a0)
+; RV64-NEXT:    lbu s0, 154(a0)
+; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT:    vlse8.v v8, (a0), zero
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    vslide1down.vx v8, v8, a3
+; RV64-NEXT:    vslide1down.vx v8, v8, a4
+; RV64-NEXT:    vslide1down.vx v8, v8, a5
+; RV64-NEXT:    vslide1down.vx v8, v8, a6
+; RV64-NEXT:    vslide1down.vx v8, v8, a7
+; RV64-NEXT:    vslide1down.vx v8, v8, t0
+; RV64-NEXT:    vslide1down.vx v8, v8, t1
+; RV64-NEXT:    vslide1down.vx v8, v8, t2
+; RV64-NEXT:    vslide1down.vx v8, v8, t3
+; RV64-NEXT:    vslide1down.vx v8, v8, t4
+; RV64-NEXT:    vslide1down.vx v8, v8, t5
+; RV64-NEXT:    vslide1down.vx v8, v8, t6
+; RV64-NEXT:    vslide1down.vx v8, v8, s0
+; RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %p2 = getelementptr i8, ptr %p, i32 1
+  %p3 = getelementptr i8, ptr %p, i32 22
+  %p4 = getelementptr i8, ptr %p, i32 31
+  %p5 = getelementptr i8, ptr %p, i32 44
+  %p6 = getelementptr i8, ptr %p, i32 55
+  %p7 = getelementptr i8, ptr %p, i32 623
+  %p8 = getelementptr i8, ptr %p, i32 75
+  %p9 = getelementptr i8, ptr %p, i32 82
+  %p10 = getelementptr i8, ptr %p, i32 93
+  %p11 = getelementptr i8, ptr %p, i32 105
+  %p12 = getelementptr i8, ptr %p, i32 161
+  %p13 = getelementptr i8, ptr %p, i32 124
+  %p14 = getelementptr i8, ptr %p, i32 163
+  %p15 = getelementptr i8, ptr %p, i32 144
+  %p16 = getelementptr i8, ptr %p, i32 154
+
+  %ld1 = load i8, ptr %p
+  %ld2 = load i8, ptr %p2
+  %ld3 = load i8, ptr %p3
+  %ld4 = load i8, ptr %p4
+  %ld5 = load i8, ptr %p5
+  %ld6 = load i8, ptr %p6
+  %ld7 = load i8, ptr %p7
+  %ld8 = load i8, ptr %p8
+  %ld9 = load i8, ptr %p9
+  %ld10 = load i8, ptr %p10
+  %ld11 = load i8, ptr %p11
+  %ld12 = load i8, ptr %p12
+  %ld13 = load i8, ptr %p13
+  %ld14 = load i8, ptr %p14
+  %ld15 = load i8, ptr %p15
+  %ld16 = load i8, ptr %p16
+
+  %v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
+  %v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
+  %v3 = insertelement <16 x i8> %v2, i8 %ld3, i32 2
+  %v4 = insertelement <16 x i8> %v3, i8 %ld4, i32 3
+  %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
+  %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
+  %v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
+  %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
+  %v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
+  %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
+  %v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
+  %v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
+  %v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
+  %v14 = insertelement <16 x i8> %v13, i8 %ld14, i32 13
+  %v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
+  %v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
+  ret <16 x i8> %v16
+}
+
+define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) {
+; CHECK-LABEL: buildvec_v16i8_undef_low_half:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, 82
+; CHECK-NEXT:    lbu a2, 93(a0)
+; CHECK-NEXT:    lbu a3, 105(a0)
+; CHECK-NEXT:    lbu a4, 161(a0)
+; CHECK-NEXT:    lbu a5, 124(a0)
+; CHECK-NEXT:    lbu a6, 163(a0)
+; CHECK-NEXT:    lbu a7, 144(a0)
+; CHECK-NEXT:    lbu a0, 154(a0)
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vlse8.v v8, (a1), zero
+; CHECK-NEXT:    vslide1down.vx v8, v8, a2
+; CHECK-NEXT:    vslide1down.vx v8, v8, a3
+; CHECK-NEXT:    vslide1down.vx v8, v8, a4
+; CHECK-NEXT:    vslide1down.vx v8, v8, a5
+; CHECK-NEXT:    vslide1down.vx v8, v8, a6
+; CHECK-NEXT:    vslide1down.vx v8, v8, a7
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %p9 = getelementptr i8, ptr %p, i32 82
+  %p10 = getelementptr i8, ptr %p, i32 93
+  %p11 = getelementptr i8, ptr %p, i32 105
+  %p12 = getelementptr i8, ptr %p, i32 161
+  %p13 = getelementptr i8, ptr %p, i32 124
+  %p14 = getelementptr i8, ptr %p, i32 163
+  %p15 = getelementptr i8, ptr %p, i32 144
+  %p16 = getelementptr i8, ptr %p, i32 154
+
+  %ld9 = load i8, ptr %p9
+  %ld10 = load i8, ptr %p10
+  %ld11 = load i8, ptr %p11
+  %ld12 = load i8, ptr %p12
+  %ld13 = load i8, ptr %p13
+  %ld14 = load i8, ptr %p14
+  %ld15 = load i8, ptr %p15
+  %ld16 = load i8, ptr %p16
+
+  %v9 = insertelement <16 x i8> poison, i8 %ld9, i32 8
+  %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
+  %v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
+  %v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
+  %v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
+  %v14 = insertelement <16 x i8> %v13, i8 %ld14, i32 13
+  %v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
+  %v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
+  ret <16 x i8> %v16
+}
+
+define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) {
+; CHECK-LABEL: buildvec_v16i8_undef_high_half:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lbu a1, 1(a0)
+; CHECK-NEXT:    lbu a2, 22(a0)
+; CHECK-NEXT:    lbu a3, 31(a0)
+; CHECK-NEXT:    lbu a4, 44(a0)
+; CHECK-NEXT:    lbu a5, 55(a0)
+; CHECK-NEXT:    lbu a6, 623(a0)
+; CHECK-NEXT:    lbu a7, 75(a0)
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vlse8.v v8, (a0), zero
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a2
+; CHECK-NEXT:    vslide1down.vx v8, v8, a3
+; CHECK-NEXT:    vslide1down.vx v8, v8, a4
+; CHECK-NEXT:    vslide1down.vx v8, v8, a5
+; CHECK-NEXT:    vslide1down.vx v8, v8, a6
+; CHECK-NEXT:    vslide1down.vx v8, v8, a7
+; CHECK-NEXT:    vslidedown.vi v8, v8, 8
+; CHECK-NEXT:    ret
+  %p2 = getelementptr i8, ptr %p, i32 1
+  %p3 = getelementptr i8, ptr %p, i32 22
+  %p4 = getelementptr i8, ptr %p, i32 31
+  %p5 = getelementptr i8, ptr %p, i32 44
+  %p6 = getelementptr i8, ptr %p, i32 55
+  %p7 = getelementptr i8, ptr %p, i32 623
+  %p8 = getelementptr i8, ptr %p, i32 75
+
+  %ld1 = load i8, ptr %p
+  %ld2 = load i8, ptr %p2
+  %ld3 = load i8, ptr %p3
+  %ld4 = load i8, ptr %p4
+  %ld5 = load i8, ptr %p5
+  %ld6 = load i8, ptr %p6
+  %ld7 = load i8, ptr %p7
+  %ld8 = load i8, ptr %p8
+
+  %v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
+  %v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
+  %v3 = insertelement <16 x i8> %v2, i8 %ld3, i32 2
+  %v4 = insertelement <16 x i8> %v3, i8 %ld4, i32 3
+  %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
+  %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
+  %v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
+  %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
+  ret <16 x i8> %v8
+}
+
+define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) {
+; CHECK-LABEL: buildvec_v16i8_undef_edges:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, 31
+; CHECK-NEXT:    lbu a2, 44(a0)
+; CHECK-NEXT:    lbu a3, 55(a0)
+; CHECK-NEXT:    lbu a4, 623(a0)
+; CHECK-NEXT:    lbu a5, 75(a0)
+; CHECK-NEXT:    lbu a6, 82(a0)
+; CHECK-NEXT:    lbu a7, 93(a0)
+; CHECK-NEXT:    lbu t0, 105(a0)
+; CHECK-NEXT:    lbu a0, 161(a0)
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vlse8.v v8, (a1), zero
+; CHECK-NEXT:    vslide1down.vx v8, v8, a2
+; CHECK-NEXT:    vslide1down.vx v8, v8, a3
+; CHECK-NEXT:    vslide1down.vx v8, v8, a4
+; CHECK-NEXT:    vslide1down.vx v8, v8, a5
+; CHECK-NEXT:    vslide1down.vx v8, v8, a6
+; CHECK-NEXT:    vslide1down.vx v8, v8, a7
+; CHECK-NEXT:    vslide1down.vx v8, v8, t0
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vslidedown.vi v8, v8, 4
+; CHECK-NEXT:    ret
+  %p4 = getelementptr i8, ptr %p, i32 31
+  %p5 = getelementptr i8, ptr %p, i32 44
+  %p6 = getelementptr i8, ptr %p, i32 55
+  %p7 = getelementptr i8, ptr %p, i32 623
+  %p8 = getelementptr i8, ptr %p, i32 75
+  %p9 = getelementptr i8, ptr %p, i32 82
+  %p10 = getelementptr i8, ptr %p, i32 93
+  %p11 = getelementptr i8, ptr %p, i32 105
+  %p12 = getelementptr i8, ptr %p, i32 161
+
+  %ld4 = load i8, ptr %p4
+  %ld5 = load i8, ptr %p5
+  %ld6 = load i8, ptr %p6
+  %ld7 = load i8, ptr %p7
+  %ld8 = load i8, ptr %p8
+  %ld9 = load i8, ptr %p9
+  %ld10 = load i8, ptr %p10
+  %ld11 = load i8, ptr %p11
+  %ld12 = load i8, ptr %p12
+
+  %v4 = insertelement <16 x i8> poison, i8 %ld4, i32 3
+  %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
+  %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
+  %v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
+  %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
+  %v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
+  %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
+  %v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
+  %v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
+  ret <16 x i8> %v12
+}
+
+define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) {
+; CHECK-LABEL: buildvec_v16i8_loads_undef_scattered:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lbu a1, 1(a0)
+; CHECK-NEXT:    lbu a2, 44(a0)
+; CHECK-NEXT:    lbu a3, 55(a0)
+; CHECK-NEXT:    lbu a4, 75(a0)
+; CHECK-NEXT:    lbu a5, 82(a0)
+; CHECK-NEXT:    lbu a6, 93(a0)
+; CHECK-NEXT:    lbu a7, 124(a0)
+; CHECK-NEXT:    lbu t0, 144(a0)
+; CHECK-NEXT:    lbu t1, 154(a0)
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vlse8.v v8, (a0), zero
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
+; CHECK-NEXT:    vslidedown.vi v8, v8, 2
+; CHECK-NEXT:    vslide1down.vx v8, v8, a2
+; CHECK-NEXT:    vslide1down.vx v8, v8, a3
+; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a4
+; CHECK-NEXT:    vslide1down.vx v8, v8, a5
+; CHECK-NEXT:    vslide1down.vx v8, v8, a6
+; CHECK-NEXT:    vslidedown.vi v8, v8, 2
+; CHECK-NEXT:    vslide1down.vx v8, v8, a7
+; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vslide1down.vx v8, v8, t0
+; CHECK-NEXT:    vslide1down.vx v8, v8, t1
+; CHECK-NEXT:    ret
+  %p2 = getelementptr i8, ptr %p, i32 1
+  %p3 = getelementptr i8, ptr %p, i32 22
+  %p4 = getelementptr i8, ptr %p, i32 31
+  %p5 = getelementptr i8, ptr %p, i32 44
+  %p6 = getelementptr i8, ptr %p, i32 55
+  %p7 = getelementptr i8, ptr %p, i32 623
+  %p8 = getelementptr i8, ptr %p, i32 75
+  %p9 = getelementptr i8, ptr %p, i32 82
+  %p10 = getelementptr i8, ptr %p, i32 93
+  %p11 = getelementptr i8, ptr %p, i32 105
+  %p12 = getelementptr i8, ptr %p, i32 161
+  %p13 = getelementptr i8, ptr %p, i32 124
+  %p14 = getelementptr i8, ptr %p, i32 163
+  %p15 = getelementptr i8, ptr %p, i32 144
+  %p16 = getelementptr i8, ptr %p, i32 154
+
+  %ld1 = load i8, ptr %p
+  %ld2 = load i8, ptr %p2
+  %ld3 = load i8, ptr %p3
+  %ld4 = load i8, ptr %p4
+  %ld5 = load i8, ptr %p5
+  %ld6 = load i8, ptr %p6
+  %ld7 = load i8, ptr %p7
+  %ld8 = load i8, ptr %p8
+  %ld9 = load i8, ptr %p9
+  %ld10 = load i8, ptr %p10
+  %ld11 = load i8, ptr %p11
+  %ld12 = load i8, ptr %p12
+  %ld13 = load i8, ptr %p13
+  %ld14 = load i8, ptr %p14
+  %ld15 = load i8, ptr %p15
+  %ld16 = load i8, ptr %p16
+
+  %v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
+  %v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
+  %v3 = insertelement <16 x i8> %v2, i8 undef, i32 2
+  %v4 = insertelement <16 x i8> %v3, i8 undef, i32 3
+  %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
+  %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
+  %v7 = insertelement <16 x i8> %v6, i8 undef, i32 6
+  %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
+  %v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
+  %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
+  %v11 = insertelement <16 x i8> %v10, i8 undef, i32 10
+  %v12 = insertelement <16 x i8> %v11, i8 undef, i32 11
+  %v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
+  %v14 = insertelement <16 x i8> %v13, i8 undef, i32 13
+  %v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
+  %v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
+  ret <16 x i8> %v16
+}
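Note on the new tests: the CHECK/RV32/RV64 bodies above are auto-generated assertions of the kind produced by llvm/utils/update_llc_test_checks.py, and the files' RUN lines fall outside the hunks shown. For orientation only, a minimal standalone reduction of the pattern being exercised (a build_vector of scalar arguments lowered to vfmv.v.f plus a vfslide1down.vf chain) could look like the sketch below; the RUN line is an assumption modeled on typical RVV fixed-vector tests rather than copied from these files, and the assembly body would be regenerated with the script instead of written by hand.

; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s

; Hypothetical reduced example (not part of the patch); only the label is
; checked here so that update_llc_test_checks.py can fill in the full
; CHECK body for the vfmv.v.f + vfslide1down.vf sequence.
define <4 x float> @buildvec_v4f32(float %e0, float %e1, float %e2, float %e3) {
; CHECK-LABEL: buildvec_v4f32:
  %v0 = insertelement <4 x float> poison, float %e0, i64 0
  %v1 = insertelement <4 x float> %v0, float %e1, i64 1
  %v2 = insertelement <4 x float> %v1, float %e2, i64 2
  %v3 = insertelement <4 x float> %v2, float %e3, i64 3
  ret <4 x float> %v3
}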