34 changes: 17 additions & 17 deletions llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
@@ -17,7 +17,7 @@ define <vscale x 8 x i8> @vec_scalable_subvec_scalable_idx_zero_i8(<vscale x 8 x
; CHECK-NEXT: ret
%vec = load <vscale x 8 x i8>, <vscale x 8 x i8>* %a
%subvec = load <vscale x 4 x i8>, <vscale x 4 x i8>* %b
%ins = call <vscale x 8 x i8> @llvm.experimental.vector.insert.nxv8i8.nxv4i8(<vscale x 8 x i8> %vec, <vscale x 4 x i8> %subvec, i64 0)
%ins = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.nxv4i8(<vscale x 8 x i8> %vec, <vscale x 4 x i8> %subvec, i64 0)
ret <vscale x 8 x i8> %ins
}

@@ -33,7 +33,7 @@ define <vscale x 8 x i8> @vec_scalable_subvec_scalable_idx_nonzero_i8(<vscale x
; CHECK-NEXT: ret
%vec = load <vscale x 8 x i8>, <vscale x 8 x i8>* %a
%subvec = load <vscale x 4 x i8>, <vscale x 4 x i8>* %b
%ins = call <vscale x 8 x i8> @llvm.experimental.vector.insert.nxv8i8.nxv4i8(<vscale x 8 x i8> %vec, <vscale x 4 x i8> %subvec, i64 4)
%ins = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.nxv4i8(<vscale x 8 x i8> %vec, <vscale x 4 x i8> %subvec, i64 4)
ret <vscale x 8 x i8> %ins
}

@@ -49,7 +49,7 @@ define <vscale x 4 x i16> @vec_scalable_subvec_scalable_idx_zero_i16(<vscale x 4
; CHECK-NEXT: ret
%vec = load <vscale x 4 x i16>, <vscale x 4 x i16>* %a
%subvec = load <vscale x 2 x i16>, <vscale x 2 x i16>* %b
%ins = call <vscale x 4 x i16> @llvm.experimental.vector.insert.nxv4i16.nxv2i16(<vscale x 4 x i16> %vec, <vscale x 2 x i16> %subvec, i64 0)
%ins = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.nxv2i16(<vscale x 4 x i16> %vec, <vscale x 2 x i16> %subvec, i64 0)
ret <vscale x 4 x i16> %ins
}

@@ -65,7 +65,7 @@ define <vscale x 4 x i16> @vec_scalable_subvec_scalable_idx_nonzero_i16(<vscale
; CHECK-NEXT: ret
%vec = load <vscale x 4 x i16>, <vscale x 4 x i16>* %a
%subvec = load <vscale x 2 x i16>, <vscale x 2 x i16>* %b
%ins = call <vscale x 4 x i16> @llvm.experimental.vector.insert.nxv4i16.nxv2i16(<vscale x 4 x i16> %vec, <vscale x 2 x i16> %subvec, i64 2)
%ins = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.nxv2i16(<vscale x 4 x i16> %vec, <vscale x 2 x i16> %subvec, i64 2)
ret <vscale x 4 x i16> %ins
}

@@ -83,7 +83,7 @@ define <vscale x 8 x i8> @vec_scalable_subvec_fixed_idx_zero_i8(<vscale x 8 x i8
; CHECK-NEXT: ret
%vec = load <vscale x 8 x i8>, <vscale x 8 x i8>* %a
%subvec = load <8 x i8>, <8 x i8>* %b
%ins = call <vscale x 8 x i8> @llvm.experimental.vector.insert.nxv8i8.v8i8(<vscale x 8 x i8> %vec, <8 x i8> %subvec, i64 0)
%ins = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v8i8(<vscale x 8 x i8> %vec, <8 x i8> %subvec, i64 0)
ret <vscale x 8 x i8> %ins
}

@@ -111,7 +111,7 @@ define <vscale x 8 x i8> @vec_scalable_subvec_fixed_idx_nonzero_i8(<vscale x 8 x
; CHECK-NEXT: ret
%vec = load <vscale x 8 x i8>, <vscale x 8 x i8>* %a
%subvec = load <8 x i8>, <8 x i8>* %b
%ins = call <vscale x 8 x i8> @llvm.experimental.vector.insert.nxv8i8.v8i8(<vscale x 8 x i8> %vec, <8 x i8> %subvec, i64 8)
%ins = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v8i8(<vscale x 8 x i8> %vec, <8 x i8> %subvec, i64 8)
ret <vscale x 8 x i8> %ins
}

@@ -127,7 +127,7 @@ define <vscale x 4 x i16> @vec_scalable_subvec_fixed_idx_zero_i16(<vscale x 4 x
; CHECK-NEXT: ret
%vec = load <vscale x 4 x i16>, <vscale x 4 x i16>* %a
%subvec = load <4 x i16>, <4 x i16>* %b
%ins = call <vscale x 4 x i16> @llvm.experimental.vector.insert.nxv4i16.v4i16(<vscale x 4 x i16> %vec, <4 x i16> %subvec, i64 0)
%ins = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v4i16(<vscale x 4 x i16> %vec, <4 x i16> %subvec, i64 0)
ret <vscale x 4 x i16> %ins
}

@@ -155,7 +155,7 @@ define <vscale x 4 x i16> @vec_scalable_subvec_fixed_idx_nonzero_i16(<vscale x 4
; CHECK-NEXT: ret
%vec = load <vscale x 4 x i16>, <vscale x 4 x i16>* %a
%subvec = load <4 x i16>, <4 x i16>* %b
%ins = call <vscale x 4 x i16> @llvm.experimental.vector.insert.nxv4i16.v4i16(<vscale x 4 x i16> %vec, <4 x i16> %subvec, i64 4)
%ins = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v4i16(<vscale x 4 x i16> %vec, <4 x i16> %subvec, i64 4)
ret <vscale x 4 x i16> %ins
}

@@ -171,7 +171,7 @@ define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_zero_i32(<vscale x 2 x
; CHECK-NEXT: ret
%vec = load <vscale x 2 x i32>, <vscale x 2 x i32>* %a
%subvec = load <2 x i32>, <2 x i32>* %b
%ins = call <vscale x 2 x i32> @llvm.experimental.vector.insert.nxv2i32.v2i32(<vscale x 2 x i32> %vec, <2 x i32> %subvec, i64 0)
%ins = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v2i32(<vscale x 2 x i32> %vec, <2 x i32> %subvec, i64 0)
ret <vscale x 2 x i32> %ins
}

@@ -199,7 +199,7 @@ define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_nonzero_i32(<vscale x 2
; CHECK-NEXT: ret
%vec = load <vscale x 2 x i32>, <vscale x 2 x i32>* %a
%subvec = load <2 x i32>, <2 x i32>* %b
%ins = call <vscale x 2 x i32> @llvm.experimental.vector.insert.nxv2i32.v2i32(<vscale x 2 x i32> %vec, <2 x i32> %subvec, i64 2)
%ins = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v2i32(<vscale x 2 x i32> %vec, <2 x i32> %subvec, i64 2)
ret <vscale x 2 x i32> %ins
}

@@ -228,18 +228,18 @@ define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_nonzero_large_i32(<vsca
; CHECK-NEXT: ret
%vec = load <vscale x 2 x i32>, <vscale x 2 x i32>* %a
%subvec = load <8 x i32>, <8 x i32>* %b
%ins = call <vscale x 2 x i32> @llvm.experimental.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> %vec, <8 x i32> %subvec, i64 8)
%ins = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> %vec, <8 x i32> %subvec, i64 8)
ret <vscale x 2 x i32> %ins
}

declare <vscale x 8 x i8> @llvm.experimental.vector.insert.nxv8i8.nxv4i8(<vscale x 8 x i8>, <vscale x 4 x i8>, i64)
declare <vscale x 4 x i16> @llvm.experimental.vector.insert.nxv4i16.nxv2i16(<vscale x 4 x i16>, <vscale x 2 x i16>, i64)
declare <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.nxv4i8(<vscale x 8 x i8>, <vscale x 4 x i8>, i64)
declare <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.nxv2i16(<vscale x 4 x i16>, <vscale x 2 x i16>, i64)

declare <vscale x 8 x i8> @llvm.experimental.vector.insert.nxv8i8.v8i8(<vscale x 8 x i8>, <8 x i8>, i64)
declare <vscale x 4 x i16> @llvm.experimental.vector.insert.nxv4i16.v4i16(<vscale x 4 x i16>, <4 x i16>, i64)
declare <vscale x 2 x i32> @llvm.experimental.vector.insert.nxv2i32.v2i32(<vscale x 2 x i32>, <2 x i32>, i64)
declare <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v8i8(<vscale x 8 x i8>, <8 x i8>, i64)
declare <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v4i16(<vscale x 4 x i16>, <4 x i16>, i64)
declare <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v2i32(<vscale x 2 x i32>, <2 x i32>, i64)

declare <vscale x 2 x i32> @llvm.experimental.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32>, <8 x i32>, i64)
declare <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32>, <8 x i32>, i64)

attributes #0 = { nounwind "target-features"="+sve" }
attributes #1 = { nounwind "target-features"="+sve" vscale_range(4,4) }
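
For reference, a minimal standalone sketch of the renamed intrinsic as the tests above now spell it; it is not part of the patch, and the function name @insert_example, the chosen element types, and the inline target attribute are illustrative assumptions only.

; Illustrative only: insert a fixed-width <4 x i32> into the low elements of a
; scalable vector using the non-experimental intrinsic name.
define <vscale x 4 x i32> @insert_example(<vscale x 4 x i32> %vec, <4 x i32> %sub) "target-features"="+sve" {
  %ins = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %sub, i64 0)
  ret <vscale x 4 x i32> %ins
}

declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64)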
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/split-vector-insert.ll
@@ -5,8 +5,8 @@
target triple = "aarch64-unknown-linux-gnu"
attributes #0 = {"target-features"="+sve" uwtable}

declare <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64>, <8 x i64>, i64)
declare <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double>, <8 x double>, i64)
declare <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64>, <8 x i64>, i64)
declare <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double>, <8 x double>, i64)

define <vscale x 2 x i64> @test_nxv2i64_v8i64(<vscale x 2 x i64> %a, <8 x i64> %b) #0 {
; CHECK-LEGALIZATION: Legally typed node: [[T1:t[0-9]+]]: nxv2i64 = insert_subvector {{t[0-9]+}}, {{t[0-9]+}}, Constant:i64<0>
@@ -61,7 +61,7 @@ define <vscale x 2 x i64> @test_nxv2i64_v8i64(<vscale x 2 x i64> %a, <8 x i64> %



%r = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> %a, <8 x i64> %b, i64 0)
%r = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> %a, <8 x i64> %b, i64 0)
ret <vscale x 2 x i64> %r
}

@@ -118,6 +118,6 @@ define <vscale x 2 x double> @test_nxv2f64_v8f64(<vscale x 2 x double> %a, <8 x



%r = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> %a, <8 x double> %b, i64 0)
%r = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> %a, <8 x double> %b, i64 0)
ret <vscale x 2 x double> %r
}
@@ -4,8 +4,8 @@

; CHECK-ERROR: ERROR: Extracting a fixed-length vector from an illegal scalable vector is not yet supported
define <4 x i32> @extract_v4i32_nxv16i32_12(<vscale x 16 x i32> %arg) {
%ext = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv16i32(<vscale x 16 x i32> %arg, i64 12)
%ext = call <4 x i32> @llvm.vector.extract.v4i32.nxv16i32(<vscale x 16 x i32> %arg, i64 12)
ret <4 x i32> %ext
}

declare <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv16i32(<vscale x 16 x i32>, i64)
declare <4 x i32> @llvm.vector.extract.v4i32.nxv16i32(<vscale x 16 x i32>, i64)
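
By contrast with the unsupported illegal-type case above, the sketch below shows an extract at index 0 from a legal SVE type under the new name; the function name @extract_example and the element types are an invented illustration, not part of the patch.

; Illustrative only: extract the low fixed-width <4 x i32> from a legal
; scalable <vscale x 4 x i32>.
define <4 x i32> @extract_example(<vscale x 4 x i32> %arg) "target-features"="+sve" {
  %ext = call <4 x i32> @llvm.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %arg, i64 0)
  ret <4 x i32> %ext
}

declare <4 x i32> @llvm.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32>, i64)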
94 changes: 47 additions & 47 deletions llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll

Large diffs are not rendered by default.

224 changes: 112 additions & 112 deletions llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll

Large diffs are not rendered by default.

28 changes: 14 additions & 14 deletions llvm/test/CodeGen/AArch64/sve-extract-vector-to-predicate-store.ll
@@ -8,7 +8,7 @@ define void @pred_store_v2i8(<vscale x 16 x i1> %pred, <2 x i8>* %addr) #0 {
; CHECK-NEXT: store <vscale x 16 x i1> %pred, <vscale x 16 x i1>* [[TMP1]]
; CHECK-NEXT: ret void
%bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
%extract = tail call <2 x i8> @llvm.experimental.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
%extract = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
store <2 x i8> %extract, <2 x i8>* %addr, align 4
ret void
}
@@ -19,7 +19,7 @@ define void @pred_store_v4i8(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #1 {
; CHECK-NEXT: store <vscale x 16 x i1> %pred, <vscale x 16 x i1>* [[TMP1]]
; CHECK-NEXT: ret void
%bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
%extract = tail call <4 x i8> @llvm.experimental.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
%extract = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
store <4 x i8> %extract, <4 x i8>* %addr, align 4
ret void
}
@@ -30,7 +30,7 @@ define void @pred_store_v8i8(<vscale x 16 x i1> %pred, <8 x i8>* %addr) #2 {
; CHECK-NEXT: store <vscale x 16 x i1> %pred, <vscale x 16 x i1>* [[TMP1]]
; CHECK-NEXT: ret void
%bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
%extract = tail call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
%extract = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
store <8 x i8> %extract, <8 x i8>* %addr, align 4
ret void
}
@@ -39,46 +39,46 @@ define void @pred_store_v8i8(<vscale x 16 x i1> %pred, <8 x i8>* %addr) #2 {
; Check that too small of a vscale prevents optimization
define void @pred_store_neg1(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #0 {
; CHECK-LABEL: @pred_store_neg1(
; CHECK: call <4 x i8> @llvm.experimental.vector.extract
; CHECK: call <4 x i8> @llvm.vector.extract
%bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
%extract = tail call <4 x i8> @llvm.experimental.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
%extract = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
store <4 x i8> %extract, <4 x i8>* %addr, align 4
ret void
}

; Check that too large of a vscale prevents optimization
define void @pred_store_neg2(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #2 {
; CHECK-LABEL: @pred_store_neg2(
; CHECK: call <4 x i8> @llvm.experimental.vector.extract
; CHECK: call <4 x i8> @llvm.vector.extract
%bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
%extract = tail call <4 x i8> @llvm.experimental.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
%extract = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
store <4 x i8> %extract, <4 x i8>* %addr, align 4
ret void
}

; Check that a non-zero index prevents optimization
define void @pred_store_neg3(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #1 {
; CHECK-LABEL: @pred_store_neg3(
; CHECK: call <4 x i8> @llvm.experimental.vector.extract
; CHECK: call <4 x i8> @llvm.vector.extract
%bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
%extract = tail call <4 x i8> @llvm.experimental.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 4)
%extract = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 4)
store <4 x i8> %extract, <4 x i8>* %addr, align 4
ret void
}

; Check that differing vscale min/max prevents optimization
define void @pred_store_neg4(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #3 {
; CHECK-LABEL: @pred_store_neg4(
; CHECK: call <4 x i8> @llvm.experimental.vector.extract
; CHECK: call <4 x i8> @llvm.vector.extract
%bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
%extract = tail call <4 x i8> @llvm.experimental.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
%extract = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
store <4 x i8> %extract, <4 x i8>* %addr, align 4
ret void
}

declare <2 x i8> @llvm.experimental.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8>, i64)
declare <4 x i8> @llvm.experimental.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8>, i64)
declare <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8>, i64)
declare <2 x i8> @llvm.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8>, i64)
declare <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8>, i64)
declare <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8>, i64)

attributes #0 = { "target-features"="+sve" vscale_range(1,1) }
attributes #1 = { "target-features"="+sve" vscale_range(2,2) }
172 changes: 86 additions & 86 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll

Large diffs are not rendered by default.

34 changes: 17 additions & 17 deletions llvm/test/CodeGen/AArch64/sve-insert-vector-to-predicate-load.ll
@@ -8,7 +8,7 @@ define <vscale x 16 x i1> @pred_load_v2i8(<2 x i8>* %addr) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]]
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
%load = load <2 x i8>, <2 x i8>* %addr, align 4
%insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %load, i64 0)
%insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %load, i64 0)
%ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
ret <vscale x 16 x i1> %ret
}
@@ -19,7 +19,7 @@ define <vscale x 16 x i1> @pred_load_v4i8(<4 x i8>* %addr) #1 {
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]]
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
%load = load <4 x i8>, <4 x i8>* %addr, align 4
%insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
%insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
%ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
ret <vscale x 16 x i1> %ret
}
@@ -30,7 +30,7 @@ define <vscale x 16 x i1> @pred_load_v8i8(<8 x i8>* %addr) #2 {
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]]
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
%load = load <8 x i8>, <8 x i8>* %addr, align 4
%insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> %load, i64 0)
%insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> %load, i64 0)
%ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
ret <vscale x 16 x i1> %ret
}
@@ -49,64 +49,64 @@ entry:
br label %bb1

bb1:
%insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %load, i64 0)
%insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %load, i64 0)
%ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
ret <vscale x 16 x i1> %ret
}

; Check that too small of a vscale prevents optimization
define <vscale x 16 x i1> @pred_load_neg1(<4 x i8>* %addr) #0 {
; CHECK-LABEL: @pred_load_neg1(
; CHECK: call <vscale x 2 x i8> @llvm.experimental.vector.insert
; CHECK: call <vscale x 2 x i8> @llvm.vector.insert
%load = load <4 x i8>, <4 x i8>* %addr, align 4
%insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
%insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
%ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
ret <vscale x 16 x i1> %ret
}

; Check that too large of a vscale prevents optimization
define <vscale x 16 x i1> @pred_load_neg2(<4 x i8>* %addr) #2 {
; CHECK-LABEL: @pred_load_neg2(
; CHECK: call <vscale x 2 x i8> @llvm.experimental.vector.insert
; CHECK: call <vscale x 2 x i8> @llvm.vector.insert
%load = load <4 x i8>, <4 x i8>* %addr, align 4
%insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
%insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
%ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
ret <vscale x 16 x i1> %ret
}

; Check that a non-zero index prevents optimization
define <vscale x 16 x i1> @pred_load_neg3(<4 x i8>* %addr) #1 {
; CHECK-LABEL: @pred_load_neg3(
; CHECK: call <vscale x 2 x i8> @llvm.experimental.vector.insert
; CHECK: call <vscale x 2 x i8> @llvm.vector.insert
%load = load <4 x i8>, <4 x i8>* %addr, align 4
%insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 4)
%insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 4)
%ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
ret <vscale x 16 x i1> %ret
}

; Check that differing vscale min/max prevents optimization
define <vscale x 16 x i1> @pred_load_neg4(<4 x i8>* %addr) #3 {
; CHECK-LABEL: @pred_load_neg4(
; CHECK: call <vscale x 2 x i8> @llvm.experimental.vector.insert
; CHECK: call <vscale x 2 x i8> @llvm.vector.insert
%load = load <4 x i8>, <4 x i8>* %addr, align 4
%insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
%insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
%ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
ret <vscale x 16 x i1> %ret
}

; Check that insertion into a non-undef vector prevents optimization
define <vscale x 16 x i1> @pred_load_neg5(<4 x i8>* %addr, <vscale x 2 x i8> %passthru) #1 {
; CHECK-LABEL: @pred_load_neg5(
; CHECK: call <vscale x 2 x i8> @llvm.experimental.vector.insert
; CHECK: call <vscale x 2 x i8> @llvm.vector.insert
%load = load <4 x i8>, <4 x i8>* %addr, align 4
%insert = tail call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> %passthru, <4 x i8> %load, i64 0)
%insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> %passthru, <4 x i8> %load, i64 0)
%ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
ret <vscale x 16 x i1> %ret
}

declare <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8>, <2 x i8>, i64)
declare <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8>, <4 x i8>, i64)
declare <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8>, <8 x i8>, i64)
declare <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8>, <2 x i8>, i64)
declare <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8>, <4 x i8>, i64)
declare <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8>, <8 x i8>, i64)

attributes #0 = { "target-features"="+sve" vscale_range(1,1) }
attributes #1 = { "target-features"="+sve" vscale_range(2,2) }
194 changes: 97 additions & 97 deletions llvm/test/CodeGen/AArch64/sve-insert-vector.ll

Large diffs are not rendered by default.

32 changes: 16 additions & 16 deletions llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
@@ -588,7 +588,7 @@ define dso_local <vscale x 2 x double> @dupq_ld1rqd_f64() {
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI49_0]
; CHECK-NEXT: mov z0.q, q0
; CHECK-NEXT: ret
%1 = tail call fast <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> undef, <2 x double> <double 1.000000e+00, double 2.000000e+00>, i64 0)
%1 = tail call fast <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> undef, <2 x double> <double 1.000000e+00, double 2.000000e+00>, i64 0)
%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %1, i64 0)
ret <vscale x 2 x double> %2
}
@@ -600,7 +600,7 @@ define dso_local <vscale x 4 x float> @dupq_ld1rqw_f32() {
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI50_0]
; CHECK-NEXT: mov z0.q, q0
; CHECK-NEXT: ret
%1 = tail call fast <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>, i64 0)
%1 = tail call fast <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>, i64 0)
%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %1, i64 0)
ret <vscale x 4 x float> %2
}
@@ -612,7 +612,7 @@ define dso_local <vscale x 8 x half> @dupq_ld1rqh_f16() {
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI51_0]
; CHECK-NEXT: mov z0.q, q0
; CHECK-NEXT: ret
%1 = tail call fast <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> undef, <8 x half> <half 0xH3C00, half 0xH4000, half 0xH4200, half 0xH4400, half 0xH4500, half 0xH4600, half 0xH4700, half 0xH4800>, i64 0)
%1 = tail call fast <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> undef, <8 x half> <half 0xH3C00, half 0xH4000, half 0xH4200, half 0xH4400, half 0xH4500, half 0xH4600, half 0xH4700, half 0xH4800>, i64 0)
%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %1, i64 0)
ret <vscale x 8 x half> %2
}
@@ -624,7 +624,7 @@ define dso_local <vscale x 8 x bfloat> @dupq_ld1rqh_bf16() #0 {
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI52_0]
; CHECK-NEXT: mov z0.q, q0
; CHECK-NEXT: ret
%1 = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> undef, <8 x bfloat> <bfloat 1.000e+00, bfloat 2.000e+00, bfloat 3.000e+00, bfloat 4.000e+00, bfloat 5.000e+00, bfloat 6.000e+00, bfloat 7.000e+00, bfloat 8.000e+00>, i64 0)
%1 = call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> undef, <8 x bfloat> <bfloat 1.000e+00, bfloat 2.000e+00, bfloat 3.000e+00, bfloat 4.000e+00, bfloat 5.000e+00, bfloat 6.000e+00, bfloat 7.000e+00, bfloat 8.000e+00>, i64 0)
%2 = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %1, i64 0)
ret <vscale x 8 x bfloat> %2
}
@@ -636,7 +636,7 @@ define dso_local <vscale x 2 x i64> @dupq_ld1rqd_i64() {
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI53_0]
; CHECK-NEXT: mov z0.q, q0
; CHECK-NEXT: ret
%1 = tail call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> <i64 1, i64 2>, i64 0)
%1 = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> <i64 1, i64 2>, i64 0)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %1, i64 0)
ret <vscale x 2 x i64> %2
}
@@ -648,7 +648,7 @@ define dso_local <vscale x 4 x i32> @dupq_ld1rqd_i32() {
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI54_0]
; CHECK-NEXT: mov z0.q, q0
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i64 0)
%1 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i64 0)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %1, i64 0)
ret <vscale x 4 x i32> %2
}
@@ -660,7 +660,7 @@ define dso_local <vscale x 8 x i16> @dupq_ld1rqd_i16() {
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI55_0]
; CHECK-NEXT: mov z0.q, q0
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i64 0)
%1 = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i64 0)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %1, i64 0)
ret <vscale x 8 x i16> %2
}
@@ -672,7 +672,7 @@ define dso_local <vscale x 16 x i8> @dupq_ld1rqd_i8() {
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI56_0]
; CHECK-NEXT: mov z0.q, q0
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, i64 0)
%1 = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, i64 0)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %1, i64 0)
ret <vscale x 16 x i8> %2
}
@@ -2559,14 +2559,14 @@ declare <vscale x 2 x float> @llvm.aarch64.sve.zip2.nxv2f32(<vscale x 2 x float>
declare <vscale x 4 x float> @llvm.aarch64.sve.zip2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.zip2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v2f64(<vscale x 2 x double>, <2 x double>, i64)
declare <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v4f32(<vscale x 4 x float>, <4 x float>, i64)
declare <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v8f16(<vscale x 8 x half>, <8 x half>, i64)
declare <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64>, <2 x i64>, i64)
declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64)
declare <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16>, <8 x i16>, i64)
declare <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8>, <16 x i8>, i64)
declare <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat>, <8 x bfloat>, i64)
declare <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double>, <2 x double>, i64)
declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float>, <4 x float>, i64)
declare <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half>, <8 x half>, i64)
declare <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64>, <2 x i64>, i64)
declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64)
declare <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16>, <8 x i16>, i64)
declare <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8>, <16 x i8>, i64)
declare <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat>, <8 x bfloat>, i64)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }
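
The dupq tests above all follow one pattern: materialize a 128-bit constant in the low quadword with llvm.vector.insert, then splat it with llvm.aarch64.sve.dupq.lane. A condensed sketch of that pattern follows; the function name @dupq_example and the inline attribute are illustrative assumptions, not part of the patch.

; Illustrative only: build a quadword constant and splat it across the vector.
define <vscale x 4 x i32> @dupq_example() "target-features"="+sve" {
  %ins = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i64 0)
  %splat = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %ins, i64 0)
  ret <vscale x 4 x i32> %splat
}

declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32>, i64)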
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/sve-no-typesize-warnings.ll
@@ -8,13 +8,13 @@ define <4 x i32> @sve_no_typesize_warning(<vscale x 8 x i16> %a, <4 x i16> %b) #
; CHECK: // %bb.0:
; CHECK-NEXT: uaddl v0.4s, v0.4h, v1.4h
; CHECK-NEXT: ret
%a.lo = call <4 x i16> @llvm.experimental.vector.extract.v4i16.nxv8i16(<vscale x 8 x i16> %a, i64 0)
%a.lo = call <4 x i16> @llvm.vector.extract.v4i16.nxv8i16(<vscale x 8 x i16> %a, i64 0)
%a.lo.zext = zext <4 x i16> %a.lo to <4 x i32>
%b.zext = zext <4 x i16> %b to <4 x i32>
%add = add <4 x i32> %a.lo.zext, %b.zext
ret <4 x i32> %add
}

declare <4 x i16> @llvm.experimental.vector.extract.v4i16.nxv8i16(<vscale x 8 x i16>, i64)
declare <4 x i16> @llvm.vector.extract.v4i16.nxv8i16(<vscale x 8 x i16>, i64)

attributes #0 = { "target-features"="+sve" }
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/AArch64/sve-punpklo-combine.ll
@@ -11,7 +11,7 @@ define <vscale x 8 x i1> @masked_load_sext_i8i16(i8* %ap, <vscale x 16 x i8> %b)
; CHECK-NEXT: ret
%p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 10)
%cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
%extract = call <vscale x 8 x i1> @llvm.experimental.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%extract = call <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%ext1 = sext <vscale x 8 x i1> %extract to <vscale x 8 x i16>
%p1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 10)
%cmp1 = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1> %p1, <vscale x 8 x i16> %ext1, <vscale x 8 x i16> zeroinitializer)
@@ -30,7 +30,7 @@ define <vscale x 8 x i1> @masked_load_sext_i8i16_ptrue_vl(i8* %ap, <vscale x 16
; CHECK-NEXT: ret
%p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
%cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
%extract = call <vscale x 8 x i1> @llvm.experimental.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%extract = call <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%ext1 = sext <vscale x 8 x i1> %extract to <vscale x 8 x i16>
%p1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 10)
%cmp1 = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1> %p1, <vscale x 8 x i16> %ext1, <vscale x 8 x i16> zeroinitializer)
@@ -47,7 +47,7 @@ define <vscale x 8 x i1> @masked_load_sext_i8i16_parg(i8* %ap, <vscale x 16 x i8
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
%extract = call <vscale x 8 x i1> @llvm.experimental.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%extract = call <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%ext1 = sext <vscale x 8 x i1> %extract to <vscale x 8 x i16>
%p1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 10)
%cmp1 = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1> %p1, <vscale x 8 x i16> %ext1, <vscale x 8 x i16> zeroinitializer)
@@ -64,7 +64,7 @@ define <vscale x 4 x i1> @masked_load_sext_i8i32(i8* %ap, <vscale x 16 x i8> %b)
; CHECK-NEXT: ret
%p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 10)
%cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
%extract = call <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%extract = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%ext1 = sext <vscale x 4 x i1> %extract to <vscale x 4 x i32>
%p1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 10)
%cmp1 = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %p1, <vscale x 4 x i32> %ext1, <vscale x 4 x i32> zeroinitializer)
@@ -84,7 +84,7 @@ define <vscale x 4 x i1> @masked_load_sext_i8i32_ptrue_vl(i8* %ap, <vscale x 16
; CHECK-NEXT: ret
%p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
%cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
%extract = call <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%extract = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%ext1 = sext <vscale x 4 x i1> %extract to <vscale x 4 x i32>
%p1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 10)
%cmp1 = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %p1, <vscale x 4 x i32> %ext1, <vscale x 4 x i32> zeroinitializer)
@@ -102,7 +102,7 @@ define <vscale x 4 x i1> @masked_load_sext_i8i32_parg(i8* %ap, <vscale x 16 x i8
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
%extract = call <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%extract = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%ext1 = sext <vscale x 4 x i1> %extract to <vscale x 4 x i32>
%p1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 10)
%cmp1 = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %p1, <vscale x 4 x i32> %ext1, <vscale x 4 x i32> zeroinitializer)
@@ -120,7 +120,7 @@ define <vscale x 2 x i1> @masked_load_sext_i8i64(i8* %ap, <vscale x 16 x i8> %b)
; CHECK-NEXT: ret
%p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 10)
%cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
%extract = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%extract = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%ext1 = sext <vscale x 2 x i1> %extract to <vscale x 2 x i64>
%p1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 10)
%cmp1 = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %p1, <vscale x 2 x i64> %ext1, <vscale x 2 x i64> zeroinitializer)
@@ -141,7 +141,7 @@ define <vscale x 2 x i1> @masked_load_sext_i8i64_ptrue_vl(i8* %ap, <vscale x 16
; CHECK-NEXT: ret
%p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
%cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
%extract = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%extract = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%ext1 = sext <vscale x 2 x i1> %extract to <vscale x 2 x i64>
%p1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 10)
%cmp1 = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %p1, <vscale x 2 x i64> %ext1, <vscale x 2 x i64> zeroinitializer)
@@ -160,7 +160,7 @@ define <vscale x 2 x i1> @masked_load_sext_i8i64_parg(i8* %ap, <vscale x 16 x i8
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
%extract = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%extract = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%ext1 = sext <vscale x 2 x i1> %extract to <vscale x 2 x i64>
%p1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 10)
%cmp1 = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %p1, <vscale x 2 x i64> %ext1, <vscale x 2 x i64> zeroinitializer)
@@ -179,7 +179,7 @@ define <vscale x 8 x i1> @masked_load_sext_i8i16_ptrue_all(i8* %ap, <vscale x 16
; CHECK-NEXT: ret
%p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
%cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
%extract = call <vscale x 8 x i1> @llvm.experimental.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%extract = call <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%ext1 = sext <vscale x 8 x i1> %extract to <vscale x 8 x i16>
%p1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 10)
%cmp1 = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1> %p1, <vscale x 8 x i16> %ext1, <vscale x 8 x i16> zeroinitializer)
@@ -199,7 +199,7 @@ define <vscale x 4 x i1> @masked_load_sext_i8i32_ptrue_all(i8* %ap, <vscale x 16
; CHECK-NEXT: ret
%p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
%cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
%extract = call <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%extract = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%ext1 = sext <vscale x 4 x i1> %extract to <vscale x 4 x i32>
%p1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 10)
%cmp1 = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %p1, <vscale x 4 x i32> %ext1, <vscale x 4 x i32> zeroinitializer)
@@ -218,7 +218,7 @@ define <vscale x 2 x i1> @masked_load_sext_i8i64_ptrue_all(i8* %ap, <vscale x 16
; CHECK-NEXT: ret
%p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
%extract = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%extract = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
%ext1 = sext <vscale x 2 x i1> %extract to <vscale x 2 x i64>
%p1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%cmp1 = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %p1, <vscale x 2 x i64> %ext1, <vscale x 2 x i64> zeroinitializer)
@@ -232,9 +232,9 @@ declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)

declare <vscale x 8 x i1> @llvm.experimental.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1>, i64)
declare <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1>, i64)
declare <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1>, i64)
declare <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1>, i64)
declare <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1>, i64)
declare <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1>, i64)

declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll
@@ -11,7 +11,7 @@ define i1 @reduce_or_insert_subvec_into_zero(<vscale x 4 x i1> %in) {
; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%t = call <vscale x 16 x i1> @llvm.experimental.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> zeroinitializer, <vscale x 4 x i1> %in, i64 0)
%t = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> zeroinitializer, <vscale x 4 x i1> %in, i64 0)
%res = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> %t)
ret i1 %res
}
@@ -22,7 +22,7 @@ define i1 @reduce_or_insert_subvec_into_poison(<vscale x 4 x i1> %in) {
; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%t = call <vscale x 16 x i1> @llvm.experimental.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> poison, <vscale x 4 x i1> %in, i64 0)
%t = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> poison, <vscale x 4 x i1> %in, i64 0)
%res = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> %t)
ret i1 %res
}
@@ -38,7 +38,7 @@ define i1 @reduce_or_insert_subvec_into_nonzero(<vscale x 4 x i1> %in, <vscale x
; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%t = call <vscale x 16 x i1> @llvm.experimental.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> %vec, <vscale x 4 x i1> %in, i64 0)
%t = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> %vec, <vscale x 4 x i1> %in, i64 0)
%res = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> %t)
ret i1 %res
}
@@ -57,7 +57,7 @@ define i1 @reduce_and_insert_subvec_into_ones(<vscale x 4 x i1> %in) {
; CHECK-NEXT: ret
%allones.ins = insertelement <vscale x 16 x i1> poison, i1 1, i32 0
%allones = shufflevector <vscale x 16 x i1> %allones.ins, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
%t = call <vscale x 16 x i1> @llvm.experimental.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> %allones, <vscale x 4 x i1> %in, i64 0)
%t = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> %allones, <vscale x 4 x i1> %in, i64 0)
%res = call i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1> %t)
ret i1 %res
}
@@ -70,7 +70,7 @@ define i1 @reduce_and_insert_subvec_into_poison(<vscale x 4 x i1> %in) {
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t = call <vscale x 16 x i1> @llvm.experimental.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> poison, <vscale x 4 x i1> %in, i64 0)
%t = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> poison, <vscale x 4 x i1> %in, i64 0)
%res = call i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1> %t)
ret i1 %res
}
@@ -88,11 +88,11 @@ define i1 @reduce_and_insert_subvec_into_var(<vscale x 4 x i1> %in, <vscale x 16
; CHECK-NEXT: ptest p2, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%t = call <vscale x 16 x i1> @llvm.experimental.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> %vec, <vscale x 4 x i1> %in, i64 0)
%t = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> %vec, <vscale x 4 x i1> %in, i64 0)
%res = call i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1> %t)
ret i1 %res
}

declare i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1>)
declare i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.experimental.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1>, <vscale x 4 x i1>, i64)
declare <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1>, <vscale x 4 x i1>, i64)
126 changes: 63 additions & 63 deletions llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll

Large diffs are not rendered by default.

86 changes: 43 additions & 43 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
@@ -11,7 +11,7 @@ define void @extract_v2i8_v4i8_0(<4 x i8>* %x, <2 x i8>* %y) {
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%a = load <4 x i8>, <4 x i8>* %x
%c = call <2 x i8> @llvm.experimental.vector.extract.v2i8.v4i8(<4 x i8> %a, i64 0)
%c = call <2 x i8> @llvm.vector.extract.v2i8.v4i8(<4 x i8> %a, i64 0)
store <2 x i8> %c, <2 x i8>* %y
ret void
}
@@ -27,7 +27,7 @@ define void @extract_v2i8_v4i8_2(<4 x i8>* %x, <2 x i8>* %y) {
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%a = load <4 x i8>, <4 x i8>* %x
%c = call <2 x i8> @llvm.experimental.vector.extract.v2i8.v4i8(<4 x i8> %a, i64 2)
%c = call <2 x i8> @llvm.vector.extract.v2i8.v4i8(<4 x i8> %a, i64 2)
store <2 x i8> %c, <2 x i8>* %y
ret void
}
@@ -41,7 +41,7 @@ define void @extract_v2i8_v8i8_0(<8 x i8>* %x, <2 x i8>* %y) {
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%a = load <8 x i8>, <8 x i8>* %x
%c = call <2 x i8> @llvm.experimental.vector.extract.v2i8.v8i8(<8 x i8> %a, i64 0)
%c = call <2 x i8> @llvm.vector.extract.v2i8.v8i8(<8 x i8> %a, i64 0)
store <2 x i8> %c, <2 x i8>* %y
ret void
}
@@ -57,7 +57,7 @@ define void @extract_v2i8_v8i8_6(<8 x i8>* %x, <2 x i8>* %y) {
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%a = load <8 x i8>, <8 x i8>* %x
%c = call <2 x i8> @llvm.experimental.vector.extract.v2i8.v8i8(<8 x i8> %a, i64 6)
%c = call <2 x i8> @llvm.vector.extract.v2i8.v8i8(<8 x i8> %a, i64 6)
store <2 x i8> %c, <2 x i8>* %y
ret void
}
@@ -79,7 +79,7 @@ define void @extract_v2i32_v8i32_0(<8 x i32>* %x, <2 x i32>* %y) {
; LMULMAX1-NEXT: vse32.v v8, (a1)
; LMULMAX1-NEXT: ret
%a = load <8 x i32>, <8 x i32>* %x
%c = call <2 x i32> @llvm.experimental.vector.extract.v2i32.v8i32(<8 x i32> %a, i64 0)
%c = call <2 x i32> @llvm.vector.extract.v2i32.v8i32(<8 x i32> %a, i64 0)
store <2 x i32> %c, <2 x i32>* %y
ret void
}
@@ -105,7 +105,7 @@ define void @extract_v2i32_v8i32_2(<8 x i32>* %x, <2 x i32>* %y) {
; LMULMAX1-NEXT: vse32.v v8, (a1)
; LMULMAX1-NEXT: ret
%a = load <8 x i32>, <8 x i32>* %x
%c = call <2 x i32> @llvm.experimental.vector.extract.v2i32.v8i32(<8 x i32> %a, i64 2)
%c = call <2 x i32> @llvm.vector.extract.v2i32.v8i32(<8 x i32> %a, i64 2)
store <2 x i32> %c, <2 x i32>* %y
ret void
}
@@ -132,7 +132,7 @@ define void @extract_v2i32_v8i32_6(<8 x i32>* %x, <2 x i32>* %y) {
; LMULMAX1-NEXT: vse32.v v8, (a1)
; LMULMAX1-NEXT: ret
%a = load <8 x i32>, <8 x i32>* %x
%c = call <2 x i32> @llvm.experimental.vector.extract.v2i32.v8i32(<8 x i32> %a, i64 6)
%c = call <2 x i32> @llvm.vector.extract.v2i32.v8i32(<8 x i32> %a, i64 6)
store <2 x i32> %c, <2 x i32>* %y
ret void
}
@@ -143,7 +143,7 @@ define void @extract_v2i32_nxv16i32_0(<vscale x 16 x i32> %x, <2 x i32>* %y) {
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%c = call <2 x i32> @llvm.experimental.vector.extract.v2i32.nxv16i32(<vscale x 16 x i32> %x, i64 0)
%c = call <2 x i32> @llvm.vector.extract.v2i32.nxv16i32(<vscale x 16 x i32> %x, i64 0)
store <2 x i32> %c, <2 x i32>* %y
ret void
}
@@ -156,7 +156,7 @@ define void @extract_v2i32_nxv16i32_8(<vscale x 16 x i32> %x, <2 x i32>* %y) {
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%c = call <2 x i32> @llvm.experimental.vector.extract.v2i32.nxv16i32(<vscale x 16 x i32> %x, i64 6)
%c = call <2 x i32> @llvm.vector.extract.v2i32.nxv16i32(<vscale x 16 x i32> %x, i64 6)
store <2 x i32> %c, <2 x i32>* %y
ret void
}
@@ -167,7 +167,7 @@ define void @extract_v2i8_nxv2i8_0(<vscale x 2 x i8> %x, <2 x i8>* %y) {
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%c = call <2 x i8> @llvm.experimental.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> %x, i64 0)
%c = call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> %x, i64 0)
store <2 x i8> %c, <2 x i8>* %y
ret void
}
@@ -180,7 +180,7 @@ define void @extract_v2i8_nxv2i8_2(<vscale x 2 x i8> %x, <2 x i8>* %y) {
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
%c = call <2 x i8> @llvm.experimental.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> %x, i64 2)
%c = call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> %x, i64 2)
store <2 x i8> %c, <2 x i8>* %y
ret void
}
@@ -204,7 +204,7 @@ define void @extract_v8i32_nxv16i32_8(<vscale x 16 x i32> %x, <8 x i32>* %y) {
; LMULMAX1-NEXT: vse32.v v8, (a1)
; LMULMAX1-NEXT: vse32.v v16, (a0)
; LMULMAX1-NEXT: ret
%c = call <8 x i32> @llvm.experimental.vector.extract.v8i32.nxv16i32(<vscale x 16 x i32> %x, i64 8)
%c = call <8 x i32> @llvm.vector.extract.v8i32.nxv16i32(<vscale x 16 x i32> %x, i64 8)
store <8 x i32> %c, <8 x i32>* %y
ret void
}
@@ -227,7 +227,7 @@ define void @extract_v8i1_v64i1_0(<64 x i1>* %x, <8 x i1>* %y) {
; LMULMAX1-NEXT: vsm.v v8, (a1)
; LMULMAX1-NEXT: ret
%a = load <64 x i1>, <64 x i1>* %x
%c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 0)
%c = call <8 x i1> @llvm.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 0)
store <8 x i1> %c, <8 x i1>* %y
ret void
}
@@ -254,7 +254,7 @@ define void @extract_v8i1_v64i1_8(<64 x i1>* %x, <8 x i1>* %y) {
; LMULMAX1-NEXT: vsm.v v8, (a1)
; LMULMAX1-NEXT: ret
%a = load <64 x i1>, <64 x i1>* %x
%c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 8)
%c = call <8 x i1> @llvm.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 8)
store <8 x i1> %c, <8 x i1>* %y
ret void
}
@@ -281,7 +281,7 @@ define void @extract_v8i1_v64i1_48(<64 x i1>* %x, <8 x i1>* %y) {
; LMULMAX1-NEXT: vsm.v v8, (a1)
; LMULMAX1-NEXT: ret
%a = load <64 x i1>, <64 x i1>* %x
%c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 48)
%c = call <8 x i1> @llvm.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 48)
store <8 x i1> %c, <8 x i1>* %y
ret void
}
@@ -292,7 +292,7 @@ define void @extract_v8i1_nxv2i1_0(<vscale x 2 x i1> %x, <8 x i1>* %y) {
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vsm.v v0, (a0)
; CHECK-NEXT: ret
%c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv2i1(<vscale x 2 x i1> %x, i64 0)
%c = call <8 x i1> @llvm.vector.extract.v8i1.nxv2i1(<vscale x 2 x i1> %x, i64 0)
store <8 x i1> %c, <8 x i1>* %y
ret void
}
@@ -303,7 +303,7 @@ define void @extract_v8i1_nxv64i1_0(<vscale x 64 x i1> %x, <8 x i1>* %y) {
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vsm.v v0, (a0)
; CHECK-NEXT: ret
%c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %x, i64 0)
%c = call <8 x i1> @llvm.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %x, i64 0)
store <8 x i1> %c, <8 x i1>* %y
ret void
}
@@ -316,7 +316,7 @@ define void @extract_v8i1_nxv64i1_8(<vscale x 64 x i1> %x, <8 x i1>* %y) {
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vsm.v v8, (a0)
; CHECK-NEXT: ret
%c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %x, i64 8)
%c = call <8 x i1> @llvm.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %x, i64 8)
store <8 x i1> %c, <8 x i1>* %y
ret void
}
@@ -329,7 +329,7 @@ define void @extract_v8i1_nxv64i1_48(<vscale x 64 x i1> %x, <8 x i1>* %y) {
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vsm.v v8, (a0)
; CHECK-NEXT: ret
%c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %x, i64 48)
%c = call <8 x i1> @llvm.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %x, i64 48)
store <8 x i1> %c, <8 x i1>* %y
ret void
}
@@ -369,7 +369,7 @@ define void @extract_v2i1_v64i1_0(<64 x i1>* %x, <2 x i1>* %y) {
; LMULMAX1-NEXT: vsm.v v8, (a1)
; LMULMAX1-NEXT: ret
%a = load <64 x i1>, <64 x i1>* %x
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 0)
%c = call <2 x i1> @llvm.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 0)
store <2 x i1> %c, <2 x i1>* %y
ret void
}
@@ -418,7 +418,7 @@ define void @extract_v2i1_v64i1_2(<64 x i1>* %x, <2 x i1>* %y) {
; LMULMAX1-NEXT: vsm.v v8, (a1)
; LMULMAX1-NEXT: ret
%a = load <64 x i1>, <64 x i1>* %x
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 2)
%c = call <2 x i1> @llvm.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 2)
store <2 x i1> %c, <2 x i1>* %y
ret void
}
@@ -469,7 +469,7 @@ define void @extract_v2i1_v64i1_42(<64 x i1>* %x, <2 x i1>* %y) {
; LMULMAX1-NEXT: vsm.v v8, (a1)
; LMULMAX1-NEXT: ret
%a = load <64 x i1>, <64 x i1>* %x
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 42)
%c = call <2 x i1> @llvm.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 42)
store <2 x i1> %c, <2 x i1>* %y
ret void
}
@@ -488,7 +488,7 @@ define void @extract_v2i1_nxv2i1_0(<vscale x 2 x i1> %x, <2 x i1>* %y) {
; CHECK-NEXT: vmsne.vi v8, v9, 0
; CHECK-NEXT: vsm.v v8, (a0)
; CHECK-NEXT: ret
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %x, i64 0)
%c = call <2 x i1> @llvm.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %x, i64 0)
store <2 x i1> %c, <2 x i1>* %y
ret void
}
@@ -513,7 +513,7 @@ define void @extract_v2i1_nxv2i1_2(<vscale x 2 x i1> %x, <2 x i1>* %y) {
; CHECK-NEXT: vmsne.vi v8, v9, 0
; CHECK-NEXT: vsm.v v8, (a0)
; CHECK-NEXT: ret
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %x, i64 2)
%c = call <2 x i1> @llvm.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %x, i64 2)
store <2 x i1> %c, <2 x i1>* %y
ret void
}
@@ -532,7 +532,7 @@ define void @extract_v2i1_nxv64i1_0(<vscale x 64 x i1> %x, <2 x i1>* %y) {
; CHECK-NEXT: vmsne.vi v8, v9, 0
; CHECK-NEXT: vsm.v v8, (a0)
; CHECK-NEXT: ret
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %x, i64 0)
%c = call <2 x i1> @llvm.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %x, i64 0)
store <2 x i1> %c, <2 x i1>* %y
ret void
}
@@ -557,7 +557,7 @@ define void @extract_v2i1_nxv64i1_2(<vscale x 64 x i1> %x, <2 x i1>* %y) {
; CHECK-NEXT: vmsne.vi v8, v9, 0
; CHECK-NEXT: vsm.v v8, (a0)
; CHECK-NEXT: ret
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %x, i64 2)
%c = call <2 x i1> @llvm.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %x, i64 2)
store <2 x i1> %c, <2 x i1>* %y
ret void
}
@@ -583,7 +583,7 @@ define void @extract_v2i1_nxv64i1_42(<vscale x 64 x i1> %x, <2 x i1>* %y) {
; CHECK-NEXT: vmsne.vi v8, v9, 0
; CHECK-NEXT: vsm.v v8, (a0)
; CHECK-NEXT: ret
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %x, i64 42)
%c = call <2 x i1> @llvm.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %x, i64 42)
store <2 x i1> %c, <2 x i1>* %y
ret void
}
@@ -608,7 +608,7 @@ define void @extract_v2i1_nxv32i1_26(<vscale x 32 x i1> %x, <2 x i1>* %y) {
; CHECK-NEXT: vmsne.vi v8, v9, 0
; CHECK-NEXT: vsm.v v8, (a0)
; CHECK-NEXT: ret
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv32i1(<vscale x 32 x i1> %x, i64 26)
%c = call <2 x i1> @llvm.vector.extract.v2i1.nxv32i1(<vscale x 32 x i1> %x, i64 26)
store <2 x i1> %c, <2 x i1>* %y
ret void
}
@@ -621,28 +621,28 @@ define void @extract_v8i1_nxv32i1_16(<vscale x 32 x i1> %x, <8 x i1>* %y) {
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vsm.v v8, (a0)
; CHECK-NEXT: ret
%c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv32i1(<vscale x 32 x i1> %x, i64 16)
%c = call <8 x i1> @llvm.vector.extract.v8i1.nxv32i1(<vscale x 32 x i1> %x, i64 16)
store <8 x i1> %c, <8 x i1>* %y
ret void
}

declare <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %vec, i64 %idx)
declare <8 x i1> @llvm.experimental.vector.extract.v8i1.v64i1(<64 x i1> %vec, i64 %idx)
declare <2 x i1> @llvm.vector.extract.v2i1.v64i1(<64 x i1> %vec, i64 %idx)
declare <8 x i1> @llvm.vector.extract.v8i1.v64i1(<64 x i1> %vec, i64 %idx)

declare <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %vec, i64 %idx)
declare <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv2i1(<vscale x 2 x i1> %vec, i64 %idx)
declare <2 x i1> @llvm.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %vec, i64 %idx)
declare <8 x i1> @llvm.vector.extract.v8i1.nxv2i1(<vscale x 2 x i1> %vec, i64 %idx)

declare <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv32i1(<vscale x 32 x i1> %vec, i64 %idx)
declare <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv32i1(<vscale x 32 x i1> %vec, i64 %idx)
declare <2 x i1> @llvm.vector.extract.v2i1.nxv32i1(<vscale x 32 x i1> %vec, i64 %idx)
declare <8 x i1> @llvm.vector.extract.v8i1.nxv32i1(<vscale x 32 x i1> %vec, i64 %idx)

declare <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %vec, i64 %idx)
declare <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %vec, i64 %idx)
declare <2 x i1> @llvm.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %vec, i64 %idx)
declare <8 x i1> @llvm.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %vec, i64 %idx)

declare <2 x i8> @llvm.experimental.vector.extract.v2i8.v4i8(<4 x i8> %vec, i64 %idx)
declare <2 x i8> @llvm.experimental.vector.extract.v2i8.v8i8(<8 x i8> %vec, i64 %idx)
declare <2 x i32> @llvm.experimental.vector.extract.v2i32.v8i32(<8 x i32> %vec, i64 %idx)
declare <2 x i8> @llvm.vector.extract.v2i8.v4i8(<4 x i8> %vec, i64 %idx)
declare <2 x i8> @llvm.vector.extract.v2i8.v8i8(<8 x i8> %vec, i64 %idx)
declare <2 x i32> @llvm.vector.extract.v2i32.v8i32(<8 x i32> %vec, i64 %idx)

declare <2 x i8> @llvm.experimental.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> %vec, i64 %idx)
declare <2 x i8> @llvm.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> %vec, i64 %idx)

declare <2 x i32> @llvm.experimental.vector.extract.v2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 %idx)
declare <8 x i32> @llvm.experimental.vector.extract.v8i32.nxv16i32(<vscale x 16 x i32> %vec, i64 %idx)
declare <2 x i32> @llvm.vector.extract.v2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 %idx)
declare <8 x i32> @llvm.vector.extract.v8i32.nxv16i32(<vscale x 16 x i32> %vec, i64 %idx)
82 changes: 41 additions & 41 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
@@ -18,7 +18,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_0(<vscale x 8 x i32> %vec, <2 x
; CHECK-NEXT: vslideup.vi v8, v12, 0
; CHECK-NEXT: ret
%sv = load <2 x i32>, <2 x i32>* %svp
%v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> %vec, <2 x i32> %sv, i64 0)
%v = call <vscale x 8 x i32> @llvm.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> %vec, <2 x i32> %sv, i64 0)
ret <vscale x 8 x i32> %v
}

@@ -31,7 +31,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_2(<vscale x 8 x i32> %vec, <2 x
; CHECK-NEXT: vslideup.vi v8, v12, 2
; CHECK-NEXT: ret
%sv = load <2 x i32>, <2 x i32>* %svp
%v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> %vec, <2 x i32> %sv, i64 2)
%v = call <vscale x 8 x i32> @llvm.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> %vec, <2 x i32> %sv, i64 2)
ret <vscale x 8 x i32> %v
}

@@ -44,7 +44,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_6(<vscale x 8 x i32> %vec, <2 x
; CHECK-NEXT: vslideup.vi v8, v12, 6
; CHECK-NEXT: ret
%sv = load <2 x i32>, <2 x i32>* %svp
%v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> %vec, <2 x i32> %sv, i64 6)
%v = call <vscale x 8 x i32> @llvm.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> %vec, <2 x i32> %sv, i64 6)
ret <vscale x 8 x i32> %v
}

@@ -69,7 +69,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v8i32_0(<vscale x 8 x i32> %vec, <8 x
; LMULMAX1-NEXT: vslideup.vi v8, v16, 4
; LMULMAX1-NEXT: ret
%sv = load <8 x i32>, <8 x i32>* %svp
%v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 0)
%v = call <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 0)
ret <vscale x 8 x i32> %v
}

@@ -94,7 +94,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v8i32_8(<vscale x 8 x i32> %vec, <8 x
; LMULMAX1-NEXT: vslideup.vi v8, v16, 12
; LMULMAX1-NEXT: ret
%sv = load <8 x i32>, <8 x i32>* %svp
%v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 8)
%v = call <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 8)
ret <vscale x 8 x i32> %v
}

@@ -105,7 +105,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_undef_v2i32_0(<2 x i32>* %svp) {
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
%sv = load <2 x i32>, <2 x i32>* %svp
%v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> undef, <2 x i32> %sv, i64 0)
%v = call <vscale x 8 x i32> @llvm.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> undef, <2 x i32> %sv, i64 0)
ret <vscale x 8 x i32> %v
}

@@ -123,7 +123,7 @@ define void @insert_v4i32_v2i32_0(<4 x i32>* %vp, <2 x i32>* %svp) {
; CHECK-NEXT: ret
%sv = load <2 x i32>, <2 x i32>* %svp
%vec = load <4 x i32>, <4 x i32>* %vp
%v = call <4 x i32> @llvm.experimental.vector.insert.v2i32.v4i32(<4 x i32> %vec, <2 x i32> %sv, i64 0)
%v = call <4 x i32> @llvm.vector.insert.v2i32.v4i32(<4 x i32> %vec, <2 x i32> %sv, i64 0)
store <4 x i32> %v, <4 x i32>* %vp
ret void
}
@@ -141,7 +141,7 @@ define void @insert_v4i32_v2i32_2(<4 x i32>* %vp, <2 x i32>* %svp) {
; CHECK-NEXT: ret
%sv = load <2 x i32>, <2 x i32>* %svp
%vec = load <4 x i32>, <4 x i32>* %vp
%v = call <4 x i32> @llvm.experimental.vector.insert.v2i32.v4i32(<4 x i32> %vec, <2 x i32> %sv, i64 2)
%v = call <4 x i32> @llvm.vector.insert.v2i32.v4i32(<4 x i32> %vec, <2 x i32> %sv, i64 2)
store <4 x i32> %v, <4 x i32>* %vp
ret void
}
@@ -155,7 +155,7 @@ define void @insert_v4i32_undef_v2i32_0(<4 x i32>* %vp, <2 x i32>* %svp) {
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
%sv = load <2 x i32>, <2 x i32>* %svp
%v = call <4 x i32> @llvm.experimental.vector.insert.v2i32.v4i32(<4 x i32> undef, <2 x i32> %sv, i64 0)
%v = call <4 x i32> @llvm.vector.insert.v2i32.v4i32(<4 x i32> undef, <2 x i32> %sv, i64 0)
store <4 x i32> %v, <4 x i32>* %vp
ret void
}
@@ -186,7 +186,7 @@ define void @insert_v8i32_v2i32_0(<8 x i32>* %vp, <2 x i32>* %svp) {
; LMULMAX1-NEXT: ret
%sv = load <2 x i32>, <2 x i32>* %svp
%vec = load <8 x i32>, <8 x i32>* %vp
%v = call <8 x i32> @llvm.experimental.vector.insert.v2i32.v8i32(<8 x i32> %vec, <2 x i32> %sv, i64 0)
%v = call <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32> %vec, <2 x i32> %sv, i64 0)
store <8 x i32> %v, <8 x i32>* %vp
ret void
}
@@ -216,7 +216,7 @@ define void @insert_v8i32_v2i32_2(<8 x i32>* %vp, <2 x i32>* %svp) {
; LMULMAX1-NEXT: ret
%sv = load <2 x i32>, <2 x i32>* %svp
%vec = load <8 x i32>, <8 x i32>* %vp
%v = call <8 x i32> @llvm.experimental.vector.insert.v2i32.v8i32(<8 x i32> %vec, <2 x i32> %sv, i64 2)
%v = call <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32> %vec, <2 x i32> %sv, i64 2)
store <8 x i32> %v, <8 x i32>* %vp
ret void
}
@@ -246,7 +246,7 @@ define void @insert_v8i32_v2i32_6(<8 x i32>* %vp, <2 x i32>* %svp) {
; LMULMAX1-NEXT: ret
%sv = load <2 x i32>, <2 x i32>* %svp
%vec = load <8 x i32>, <8 x i32>* %vp
%v = call <8 x i32> @llvm.experimental.vector.insert.v2i32.v8i32(<8 x i32> %vec, <2 x i32> %sv, i64 6)
%v = call <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32> %vec, <2 x i32> %sv, i64 6)
store <8 x i32> %v, <8 x i32>* %vp
ret void
}
@@ -271,7 +271,7 @@ define void @insert_v8i32_undef_v2i32_6(<8 x i32>* %vp, <2 x i32>* %svp) {
; LMULMAX1-NEXT: vse32.v v9, (a0)
; LMULMAX1-NEXT: ret
%sv = load <2 x i32>, <2 x i32>* %svp
%v = call <8 x i32> @llvm.experimental.vector.insert.v2i32.v8i32(<8 x i32> undef, <2 x i32> %sv, i64 6)
%v = call <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32> undef, <2 x i32> %sv, i64 6)
store <8 x i32> %v, <8 x i32>* %vp
ret void
}
@@ -290,7 +290,7 @@ define void @insert_v4i16_v2i16_0(<4 x i16>* %vp, <2 x i16>* %svp) {
; CHECK-NEXT: ret
%v = load <4 x i16>, <4 x i16>* %vp
%sv = load <2 x i16>, <2 x i16>* %svp
%c = call <4 x i16> @llvm.experimental.vector.insert.v2i16.v4i16(<4 x i16> %v, <2 x i16> %sv, i64 0)
%c = call <4 x i16> @llvm.vector.insert.v2i16.v4i16(<4 x i16> %v, <2 x i16> %sv, i64 0)
store <4 x i16> %c, <4 x i16>* %vp
ret void
}
@@ -308,7 +308,7 @@ define void @insert_v4i16_v2i16_2(<4 x i16>* %vp, <2 x i16>* %svp) {
; CHECK-NEXT: ret
%v = load <4 x i16>, <4 x i16>* %vp
%sv = load <2 x i16>, <2 x i16>* %svp
%c = call <4 x i16> @llvm.experimental.vector.insert.v2i16.v4i16(<4 x i16> %v, <2 x i16> %sv, i64 2)
%c = call <4 x i16> @llvm.vector.insert.v2i16.v4i16(<4 x i16> %v, <2 x i16> %sv, i64 2)
store <4 x i16> %c, <4 x i16>* %vp
ret void
}
@@ -340,7 +340,7 @@ define void @insert_v32i1_v8i1_0(<32 x i1>* %vp, <8 x i1>* %svp) {
; LMULMAX1-NEXT: ret
%v = load <32 x i1>, <32 x i1>* %vp
%sv = load <8 x i1>, <8 x i1>* %svp
%c = call <32 x i1> @llvm.experimental.vector.insert.v8i1.v32i1(<32 x i1> %v, <8 x i1> %sv, i64 0)
%c = call <32 x i1> @llvm.vector.insert.v8i1.v32i1(<32 x i1> %v, <8 x i1> %sv, i64 0)
store <32 x i1> %c, <32 x i1>* %vp
ret void
}
@@ -373,7 +373,7 @@ define void @insert_v32i1_v8i1_16(<32 x i1>* %vp, <8 x i1>* %svp) {
; LMULMAX1-NEXT: ret
%v = load <32 x i1>, <32 x i1>* %vp
%sv = load <8 x i1>, <8 x i1>* %svp
%c = call <32 x i1> @llvm.experimental.vector.insert.v8i1.v32i1(<32 x i1> %v, <8 x i1> %sv, i64 16)
%c = call <32 x i1> @llvm.vector.insert.v8i1.v32i1(<32 x i1> %v, <8 x i1> %sv, i64 16)
store <32 x i1> %c, <32 x i1>* %vp
ret void
}
@@ -400,7 +400,7 @@ define void @insert_v8i1_v4i1_0(<8 x i1>* %vp, <4 x i1>* %svp) {
; CHECK-NEXT: ret
%v = load <8 x i1>, <8 x i1>* %vp
%sv = load <4 x i1>, <4 x i1>* %svp
%c = call <8 x i1> @llvm.experimental.vector.insert.v4i1.v8i1(<8 x i1> %v, <4 x i1> %sv, i64 0)
%c = call <8 x i1> @llvm.vector.insert.v4i1.v8i1(<8 x i1> %v, <4 x i1> %sv, i64 0)
store <8 x i1> %c, <8 x i1>* %vp
ret void
}
@@ -427,7 +427,7 @@ define void @insert_v8i1_v4i1_4(<8 x i1>* %vp, <4 x i1>* %svp) {
; CHECK-NEXT: ret
%v = load <8 x i1>, <8 x i1>* %vp
%sv = load <4 x i1>, <4 x i1>* %svp
%c = call <8 x i1> @llvm.experimental.vector.insert.v4i1.v8i1(<8 x i1> %v, <4 x i1> %sv, i64 4)
%c = call <8 x i1> @llvm.vector.insert.v4i1.v8i1(<8 x i1> %v, <4 x i1> %sv, i64 4)
store <8 x i1> %c, <8 x i1>* %vp
ret void
}
@@ -441,7 +441,7 @@ define <vscale x 2 x i16> @insert_nxv2i16_v2i16_0(<vscale x 2 x i16> %v, <2 x i1
; CHECK-NEXT: vslideup.vi v8, v9, 0
; CHECK-NEXT: ret
%sv = load <2 x i16>, <2 x i16>* %svp
%c = call <vscale x 2 x i16> @llvm.experimental.vector.insert.v2i16.nxv2i16(<vscale x 2 x i16> %v, <2 x i16> %sv, i64 0)
%c = call <vscale x 2 x i16> @llvm.vector.insert.v2i16.nxv2i16(<vscale x 2 x i16> %v, <2 x i16> %sv, i64 0)
ret <vscale x 2 x i16> %c
}

@@ -454,7 +454,7 @@ define <vscale x 2 x i16> @insert_nxv2i16_v2i16_2(<vscale x 2 x i16> %v, <2 x i1
; CHECK-NEXT: vslideup.vi v8, v9, 4
; CHECK-NEXT: ret
%sv = load <2 x i16>, <2 x i16>* %svp
%c = call <vscale x 2 x i16> @llvm.experimental.vector.insert.v2i16.nxv2i16(<vscale x 2 x i16> %v, <2 x i16> %sv, i64 4)
%c = call <vscale x 2 x i16> @llvm.vector.insert.v2i16.nxv2i16(<vscale x 2 x i16> %v, <2 x i16> %sv, i64 4)
ret <vscale x 2 x i16> %c
}

@@ -476,7 +476,7 @@ define <vscale x 2 x i1> @insert_nxv2i1_v4i1_0(<vscale x 2 x i1> %v, <4 x i1>* %
; CHECK-NEXT: vmsne.vi v0, v9, 0
; CHECK-NEXT: ret
%sv = load <4 x i1>, <4 x i1>* %svp
%c = call <vscale x 2 x i1> @llvm.experimental.vector.insert.v4i1.nxv2i1(<vscale x 2 x i1> %v, <4 x i1> %sv, i64 0)
%c = call <vscale x 2 x i1> @llvm.vector.insert.v4i1.nxv2i1(<vscale x 2 x i1> %v, <4 x i1> %sv, i64 0)
ret <vscale x 2 x i1> %c
}

@@ -489,7 +489,7 @@ define <vscale x 8 x i1> @insert_nxv8i1_v4i1_0(<vscale x 8 x i1> %v, <8 x i1>* %
; CHECK-NEXT: vslideup.vi v0, v8, 0
; CHECK-NEXT: ret
%sv = load <8 x i1>, <8 x i1>* %svp
%c = call <vscale x 8 x i1> @llvm.experimental.vector.insert.v8i1.nxv8i1(<vscale x 8 x i1> %v, <8 x i1> %sv, i64 0)
%c = call <vscale x 8 x i1> @llvm.vector.insert.v8i1.nxv8i1(<vscale x 8 x i1> %v, <8 x i1> %sv, i64 0)
ret <vscale x 8 x i1> %c
}

@@ -502,11 +502,11 @@ define <vscale x 8 x i1> @insert_nxv8i1_v8i1_16(<vscale x 8 x i1> %v, <8 x i1>*
; CHECK-NEXT: vslideup.vi v0, v8, 2
; CHECK-NEXT: ret
%sv = load <8 x i1>, <8 x i1>* %svp
%c = call <vscale x 8 x i1> @llvm.experimental.vector.insert.v8i1.nxv8i1(<vscale x 8 x i1> %v, <8 x i1> %sv, i64 16)
%c = call <vscale x 8 x i1> @llvm.vector.insert.v8i1.nxv8i1(<vscale x 8 x i1> %v, <8 x i1> %sv, i64 16)
ret <vscale x 8 x i1> %c
}

declare <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64>, <2 x i64>, i64)
declare <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64>, <2 x i64>, i64)

define void @insert_v2i64_nxv16i64(<2 x i64>* %psv0, <2 x i64>* %psv1, <vscale x 16 x i64>* %out) {
; CHECK-LABEL: insert_v2i64_nxv16i64:
@@ -520,8 +520,8 @@ define void @insert_v2i64_nxv16i64(<2 x i64>* %psv0, <2 x i64>* %psv1, <vscale x
; CHECK-NEXT: ret
%sv0 = load <2 x i64>, <2 x i64>* %psv0
%sv1 = load <2 x i64>, <2 x i64>* %psv1
%v0 = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv0, i64 0)
%v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> %v0, <2 x i64> %sv1, i64 4)
%v0 = call <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv0, i64 0)
%v = call <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> %v0, <2 x i64> %sv1, i64 4)
store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
ret void
}
@@ -534,7 +534,7 @@ define void @insert_v2i64_nxv16i64_lo0(<2 x i64>* %psv, <vscale x 16 x i64>* %ou
; CHECK-NEXT: vs8r.v v8, (a1)
; CHECK-NEXT: ret
%sv = load <2 x i64>, <2 x i64>* %psv
%v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 0)
%v = call <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 0)
store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
ret void
}
@@ -549,7 +549,7 @@ define void @insert_v2i64_nxv16i64_lo2(<2 x i64>* %psv, <vscale x 16 x i64>* %ou
; CHECK-NEXT: vs8r.v v16, (a1)
; CHECK-NEXT: ret
%sv = load <2 x i64>, <2 x i64>* %psv
%v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 2)
%v = call <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 2)
store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
ret void
}
@@ -585,24 +585,24 @@ define void @insert_v2i64_nxv16i64_hi(<2 x i64>* %psv, <vscale x 16 x i64>* %out
; CHECK-NEXT: addi sp, sp, 64
; CHECK-NEXT: ret
%sv = load <2 x i64>, <2 x i64>* %psv
%v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 8)
%v = call <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 8)
store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
ret void
}

declare <8 x i1> @llvm.experimental.vector.insert.v4i1.v8i1(<8 x i1>, <4 x i1>, i64)
declare <32 x i1> @llvm.experimental.vector.insert.v8i1.v32i1(<32 x i1>, <8 x i1>, i64)
declare <8 x i1> @llvm.vector.insert.v4i1.v8i1(<8 x i1>, <4 x i1>, i64)
declare <32 x i1> @llvm.vector.insert.v8i1.v32i1(<32 x i1>, <8 x i1>, i64)

declare <4 x i16> @llvm.experimental.vector.insert.v2i16.v4i16(<4 x i16>, <2 x i16>, i64)
declare <4 x i16> @llvm.vector.insert.v2i16.v4i16(<4 x i16>, <2 x i16>, i64)

declare <4 x i32> @llvm.experimental.vector.insert.v2i32.v4i32(<4 x i32>, <2 x i32>, i64)
declare <8 x i32> @llvm.experimental.vector.insert.v2i32.v8i32(<8 x i32>, <2 x i32>, i64)
declare <4 x i32> @llvm.vector.insert.v2i32.v4i32(<4 x i32>, <2 x i32>, i64)
declare <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32>, <2 x i32>, i64)

declare <vscale x 2 x i1> @llvm.experimental.vector.insert.v4i1.nxv2i1(<vscale x 2 x i1>, <4 x i1>, i64)
declare <vscale x 8 x i1> @llvm.experimental.vector.insert.v8i1.nxv8i1(<vscale x 8 x i1>, <8 x i1>, i64)
declare <vscale x 2 x i1> @llvm.vector.insert.v4i1.nxv2i1(<vscale x 2 x i1>, <4 x i1>, i64)
declare <vscale x 8 x i1> @llvm.vector.insert.v8i1.nxv8i1(<vscale x 8 x i1>, <8 x i1>, i64)

declare <vscale x 2 x i16> @llvm.experimental.vector.insert.v2i16.nxv2i16(<vscale x 2 x i16>, <2 x i16>, i64)
declare <vscale x 2 x i16> @llvm.vector.insert.v2i16.nxv2i16(<vscale x 2 x i16>, <2 x i16>, i64)

declare <vscale x 8 x i32> @llvm.experimental.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32>, <2 x i32>, i64)
declare <vscale x 8 x i32> @llvm.experimental.vector.insert.v4i32.nxv8i32(<vscale x 8 x i32>, <4 x i32>, i64)
declare <vscale x 8 x i32> @llvm.experimental.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32>, <8 x i32>, i64)
declare <vscale x 8 x i32> @llvm.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32>, <2 x i32>, i64)
declare <vscale x 8 x i32> @llvm.vector.insert.v4i32.nxv8i32(<vscale x 8 x i32>, <4 x i32>, i64)
declare <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32>, <8 x i32>, i64)
114 changes: 57 additions & 57 deletions llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
@@ -1266,8 +1266,8 @@ define <vscale x 8 x i64> @mgather_baseidx_nxv8i64(i64* %base, <vscale x 8 x i64

declare <vscale x 16 x i64> @llvm.masked.gather.nxv16i64.nxv16p0f64(<vscale x 16 x i64*>, i32, <vscale x 16 x i1>, <vscale x 16 x i64>)

declare <vscale x 16 x i64> @llvm.experimental.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64>, <vscale x 8 x i64>, i64 %idx)
declare <vscale x 16 x i64*> @llvm.experimental.vector.insert.nxv8p0i64.nxv16p0i64(<vscale x 16 x i64*>, <vscale x 8 x i64*>, i64 %idx)
declare <vscale x 16 x i64> @llvm.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64>, <vscale x 8 x i64>, i64 %idx)
declare <vscale x 16 x i64*> @llvm.vector.insert.nxv8p0i64.nxv16p0i64(<vscale x 16 x i64*>, <vscale x 8 x i64*>, i64 %idx)

define void @mgather_nxv16i64(<vscale x 8 x i64*> %ptrs0, <vscale x 8 x i64*> %ptrs1, <vscale x 16 x i1> %m, <vscale x 8 x i64> %passthru0, <vscale x 8 x i64> %passthru1, <vscale x 16 x i64>* %out) {
; RV32-LABEL: mgather_nxv16i64:
@@ -1318,11 +1318,11 @@ define void @mgather_nxv16i64(<vscale x 8 x i64*> %ptrs0, <vscale x 8 x i64*> %p
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%p0 = call <vscale x 16 x i64*> @llvm.experimental.vector.insert.nxv8p0i64.nxv16p0i64(<vscale x 16 x i64*> undef, <vscale x 8 x i64*> %ptrs0, i64 0)
%p1 = call <vscale x 16 x i64*> @llvm.experimental.vector.insert.nxv8p0i64.nxv16p0i64(<vscale x 16 x i64*> %p0, <vscale x 8 x i64*> %ptrs1, i64 8)
%p0 = call <vscale x 16 x i64*> @llvm.vector.insert.nxv8p0i64.nxv16p0i64(<vscale x 16 x i64*> undef, <vscale x 8 x i64*> %ptrs0, i64 0)
%p1 = call <vscale x 16 x i64*> @llvm.vector.insert.nxv8p0i64.nxv16p0i64(<vscale x 16 x i64*> %p0, <vscale x 8 x i64*> %ptrs1, i64 8)

%pt0 = call <vscale x 16 x i64> @llvm.experimental.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 8 x i64> %passthru0, i64 0)
%pt1 = call <vscale x 16 x i64> @llvm.experimental.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> %pt0, <vscale x 8 x i64> %passthru1, i64 8)
%pt0 = call <vscale x 16 x i64> @llvm.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 8 x i64> %passthru0, i64 0)
%pt1 = call <vscale x 16 x i64> @llvm.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> %pt0, <vscale x 8 x i64> %passthru1, i64 8)

%v = call <vscale x 16 x i64> @llvm.masked.gather.nxv16i64.nxv16p0f64(<vscale x 16 x i64*> %p1, i32 8, <vscale x 16 x i1> %m, <vscale x 16 x i64> %pt1)
store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
@@ -1819,8 +1819,8 @@ define void @mscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, double* %base,

declare void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double>, <vscale x 16 x double*>, i32, <vscale x 16 x i1>)

declare <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double>, <vscale x 8 x double>, i64)
declare <vscale x 16 x double*> @llvm.experimental.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*>, <vscale x 8 x double*>, i64)
declare <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double>, <vscale x 8 x double>, i64)
declare <vscale x 16 x double*> @llvm.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*>, <vscale x 8 x double*>, i64)

define void @mscatter_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, <vscale x 8 x double*> %ptrs0, <vscale x 8 x double*> %ptrs1, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_nxv16f64:
@@ -1863,10 +1863,10 @@ define void @mscatter_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%p0 = call <vscale x 16 x double*> @llvm.experimental.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*> undef, <vscale x 8 x double*> %ptrs0, i64 0)
%p1 = call <vscale x 16 x double*> @llvm.experimental.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*> %p0, <vscale x 8 x double*> %ptrs1, i64 8)
%v0 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
%v1 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
%p0 = call <vscale x 16 x double*> @llvm.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*> undef, <vscale x 8 x double*> %ptrs0, i64 0)
%p1 = call <vscale x 16 x double*> @llvm.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*> %p0, <vscale x 8 x double*> %ptrs1, i64 8)
%v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
%v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
call void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %v1, <vscale x 16 x double*> %p1, i32 8, <vscale x 16 x i1> %m)
ret void
}
@@ -1905,8 +1905,8 @@ define void @mscatter_baseidx_nxv16i8_nxv16f64(<vscale x 8 x double> %val0, <vsc
; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i8> %idxs
%v0 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
%v1 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
%v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
%v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
call void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %v1, <vscale x 16 x double*> %ptrs, i32 8, <vscale x 16 x i1> %m)
ret void
}
@@ -1945,8 +1945,8 @@ define void @mscatter_baseidx_nxv16i16_nxv16f64(<vscale x 8 x double> %val0, <vs
; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i16> %idxs
%v0 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
%v1 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
%v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
%v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
call void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %v1, <vscale x 16 x double*> %ptrs, i32 8, <vscale x 16 x i1> %m)
ret void
}
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/vpload.ll
@@ -482,8 +482,8 @@ define <vscale x 16 x double> @vpload_nxv16f64(<vscale x 16 x double>* %ptr, <vs

declare <vscale x 17 x double> @llvm.vp.load.nxv17f64.p0nxv17f64(<vscale x 17 x double>*, <vscale x 17 x i1>, i32)

declare <vscale x 1 x double> @llvm.experimental.vector.extract.nxv1f64(<vscale x 17 x double> %vec, i64 %idx)
declare <vscale x 16 x double> @llvm.experimental.vector.extract.nxv16f64(<vscale x 17 x double> %vec, i64 %idx)
declare <vscale x 1 x double> @llvm.vector.extract.nxv1f64(<vscale x 17 x double> %vec, i64 %idx)
declare <vscale x 16 x double> @llvm.vector.extract.nxv16f64(<vscale x 17 x double> %vec, i64 %idx)

; Note: We can't return <vscale x 17 x double> as that introduces a vector
; store that can't yet be legalized through widening. In order to test purely the
@@ -542,8 +542,8 @@ define <vscale x 16 x double> @vpload_nxv17f64(<vscale x 17 x double>* %ptr, <vs
; CHECK-NEXT: vs1r.v v24, (a1)
; CHECK-NEXT: ret
%load = call <vscale x 17 x double> @llvm.vp.load.nxv17f64.p0nxv17f64(<vscale x 17 x double>* %ptr, <vscale x 17 x i1> %m, i32 %evl)
%lo = call <vscale x 16 x double> @llvm.experimental.vector.extract.nxv16f64(<vscale x 17 x double> %load, i64 0)
%hi = call <vscale x 1 x double> @llvm.experimental.vector.extract.nxv1f64(<vscale x 17 x double> %load, i64 16)
%lo = call <vscale x 16 x double> @llvm.vector.extract.nxv16f64(<vscale x 17 x double> %load, i64 0)
%hi = call <vscale x 1 x double> @llvm.vector.extract.nxv1f64(<vscale x 17 x double> %load, i64 16)
store <vscale x 1 x double> %hi, <vscale x 1 x double>* %out
ret <vscale x 16 x double> %lo
}

Large diffs are not rendered by default.

36 changes: 18 additions & 18 deletions llvm/test/Transforms/InstCombine/canonicalize-vector-extract.ll
@@ -1,15 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

; llvm.experimental.vector.extract canonicalizes to shufflevector in the fixed case. In the
; llvm.vector.extract canonicalizes to shufflevector in the fixed case. In the
; scalable case, we lower to the EXTRACT_SUBVECTOR ISD node.

declare <10 x i32> @llvm.experimental.vector.extract.v10i32.v8i32(<8 x i32> %vec, i64 %idx)
declare <2 x i32> @llvm.experimental.vector.extract.v2i32.v4i32(<8 x i32> %vec, i64 %idx)
declare <3 x i32> @llvm.experimental.vector.extract.v3i32.v8i32(<8 x i32> %vec, i64 %idx)
declare <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 %idx)
declare <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 %idx)
declare <8 x i32> @llvm.experimental.vector.extract.v8i32.v8i32(<8 x i32> %vec, i64 %idx)
declare <10 x i32> @llvm.vector.extract.v10i32.v8i32(<8 x i32> %vec, i64 %idx)
declare <2 x i32> @llvm.vector.extract.v2i32.v4i32(<8 x i32> %vec, i64 %idx)
declare <3 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> %vec, i64 %idx)
declare <4 x i32> @llvm.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 %idx)
declare <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 %idx)
declare <8 x i32> @llvm.vector.extract.v8i32.v8i32(<8 x i32> %vec, i64 %idx)
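; For orientation, a minimal sketch of the canonicalization these tests cover, assuming the
; file's RUN line (opt -passes=instcombine); the function name @extract_low_half_sketch is
; illustrative and not part of this diff.
define <4 x i32> @extract_low_half_sketch(<8 x i32> %vec) {
  ; Fixed-width extract of the low four lanes of a fixed-width vector.
  %lo = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 0)
  ret <4 x i32> %lo
}
; After instcombine, the call is expected to become a plain shufflevector, e.g.:
;   %lo = shufflevector <8 x i32> %vec, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>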

; ============================================================================ ;
; Trivial cases
@@ -20,7 +20,7 @@ define <8 x i32> @trivial_nop(<8 x i32> %vec) {
; CHECK-LABEL: @trivial_nop(
; CHECK-NEXT: ret <8 x i32> [[VEC:%.*]]
;
%1 = call <8 x i32> @llvm.experimental.vector.extract.v8i32.v8i32(<8 x i32> %vec, i64 0)
%1 = call <8 x i32> @llvm.vector.extract.v8i32.v8i32(<8 x i32> %vec, i64 0)
ret <8 x i32> %1
}

@@ -33,7 +33,7 @@ define <2 x i32> @valid_extraction_a(<8 x i32> %vec) {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: ret <2 x i32> [[TMP1]]
;
%1 = call <2 x i32> @llvm.experimental.vector.extract.v2i32.v4i32(<8 x i32> %vec, i64 0)
%1 = call <2 x i32> @llvm.vector.extract.v2i32.v4i32(<8 x i32> %vec, i64 0)
ret <2 x i32> %1
}

@@ -42,7 +42,7 @@ define <2 x i32> @valid_extraction_b(<8 x i32> %vec) {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: ret <2 x i32> [[TMP1]]
;
%1 = call <2 x i32> @llvm.experimental.vector.extract.v2i32.v4i32(<8 x i32> %vec, i64 2)
%1 = call <2 x i32> @llvm.vector.extract.v2i32.v4i32(<8 x i32> %vec, i64 2)
ret <2 x i32> %1
}

@@ -51,7 +51,7 @@ define <2 x i32> @valid_extraction_c(<8 x i32> %vec) {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> poison, <2 x i32> <i32 4, i32 5>
; CHECK-NEXT: ret <2 x i32> [[TMP1]]
;
%1 = call <2 x i32> @llvm.experimental.vector.extract.v2i32.v4i32(<8 x i32> %vec, i64 4)
%1 = call <2 x i32> @llvm.vector.extract.v2i32.v4i32(<8 x i32> %vec, i64 4)
ret <2 x i32> %1
}

@@ -60,7 +60,7 @@ define <2 x i32> @valid_extraction_d(<8 x i32> %vec) {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> poison, <2 x i32> <i32 6, i32 7>
; CHECK-NEXT: ret <2 x i32> [[TMP1]]
;
%1 = call <2 x i32> @llvm.experimental.vector.extract.v2i32.v4i32(<8 x i32> %vec, i64 6)
%1 = call <2 x i32> @llvm.vector.extract.v2i32.v4i32(<8 x i32> %vec, i64 6)
ret <2 x i32> %1
}

@@ -69,7 +69,7 @@ define <4 x i32> @valid_extraction_e(<8 x i32> %vec) {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
;
%1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 0)
%1 = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 0)
ret <4 x i32> %1
}

@@ -78,7 +78,7 @@ define <4 x i32> @valid_extraction_f(<8 x i32> %vec) {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
;
%1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 4)
%1 = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 4)
ret <4 x i32> %1
}

@@ -87,7 +87,7 @@ define <3 x i32> @valid_extraction_g(<8 x i32> %vec) {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
; CHECK-NEXT: ret <3 x i32> [[TMP1]]
;
%1 = call <3 x i32> @llvm.experimental.vector.extract.v3i32.v8i32(<8 x i32> %vec, i64 0)
%1 = call <3 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> %vec, i64 0)
ret <3 x i32> %1
}

@@ -96,7 +96,7 @@ define <3 x i32> @valid_extraction_h(<8 x i32> %vec) {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> poison, <3 x i32> <i32 3, i32 4, i32 5>
; CHECK-NEXT: ret <3 x i32> [[TMP1]]
;
%1 = call <3 x i32> @llvm.experimental.vector.extract.v3i32.v8i32(<8 x i32> %vec, i64 3)
%1 = call <3 x i32> @llvm.vector.extract.v3i32.v8i32(<8 x i32> %vec, i64 3)
ret <3 x i32> %1
}

@@ -108,9 +108,9 @@ define <3 x i32> @valid_extraction_h(<8 x i32> %vec) {
; EXTRACT_SUBVECTOR ISD node later.
define <4 x i32> @scalable_extract(<vscale x 4 x i32> %vec) {
; CHECK-LABEL: @scalable_extract(
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[VEC:%.*]], i64 0)
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[VEC:%.*]], i64 0)
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
;
%1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0)
%1 = call <4 x i32> @llvm.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0)
ret <4 x i32> %1
}
34 changes: 17 additions & 17 deletions llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
@@ -1,14 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

; llvm.experimental.vector.insert canonicalizes to shufflevector in the fixed case. In the
; llvm.vector.insert canonicalizes to shufflevector in the fixed case. In the
; scalable case, we lower to the INSERT_SUBVECTOR ISD node.

declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 %idx)
declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 %idx)
declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 %idx)
declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 %idx)
declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 %idx)
declare <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 %idx)
declare <8 x i32> @llvm.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 %idx)
declare <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 %idx)
declare <8 x i32> @llvm.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 %idx)
declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 %idx)
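; Likewise, a minimal sketch of the fixed-width insert canonicalization, again assuming
; opt -passes=instcombine; @insert_low_half_sketch and the intermediate value names are
; illustrative and not part of this diff.
define <8 x i32> @insert_low_half_sketch(<8 x i32> %vec, <2 x i32> %subvec) {
  ; Fixed-width insert of a two-lane subvector at index 0.
  %v = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 0)
  ret <8 x i32> %v
}
; After instcombine, the call is expected to become a widening shuffle of %subvec followed by
; a blend with %vec, ending in something like:
;   %v = shufflevector <8 x i32> %widened, <8 x i32> %vec, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>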

; ============================================================================ ;
; Trivial cases
@@ -20,7 +20,7 @@ define <8 x i32> @trivial_nop(<8 x i32> %vec, <8 x i32> %subvec) {
; CHECK-LABEL: @trivial_nop(
; CHECK-NEXT: ret <8 x i32> [[SUBVEC:%.*]]
;
%1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 0)
%1 = call <8 x i32> @llvm.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 0)
ret <8 x i32> %1
}

@@ -34,7 +34,7 @@ define <8 x i32> @valid_insertion_a(<8 x i32> %vec, <2 x i32> %subvec) {
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
%1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 0)
%1 = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 0)
ret <8 x i32> %1
}

@@ -44,7 +44,7 @@ define <8 x i32> @valid_insertion_b(<8 x i32> %vec, <2 x i32> %subvec) {
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
%1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 2)
%1 = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 2)
ret <8 x i32> %1
}

@@ -54,7 +54,7 @@ define <8 x i32> @valid_insertion_c(<8 x i32> %vec, <2 x i32> %subvec) {
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
%1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 4)
%1 = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 4)
ret <8 x i32> %1
}

@@ -64,7 +64,7 @@ define <8 x i32> @valid_insertion_d(<8 x i32> %vec, <2 x i32> %subvec) {
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
%1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 6)
%1 = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 6)
ret <8 x i32> %1
}

@@ -74,7 +74,7 @@ define <8 x i32> @valid_insertion_e(<8 x i32> %vec, <4 x i32> %subvec) {
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
%1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 0)
%1 = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 0)
ret <8 x i32> %1
}

@@ -84,7 +84,7 @@ define <8 x i32> @valid_insertion_f(<8 x i32> %vec, <4 x i32> %subvec) {
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
%1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 4)
%1 = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 4)
ret <8 x i32> %1
}

@@ -94,7 +94,7 @@ define <8 x i32> @valid_insertion_g(<8 x i32> %vec, <3 x i32> %subvec) {
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
%1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 0)
%1 = call <8 x i32> @llvm.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 0)
ret <8 x i32> %1
}

@@ -104,7 +104,7 @@ define <8 x i32> @valid_insertion_h(<8 x i32> %vec, <3 x i32> %subvec) {
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 9, i32 10, i32 6, i32 7>
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
%1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 3)
%1 = call <8 x i32> @llvm.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 3)
ret <8 x i32> %1
}

@@ -116,9 +116,9 @@ define <8 x i32> @valid_insertion_h(<8 x i32> %vec, <3 x i32> %subvec) {
; INSERT_SUBVECTOR ISD node later.
define <vscale x 4 x i32> @scalable_insert(<vscale x 4 x i32> %vec, <4 x i32> %subvec) {
; CHECK-LABEL: @scalable_insert(
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[VEC:%.*]], <4 x i32> [[SUBVEC:%.*]], i64 0)
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[VEC:%.*]], <4 x i32> [[SUBVEC:%.*]], i64 0)
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 0)
%1 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 0)
ret <vscale x 4 x i32> %1
}
18 changes: 9 additions & 9 deletions llvm/test/Transforms/InstSimplify/extract-vector.ll
@@ -5,22 +5,22 @@ define <16 x i8> @redundant_insert_extract_chain(<16 x i8> %x) {
; CHECK-LABEL: @redundant_insert_extract_chain(
; CHECK-NEXT: ret <16 x i8> [[X:%.*]]
;
%inserted = call <vscale x 32 x i8> @llvm.experimental.vector.insert.nxv32i8.v16i8(<vscale x 32 x i8> undef, <16 x i8> %x, i64 0)
%extracted = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv32i8(<vscale x 32 x i8> %inserted, i64 0)
%inserted = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.v16i8(<vscale x 32 x i8> undef, <16 x i8> %x, i64 0)
%extracted = call <16 x i8> @llvm.vector.extract.v16i8.nxv32i8(<vscale x 32 x i8> %inserted, i64 0)
ret <16 x i8> %extracted
}

define <8 x i8> @non_redundant_insert_extract_chain(<16 x i8> %x) {
; CHECK-LABEL: @non_redundant_insert_extract_chain(
; CHECK-NEXT: [[INSERTED:%.*]] = call <vscale x 32 x i8> @llvm.experimental.vector.insert.nxv32i8.v16i8(<vscale x 32 x i8> undef, <16 x i8> [[X:%.*]], i64 0)
; CHECK-NEXT: [[EXTRACTED:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv32i8(<vscale x 32 x i8> [[INSERTED]], i64 0)
; CHECK-NEXT: [[INSERTED:%.*]] = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.v16i8(<vscale x 32 x i8> undef, <16 x i8> [[X:%.*]], i64 0)
; CHECK-NEXT: [[EXTRACTED:%.*]] = call <8 x i8> @llvm.vector.extract.v8i8.nxv32i8(<vscale x 32 x i8> [[INSERTED]], i64 0)
; CHECK-NEXT: ret <8 x i8> [[EXTRACTED]]
;
%inserted = call <vscale x 32 x i8> @llvm.experimental.vector.insert.nxv32i8.v16i8(<vscale x 32 x i8> undef, <16 x i8> %x, i64 0)
%extracted = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv32i8(<vscale x 32 x i8> %inserted, i64 0)
%inserted = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.v16i8(<vscale x 32 x i8> undef, <16 x i8> %x, i64 0)
%extracted = call <8 x i8> @llvm.vector.extract.v8i8.nxv32i8(<vscale x 32 x i8> %inserted, i64 0)
ret <8 x i8> %extracted
}

declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv32i8(<vscale x 32 x i8>, i64)
declare <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv32i8(<vscale x 32 x i8>, i64)
declare <vscale x 32 x i8> @llvm.experimental.vector.insert.nxv32i8.v16i8(<vscale x 32 x i8>, <16 x i8>, i64)
declare <16 x i8> @llvm.vector.extract.v16i8.nxv32i8(<vscale x 32 x i8>, i64)
declare <8 x i8> @llvm.vector.extract.v8i8.nxv32i8(<vscale x 32 x i8>, i64)
declare <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.v16i8(<vscale x 32 x i8>, <16 x i8>, i64)