diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ld1_vnum.c b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ld1_vnum.c index e4c93ade35d53..8206e4f41e44a 100644 --- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ld1_vnum.c +++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ld1_vnum.c @@ -10,30 +10,30 @@ // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-C-NEXT: entry: // CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-C-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[TMP0]] -// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) -// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] -// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 15 -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG]], ptr [[TMP2]], i32 0, i32 [[TMP5]]) +// CHECK-C-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-C-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-C-NEXT: [[TMP2:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-C-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]] +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP3]]) +// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]] +// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[ADD]], 15 +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP4]]) // CHECK-C-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z23test_svld1_hor_vnum_za8ju10__SVBool_tPKvl( // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-CXX-NEXT: entry: // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-CXX-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[TMP0]] -// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) -// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] -// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 15 -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG]], ptr [[TMP2]], i32 0, i32 [[TMP5]]) +// CHECK-CXX-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-CXX-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-CXX-NEXT: [[TMP2:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-CXX-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP3]]) +// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]] +// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[ADD]], 15 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG]], ptr 
[[TMP1]], i32 0, i32 [[TMP4]]) // CHECK-CXX-NEXT: ret void // void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { @@ -46,15 +46,15 @@ void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-C-NEXT: entry: // CHECK-C-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG]]) // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-C-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-C-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-C-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-C-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 7 -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], ptr [[TMP3]], i32 1, i32 [[TMP6]]) +// CHECK-C-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 7 +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], ptr [[TMP2]], i32 1, i32 [[TMP5]]) // CHECK-C-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_hor_vnum_za16ju10__SVBool_tPKvl( @@ -62,15 +62,15 @@ void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: entry: // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG]]) // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-CXX-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-CXX-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-CXX-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-CXX-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 7 -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], ptr [[TMP3]], i32 1, i32 [[TMP6]]) +// CHECK-CXX-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 7 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], ptr [[TMP2]], i32 1, i32 [[TMP5]]) // CHECK-CXX-NEXT: 
ret void // void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { @@ -83,15 +83,15 @@ void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-C-NEXT: entry: // CHECK-C-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG]]) // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-C-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-C-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-C-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-C-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 3 -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], ptr [[TMP3]], i32 3, i32 [[TMP6]]) +// CHECK-C-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 3 +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], ptr [[TMP2]], i32 3, i32 [[TMP5]]) // CHECK-C-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_hor_vnum_za32ju10__SVBool_tPKvl( @@ -99,15 +99,15 @@ void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: entry: // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG]]) // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-CXX-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-CXX-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-CXX-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-CXX-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 3 -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], ptr [[TMP3]], i32 3, i32 [[TMP6]]) +// CHECK-CXX-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 3 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], ptr [[TMP2]], i32 3, i32 [[TMP5]]) // CHECK-CXX-NEXT: ret void // void 
test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { @@ -120,15 +120,15 @@ void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-C-NEXT: entry: // CHECK-C-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG]]) // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-C-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-C-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-C-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-C-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 1 -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], ptr [[TMP3]], i32 7, i32 [[TMP6]]) +// CHECK-C-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 1 +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], ptr [[TMP2]], i32 7, i32 [[TMP5]]) // CHECK-C-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_hor_vnum_za64ju10__SVBool_tPKvl( @@ -136,15 +136,15 @@ void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: entry: // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG]]) // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-CXX-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-CXX-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-CXX-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-CXX-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 1 -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], ptr [[TMP3]], i32 7, i32 [[TMP6]]) +// CHECK-CXX-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 1 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], ptr [[TMP2]], i32 7, i32 [[TMP5]]) // CHECK-CXX-NEXT: ret void // void test_svld1_hor_vnum_za64(uint32_t 
slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { @@ -157,13 +157,13 @@ void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-C-NEXT: entry: // CHECK-C-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG]]) // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-C-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-C-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-C-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], ptr [[TMP3]], i32 15, i32 [[TMP5]]) +// CHECK-C-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], ptr [[TMP2]], i32 15, i32 [[TMP4]]) // CHECK-C-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z25test_svld1_hor_vnum_za128ju10__SVBool_tPKvl( @@ -171,13 +171,13 @@ void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: entry: // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG]]) // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-CXX-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-CXX-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-CXX-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], ptr [[TMP3]], i32 15, i32 [[TMP5]]) +// CHECK-CXX-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], ptr [[TMP2]], i32 15, i32 [[TMP4]]) // CHECK-CXX-NEXT: ret void // void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { @@ -189,30 +189,30 @@ void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-C-NEXT: entry: // CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-C-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-C-NEXT: [[MULVL:%.*]] = 
mul i64 [[TMP1]], [[TMP0]] -// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) -// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] -// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 15 -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG]], ptr [[TMP2]], i32 0, i32 [[TMP5]]) +// CHECK-C-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-C-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-C-NEXT: [[TMP2:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-C-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]] +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP3]]) +// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]] +// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[ADD]], 15 +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP4]]) // CHECK-C-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z22test_svld1_ver_hor_za8ju10__SVBool_tPKvl( // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-CXX-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[TMP0]] -// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) -// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] -// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 15 -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG]], ptr [[TMP2]], i32 0, i32 [[TMP5]]) +// CHECK-CXX-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-CXX-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-CXX-NEXT: [[TMP2:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-CXX-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP3]]) +// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]] +// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[ADD]], 15 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP4]]) // CHECK-CXX-NEXT: ret void // void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { @@ -225,15 +225,15 @@ void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, i // CHECK-C-NEXT: entry: // CHECK-C-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG]]) // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-C-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-C-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-C-NEXT: 
[[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-C-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 7 -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], ptr [[TMP3]], i32 1, i32 [[TMP6]]) +// CHECK-C-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 7 +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], ptr [[TMP2]], i32 1, i32 [[TMP5]]) // CHECK-C-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_ver_vnum_za16ju10__SVBool_tPKvl( @@ -241,15 +241,15 @@ void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, i // CHECK-CXX-NEXT: entry: // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG]]) // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-CXX-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-CXX-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-CXX-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-CXX-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 7 -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], ptr [[TMP3]], i32 1, i32 [[TMP6]]) +// CHECK-CXX-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 7 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], ptr [[TMP2]], i32 1, i32 [[TMP5]]) // CHECK-CXX-NEXT: ret void // void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { @@ -262,15 +262,15 @@ void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-C-NEXT: entry: // CHECK-C-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG]]) // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-C-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-C-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-C-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// 
CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-C-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 3 -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], ptr [[TMP3]], i32 3, i32 [[TMP6]]) +// CHECK-C-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 3 +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], ptr [[TMP2]], i32 3, i32 [[TMP5]]) // CHECK-C-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_ver_vnum_za32ju10__SVBool_tPKvl( @@ -278,15 +278,15 @@ void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: entry: // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG]]) // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-CXX-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-CXX-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-CXX-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-CXX-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 3 -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], ptr [[TMP3]], i32 3, i32 [[TMP6]]) +// CHECK-CXX-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 3 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], ptr [[TMP2]], i32 3, i32 [[TMP5]]) // CHECK-CXX-NEXT: ret void // void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { @@ -299,15 +299,15 @@ void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-C-NEXT: entry: // CHECK-C-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG]]) // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-C-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-C-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-C-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], 
[[TMP4]] -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-C-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 1 -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], ptr [[TMP3]], i32 7, i32 [[TMP6]]) +// CHECK-C-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 1 +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], ptr [[TMP2]], i32 7, i32 [[TMP5]]) // CHECK-C-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_ver_vnum_za64ju10__SVBool_tPKvl( @@ -315,15 +315,15 @@ void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: entry: // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG]]) // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-CXX-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-CXX-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-CXX-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-CXX-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 1 -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], ptr [[TMP3]], i32 7, i32 [[TMP6]]) +// CHECK-CXX-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 1 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], ptr [[TMP2]], i32 7, i32 [[TMP5]]) // CHECK-CXX-NEXT: ret void // void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { @@ -336,13 +336,13 @@ void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-C-NEXT: entry: // CHECK-C-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG]]) // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-C-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-C-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-C-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-C-NEXT: tail call void 
@llvm.aarch64.sme.ld1q.vert( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], ptr [[TMP3]], i32 15, i32 [[TMP5]]) +// CHECK-C-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], ptr [[TMP2]], i32 15, i32 [[TMP4]]) // CHECK-C-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z25test_svld1_ver_vnum_za128ju10__SVBool_tPKvl( @@ -350,13 +350,13 @@ void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: entry: // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG]]) // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-CXX-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-CXX-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-CXX-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], ptr [[TMP3]], i32 15, i32 [[TMP5]]) +// CHECK-CXX-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], ptr [[TMP2]], i32 15, i32 [[TMP4]]) // CHECK-CXX-NEXT: ret void // void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_st1_vnum.c b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_st1_vnum.c index 22a0b9eabaea3..507a544bdadbe 100644 --- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_st1_vnum.c +++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_st1_vnum.c @@ -10,30 +10,30 @@ // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-C-NEXT: entry: // CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-C-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[TMP0]] -// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) -// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] -// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 15 -// CHECK-C-NEXT: tail call void 
@llvm.aarch64.sme.st1b.horiz( [[PG]], ptr [[TMP2]], i32 0, i32 [[TMP5]]) +// CHECK-C-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-C-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-C-NEXT: [[TMP2:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-C-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]] +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP3]]) +// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]] +// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[ADD]], 15 +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP4]]) // CHECK-C-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z23test_svst1_hor_vnum_za8ju10__SVBool_tPvl( // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-CXX-NEXT: entry: // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-CXX-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[TMP0]] -// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) -// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] -// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 15 -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG]], ptr [[TMP2]], i32 0, i32 [[TMP5]]) +// CHECK-CXX-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-CXX-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-CXX-NEXT: [[TMP2:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-CXX-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP3]]) +// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]] +// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[ADD]], 15 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP4]]) // CHECK-CXX-NEXT: ret void // void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") { @@ -46,15 +46,15 @@ void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_ // CHECK-C-NEXT: entry: // CHECK-C-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG]]) // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-C-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-C-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-C-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-C-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 7 -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], ptr [[TMP3]], i32 1, i32 [[TMP6]]) +// CHECK-C-NEXT: [[SVL:%.*]] = shl nuw 
nsw i64 [[TMP1]], 4 +// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 7 +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], ptr [[TMP2]], i32 1, i32 [[TMP5]]) // CHECK-C-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z24test_svst1_hor_vnum_za16ju10__SVBool_tPvl( @@ -62,15 +62,15 @@ void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_ // CHECK-CXX-NEXT: entry: // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG]]) // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-CXX-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-CXX-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-CXX-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-CXX-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 7 -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], ptr [[TMP3]], i32 1, i32 [[TMP6]]) +// CHECK-CXX-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 7 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], ptr [[TMP2]], i32 1, i32 [[TMP5]]) // CHECK-CXX-NEXT: ret void // void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") { @@ -83,15 +83,15 @@ void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64 // CHECK-C-NEXT: entry: // CHECK-C-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG]]) // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-C-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-C-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-C-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-C-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 3 -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], ptr [[TMP3]], i32 3, i32 [[TMP6]]) +// CHECK-C-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-C-NEXT: [[MULVL:%.*]] = 
mul i64 [[SVL]], [[VNUM]] +// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 3 +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], ptr [[TMP2]], i32 3, i32 [[TMP5]]) // CHECK-C-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z24test_svst1_hor_vnum_za32ju10__SVBool_tPvl( @@ -99,15 +99,15 @@ void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64 // CHECK-CXX-NEXT: entry: // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG]]) // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-CXX-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-CXX-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-CXX-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-CXX-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 3 -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], ptr [[TMP3]], i32 3, i32 [[TMP6]]) +// CHECK-CXX-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 3 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], ptr [[TMP2]], i32 3, i32 [[TMP5]]) // CHECK-CXX-NEXT: ret void // void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") { @@ -120,15 +120,15 @@ void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64 // CHECK-C-NEXT: entry: // CHECK-C-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG]]) // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-C-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-C-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-C-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-C-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 1 -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], ptr [[TMP3]], i32 7, i32 [[TMP6]]) +// CHECK-C-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-C-NEXT: 
[[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 1 +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], ptr [[TMP2]], i32 7, i32 [[TMP5]]) // CHECK-C-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z24test_svst1_hor_vnum_za64ju10__SVBool_tPvl( @@ -136,15 +136,15 @@ void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64 // CHECK-CXX-NEXT: entry: // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG]]) // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-CXX-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-CXX-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-CXX-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-CXX-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 1 -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], ptr [[TMP3]], i32 7, i32 [[TMP6]]) +// CHECK-CXX-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 1 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], ptr [[TMP2]], i32 7, i32 [[TMP5]]) // CHECK-CXX-NEXT: ret void // void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") { @@ -157,13 +157,13 @@ void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64 // CHECK-C-NEXT: entry: // CHECK-C-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG]]) // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-C-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-C-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-C-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], ptr [[TMP3]], i32 15, i32 [[TMP5]]) +// CHECK-C-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 
[[SLICE_BASE]], [[TMP3]] +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], ptr [[TMP2]], i32 15, i32 [[TMP4]]) // CHECK-C-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z25test_svst1_hor_vnum_za128ju10__SVBool_tPvl( @@ -171,13 +171,13 @@ void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64 // CHECK-CXX-NEXT: entry: // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG]]) // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-CXX-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]] -// CHECK-CXX-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-CXX-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]]) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], ptr [[TMP3]], i32 15, i32 [[TMP5]]) +// CHECK-CXX-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], ptr [[TMP2]], i32 15, i32 [[TMP4]]) // CHECK-CXX-NEXT: ret void // void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") { @@ -189,30 +189,30 @@ void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int6 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-C-NEXT: entry: // CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-C-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM]], 4 -// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[TMP0]] -// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] -// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32 -// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG]], ptr [[TMP2]], i32 0, i32 [[TMP4]]) -// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]] -// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 15 -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG]], ptr [[TMP2]], i32 0, i32 [[TMP5]]) +// CHECK-C-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]] +// CHECK-C-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]] +// CHECK-C-NEXT: [[TMP2:%.*]] = trunc i64 [[VNUM]] to i32 +// CHECK-C-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]] +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP3]]) +// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]] +// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[ADD]], 15 +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP4]]) // 
CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z23test_svst1_ver_vnum_za8ju10__SVBool_tPvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM]], 4
-// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[TMP0]]
-// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
-// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
-// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
-// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
-// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 15
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG]], ptr [[TMP2]], i32 0, i32 [[TMP5]])
+// CHECK-CXX-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]]
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
+// CHECK-CXX-NEXT: [[TMP2:%.*]] = trunc i64 [[VNUM]] to i32
+// CHECK-CXX-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP3]])
+// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
+// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[ADD]], 15
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP4]])
// CHECK-CXX-NEXT: ret void
//
void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") {
@@ -225,15 +225,15 @@ void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_
// CHECK-C-NEXT: entry:
// CHECK-C-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG]])
// CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-C-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4
-// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]]
-// CHECK-C-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
-// CHECK-C-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32
-// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]]
-// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]])
-// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]]
-// CHECK-C-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 7
-// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], ptr [[TMP3]], i32 1, i32 [[TMP6]])
+// CHECK-C-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]]
+// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
+// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
+// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
+// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
+// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
+// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 7
+// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], ptr [[TMP2]], i32 1, i32 [[TMP5]])
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z24test_svst1_ver_vnum_za16ju10__SVBool_tPvl(
@@ -241,15 +241,15 @@ void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_
// CHECK-CXX-NEXT: entry:
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG]])
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-CXX-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4
-// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]]
-// CHECK-CXX-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
-// CHECK-CXX-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32
-// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]]
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]])
-// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]]
-// CHECK-CXX-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 7
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], ptr [[TMP3]], i32 1, i32 [[TMP6]])
+// CHECK-CXX-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]]
+// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
+// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
+// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
+// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
+// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 7
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], ptr [[TMP2]], i32 1, i32 [[TMP5]])
// CHECK-CXX-NEXT: ret void
//
void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") {
@@ -262,15 +262,15 @@ void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-C-NEXT: entry:
// CHECK-C-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG]])
// CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-C-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4
-// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]]
-// CHECK-C-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
-// CHECK-C-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32
-// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]]
-// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]])
-// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]]
-// CHECK-C-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 3
-// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], ptr [[TMP3]], i32 3, i32 [[TMP6]])
+// CHECK-C-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]]
+// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
+// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
+// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
+// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
+// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
+// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 3
+// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], ptr [[TMP2]], i32 3, i32 [[TMP5]])
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z24test_svst1_ver_vnum_za32ju10__SVBool_tPvl(
@@ -278,15 +278,15 @@ void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-CXX-NEXT: entry:
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG]])
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-CXX-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4
-// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]]
-// CHECK-CXX-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
-// CHECK-CXX-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32
-// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]]
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]])
-// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]]
-// CHECK-CXX-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 3
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], ptr [[TMP3]], i32 3, i32 [[TMP6]])
+// CHECK-CXX-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]]
+// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
+// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
+// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
+// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
+// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 3
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], ptr [[TMP2]], i32 3, i32 [[TMP5]])
// CHECK-CXX-NEXT: ret void
//
void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") {
@@ -299,15 +299,15 @@ void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-C-NEXT: entry:
// CHECK-C-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG]])
// CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-C-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4
-// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]]
-// CHECK-C-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
-// CHECK-C-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32
-// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]]
-// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]])
-// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]]
-// CHECK-C-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 1
-// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], ptr [[TMP3]], i32 7, i32 [[TMP6]])
+// CHECK-C-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]]
+// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
+// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
+// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
+// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
+// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
+// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 1
+// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], ptr [[TMP2]], i32 7, i32 [[TMP5]])
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z24test_svst1_ver_vnum_za64ju10__SVBool_tPvl(
@@ -315,15 +315,15 @@ void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-CXX-NEXT: entry:
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG]])
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-CXX-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4
-// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]]
-// CHECK-CXX-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
-// CHECK-CXX-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32
-// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]]
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]])
-// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]]
-// CHECK-CXX-NEXT: [[TMP6:%.*]] = add i32 [[ADD]], 1
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], ptr [[TMP3]], i32 7, i32 [[TMP6]])
+// CHECK-CXX-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]]
+// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
+// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
+// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
+// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
+// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 1
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], ptr [[TMP2]], i32 7, i32 [[TMP5]])
// CHECK-CXX-NEXT: ret void
//
void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") {
@@ -336,13 +336,13 @@ void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-C-NEXT: entry:
// CHECK-C-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG]])
// CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-C-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4
-// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]]
-// CHECK-C-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
-// CHECK-C-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32
-// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]]
-// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]])
-// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[TMP3]], i32 15, i32 [[TMP5]])
+// CHECK-C-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]]
+// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
+// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
+// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
+// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
+// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[TMP2]], i32 15, i32 [[TMP4]])
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z25test_svst1_ver_vnum_za128ju10__SVBool_tPvl(
@@ -350,13 +350,13 @@ void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64
// CHECK-CXX-NEXT: entry:
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG]])
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-CXX-NEXT: [[TMP2:%.*]] = shl i64 [[VNUM]], 4
-// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP2]], [[TMP1]]
-// CHECK-CXX-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
-// CHECK-CXX-NEXT: [[TMP4:%.*]] = trunc i64 [[VNUM]] to i32
-// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE]], [[TMP4]]
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[TMP3]], i32 0, i32 [[TMP5]])
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[TMP3]], i32 15, i32 [[TMP5]])
+// CHECK-CXX-NEXT: [[SVL:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[SVL]], [[VNUM]]
+// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
+// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
+// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[TMP2]], i32 15, i32 [[TMP4]])
// CHECK-CXX-NEXT: ret void
//
void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") {
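[Reviewer-added sketch, not part of the patch.] All of the SME ld1/st1 _vnum hunks above encode the same canonicalization: the byte offset is now computed as (vscale << 4) * vnum rather than (vnum << 4) * vscale, so the shift applies to the always-positive vscale and can legitimately carry the nuw/nsw flags. A minimal C sketch of the addressing these CHECK lines pin down, assuming arm_sme.h; the helper name vnum_address is made up for illustration:

#include <arm_sme.h>
#include <stdint.h>

// Hypothetical helper, not from the test file: compute ptr + vnum * SVL_B,
// the address the ld1/st1 intrinsic is expected to access. svcntsb()
// returns the streaming vector length in bytes, i.e. 16 * vscale, which is
// the [[SVL]] value above; the multiply corresponds to [[MULVL]].
static inline void *vnum_address(void *ptr, int64_t vnum) {
  uint64_t svl_b = svcntsb();
  return (char *)ptr + (int64_t)svl_b * vnum;
}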
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c
index af39be3c8c06e..6471ab424372f 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c
@@ -390,8 +390,8 @@ svmfloat8x4_t test_svld1_mf8_x4(svcount_t pn, const mfloat8_t *base) ATTR
// CHECK-LABEL: @test_svld1_vnum_u8_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , } [[TMP3]]
@@ -399,8 +399,8 @@ svmfloat8x4_t test_svld1_mf8_x4(svcount_t pn, const mfloat8_t *base) ATTR
// CPP-CHECK-LABEL: @_Z21test_svld1_vnum_u8_x2u11__SVCount_tPKhl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , } [[TMP3]]
@@ -413,8 +413,8 @@ svuint8x2_t test_svld1_vnum_u8_x2(svcount_t pn, const uint8_t *base, int64_t vnu
// CHECK-LABEL: @test_svld1_vnum_u16_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , } [[TMP3]]
@@ -422,8 +422,8 @@ svuint8x2_t test_svld1_vnum_u8_x2(svcount_t pn, const uint8_t *base, int64_t vnu
// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u16_x2u11__SVCount_tPKtl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , } [[TMP3]]
@@ -436,8 +436,8 @@ svuint16x2_t test_svld1_vnum_u16_x2(svcount_t pn, const uint16_t *base, int64_t
// CHECK-LABEL: @test_svld1_vnum_u32_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , } [[TMP3]]
@@ -445,8 +445,8 @@ svuint16x2_t test_svld1_vnum_u16_x2(svcount_t pn, const uint16_t *base, int64_t
// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u32_x2u11__SVCount_tPKjl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , } [[TMP3]]
@@ -459,8 +459,8 @@ svuint32x2_t test_svld1_vnum_u32_x2(svcount_t pn, const uint32_t *base, int64_t
// CHECK-LABEL: @test_svld1_vnum_u64_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , } [[TMP3]]
@@ -468,8 +468,8 @@ svuint32x2_t test_svld1_vnum_u32_x2(svcount_t pn, const uint32_t *base, int64_t
// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u64_x2u11__SVCount_tPKml(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , } [[TMP3]]
@@ -482,8 +482,8 @@ svuint64x2_t test_svld1_vnum_u64_x2(svcount_t pn, const uint64_t *base, int64_t
// CHECK-LABEL: @test_svld1_vnum_u8_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -491,8 +491,8 @@ svuint64x2_t test_svld1_vnum_u64_x2(svcount_t pn, const uint64_t *base, int64_t
// CPP-CHECK-LABEL: @_Z21test_svld1_vnum_u8_x4u11__SVCount_tPKhl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -505,8 +505,8 @@ svuint8x4_t test_svld1_vnum_u8_x4(svcount_t pn, const uint8_t *base, int64_t vnu
// CHECK-LABEL: @test_svld1_vnum_u16_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -514,8 +514,8 @@ svuint8x4_t test_svld1_vnum_u8_x4(svcount_t pn, const uint8_t *base, int64_t vnu
// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u16_x4u11__SVCount_tPKtl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -528,8 +528,8 @@ svuint16x4_t test_svld1_vnum_u16_x4(svcount_t pn, const uint16_t *base, int64_t
// CHECK-LABEL: @test_svld1_vnum_u32_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -537,8 +537,8 @@ svuint16x4_t test_svld1_vnum_u16_x4(svcount_t pn, const uint16_t *base, int64_t
// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u32_x4u11__SVCount_tPKjl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -551,8 +551,8 @@ svuint32x4_t test_svld1_vnum_u32_x4(svcount_t pn, const uint32_t *base, int64_t
// CHECK-LABEL: @test_svld1_vnum_u64_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -560,8 +560,8 @@ svuint32x4_t test_svld1_vnum_u32_x4(svcount_t pn, const uint32_t *base, int64_t
// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u64_x4u11__SVCount_tPKml(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -574,8 +574,8 @@ svuint64x4_t test_svld1_vnum_u64_x4(svcount_t pn, const uint64_t *base, int64_t
// CHECK-LABEL: @test_svld1_vnum_s8_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , } [[TMP3]]
@@ -583,8 +583,8 @@ svuint64x4_t test_svld1_vnum_u64_x4(svcount_t pn, const uint64_t *base, int64_t
// CPP-CHECK-LABEL: @_Z21test_svld1_vnum_s8_x2u11__SVCount_tPKal(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , } [[TMP3]]
@@ -597,8 +597,8 @@ svint8x2_t test_svld1_vnum_s8_x2(svcount_t pn, const int8_t *base, int64_t vnum)
// CHECK-LABEL: @test_svld1_vnum_s16_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , } [[TMP3]]
@@ -606,8 +606,8 @@ svint8x2_t test_svld1_vnum_s8_x2(svcount_t pn, const int8_t *base, int64_t vnum)
// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s16_x2u11__SVCount_tPKsl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , } [[TMP3]]
@@ -620,8 +620,8 @@ svint16x2_t test_svld1_vnum_s16_x2(svcount_t pn, const int16_t *base, int64_t vn
// CHECK-LABEL: @test_svld1_vnum_s32_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , } [[TMP3]]
@@ -629,8 +629,8 @@ svint16x2_t test_svld1_vnum_s16_x2(svcount_t pn, const int16_t *base, int64_t vn
// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s32_x2u11__SVCount_tPKil(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , } [[TMP3]]
@@ -643,8 +643,8 @@ svint32x2_t test_svld1_vnum_s32_x2(svcount_t pn, const int32_t *base, int64_t vn
// CHECK-LABEL: @test_svld1_vnum_s64_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , } [[TMP3]]
@@ -652,8 +652,8 @@ svint32x2_t test_svld1_vnum_s32_x2(svcount_t pn, const int32_t *base, int64_t vn
// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s64_x2u11__SVCount_tPKll(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , } [[TMP3]]
@@ -666,8 +666,8 @@ svint64x2_t test_svld1_vnum_s64_x2(svcount_t pn, const int64_t *base, int64_t vn
// CHECK-LABEL: @test_svld1_vnum_s8_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -675,8 +675,8 @@ svint64x2_t test_svld1_vnum_s64_x2(svcount_t pn, const int64_t *base, int64_t vn
// CPP-CHECK-LABEL: @_Z21test_svld1_vnum_s8_x4u11__SVCount_tPKal(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -689,8 +689,8 @@ svint8x4_t test_svld1_vnum_s8_x4(svcount_t pn, const int8_t *base, int64_t vnum)
// CHECK-LABEL: @test_svld1_vnum_s16_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -698,8 +698,8 @@ svint8x4_t test_svld1_vnum_s8_x4(svcount_t pn, const int8_t *base, int64_t vnum)
// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s16_x4u11__SVCount_tPKsl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -712,8 +712,8 @@ svint16x4_t test_svld1_vnum_s16_x4(svcount_t pn, const int16_t *base, int64_t vn
// CHECK-LABEL: @test_svld1_vnum_s32_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -721,8 +721,8 @@ svint16x4_t test_svld1_vnum_s16_x4(svcount_t pn, const int16_t *base, int64_t vn
// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s32_x4u11__SVCount_tPKil(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -735,8 +735,8 @@ svint32x4_t test_svld1_vnum_s32_x4(svcount_t pn, const int32_t *base, int64_t vn
// CHECK-LABEL: @test_svld1_vnum_s64_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -744,8 +744,8 @@ svint32x4_t test_svld1_vnum_s32_x4(svcount_t pn, const int32_t *base, int64_t vn
// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s64_x4u11__SVCount_tPKll(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -758,8 +758,8 @@ svint64x4_t test_svld1_vnum_s64_x4(svcount_t pn, const int64_t *base, int64_t vn
// CHECK-LABEL: @test_svld1_vnum_f16_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , } [[TMP3]]
@@ -767,8 +767,8 @@ svint64x4_t test_svld1_vnum_s64_x4(svcount_t pn, const int64_t *base, int64_t vn
// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f16_x2u11__SVCount_tPKDhl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , } [[TMP3]]
@@ -781,8 +781,8 @@ svfloat16x2_t test_svld1_vnum_f16_x2(svcount_t pn, const float16_t *base, int64_
// CHECK-LABEL: @test_svld1_vnum_f32_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , } [[TMP3]]
@@ -790,8 +790,8 @@ svfloat16x2_t test_svld1_vnum_f16_x2(svcount_t pn, const float16_t *base, int64_
// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f32_x2u11__SVCount_tPKfl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , } [[TMP3]]
@@ -804,8 +804,8 @@ svfloat32x2_t test_svld1_vnum_f32_x2(svcount_t pn, const float32_t *base, int64_
// CHECK-LABEL: @test_svld1_vnum_f64_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , } [[TMP3]]
@@ -813,8 +813,8 @@ svfloat32x2_t test_svld1_vnum_f32_x2(svcount_t pn, const float32_t *base, int64_
// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f64_x2u11__SVCount_tPKdl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , } [[TMP3]]
@@ -827,8 +827,8 @@ svfloat64x2_t test_svld1_vnum_f64_x2(svcount_t pn, const float64_t *base, int64_
// CHECK-LABEL: @test_svld1_vnum_mf8_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , } [[TMP3]]
@@ -836,8 +836,8 @@ svfloat64x2_t test_svld1_vnum_f64_x2(svcount_t pn, const float64_t *base, int64_
// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_mf8_x2u11__SVCount_tPKu6__mfp8l(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , } [[TMP3]]
@@ -850,8 +850,8 @@ svmfloat8x2_t test_svld1_vnum_mf8_x2(svcount_t pn, const mfloat8_t *base, int64_
// CHECK-LABEL: @test_svld1_vnum_f16_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -859,8 +859,8 @@ svmfloat8x2_t test_svld1_vnum_mf8_x2(svcount_t pn, const mfloat8_t *base, int64_
// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f16_x4u11__SVCount_tPKDhl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -873,8 +873,8 @@ svfloat16x4_t test_svld1_vnum_f16_x4(svcount_t pn, const float16_t *base, int64_
// CHECK-LABEL: @test_svld1_vnum_f32_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -882,8 +882,8 @@ svfloat16x4_t test_svld1_vnum_f16_x4(svcount_t pn, const float16_t *base, int64_
// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f32_x4u11__SVCount_tPKfl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -896,8 +896,8 @@ svfloat32x4_t test_svld1_vnum_f32_x4(svcount_t pn, const float32_t *base, int64_
// CHECK-LABEL: @test_svld1_vnum_f64_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -905,8 +905,8 @@ svfloat32x4_t test_svld1_vnum_f32_x4(svcount_t pn, const float32_t *base, int64_
// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f64_x4u11__SVCount_tPKdl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -919,8 +919,8 @@ svfloat64x4_t test_svld1_vnum_f64_x4(svcount_t pn, const float64_t *base, int64_
// CHECK-LABEL: @test_svld1_vnum_mf8_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -928,8 +928,8 @@ svfloat64x4_t test_svld1_vnum_f64_x4(svcount_t pn, const float64_t *base, int64_
// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_mf8_x4u11__SVCount_tPKu6__mfp8l(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , , , } [[TMP3]]
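[Reviewer-added sketch, not part of the patch.] The acle_sve2p1_ld1.c hunks above apply the same rewrite to the multi-vector predicated loads: [[DOTIDX]] is now (vscale << 4) * vnum. A sketch of the kind of source these autogenerated checks come from, assuming arm_sve.h and an SVE2p1-enabled target; the wrapper name load_pair is illustrative only:

#include <arm_sve.h>
#include <stdint.h>

// Illustrative wrapper, not from the test file: load two consecutive
// vectors starting vnum vector-lengths past base. The byte offset is
// svcntb() * vnum, the [[DOTIDX]] value matched above. Build with an
// SVE2p1 target, e.g. -march=armv9-a+sve2p1.
svuint8x2_t load_pair(svcount_t pn, const uint8_t *base, int64_t vnum) {
  return svld1_vnum_u8_x2(pn, base, vnum);
}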
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c
index 02c7586f15122..cd92b61171802 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c
@@ -388,8 +388,8 @@ svmfloat8x4_t test_svldnt1_mf8_x4(svcount_t pn, const mfloat8_t *base) ATTR
// CHECK-LABEL: @test_svldnt1_vnum_u8_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , } [[TMP3]]
@@ -397,8 +397,8 @@ svmfloat8x4_t test_svldnt1_mf8_x4(svcount_t pn, const mfloat8_t *base) ATTR
// CPP-CHECK-LABEL: @_Z23test_svldnt1_vnum_u8_x2u11__SVCount_tPKhl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , } [[TMP3]]
@@ -411,8 +411,8 @@ svuint8x2_t test_svldnt1_vnum_u8_x2(svcount_t pn, const uint8_t *base, int64_t v
// CHECK-LABEL: @test_svldnt1_vnum_u16_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , } [[TMP3]]
@@ -420,8 +420,8 @@ svuint8x2_t test_svldnt1_vnum_u8_x2(svcount_t pn, const uint8_t *base, int64_t v
// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u16_x2u11__SVCount_tPKtl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , } [[TMP3]]
@@ -434,8 +434,8 @@ svuint16x2_t test_svldnt1_vnum_u16_x2(svcount_t pn, const uint16_t *base, int64_
// CHECK-LABEL: @test_svldnt1_vnum_u32_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , } [[TMP3]]
@@ -443,8 +443,8 @@ svuint16x2_t test_svldnt1_vnum_u16_x2(svcount_t pn, const uint16_t *base, int64_
// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u32_x2u11__SVCount_tPKjl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , } [[TMP3]]
@@ -457,8 +457,8 @@ svuint32x2_t test_svldnt1_vnum_u32_x2(svcount_t pn, const uint32_t *base, int64_
// CHECK-LABEL: @test_svldnt1_vnum_u64_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , } [[TMP3]]
@@ -466,8 +466,8 @@ svuint32x2_t test_svldnt1_vnum_u32_x2(svcount_t pn, const uint32_t *base, int64_
// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u64_x2u11__SVCount_tPKml(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , } [[TMP3]]
@@ -480,8 +480,8 @@ svuint64x2_t test_svldnt1_vnum_u64_x2(svcount_t pn, const uint64_t *base, int64_
// CHECK-LABEL: @test_svldnt1_vnum_u8_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -489,8 +489,8 @@ svuint64x2_t test_svldnt1_vnum_u64_x2(svcount_t pn, const uint64_t *base, int64_
// CPP-CHECK-LABEL: @_Z23test_svldnt1_vnum_u8_x4u11__SVCount_tPKhl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -503,8 +503,8 @@ svuint8x4_t test_svldnt1_vnum_u8_x4(svcount_t pn, const uint8_t *base, int64_t v
// CHECK-LABEL: @test_svldnt1_vnum_u16_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -512,8 +512,8 @@ svuint8x4_t test_svldnt1_vnum_u8_x4(svcount_t pn, const uint8_t *base, int64_t v
// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u16_x4u11__SVCount_tPKtl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -526,8 +526,8 @@ svuint16x4_t test_svldnt1_vnum_u16_x4(svcount_t pn, const uint16_t *base, int64_
// CHECK-LABEL: @test_svldnt1_vnum_u32_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -535,8 +535,8 @@ svuint16x4_t test_svldnt1_vnum_u16_x4(svcount_t pn, const uint16_t *base, int64_
// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u32_x4u11__SVCount_tPKjl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -549,8 +549,8 @@ svuint32x4_t test_svldnt1_vnum_u32_x4(svcount_t pn, const uint32_t *base, int64_
// CHECK-LABEL: @test_svldnt1_vnum_u64_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -558,8 +558,8 @@ svuint32x4_t test_svldnt1_vnum_u32_x4(svcount_t pn, const uint32_t *base, int64_
// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u64_x4u11__SVCount_tPKml(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -572,8 +572,8 @@ svuint64x4_t test_svldnt1_vnum_u64_x4(svcount_t pn, const uint64_t *base, int64_
// CHECK-LABEL: @test_svldnt1_vnum_s8_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , } [[TMP3]]
@@ -581,8 +581,8 @@ svuint64x4_t test_svldnt1_vnum_u64_x4(svcount_t pn, const uint64_t *base, int64_
// CPP-CHECK-LABEL: @_Z23test_svldnt1_vnum_s8_x2u11__SVCount_tPKal(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , } [[TMP3]]
@@ -595,8 +595,8 @@ svint8x2_t test_svldnt1_vnum_s8_x2(svcount_t pn, const int8_t *base, int64_t vnu
// CHECK-LABEL: @test_svldnt1_vnum_s16_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , } [[TMP3]]
@@ -604,8 +604,8 @@ svint8x2_t test_svldnt1_vnum_s8_x2(svcount_t pn, const int8_t *base, int64_t vnu
// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s16_x2u11__SVCount_tPKsl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , } [[TMP3]]
@@ -618,8 +618,8 @@ svint16x2_t test_svldnt1_vnum_s16_x2(svcount_t pn, const int16_t *base, int64_t
// CHECK-LABEL: @test_svldnt1_vnum_s32_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , } [[TMP3]]
@@ -627,8 +627,8 @@ svint16x2_t test_svldnt1_vnum_s16_x2(svcount_t pn, const int16_t *base, int64_t
// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s32_x2u11__SVCount_tPKil(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , } [[TMP3]]
@@ -641,8 +641,8 @@ svint32x2_t test_svldnt1_vnum_s32_x2(svcount_t pn, const int32_t *base, int64_t
// CHECK-LABEL: @test_svldnt1_vnum_s64_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , } [[TMP3]]
@@ -650,8 +650,8 @@ svint32x2_t test_svldnt1_vnum_s32_x2(svcount_t pn, const int32_t *base, int64_t
// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s64_x2u11__SVCount_tPKll(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , } [[TMP3]]
@@ -664,8 +664,8 @@ svint64x2_t test_svldnt1_vnum_s64_x2(svcount_t pn, const int64_t *base, int64_t
// CHECK-LABEL: @test_svldnt1_vnum_s8_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -673,8 +673,8 @@ svint64x2_t test_svldnt1_vnum_s64_x2(svcount_t pn, const int64_t *base, int64_t
// CPP-CHECK-LABEL: @_Z23test_svldnt1_vnum_s8_x4u11__SVCount_tPKal(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -687,8 +687,8 @@ svint8x4_t test_svldnt1_vnum_s8_x4(svcount_t pn, const int8_t *base, int64_t vnu
// CHECK-LABEL: @test_svldnt1_vnum_s16_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -696,8 +696,8 @@ svint8x4_t test_svldnt1_vnum_s8_x4(svcount_t pn, const int8_t *base, int64_t vnu
// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s16_x4u11__SVCount_tPKsl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -710,8 +710,8 @@ svint16x4_t test_svldnt1_vnum_s16_x4(svcount_t pn, const int16_t *base, int64_t
// CHECK-LABEL: @test_svldnt1_vnum_s32_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret { , , , } [[TMP3]]
@@ -719,8 +719,8 @@ svint16x4_t test_svldnt1_vnum_s16_x4(svcount_t pn, const int16_t *base, int64_t
// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s32_x4u11__SVCount_tPKil(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret { , 
, , } [[TMP3]] @@ -733,8 +733,8 @@ svint32x4_t test_svldnt1_vnum_s32_x4(svcount_t pn, const int32_t *base, int64_t // CHECK-LABEL: @test_svldnt1_vnum_s64_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret { , , , } [[TMP3]] @@ -742,8 +742,8 @@ svint32x4_t test_svldnt1_vnum_s32_x4(svcount_t pn, const int32_t *base, int64_t // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s64_x4u11__SVCount_tPKll( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret { , , , } [[TMP3]] @@ -756,8 +756,8 @@ svint64x4_t test_svldnt1_vnum_s64_x4(svcount_t pn, const int64_t *base, int64_t // CHECK-LABEL: @test_svldnt1_vnum_f16_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret { , } [[TMP3]] @@ -765,8 +765,8 @@ svint64x4_t test_svldnt1_vnum_s64_x4(svcount_t pn, const int64_t *base, int64_t // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f16_x2u11__SVCount_tPKDhl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret { , } [[TMP3]] @@ -779,8 +779,8 @@ svfloat16x2_t test_svldnt1_vnum_f16_x2(svcount_t pn, const float16_t *base, int6 // CHECK-LABEL: @test_svldnt1_vnum_f32_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr 
[[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret { , } [[TMP3]] @@ -788,8 +788,8 @@ svfloat16x2_t test_svldnt1_vnum_f16_x2(svcount_t pn, const float16_t *base, int6 // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f32_x2u11__SVCount_tPKfl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret { , } [[TMP3]] @@ -802,8 +802,8 @@ svfloat32x2_t test_svldnt1_vnum_f32_x2(svcount_t pn, const float32_t *base, int6 // CHECK-LABEL: @test_svldnt1_vnum_f64_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret { , } [[TMP3]] @@ -811,8 +811,8 @@ svfloat32x2_t test_svldnt1_vnum_f32_x2(svcount_t pn, const float32_t *base, int6 // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f64_x2u11__SVCount_tPKdl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret { , } [[TMP3]] @@ -825,8 +825,8 @@ svfloat64x2_t test_svldnt1_vnum_f64_x2(svcount_t pn, const float64_t *base, int6 // CHECK-LABEL: @test_svldnt1_vnum_mf8_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret { , } [[TMP3]] @@ -834,8 +834,8 @@ svfloat64x2_t test_svldnt1_vnum_f64_x2(svcount_t pn, const float64_t *base, int6 // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_mf8_x2u11__SVCount_tPKu6__mfp8l( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: 
[[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret { , } [[TMP3]] @@ -848,8 +848,8 @@ svmfloat8x2_t test_svldnt1_vnum_mf8_x2(svcount_t pn, const mfloat8_t *base, int6 // CHECK-LABEL: @test_svldnt1_vnum_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret { , , , } [[TMP3]] @@ -857,8 +857,8 @@ svmfloat8x2_t test_svldnt1_vnum_mf8_x2(svcount_t pn, const mfloat8_t *base, int6 // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f16_x4u11__SVCount_tPKDhl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret { , , , } [[TMP3]] @@ -871,8 +871,8 @@ svfloat16x4_t test_svldnt1_vnum_f16_x4(svcount_t pn, const float16_t *base, int6 // CHECK-LABEL: @test_svldnt1_vnum_f32_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret { , , , } [[TMP3]] @@ -880,8 +880,8 @@ svfloat16x4_t test_svldnt1_vnum_f16_x4(svcount_t pn, const float16_t *base, int6 // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f32_x4u11__SVCount_tPKfl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret { , , , } [[TMP3]] @@ -894,8 +894,8 @@ svfloat32x4_t test_svldnt1_vnum_f32_x4(svcount_t pn, const 
float32_t *base, int6 // CHECK-LABEL: @test_svldnt1_vnum_f64_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret { , , , } [[TMP3]] @@ -903,8 +903,8 @@ svfloat32x4_t test_svldnt1_vnum_f32_x4(svcount_t pn, const float32_t *base, int6 // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f64_x4u11__SVCount_tPKdl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret { , , , } [[TMP3]] @@ -917,8 +917,8 @@ svfloat64x4_t test_svldnt1_vnum_f64_x4(svcount_t pn, const float64_t *base, int6 // CHECK-LABEL: @test_svldnt1_vnum_mf8_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret { , , , } [[TMP3]] @@ -926,8 +926,8 @@ svfloat64x4_t test_svldnt1_vnum_f64_x4(svcount_t pn, const float64_t *base, int6 // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_mf8_x4u11__SVCount_tPKu6__mfp8l( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret { , , , } [[TMP3]] diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1.c index 092f31ba8491a..9920abacc1075 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1.c @@ -388,8 +388,8 @@ void test_svst1_mf8_x4(svcount_t pn, mfloat8_t *base, svmfloat8x4_t v) ATTR // CHECK-LABEL: @test_svst1_vnum_u8_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -397,8 +397,8 @@ void test_svst1_mf8_x4(svcount_t pn, mfloat8_t *base, svmfloat8x4_t v) ATTR
 // CPP-CHECK-LABEL: @_Z21test_svst1_vnum_u8_x2u11__SVCount_tPhl11svuint8x2_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -411,8 +411,8 @@ void test_svst1_vnum_u8_x2(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x2_
 // CHECK-LABEL: @test_svst1_vnum_u16_x2(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -420,8 +420,8 @@ void test_svst1_vnum_u8_x2(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x2_
 // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u16_x2u11__SVCount_tPtl12svuint16x2_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -434,8 +434,8 @@ void test_svst1_vnum_u16_x2(svcount_t pn, uint16_t *base, int64_t vnum, svuint16
 // CHECK-LABEL: @test_svst1_vnum_u32_x2(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -443,8 +443,8 @@ void test_svst1_vnum_u16_x2(svcount_t pn, uint16_t *base, int64_t vnum, svuint16
 // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u32_x2u11__SVCount_tPjl12svuint32x2_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -457,8 +457,8 @@ void test_svst1_vnum_u32_x2(svcount_t pn, uint32_t *base, int64_t vnum, svuint32
 // CHECK-LABEL: @test_svst1_vnum_u64_x2(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -466,8 +466,8 @@ void test_svst1_vnum_u32_x2(svcount_t pn, uint32_t *base, int64_t vnum, svuint32
 // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u64_x2u11__SVCount_tPml12svuint64x2_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -480,8 +480,8 @@ void test_svst1_vnum_u64_x2(svcount_t pn, uint64_t *base, int64_t vnum, svuint64
 // CHECK-LABEL: @test_svst1_vnum_u8_x4(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -489,8 +489,8 @@ void test_svst1_vnum_u64_x2(svcount_t pn, uint64_t *base, int64_t vnum, svuint64
 // CPP-CHECK-LABEL: @_Z21test_svst1_vnum_u8_x4u11__SVCount_tPhl11svuint8x4_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -503,8 +503,8 @@ void test_svst1_vnum_u8_x4(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x4_
 // CHECK-LABEL: @test_svst1_vnum_u16_x4(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -512,8 +512,8 @@ void test_svst1_vnum_u8_x4(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x4_
 // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u16_x4u11__SVCount_tPtl12svuint16x4_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -526,8 +526,8 @@ void test_svst1_vnum_u16_x4(svcount_t pn, uint16_t *base, int64_t vnum, svuint16
 // CHECK-LABEL: @test_svst1_vnum_u32_x4(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -535,8 +535,8 @@ void test_svst1_vnum_u16_x4(svcount_t pn, uint16_t *base, int64_t vnum, svuint16
 // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u32_x4u11__SVCount_tPjl12svuint32x4_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -549,8 +549,8 @@ void test_svst1_vnum_u32_x4(svcount_t pn, uint32_t *base, int64_t vnum, svuint32
 // CHECK-LABEL: @test_svst1_vnum_u64_x4(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -558,8 +558,8 @@ void test_svst1_vnum_u32_x4(svcount_t pn, uint32_t *base, int64_t vnum, svuint32
 // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u64_x4u11__SVCount_tPml12svuint64x4_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -572,8 +572,8 @@ void test_svst1_vnum_u64_x4(svcount_t pn, uint64_t *base, int64_t vnum, svuint64
 // CHECK-LABEL: @test_svst1_vnum_s8_x2(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -581,8 +581,8 @@ void test_svst1_vnum_u64_x4(svcount_t pn, uint64_t *base, int64_t vnum, svuint64
 // CPP-CHECK-LABEL: @_Z21test_svst1_vnum_s8_x2u11__SVCount_tPal10svint8x2_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -595,8 +595,8 @@ void test_svst1_vnum_s8_x2(svcount_t pn, int8_t *base, int64_t vnum, svint8x2_t
 // CHECK-LABEL: @test_svst1_vnum_s16_x2(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -604,8 +604,8 @@ void test_svst1_vnum_s8_x2(svcount_t pn, int8_t *base, int64_t vnum, svint8x2_t
 // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s16_x2u11__SVCount_tPsl11svint16x2_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -618,8 +618,8 @@ void test_svst1_vnum_s16_x2(svcount_t pn, int16_t *base, int64_t vnum, svint16x2
 // CHECK-LABEL: @test_svst1_vnum_s32_x2(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -627,8 +627,8 @@ void test_svst1_vnum_s16_x2(svcount_t pn, int16_t *base, int64_t vnum, svint16x2
 // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s32_x2u11__SVCount_tPil11svint32x2_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -641,8 +641,8 @@ void test_svst1_vnum_s32_x2(svcount_t pn, int32_t *base, int64_t vnum, svint32x2
 // CHECK-LABEL: @test_svst1_vnum_s64_x2(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -650,8 +650,8 @@ void test_svst1_vnum_s32_x2(svcount_t pn, int32_t *base, int64_t vnum, svint32x2
 // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s64_x2u11__SVCount_tPll11svint64x2_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -664,8 +664,8 @@ void test_svst1_vnum_s64_x2(svcount_t pn, int64_t *base, int64_t vnum, svint64x2
 // CHECK-LABEL: @test_svst1_vnum_s8_x4(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -673,8 +673,8 @@ void test_svst1_vnum_s64_x2(svcount_t pn, int64_t *base, int64_t vnum, svint64x2
 // CPP-CHECK-LABEL: @_Z21test_svst1_vnum_s8_x4u11__SVCount_tPal10svint8x4_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -687,8 +687,8 @@ void test_svst1_vnum_s8_x4(svcount_t pn, int8_t *base, int64_t vnum, svint8x4_t
 // CHECK-LABEL: @test_svst1_vnum_s16_x4(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -696,8 +696,8 @@ void test_svst1_vnum_s8_x4(svcount_t pn, int8_t *base, int64_t vnum, svint8x4_t
 // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s16_x4u11__SVCount_tPsl11svint16x4_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -710,8 +710,8 @@ void test_svst1_vnum_s16_x4(svcount_t pn, int16_t *base, int64_t vnum, svint16x4
 // CHECK-LABEL: @test_svst1_vnum_s32_x4(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -719,8 +719,8 @@ void test_svst1_vnum_s16_x4(svcount_t pn, int16_t *base, int64_t vnum, svint16x4
 // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s32_x4u11__SVCount_tPil11svint32x4_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -733,8 +733,8 @@ void test_svst1_vnum_s32_x4(svcount_t pn, int32_t *base, int64_t vnum, svint32x4
 // CHECK-LABEL: @test_svst1_vnum_s64_x4(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -742,8 +742,8 @@ void test_svst1_vnum_s32_x4(svcount_t pn, int32_t *base, int64_t vnum, svint32x4
 // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s64_x4u11__SVCount_tPll11svint64x4_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -757,8 +757,8 @@ void test_svst1_vnum_s64_x4(svcount_t pn, int64_t *base, int64_t vnum, svint64x4
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -767,8 +767,8 @@ void test_svst1_vnum_s64_x4(svcount_t pn, int64_t *base, int64_t vnum, svint64x4
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -782,8 +782,8 @@ void test_svst1_vnum_f16_x2(svcount_t pn, float16_t *base, float64_t vnum, svflo
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -792,8 +792,8 @@ void test_svst1_vnum_f16_x2(svcount_t pn, float16_t *base, float64_t vnum, svflo
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -807,8 +807,8 @@ void test_svst1_vnum_f32_x2(svcount_t pn, float32_t *base, float64_t vnum, svflo
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -817,8 +817,8 @@ void test_svst1_vnum_f32_x2(svcount_t pn, float32_t *base, float64_t vnum, svflo
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -831,8 +831,8 @@ void test_svst1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svflo
 // CHECK-LABEL: @test_svst1_vnum_mf8_x2(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -840,8 +840,8 @@ void test_svst1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svflo
 // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_mf8_x2u11__SVCount_tPu6__mfp8l13svmfloat8x2_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -855,8 +855,8 @@ void test_svst1_vnum_mf8_x2(svcount_t pn, mfloat8_t *base, int64_t vnum, svmfloa
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -865,8 +865,8 @@ void test_svst1_vnum_mf8_x2(svcount_t pn, mfloat8_t *base, int64_t vnum, svmfloa
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -880,8 +880,8 @@ void test_svst1_vnum_f16_x4(svcount_t pn, float16_t *base, float64_t vnum, svflo
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -890,8 +890,8 @@ void test_svst1_vnum_f16_x4(svcount_t pn, float16_t *base, float64_t vnum, svflo
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -905,8 +905,8 @@ void test_svst1_vnum_f32_x4(svcount_t pn, float32_t *base, float64_t vnum, svflo
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -915,8 +915,8 @@ void test_svst1_vnum_f32_x4(svcount_t pn, float32_t *base, float64_t vnum, svflo
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -929,8 +929,8 @@ void test_svst1_vnum_f64_x4(svcount_t pn, float64_t *base, float64_t vnum, svflo
 // CHECK-LABEL: @test_svst1_vnum_mf8_x4(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -938,8 +938,8 @@ void test_svst1_vnum_f64_x4(svcount_t pn, float64_t *base, float64_t vnum, svflo
 // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_mf8_x4u11__SVCount_tPu6__mfp8l13svmfloat8x4_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_stnt1.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_stnt1.c
index 99dff2c0a5ab2..90045b058bb6a 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_stnt1.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_stnt1.c
@@ -409,8 +409,8 @@ void test_svstnt1_mf8_x4(svcount_t pn, mfloat8_t *base, svmfloat8x4_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_vnum_u8_x2(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -418,8 +418,8 @@ void test_svstnt1_mf8_x4(svcount_t pn, mfloat8_t *base, svmfloat8x4_t v) ATTR
 // CPP-CHECK-LABEL: @_Z23test_svstnt1_vnum_u8_x2u11__SVCount_tPhl11svuint8x2_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -433,8 +433,8 @@ void test_svstnt1_vnum_u8_x2(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x
 // CHECK-LABEL: @test_svstnt1_vnum_u16_x2(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -442,8 +442,8 @@ void test_svstnt1_vnum_u8_x2(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x
 // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u16_x2u11__SVCount_tPtl12svuint16x2_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -457,8 +457,8 @@ void test_svstnt1_vnum_u16_x2(svcount_t pn, uint16_t *base, int64_t vnum, svuint
 // CHECK-LABEL: @test_svstnt1_vnum_u32_x2(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -466,8 +466,8 @@ void test_svstnt1_vnum_u16_x2(svcount_t pn, uint16_t *base, int64_t vnum, svuint
 // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u32_x2u11__SVCount_tPjl12svuint32x2_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -481,8 +481,8 @@ void test_svstnt1_vnum_u32_x2(svcount_t pn, uint32_t *base, int64_t vnum, svuint
 // CHECK-LABEL: @test_svstnt1_vnum_u64_x2(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -490,8 +490,8 @@ void test_svstnt1_vnum_u32_x2(svcount_t pn, uint32_t *base, int64_t vnum, svuint
 // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u64_x2u11__SVCount_tPml12svuint64x2_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -505,8 +505,8 @@ void test_svstnt1_vnum_u64_x2(svcount_t pn, uint64_t *base, int64_t vnum, svuint
 // CHECK-LABEL: @test_svstnt1_vnum_u8_x4(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
@@ -514,8 +514,8 @@ void test_svstnt1_vnum_u64_x2(svcount_t pn, uint64_t *base, int64_t vnum, svuint
 // CPP-CHECK-LABEL: @_Z23test_svstnt1_vnum_u8_x4u11__SVCount_tPhl11svuint8x4_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
@@ -529,8 +529,8 @@ void test_svstnt1_vnum_u8_x4(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x
 // CHECK-LABEL: @test_svstnt1_vnum_u16_x4(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret
void @@ -538,8 +538,8 @@ void test_svstnt1_vnum_u8_x4(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u16_x4u11__SVCount_tPtl12svuint16x4_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void @@ -553,8 +553,8 @@ void test_svstnt1_vnum_u16_x4(svcount_t pn, uint16_t *base, int64_t vnum, svuint // CHECK-LABEL: @test_svstnt1_vnum_u32_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void @@ -562,8 +562,8 @@ void test_svstnt1_vnum_u16_x4(svcount_t pn, uint16_t *base, int64_t vnum, svuint // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u32_x4u11__SVCount_tPjl12svuint32x4_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void @@ -577,8 +577,8 @@ void test_svstnt1_vnum_u32_x4(svcount_t pn, uint32_t *base, int64_t vnum, svuint // CHECK-LABEL: @test_svstnt1_vnum_u64_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void @@ -586,8 +586,8 @@ void test_svstnt1_vnum_u32_x4(svcount_t pn, uint32_t *base, int64_t vnum, svuint // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u64_x4u11__SVCount_tPml12svuint64x4_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 
[[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void @@ -601,8 +601,8 @@ void test_svstnt1_vnum_u64_x4(svcount_t pn, uint64_t *base, int64_t vnum, svuint // CHECK-LABEL: @test_svstnt1_vnum_s8_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void @@ -610,8 +610,8 @@ void test_svstnt1_vnum_u64_x4(svcount_t pn, uint64_t *base, int64_t vnum, svuint // CPP-CHECK-LABEL: @_Z23test_svstnt1_vnum_s8_x2u11__SVCount_tPal10svint8x2_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void @@ -625,8 +625,8 @@ void test_svstnt1_vnum_s8_x2(svcount_t pn, int8_t *base, int64_t vnum, svint8x2_ // CHECK-LABEL: @test_svstnt1_vnum_s16_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void @@ -634,8 +634,8 @@ void test_svstnt1_vnum_s8_x2(svcount_t pn, int8_t *base, int64_t vnum, svint8x2_ // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_s16_x2u11__SVCount_tPsl11svint16x2_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], 
ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void @@ -649,8 +649,8 @@ void test_svstnt1_vnum_s16_x2(svcount_t pn, int16_t *base, int64_t vnum, svint16 // CHECK-LABEL: @test_svstnt1_vnum_s32_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void @@ -658,8 +658,8 @@ void test_svstnt1_vnum_s16_x2(svcount_t pn, int16_t *base, int64_t vnum, svint16 // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_s32_x2u11__SVCount_tPil11svint32x2_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void @@ -673,8 +673,8 @@ void test_svstnt1_vnum_s32_x2(svcount_t pn, int32_t *base, int64_t vnum, svint32 // CHECK-LABEL: @test_svstnt1_vnum_s64_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void @@ -682,8 +682,8 @@ void test_svstnt1_vnum_s32_x2(svcount_t pn, int32_t *base, int64_t vnum, svint32 // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_s64_x2u11__SVCount_tPll11svint64x2_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void @@ -697,8 +697,8 @@ void test_svstnt1_vnum_s64_x2(svcount_t pn, int64_t *base, int64_t vnum, svint64 // CHECK-LABEL: @test_svstnt1_vnum_s8_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul 
i64 [[TMP1]], [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void @@ -706,8 +706,8 @@ void test_svstnt1_vnum_s64_x2(svcount_t pn, int64_t *base, int64_t vnum, svint64 // CPP-CHECK-LABEL: @_Z23test_svstnt1_vnum_s8_x4u11__SVCount_tPal10svint8x4_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void @@ -721,8 +721,8 @@ void test_svstnt1_vnum_s8_x4(svcount_t pn, int8_t *base, int64_t vnum, svint8x4_ // CHECK-LABEL: @test_svstnt1_vnum_s16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void @@ -730,8 +730,8 @@ void test_svstnt1_vnum_s8_x4(svcount_t pn, int8_t *base, int64_t vnum, svint8x4_ // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_s16_x4u11__SVCount_tPsl11svint16x4_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void @@ -745,8 +745,8 @@ void test_svstnt1_vnum_s16_x4(svcount_t pn, int16_t *base, int64_t vnum, svint16 // CHECK-LABEL: @test_svstnt1_vnum_s32_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void @@ 
-754,8 +754,8 @@ void test_svstnt1_vnum_s16_x4(svcount_t pn, int16_t *base, int64_t vnum, svint16 // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_s32_x4u11__SVCount_tPil11svint32x4_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void @@ -769,8 +769,8 @@ void test_svstnt1_vnum_s32_x4(svcount_t pn, int32_t *base, int64_t vnum, svint32 // CHECK-LABEL: @test_svstnt1_vnum_s64_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void @@ -778,8 +778,8 @@ void test_svstnt1_vnum_s32_x4(svcount_t pn, int32_t *base, int64_t vnum, svint32 // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_s64_x4u11__SVCount_tPll11svint64x4_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void @@ -794,8 +794,8 @@ void test_svstnt1_vnum_s64_x4(svcount_t pn, int64_t *base, int64_t vnum, svint64 // CHECK-NEXT: entry: // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void @@ -804,8 +804,8 @@ void test_svstnt1_vnum_s64_x4(svcount_t pn, int64_t *base, int64_t vnum, svint64 // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul 
i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void @@ -820,8 +820,8 @@ void test_svstnt1_vnum_f16_x2(svcount_t pn, float16_t *base, float64_t vnum, svf // CHECK-NEXT: entry: // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void @@ -830,8 +830,8 @@ void test_svstnt1_vnum_f16_x2(svcount_t pn, float16_t *base, float64_t vnum, svf // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void @@ -846,8 +846,8 @@ void test_svstnt1_vnum_f32_x2(svcount_t pn, float32_t *base, float64_t vnum, svf // CHECK-NEXT: entry: // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void @@ -856,8 +856,8 @@ void test_svstnt1_vnum_f32_x2(svcount_t pn, float32_t *base, float64_t vnum, svf // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void @@ -870,8 +870,8 @@ void test_svstnt1_vnum_f64_x2(svcount_t pn, 
float64_t *base, float64_t vnum, svf // CHECK-LABEL: @test_svstnt1_vnum_mf8_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void @@ -879,8 +879,8 @@ void test_svstnt1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svf // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_mf8_x2u11__SVCount_tPu6__mfp8l13svmfloat8x2_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void @@ -894,8 +894,8 @@ void test_svstnt1_vnum_mf8_x2(svcount_t pn, mfloat8_t *base, int64_t vnum, svmfl // CHECK-NEXT: entry: // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void @@ -904,8 +904,8 @@ void test_svstnt1_vnum_mf8_x2(svcount_t pn, mfloat8_t *base, int64_t vnum, svmfl // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void @@ -920,8 +920,8 @@ void test_svstnt1_vnum_f16_x4(svcount_t pn, float16_t *base, float64_t vnum, svf // CHECK-NEXT: entry: // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 
[[TMP1]], [[CONV]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void @@ -930,8 +930,8 @@ void test_svstnt1_vnum_f16_x4(svcount_t pn, float16_t *base, float64_t vnum, svf // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void @@ -946,8 +946,8 @@ void test_svstnt1_vnum_f32_x4(svcount_t pn, float32_t *base, float64_t vnum, svf // CHECK-NEXT: entry: // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void @@ -956,8 +956,8 @@ void test_svstnt1_vnum_f32_x4(svcount_t pn, float32_t *base, float64_t vnum, svf // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[CONV]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void @@ -970,8 +970,8 @@ void test_svstnt1_vnum_f64_x4(svcount_t pn, float64_t *base, float64_t vnum, svf // CHECK-LABEL: @test_svstnt1_vnum_mf8_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void @@ -979,8 +979,8 @@ void 
test_svstnt1_vnum_f64_x4(svcount_t pn, float64_t *base, float64_t vnum, svf // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_mf8_x4u11__SVCount_tPu6__mfp8l13svmfloat8x4_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 96da698538314..8856eda250ed6 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1983,16 +1983,16 @@ def int_experimental_vector_match : DefaultAttrsIntrinsic< [ llvm_anyvector_ty, llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], // Mask - [ IntrNoMem ]>; + [ IntrNoMem, IntrSpeculatable ]>; // Extract based on mask bits def int_experimental_vector_extract_last_active: DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - LLVMVectorElementType<0>], [IntrNoMem]>; + LLVMVectorElementType<0>], [IntrNoMem, IntrSpeculatable]>; // Operators -let IntrProperties = [IntrNoMem] in { +let IntrProperties = [IntrNoMem, IntrSpeculatable] in { // Integer arithmetic def int_vp_add : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, @@ -2039,26 +2039,6 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in { LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; - def int_vp_sdiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - def int_vp_udiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - def int_vp_srem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - def int_vp_urem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; def int_vp_abs : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, llvm_i1_ty, @@ -2390,7 +2370,29 @@ let IntrProperties = [IntrNoMem] in { llvm_i32_ty]>; } -let IntrProperties = [IntrNoMem, ImmArg<ArgIndex<1>>] in { +// Integer VP division and remainder: not speculatable.
+def int_vp_sdiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty], [IntrNoMem]>; +def int_vp_udiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty], [IntrNoMem]>; +def int_vp_srem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty], [IntrNoMem]>; +def int_vp_urem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty], [IntrNoMem]>; + +let IntrProperties = [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<1>>] in { def int_vp_ctlz : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, llvm_i1_ty, @@ -2422,18 +2424,18 @@ def int_loop_dependence_war_mask: def int_get_active_lane_mask: DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyint_ty, LLVMMatchType<1>], - [IntrNoMem]>; + [IntrNoMem, IntrSpeculatable]>; def int_experimental_get_vector_length: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_anyint_ty, llvm_i32_ty, llvm_i1_ty], - [IntrNoMem, + [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>; def int_experimental_cttz_elts: DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyvector_ty, llvm_i1_ty], - [IntrNoMem, ImmArg<ArgIndex<1>>]>; + [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<1>>]>; def int_experimental_vp_splice: DefaultAttrsIntrinsic<[llvm_anyvector_ty], @@ -2442,21 +2444,21 @@ def int_experimental_vp_splice: llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<ArgIndex<2>>]>; + [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>]>; def int_experimental_vp_reverse: DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty], - [IntrNoMem]>; + [IntrNoMem, IntrSpeculatable]>; def int_experimental_vp_splat: DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMVectorElementType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty], - [IntrNoMem]>; + [IntrNoMem, IntrSpeculatable]>; def int_vp_is_fpclass: DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], @@ -2753,16 +2755,22 @@ def int_preserve_static_offset : DefaultAttrsIntrinsic<[llvm_ptr_ty], def int_vector_reverse : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], - [IntrNoMem]>; + [IntrNoMem, + IntrSpeculatable]>; def int_vector_splice : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, ImmArg<ArgIndex<2>>]>; + [IntrNoMem, + IntrSpeculatable, + ImmArg<ArgIndex<2>>]>; //===---------- Intrinsics to query properties of scalable vectors --------===// -def int_vscale : DefaultAttrsIntrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; +def int_vscale : DefaultAttrsIntrinsic<[llvm_anyint_ty], + [], + [IntrNoMem, + IntrSpeculatable]>; //===---------- Intrinsics to perform subvector insertion/extraction ------===// def int_vector_insert : DefaultAttrsIntrinsic<[llvm_anyvector_ty], @@ -2776,18 +2784,22 @@ def int_vector_extract : DefaultAttrsIntrinsic<[llvm_anyvector_ty], foreach n = 2...8 in { def int_vector_interleave#n : DefaultAttrsIntrinsic<[llvm_anyvector_ty], !listsplat(LLVMOneNthElementsVectorType<0, n>, n), - [IntrNoMem]>; + [IntrNoMem, + IntrSpeculatable]>; def int_vector_deinterleave#n : DefaultAttrsIntrinsic<!listsplat(LLVMOneNthElementsVectorType<0, n>, n), [llvm_anyvector_ty], - [IntrNoMem]>; + [IntrNoMem, + IntrSpeculatable]>; } //===-------------- Intrinsics to perform
partial reduction ---------------===// def int_vector_partial_reduce_add : DefaultAttrsIntrinsic<[LLVMMatchType<0>], - [llvm_anyvector_ty, llvm_anyvector_ty], - [IntrNoMem]>; + [llvm_anyvector_ty, + llvm_anyvector_ty], + [IntrNoMem, + IntrSpeculatable]>; //===----------------- Pointer Authentication Intrinsics ------------------===// // diff --git a/llvm/test/Transforms/LICM/vector-intrinsics.ll b/llvm/test/Transforms/LICM/vector-intrinsics.ll new file mode 100644 index 0000000000000..351773e63de9d --- /dev/null +++ b/llvm/test/Transforms/LICM/vector-intrinsics.ll @@ -0,0 +1,176 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes='loop-mssa(licm)' -verify-memoryssa %s | FileCheck %s + +define i32 @reduce_umax(<2 x i32> %inv, i1 %c) { +; CHECK-LABEL: define i32 @reduce_umax( +; CHECK-SAME: <2 x i32> [[INV:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[REDUCE_UMAX:%.*]] = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> [[INV]]) +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[BACKEDGE_COND:%.*]] = icmp ult i32 [[IV]], [[REDUCE_UMAX]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C]], i1 [[BACKEDGE_COND]], i1 false +; CHECK-NEXT: br i1 [[OR_COND]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %cond.true, label %exit + +cond.true: + %reduce.umax = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %inv) + %backedge.cond = icmp ult i32 %iv, %reduce.umax + br i1 %backedge.cond, label %loop, label %exit + +exit: + ret i32 %iv +} + +define i32 @vp_umax(<2 x i32> %inv.l, <2 x i32> %inv.r, i1 %c) { +; CHECK-LABEL: define i32 @vp_umax( +; CHECK-SAME: <2 x i32> [[INV_L:%.*]], <2 x i32> [[INV_R:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[VP_UMAX:%.*]] = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> [[INV_L]], <2 x i32> [[INV_R]], <2 x i1> splat (i1 true), i32 2) +; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x i32> [[VP_UMAX]], i32 0 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[BACKEDGE_COND:%.*]] = icmp ult i32 [[IV]], [[EXTRACT]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C]], i1 [[BACKEDGE_COND]], i1 false +; CHECK-NEXT: br i1 [[OR_COND]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %cond.true, label %exit + +cond.true: + %vp.umax = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> %inv.l, <2 x i32> %inv.r, <2 x i1> splat (i1 1), i32 2) + %extract = extractelement <2 x i32> %vp.umax, i32 0 + %backedge.cond = icmp ult i32 %iv, %extract + br i1 %backedge.cond, label %loop, label %exit + +exit: + ret i32 %iv +} + +define i32 @vp_udiv(<2 x i32> %inv.q, <2 x i32> %inv.d, i1 %c) { +; CHECK-LABEL: define i32 @vp_udiv( +; CHECK-SAME: <2 x i32> [[INV_Q:%.*]], <2 x i32> [[INV_D:%.*]], i1 
[[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[COND_TRUE:.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C]], label %[[COND_TRUE]], label %[[EXIT:.*]] +; CHECK: [[COND_TRUE]]: +; CHECK-NEXT: [[VP_UDIV:%.*]] = call <2 x i32> @llvm.vp.udiv.v2i32(<2 x i32> [[INV_Q]], <2 x i32> [[INV_D]], <2 x i1> splat (i1 true), i32 2) +; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x i32> [[VP_UDIV]], i32 0 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV]], [[EXTRACT]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label %[[LOOP]], label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[COND_TRUE]] ], [ [[IV]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %cond.true, label %exit + +cond.true: + %vp.udiv = call <2 x i32> @llvm.vp.udiv.v2i32(<2 x i32> %inv.q, <2 x i32> %inv.d, <2 x i1> splat (i1 1), i32 2) + %extract = extractelement <2 x i32> %vp.udiv, i32 0 + %backedge.cond = icmp ult i32 %iv, %extract + br i1 %backedge.cond, label %loop, label %exit + +exit: + ret i32 %iv +} + +define i32 @vp_load(ptr %inv, i1 %c) { +; CHECK-LABEL: define i32 @vp_load( +; CHECK-SAME: ptr [[INV:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[COND_TRUE:.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C]], label %[[COND_TRUE]], label %[[EXIT:.*]] +; CHECK: [[COND_TRUE]]: +; CHECK-NEXT: [[VP_LOAD:%.*]] = call <2 x i32> @llvm.vp.load.v2i32.p0(ptr [[INV]], <2 x i1> splat (i1 true), i32 2) +; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x i32> [[VP_LOAD]], i32 0 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV]], [[EXTRACT]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label %[[LOOP]], label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[COND_TRUE]] ], [ [[IV]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %cond.true, label %exit + +cond.true: + %vp.load = call <2 x i32> @llvm.vp.load.v2i32(ptr %inv, <2 x i1> splat (i1 1), i32 2) + %extract = extractelement <2 x i32> %vp.load, i32 0 + %backedge.cond = icmp ult i32 %iv, %extract + br i1 %backedge.cond, label %loop, label %exit + +exit: + ret i32 %iv +} + +define i32 @vp_store(<2 x i32> %inv.v, ptr %inv.p, i1 %c) { +; CHECK-LABEL: define i32 @vp_store( +; CHECK-SAME: <2 x i32> [[INV_V:%.*]], ptr [[INV_P:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[COND_TRUE:.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C]], label %[[COND_TRUE]], label %[[EXIT:.*]] +; CHECK: [[COND_TRUE]]: +; CHECK-NEXT: call void @llvm.vp.store.v2i32.p0(<2 x i32> [[INV_V]], ptr [[INV_P]], <2 x i1> splat (i1 true), i32 2) +; CHECK-NEXT: [[BACKEDGE_COND:%.*]] = icmp ult i32 [[IV]], 10 +; CHECK-NEXT: br i1 [[BACKEDGE_COND]], label %[[LOOP]], label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[COND_TRUE]] ], [ [[IV]], %[[LOOP]] ] +; CHECK-NEXT: ret 
i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %cond.true, label %exit + +cond.true: + call void @llvm.vp.store.v2i32(<2 x i32> %inv.v, ptr %inv.p, <2 x i1> splat (i1 1), i32 2) + %backedge.cond = icmp ult i32 %iv, 10 + br i1 %backedge.cond, label %loop, label %exit + +exit: + ret i32 %iv +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll b/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll index d73900dcd0bea..83b494a1be0d3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll @@ -2288,7 +2288,7 @@ define void @tgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { } ;. ; CHECK: attributes #[[ATTR0]] = { "target-features"="+v" } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #[[ATTR2]] = { "vector-function-abi-variant"="_ZGVrNxv_acos(Sleef_acosdx_u10rvvm2)" } ; CHECK: attributes #[[ATTR3]] = { "vector-function-abi-variant"="_ZGVrNxv_acosf(Sleef_acosfx_u10rvvm2)" } ; CHECK: attributes #[[ATTR4]] = { "vector-function-abi-variant"="_ZGVrNxv_acosh(Sleef_acoshdx_u10rvvm2)" } diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll index 9acc6d6601292..09f583f9242d5 100644 --- a/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll +++ b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll @@ -39,5 +39,4 @@ declare <4 x float> @llvm.exp.v4f32(<4 x float>) #0 declare <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float>) #0 ; CHECK: attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; CHECK-NEXT: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) } attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
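
A note on the regenerated clang CHECK lines (an illustrative sketch for readers of the diff; the value names %vscale, %vnum and %svl are not taken from the tests): the vnum byte offset is now computed by shifting vscale rather than vnum, which is what allows the shift to carry nuw/nsw flags.

; Old form: the shifted operand is the caller-supplied vnum, which may be
; negative or large, so no wrap flags are possible on the shift.
;   %bytes = shl i64 %vnum, 4
;   %off   = mul i64 %bytes, %vscale
; New form: the shifted operand is vscale; 16 * vscale is a small positive
; value (presumably known from the target's vscale range), so nuw/nsw hold.
;   %svl = shl nuw nsw i64 %vscale, 4
;   %off = mul i64 %svl, %vnum

Both forms compute vnum * 16 * vscale; only the association of the multiply changes, and the tests were regenerated to match.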
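
The new LICM test shows the intended effect end to end: the speculatable, memory(none) calls (vector.reduce.umax, vp.umax) are hoisted out of the conditionally executed block, while vp.udiv, vp.load and vp.store stay inside the loop. As a minimal standalone sketch of the same property (the function @hoist_vscale_sketch and its values are hypothetical, not part of the patch), a call such as llvm.vscale can now be hoisted from a conditional block because executing it early has no side effects and cannot trap:

; RUN: opt -S -passes='loop-mssa(licm)' %s
define i64 @hoist_vscale_sketch(i1 %c, i64 %n) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
  br i1 %c, label %guarded, label %latch

guarded:
  ; memory(none) + speculatable: LICM may move this call to the preheader
  ; even though the block only runs on some iterations.
  %vs = call i64 @llvm.vscale.i64()
  br label %latch

latch:
  %val = phi i64 [ %vs, %guarded ], [ 0, %loop ]
  %iv.next = add i64 %iv, 1
  %cont = icmp ult i64 %iv.next, %n
  br i1 %cont, label %loop, label %exit

exit:
  ret i64 %val
}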