diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index b6cbecb6133f4..10b03bbcd33dc 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -226,6 +226,7 @@ static const Align kMinOriginAlignment = Align(4);
 static const Align kShadowTLSAlignment = Align(8);
 
 // These constants must be kept in sync with the ones in msan.h.
+// TODO: increase size to match SVE/SVE2/SME/SME2 limits
 static const unsigned kParamTLSSize = 800;
 static const unsigned kRetvalTLSSize = 800;
 
@@ -1544,6 +1545,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     }
   }
 
+  static bool isAArch64SVCount(Type *Ty) {
+    if (TargetExtType *TTy = dyn_cast<TargetExtType>(Ty))
+      return TTy->getName() == "aarch64.svcount";
+    return false;
+  }
+
+  // This is intended to match the "AArch64 Predicate-as-Counter Type" (aka
+  // 'target("aarch64.svcount")'), but not e.g., <vscale x 4 x i1>.
+  static bool isScalableNonVectorType(Type *Ty) {
+    if (!isAArch64SVCount(Ty))
+      LLVM_DEBUG(dbgs() << "isScalableNonVectorType: Unexpected type " << *Ty
+                        << "\n");
+
+    return Ty->isScalableTy() && !isa<VectorType>(Ty);
+  }
+
   void materializeChecks() {
 #ifndef NDEBUG
     // For assert below.
@@ -1672,6 +1689,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
       return Res;
     }
+    if (isScalableNonVectorType(OrigTy)) {
+      LLVM_DEBUG(dbgs() << "getShadowTy: Scalable non-vector type: " << *OrigTy
+                        << "\n");
+      return OrigTy;
+    }
+
     uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
     return IntegerType::get(*MS.C, TypeSize);
   }
@@ -2185,8 +2208,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
                         << *OrigIns << "\n");
       return;
     }
-#ifndef NDEBUG
+
     Type *ShadowTy = Shadow->getType();
+    if (isScalableNonVectorType(ShadowTy)) {
+      LLVM_DEBUG(dbgs() << "Skipping check of scalable non-vector " << *Shadow
+                        << " before " << *OrigIns << "\n");
+      return;
+    }
+#ifndef NDEBUG
     assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy) ||
             isa<StructType>(ShadowTy) || isa<ArrayType>(ShadowTy)) &&
            "Can only insert checks for integer, vector, and aggregate shadow "
@@ -6972,6 +7001,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       // an extra "select". This results in much more compact IR.
       // Sa = select Sb, poisoned, (select b, Sc, Sd)
       Sa1 = getPoisonedShadow(getShadowTy(I.getType()));
+    } else if (isScalableNonVectorType(I.getType())) {
+      // This is intended to handle target("aarch64.svcount"), which can't be
+      // handled in the else branch because of incompatibility with CreateXor
+      // ("The supported LLVM operations on this type are limited to load,
+      // store, phi, select and alloca instructions").
+
+      // TODO: this currently underapproximates. Use Arm SVE EOR in the else
+      // branch as needed instead.
+      Sa1 = getCleanShadow(getShadowTy(I.getType()));
     } else {
       // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
       // If Sb (condition is poisoned), look for bits in c and d that are equal
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll
index 1c869bd41b931..e7491e985fa26 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll
@@ -1,14 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=msan -mattr=+sme -o - %s
-
-; XFAIL: *
+; RUN: opt -S -passes=msan -mattr=+sme -o - %s | FileCheck %s
 
 ; Forked from llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll
 ; Manually minimized to show MSan leads to a compiler crash
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64--linux-android9001"
 
 define target("aarch64.svcount") @test_return_arg1(target("aarch64.svcount") %arg0, target("aarch64.svcount") %arg1) nounwind {
+; CHECK-LABEL: @test_return_arg1(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store target("aarch64.svcount") zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret target("aarch64.svcount") [[ARG1:%.*]]
+;
   ret target("aarch64.svcount") %arg1
 }
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll
index 00cf3204464d0..e1ea9e68aefc3 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll
@@ -1,7 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=msan -mattr=+sme -o - %s
-
-; XFAIL: *
+; RUN: opt -S -passes=msan -mattr=+sme -o - %s | FileCheck %s
 
 ; Forked from llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll
 
@@ -12,16 +10,49 @@ target triple = "aarch64--linux-android9001"
 ;
 ; Test simple loads, stores and return.
 ;
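A note on the CHECK patterns in the tests below: on AArch64, MSan maps an
application address to its shadow by XOR-ing with 0xB00000000000, which is the
193514046488576 constant that appears in the instrumented stores, and with
this patch the shadow of a target("aarch64.svcount") value is a value of the
same target extension type whose fully-initialized ("clean") value is
zeroinitializer. A minimal sketch of that address mapping as standalone IR
(the function and value names here are illustrative, not part of the patch):

  define void @shadow_addr_sketch(ptr %p) {
    %app = ptrtoint ptr %p to i64
    %shadow = xor i64 %app, 193514046488576   ; 0xB00000000000, AArch64 shadow base
    %sptr = inttoptr i64 %shadow to ptr
    store i8 0, ptr %sptr                     ; mark one application byte as initialized
    ret void
  }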
 define target("aarch64.svcount") @test_load(ptr %ptr) nounwind {
+; CHECK-LABEL: @test_load(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[RES:%.*]] = load target("aarch64.svcount"), ptr [[PTR:%.*]], align 2
+; CHECK-NEXT:    store target("aarch64.svcount") zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret target("aarch64.svcount") [[RES]]
+;
   %res = load target("aarch64.svcount"), ptr %ptr
   ret target("aarch64.svcount") %res
 }
 
 define void @test_store(ptr %ptr, target("aarch64.svcount") %val) nounwind {
+; CHECK-LABEL: @test_store(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT:    store target("aarch64.svcount") zeroinitializer, ptr [[TMP3]], align 2
+; CHECK-NEXT:    store target("aarch64.svcount") [[VAL:%.*]], ptr [[PTR]], align 2
+; CHECK-NEXT:    ret void
+;
   store target("aarch64.svcount") %val, ptr %ptr
   ret void
 }
 
 define target("aarch64.svcount") @test_alloca_store_reload(target("aarch64.svcount") %val) nounwind {
+; CHECK-LABEL: @test_alloca_store_reload(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[PTR:%.*]] = alloca target("aarch64.svcount"), align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[PTR]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
+; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[TMP5]], i8 0, i64 [[TMP2]], i1 false)
+; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[PTR]] to i64
+; CHECK-NEXT:    [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576
+; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT:    store target("aarch64.svcount") zeroinitializer, ptr [[TMP8]], align 2
+; CHECK-NEXT:    store target("aarch64.svcount") [[VAL:%.*]], ptr [[PTR]], align 2
+; CHECK-NEXT:    [[RES:%.*]] = load target("aarch64.svcount"), ptr [[PTR]], align 2
+; CHECK-NEXT:    store target("aarch64.svcount") zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret target("aarch64.svcount") [[RES]]
+;
   %ptr = alloca target("aarch64.svcount"), align 1
   store target("aarch64.svcount") %val, ptr %ptr
   %res = load target("aarch64.svcount"), ptr %ptr
@@ -33,10 +64,20 @@ define target("aarch64.svcount") @test_alloca_store_reload(target("aarch64.svcou
 ;
 
 define target("aarch64.svcount") @test_return_arg1(target("aarch64.svcount") %arg0, target("aarch64.svcount") %arg1) nounwind {
+; CHECK-LABEL: @test_return_arg1(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store target("aarch64.svcount") zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret target("aarch64.svcount") [[ARG1:%.*]]
+;
   ret target("aarch64.svcount") %arg1
 }
 
 define target("aarch64.svcount") @test_return_arg4(target("aarch64.svcount") %arg0, target("aarch64.svcount") %arg1, target("aarch64.svcount") %arg2, target("aarch64.svcount") %arg3, target("aarch64.svcount") %arg4) nounwind {
+; CHECK-LABEL: @test_return_arg4(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store target("aarch64.svcount") zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret target("aarch64.svcount") [[ARG4:%.*]]
+;
   ret target("aarch64.svcount") %arg4
 }
 
@@ -46,22 +87,58 @@ define target("aarch64.svcount") @test_return_arg4(target("aarch64.svcount") %ar
 declare void @take_svcount_1(target("aarch64.svcount") %arg)
 
 define void @test_pass_1arg(target("aarch64.svcount") %arg) nounwind {
+; CHECK-LABEL: @test_pass_1arg(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    call void @take_svcount_1(target("aarch64.svcount") [[ARG:%.*]])
+; CHECK-NEXT:    ret void
+;
   call void @take_svcount_1(target("aarch64.svcount") %arg)
   ret void
 }
 
 declare void @take_svcount_5(target("aarch64.svcount") %arg0, target("aarch64.svcount") %arg1, target("aarch64.svcount") %arg2, target("aarch64.svcount") %arg3, target("aarch64.svcount") %arg4)
 
 define void @test_pass_5args(target("aarch64.svcount") %arg) nounwind {
+; CHECK-LABEL: @test_pass_5args(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    call void @take_svcount_5(target("aarch64.svcount") [[ARG:%.*]], target("aarch64.svcount") [[ARG]], target("aarch64.svcount") [[ARG]], target("aarch64.svcount") [[ARG]], target("aarch64.svcount") [[ARG]])
+; CHECK-NEXT:    ret void
+;
   call void @take_svcount_5(target("aarch64.svcount") %arg, target("aarch64.svcount") %arg, target("aarch64.svcount") %arg, target("aarch64.svcount") %arg, target("aarch64.svcount") %arg)
   ret void
 }
 
 define target("aarch64.svcount") @test_sel(target("aarch64.svcount") %x, target("aarch64.svcount") %y, i1 %cmp) sanitize_memory {
+; CHECK-LABEL: @test_sel(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i1, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[CMP:%.*]], target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") zeroinitializer
+; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[TMP1]], target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") [[TMP2]]
+; CHECK-NEXT:    [[X_Y:%.*]] = select i1 [[CMP]], target("aarch64.svcount") [[X:%.*]], target("aarch64.svcount") [[Y:%.*]]
+; CHECK-NEXT:    store target("aarch64.svcount") [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret target("aarch64.svcount") [[X_Y]]
+;
   %x.y = select i1 %cmp, target("aarch64.svcount") %x, target("aarch64.svcount") %y
   ret target("aarch64.svcount") %x.y
 }
 
 define target("aarch64.svcount") @test_sel_cc(target("aarch64.svcount") %x, target("aarch64.svcount") %y, i32 %k) sanitize_memory {
+; CHECK-LABEL: @test_sel_cc(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[K:%.*]], -2147483648
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ugt i32 [[TMP4]], -2147483606
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], -2147483606
+; CHECK-NEXT:    [[TMP8:%.*]] = xor i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[K]], 42
+; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[CMP]], target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") zeroinitializer
+; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[TMP8]], target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") [[TMP9]]
+; CHECK-NEXT:    [[X_Y:%.*]] = select i1 [[CMP]], target("aarch64.svcount") [[X:%.*]], target("aarch64.svcount") [[Y:%.*]]
+; CHECK-NEXT:    store target("aarch64.svcount") [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret target("aarch64.svcount") [[X_Y]]
+;
   %cmp = icmp sgt i32 %k, 42
   %x.y = select i1 %cmp, target("aarch64.svcount") %x, target("aarch64.svcount") %y
   ret target("aarch64.svcount") %x.y
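The _MSPROP_SELECT lines in test_sel and test_sel_cc above exercise the new
visitSelectInst special case. For ordinary types, MSan propagates select
shadow as Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]; because xor
and or are not legal on target("aarch64.svcount"), the patch substitutes a
clean shadow for the first arm, which can miss reports when the condition
itself is poisoned (the underapproximation flagged by the TODO in the C++
hunk). For contrast, a sketch of the ordinary integer propagation, written as
a standalone function over explicit shadow values (%sb, %sc, %sd are
illustrative names for the operand shadows, not MSan-generated names):

  define i32 @select_shadow_sketch(i1 %b, i1 %sb, i32 %c, i32 %d, i32 %sc, i32 %sd) {
    %diff = xor i32 %c, %d      ; bits where c and d disagree
    %tmp = or i32 %diff, %sc
    %bad = or i32 %tmp, %sd     ; result shadow if the condition is poisoned
    %sel = select i1 %b, i32 %sc, i32 %sd
    %sa = select i1 %sb, i32 %bad, i32 %sel
    ret i32 %sa
  }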
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll
index 3f43efa233621..3ae73c5719c3a 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll
@@ -1,7 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s
-
-; XFAIL: *
+; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s | FileCheck %s
 
 ; Forked from llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
 ; Manually reduced to show MSan leads to a compiler crash
@@ -10,6 +8,19 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64--linux-android9001"
 
 define void @multi_vector_add_za_vg1x4_f32_tuple(i64 %stride, ptr %ptr) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x4_f32_tuple(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr getelementptr (i8, ptr @__msan_param_tls, i64 8), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[TMP2]], ptr [[PTR:%.*]])
+; CHECK-NEXT:    ret void
+;
   %1 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
   %2 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %1, ptr %ptr)
   ret void
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll
index cd04373c11d20..8d00b930abf95 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll
@@ -1,7 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s
-
-; XFAIL: *
+; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s | FileCheck %s
 
 ; Forked from llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
 
@@ -9,6 +7,27 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64--linux-android9001"
 
 define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_write_single_za_vg1x2_i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7:[0-9]+]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE:%.*]], <vscale x 4 x i32> [[ZN0:%.*]], <vscale x 4 x i32> [[ZN1:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_7]], <vscale x 4 x i32> [[ZN0]], <vscale x 4 x i32> [[ZN1]], <vscale x 4 x i32> [[ZM]])
+; CHECK-NEXT:    ret void
+;
   call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 %slice,
                                                                 <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
                                                                 <vscale x 4 x i32> %zm)
@@ -20,6 +39,27 @@ define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4
 }
 
 define void @multi_vector_add_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_write_single_za_vg1x2_i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE:%.*]], <vscale x 2 x i64> [[ZN0:%.*]], <vscale x 2 x i64> [[ZN1:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_7]], <vscale x 2 x i64> [[ZN0]], <vscale x 2 x i64> [[ZN1]], <vscale x 2 x i64> [[ZM]])
+; CHECK-NEXT:    ret void
+;
   call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 %slice,
                                                                 <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
                                                                 <vscale x 2 x i64> %zm)
@@ -32,6 +72,27 @@ define void @multi_vector_add_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2
 }
 
 define void @multi_vector_add_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+; CHECK-LABEL: @multi_vector_add_write_single_za_vg1x4_i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE:%.*]], <vscale x 4 x i32> [[ZN0:%.*]], <vscale x 4 x i32> [[ZN1:%.*]], <vscale x 4 x i32> [[ZN2:%.*]], <vscale x 4 x i32> [[ZN3:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_7]], <vscale x 4 x i32> [[ZN0]], <vscale x 4 x i32> [[ZN1]], <vscale x 4 x i32> [[ZN2]], <vscale x 4 x i32> [[ZN3]], <vscale x 4 x i32> [[ZM]])
+; CHECK-NEXT:    ret void
+;
                                                          <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zm) sanitize_memory {
   call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 %slice,
@@ -47,6 +108,27 @@ define void @multi_vector_add_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4
 }
 
 define void @multi_vector_add_write_single_za_vg1x4_i64(i32 %slice,
+; CHECK-LABEL: @multi_vector_add_write_single_za_vg1x4_i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE:%.*]], <vscale x 2 x i64> [[ZN0:%.*]], <vscale x 2 x i64> [[ZN1:%.*]], <vscale x 2 x i64> [[ZN2:%.*]], <vscale x 2 x i64> [[ZN3:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_7]], <vscale x 2 x i64> [[ZN0]], <vscale x 2 x i64> [[ZN1]], <vscale x 2 x i64> [[ZN2]], <vscale x 2 x i64> [[ZN3]], <vscale x 2 x i64> [[ZM]])
+; CHECK-NEXT:    ret void
+;
                                                          <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zm) sanitize_memory {
   call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 %slice,
@@ -64,6 +146,27 @@ define void @multi_vector_add_write_single_za_vg1x4_i64(i32 %slice,
 }
 
 define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+; CHECK-LABEL: @multi_vector_add_write_za_vg1x2_i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE:%.*]], <vscale x 4 x i32> [[ZN0:%.*]], <vscale x 4 x i32> [[ZN1:%.*]], <vscale x 4 x i32> [[ZM1:%.*]], <vscale x 4 x i32> [[ZM2:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE_7]], <vscale x 4 x i32> [[ZN0]], <vscale x 4 x i32> [[ZN1]], <vscale x 4 x i32> [[ZM1]], <vscale x 4 x i32> [[ZM2]])
+; CHECK-NEXT:    ret void
+;
                                                   <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) sanitize_memory {
   call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
@@ -77,6 +180,27 @@ define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32>
 }
 
 define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+; CHECK-LABEL: @multi_vector_add_write_za_vg1x2_i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE:%.*]], <vscale x 2 x i64> [[ZN0:%.*]], <vscale x 2 x i64> [[ZN1:%.*]], <vscale x 2 x i64> [[ZM1:%.*]], <vscale x 2 x i64> [[ZM2:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE_7]], <vscale x 2 x i64> [[ZN0]], <vscale x 2 x i64> [[ZN1]], <vscale x 2 x i64> [[ZM1]], <vscale x 2 x i64> [[ZM2]])
+; CHECK-NEXT:    ret void
+;
                                                   <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) sanitize_memory {
   call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
                                                          <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
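The pattern repeated in each function above and below: only the i32 %slice
argument gets a shadow check. Its shadow is loaded from @__msan_param_tls,
compared with zero, and a branch that the !prof metadata (PROF1) marks as
unlikely calls @__msan_warning_noreturn() (ATTR7 is a noreturn attribute
group). The scalable-vector operands contribute no checks in these tests; the
TODO added beside kParamTLSSize in the C++ hunk suggests the fixed 800-byte
TLS regions are not yet sized for SVE/SME parameter shadows.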
@@ -91,6 +215,27 @@ define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64>
 }
 
 define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+; CHECK-LABEL: @multi_vector_add_write_za_vg1x4_i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE:%.*]], <vscale x 4 x i32> [[ZN0:%.*]], <vscale x 4 x i32> [[ZN1:%.*]], <vscale x 4 x i32> [[ZN2:%.*]], <vscale x 4 x i32> [[ZN3:%.*]], <vscale x 4 x i32> [[ZM0:%.*]], <vscale x 4 x i32> [[ZM1:%.*]], <vscale x 4 x i32> [[ZM2:%.*]], <vscale x 4 x i32> [[ZM3:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE_7]], <vscale x 4 x i32> [[ZN0]], <vscale x 4 x i32> [[ZN1]], <vscale x 4 x i32> [[ZN2]], <vscale x 4 x i32> [[ZN3]], <vscale x 4 x i32> [[ZM0]], <vscale x 4 x i32> [[ZM1]], <vscale x 4 x i32> [[ZM2]], <vscale x 4 x i32> [[ZM3]])
+; CHECK-NEXT:    ret void
+;
                                                   <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zm0, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3) sanitize_memory {
   call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 %slice,
@@ -109,6 +254,27 @@ define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32>
 }
 
 define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+; CHECK-LABEL: @multi_vector_add_write_za_vg1x4_i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE:%.*]], <vscale x 2 x i64> [[ZN0:%.*]], <vscale x 2 x i64> [[ZN1:%.*]], <vscale x 2 x i64> [[ZN2:%.*]], <vscale x 2 x i64> [[ZN3:%.*]], <vscale x 2 x i64> [[ZM0:%.*]], <vscale x 2 x i64> [[ZM1:%.*]], <vscale x 2 x i64> [[ZM2:%.*]], <vscale x 2 x i64> [[ZM3:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE_7]], <vscale x 2 x i64> [[ZN0]], <vscale x 2 x i64> [[ZN1]], <vscale x 2 x i64> [[ZN2]], <vscale x 2 x i64> [[ZN3]], <vscale x 2 x i64> [[ZM0]], <vscale x 2 x i64> [[ZM1]], <vscale x 2 x i64> [[ZM2]], <vscale x 2 x i64> [[ZM3]])
+; CHECK-NEXT:    ret void
+;
                                                   <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zm0, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3) sanitize_memory {
   call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 %slice,
@@ -127,6 +293,27 @@ define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64>
 }
 
 define void @multi_vector_add_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x2_i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE:%.*]], <vscale x 4 x i32> [[ZN0:%.*]], <vscale x 4 x i32> [[ZN1:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE_7]], <vscale x 4 x i32> [[ZN0]], <vscale x 4 x i32> [[ZN1]])
+; CHECK-NEXT:    ret void
+;
   call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1)
   %slice.7 = add i32 %slice, 7
   call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 %slice.7, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1)
@@ -134,6 +321,27 @@ define void @multi_vector_add_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0,
 }
 
 define void @multi_vector_add_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x2_i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE:%.*]], <vscale x 2 x i64> [[ZN0:%.*]], <vscale x 2 x i64> [[ZN1:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE_7]], <vscale x 2 x i64> [[ZN0]], <vscale x 2 x i64> [[ZN1]])
+; CHECK-NEXT:    ret void
+;
   call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1)
   %slice.7 = add i32 %slice, 7
   call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 %slice.7, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1)
@@ -141,6 +349,27 @@ define void @multi_vector_add_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0,
 }
 
 define void @multi_vector_add_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x2_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 [[SLICE:%.*]], <vscale x 4 x float> [[ZN0:%.*]], <vscale x 4 x float> [[ZN1:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 [[SLICE_7]], <vscale x 4 x float> [[ZN0]], <vscale x 4 x float> [[ZN1]])
+; CHECK-NEXT:    ret void
+;
   call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 %slice, <vscale x 4 x float> %zn0,
                                                      <vscale x 4 x float> %zn1)
   %slice.7 = add i32 %slice, 7
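A worked reading of the _MSPROP lines above: for %slice.7 = add i32 %slice, 7,
MSan propagates shadow as the OR of the operand shadows, and the constant 7
contributes an all-zero shadow, so _MSPROP = or i32 (shadow of %slice), 0. The
second guarded intrinsic call then branches on _MSPROP rather than on the raw
parameter shadow; a later cleanup pass would typically fold the "or ..., 0"
away, but the autogenerated checks capture the unoptimized instrumentation.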
@@ -150,6 +379,27 @@ define void @multi_vector_add_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn
 }
 
 define void @multi_vector_add_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x2_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 [[SLICE:%.*]], <vscale x 2 x double> [[ZN0:%.*]], <vscale x 2 x double> [[ZN1:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 [[SLICE_7]], <vscale x 2 x double> [[ZN0]], <vscale x 2 x double> [[ZN1]])
+; CHECK-NEXT:    ret void
+;
   call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 %slice, <vscale x 2 x double> %zn0,
                                                      <vscale x 2 x double> %zn1)
   %slice.7 = add i32 %slice, 7
@@ -159,6 +409,36 @@ define void @multi_vector_add_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %z
 }
 
 define void @multi_vector_add_za_vg1x2_f64_tuple(i64 %stride, ptr %ptr) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x2_f64_tuple(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr getelementptr (i8, ptr @__msan_param_tls, i64 8), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") [[TMP2]], ptr [[PTR:%.*]])
+; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]], 1
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[STRIDE:%.*]]
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i64 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK:       8:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       9:
+; CHECK-NEXT:    [[TMP10:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") [[TMP2]], ptr [[ARRAYIDX2]])
+; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP10]], 0
+; CHECK-NEXT:    [[TMP12:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP10]], 1
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 0, <vscale x 2 x double> [[TMP6]], <vscale x 2 x double> [[TMP11]])
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 0, <vscale x 2 x double> [[TMP7]], <vscale x 2 x double> [[TMP12]])
+; CHECK-NEXT:    ret void
+;
 entry:
   %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
   %1 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") %0, ptr %ptr)
@@ -175,6 +455,27 @@ entry:
 }
 
 define void @multi_vector_add_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x4_i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE:%.*]], <vscale x 4 x i32> [[ZN0:%.*]], <vscale x 4 x i32> [[ZN1:%.*]], <vscale x 4 x i32> [[ZN2:%.*]], <vscale x 4 x i32> [[ZN3:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE_7]], <vscale x 4 x i32> [[ZN0]], <vscale x 4 x i32> [[ZN1]], <vscale x 4 x i32> [[ZN2]], <vscale x 4 x i32> [[ZN3]])
+; CHECK-NEXT:    ret void
+;
   call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 %slice,
                                                      <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
                                                      <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3)
@@ -186,6 +487,27 @@ define void @multi_vector_add_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0,
 }
 
 define void @multi_vector_add_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x4_i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE:%.*]], <vscale x 2 x i64> [[ZN0:%.*]], <vscale x 2 x i64> [[ZN1:%.*]], <vscale x 2 x i64> [[ZN2:%.*]], <vscale x 2 x i64> [[ZN3:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE_7]], <vscale x 2 x i64> [[ZN0]], <vscale x 2 x i64> [[ZN1]], <vscale x 2 x i64> [[ZN2]], <vscale x 2 x i64> [[ZN3]])
+; CHECK-NEXT:    ret void
+;
   call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 %slice,
                                                      <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
                                                      <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3)
@@ -197,6 +519,27 @@ define void @multi_vector_add_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0,
 }
 
 define void @multi_vector_add_za_vg1x4_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x4_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 [[SLICE:%.*]], <vscale x 4 x float> [[ZN0:%.*]], <vscale x 4 x float> [[ZN1:%.*]], <vscale x 4 x float> [[ZN2:%.*]], <vscale x 4 x float> [[ZN3:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 [[SLICE_7]], <vscale x 4 x float> [[ZN0]], <vscale x 4 x float> [[ZN1]], <vscale x 4 x float> [[ZN2]], <vscale x 4 x float> [[ZN3]])
+; CHECK-NEXT:    ret void
+;
   call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 %slice,
                                                      <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1,
                                                      <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3)
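The tuple test that follows is the full version of the reproducer kept in
sme2-intrinsics-add-mini.ll: an svcount predicate produced by
@llvm.aarch64.sve.ptrue.c8() feeds @llvm.aarch64.sve.ld1.pn.x4 loads whose
struct-of-scalable-vector results update ZA. The svcount value itself is never
checked (insertShadowCheck now skips scalable non-vector shadows); only the
i64 pointer and stride shadows are checked before each load, with
getelementptr propagating shadow as the OR of its pointer and index shadows.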
@@ -208,6 +551,73 @@ define void @multi_vector_add_za_vg1x4_f32(i32 %slice, <vscale x 4 x float> %zn
 }
 
 define void @multi_vector_add_za_vg1x4_f32_tuple(i64 %stride, ptr %ptr) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x4_f32_tuple(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr getelementptr (i8, ptr @__msan_param_tls, i64 8), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[TMP2]], ptr [[PTR:%.*]])
+; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]], 3
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[STRIDE:%.*]]
+; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK:       10:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       11:
+; CHECK-NEXT:    [[TMP12:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[TMP2]], ptr [[ARRAYIDX2]])
+; CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP12]], 0
+; CHECK-NEXT:    [[TMP14:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP12]], 1
+; CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP12]], 2
+; CHECK-NEXT:    [[TMP16:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP12]], 3
+; CHECK-NEXT:    [[TMP17:%.*]] = shl i64 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP18:%.*]] = or i64 [[TMP17]], 0
+; CHECK-NEXT:    [[MUL3:%.*]] = shl i64 [[STRIDE]], 1
+; CHECK-NEXT:    [[_MSPROP1:%.*]] = or i64 [[TMP0]], [[TMP18]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[MUL3]]
+; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i64 [[_MSPROP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP4]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]]
+; CHECK:       19:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       20:
+; CHECK-NEXT:    [[TMP21:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[TMP2]], ptr [[ARRAYIDX4]])
+; CHECK-NEXT:    [[TMP22:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP21]], 0
+; CHECK-NEXT:    [[TMP23:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP21]], 1
+; CHECK-NEXT:    [[TMP24:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP21]], 2
+; CHECK-NEXT:    [[TMP25:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP21]], 3
+; CHECK-NEXT:    [[MSPROP_MUL_CST:%.*]] = mul i64 [[TMP1]], 1
+; CHECK-NEXT:    [[MUL5:%.*]] = mul i64 [[STRIDE]], 3
+; CHECK-NEXT:    [[_MSPROP2:%.*]] = or i64 [[TMP0]], [[MSPROP_MUL_CST]]
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[MUL5]]
+; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i64 [[_MSPROP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP5]], label [[TMP26:%.*]], label [[TMP27:%.*]], !prof [[PROF1]]
+; CHECK:       26:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       27:
+; CHECK-NEXT:    [[TMP28:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[TMP2]], ptr [[ARRAYIDX6]])
+; CHECK-NEXT:    [[TMP29:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP28]], 0
+; CHECK-NEXT:    [[TMP30:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP28]], 1
+; CHECK-NEXT:    [[TMP31:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP28]], 2
+; CHECK-NEXT:    [[TMP32:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP28]], 3
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> [[TMP6]], <vscale x 4 x float> [[TMP13]], <vscale x 4 x float> [[TMP22]], <vscale x 4 x float> [[TMP29]])
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> [[TMP7]], <vscale x 4 x float> [[TMP14]], <vscale x 4 x float> [[TMP23]], <vscale x 4 x float> [[TMP30]])
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> [[TMP8]], <vscale x 4 x float> [[TMP15]], <vscale x 4 x float> [[TMP24]], <vscale x 4 x float> [[TMP31]])
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> [[TMP9]], <vscale x 4 x float> [[TMP16]], <vscale x 4 x float> [[TMP25]], <vscale x 4 x float> [[TMP32]])
+; CHECK-NEXT:    ret void
+;
 entry:
   %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
   %1 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %0, ptr %ptr)
@@ -243,6 +653,27 @@ entry:
 }
 
 define void @multi_vector_add_za_vg1x4_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3) sanitize_memory {
+; CHECK-LABEL: @multi_vector_add_za_vg1x4_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 [[SLICE:%.*]], <vscale x 2 x double> [[ZN0:%.*]], <vscale x 2 x double> [[ZN1:%.*]], <vscale x 2 x double> [[ZN2:%.*]], <vscale x 2 x double> [[ZN3:%.*]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], 0
+; CHECK-NEXT:    [[SLICE_7:%.*]] = add i32 [[SLICE]], 7
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 [[SLICE_7]], <vscale x 2 x double> [[ZN0]], <vscale x 2 x double> [[ZN1]], <vscale x 2 x double> [[ZN2]], <vscale x 2 x double> [[ZN3]])
+; CHECK-NEXT:    ret void
+;
   call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 %slice,
                                                      <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1,
                                                      <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3)
@@ -255,6 +686,12 @@ define void @multi_vector_add_za_vg1x4_f64(i32 %slice, <vscale x 2 x double> %z
 }
 
 define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_add_single_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_single_x2_s8(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[RES:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.add.single.x2.nxv16i8(<vscale x 16 x i8> [[ZDN1:%.*]], <vscale x 16 x i8> [[ZDN2:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+; CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[RES]]
+;
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> }
               @llvm.aarch64.sve.add.single.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2,
                                                       <vscale x 16 x i8> %zm)
@@ -262,6 +699,12 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_add_single_x2_s8(
 }
 
 define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_add_single_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_single_x2_s16(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[RES:%.*]] = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.add.single.x2.nxv8i16(<vscale x 8 x i16> [[ZDN1:%.*]], <vscale x 8 x i16> [[ZDN2:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+; CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[RES]]
+;
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> }
              @llvm.aarch64.sve.add.single.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2,
                                                      <vscale x 8 x i16> %zm)
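In the hunks above and below, the x2/x4 "add single" intrinsics return structs
of scalable vectors, and the instrumentation stores a clean (zeroinitializer)
aggregate shadow to @__msan_retval_tls instead of propagating operand shadows,
mirroring the conservative handling of scalable values elsewhere in this
patch. An instrumented caller reads that same TLS slot back after the call; a
minimal sketch (the caller function is hypothetical, not part of this test):

  define { <vscale x 16 x i8>, <vscale x 16 x i8> } @caller_sketch(<vscale x 16 x i8> %a) sanitize_memory {
    %r = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_add_single_x2_s8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %a, <vscale x 16 x i8> %a, <vscale x 16 x i8> %a)
    ; MSan inserts a load of { <vscale x 16 x i8>, <vscale x 16 x i8> } from
    ; @__msan_retval_tls here and uses it as the shadow of %r.
    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %r
  }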
@@ -269,6 +712,12 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_add_single_x2_s16
 }
 
 define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_add_single_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_single_x2_s32(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[RES:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.add.single.x2.nxv4i32(<vscale x 4 x i32> [[ZDN1:%.*]], <vscale x 4 x i32> [[ZDN2:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+; CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[RES]]
+;
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> }
              @llvm.aarch64.sve.add.single.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2,
                                                      <vscale x 4 x i32> %zm)
@@ -276,6 +725,12 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_add_single_x2_s32
 }
 
 define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_add_single_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_single_x2_s64(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[RES:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x2.nxv2i64(<vscale x 2 x i64> [[ZDN1:%.*]], <vscale x 2 x i64> [[ZDN2:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
+; CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[RES]]
+;
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> }
              @llvm.aarch64.sve.add.single.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2,
                                                      <vscale x 2 x i64> %zm)
@@ -284,6 +739,12 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_add_single_x2_s64
 }
 
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_add_single_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_single_x4_s8(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[RES:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.add.single.x4.nxv16i8(<vscale x 16 x i8> [[ZDN1:%.*]], <vscale x 16 x i8> [[ZDN2:%.*]], <vscale x 16 x i8> [[ZDN3:%.*]], <vscale x 16 x i8> [[ZDN4:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+; CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[RES]]
+;
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
              @llvm.aarch64.sve.add.single.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
                                                      <vscale x 16 x i8> %zm)
@@ -292,6 +753,12 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x
 }
 
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_add_x4_single_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_x4_single_s16(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[RES:%.*]] = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.add.single.x4.nxv8i16(<vscale x 8 x i16> [[ZDN1:%.*]], <vscale x 8 x i16> [[ZDN2:%.*]], <vscale x 8 x i16> [[ZDN3:%.*]], <vscale x 8 x i16> [[ZDN4:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+; CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[RES]]
+;
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
              @llvm.aarch64.sve.add.single.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
                                                      <vscale x 8 x i16> %zm)
@@ -300,6 +767,12 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x
 }
 
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_add_x4_single_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_x4_single_s32(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[RES:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.add.single.x4.nxv4i32(<vscale x 4 x i32> [[ZDN1:%.*]], <vscale x 4 x i32> [[ZDN2:%.*]], <vscale x 4 x i32> [[ZDN3:%.*]], <vscale x 4 x i32> [[ZDN4:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+; CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[RES]]
+;
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
              @llvm.aarch64.sve.add.single.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
                                                      <vscale x 4 x i32> %zm)
@@ -308,6 +781,12 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x
 }
 
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_add_x4_single_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) sanitize_memory {
+; CHECK-LABEL: @multi_vec_add_x4_single_s64(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[RES:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x4.nxv2i64(<vscale x 2 x i64> [[ZDN1:%.*]], <vscale x 2 x i64> [[ZDN2:%.*]], <vscale x 2 x i64> [[ZDN3:%.*]], <vscale x 2 x i64> [[ZDN4:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
+; CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[RES]]
+;
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
              @llvm.aarch64.sve.add.single.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,