-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64][SME]Update intrinsic interface for ld1/st1 #65582
[AArch64][SME]Update intrinsic interface for ld1/st1 #65582
Conversation
The new ACLE PR#225 [1] now combines the slice parameters for some builtins. This patch is the first of 3 patches to update the interface. Slice specifies the ZA slice number directly and needs to be explicitly implemented by the "user" with the base register plus the immediate offset. [1] https://github.com/ARM-software/acle/pull/225/files
@llvm/pr-subscribers-clang Changes: The new ACLE PR#225 [1] now combines the slice parameters for some builtins. This patch is the first of 3 patches to update the interface. Slice specifies the ZA slice number directly and needs to be explicitly implemented by the "user" with the base register plus the immediate offset. [1] https://github.com/ARM-software/acle/pull/225/files Patch is 41.06 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/65582.diff 8 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 0eb1e647bf03eaa..d176aeb2d2b4f00 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -20,29 +20,29 @@ include "arm_sve_sme_incl.td" multiclass ZALoad ch> { let TargetGuard = "sme" in { - def NAME # _H : MInst<"svld1_hor_" # n_suffix, "vimiPQ", t, + def NAME # _H : MInst<"svld1_hor_" # n_suffix, "vimPQ", t, [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, i_prefix # "_horiz", ch>; - def NAME # _H_VNUM : MInst<"svld1_hor_vnum_" # n_suffix, "vimiPQl", t, + def NAME # _H_VNUM : MInst<"svld1_hor_vnum_" # n_suffix, "vimPQl", t, [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, i_prefix # "_horiz", ch>; - def NAME # _V : MInst<"svld1_ver_" # n_suffix, "vimiPQ", t, + def NAME # _V : MInst<"svld1_ver_" # n_suffix, "vimPQ", t, [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, i_prefix # "_vert", ch>; - def NAME # _V_VNUM : MInst<"svld1_ver_vnum_" # n_suffix, "vimiPQl", t, + def NAME # _V_VNUM : MInst<"svld1_ver_vnum_" # n_suffix, "vimPQl", t, [IsLoad, IsOverloadNone, IsStreaming, IsSharedZA], MemEltTyDefault, i_prefix # "_vert", ch>; } } -defm SVLD1_ZA8 : ZALoad<"za8", "c", "aarch64_sme_ld1b", [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>; -defm SVLD1_ZA16 : ZALoad<"za16", "s", "aarch64_sme_ld1h", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>; -defm SVLD1_ZA32 : ZALoad<"za32", "i", "aarch64_sme_ld1w", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>; -defm SVLD1_ZA64 : ZALoad<"za64", "l", "aarch64_sme_ld1d", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>; -defm SVLD1_ZA128 : ZALoad<"za128", "q", "aarch64_sme_ld1q", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>; +defm SVLD1_ZA8 : ZALoad<"za8", "c", "aarch64_sme_ld1b", [ImmCheck<0, ImmCheck0_0>]>; +defm SVLD1_ZA16 : ZALoad<"za16", "s", "aarch64_sme_ld1h", [ImmCheck<0, ImmCheck0_1>]>; +defm 
SVLD1_ZA32 : ZALoad<"za32", "i", "aarch64_sme_ld1w", [ImmCheck<0, ImmCheck0_3>]>; +defm SVLD1_ZA64 : ZALoad<"za64", "l", "aarch64_sme_ld1d", [ImmCheck<0, ImmCheck0_7>]>; +defm SVLD1_ZA128 : ZALoad<"za128", "q", "aarch64_sme_ld1q", [ImmCheck<0, ImmCheck0_15>]>; def SVLDR_VNUM_ZA : MInst<"svldr_vnum_za", "vmQi", "", [IsOverloadNone, IsStreamingCompatible, IsSharedZA], @@ -58,29 +58,29 @@ def SVLDR_ZA : MInst<"svldr_za", "vmQ", "", multiclass ZAStore ch> { let TargetGuard = "sme" in { - def NAME # _H : MInst<"svst1_hor_" # n_suffix, "vimiP%", t, + def NAME # _H : MInst<"svst1_hor_" # n_suffix, "vimP%", t, [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, i_prefix # "_horiz", ch>; - def NAME # _H_VNUM : MInst<"svst1_hor_vnum_" # n_suffix, "vimiP%l", t, + def NAME # _H_VNUM : MInst<"svst1_hor_vnum_" # n_suffix, "vimP%l", t, [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, i_prefix # "_horiz", ch>; - def NAME # _V : MInst<"svst1_ver_" # n_suffix, "vimiP%", t, + def NAME # _V : MInst<"svst1_ver_" # n_suffix, "vimP%", t, [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, i_prefix # "_vert", ch>; - def NAME # _V_VNUM : MInst<"svst1_ver_vnum_" # n_suffix, "vimiP%l", t, + def NAME # _V_VNUM : MInst<"svst1_ver_vnum_" # n_suffix, "vimP%l", t, [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, i_prefix # "_vert", ch>; } } -defm SVST1_ZA8 : ZAStore<"za8", "c", "aarch64_sme_st1b", [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>; -defm SVST1_ZA16 : ZAStore<"za16", "s", "aarch64_sme_st1h", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>; -defm SVST1_ZA32 : ZAStore<"za32", "i", "aarch64_sme_st1w", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>; -defm SVST1_ZA64 : ZAStore<"za64", "l", "aarch64_sme_st1d", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>; -defm SVST1_ZA128 : ZAStore<"za128", "q", "aarch64_sme_st1q", [ImmCheck<0, 
ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>; +defm SVST1_ZA8 : ZAStore<"za8", "c", "aarch64_sme_st1b", [ImmCheck<0, ImmCheck0_0>]>; +defm SVST1_ZA16 : ZAStore<"za16", "s", "aarch64_sme_st1h", [ImmCheck<0, ImmCheck0_1>]>; +defm SVST1_ZA32 : ZAStore<"za32", "i", "aarch64_sme_st1w", [ImmCheck<0, ImmCheck0_3>]>; +defm SVST1_ZA64 : ZAStore<"za64", "l", "aarch64_sme_st1d", [ImmCheck<0, ImmCheck0_7>]>; +defm SVST1_ZA128 : ZAStore<"za128", "q", "aarch64_sme_st1q", [ImmCheck<0, ImmCheck0_15>]>; def SVSTR_VNUM_ZA : MInst<"svstr_vnum_za", "vm%i", "", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 1ee0c469af9ee8b..8414a6c59cc82bd 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9585,29 +9585,29 @@ Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) { Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { - Ops[3] = EmitSVEPredicateCast( - Ops[3], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags))); + Ops[2] = EmitSVEPredicateCast( + Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags))); SmallVector NewOps; - NewOps.push_back(Ops[3]); + NewOps.push_back(Ops[2]); - llvm::Value *BasePtr = Ops[4]; + llvm::Value *BasePtr = Ops[3]; // If the intrinsic contains the vnum parameter, multiply it with the vector // size in bytes. - if (Ops.size() == 6) { + if (Ops.size() == 5) { Function *StreamingVectorLength = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); llvm::Value *StreamingVectorLengthCall = Builder.CreateCall(StreamingVectorLength); llvm::Value *Mulvl = - Builder.CreateMul(StreamingVectorLengthCall, Ops[5], "mulvl"); + Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl"); // The type of the ptr parameter is void *, so use Int8Ty here. 
- BasePtr = Builder.CreateGEP(Int8Ty, Ops[4], Mulvl); + BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl); } NewOps.push_back(BasePtr); NewOps.push_back(Ops[0]); - NewOps.push_back(EmitTileslice(Ops[2], Ops[1])); + NewOps.push_back(Ops[1]); Function *F = CGM.getIntrinsic(IntID); return Builder.CreateCall(F, NewOps); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c index c309bde627f7df2..d88f583011d1139 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c @@ -20,8 +20,10 @@ // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) { - svld1_hor_za8(0, slice_base, 0, pg, ptr); - svld1_hor_za8(0, slice_base, 15, pg, ptr); + uint32_t slice = slice_base; + svld1_hor_za8(0, slice, pg, ptr); + slice = slice_base + 15; + svld1_hor_za8(0, slice, pg, ptr); } // CHECK-C-LABEL: @test_svld1_hor_za16( @@ -34,8 +36,10 @@ ARM_STREAMING_ATTR void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, con // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) { - svld1_hor_za16(0, slice_base, 0, pg, ptr); - svld1_hor_za16(1, slice_base, 7, pg, ptr); + uint32_t slice = slice_base; + svld1_hor_za16(0, slice, pg, ptr); + slice = slice_base + 7; + svld1_hor_za16(1, slice, pg, ptr); } // CHECK-C-LABEL: @test_svld1_hor_za32( @@ -48,8 +52,10 @@ ARM_STREAMING_ATTR void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, co // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) { - svld1_hor_za32(0, slice_base, 0, pg, ptr); - svld1_hor_za32(3, slice_base, 3, pg, ptr); + uint32_t slice = slice_base; + svld1_hor_za32(0, slice, pg, ptr); + slice = slice_base + 3; + svld1_hor_za32(3, slice, pg, ptr); } // CHECK-C-LABEL: @test_svld1_hor_za64( 
@@ -62,8 +68,10 @@ ARM_STREAMING_ATTR void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, co // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) { - svld1_hor_za64(0, slice_base, 0, pg, ptr); - svld1_hor_za64(7, slice_base, 1, pg, ptr); + uint32_t slice = slice_base; + svld1_hor_za64(0, slice, pg, ptr); + slice = slice_base + 1; + svld1_hor_za64(7, slice, pg, ptr); } // CHECK-C-LABEL: @test_svld1_hor_za128( @@ -75,8 +83,9 @@ ARM_STREAMING_ATTR void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, co // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) { - svld1_hor_za128(0, slice_base, 0, pg, ptr); - svld1_hor_za128(15, slice_base, 0, pg, ptr); + uint32_t slice = slice_base; + svld1_hor_za128(0, slice, pg, ptr); + svld1_hor_za128(15, slice, pg, ptr); } // CHECK-C-LABEL: @test_svld1_ver_za8( @@ -88,8 +97,10 @@ ARM_STREAMING_ATTR void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, c // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) { - svld1_ver_za8(0, slice_base, 0, pg, ptr); - svld1_ver_za8(0, slice_base, 15, pg, ptr); + uint32_t slice = slice_base; + svld1_ver_za8(0, slice, pg, ptr); + slice = slice_base + 15; + svld1_ver_za8(0, slice, pg, ptr); } // CHECK-C-LABEL: @test_svld1_ver_za16( @@ -102,8 +113,10 @@ ARM_STREAMING_ATTR void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, con // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) { - svld1_ver_za16(0, slice_base, 0, pg, ptr); - svld1_ver_za16(1, slice_base, 7, pg, ptr); + uint32_t slice = slice_base; + svld1_ver_za16(0, slice, pg, ptr); + slice = slice_base + 7; + svld1_ver_za16(1, slice, pg, ptr); } // CHECK-C-LABEL: @test_svld1_ver_za32( @@ -116,8 +129,10 @@ ARM_STREAMING_ATTR void 
test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, co // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) { - svld1_ver_za32(0, slice_base, 0, pg, ptr); - svld1_ver_za32(3, slice_base, 3, pg, ptr); + uint32_t slice = slice_base; + svld1_ver_za32(0, slice, pg, ptr); + slice = slice_base + 3; + svld1_ver_za32(3, slice, pg, ptr); } // CHECK-C-LABEL: @test_svld1_ver_za64( @@ -130,8 +145,10 @@ ARM_STREAMING_ATTR void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, co // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) { - svld1_ver_za64(0, slice_base, 0, pg, ptr); - svld1_ver_za64(7, slice_base, 1, pg, ptr); + uint32_t slice = slice_base; + svld1_ver_za64(0, slice, pg, ptr); + slice = slice_base + 1; + svld1_ver_za64(7, slice, pg, ptr); } // CHECK-C-LABEL: @test_svld1_ver_za128( @@ -143,6 +160,7 @@ ARM_STREAMING_ATTR void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, co // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void test_svld1_ver_za128(uint32_t slice_base, svbool_t pg, const void *ptr) { - svld1_ver_za128(0, slice_base, 0, pg, ptr); - svld1_ver_za128(15, slice_base, 0, pg, ptr); + uint32_t slice = slice_base; + svld1_ver_za128(0, slice, pg, ptr); + svld1_ver_za128(15, slice, pg, ptr); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c index 6c80ef55f81895d..8459358c4132ee8 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c @@ -23,8 +23,10 @@ // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { - svld1_hor_vnum_za8(0, slice_base, 0, pg, ptr, vnum); - svld1_hor_vnum_za8(0, slice_base, 15, pg, ptr, vnum); + uint32_t slice = slice_base; + 
svld1_hor_vnum_za8(0, slice, pg, ptr, vnum); + slice = slice_base + 15; + svld1_hor_vnum_za8(0, slice, pg, ptr, vnum); } // CHECK-C-LABEL: @test_svld1_hor_vnum_za16( @@ -40,8 +42,10 @@ ARM_STREAMING_ATTR void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { - svld1_hor_vnum_za16(0, slice_base, 0, pg, ptr, vnum); - svld1_hor_vnum_za16(1, slice_base, 7, pg, ptr, vnum); + uint32_t slice = slice_base; + svld1_hor_vnum_za16(0, slice, pg, ptr, vnum); + slice = slice_base + 7; + svld1_hor_vnum_za16(1, slice, pg, ptr, vnum); } // CHECK-C-LABEL: @test_svld1_hor_vnum_za32( @@ -57,8 +61,10 @@ ARM_STREAMING_ATTR void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t p // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { - svld1_hor_vnum_za32(0, slice_base, 0, pg, ptr, vnum); - svld1_hor_vnum_za32(3, slice_base, 3, pg, ptr, vnum); + uint32_t slice = slice_base; + svld1_hor_vnum_za32(0, slice, pg, ptr, vnum); + slice = slice_base + 3; + svld1_hor_vnum_za32(3, slice, pg, ptr, vnum); } // CHECK-C-LABEL: @test_svld1_hor_vnum_za64( @@ -74,8 +80,10 @@ ARM_STREAMING_ATTR void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t p // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { - svld1_hor_vnum_za64(0, slice_base, 0, pg, ptr, vnum); - svld1_hor_vnum_za64(7, slice_base, 1, pg, ptr, vnum); + uint32_t slice = slice_base; + svld1_hor_vnum_za64(0, slice, pg, ptr, vnum); + slice = slice_base + 1; + svld1_hor_vnum_za64(7, slice, pg, ptr, vnum); } // CHECK-C-LABEL: @test_svld1_hor_vnum_za128( @@ -90,8 +98,9 @@ ARM_STREAMING_ATTR void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t p // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void 
test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { - svld1_hor_vnum_za128(0, slice_base, 0, pg, ptr, vnum); - svld1_hor_vnum_za128(15, slice_base, 0, pg, ptr, vnum); + uint32_t slice = slice_base; + svld1_hor_vnum_za128(0, slice, pg, ptr, vnum); + svld1_hor_vnum_za128(15, slice, pg, ptr, vnum); } // CHECK-C-LABEL: @test_svld1_ver_hor_za8( @@ -106,8 +115,10 @@ ARM_STREAMING_ATTR void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { - svld1_ver_vnum_za8(0, slice_base, 0, pg, ptr, vnum); - svld1_ver_vnum_za8(0, slice_base, 15, pg, ptr, vnum); + uint32_t slice = slice_base; + svld1_ver_vnum_za8(0, slice, pg, ptr, vnum); + slice = slice_base + 15; + svld1_ver_vnum_za8(0, slice, pg, ptr, vnum); } // CHECK-C-LABEL: @test_svld1_ver_vnum_za16( @@ -123,8 +134,10 @@ ARM_STREAMING_ATTR void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { - svld1_ver_vnum_za16(0, slice_base, 0, pg, ptr, vnum); - svld1_ver_vnum_za16(1, slice_base, 7, pg, ptr, vnum); + uint32_t slice = slice_base; + svld1_ver_vnum_za16(0, slice, pg, ptr, vnum); + slice = slice_base + 7; + svld1_ver_vnum_za16(1, slice, pg, ptr, vnum); } // CHECK-C-LABEL: @test_svld1_ver_vnum_za32( @@ -140,8 +153,10 @@ ARM_STREAMING_ATTR void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t p // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { - svld1_ver_vnum_za32(0, slice_base, 0, pg, ptr, vnum); - svld1_ver_vnum_za32(3, slice_base, 3, pg, ptr, vnum); + uint32_t slice = slice_base; + svld1_ver_vnum_za32(0, slice, pg, ptr, vnum); + slice = slice_base + 3; + svld1_ver_vnum_za32(3, slice, pg, ptr, 
vnum); } // CHECK-C-LABEL: @test_svld1_ver_vnum_za64( @@ -157,8 +172,10 @@ ARM_STREAMING_ATTR void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t p // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { - svld1_ver_vnum_za64(0, slice_base, 0, pg, ptr, vnum); - svld1_ver_vnum_za64(7, slice_base, 1, pg, ptr, vnum); + uint32_t slice = slice_base; + svld1_ver_vnum_za64(0, slice, pg, ptr, vnum); + slice = slice_base + 1; + svld1_ver_vnum_za64(7, slice, pg, ptr, vnum); } // CHECK-C-LABEL: @test_svld1_ver_vnum_za128( @@ -173,6 +190,7 @@ ARM_STREAMING_ATTR void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t p // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) { - svld1_ver_vnum_za128(0, slice_base, 0, pg, ptr, vnum); - svld1_ver_vnum_za128(15, slice_base, 0, pg, ptr, vnum); + uint32_t slice = slice_base; + svld1_ver_vnum_za128(0, slice, pg, ptr, vnum); + svld1_ver_vnum_za128(15, slice, pg, ptr, vnum); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c index 067745f7d4a0592..744f3325d4b932e 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c @@ -20,8 +20,10 @@ // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, void *ptr) { - svst1_hor_za8(0, slice_base, 0, pg, ptr); - svst1_hor_za8(0, slice_base, 15, pg, ptr); + uint32_t slice = slice_base; + svst1_hor_za8(0, slice, pg, ptr); + slice = slice_base + 15; + svst1_hor_za8(0, slice, pg, ptr); } // CHECK-C-LABEL: @test_svst1_hor_za16( @@ -34,8 +36,10 @@ ARM_STREAMING_ATTR void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, voi // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void test_svst1_hor_za16(uint32_t slice_base, 
svbool_t pg, void *ptr) { - svst1_hor_za16(0, slice_base, 0, pg, ptr); - svst1_hor_za16(1, slice_base, 7, pg, ptr); + uint32_t slice = slice_base; + svst1_hor_za16(0, slice, pg, ptr); + slice = slice_base + 7; + svst1_hor_za16(1, slice, pg, ptr); } // CHECK-C-LABEL: @test_svst1_hor_za32( @@ -48,8 +52,10 @@ ARM_STREAMING_ATTR void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, vo // CHECK-NEXT: ret void // ARM_STREAMING_ATTR void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, void *ptr) { - svst1_hor_za32(0, slice_base, 0, pg, ptr); - svst1_hor_za32(3, slice_base, 3, pg, ptr); + uint32_t slice = slice_ba... |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with nit addressed!
The new ACLE PR#225 [1] now combines the slice parameters for some builtins. Slice specifies the ZA slice number directly and needs to be explicitly implemented by the "user" with the base register plus the immediate offset. [1] https://github.com/ARM-software/acle/pull/225/files
The new ACLE PR#225 [1] now combines the slice parameters for some builtins. This patch is the first of 3 patches to update the interface.
Slice specifies the ZA slice number directly and needs to be explicitly implemented by the "user" with the base register plus the immediate offset.
[1]https://github.com/ARM-software/acle/pull/225/files