diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index dcce325188bc4..91f62c4c76339 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1988,79 +1988,61 @@ def SVWHILELO_COUNT : SInst<"svwhilelt_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNo def SVWHILELS_COUNT : SInst<"svwhilele_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilels_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; def SVWHILEHI_COUNT : SInst<"svwhilegt_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehi_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; def SVWHILEHS_COUNT : SInst<"svwhilege_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehs_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +} + +multiclass MultiVecLoad<string i> { + // FIXME: Replace IsStreamingCompatible with IsStreamingOrHasSVE2p1 when available (SME2 requires __arm_streaming) + def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "cUc", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "sUshb", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "iUif", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "lUld", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "cUc", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "sUshb", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "iUif", [IsStructLoad, 
IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # D_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "lUld", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + + def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "cUc", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "sUshb", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "iUif", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "lUld", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "cUc", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "sUshb", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "iUif", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "lUld", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; +} + +let TargetGuard = "sve2p1|sme2" in { + defm LD1 : MultiVecLoad<"ld1">; + defm LDNT1 : MultiVecLoad<"ldnt1">; +} + +multiclass MultiVecStore<string i> { + // FIXME: Replace IsStreamingCompatible with IsStreamingOrHasSVE2p1 when available (SME2 requires __arm_streaming) + def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "cUc", [IsStructStore, IsStreamingCompatible], 
MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "sUshb", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "iUif", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "lUld", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "cUc", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "sUshb", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "iUif", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # D_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "lUld", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + + def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "cUc", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "sUshb", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "iUif", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "lUld", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "cUc", [IsStructStore, 
IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "sUshb", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "iUif", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "lUld", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; +} -def SVLD1B_X2 : MInst<"svld1[_{2}]_x2", "2}c", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">; -def SVLD1H_X2 : MInst<"svld1[_{2}]_x2", "2}c", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">; -def SVLD1W_X2 : MInst<"svld1[_{2}]_x2", "2}c", "iUif", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">; -def SVLD1D_X2 : MInst<"svld1[_{2}]_x2", "2}c", "lUld", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">; -def SVLD1B_X4 : MInst<"svld1[_{2}]_x4", "4}c", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x4">; -def SVLD1H_X4 : MInst<"svld1[_{2}]_x4", "4}c", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x4">; -def SVLD1W_X4 : MInst<"svld1[_{2}]_x4", "4}c", "iUif", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x4">; -def SVLD1D_X4 : MInst<"svld1[_{2}]_x4", "4}c", "lUld", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x4">; - -def SVLDNT1B_X2 : MInst<"svldnt1[_{2}]_x2", "2}c", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x2">; -def SVLDNT1H_X2 : MInst<"svldnt1[_{2}]_x2", "2}c", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x2">; -def SVLDNT1W_X2 : MInst<"svldnt1[_{2}]_x2", "2}c", "iUif", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x2">; -def SVLDNT1D_X2 : MInst<"svldnt1[_{2}]_x2", "2}c", "lUld", [IsStructLoad], MemEltTyDefault, 
"aarch64_sve_ldnt1_pn_x2">; -def SVLDNT1B_X4 : MInst<"svldnt1[_{2}]_x4", "4}c", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x4">; -def SVLDNT1H_X4 : MInst<"svldnt1[_{2}]_x4", "4}c", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x4">; -def SVLDNT1W_X4 : MInst<"svldnt1[_{2}]_x4", "4}c", "iUif", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x4">; -def SVLDNT1D_X4 : MInst<"svldnt1[_{2}]_x4", "4}c", "lUld", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x4">; - -def SVLD1B_VNUM_X2 : MInst<"svld1_vnum[_{2}]_x2", "2}cl", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">; -def SVLD1H_VNUM_X2 : MInst<"svld1_vnum[_{2}]_x2", "2}cl", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">; -def SVLD1W_VNUM_X2 : MInst<"svld1_vnum[_{2}]_x2", "2}cl", "iUif", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">; -def SVLD1D_VNUM_X2 : MInst<"svld1_vnum[_{2}]_x2", "2}cl", "lUld", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">; -def SVLD1B_VNUM_X4 : MInst<"svld1_vnum[_{2}]_x4", "4}cl", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x4">; -def SVLD1H_VNUM_X4 : MInst<"svld1_vnum[_{2}]_x4", "4}cl", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x4">; -def SVLD1W_VNUM_X4 : MInst<"svld1_vnum[_{2}]_x4", "4}cl", "iUif", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x4">; -def SVLD1D_VNUM_X4 : MInst<"svld1_vnum[_{2}]_x4", "4}cl", "lUld", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x4">; - -def SVLDNT1B_VNUM_X2 : MInst<"svldnt1_vnum[_{2}]_x2", "2}cl", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x2">; -def SVLDNT1H_VNUM_X2 : MInst<"svldnt1_vnum[_{2}]_x2", "2}cl", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x2">; -def SVLDNT1W_VNUM_X2 : MInst<"svldnt1_vnum[_{2}]_x2", "2}cl", "iUif", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x2">; -def SVLDNT1D_VNUM_X2 : MInst<"svldnt1_vnum[_{2}]_x2", "2}cl", "lUld", 
[IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x2">; -def SVLDNT1B_VNUM_X4 : MInst<"svldnt1_vnum[_{2}]_x4", "4}cl", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x4">; -def SVLDNT1H_VNUM_X4 : MInst<"svldnt1_vnum[_{2}]_x4", "4}cl", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x4">; -def SVLDNT1W_VNUM_X4 : MInst<"svldnt1_vnum[_{2}]_x4", "4}cl", "iUif", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x4">; -def SVLDNT1D_VNUM_X4 : MInst<"svldnt1_vnum[_{2}]_x4", "4}cl", "lUld", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x4">; - -def SVST1B_X2 : MInst<"svst1[_{2}_x2]", "v}p2", "cUc", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x2">; -def SVST1H_X2 : MInst<"svst1[_{2}_x2]", "v}p2", "sUshb", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x2">; -def SVST1W_X2 : MInst<"svst1[_{2}_x2]", "v}p2", "iUif", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x2">; -def SVST1D_X2 : MInst<"svst1[_{2}_x2]", "v}p2", "lUld", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x2">; -def SVST1B_X4 : MInst<"svst1[_{2}_x4]", "v}p4", "cUc", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x4">; -def SVST1H_X4 : MInst<"svst1[_{2}_x4]", "v}p4", "sUshb", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x4">; -def SVST1W_X4 : MInst<"svst1[_{2}_x4]", "v}p4", "iUif", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x4">; -def SVST1D_X4 : MInst<"svst1[_{2}_x4]", "v}p4", "lUld", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x4">; - -def SVST1B_VNUM_X2 : MInst<"svst1_vnum[_{2}_x2]", "v}pl2", "cUc", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x2">; -def SVST1H_VNUM_X2 : MInst<"svst1_vnum[_{2}_x2]", "v}pl2", "sUshb", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x2">; -def SVST1W_VNUM_X2 : MInst<"svst1_vnum[_{2}_x2]", "v}pl2", "iUif", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x2">; -def SVST1D_VNUM_X2 : MInst<"svst1_vnum[_{2}_x2]", "v}pl2", "lUld", [IsStructStore], 
MemEltTyDefault, "aarch64_sve_st1_pn_x2">; -def SVST1B_VNUM_X4 : MInst<"svst1_vnum[_{2}_x4]", "v}pl4", "cUc", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x4">; -def SVST1H_VNUM_X4 : MInst<"svst1_vnum[_{2}_x4]", "v}pl4", "sUshb", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x4">; -def SVST1W_VNUM_X4 : MInst<"svst1_vnum[_{2}_x4]", "v}pl4", "iUif", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x4">; -def SVST1D_VNUM_X4 : MInst<"svst1_vnum[_{2}_x4]", "v}pl4", "lUld", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x4">; - -def SVSTNT1B_X2 : MInst<"svstnt1[_{2}_x2]", "v}p2", "cUc", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x2">; -def SVSTNT1H_X2 : MInst<"svstnt1[_{2}_x2]", "v}p2", "sUshb", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x2">; -def SVSTNT1W_X2 : MInst<"svstnt1[_{2}_x2]", "v}p2", "iUif", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x2">; -def SVSTNT1D_X2 : MInst<"svstnt1[_{2}_x2]", "v}p2", "lUld", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x2">; -def SVSTNT1B_X4 : MInst<"svstnt1[_{2}_x4]", "v}p4", "cUc", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x4">; -def SVSTNT1H_X4 : MInst<"svstnt1[_{2}_x4]", "v}p4", "sUshb", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x4">; -def SVSTNT1W_X4 : MInst<"svstnt1[_{2}_x4]", "v}p4", "iUif", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x4">; -def SVSTNT1D_X4 : MInst<"svstnt1[_{2}_x4]", "v}p4", "lUld", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x4">; - -def SVSTNT1B_VNUM_X2 : MInst<"svstnt1_vnum[_{2}_x2]", "v}pl2", "cUc", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x2">; -def SVSTNT1H_VNUM_X2 : MInst<"svstnt1_vnum[_{2}_x2]", "v}pl2", "sUshb", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x2">; -def SVSTNT1W_VNUM_X2 : MInst<"svstnt1_vnum[_{2}_x2]", "v}pl2", "iUif", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x2">; -def SVSTNT1D_VNUM_X2 : MInst<"svstnt1_vnum[_{2}_x2]", 
"v}pl2", "lUld", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x2">; -def SVSTNT1B_VNUM_X4 : MInst<"svstnt1_vnum[_{2}_x4]", "v}pl4", "cUc", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x4">; -def SVSTNT1H_VNUM_X4 : MInst<"svstnt1_vnum[_{2}_x4]", "v}pl4", "sUshb", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x4">; -def SVSTNT1W_VNUM_X4 : MInst<"svstnt1_vnum[_{2}_x4]", "v}pl4", "iUif", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x4">; -def SVSTNT1D_VNUM_X4 : MInst<"svstnt1_vnum[_{2}_x4]", "v}pl4", "lUld", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x4">; +let TargetGuard = "sve2p1|sme2" in { + defm ST1 : MultiVecStore<"st1">; + defm STNT1 : MultiVecStore<"stnt1">; +} +let TargetGuard = "sve2p1" in { def SVDOT_X2_S : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "i", MergeNone, "aarch64_sve_sdot_x2", [], []>; def SVDOT_X2_U : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "Ui", MergeNone, "aarch64_sve_udot_x2", [], []>; def SVDOT_X2_F : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "f", MergeNone, "aarch64_sve_fdot_x2", [], []>; diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ld1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ld1.c index 7a25d31de0130..6f1231e776aa3 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ld1.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ld1.c @@ -1,9 +1,12 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -DTEST_SME2 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS 
-triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -DTEST_SME2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + #include <arm_sve.h> #ifdef SVE_OVERLOADED_FORMS @@ -13,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME2 +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svld1_u8_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) @@ -31,7 +40,7 @@ // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret <vscale x 32 x i8> [[TMP4]] // -svuint8x2_t test_svld1_u8_x2(svcount_t pn, const uint8_t *base) +svuint8x2_t test_svld1_u8_x2(svcount_t pn, const uint8_t *base) ATTR { return SVE_ACLE_FUNC(svld1,_u8,_x2,)(pn, base); } @@ -54,7 +63,7 @@ svuint8x2_t test_svld1_u8_x2(svcount_t pn, const uint8_t *base) // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret <vscale x 16 x i16> [[TMP4]] // -svuint16x2_t test_svld1_u16_x2(svcount_t pn, const uint16_t *base) +svuint16x2_t test_svld1_u16_x2(svcount_t pn, const uint16_t *base) ATTR { return SVE_ACLE_FUNC(svld1,_u16,_x2,)(pn, base); } @@ -77,7 +86,7 @@ svuint16x2_t test_svld1_u16_x2(svcount_t pn, const uint16_t *base) // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret <vscale x 8 x i32> [[TMP4]] // -svuint32x2_t test_svld1_u32_x2(svcount_t pn, const 
uint32_t *base) +svuint32x2_t test_svld1_u32_x2(svcount_t pn, const uint32_t *base) ATTR { return SVE_ACLE_FUNC(svld1,_u32,_x2,)(pn, base); } @@ -100,7 +109,7 @@ svuint32x2_t test_svld1_u32_x2(svcount_t pn, const uint32_t *base) // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret <vscale x 4 x i64> [[TMP4]] // -svuint64x2_t test_svld1_u64_x2(svcount_t pn, const uint64_t *base) +svuint64x2_t test_svld1_u64_x2(svcount_t pn, const uint64_t *base) ATTR { return SVE_ACLE_FUNC(svld1,_u64,_x2,)(pn, base); } @@ -131,7 +140,7 @@ svuint64x2_t test_svld1_u64_x2(svcount_t pn, const uint64_t *base) // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret <vscale x 64 x i8> [[TMP8]] // -svuint8x4_t test_svld1_u8_x4(svcount_t pn, const uint8_t *base) +svuint8x4_t test_svld1_u8_x4(svcount_t pn, const uint8_t *base) ATTR { return SVE_ACLE_FUNC(svld1,_u8,_x4,)(pn, base); } @@ -162,7 +171,7 @@ svuint8x4_t test_svld1_u8_x4(svcount_t pn, const uint8_t *base) // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret <vscale x 32 x i16> [[TMP8]] // -svuint16x4_t test_svld1_u16_x4(svcount_t pn, const uint16_t *base) +svuint16x4_t test_svld1_u16_x4(svcount_t pn, const uint16_t *base) ATTR { return SVE_ACLE_FUNC(svld1,_u16,_x4,)(pn, base); } @@ -193,7 +202,7 @@ svuint16x4_t test_svld1_u16_x4(svcount_t pn, const uint16_t *base) // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret <vscale x 16 x i32> [[TMP8]] // -svuint32x4_t test_svld1_u32_x4(svcount_t pn, const uint32_t *base) +svuint32x4_t test_svld1_u32_x4(svcount_t pn, const uint32_t *base) ATTR { return SVE_ACLE_FUNC(svld1,_u32,_x4,)(pn, base); } @@ -224,7 +233,7 @@ svuint32x4_t test_svld1_u32_x4(svcount_t pn, const uint32_t *base) // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call 
<vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret <vscale x 8 x i64> [[TMP8]] // -svuint64x4_t test_svld1_u64_x4(svcount_t pn, const uint64_t *base) +svuint64x4_t test_svld1_u64_x4(svcount_t pn, const uint64_t *base) ATTR { return SVE_ACLE_FUNC(svld1,_u64,_x4,)(pn, base); } @@ -247,7 +256,7 @@ svuint64x4_t test_svld1_u64_x4(svcount_t pn, const uint64_t *base) // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret <vscale x 32 x i8> [[TMP4]] // -svint8x2_t test_svld1_s8_x2(svcount_t pn, const int8_t *base) +svint8x2_t test_svld1_s8_x2(svcount_t pn, const int8_t *base) ATTR { return SVE_ACLE_FUNC(svld1,_s8,_x2,)(pn, base); } @@ -270,7 +279,7 @@ svint8x2_t test_svld1_s8_x2(svcount_t pn, const int8_t *base) // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret <vscale x 16 x i16> [[TMP4]] // -svint16x2_t test_svld1_s16_x2(svcount_t pn, const int16_t *base) +svint16x2_t test_svld1_s16_x2(svcount_t pn, const int16_t *base) ATTR { return SVE_ACLE_FUNC(svld1,_s16,_x2,)(pn, base); } @@ -293,7 +302,7 @@ svint16x2_t test_svld1_s16_x2(svcount_t pn, const int16_t *base) // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret <vscale x 8 x i32> [[TMP4]] // -svint32x2_t test_svld1_s32_x2(svcount_t pn, const int32_t *base) +svint32x2_t test_svld1_s32_x2(svcount_t pn, const int32_t *base) ATTR { return SVE_ACLE_FUNC(svld1,_s32,_x2,)(pn, base); } @@ -316,7 +325,7 @@ svint32x2_t test_svld1_s32_x2(svcount_t pn, const int32_t *base) // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret <vscale x 4 x i64> [[TMP4]] // -svint64x2_t test_svld1_s64_x2(svcount_t pn, const int64_t *base) +svint64x2_t test_svld1_s64_x2(svcount_t pn, const int64_t *base) ATTR { return SVE_ACLE_FUNC(svld1,_s64,_x2,)(pn, base); } @@ -347,7 +356,7 @@ svint64x2_t 
test_svld1_s64_x2(svcount_t pn, const int64_t *base) // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret <vscale x 64 x i8> [[TMP8]] // -svint8x4_t test_svld1_s8_x4(svcount_t pn, const int8_t *base) +svint8x4_t test_svld1_s8_x4(svcount_t pn, const int8_t *base) ATTR { return SVE_ACLE_FUNC(svld1,_s8,_x4,)(pn, base); } @@ -378,7 +387,7 @@ svint8x4_t test_svld1_s8_x4(svcount_t pn, const int8_t *base) // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret <vscale x 32 x i16> [[TMP8]] // -svint16x4_t test_svld1_s16_x4(svcount_t pn, const int16_t *base) +svint16x4_t test_svld1_s16_x4(svcount_t pn, const int16_t *base) ATTR { return SVE_ACLE_FUNC(svld1,_s16,_x4,)(pn, base); } @@ -409,7 +418,7 @@ svint16x4_t test_svld1_s16_x4(svcount_t pn, const int16_t *base) // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret <vscale x 16 x i32> [[TMP8]] // -svint32x4_t test_svld1_s32_x4(svcount_t pn, const int32_t *base) +svint32x4_t test_svld1_s32_x4(svcount_t pn, const int32_t *base) ATTR { return SVE_ACLE_FUNC(svld1,_s32,_x4,)(pn, base); } @@ -440,7 +449,7 @@ svint32x4_t test_svld1_s32_x4(svcount_t pn, const int32_t *base) // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret <vscale x 8 x i64> [[TMP8]] // -svint64x4_t test_svld1_s64_x4(svcount_t pn, const int64_t *base) +svint64x4_t test_svld1_s64_x4(svcount_t pn, const int64_t *base) ATTR { return SVE_ACLE_FUNC(svld1,_s64,_x4,)(pn, base); } @@ -463,7 +472,7 @@ svint64x4_t test_svld1_s64_x4(svcount_t pn, const int64_t *base) // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret <vscale x 16 x half> [[TMP4]] // -svfloat16x2_t test_svld1_f16_x2(svcount_t pn, const float16_t *base) +svfloat16x2_t test_svld1_f16_x2(svcount_t pn, const float16_t 
*base) ATTR { return SVE_ACLE_FUNC(svld1,_f16,_x2,)(pn, base); } @@ -486,7 +495,7 @@ svfloat16x2_t test_svld1_f16_x2(svcount_t pn, const float16_t *base) // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret <vscale x 8 x float> [[TMP4]] // -svfloat32x2_t test_svld1_f32_x2(svcount_t pn, const float32_t *base) +svfloat32x2_t test_svld1_f32_x2(svcount_t pn, const float32_t *base) ATTR { return SVE_ACLE_FUNC(svld1,_f32,_x2,)(pn, base); } @@ -509,7 +518,7 @@ svfloat32x2_t test_svld1_f32_x2(svcount_t pn, const float32_t *base) // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret <vscale x 4 x double> [[TMP4]] // -svfloat64x2_t test_svld1_f64_x2(svcount_t pn, const float64_t *base) +svfloat64x2_t test_svld1_f64_x2(svcount_t pn, const float64_t *base) ATTR { return SVE_ACLE_FUNC(svld1,_f64,_x2,)(pn, base); } @@ -540,7 +549,7 @@ svfloat64x2_t test_svld1_f64_x2(svcount_t pn, const float64_t *base) // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret <vscale x 32 x half> [[TMP8]] // -svfloat16x4_t test_svld1_f16_x4(svcount_t pn, const float16_t *base) +svfloat16x4_t test_svld1_f16_x4(svcount_t pn, const float16_t *base) ATTR { return SVE_ACLE_FUNC(svld1,_f16,_x4,)(pn, base); } @@ -571,7 +580,7 @@ svfloat16x4_t test_svld1_f16_x4(svcount_t pn, const float16_t *base) // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret <vscale x 16 x float> [[TMP8]] // -svfloat32x4_t test_svld1_f32_x4(svcount_t pn, const float32_t *base) +svfloat32x4_t test_svld1_f32_x4(svcount_t pn, const float32_t *base) ATTR { return SVE_ACLE_FUNC(svld1,_f32,_x4,)(pn, base); } @@ -602,7 +611,7 @@ svfloat32x4_t test_svld1_f32_x4(svcount_t pn, const float32_t *base) // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 6) // 
CPP-CHECK-NEXT: ret <vscale x 8 x double> [[TMP8]] // -svfloat64x4_t test_svld1_f64_x4(svcount_t pn, const float64_t *base) +svfloat64x4_t test_svld1_f64_x4(svcount_t pn, const float64_t *base) ATTR { return SVE_ACLE_FUNC(svld1,_f64,_x4,)(pn, base); } @@ -631,7 +640,7 @@ svfloat64x4_t test_svld1_f64_x4(svcount_t pn, const float64_t *base) // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16) // CPP-CHECK-NEXT: ret <vscale x 32 x i8> [[TMP5]] // -svuint8x2_t test_svld1_vnum_u8_x2(svcount_t pn, const uint8_t *base, int64_t vnum) +svuint8x2_t test_svld1_vnum_u8_x2(svcount_t pn, const uint8_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svld1_vnum,_u8,_x2,)(pn, base, vnum); } @@ -656,7 +665,7 @@ svuint8x2_t test_svld1_vnum_u8_x2(svcount_t pn, const uint8_t *base, int64_t vnu // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8) // CPP-CHECK-NEXT: ret <vscale x 16 x i16> [[TMP5]] // -svuint16x2_t test_svld1_vnum_u16_x2(svcount_t pn, const uint16_t *base, int64_t vnum) +svuint16x2_t test_svld1_vnum_u16_x2(svcount_t pn, const uint16_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svld1_vnum,_u16,_x2,)(pn, base, vnum); } @@ -681,7 +690,7 @@ svuint16x2_t test_svld1_vnum_u16_x2(svcount_t pn, const uint16_t *base, int64_t // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4) // CPP-CHECK-NEXT: ret <vscale x 8 x i32> [[TMP5]] // -svuint32x2_t test_svld1_vnum_u32_x2(svcount_t pn, const uint32_t *base, int64_t vnum) +svuint32x2_t test_svld1_vnum_u32_x2(svcount_t pn, const uint32_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svld1_vnum,_u32,_x2,)(pn, base, vnum); } @@ -706,7 +715,7 @@ svuint32x2_t test_svld1_vnum_u32_x2(svcount_t pn, const uint32_t *base, int64_t // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2) // CPP-CHECK-NEXT: ret <vscale x 4 x i64> [[TMP5]] // -svuint64x2_t test_svld1_vnum_u64_x2(svcount_t pn, const uint64_t *base, int64_t 
vnum) +svuint64x2_t test_svld1_vnum_u64_x2(svcount_t pn, const uint64_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svld1_vnum,_u64,_x2,)(pn, base, vnum); } @@ -739,7 +748,7 @@ svuint64x2_t test_svld1_vnum_u64_x2(svcount_t pn, const uint64_t *base, int64_t // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP7]], <vscale x 16 x i8> [[TMP8]], i64 48) // CPP-CHECK-NEXT: ret <vscale x 64 x i8> [[TMP9]] // -svuint8x4_t test_svld1_vnum_u8_x4(svcount_t pn, const uint8_t *base, int64_t vnum) +svuint8x4_t test_svld1_vnum_u8_x4(svcount_t pn, const uint8_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svld1_vnum,_u8,_x4,)(pn, base, vnum); } @@ -772,7 +781,7 @@ svuint8x4_t test_svld1_vnum_u8_x4(svcount_t pn, const uint8_t *base, int64_t vnu // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 24) // CPP-CHECK-NEXT: ret <vscale x 32 x i16> [[TMP9]] // -svuint16x4_t test_svld1_vnum_u16_x4(svcount_t pn, const uint16_t *base, int64_t vnum) +svuint16x4_t test_svld1_vnum_u16_x4(svcount_t pn, const uint16_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svld1_vnum,_u16,_x4,)(pn, base, vnum); } @@ -805,7 +814,7 @@ svuint16x4_t test_svld1_vnum_u16_x4(svcount_t pn, const uint16_t *base, int64_t // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 12) // CPP-CHECK-NEXT: ret <vscale x 16 x i32> [[TMP9]] // -svuint32x4_t test_svld1_vnum_u32_x4(svcount_t pn, const uint32_t *base, int64_t vnum) +svuint32x4_t test_svld1_vnum_u32_x4(svcount_t pn, const uint32_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svld1_vnum,_u32,_x4,)(pn, base, vnum); } @@ -838,7 +847,7 @@ svuint32x4_t test_svld1_vnum_u32_x4(svcount_t pn, const uint32_t *base, int64_t // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP8]], i64 6) // CPP-CHECK-NEXT: ret <vscale x 8 x i64> [[TMP9]] // -svuint64x4_t test_svld1_vnum_u64_x4(svcount_t pn, const uint64_t *base, int64_t vnum) +svuint64x4_t test_svld1_vnum_u64_x4(svcount_t pn, 
const uint64_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svld1_vnum,_u64,_x4,)(pn, base, vnum); } @@ -863,7 +872,7 @@ svuint64x4_t test_svld1_vnum_u64_x4(svcount_t pn, const uint64_t *base, int64_t // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) // CPP-CHECK-NEXT: ret [[TMP5]] // -svint8x2_t test_svld1_vnum_s8_x2(svcount_t pn, const int8_t *base, int64_t vnum) +svint8x2_t test_svld1_vnum_s8_x2(svcount_t pn, const int8_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svld1_vnum,_s8,_x2,)(pn, base, vnum); } @@ -888,7 +897,7 @@ svint8x2_t test_svld1_vnum_s8_x2(svcount_t pn, const int8_t *base, int64_t vnum) // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) // CPP-CHECK-NEXT: ret [[TMP5]] // -svint16x2_t test_svld1_vnum_s16_x2(svcount_t pn, const int16_t *base, int64_t vnum) +svint16x2_t test_svld1_vnum_s16_x2(svcount_t pn, const int16_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svld1_vnum,_s16,_x2,)(pn, base, vnum); } @@ -913,7 +922,7 @@ svint16x2_t test_svld1_vnum_s16_x2(svcount_t pn, const int16_t *base, int64_t vn // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) // CPP-CHECK-NEXT: ret [[TMP5]] // -svint32x2_t test_svld1_vnum_s32_x2(svcount_t pn, const int32_t *base, int64_t vnum) +svint32x2_t test_svld1_vnum_s32_x2(svcount_t pn, const int32_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svld1_vnum,_s32,_x2,)(pn, base, vnum); } @@ -938,7 +947,7 @@ svint32x2_t test_svld1_vnum_s32_x2(svcount_t pn, const int32_t *base, int64_t vn // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) // CPP-CHECK-NEXT: ret [[TMP5]] // -svint64x2_t test_svld1_vnum_s64_x2(svcount_t pn, const int64_t *base, int64_t vnum) +svint64x2_t test_svld1_vnum_s64_x2(svcount_t pn, const int64_t *base, int64_t vnum) ATTR { return 
SVE_ACLE_FUNC(svld1_vnum,_s64,_x2,)(pn, base, vnum); } @@ -971,7 +980,7 @@ svint64x2_t test_svld1_vnum_s64_x2(svcount_t pn, const int64_t *base, int64_t vn // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) // CPP-CHECK-NEXT: ret [[TMP9]] // -svint8x4_t test_svld1_vnum_s8_x4(svcount_t pn, const int8_t *base, int64_t vnum) +svint8x4_t test_svld1_vnum_s8_x4(svcount_t pn, const int8_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svld1_vnum,_s8,_x4,)(pn, base, vnum); } @@ -1004,7 +1013,7 @@ svint8x4_t test_svld1_vnum_s8_x4(svcount_t pn, const int8_t *base, int64_t vnum) // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) // CPP-CHECK-NEXT: ret [[TMP9]] // -svint16x4_t test_svld1_vnum_s16_x4(svcount_t pn, const int16_t *base, int64_t vnum) +svint16x4_t test_svld1_vnum_s16_x4(svcount_t pn, const int16_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svld1_vnum,_s16,_x4,)(pn, base, vnum); } @@ -1037,7 +1046,7 @@ svint16x4_t test_svld1_vnum_s16_x4(svcount_t pn, const int16_t *base, int64_t vn // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) // CPP-CHECK-NEXT: ret [[TMP9]] // -svint32x4_t test_svld1_vnum_s32_x4(svcount_t pn, const int32_t *base, int64_t vnum) +svint32x4_t test_svld1_vnum_s32_x4(svcount_t pn, const int32_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svld1_vnum,_s32,_x4,)(pn, base, vnum); } @@ -1070,7 +1079,7 @@ svint32x4_t test_svld1_vnum_s32_x4(svcount_t pn, const int32_t *base, int64_t vn // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) // CPP-CHECK-NEXT: ret [[TMP9]] // -svint64x4_t test_svld1_vnum_s64_x4(svcount_t pn, const int64_t *base, int64_t vnum) +svint64x4_t test_svld1_vnum_s64_x4(svcount_t pn, const int64_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svld1_vnum,_s64,_x4,)(pn, base, vnum); } @@ 
-1095,7 +1104,7 @@ svint64x4_t test_svld1_vnum_s64_x4(svcount_t pn, const int64_t *base, int64_t vn // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) // CPP-CHECK-NEXT: ret [[TMP5]] // -svfloat16x2_t test_svld1_vnum_f16_x2(svcount_t pn, const float16_t *base, int64_t vnum) +svfloat16x2_t test_svld1_vnum_f16_x2(svcount_t pn, const float16_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svld1_vnum,_f16,_x2,)(pn, base, vnum); } @@ -1120,7 +1129,7 @@ svfloat16x2_t test_svld1_vnum_f16_x2(svcount_t pn, const float16_t *base, int64_ // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) // CPP-CHECK-NEXT: ret [[TMP5]] // -svfloat32x2_t test_svld1_vnum_f32_x2(svcount_t pn, const float32_t *base, int64_t vnum) +svfloat32x2_t test_svld1_vnum_f32_x2(svcount_t pn, const float32_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svld1_vnum,_f32,_x2,)(pn, base, vnum); } @@ -1145,7 +1154,7 @@ svfloat32x2_t test_svld1_vnum_f32_x2(svcount_t pn, const float32_t *base, int64_ // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) // CPP-CHECK-NEXT: ret [[TMP5]] // -svfloat64x2_t test_svld1_vnum_f64_x2(svcount_t pn, const float64_t *base, int64_t vnum) +svfloat64x2_t test_svld1_vnum_f64_x2(svcount_t pn, const float64_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svld1_vnum,_f64,_x2,)(pn, base, vnum); } @@ -1178,7 +1187,7 @@ svfloat64x2_t test_svld1_vnum_f64_x2(svcount_t pn, const float64_t *base, int64_ // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 24) // CPP-CHECK-NEXT: ret [[TMP9]] // -svfloat16x4_t test_svld1_vnum_f16_x4(svcount_t pn, const float16_t *base, int64_t vnum) +svfloat16x4_t test_svld1_vnum_f16_x4(svcount_t pn, const float16_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svld1_vnum,_f16,_x4,)(pn, base, vnum); } @@ -1211,7 +1220,7 @@ 
svfloat16x4_t test_svld1_vnum_f16_x4(svcount_t pn, const float16_t *base, int64_ // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 12) // CPP-CHECK-NEXT: ret [[TMP9]] // -svfloat32x4_t test_svld1_vnum_f32_x4(svcount_t pn, const float32_t *base, int64_t vnum) +svfloat32x4_t test_svld1_vnum_f32_x4(svcount_t pn, const float32_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svld1_vnum,_f32,_x4,)(pn, base, vnum); } @@ -1244,7 +1253,7 @@ svfloat32x4_t test_svld1_vnum_f32_x4(svcount_t pn, const float32_t *base, int64_ // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 6) // CPP-CHECK-NEXT: ret [[TMP9]] // -svfloat64x4_t test_svld1_vnum_f64_x4(svcount_t pn, const float64_t *base, int64_t vnum) +svfloat64x4_t test_svld1_vnum_f64_x4(svcount_t pn, const float64_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svld1_vnum,_f64,_x4,)(pn, base, vnum); } diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ldnt1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ldnt1.c index 7a0fcde819dce..3f61cc3de1395 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ldnt1.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ldnt1.c @@ -1,9 +1,12 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -DTEST_SME2 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -DTEST_SME2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + #include #ifdef SVE_OVERLOADED_FORMS @@ -13,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME2 +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svldnt1_u8_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) @@ -31,7 +40,7 @@ // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint8x2_t test_svldnt1_u8_x2(svcount_t pn, const uint8_t *base) +svuint8x2_t test_svldnt1_u8_x2(svcount_t pn, const uint8_t *base) ATTR { return SVE_ACLE_FUNC(svldnt1,_u8,_x2,)(pn, base); } @@ -54,7 +63,7 @@ svuint8x2_t test_svldnt1_u8_x2(svcount_t pn, const uint8_t *base) // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint16x2_t test_svldnt1_u16_x2(svcount_t pn, const uint16_t *base) +svuint16x2_t test_svldnt1_u16_x2(svcount_t pn, const uint16_t *base) ATTR { return SVE_ACLE_FUNC(svldnt1,_u16,_x2,)(pn, base); } @@ -77,7 +86,7 @@ svuint16x2_t test_svldnt1_u16_x2(svcount_t pn, const uint16_t *base) // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint32x2_t test_svldnt1_u32_x2(svcount_t pn, const uint32_t *base) +svuint32x2_t test_svldnt1_u32_x2(svcount_t pn, const 
uint32_t *base) ATTR { return SVE_ACLE_FUNC(svldnt1,_u32,_x2,)(pn, base); } @@ -100,7 +109,7 @@ svuint32x2_t test_svldnt1_u32_x2(svcount_t pn, const uint32_t *base) // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint64x2_t test_svldnt1_u64_x2(svcount_t pn, const uint64_t *base) +svuint64x2_t test_svldnt1_u64_x2(svcount_t pn, const uint64_t *base) ATTR { return SVE_ACLE_FUNC(svldnt1,_u64,_x2,)(pn, base); } @@ -131,7 +140,7 @@ svuint64x2_t test_svldnt1_u64_x2(svcount_t pn, const uint64_t *base) // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint8x4_t test_svldnt1_u8_x4(svcount_t pn, const uint8_t *base) +svuint8x4_t test_svldnt1_u8_x4(svcount_t pn, const uint8_t *base) ATTR { return SVE_ACLE_FUNC(svldnt1,_u8,_x4,)(pn, base); } @@ -162,7 +171,7 @@ svuint8x4_t test_svldnt1_u8_x4(svcount_t pn, const uint8_t *base) // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint16x4_t test_svldnt1_u16_x4(svcount_t pn, const uint16_t *base) +svuint16x4_t test_svldnt1_u16_x4(svcount_t pn, const uint16_t *base) ATTR { return SVE_ACLE_FUNC(svldnt1,_u16,_x4,)(pn, base); } @@ -193,7 +202,7 @@ svuint16x4_t test_svldnt1_u16_x4(svcount_t pn, const uint16_t *base) // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint32x4_t test_svldnt1_u32_x4(svcount_t pn, const uint32_t *base) +svuint32x4_t test_svldnt1_u32_x4(svcount_t pn, const uint32_t *base) ATTR { return SVE_ACLE_FUNC(svldnt1,_u32,_x4,)(pn, base); } @@ -224,7 +233,7 @@ svuint32x4_t test_svldnt1_u32_x4(svcount_t pn, const uint32_t *base) // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // 
CPP-CHECK-NEXT: ret [[TMP8]] // -svuint64x4_t test_svldnt1_u64_x4(svcount_t pn, const uint64_t *base) +svuint64x4_t test_svldnt1_u64_x4(svcount_t pn, const uint64_t *base) ATTR { return SVE_ACLE_FUNC(svldnt1,_u64,_x4,)(pn, base); } @@ -247,7 +256,7 @@ svuint64x4_t test_svldnt1_u64_x4(svcount_t pn, const uint64_t *base) // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint8x2_t test_svldnt1_s8_x2(svcount_t pn, const int8_t *base) +svint8x2_t test_svldnt1_s8_x2(svcount_t pn, const int8_t *base) ATTR { return SVE_ACLE_FUNC(svldnt1,_s8,_x2,)(pn, base); } @@ -270,7 +279,7 @@ svint8x2_t test_svldnt1_s8_x2(svcount_t pn, const int8_t *base) // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint16x2_t test_svldnt1_s16_x2(svcount_t pn, const int16_t *base) +svint16x2_t test_svldnt1_s16_x2(svcount_t pn, const int16_t *base) ATTR { return SVE_ACLE_FUNC(svldnt1,_s16,_x2,)(pn, base); } @@ -293,7 +302,7 @@ svint16x2_t test_svldnt1_s16_x2(svcount_t pn, const int16_t *base) // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint32x2_t test_svldnt1_s32_x2(svcount_t pn, const int32_t *base) +svint32x2_t test_svldnt1_s32_x2(svcount_t pn, const int32_t *base) ATTR { return SVE_ACLE_FUNC(svldnt1,_s32,_x2,)(pn, base); } @@ -316,7 +325,7 @@ svint32x2_t test_svldnt1_s32_x2(svcount_t pn, const int32_t *base) // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint64x2_t test_svldnt1_s64_x2(svcount_t pn, const int64_t *base) +svint64x2_t test_svldnt1_s64_x2(svcount_t pn, const int64_t *base) ATTR { return SVE_ACLE_FUNC(svldnt1,_s64,_x2,)(pn, base); } @@ -347,7 +356,7 @@ svint64x2_t test_svldnt1_s64_x2(svcount_t pn, 
const int64_t *base) // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint8x4_t test_svldnt1_s8_x4(svcount_t pn, const int8_t *base) +svint8x4_t test_svldnt1_s8_x4(svcount_t pn, const int8_t *base) ATTR { return SVE_ACLE_FUNC(svldnt1,_s8,_x4,)(pn, base); } @@ -378,7 +387,7 @@ svint8x4_t test_svldnt1_s8_x4(svcount_t pn, const int8_t *base) // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint16x4_t test_svldnt1_s16_x4(svcount_t pn, const int16_t *base) +svint16x4_t test_svldnt1_s16_x4(svcount_t pn, const int16_t *base) ATTR { return SVE_ACLE_FUNC(svldnt1,_s16,_x4,)(pn, base); } @@ -409,7 +418,7 @@ svint16x4_t test_svldnt1_s16_x4(svcount_t pn, const int16_t *base) // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint32x4_t test_svldnt1_s32_x4(svcount_t pn, const int32_t *base) +svint32x4_t test_svldnt1_s32_x4(svcount_t pn, const int32_t *base) ATTR { return SVE_ACLE_FUNC(svldnt1,_s32,_x4,)(pn, base); } @@ -440,7 +449,7 @@ svint32x4_t test_svldnt1_s32_x4(svcount_t pn, const int32_t *base) // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint64x4_t test_svldnt1_s64_x4(svcount_t pn, const int64_t *base) +svint64x4_t test_svldnt1_s64_x4(svcount_t pn, const int64_t *base) ATTR { return SVE_ACLE_FUNC(svldnt1,_s64,_x4,)(pn, base); } @@ -463,7 +472,7 @@ svint64x4_t test_svldnt1_s64_x4(svcount_t pn, const int64_t *base) // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat16x2_t test_svldnt1_f16_x2(svcount_t pn, const float16_t *base) +svfloat16x2_t test_svldnt1_f16_x2(svcount_t pn, const 
float16_t *base) ATTR { return SVE_ACLE_FUNC(svldnt1,_f16,_x2,)(pn, base); } @@ -486,7 +495,7 @@ svfloat16x2_t test_svldnt1_f16_x2(svcount_t pn, const float16_t *base) // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat32x2_t test_svldnt1_f32_x2(svcount_t pn, const float32_t *base) +svfloat32x2_t test_svldnt1_f32_x2(svcount_t pn, const float32_t *base) ATTR { return SVE_ACLE_FUNC(svldnt1,_f32,_x2,)(pn, base); } @@ -509,7 +518,7 @@ svfloat32x2_t test_svldnt1_f32_x2(svcount_t pn, const float32_t *base) // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat64x2_t test_svldnt1_f64_x2(svcount_t pn, const float64_t *base) +svfloat64x2_t test_svldnt1_f64_x2(svcount_t pn, const float64_t *base) ATTR { return SVE_ACLE_FUNC(svldnt1,_f64,_x2,)(pn, base); } @@ -540,7 +549,7 @@ svfloat64x2_t test_svldnt1_f64_x2(svcount_t pn, const float64_t *base) // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat16x4_t test_svldnt1_f16_x4(svcount_t pn, const float16_t *base) +svfloat16x4_t test_svldnt1_f16_x4(svcount_t pn, const float16_t *base) ATTR { return SVE_ACLE_FUNC(svldnt1,_f16,_x4,)(pn, base); } @@ -571,7 +580,7 @@ svfloat16x4_t test_svldnt1_f16_x4(svcount_t pn, const float16_t *base) // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat32x4_t test_svldnt1_f32_x4(svcount_t pn, const float32_t *base) +svfloat32x4_t test_svldnt1_f32_x4(svcount_t pn, const float32_t *base) ATTR { return SVE_ACLE_FUNC(svldnt1,_f32,_x4,)(pn, base); } @@ -602,7 +611,7 @@ svfloat32x4_t test_svldnt1_f32_x4(svcount_t pn, const float32_t *base) // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat64x4_t test_svldnt1_f64_x4(svcount_t pn, const float64_t *base) +svfloat64x4_t test_svldnt1_f64_x4(svcount_t pn, const float64_t *base) ATTR { return SVE_ACLE_FUNC(svldnt1,_f64,_x4,)(pn, base); } @@ -631,7 +640,7 @@ svfloat64x4_t test_svldnt1_f64_x4(svcount_t pn, const float64_t *base) // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) // CPP-CHECK-NEXT: ret [[TMP5]] // -svuint8x2_t test_svldnt1_vnum_u8_x2(svcount_t pn, const uint8_t *base, int64_t vnum) +svuint8x2_t test_svldnt1_vnum_u8_x2(svcount_t pn, const uint8_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svldnt1_vnum,_u8,_x2,)(pn, base, vnum); } @@ -656,7 +665,7 @@ svuint8x2_t test_svldnt1_vnum_u8_x2(svcount_t pn, const uint8_t *base, int64_t v // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) // CPP-CHECK-NEXT: ret [[TMP5]] // -svuint16x2_t test_svldnt1_vnum_u16_x2(svcount_t pn, const uint16_t *base, int64_t vnum) +svuint16x2_t test_svldnt1_vnum_u16_x2(svcount_t pn, const uint16_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svldnt1_vnum,_u16,_x2,)(pn, base, vnum); } @@ -681,7 +690,7 @@ svuint16x2_t test_svldnt1_vnum_u16_x2(svcount_t pn, const uint16_t *base, int64_ // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) // CPP-CHECK-NEXT: ret [[TMP5]] // -svuint32x2_t test_svldnt1_vnum_u32_x2(svcount_t pn, const uint32_t *base, int64_t vnum) +svuint32x2_t test_svldnt1_vnum_u32_x2(svcount_t pn, const uint32_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svldnt1_vnum,_u32,_x2,)(pn, base, vnum); } @@ -706,7 +715,7 @@ svuint32x2_t test_svldnt1_vnum_u32_x2(svcount_t pn, const uint32_t *base, int64_ // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) // CPP-CHECK-NEXT: ret 
[[TMP5]] // -svuint64x2_t test_svldnt1_vnum_u64_x2(svcount_t pn, const uint64_t *base, int64_t vnum) +svuint64x2_t test_svldnt1_vnum_u64_x2(svcount_t pn, const uint64_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svldnt1_vnum,_u64,_x2,)(pn, base, vnum); } @@ -739,7 +748,7 @@ svuint64x2_t test_svldnt1_vnum_u64_x2(svcount_t pn, const uint64_t *base, int64_ // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) // CPP-CHECK-NEXT: ret [[TMP9]] // -svuint8x4_t test_svldnt1_vnum_u8_x4(svcount_t pn, const uint8_t *base, int64_t vnum) +svuint8x4_t test_svldnt1_vnum_u8_x4(svcount_t pn, const uint8_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svldnt1_vnum,_u8,_x4,)(pn, base, vnum); } @@ -772,7 +781,7 @@ svuint8x4_t test_svldnt1_vnum_u8_x4(svcount_t pn, const uint8_t *base, int64_t v // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) // CPP-CHECK-NEXT: ret [[TMP9]] // -svuint16x4_t test_svldnt1_vnum_u16_x4(svcount_t pn, const uint16_t *base, int64_t vnum) +svuint16x4_t test_svldnt1_vnum_u16_x4(svcount_t pn, const uint16_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svldnt1_vnum,_u16,_x4,)(pn, base, vnum); } @@ -805,7 +814,7 @@ svuint16x4_t test_svldnt1_vnum_u16_x4(svcount_t pn, const uint16_t *base, int64_ // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) // CPP-CHECK-NEXT: ret [[TMP9]] // -svuint32x4_t test_svldnt1_vnum_u32_x4(svcount_t pn, const uint32_t *base, int64_t vnum) +svuint32x4_t test_svldnt1_vnum_u32_x4(svcount_t pn, const uint32_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svldnt1_vnum,_u32,_x4,)(pn, base, vnum); } @@ -838,7 +847,7 @@ svuint32x4_t test_svldnt1_vnum_u32_x4(svcount_t pn, const uint32_t *base, int64_ // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) // CPP-CHECK-NEXT: ret [[TMP9]] // -svuint64x4_t 
test_svldnt1_vnum_u64_x4(svcount_t pn, const uint64_t *base, int64_t vnum) +svuint64x4_t test_svldnt1_vnum_u64_x4(svcount_t pn, const uint64_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svldnt1_vnum,_u64,_x4,)(pn, base, vnum); } @@ -863,7 +872,7 @@ svuint64x4_t test_svldnt1_vnum_u64_x4(svcount_t pn, const uint64_t *base, int64_ // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) // CPP-CHECK-NEXT: ret [[TMP5]] // -svint8x2_t test_svldnt1_vnum_s8_x2(svcount_t pn, const int8_t *base, int64_t vnum) +svint8x2_t test_svldnt1_vnum_s8_x2(svcount_t pn, const int8_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svldnt1_vnum,_s8,_x2,)(pn, base, vnum); } @@ -888,7 +897,7 @@ svint8x2_t test_svldnt1_vnum_s8_x2(svcount_t pn, const int8_t *base, int64_t vnu // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) // CPP-CHECK-NEXT: ret [[TMP5]] // -svint16x2_t test_svldnt1_vnum_s16_x2(svcount_t pn, const int16_t *base, int64_t vnum) +svint16x2_t test_svldnt1_vnum_s16_x2(svcount_t pn, const int16_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svldnt1_vnum,_s16,_x2,)(pn, base, vnum); } @@ -913,7 +922,7 @@ svint16x2_t test_svldnt1_vnum_s16_x2(svcount_t pn, const int16_t *base, int64_t // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) // CPP-CHECK-NEXT: ret [[TMP5]] // -svint32x2_t test_svldnt1_vnum_s32_x2(svcount_t pn, const int32_t *base, int64_t vnum) +svint32x2_t test_svldnt1_vnum_s32_x2(svcount_t pn, const int32_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svldnt1_vnum,_s32,_x2,)(pn, base, vnum); } @@ -938,7 +947,7 @@ svint32x2_t test_svldnt1_vnum_s32_x2(svcount_t pn, const int32_t *base, int64_t // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) // CPP-CHECK-NEXT: ret [[TMP5]] // -svint64x2_t test_svldnt1_vnum_s64_x2(svcount_t pn, const 
int64_t *base, int64_t vnum) +svint64x2_t test_svldnt1_vnum_s64_x2(svcount_t pn, const int64_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svldnt1_vnum,_s64,_x2,)(pn, base, vnum); } @@ -971,7 +980,7 @@ svint64x2_t test_svldnt1_vnum_s64_x2(svcount_t pn, const int64_t *base, int64_t // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) // CPP-CHECK-NEXT: ret [[TMP9]] // -svint8x4_t test_svldnt1_vnum_s8_x4(svcount_t pn, const int8_t *base, int64_t vnum) +svint8x4_t test_svldnt1_vnum_s8_x4(svcount_t pn, const int8_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svldnt1_vnum,_s8,_x4,)(pn, base, vnum); } @@ -1004,7 +1013,7 @@ svint8x4_t test_svldnt1_vnum_s8_x4(svcount_t pn, const int8_t *base, int64_t vnu // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) // CPP-CHECK-NEXT: ret [[TMP9]] // -svint16x4_t test_svldnt1_vnum_s16_x4(svcount_t pn, const int16_t *base, int64_t vnum) +svint16x4_t test_svldnt1_vnum_s16_x4(svcount_t pn, const int16_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svldnt1_vnum,_s16,_x4,)(pn, base, vnum); } @@ -1037,7 +1046,7 @@ svint16x4_t test_svldnt1_vnum_s16_x4(svcount_t pn, const int16_t *base, int64_t // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) // CPP-CHECK-NEXT: ret [[TMP9]] // -svint32x4_t test_svldnt1_vnum_s32_x4(svcount_t pn, const int32_t *base, int64_t vnum) +svint32x4_t test_svldnt1_vnum_s32_x4(svcount_t pn, const int32_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svldnt1_vnum,_s32,_x4,)(pn, base, vnum); } @@ -1070,7 +1079,7 @@ svint32x4_t test_svldnt1_vnum_s32_x4(svcount_t pn, const int32_t *base, int64_t // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) // CPP-CHECK-NEXT: ret [[TMP9]] // -svint64x4_t test_svldnt1_vnum_s64_x4(svcount_t pn, const int64_t *base, int64_t vnum) +svint64x4_t 
test_svldnt1_vnum_s64_x4(svcount_t pn, const int64_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svldnt1_vnum,_s64,_x4,)(pn, base, vnum); } @@ -1095,7 +1104,7 @@ svint64x4_t test_svldnt1_vnum_s64_x4(svcount_t pn, const int64_t *base, int64_t // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) // CPP-CHECK-NEXT: ret [[TMP5]] // -svfloat16x2_t test_svldnt1_vnum_f16_x2(svcount_t pn, const float16_t *base, int64_t vnum) +svfloat16x2_t test_svldnt1_vnum_f16_x2(svcount_t pn, const float16_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svldnt1_vnum,_f16,_x2,)(pn, base, vnum); } @@ -1120,7 +1129,7 @@ svfloat16x2_t test_svldnt1_vnum_f16_x2(svcount_t pn, const float16_t *base, int6 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) // CPP-CHECK-NEXT: ret [[TMP5]] // -svfloat32x2_t test_svldnt1_vnum_f32_x2(svcount_t pn, const float32_t *base, int64_t vnum) +svfloat32x2_t test_svldnt1_vnum_f32_x2(svcount_t pn, const float32_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svldnt1_vnum,_f32,_x2,)(pn, base, vnum); } @@ -1145,7 +1154,7 @@ svfloat32x2_t test_svldnt1_vnum_f32_x2(svcount_t pn, const float32_t *base, int6 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) // CPP-CHECK-NEXT: ret [[TMP5]] // -svfloat64x2_t test_svldnt1_vnum_f64_x2(svcount_t pn, const float64_t *base, int64_t vnum) +svfloat64x2_t test_svldnt1_vnum_f64_x2(svcount_t pn, const float64_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svldnt1_vnum,_f64,_x2,)(pn, base, vnum); } @@ -1178,7 +1187,7 @@ svfloat64x2_t test_svldnt1_vnum_f64_x2(svcount_t pn, const float64_t *base, int6 // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 24) // CPP-CHECK-NEXT: ret [[TMP9]] // -svfloat16x4_t test_svldnt1_vnum_f16_x4(svcount_t pn, const float16_t *base, int64_t vnum) +svfloat16x4_t 
test_svldnt1_vnum_f16_x4(svcount_t pn, const float16_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svldnt1_vnum,_f16,_x4,)(pn, base, vnum); } @@ -1211,7 +1220,7 @@ svfloat16x4_t test_svldnt1_vnum_f16_x4(svcount_t pn, const float16_t *base, int6 // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 12) // CPP-CHECK-NEXT: ret [[TMP9]] // -svfloat32x4_t test_svldnt1_vnum_f32_x4(svcount_t pn, const float32_t *base, int64_t vnum) +svfloat32x4_t test_svldnt1_vnum_f32_x4(svcount_t pn, const float32_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svldnt1_vnum,_f32,_x4,)(pn, base, vnum); } @@ -1244,7 +1253,7 @@ svfloat32x4_t test_svldnt1_vnum_f32_x4(svcount_t pn, const float32_t *base, int6 // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 6) // CPP-CHECK-NEXT: ret [[TMP9]] // -svfloat64x4_t test_svldnt1_vnum_f64_x4(svcount_t pn, const float64_t *base, int64_t vnum) +svfloat64x4_t test_svldnt1_vnum_f64_x4(svcount_t pn, const float64_t *base, int64_t vnum) ATTR { return SVE_ACLE_FUNC(svldnt1_vnum,_f64,_x4,)(pn, base, vnum); } diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1.c index 9efc37a1dd58e..7aa994345a8c3 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1.c @@ -1,9 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -DTEST_SME2 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | 
FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -DTEST_SME2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS @@ -13,6 +15,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME2 +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svst1_u8_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) @@ -27,7 +35,7 @@ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_u8_x2(svcount_t pn, uint8_t *base, svuint8x2_t v) +void test_svst1_u8_x2(svcount_t pn, uint8_t *base, svuint8x2_t v) ATTR { return SVE_ACLE_FUNC(svst1,_u8_x2,,)(pn, base, v); } @@ -46,7 +54,7 @@ void test_svst1_u8_x2(svcount_t pn, uint8_t *base, svuint8x2_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_u16_x2(svcount_t pn, uint16_t *base, svuint16x2_t v) +void test_svst1_u16_x2(svcount_t pn, uint16_t *base, svuint16x2_t v) ATTR { return SVE_ACLE_FUNC(svst1,_u16_x2,,)(pn, base, v); } @@ -65,7 +73,7 @@ void test_svst1_u16_x2(svcount_t pn, uint16_t *base, svuint16x2_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( 
[[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_u32_x2(svcount_t pn, uint32_t *base, svuint32x2_t v) +void test_svst1_u32_x2(svcount_t pn, uint32_t *base, svuint32x2_t v) ATTR { return SVE_ACLE_FUNC(svst1,_u32_x2,,)(pn, base, v); } @@ -84,7 +92,7 @@ void test_svst1_u32_x2(svcount_t pn, uint32_t *base, svuint32x2_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_u64_x2(svcount_t pn, uint64_t *base, svuint64x2_t v) +void test_svst1_u64_x2(svcount_t pn, uint64_t *base, svuint64x2_t v) ATTR { return SVE_ACLE_FUNC(svst1,_u64_x2,,)(pn, base, v); } @@ -107,7 +115,7 @@ void test_svst1_u64_x2(svcount_t pn, uint64_t *base, svuint64x2_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_u8_x4(svcount_t pn, uint8_t *base, svuint8x4_t v) +void test_svst1_u8_x4(svcount_t pn, uint8_t *base, svuint8x4_t v) ATTR { return SVE_ACLE_FUNC(svst1,_u8_x4,,)(pn, base, v); } @@ -130,7 +138,7 @@ void test_svst1_u8_x4(svcount_t pn, uint8_t *base, svuint8x4_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_u16_x4(svcount_t pn, uint16_t *base, svuint16x4_t v) +void test_svst1_u16_x4(svcount_t pn, uint16_t *base, svuint16x4_t v) ATTR { return SVE_ACLE_FUNC(svst1,_u16_x4,,)(pn, base, v); } @@ -153,7 +161,7 @@ void test_svst1_u16_x4(svcount_t pn, uint16_t *base, svuint16x4_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // 
CPP-CHECK-NEXT: ret void // -void test_svst1_u32_x4(svcount_t pn, uint32_t *base, svuint32x4_t v) +void test_svst1_u32_x4(svcount_t pn, uint32_t *base, svuint32x4_t v) ATTR { return SVE_ACLE_FUNC(svst1,_u32_x4,,)(pn, base, v); } @@ -176,7 +184,7 @@ void test_svst1_u32_x4(svcount_t pn, uint32_t *base, svuint32x4_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_u64_x4(svcount_t pn, uint64_t *base, svuint64x4_t v) +void test_svst1_u64_x4(svcount_t pn, uint64_t *base, svuint64x4_t v) ATTR { return SVE_ACLE_FUNC(svst1,_u64_x4,,)(pn, base, v); } @@ -195,7 +203,7 @@ void test_svst1_u64_x4(svcount_t pn, uint64_t *base, svuint64x4_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_s8_x2(svcount_t pn, int8_t *base, svint8x2_t v) +void test_svst1_s8_x2(svcount_t pn, int8_t *base, svint8x2_t v) ATTR { return SVE_ACLE_FUNC(svst1,_s8_x2,,)(pn, base, v); } @@ -214,7 +222,7 @@ void test_svst1_s8_x2(svcount_t pn, int8_t *base, svint8x2_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_s16_x2(svcount_t pn, int16_t *base, svint16x2_t v) +void test_svst1_s16_x2(svcount_t pn, int16_t *base, svint16x2_t v) ATTR { return SVE_ACLE_FUNC(svst1,_s16_x2,,)(pn, base, v); } @@ -233,7 +241,7 @@ void test_svst1_s16_x2(svcount_t pn, int16_t *base, svint16x2_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_s32_x2(svcount_t pn, int32_t *base, svint32x2_t v) +void test_svst1_s32_x2(svcount_t 
pn, int32_t *base, svint32x2_t v) ATTR { return SVE_ACLE_FUNC(svst1,_s32_x2,,)(pn, base, v); } @@ -252,7 +260,7 @@ void test_svst1_s32_x2(svcount_t pn, int32_t *base, svint32x2_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_s64_x2(svcount_t pn, int64_t *base, svint64x2_t v) +void test_svst1_s64_x2(svcount_t pn, int64_t *base, svint64x2_t v) ATTR { return SVE_ACLE_FUNC(svst1,_s64_x2,,)(pn, base, v); } @@ -275,7 +283,7 @@ void test_svst1_s64_x2(svcount_t pn, int64_t *base, svint64x2_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_s8_x4(svcount_t pn, int8_t *base, svint8x4_t v) +void test_svst1_s8_x4(svcount_t pn, int8_t *base, svint8x4_t v) ATTR { return SVE_ACLE_FUNC(svst1,_s8_x4,,)(pn, base, v); } @@ -298,7 +306,7 @@ void test_svst1_s8_x4(svcount_t pn, int8_t *base, svint8x4_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_s16_x4(svcount_t pn, int16_t *base, svint16x4_t v) +void test_svst1_s16_x4(svcount_t pn, int16_t *base, svint16x4_t v) ATTR { return SVE_ACLE_FUNC(svst1,_s16_x4,,)(pn, base, v); } @@ -321,7 +329,7 @@ void test_svst1_s16_x4(svcount_t pn, int16_t *base, svint16x4_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_s32_x4(svcount_t pn, int32_t *base, svint32x4_t v) +void test_svst1_s32_x4(svcount_t pn, int32_t *base, svint32x4_t v) ATTR { return SVE_ACLE_FUNC(svst1,_s32_x4,,)(pn, base, v); } @@ 
-344,7 +352,7 @@ void test_svst1_s32_x4(svcount_t pn, int32_t *base, svint32x4_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_s64_x4(svcount_t pn, int64_t *base, svint64x4_t v) +void test_svst1_s64_x4(svcount_t pn, int64_t *base, svint64x4_t v) ATTR { return SVE_ACLE_FUNC(svst1,_s64_x4,,)(pn, base, v); } @@ -363,7 +371,7 @@ void test_svst1_s64_x4(svcount_t pn, int64_t *base, svint64x4_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8f16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_f16_x2(svcount_t pn, float16_t *base, svfloat16x2_t v) +void test_svst1_f16_x2(svcount_t pn, float16_t *base, svfloat16x2_t v) ATTR { return SVE_ACLE_FUNC(svst1,_f16_x2,,)(pn, base, v); } @@ -382,7 +390,7 @@ void test_svst1_f16_x2(svcount_t pn, float16_t *base, svfloat16x2_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4f32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_f32_x2(svcount_t pn, float32_t *base, svfloat32x2_t v) +void test_svst1_f32_x2(svcount_t pn, float32_t *base, svfloat32x2_t v) ATTR { return SVE_ACLE_FUNC(svst1,_f32_x2,,)(pn, base, v); } @@ -401,7 +409,7 @@ void test_svst1_f32_x2(svcount_t pn, float32_t *base, svfloat32x2_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2f64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_f64_x2(svcount_t pn, float64_t *base, svfloat64x2_t v) +void test_svst1_f64_x2(svcount_t pn, float64_t *base, svfloat64x2_t v) ATTR { return SVE_ACLE_FUNC(svst1,_f64_x2,,)(pn, base, v); } @@ -424,7 +432,7 @@ void test_svst1_f64_x2(svcount_t pn, float64_t *base, svfloat64x2_t v) // 
CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_f16_x4(svcount_t pn, float16_t *base, svfloat16x4_t v) +void test_svst1_f16_x4(svcount_t pn, float16_t *base, svfloat16x4_t v) ATTR { return SVE_ACLE_FUNC(svst1,_f16_x4,,)(pn, base, v); } @@ -447,7 +455,7 @@ void test_svst1_f16_x4(svcount_t pn, float16_t *base, svfloat16x4_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_f32_x4(svcount_t pn, float32_t *base, svfloat32x4_t v) +void test_svst1_f32_x4(svcount_t pn, float32_t *base, svfloat32x4_t v) ATTR { return SVE_ACLE_FUNC(svst1,_f32_x4,,)(pn, base, v); } @@ -470,7 +478,7 @@ void test_svst1_f32_x4(svcount_t pn, float32_t *base, svfloat32x4_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_f64_x4(svcount_t pn, float64_t *base, svfloat64x4_t v) +void test_svst1_f64_x4(svcount_t pn, float64_t *base, svfloat64x4_t v) ATTR { return SVE_ACLE_FUNC(svst1,_f64_x4,,)(pn, base, v); } @@ -495,7 +503,7 @@ void test_svst1_f64_x4(svcount_t pn, float64_t *base, svfloat64x4_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_vnum_u8_x2(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x2_t v) +void test_svst1_vnum_u8_x2(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x2_t v) ATTR { return SVE_ACLE_FUNC(svst1_vnum,_u8_x2,,)(pn, base, vnum, v); } @@ -516,7 +524,7 @@ void test_svst1_vnum_u8_x2(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x2_ // 
CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_vnum_u16_x2(svcount_t pn, uint16_t *base, int64_t vnum, svuint16x2_t v) +void test_svst1_vnum_u16_x2(svcount_t pn, uint16_t *base, int64_t vnum, svuint16x2_t v) ATTR { return SVE_ACLE_FUNC(svst1_vnum,_u16_x2,,)(pn, base, vnum, v); } @@ -537,7 +545,7 @@ void test_svst1_vnum_u16_x2(svcount_t pn, uint16_t *base, int64_t vnum, svuint16 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_vnum_u32_x2(svcount_t pn, uint32_t *base, int64_t vnum, svuint32x2_t v) +void test_svst1_vnum_u32_x2(svcount_t pn, uint32_t *base, int64_t vnum, svuint32x2_t v) ATTR { return SVE_ACLE_FUNC(svst1_vnum,_u32_x2,,)(pn, base, vnum, v); } @@ -558,7 +566,7 @@ void test_svst1_vnum_u32_x2(svcount_t pn, uint32_t *base, int64_t vnum, svuint32 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_vnum_u64_x2(svcount_t pn, uint64_t *base, int64_t vnum, svuint64x2_t v) +void test_svst1_vnum_u64_x2(svcount_t pn, uint64_t *base, int64_t vnum, svuint64x2_t v) ATTR { return SVE_ACLE_FUNC(svst1_vnum,_u64_x2,,)(pn, base, vnum, v); } @@ -583,7 +591,7 @@ void test_svst1_vnum_u64_x2(svcount_t pn, uint64_t *base, int64_t vnum, svuint64 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_vnum_u8_x4(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x4_t v) +void test_svst1_vnum_u8_x4(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x4_t v) ATTR { return SVE_ACLE_FUNC(svst1_vnum,_u8_x4,,)(pn, base, 
vnum, v); } @@ -608,7 +616,7 @@ void test_svst1_vnum_u8_x4(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x4_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_vnum_u16_x4(svcount_t pn, uint16_t *base, int64_t vnum, svuint16x4_t v) +void test_svst1_vnum_u16_x4(svcount_t pn, uint16_t *base, int64_t vnum, svuint16x4_t v) ATTR { return SVE_ACLE_FUNC(svst1_vnum,_u16_x4,,)(pn, base, vnum, v); } @@ -633,7 +641,7 @@ void test_svst1_vnum_u16_x4(svcount_t pn, uint16_t *base, int64_t vnum, svuint16 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_vnum_u32_x4(svcount_t pn, uint32_t *base, int64_t vnum, svuint32x4_t v) +void test_svst1_vnum_u32_x4(svcount_t pn, uint32_t *base, int64_t vnum, svuint32x4_t v) ATTR { return SVE_ACLE_FUNC(svst1_vnum,_u32_x4,,)(pn, base, vnum, v); } @@ -658,7 +666,7 @@ void test_svst1_vnum_u32_x4(svcount_t pn, uint32_t *base, int64_t vnum, svuint32 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_vnum_u64_x4(svcount_t pn, uint64_t *base, int64_t vnum, svuint64x4_t v) +void test_svst1_vnum_u64_x4(svcount_t pn, uint64_t *base, int64_t vnum, svuint64x4_t v) ATTR { return SVE_ACLE_FUNC(svst1_vnum,_u64_x4,,)(pn, base, vnum, v); } @@ -679,7 +687,7 @@ void test_svst1_vnum_u64_x4(svcount_t pn, uint64_t *base, int64_t vnum, svuint64 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_vnum_s8_x2(svcount_t pn, int8_t *base, int64_t vnum, 
svint8x2_t v) +void test_svst1_vnum_s8_x2(svcount_t pn, int8_t *base, int64_t vnum, svint8x2_t v) ATTR { return SVE_ACLE_FUNC(svst1_vnum,_s8_x2,,)(pn, base, vnum, v); } @@ -700,7 +708,7 @@ void test_svst1_vnum_s8_x2(svcount_t pn, int8_t *base, int64_t vnum, svint8x2_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_vnum_s16_x2(svcount_t pn, int16_t *base, int64_t vnum, svint16x2_t v) +void test_svst1_vnum_s16_x2(svcount_t pn, int16_t *base, int64_t vnum, svint16x2_t v) ATTR { return SVE_ACLE_FUNC(svst1_vnum,_s16_x2,,)(pn, base, vnum, v); } @@ -721,7 +729,7 @@ void test_svst1_vnum_s16_x2(svcount_t pn, int16_t *base, int64_t vnum, svint16x2 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_vnum_s32_x2(svcount_t pn, int32_t *base, int64_t vnum, svint32x2_t v) +void test_svst1_vnum_s32_x2(svcount_t pn, int32_t *base, int64_t vnum, svint32x2_t v) ATTR { return SVE_ACLE_FUNC(svst1_vnum,_s32_x2,,)(pn, base, vnum, v); } @@ -742,7 +750,7 @@ void test_svst1_vnum_s32_x2(svcount_t pn, int32_t *base, int64_t vnum, svint32x2 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_vnum_s64_x2(svcount_t pn, int64_t *base, int64_t vnum, svint64x2_t v) +void test_svst1_vnum_s64_x2(svcount_t pn, int64_t *base, int64_t vnum, svint64x2_t v) ATTR { return SVE_ACLE_FUNC(svst1_vnum,_s64_x2,,)(pn, base, vnum, v); } @@ -767,7 +775,7 @@ void test_svst1_vnum_s64_x2(svcount_t pn, int64_t *base, int64_t vnum, svint64x2 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) // 
CPP-CHECK-NEXT: ret void // -void test_svst1_vnum_s8_x4(svcount_t pn, int8_t *base, int64_t vnum, svint8x4_t v) +void test_svst1_vnum_s8_x4(svcount_t pn, int8_t *base, int64_t vnum, svint8x4_t v) ATTR { return SVE_ACLE_FUNC(svst1_vnum,_s8_x4,,)(pn, base, vnum, v); } @@ -792,7 +800,7 @@ void test_svst1_vnum_s8_x4(svcount_t pn, int8_t *base, int64_t vnum, svint8x4_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_vnum_s16_x4(svcount_t pn, int16_t *base, int64_t vnum, svint16x4_t v) +void test_svst1_vnum_s16_x4(svcount_t pn, int16_t *base, int64_t vnum, svint16x4_t v) ATTR { return SVE_ACLE_FUNC(svst1_vnum,_s16_x4,,)(pn, base, vnum, v); } @@ -817,7 +825,7 @@ void test_svst1_vnum_s16_x4(svcount_t pn, int16_t *base, int64_t vnum, svint16x4 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_vnum_s32_x4(svcount_t pn, int32_t *base, int64_t vnum, svint32x4_t v) +void test_svst1_vnum_s32_x4(svcount_t pn, int32_t *base, int64_t vnum, svint32x4_t v) ATTR { return SVE_ACLE_FUNC(svst1_vnum,_s32_x4,,)(pn, base, vnum, v); } @@ -842,7 +850,7 @@ void test_svst1_vnum_s32_x4(svcount_t pn, int32_t *base, int64_t vnum, svint32x4 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_vnum_s64_x4(svcount_t pn, int64_t *base, int64_t vnum, svint64x4_t v) +void test_svst1_vnum_s64_x4(svcount_t pn, int64_t *base, int64_t vnum, svint64x4_t v) ATTR { return SVE_ACLE_FUNC(svst1_vnum,_s64_x4,,)(pn, base, vnum, v); } @@ -865,7 +873,7 @@ void test_svst1_vnum_s64_x4(svcount_t pn, int64_t *base, int64_t vnum, svint64x4 // 
CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8f16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_vnum_f16_x2(svcount_t pn, float16_t *base, float64_t vnum, svfloat16x2_t v) +void test_svst1_vnum_f16_x2(svcount_t pn, float16_t *base, float64_t vnum, svfloat16x2_t v) ATTR { return SVE_ACLE_FUNC(svst1_vnum,_f16_x2,,)(pn, base, vnum, v); } @@ -888,7 +896,7 @@ void test_svst1_vnum_f16_x2(svcount_t pn, float16_t *base, float64_t vnum, svflo // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4f32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_vnum_f32_x2(svcount_t pn, float32_t *base, float64_t vnum, svfloat32x2_t v) +void test_svst1_vnum_f32_x2(svcount_t pn, float32_t *base, float64_t vnum, svfloat32x2_t v) ATTR { return SVE_ACLE_FUNC(svst1_vnum,_f32_x2,,)(pn, base, vnum, v); } @@ -911,7 +919,7 @@ void test_svst1_vnum_f32_x2(svcount_t pn, float32_t *base, float64_t vnum, svflo // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2f64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svfloat64x2_t v) +void test_svst1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svfloat64x2_t v) ATTR { return SVE_ACLE_FUNC(svst1_vnum,_f64_x2,,)(pn, base, vnum, v); } @@ -938,7 +946,7 @@ void test_svst1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svflo // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_vnum_f16_x4(svcount_t pn, float16_t *base, float64_t vnum, svfloat16x4_t v) +void test_svst1_vnum_f16_x4(svcount_t pn, float16_t *base, float64_t vnum, svfloat16x4_t v) ATTR { return 
SVE_ACLE_FUNC(svst1_vnum,_f16_x4,,)(pn, base, vnum, v); } @@ -965,7 +973,7 @@ void test_svst1_vnum_f16_x4(svcount_t pn, float16_t *base, float64_t vnum, svflo // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_vnum_f32_x4(svcount_t pn, float32_t *base, float64_t vnum, svfloat32x4_t v) +void test_svst1_vnum_f32_x4(svcount_t pn, float32_t *base, float64_t vnum, svfloat32x4_t v) ATTR { return SVE_ACLE_FUNC(svst1_vnum,_f32_x4,,)(pn, base, vnum, v); } @@ -992,7 +1000,7 @@ void test_svst1_vnum_f32_x4(svcount_t pn, float32_t *base, float64_t vnum, svflo // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // -void test_svst1_vnum_f64_x4(svcount_t pn, float64_t *base, float64_t vnum, svfloat64x4_t v) +void test_svst1_vnum_f64_x4(svcount_t pn, float64_t *base, float64_t vnum, svfloat64x4_t v) ATTR { return SVE_ACLE_FUNC(svst1_vnum,_f64_x4,,)(pn, base, vnum, v); } diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_stnt1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_stnt1.c index 9b860fe7180e1..0d8696a7634a7 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_stnt1.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_stnt1.c @@ -1,9 +1,12 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -DTEST_SME2 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall 
-emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -DTEST_SME2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + #include #ifdef SVE_OVERLOADED_FORMS @@ -13,6 +16,11 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME2 +#define ATTR +#else +#define ATTR __arm_streaming +#endif // CHECK-LABEL: @test_svstnt1_u8_x2( // CHECK-NEXT: entry: @@ -28,7 +36,7 @@ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_u8_x2(svcount_t pn, uint8_t *base, svuint8x2_t v) +void test_svstnt1_u8_x2(svcount_t pn, uint8_t *base, svuint8x2_t v) ATTR { return SVE_ACLE_FUNC(svstnt1,_u8_x2,,)(pn, base, v); } @@ -48,7 +56,7 @@ void test_svstnt1_u8_x2(svcount_t pn, uint8_t *base, svuint8x2_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_u16_x2(svcount_t pn, uint16_t *base, svuint16x2_t v) +void test_svstnt1_u16_x2(svcount_t pn, uint16_t *base, svuint16x2_t v) ATTR { return SVE_ACLE_FUNC(svstnt1,_u16_x2,,)(pn, base, v); } @@ -68,7 +76,7 @@ void test_svstnt1_u16_x2(svcount_t pn, uint16_t *base, svuint16x2_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( [[TMP0]], [[TMP1]], target("aarch64.svcount") 
[[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_u32_x2(svcount_t pn, uint32_t *base, svuint32x2_t v) +void test_svstnt1_u32_x2(svcount_t pn, uint32_t *base, svuint32x2_t v) ATTR { return SVE_ACLE_FUNC(svstnt1,_u32_x2,,)(pn, base, v); } @@ -88,7 +96,7 @@ void test_svstnt1_u32_x2(svcount_t pn, uint32_t *base, svuint32x2_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_u64_x2(svcount_t pn, uint64_t *base, svuint64x2_t v) +void test_svstnt1_u64_x2(svcount_t pn, uint64_t *base, svuint64x2_t v) ATTR { return SVE_ACLE_FUNC(svstnt1,_u64_x2,,)(pn, base, v); } @@ -112,7 +120,7 @@ void test_svstnt1_u64_x2(svcount_t pn, uint64_t *base, svuint64x2_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_u8_x4(svcount_t pn, uint8_t *base, svuint8x4_t v) +void test_svstnt1_u8_x4(svcount_t pn, uint8_t *base, svuint8x4_t v) ATTR { return SVE_ACLE_FUNC(svstnt1,_u8_x4,,)(pn, base, v); } @@ -136,7 +144,7 @@ void test_svstnt1_u8_x4(svcount_t pn, uint8_t *base, svuint8x4_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_u16_x4(svcount_t pn, uint16_t *base, svuint16x4_t v) +void test_svstnt1_u16_x4(svcount_t pn, uint16_t *base, svuint16x4_t v) ATTR { return SVE_ACLE_FUNC(svstnt1,_u16_x4,,)(pn, base, v); } @@ -160,7 +168,7 @@ void test_svstnt1_u16_x4(svcount_t pn, uint16_t *base, svuint16x4_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // 
CPP-CHECK-NEXT: ret void // -void test_svstnt1_u32_x4(svcount_t pn, uint32_t *base, svuint32x4_t v) +void test_svstnt1_u32_x4(svcount_t pn, uint32_t *base, svuint32x4_t v) ATTR { return SVE_ACLE_FUNC(svstnt1,_u32_x4,,)(pn, base, v); } @@ -184,7 +192,7 @@ void test_svstnt1_u32_x4(svcount_t pn, uint32_t *base, svuint32x4_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_u64_x4(svcount_t pn, uint64_t *base, svuint64x4_t v) +void test_svstnt1_u64_x4(svcount_t pn, uint64_t *base, svuint64x4_t v) ATTR { return SVE_ACLE_FUNC(svstnt1,_u64_x4,,)(pn, base, v); } @@ -204,7 +212,7 @@ void test_svstnt1_u64_x4(svcount_t pn, uint64_t *base, svuint64x4_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_s8_x2(svcount_t pn, int8_t *base, svint8x2_t v) +void test_svstnt1_s8_x2(svcount_t pn, int8_t *base, svint8x2_t v) ATTR { return SVE_ACLE_FUNC(svstnt1,_s8_x2,,)(pn, base, v); } @@ -224,7 +232,7 @@ void test_svstnt1_s8_x2(svcount_t pn, int8_t *base, svint8x2_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_s16_x2(svcount_t pn, int16_t *base, svint16x2_t v) +void test_svstnt1_s16_x2(svcount_t pn, int16_t *base, svint16x2_t v) ATTR { return SVE_ACLE_FUNC(svstnt1,_s16_x2,,)(pn, base, v); } @@ -244,7 +252,7 @@ void test_svstnt1_s16_x2(svcount_t pn, int16_t *base, svint16x2_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_s32_x2(svcount_t pn, int32_t *base, 
svint32x2_t v) +void test_svstnt1_s32_x2(svcount_t pn, int32_t *base, svint32x2_t v) ATTR { return SVE_ACLE_FUNC(svstnt1,_s32_x2,,)(pn, base, v); } @@ -264,7 +272,7 @@ void test_svstnt1_s32_x2(svcount_t pn, int32_t *base, svint32x2_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_s64_x2(svcount_t pn, int64_t *base, svint64x2_t v) +void test_svstnt1_s64_x2(svcount_t pn, int64_t *base, svint64x2_t v) ATTR { return SVE_ACLE_FUNC(svstnt1,_s64_x2,,)(pn, base, v); } @@ -288,7 +296,7 @@ void test_svstnt1_s64_x2(svcount_t pn, int64_t *base, svint64x2_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_s8_x4(svcount_t pn, int8_t *base, svint8x4_t v) +void test_svstnt1_s8_x4(svcount_t pn, int8_t *base, svint8x4_t v) ATTR { return SVE_ACLE_FUNC(svstnt1,_s8_x4,,)(pn, base, v); } @@ -312,7 +320,7 @@ void test_svstnt1_s8_x4(svcount_t pn, int8_t *base, svint8x4_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_s16_x4(svcount_t pn, int16_t *base, svint16x4_t v) +void test_svstnt1_s16_x4(svcount_t pn, int16_t *base, svint16x4_t v) ATTR { return SVE_ACLE_FUNC(svstnt1,_s16_x4,,)(pn, base, v); } @@ -336,7 +344,7 @@ void test_svstnt1_s16_x4(svcount_t pn, int16_t *base, svint16x4_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_s32_x4(svcount_t pn, int32_t *base, svint32x4_t v) +void test_svstnt1_s32_x4(svcount_t pn, 
int32_t *base, svint32x4_t v) ATTR { return SVE_ACLE_FUNC(svstnt1,_s32_x4,,)(pn, base, v); } @@ -360,7 +368,7 @@ void test_svstnt1_s32_x4(svcount_t pn, int32_t *base, svint32x4_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_s64_x4(svcount_t pn, int64_t *base, svint64x4_t v) +void test_svstnt1_s64_x4(svcount_t pn, int64_t *base, svint64x4_t v) ATTR { return SVE_ACLE_FUNC(svstnt1,_s64_x4,,)(pn, base, v); } @@ -380,7 +388,7 @@ void test_svstnt1_s64_x4(svcount_t pn, int64_t *base, svint64x4_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8f16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_f16_x2(svcount_t pn, float16_t *base, svfloat16x2_t v) +void test_svstnt1_f16_x2(svcount_t pn, float16_t *base, svfloat16x2_t v) ATTR { return SVE_ACLE_FUNC(svstnt1,_f16_x2,,)(pn, base, v); } @@ -400,7 +408,7 @@ void test_svstnt1_f16_x2(svcount_t pn, float16_t *base, svfloat16x2_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4f32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_f32_x2(svcount_t pn, float32_t *base, svfloat32x2_t v) +void test_svstnt1_f32_x2(svcount_t pn, float32_t *base, svfloat32x2_t v) ATTR { return SVE_ACLE_FUNC(svstnt1,_f32_x2,,)(pn, base, v); } @@ -420,7 +428,7 @@ void test_svstnt1_f32_x2(svcount_t pn, float32_t *base, svfloat32x2_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2f64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_f64_x2(svcount_t pn, float64_t *base, svfloat64x2_t v) +void test_svstnt1_f64_x2(svcount_t pn, float64_t *base, svfloat64x2_t v) ATTR { return 
SVE_ACLE_FUNC(svstnt1,_f64_x2,,)(pn, base, v); } @@ -444,7 +452,7 @@ void test_svstnt1_f64_x2(svcount_t pn, float64_t *base, svfloat64x2_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_f16_x4(svcount_t pn, float16_t *base, svfloat16x4_t v) +void test_svstnt1_f16_x4(svcount_t pn, float16_t *base, svfloat16x4_t v) ATTR { return SVE_ACLE_FUNC(svstnt1,_f16_x4,,)(pn, base, v); } @@ -468,7 +476,7 @@ void test_svstnt1_f16_x4(svcount_t pn, float16_t *base, svfloat16x4_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_f32_x4(svcount_t pn, float32_t *base, svfloat32x4_t v) +void test_svstnt1_f32_x4(svcount_t pn, float32_t *base, svfloat32x4_t v) ATTR { return SVE_ACLE_FUNC(svstnt1,_f32_x4,,)(pn, base, v); } @@ -492,7 +500,7 @@ void test_svstnt1_f32_x4(svcount_t pn, float32_t *base, svfloat32x4_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_f64_x4(svcount_t pn, float64_t *base, svfloat64x4_t v) +void test_svstnt1_f64_x4(svcount_t pn, float64_t *base, svfloat64x4_t v) ATTR { return SVE_ACLE_FUNC(svstnt1,_f64_x4,,)(pn, base, v); } @@ -518,7 +526,7 @@ void test_svstnt1_f64_x4(svcount_t pn, float64_t *base, svfloat64x4_t v) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_vnum_u8_x2(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x2_t v) +void test_svstnt1_vnum_u8_x2(svcount_t pn, uint8_t *base, int64_t vnum, 
svuint8x2_t v) ATTR { return SVE_ACLE_FUNC(svstnt1_vnum,_u8_x2,,)(pn, base, vnum, v); } @@ -540,7 +548,7 @@ void test_svstnt1_vnum_u8_x2(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_vnum_u16_x2(svcount_t pn, uint16_t *base, int64_t vnum, svuint16x2_t v) +void test_svstnt1_vnum_u16_x2(svcount_t pn, uint16_t *base, int64_t vnum, svuint16x2_t v) ATTR { return SVE_ACLE_FUNC(svstnt1_vnum,_u16_x2,,)(pn, base, vnum, v); } @@ -562,7 +570,7 @@ void test_svstnt1_vnum_u16_x2(svcount_t pn, uint16_t *base, int64_t vnum, svuint // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_vnum_u32_x2(svcount_t pn, uint32_t *base, int64_t vnum, svuint32x2_t v) +void test_svstnt1_vnum_u32_x2(svcount_t pn, uint32_t *base, int64_t vnum, svuint32x2_t v) ATTR { return SVE_ACLE_FUNC(svstnt1_vnum,_u32_x2,,)(pn, base, vnum, v); } @@ -584,7 +592,7 @@ void test_svstnt1_vnum_u32_x2(svcount_t pn, uint32_t *base, int64_t vnum, svuint // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_vnum_u64_x2(svcount_t pn, uint64_t *base, int64_t vnum, svuint64x2_t v) +void test_svstnt1_vnum_u64_x2(svcount_t pn, uint64_t *base, int64_t vnum, svuint64x2_t v) ATTR { return SVE_ACLE_FUNC(svstnt1_vnum,_u64_x2,,)(pn, base, vnum, v); } @@ -610,7 +618,7 @@ void test_svstnt1_vnum_u64_x2(svcount_t pn, uint64_t *base, int64_t vnum, svuint // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // -void 
test_svstnt1_vnum_u8_x4(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x4_t v) +void test_svstnt1_vnum_u8_x4(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x4_t v) ATTR { return SVE_ACLE_FUNC(svstnt1_vnum,_u8_x4,,)(pn, base, vnum, v); } @@ -636,7 +644,7 @@ void test_svstnt1_vnum_u8_x4(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_vnum_u16_x4(svcount_t pn, uint16_t *base, int64_t vnum, svuint16x4_t v) +void test_svstnt1_vnum_u16_x4(svcount_t pn, uint16_t *base, int64_t vnum, svuint16x4_t v) ATTR { return SVE_ACLE_FUNC(svstnt1_vnum,_u16_x4,,)(pn, base, vnum, v); } @@ -662,7 +670,7 @@ void test_svstnt1_vnum_u16_x4(svcount_t pn, uint16_t *base, int64_t vnum, svuint // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_vnum_u32_x4(svcount_t pn, uint32_t *base, int64_t vnum, svuint32x4_t v) +void test_svstnt1_vnum_u32_x4(svcount_t pn, uint32_t *base, int64_t vnum, svuint32x4_t v) ATTR { return SVE_ACLE_FUNC(svstnt1_vnum,_u32_x4,,)(pn, base, vnum, v); } @@ -688,7 +696,7 @@ void test_svstnt1_vnum_u32_x4(svcount_t pn, uint32_t *base, int64_t vnum, svuint // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_vnum_u64_x4(svcount_t pn, uint64_t *base, int64_t vnum, svuint64x4_t v) +void test_svstnt1_vnum_u64_x4(svcount_t pn, uint64_t *base, int64_t vnum, svuint64x4_t v) ATTR { return SVE_ACLE_FUNC(svstnt1_vnum,_u64_x4,,)(pn, base, vnum, v); } @@ -710,7 +718,7 @@ void test_svstnt1_vnum_u64_x4(svcount_t pn, uint64_t *base, int64_t vnum, 
svuint // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_vnum_s8_x2(svcount_t pn, int8_t *base, int64_t vnum, svint8x2_t v) +void test_svstnt1_vnum_s8_x2(svcount_t pn, int8_t *base, int64_t vnum, svint8x2_t v) ATTR { return SVE_ACLE_FUNC(svstnt1_vnum,_s8_x2,,)(pn, base, vnum, v); } @@ -732,7 +740,7 @@ void test_svstnt1_vnum_s8_x2(svcount_t pn, int8_t *base, int64_t vnum, svint8x2_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_vnum_s16_x2(svcount_t pn, int16_t *base, int64_t vnum, svint16x2_t v) +void test_svstnt1_vnum_s16_x2(svcount_t pn, int16_t *base, int64_t vnum, svint16x2_t v) ATTR { return SVE_ACLE_FUNC(svstnt1_vnum,_s16_x2,,)(pn, base, vnum, v); } @@ -754,7 +762,7 @@ void test_svstnt1_vnum_s16_x2(svcount_t pn, int16_t *base, int64_t vnum, svint16 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_vnum_s32_x2(svcount_t pn, int32_t *base, int64_t vnum, svint32x2_t v) +void test_svstnt1_vnum_s32_x2(svcount_t pn, int32_t *base, int64_t vnum, svint32x2_t v) ATTR { return SVE_ACLE_FUNC(svstnt1_vnum,_s32_x2,,)(pn, base, vnum, v); } @@ -776,7 +784,7 @@ void test_svstnt1_vnum_s32_x2(svcount_t pn, int32_t *base, int64_t vnum, svint32 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_vnum_s64_x2(svcount_t pn, int64_t *base, int64_t vnum, svint64x2_t v) +void test_svstnt1_vnum_s64_x2(svcount_t pn, int64_t *base, int64_t vnum, svint64x2_t v) ATTR { return SVE_ACLE_FUNC(svstnt1_vnum,_s64_x2,,)(pn, 
base, vnum, v); } @@ -802,7 +810,7 @@ void test_svstnt1_vnum_s64_x2(svcount_t pn, int64_t *base, int64_t vnum, svint64 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_vnum_s8_x4(svcount_t pn, int8_t *base, int64_t vnum, svint8x4_t v) +void test_svstnt1_vnum_s8_x4(svcount_t pn, int8_t *base, int64_t vnum, svint8x4_t v) ATTR { return SVE_ACLE_FUNC(svstnt1_vnum,_s8_x4,,)(pn, base, vnum, v); } @@ -828,7 +836,7 @@ void test_svstnt1_vnum_s8_x4(svcount_t pn, int8_t *base, int64_t vnum, svint8x4_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_vnum_s16_x4(svcount_t pn, int16_t *base, int64_t vnum, svint16x4_t v) +void test_svstnt1_vnum_s16_x4(svcount_t pn, int16_t *base, int64_t vnum, svint16x4_t v) ATTR { return SVE_ACLE_FUNC(svstnt1_vnum,_s16_x4,,)(pn, base, vnum, v); } @@ -854,7 +862,7 @@ void test_svstnt1_vnum_s16_x4(svcount_t pn, int16_t *base, int64_t vnum, svint16 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_vnum_s32_x4(svcount_t pn, int32_t *base, int64_t vnum, svint32x4_t v) +void test_svstnt1_vnum_s32_x4(svcount_t pn, int32_t *base, int64_t vnum, svint32x4_t v) ATTR { return SVE_ACLE_FUNC(svstnt1_vnum,_s32_x4,,)(pn, base, vnum, v); } @@ -880,7 +888,7 @@ void test_svstnt1_vnum_s32_x4(svcount_t pn, int32_t *base, int64_t vnum, svint32 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_vnum_s64_x4(svcount_t pn, 
int64_t *base, int64_t vnum, svint64x4_t v) +void test_svstnt1_vnum_s64_x4(svcount_t pn, int64_t *base, int64_t vnum, svint64x4_t v) ATTR { return SVE_ACLE_FUNC(svstnt1_vnum,_s64_x4,,)(pn, base, vnum, v); } @@ -904,7 +912,7 @@ void test_svstnt1_vnum_s64_x4(svcount_t pn, int64_t *base, int64_t vnum, svint64 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8f16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_vnum_f16_x2(svcount_t pn, float16_t *base, float64_t vnum, svfloat16x2_t v) +void test_svstnt1_vnum_f16_x2(svcount_t pn, float16_t *base, float64_t vnum, svfloat16x2_t v) ATTR { return SVE_ACLE_FUNC(svstnt1_vnum,_f16_x2,,)(pn, base, vnum, v); } @@ -928,7 +936,7 @@ void test_svstnt1_vnum_f16_x2(svcount_t pn, float16_t *base, float64_t vnum, svf // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4f32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_vnum_f32_x2(svcount_t pn, float32_t *base, float64_t vnum, svfloat32x2_t v) +void test_svstnt1_vnum_f32_x2(svcount_t pn, float32_t *base, float64_t vnum, svfloat32x2_t v) ATTR { return SVE_ACLE_FUNC(svstnt1_vnum,_f32_x2,,)(pn, base, vnum, v); } @@ -952,7 +960,7 @@ void test_svstnt1_vnum_f32_x2(svcount_t pn, float32_t *base, float64_t vnum, svf // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2f64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svfloat64x2_t v) +void test_svstnt1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svfloat64x2_t v) ATTR { return SVE_ACLE_FUNC(svstnt1_vnum,_f64_x2,,)(pn, base, vnum, v); } @@ -980,7 +988,7 @@ void test_svstnt1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svf // CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sve.stnt1.pn.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_vnum_f16_x4(svcount_t pn, float16_t *base, float64_t vnum, svfloat16x4_t v) +void test_svstnt1_vnum_f16_x4(svcount_t pn, float16_t *base, float64_t vnum, svfloat16x4_t v) ATTR { return SVE_ACLE_FUNC(svstnt1_vnum,_f16_x4,,)(pn, base, vnum, v); } @@ -1008,7 +1016,7 @@ void test_svstnt1_vnum_f16_x4(svcount_t pn, float16_t *base, float64_t vnum, svf // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_vnum_f32_x4(svcount_t pn, float32_t *base, float64_t vnum, svfloat32x4_t v) +void test_svstnt1_vnum_f32_x4(svcount_t pn, float32_t *base, float64_t vnum, svfloat32x4_t v) ATTR { return SVE_ACLE_FUNC(svstnt1_vnum,_f32_x4,,)(pn, base, vnum, v); } @@ -1036,7 +1044,7 @@ void test_svstnt1_vnum_f32_x4(svcount_t pn, float32_t *base, float64_t vnum, svf // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // -void test_svstnt1_vnum_f64_x4(svcount_t pn, float64_t *base, float64_t vnum, svfloat64x4_t v) +void test_svstnt1_vnum_f64_x4(svcount_t pn, float64_t *base, float64_t vnum, svfloat64x4_t v) ATTR { return SVE_ACLE_FUNC(svstnt1_vnum,_f64_x4,,)(pn, base, vnum, v); }