diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index a1ac926ab9577..cd4c09a3ad7a8 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -310,6 +310,9 @@ let TargetGuard = "sve2p1" in {
 def SVLD1Q_GATHER_U64BASE_OFFSET : MInst<"svld1q_gather[_{2}base]_offset_{d}", "dPgl", "cUcsUsiUilUlfhdb", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">;
 def SVLD1Q_GATHER_U64BASE : MInst<"svld1q_gather[_{2}base]_{d}", "dPg", "cUcsUsiUilUlfhdb", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">;
 
+// Load one vector (scalar base + vector offset)
+def SVLD1Q_GATHER_U64OFFSET : MInst<"svld1q_gather_[{3}]offset[_{d}]", "dPcg", "cUcsUsiUilUlfhdb", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_vector_offset">;
+
 // Load N-element structure into N vectors (scalar base)
 defm SVLD2Q : StructLoad<"svld2q[_{2}]", "2Pc", "aarch64_sve_ld2q_sret">;
 defm SVLD3Q : StructLoad<"svld3q[_{2}]", "3Pc", "aarch64_sve_ld3q_sret">;
@@ -461,6 +464,9 @@ let TargetGuard = "sve2p1" in {
 def SVST1Q_SCATTER_U64BASE_OFFSET : MInst<"svst1q_scatter[_{2}base]_offset[_{d}]", "vPgld", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">;
 def SVST1Q_SCATTER_U64BASE : MInst<"svst1q_scatter[_{2}base][_{d}]", "vPgd", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">;
 
+// Store one vector (scalar base + vector offset)
+def SVST1Q_SCATTER_U64OFFSET : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPpgd", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">;
+
 // Store N vectors into N-element structure (scalar base)
 defm SVST2Q : StructStore<"svst2q[_{d}]", "vPc2", "aarch64_sve_st2q">;
 defm SVST3Q : StructStore<"svst3q[_{d}]", "vPc3", "aarch64_sve_st3q">;
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_loads.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_loads.c
index 44351347e4cf0..ae3ddd416f7ee 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_loads.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_loads.c
@@ -2868,3 +2868,195 @@ svfloat32_t test_svld1q_gather_u64base_index_f32(svbool_t pg, svuint64_t base, i
 svfloat64_t test_svld1q_gather_u64base_index_f64(svbool_t pg, svuint64_t base, int64_t idx) {
   return SVE_ACLE_FUNC(svld1q_gather,_u64base,_index_f64,)(pg, base, idx);
 }
+
+// CHECK-LABEL: @test_svld1q_gather_u64offset_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv16i8(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z31test_svld1q_gather_u64offset_s8u10__SVBool_tPKau12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv16i8(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
+//
+svint8_t test_svld1q_gather_u64offset_s8(svbool_t pg, const int8_t *base, svuint64_t off) {
+  return SVE_ACLE_FUNC(svld1q_gather_,u64,offset,_s8)(pg, base, off);
+}
+
+// CHECK-LABEL: @test_svld1q_gather_u64offset_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv16i8(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z31test_svld1q_gather_u64offset_u8u10__SVBool_tPKhu12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv16i8(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
+//
+svuint8_t test_svld1q_gather_u64offset_u8(svbool_t pg, const uint8_t *base, svuint64_t off) {
+  return SVE_ACLE_FUNC(svld1q_gather_,u64,offset,_u8)(pg, base, off);
+}
+
+// CHECK-LABEL: @test_svld1q_gather_u64offset_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8i16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z32test_svld1q_gather_u64offset_s16u10__SVBool_tPKsu12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8i16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+svint16_t test_svld1q_gather_u64offset_s16(svbool_t pg, const int16_t *base, svuint64_t off) {
+  return SVE_ACLE_FUNC(svld1q_gather_,u64,offset,_s16)(pg, base, off);
+}
+
+// CHECK-LABEL: @test_svld1q_gather_u64offset_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8i16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z32test_svld1q_gather_u64offset_u16u10__SVBool_tPKtu12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8i16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+svuint16_t test_svld1q_gather_u64offset_u16(svbool_t pg, const uint16_t *base, svuint64_t off) {
+  return SVE_ACLE_FUNC(svld1q_gather_,u64,offset,_u16)(pg, base, off);
+}
+
+// CHECK-LABEL: @test_svld1q_gather_u64offset_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv4i32(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z32test_svld1q_gather_u64offset_s32u10__SVBool_tPKiu12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv4i32(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+svint32_t test_svld1q_gather_u64offset_s32(svbool_t pg, const int32_t *base, svuint64_t off) {
+  return SVE_ACLE_FUNC(svld1q_gather_,u64,offset,_s32)(pg, base, off);
+}
+
+// CHECK-LABEL: @test_svld1q_gather_u64offset_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv4i32(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z32test_svld1q_gather_u64offset_u32u10__SVBool_tPKju12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv4i32(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+svuint32_t test_svld1q_gather_u64offset_u32(svbool_t pg, const uint32_t *base, svuint64_t off) {
+  return SVE_ACLE_FUNC(svld1q_gather_,u64,offset,_u32)(pg, base, off);
+}
+
+// CHECK-LABEL: @test_svld1q_gather_u64offset_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv2i64(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z32test_svld1q_gather_u64offset_s64u10__SVBool_tPKlu12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv2i64(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+svint64_t test_svld1q_gather_u64offset_s64(svbool_t pg, const int64_t *base, svuint64_t off) {
+  return SVE_ACLE_FUNC(svld1q_gather_,u64,offset,_s64)(pg, base, off);
+}
+
+// CHECK-LABEL: @test_svld1q_gather_u64offset_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv2i64(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z32test_svld1q_gather_u64offset_u64u10__SVBool_tPKmu12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv2i64(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+svuint64_t test_svld1q_gather_u64offset_u64(svbool_t pg, const uint64_t *base, svuint64_t off) {
+  return SVE_ACLE_FUNC(svld1q_gather_,u64,offset,_u64)(pg, base, off);
+}
+
+// CHECK-LABEL: @test_svld1q_gather_u64offset_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8bf16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z33test_svld1q_gather_u64offset_bf16u10__SVBool_tPKu6__bf16u12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8bf16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
+//
+svbfloat16_t test_svld1q_gather_u64offset_bf16(svbool_t pg, const bfloat16_t *base, svuint64_t off) {
+  return SVE_ACLE_FUNC(svld1q_gather_,u64,offset,_bf16)(pg, base, off);
+}
+
+// CHECK-LABEL: @test_svld1q_gather_u64offset_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8f16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z32test_svld1q_gather_u64offset_f16u10__SVBool_tPKDhu12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8f16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_svld1q_gather_u64offset_f16(svbool_t pg, const float16_t *base, svuint64_t off) {
+  return SVE_ACLE_FUNC(svld1q_gather_,u64,offset,_f16)(pg, base, off);
+}
+
+// CHECK-LABEL: @test_svld1q_gather_u64offset_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv4f32(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z32test_svld1q_gather_u64offset_f32u10__SVBool_tPKfu12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv4f32(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_svld1q_gather_u64offset_f32(svbool_t pg, const float32_t *base, svuint64_t off) {
+  return SVE_ACLE_FUNC(svld1q_gather_,u64,offset,_f32)(pg, base, off);
+}
+
+// CHECK-LABEL: @test_svld1q_gather_u64offset_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv2f64(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z32test_svld1q_gather_u64offset_f64u10__SVBool_tPKdu12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv2f64(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_svld1q_gather_u64offset_f64(svbool_t pg, const float64_t *base, svuint64_t off) {
+  return SVE_ACLE_FUNC(svld1q_gather_,u64,offset,_f64)(pg, base, off);
+}
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_store.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_store.c
index 137801cc0814a..2cbea29d33904 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_store.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_store.c
@@ -2470,3 +2470,195 @@ void test_svst1q_scatter_u64base_index_f32(svbool_t pg, svuint64_t base, int64_t
 void test_svst1q_scatter_u64base_index_f64(svbool_t pg, svuint64_t base, int64_t idx, svfloat64_t data) {
   SVE_ACLE_FUNC(svst1q_scatter,_u64base,_index,_f64)(pg, base, idx, data);
 }
+
+// CHECK-LABEL: @test_svst1q_scatter_u64offset_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64offset_s8u10__SVBool_tPau12__SVUint64_tu10__SVInt8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1q_scatter_u64offset_s8(svbool_t pg, int8_t *base, svuint64_t off, svint8_t data) {
+  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_s8)(pg, base, off, data);
+}
+
+// CHECK-LABEL: @test_svst1q_scatter_u64offset_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64offset_u8u10__SVBool_tPhu12__SVUint64_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1q_scatter_u64offset_u8(svbool_t pg, uint8_t *base, svuint64_t off, svuint8_t data) {
+  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_u8)(pg, base, off, data);
+}
+
+// CHECK-LABEL: @test_svst1q_scatter_u64offset_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64offset_s16u10__SVBool_tPsu12__SVUint64_tu11__SVInt16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1q_scatter_u64offset_s16(svbool_t pg, int16_t *base, svuint64_t off, svint16_t data) {
+  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_s16)(pg, base, off, data);
+}
+
+// CHECK-LABEL: @test_svst1q_scatter_u64offset_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64offset_u16u10__SVBool_tPtu12__SVUint64_tu12__SVUint16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1q_scatter_u64offset_u16(svbool_t pg, uint16_t *base, svuint64_t off, svuint16_t data) {
+  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_u16)(pg, base, off, data);
+}
+
+// CHECK-LABEL: @test_svst1q_scatter_u64offset_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64offset_s32u10__SVBool_tPiu12__SVUint64_tu11__SVInt32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1q_scatter_u64offset_s32(svbool_t pg, int32_t *base, svuint64_t off, svint32_t data) {
+  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_s32)(pg, base, off, data);
+}
+
+// CHECK-LABEL: @test_svst1q_scatter_u64offset_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64offset_u32u10__SVBool_tPju12__SVUint64_tu12__SVUint32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1q_scatter_u64offset_u32(svbool_t pg, uint32_t *base, svuint64_t off, svuint32_t data) {
+  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_u32)(pg, base, off, data);
+}
+
+// CHECK-LABEL: @test_svst1q_scatter_u64offset_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64offset_s64u10__SVBool_tPlu12__SVUint64_tu11__SVInt64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1q_scatter_u64offset_s64(svbool_t pg, int64_t *base, svuint64_t off, svint64_t data) {
+  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_s64)(pg, base, off, data);
+}
+
+// CHECK-LABEL: @test_svst1q_scatter_u64offset_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64offset_u64u10__SVBool_tPmu12__SVUint64_tS1_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1q_scatter_u64offset_u64(svbool_t pg, uint64_t *base, svuint64_t off, svuint64_t data) {
+  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_u64)(pg, base, off, data);
+}
+
+// CHECK-LABEL: @test_svst1q_scatter_u64offset_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z34test_svst1q_scatter_u64offset_bf16u10__SVBool_tPu6__bf16u12__SVUint64_tu14__SVBfloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1q_scatter_u64offset_bf16(svbool_t pg, bfloat16_t *base, svuint64_t off, svbfloat16_t data) {
+  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_bf16)(pg, base, off, data);
+}
+
+// CHECK-LABEL: @test_svst1q_scatter_u64offset_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64offset_f16u10__SVBool_tPDhu12__SVUint64_tu13__SVFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1q_scatter_u64offset_f16(svbool_t pg, float16_t *base, svuint64_t off, svfloat16_t data) {
+  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_f16)(pg, base, off, data);
+}
+
+// CHECK-LABEL: @test_svst1q_scatter_u64offset_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64offset_f32u10__SVBool_tPfu12__SVUint64_tu13__SVFloat32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1q_scatter_u64offset_f32(svbool_t pg, float32_t *base, svuint64_t off, svfloat32_t data) {
+  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_f32)(pg, base, off, data);
+}
+
+// CHECK-LABEL: @test_svst1q_scatter_u64offset_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64offset_f64u10__SVBool_tPdu12__SVUint64_tu13__SVFloat64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1q_scatter_u64offset_f64(svbool_t pg, float64_t *base, svuint64_t off, svfloat64_t data) {
+  SVE_ACLE_FUNC(svst1q_scatter_,u64,offset,_f64)(pg, base, off, data);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index a558a1eca84af..60a8d98f3bc0d 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2166,6 +2166,9 @@ def int_aarch64_sve_ld1_gather_uxtw_index : AdvSIMD_GatherLoad_SV_32b_Offsets_In
 // 128-bit loads, scaled offsets (indices)
 def int_aarch64_sve_ld1q_gather_index : AdvSIMD_GatherLoadQ_SV_Intrinsic;
 
+// 128-bit loads, unscaled offsets
+def int_aarch64_sve_ld1q_gather_vector_offset : AdvSIMD_GatherLoadQ_SV_Intrinsic;
+
 //
 // Gather loads: vector base + scalar offset
 //
@@ -2247,6 +2250,9 @@ def int_aarch64_sve_st1_scatter_uxtw_index
 // 128-bit stores, scaled offsets (indices)
 def int_aarch64_sve_st1q_scatter_index : AdvSIMD_ScatterStoreQ_SV_Intrinsic;
 
+// 128-bit stores, unscaled offsets
+def int_aarch64_sve_st1q_scatter_vector_offset : AdvSIMD_ScatterStoreQ_SV_Intrinsic;
+
 //
 // Scatter stores: vector base + scalar offset
 //
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c48e8a5610142..28e038abcecfc 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -23748,6 +23748,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     case Intrinsic::aarch64_sve_ld1_gather:
       return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_MERGE_ZERO);
     case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
+    case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
      return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1Q_MERGE_ZERO);
     case Intrinsic::aarch64_sve_ld1q_gather_index:
       return performGatherLoadCombine(N, DAG,
@@ -23796,6 +23797,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
       return performGatherLoadCombine(N, DAG,
                                       AArch64ISD::GLDFF1_IMM_MERGE_ZERO);
     case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
+    case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
       return performScatterStoreCombine(N, DAG, AArch64ISD::SST1Q_PRED);
     case Intrinsic::aarch64_sve_st1q_scatter_index:
       return performScatterStoreCombine(N, DAG, AArch64ISD::SST1Q_INDEX_PRED);
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-gather-loads-128bit-unscaled-offset.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-gather-loads-128bit-unscaled-offset.ll
index 64f15897ebb9a..8bee44be9f0cd 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-gather-loads-128bit-unscaled-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-gather-loads-128bit-unscaled-offset.ll
@@ -94,6 +94,126 @@ define <vscale x 8 x bfloat> @ld1q_gather_u64base_bf16(<vscale x 1 x i1> %pg, <vscal
   ret <vscale x 8 x bfloat> %load
 }
 
+define <vscale x 16 x i8> @test_svld1q_gather_u64offset_s8(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off) {
+; CHECK-LABEL: test_svld1q_gather_u64offset_s8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv16i8(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svld1q_gather_u64offset_u8(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off) {
+; CHECK-LABEL: test_svld1q_gather_u64offset_u8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv16i8(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svld1q_gather_u64offset_s16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off) {
+; CHECK-LABEL: test_svld1q_gather_u64offset_s16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8i16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svld1q_gather_u64offset_u16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off) {
+; CHECK-LABEL: test_svld1q_gather_u64offset_u16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8i16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svld1q_gather_u64offset_s32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off) {
+; CHECK-LABEL: test_svld1q_gather_u64offset_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv4i32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svld1q_gather_u64offset_u32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off) {
+; CHECK-LABEL: test_svld1q_gather_u64offset_u32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv4i32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svld1q_gather_u64offset_s64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off) {
+; CHECK-LABEL: test_svld1q_gather_u64offset_s64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv2i64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svld1q_gather_u64offset_u64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off) {
+; CHECK-LABEL: test_svld1q_gather_u64offset_u64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv2i64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 8 x bfloat> @test_svld1q_gather_u64offset_bf16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off) {
+; CHECK-LABEL: test_svld1q_gather_u64offset_bf16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8bf16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret <vscale x 8 x bfloat> %0
+}
+
+define <vscale x 8 x half> @test_svld1q_gather_u64offset_f16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off) {
+; CHECK-LABEL: test_svld1q_gather_u64offset_f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8f16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 4 x float> @test_svld1q_gather_u64offset_f32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off) {
+; CHECK-LABEL: test_svld1q_gather_u64offset_f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv4f32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 2 x double> @test_svld1q_gather_u64offset_f64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off) {
+; CHECK-LABEL: test_svld1q_gather_u64offset_f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv2f64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret <vscale x 2 x double> %0
+}
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64(<vscale x 1 x i1>, <vscale x 2 x i64>, i64)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8i16.nxv2i64(<vscale x 1 x i1>, <vscale x 2 x i64>, i64)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4i32.nxv2i64(<vscale x 1 x i1>, <vscale x 2 x i64>, i64)
@@ -102,4 +222,11 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8f16.
 declare <vscale x 4 x float> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4f32.nxv2i64(<vscale x 1 x i1>, <vscale x 2 x i64>, i64)
 declare <vscale x 2 x double> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 1 x i1>, <vscale x 2 x i64>, i64)
 declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8bf16.nxv2i64(<vscale x 1 x i1>, <vscale x 2 x i64>, i64)
-
+declare <vscale x 16 x i8> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv16i8(<vscale x 1 x i1>, ptr, <vscale x 2 x i64>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8i16(<vscale x 1 x i1>, ptr, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv4i32(<vscale x 1 x i1>, ptr, <vscale x 2 x i64>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv2i64(<vscale x 1 x i1>, ptr, <vscale x 2 x i64>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8bf16(<vscale x 1 x i1>, ptr, <vscale x 2 x i64>)
+declare <vscale x 8 x half> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8f16(<vscale x 1 x i1>, ptr, <vscale x 2 x i64>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv4f32(<vscale x 1 x i1>, ptr, <vscale x 2 x i64>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv2f64(<vscale x 1 x i1>, ptr, <vscale x 2 x i64>)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-scatter-stores-128bit-unscaled-offset.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-scatter-stores-128bit-unscaled-offset.ll
index c62df1d8d254c..6493640c06abd 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-scatter-stores-128bit-unscaled-offset.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-scatter-stores-128bit-unscaled-offset.ll
@@ -102,12 +102,139 @@ define void @sst1_scatter_u64base_offset_bf16(<vscale x 8 x bfloat> %data, <vscal
   ret void
 }
 
+define void @test_svst1q_scatter_u64offset_s8(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off, <vscale x 16 x i8> %data) {
+; CHECK-LABEL: test_svst1q_scatter_u64offset_s8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    st1q { z1.q }, p0, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8(<vscale x 16 x i8> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret void
+}
+
+define void @test_svst1q_scatter_u64offset_u8(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off, <vscale x 16 x i8> %data) {
+; CHECK-LABEL: test_svst1q_scatter_u64offset_u8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    st1q { z1.q }, p0, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8(<vscale x 16 x i8> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret void
+}
+
+define void @test_svst1q_scatter_u64offset_s16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off, <vscale x 8 x i16> %data) {
+; CHECK-LABEL: test_svst1q_scatter_u64offset_s16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    st1q { z1.q }, p0, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8i16(<vscale x 8 x i16> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret void
+}
+
+define void @test_svst1q_scatter_u64offset_u16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off, <vscale x 8 x i16> %data) {
+; CHECK-LABEL: test_svst1q_scatter_u64offset_u16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    st1q { z1.q }, p0, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8i16(<vscale x 8 x i16> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret void
+}
+
+define void @test_svst1q_scatter_u64offset_s32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off, <vscale x 4 x i32> %data) {
+; CHECK-LABEL: test_svst1q_scatter_u64offset_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    st1q { z1.q }, p0, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv4i32(<vscale x 4 x i32> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret void
+}
+
+define void @test_svst1q_scatter_u64offset_u32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off, <vscale x 4 x i32> %data) {
+; CHECK-LABEL: test_svst1q_scatter_u64offset_u32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    st1q { z1.q }, p0, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv4i32(<vscale x 4 x i32> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret void
+}
+
+define void @test_svst1q_scatter_u64offset_s64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off, <vscale x 2 x i64> %data) {
+; CHECK-LABEL: test_svst1q_scatter_u64offset_s64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    st1q { z1.q }, p0, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv2i64(<vscale x 2 x i64> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret void
+}
+
+define void @test_svst1q_scatter_u64offset_u64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off, <vscale x 2 x i64> %data) {
+; CHECK-LABEL: test_svst1q_scatter_u64offset_u64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    st1q { z1.q }, p0, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv2i64(<vscale x 2 x i64> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret void
+}
+
+define void @test_svst1q_scatter_u64offset_bf16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off, <vscale x 8 x bfloat> %data) {
+; CHECK-LABEL: test_svst1q_scatter_u64offset_bf16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    st1q { z1.q }, p0, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8bf16(<vscale x 8 x bfloat> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret void
+}
+
+define void @test_svst1q_scatter_u64offset_f16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off, <vscale x 8 x half> %data) {
+; CHECK-LABEL: test_svst1q_scatter_u64offset_f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    st1q { z1.q }, p0, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8f16(<vscale x 8 x half> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret void
+}
+
+define void @test_svst1q_scatter_u64offset_f32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off, <vscale x 4 x float> %data) {
+; CHECK-LABEL: test_svst1q_scatter_u64offset_f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    st1q { z1.q }, p0, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv4f32(<vscale x 4 x float> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret void
+}
+
+define void @test_svst1q_scatter_u64offset_f64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off, <vscale x 2 x double> %data) {
+; CHECK-LABEL: test_svst1q_scatter_u64offset_f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    st1q { z1.q }, p0, [z0.d, x0]
+; CHECK-NEXT:    ret
+entry:
+  tail call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv2f64(<vscale x 2 x double> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %off)
+  ret void
+}
+
 declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64(<vscale x 16 x i8>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64)
 declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64(<vscale x 8 x i16>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64)
 declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(<vscale x 4 x i32>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64)
 declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64)
-
 declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8f16.nxv2i64(<vscale x 8 x half>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64)
 declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4f32.nxv2i64(<vscale x 4 x float>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64)
 declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64)
 declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8bf16.nxv2i64(<vscale x 8 x bfloat>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64)
+declare void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv16i8(<vscale x 16 x i8>, <vscale x 1 x i1>, ptr, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8i16(<vscale x 8 x i16>, <vscale x 1 x i1>, ptr, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv4i32(<vscale x 4 x i32>, <vscale x 1 x i1>, ptr, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv2i64(<vscale x 2 x i64>, <vscale x 1 x i1>, ptr, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 1 x i1>, ptr, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8f16(<vscale x 8 x half>, <vscale x 1 x i1>, ptr, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv4f32(<vscale x 4 x float>, <vscale x 1 x i1>, ptr, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv2f64(<vscale x 2 x double>, <vscale x 1 x i1>, ptr, <vscale x 2 x i64>)
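
Usage note (illustrative, not part of the patch): a minimal sketch of how the new ACLE forms are called together. The names svld1q_gather_u64offset_f64 and svst1q_scatter_u64offset_f64 follow from the svld1q_gather_[{3}]offset[_{d}] and svst1q_scatter_[{3}]offset[_{d}] prototypes above, with the exact signatures shown in the clang tests; the driver function, its name, and the svptrue_b64() predicate choice are assumptions for illustration only. Requires an SVE2.1-enabled toolchain (TargetGuard "sve2p1", e.g. -march=armv9-a+sve2p1).

    #include <arm_sve.h>

    // Gather float64 data from per-element byte offsets off src, then scatter
    // it to the same byte offsets off dst. The *_u64offset forms take a scalar
    // base plus a vector of unscaled (byte) offsets, per the IsByteIndexed
    // flag in arm_sve.td above.
    void copy_at_offsets(const float64_t *src, float64_t *dst,
                         svuint64_t byte_off) {
      svbool_t pg = svptrue_b64();  // illustrative predicate choice
      svfloat64_t v = svld1q_gather_u64offset_f64(pg, src, byte_off);
      svst1q_scatter_u64offset_f64(pg, dst, byte_off, v);
    }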