diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index abab117cc3f83..7cb61bafb5ad3 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -8951,6 +8951,24 @@ EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
   return VT.changeVectorElementTypeToInteger();
 }
 
+// Allow merging stores into MemVT only if MemVT is no wider than the native
+// integer width or, unless the function is noimplicitfloat, the LSX/LASX width.
+bool LoongArchTargetLowering::canMergeStoresTo(
+    unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const {
+  // Functions marked noimplicitfloat are limited to the native integer width
+  // (64 bits on LA64, otherwise 32) so no 128/256-bit vector store is formed.
+  unsigned MaxSizeInBits = Subtarget.is64Bit() ? 64 : 32;
+  if (!MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
+    // Otherwise allow up to the widest supported vector width.
+    if (Subtarget.hasExtLASX())
+      MaxSizeInBits = 256;
+    else if (Subtarget.hasExtLSX())
+      MaxSizeInBits = 128;
+  }
+
+  return MemVT.getSizeInBits() <= MaxSizeInBits;
+}
+
 bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
   EVT VT = Y.getValueType();
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index aed7803af275e..dc77154c8a696 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -56,6 +56,8 @@ class LoongArchTargetLowering : public TargetLowering {
                       SelectionDAG &DAG) const override;
   SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
                     SmallVectorImpl<SDValue> &InVals) const override;
+  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
+                        const MachineFunction &MF) const override;
   bool isCheapToSpeculateCttz(Type *Ty) const override;
   bool isCheapToSpeculateCtlz(Type *Ty) const override;
   bool hasAndNot(SDValue Y) const override;
diff --git a/llvm/test/CodeGen/LoongArch/mergestores.ll b/llvm/test/CodeGen/LoongArch/mergestores.ll
new file mode 100644
index 0000000000000..e580dfe176493
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/mergestores.ll
@@ -0,0 +1,137 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 -mattr=+d,-lsx < %s | FileCheck %s --check-prefix=LA64
+; RUN: llc --mtriple=loongarch32 -mattr=+d < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 -mattr=+d,+lsx < %s | FileCheck %s --check-prefix=LSX
+; RUN: llc --mtriple=loongarch64 -mattr=+d,+lasx < %s | FileCheck %s --check-prefix=LASX
+
+; ISSUE-186645 - Merging 64-bit operations into 128-bit operations is not legal
+; when the noimplicitfloat attribute is set. Instead, separate integer loads and
+; stores are generated, without using floating-point or vector registers.
+
+define void @memcpy16_noimplicitfloat(ptr %p0, ptr %p1) noimplicitfloat {
+; LA64-LABEL: memcpy16_noimplicitfloat:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ld.d $a2, $a1, 8
+; LA64-NEXT:    st.d $a2, $a0, 8
+; LA64-NEXT:    ld.d $a1, $a1, 0
+; LA64-NEXT:    st.d $a1, $a0, 0
+; LA64-NEXT:    ret
+;
+; LA32-LABEL: memcpy16_noimplicitfloat:
+; LA32:       # %bb.0:
+; LA32-NEXT:    ld.w $a2, $a1, 12
+; LA32-NEXT:    st.w $a2, $a0, 12
+; LA32-NEXT:    ld.w $a2, $a1, 8
+; LA32-NEXT:    st.w $a2, $a0, 8
+; LA32-NEXT:    ld.w $a2, $a1, 4
+; LA32-NEXT:    st.w $a2, $a0, 4
+; LA32-NEXT:    ld.w $a1, $a1, 0
+; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    ret
+;
+; LSX-LABEL: memcpy16_noimplicitfloat:
+; LSX:       # %bb.0:
+; LSX-NEXT:    ld.d $a2, $a1, 8
+; LSX-NEXT:    st.d $a2, $a0, 8
+; LSX-NEXT:    ld.d $a1, $a1, 0
+; LSX-NEXT:    st.d $a1, $a0, 0
+; LSX-NEXT:    ret
+;
+; LASX-LABEL: memcpy16_noimplicitfloat:
+; LASX:       # %bb.0:
+; LASX-NEXT:    ld.d $a2, $a1, 8
+; LASX-NEXT:    st.d $a2, $a0, 8
+; LASX-NEXT:    ld.d $a1, $a1, 0
+; LASX-NEXT:    st.d $a1, $a0, 0
+; LASX-NEXT:    ret
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %p0, ptr align 4 %p1, i64 16, i1 false)
+  ret void
+}
+
+define void @memcpy16_aligned(ptr %p0, ptr %p1) {
+; LA64-LABEL: memcpy16_aligned:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ld.d $a2, $a1, 8
+; LA64-NEXT:    st.d $a2, $a0, 8
+; LA64-NEXT:    ld.d $a1, $a1, 0
+; LA64-NEXT:    st.d $a1, $a0, 0
+; LA64-NEXT:    ret
+;
+; LA32-LABEL: memcpy16_aligned:
+; LA32:       # %bb.0:
+; LA32-NEXT:    ld.w $a2, $a1, 12
+; LA32-NEXT:    st.w $a2, $a0, 12
+; LA32-NEXT:    ld.w $a2, $a1, 8
+; LA32-NEXT:    st.w $a2, $a0, 8
+; LA32-NEXT:    ld.w $a2, $a1, 4
+; LA32-NEXT:    st.w $a2, $a0, 4
+; LA32-NEXT:    ld.w $a1, $a1, 0
+; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    ret
+;
+; LSX-LABEL: memcpy16_aligned:
+; LSX:       # %bb.0:
+; LSX-NEXT:    vld $vr0, $a1, 0
+; LSX-NEXT:    vst $vr0, $a0, 0
+; LSX-NEXT:    ret
+;
+; LASX-LABEL: memcpy16_aligned:
+; LASX:       # %bb.0:
+; LASX-NEXT:    vld $vr0, $a1, 0
+; LASX-NEXT:    vst $vr0, $a0, 0
+; LASX-NEXT:    ret
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %p0, ptr align 4 %p1, i64 16, i1 false)
+  ret void
+}
+
+define void @memcpy32_aligned(ptr %p0, ptr %p1) {
+; LA64-LABEL: memcpy32_aligned:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ld.d $a2, $a1, 24
+; LA64-NEXT:    st.d $a2, $a0, 24
+; LA64-NEXT:    ld.d $a2, $a1, 16
+; LA64-NEXT:    st.d $a2, $a0, 16
+; LA64-NEXT:    ld.d $a2, $a1, 8
+; LA64-NEXT:    st.d $a2, $a0, 8
+; LA64-NEXT:    ld.d $a1, $a1, 0
+; LA64-NEXT:    st.d $a1, $a0, 0
+; LA64-NEXT:    ret
+;
+; LA32-LABEL: memcpy32_aligned:
+; LA32:       # %bb.0:
+; LA32-NEXT:    ld.w $a2, $a1, 28
+; LA32-NEXT:    st.w $a2, $a0, 28
+; LA32-NEXT:    ld.w $a2, $a1, 24
+; LA32-NEXT:    st.w $a2, $a0, 24
+; LA32-NEXT:    ld.w $a2, $a1, 20
+; LA32-NEXT:    st.w $a2, $a0, 20
+; LA32-NEXT:    ld.w $a2, $a1, 16
+; LA32-NEXT:    st.w $a2, $a0, 16
+; LA32-NEXT:    ld.w $a2, $a1, 12
+; LA32-NEXT:    st.w $a2, $a0, 12
+; LA32-NEXT:    ld.w $a2, $a1, 8
+; LA32-NEXT:    st.w $a2, $a0, 8
+; LA32-NEXT:    ld.w $a2, $a1, 4
+; LA32-NEXT:    st.w $a2, $a0, 4
+; LA32-NEXT:    ld.w $a1, $a1, 0
+; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    ret
+;
+; LSX-LABEL: memcpy32_aligned:
+; LSX:       # %bb.0:
+; LSX-NEXT:    vld $vr0, $a1, 16
+; LSX-NEXT:    vst $vr0, $a0, 16
+; LSX-NEXT:    vld $vr0, $a1, 0
+; LSX-NEXT:    vst $vr0, $a0, 0
+; LSX-NEXT:    ret
+;
+; LASX-LABEL: memcpy32_aligned:
+; LASX:       # %bb.0:
+; LASX-NEXT:    xvld $xr0, $a1, 0
+; LASX-NEXT:    xvst $xr0, $a0, 0
+; LASX-NEXT:    ret
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %p0, ptr align 4 %p1, i64 32, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)