[SelectionDAG] Widen <2 x T> vector types for atomic store#197618
Conversation
|
@llvm/pr-subscribers-llvm-selectiondag Author: jofrn Changes: Vector types of 2 elements must be widened. This change does this for vector types of atomic store in SelectionDAG so that it can translate aligned vectors of >1 size. Store-side counterpart to #148897. Stacked on top of #197166. Full diff: https://github.com/llvm/llvm-project/pull/197618.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 9c37eb8065ba5..a1c0e68049544 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1104,6 +1104,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue WidenVecOp_EXTEND_VECTOR_INREG(SDNode *N);
SDValue WidenVecOp_FAKE_USE(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
+ SDValue WidenVecOp_ATOMIC_STORE(AtomicSDNode *ST);
SDValue WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo);
SDValue WidenVecOp_VP_STRIDED_STORE(SDNode *N, unsigned OpNo);
SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index c6fc5e2152528..1cc78382f025f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -7445,6 +7445,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
+ case ISD::ATOMIC_STORE:
+ Res = WidenVecOp_ATOMIC_STORE(cast<AtomicSDNode>(N));
+ break;
case ISD::VP_STORE: Res = WidenVecOp_VP_STORE(N, OpNo); break;
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
Res = WidenVecOp_VP_STRIDED_STORE(N, OpNo);
@@ -8061,6 +8064,59 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
report_fatal_error("Unable to widen vector store");
}
+/// Inverse of coerceLoadedValue: pull a FirstVT-sized scalar/vector out of the
+/// widened value so it can be issued in a single atomic store.
+static SDValue coerceStoredValue(SDValue StVal, EVT FirstVT, EVT WidenVT,
+ TypeSize FirstVTWidth, SDLoc dl,
+ SelectionDAG &DAG) {
+ TypeSize WidenWidth = WidenVT.getSizeInBits();
+ if (!FirstVT.isVector()) {
+ unsigned NumElts =
+ WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, StVal);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, FirstVT, VecOp,
+ DAG.getVectorIdxConstant(0, dl));
+ }
+ assert(FirstVT == WidenVT && "First value type must equal widen value type");
+ return StVal;
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_ATOMIC_STORE(AtomicSDNode *ST) {
+ EVT StVT = ST->getMemoryVT();
+ SDLoc dl(ST);
+ assert(StVT.isVector() && "Expected vector");
+
+ SDValue StVal = GetWidenedVector(ST->getVal());
+ EVT WidenVT = StVal.getValueType();
+ assert(WidenVT.isVector() && "Expected vector");
+ assert(StVT.isScalableVector() == WidenVT.isScalableVector() &&
+ "Must be scalable");
+ assert(StVT.getVectorElementType() == WidenVT.getVectorElementType() &&
+ "Expected equivalent element types");
+
+ TypeSize StWidth = StVT.getSizeInBits();
+ TypeSize WidenWidth = WidenVT.getSizeInBits();
+ TypeSize WidthDiff = WidenWidth - StWidth;
+
+ // Find the vector type that can store the original memory width in one
+ // atomic operation.
+ std::optional<EVT> FirstVT =
+ findMemType(DAG, TLI, StWidth.getKnownMinValue(), WidenVT, /*StAlign=*/0,
+ WidthDiff.getKnownMinValue());
+
+ if (!FirstVT)
+ return SDValue();
+
+ TypeSize FirstVTWidth = FirstVT->getSizeInBits();
+
+ SDValue StOp =
+ coerceStoredValue(StVal, *FirstVT, WidenVT, FirstVTWidth, dl, DAG);
+
+ return DAG.getAtomic(ISD::ATOMIC_STORE, dl, *FirstVT, ST->getChain(), StOp,
+ ST->getBasePtr(), ST->getMemOperand());
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) {
assert((OpNo == 1 || OpNo == 3) &&
"Can widen only data or mask operand of vp_store");
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 8619386fe3c88..659cdec91d3e7 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -352,6 +352,204 @@ define void @store_atomic_vec1_double_align(ptr %x, <1 x double> %v) nounwind {
ret void
}
+define void @store_atomic_vec2_i8(ptr %x, <2 x i8> %v) {
+; CHECK-SSE-O3-LABEL: store_atomic_vec2_i8:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movd %xmm0, %eax
+; CHECK-SSE-O3-NEXT: movw %ax, (%rdi)
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec2_i8:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovd %xmm0, %eax
+; CHECK-AVX-O3-NEXT: movw %ax, (%rdi)
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: store_atomic_vec2_i8:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movd %xmm0, %eax
+; CHECK-SSE-O0-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-SSE-O0-NEXT: movw %ax, (%rdi)
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec2_i8:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovd %xmm0, %eax
+; CHECK-AVX-O0-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-AVX-O0-NEXT: movw %ax, (%rdi)
+; CHECK-AVX-O0-NEXT: retq
+ store atomic <2 x i8> %v, ptr %x release, align 4
+ ret void
+}
+
+define void @store_atomic_vec2_i16(ptr %x, <2 x i16> %v) {
+; CHECK-SSE-O3-LABEL: store_atomic_vec2_i16:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movd %xmm0, %eax
+; CHECK-SSE-O3-NEXT: movl %eax, (%rdi)
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec2_i16:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovd %xmm0, %eax
+; CHECK-AVX-O3-NEXT: movl %eax, (%rdi)
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: store_atomic_vec2_i16:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movd %xmm0, %eax
+; CHECK-SSE-O0-NEXT: movl %eax, (%rdi)
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec2_i16:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovd %xmm0, %eax
+; CHECK-AVX-O0-NEXT: movl %eax, (%rdi)
+; CHECK-AVX-O0-NEXT: retq
+ store atomic <2 x i16> %v, ptr %x release, align 4
+ ret void
+}
+
+define void @store_atomic_vec2_ptr270(ptr %x, <2 x ptr addrspace(270)> %v) {
+; CHECK-SSE-O3-LABEL: store_atomic_vec2_ptr270:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movq %xmm0, %rax
+; CHECK-SSE-O3-NEXT: movq %rax, (%rdi)
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec2_ptr270:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovq %xmm0, %rax
+; CHECK-AVX-O3-NEXT: movq %rax, (%rdi)
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: store_atomic_vec2_ptr270:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movq %xmm0, %rax
+; CHECK-SSE-O0-NEXT: movq %rax, (%rdi)
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec2_ptr270:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovq %xmm0, %rax
+; CHECK-AVX-O0-NEXT: movq %rax, (%rdi)
+; CHECK-AVX-O0-NEXT: retq
+ store atomic <2 x ptr addrspace(270)> %v, ptr %x release, align 8
+ ret void
+}
+
+define void @store_atomic_vec2_i32_align(ptr %x, <2 x i32> %v) {
+; CHECK-SSE-O3-LABEL: store_atomic_vec2_i32_align:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movq %xmm0, %rax
+; CHECK-SSE-O3-NEXT: movq %rax, (%rdi)
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec2_i32_align:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovq %xmm0, %rax
+; CHECK-AVX-O3-NEXT: movq %rax, (%rdi)
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: store_atomic_vec2_i32_align:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movq %xmm0, %rax
+; CHECK-SSE-O0-NEXT: movq %rax, (%rdi)
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec2_i32_align:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovq %xmm0, %rax
+; CHECK-AVX-O0-NEXT: movq %rax, (%rdi)
+; CHECK-AVX-O0-NEXT: retq
+ store atomic <2 x i32> %v, ptr %x release, align 8
+ ret void
+}
+
+define void @store_atomic_vec2_float_align(ptr %x, <2 x float> %v) {
+; CHECK-SSE-O3-LABEL: store_atomic_vec2_float_align:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movq %xmm0, %rax
+; CHECK-SSE-O3-NEXT: movq %rax, (%rdi)
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec2_float_align:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovq %xmm0, %rax
+; CHECK-AVX-O3-NEXT: movq %rax, (%rdi)
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: store_atomic_vec2_float_align:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movq %xmm0, %rax
+; CHECK-SSE-O0-NEXT: movq %rax, (%rdi)
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec2_float_align:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovq %xmm0, %rax
+; CHECK-AVX-O0-NEXT: movq %rax, (%rdi)
+; CHECK-AVX-O0-NEXT: retq
+ store atomic <2 x float> %v, ptr %x release, align 8
+ ret void
+}
+
+define void @store_atomic_vec4_i8(ptr %x, <4 x i8> %v) nounwind {
+; CHECK-SSE-O3-LABEL: store_atomic_vec4_i8:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movd %xmm0, %eax
+; CHECK-SSE-O3-NEXT: movl %eax, (%rdi)
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec4_i8:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovd %xmm0, %eax
+; CHECK-AVX-O3-NEXT: movl %eax, (%rdi)
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: store_atomic_vec4_i8:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movd %xmm0, %eax
+; CHECK-SSE-O0-NEXT: movl %eax, (%rdi)
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec4_i8:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovd %xmm0, %eax
+; CHECK-AVX-O0-NEXT: movl %eax, (%rdi)
+; CHECK-AVX-O0-NEXT: retq
+ store atomic <4 x i8> %v, ptr %x release, align 4
+ ret void
+}
+
+define void @store_atomic_vec4_i16(ptr %x, <4 x i16> %v) nounwind {
+; CHECK-SSE-O3-LABEL: store_atomic_vec4_i16:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movq %xmm0, %rax
+; CHECK-SSE-O3-NEXT: movq %rax, (%rdi)
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec4_i16:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovq %xmm0, %rax
+; CHECK-AVX-O3-NEXT: movq %rax, (%rdi)
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: store_atomic_vec4_i16:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movq %xmm0, %rax
+; CHECK-SSE-O0-NEXT: movq %rax, (%rdi)
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec4_i16:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovq %xmm0, %rax
+; CHECK-AVX-O0-NEXT: movq %rax, (%rdi)
+; CHECK-AVX-O0-NEXT: retq
+ store atomic <4 x i16> %v, ptr %x release, align 8
+ ret void
+}
+
define <2 x i8> @atomic_vec2_i8(ptr %x) {
; CHECK-SSE-O3-LABEL: atomic_vec2_i8:
; CHECK-SSE-O3: # %bb.0:
|
|
@llvm/pr-subscribers-backend-x86 Author: jofrn Changes: Vector types of 2 elements must be widened. This change does this for vector types of atomic store in SelectionDAG so that it can translate aligned vectors of >1 size. Store-side counterpart to #148897. Stacked on top of #197166. Full diff: https://github.com/llvm/llvm-project/pull/197618.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 9c37eb8065ba5..a1c0e68049544 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1104,6 +1104,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue WidenVecOp_EXTEND_VECTOR_INREG(SDNode *N);
SDValue WidenVecOp_FAKE_USE(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
+ SDValue WidenVecOp_ATOMIC_STORE(AtomicSDNode *ST);
SDValue WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo);
SDValue WidenVecOp_VP_STRIDED_STORE(SDNode *N, unsigned OpNo);
SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index c6fc5e2152528..1cc78382f025f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -7445,6 +7445,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
+ case ISD::ATOMIC_STORE:
+ Res = WidenVecOp_ATOMIC_STORE(cast<AtomicSDNode>(N));
+ break;
case ISD::VP_STORE: Res = WidenVecOp_VP_STORE(N, OpNo); break;
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
Res = WidenVecOp_VP_STRIDED_STORE(N, OpNo);
@@ -8061,6 +8064,59 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
report_fatal_error("Unable to widen vector store");
}
+/// Inverse of coerceLoadedValue: pull a FirstVT-sized scalar/vector out of the
+/// widened value so it can be issued in a single atomic store.
+static SDValue coerceStoredValue(SDValue StVal, EVT FirstVT, EVT WidenVT,
+ TypeSize FirstVTWidth, SDLoc dl,
+ SelectionDAG &DAG) {
+ TypeSize WidenWidth = WidenVT.getSizeInBits();
+ if (!FirstVT.isVector()) {
+ unsigned NumElts =
+ WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, StVal);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, FirstVT, VecOp,
+ DAG.getVectorIdxConstant(0, dl));
+ }
+ assert(FirstVT == WidenVT && "First value type must equal widen value type");
+ return StVal;
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_ATOMIC_STORE(AtomicSDNode *ST) {
+ EVT StVT = ST->getMemoryVT();
+ SDLoc dl(ST);
+ assert(StVT.isVector() && "Expected vector");
+
+ SDValue StVal = GetWidenedVector(ST->getVal());
+ EVT WidenVT = StVal.getValueType();
+ assert(WidenVT.isVector() && "Expected vector");
+ assert(StVT.isScalableVector() == WidenVT.isScalableVector() &&
+ "Must be scalable");
+ assert(StVT.getVectorElementType() == WidenVT.getVectorElementType() &&
+ "Expected equivalent element types");
+
+ TypeSize StWidth = StVT.getSizeInBits();
+ TypeSize WidenWidth = WidenVT.getSizeInBits();
+ TypeSize WidthDiff = WidenWidth - StWidth;
+
+ // Find the vector type that can store the original memory width in one
+ // atomic operation.
+ std::optional<EVT> FirstVT =
+ findMemType(DAG, TLI, StWidth.getKnownMinValue(), WidenVT, /*StAlign=*/0,
+ WidthDiff.getKnownMinValue());
+
+ if (!FirstVT)
+ return SDValue();
+
+ TypeSize FirstVTWidth = FirstVT->getSizeInBits();
+
+ SDValue StOp =
+ coerceStoredValue(StVal, *FirstVT, WidenVT, FirstVTWidth, dl, DAG);
+
+ return DAG.getAtomic(ISD::ATOMIC_STORE, dl, *FirstVT, ST->getChain(), StOp,
+ ST->getBasePtr(), ST->getMemOperand());
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) {
assert((OpNo == 1 || OpNo == 3) &&
"Can widen only data or mask operand of vp_store");
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 8619386fe3c88..659cdec91d3e7 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -352,6 +352,204 @@ define void @store_atomic_vec1_double_align(ptr %x, <1 x double> %v) nounwind {
ret void
}
+define void @store_atomic_vec2_i8(ptr %x, <2 x i8> %v) {
+; CHECK-SSE-O3-LABEL: store_atomic_vec2_i8:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movd %xmm0, %eax
+; CHECK-SSE-O3-NEXT: movw %ax, (%rdi)
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec2_i8:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovd %xmm0, %eax
+; CHECK-AVX-O3-NEXT: movw %ax, (%rdi)
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: store_atomic_vec2_i8:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movd %xmm0, %eax
+; CHECK-SSE-O0-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-SSE-O0-NEXT: movw %ax, (%rdi)
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec2_i8:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovd %xmm0, %eax
+; CHECK-AVX-O0-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-AVX-O0-NEXT: movw %ax, (%rdi)
+; CHECK-AVX-O0-NEXT: retq
+ store atomic <2 x i8> %v, ptr %x release, align 4
+ ret void
+}
+
+define void @store_atomic_vec2_i16(ptr %x, <2 x i16> %v) {
+; CHECK-SSE-O3-LABEL: store_atomic_vec2_i16:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movd %xmm0, %eax
+; CHECK-SSE-O3-NEXT: movl %eax, (%rdi)
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec2_i16:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovd %xmm0, %eax
+; CHECK-AVX-O3-NEXT: movl %eax, (%rdi)
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: store_atomic_vec2_i16:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movd %xmm0, %eax
+; CHECK-SSE-O0-NEXT: movl %eax, (%rdi)
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec2_i16:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovd %xmm0, %eax
+; CHECK-AVX-O0-NEXT: movl %eax, (%rdi)
+; CHECK-AVX-O0-NEXT: retq
+ store atomic <2 x i16> %v, ptr %x release, align 4
+ ret void
+}
+
+define void @store_atomic_vec2_ptr270(ptr %x, <2 x ptr addrspace(270)> %v) {
+; CHECK-SSE-O3-LABEL: store_atomic_vec2_ptr270:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movq %xmm0, %rax
+; CHECK-SSE-O3-NEXT: movq %rax, (%rdi)
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec2_ptr270:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovq %xmm0, %rax
+; CHECK-AVX-O3-NEXT: movq %rax, (%rdi)
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: store_atomic_vec2_ptr270:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movq %xmm0, %rax
+; CHECK-SSE-O0-NEXT: movq %rax, (%rdi)
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec2_ptr270:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovq %xmm0, %rax
+; CHECK-AVX-O0-NEXT: movq %rax, (%rdi)
+; CHECK-AVX-O0-NEXT: retq
+ store atomic <2 x ptr addrspace(270)> %v, ptr %x release, align 8
+ ret void
+}
+
+define void @store_atomic_vec2_i32_align(ptr %x, <2 x i32> %v) {
+; CHECK-SSE-O3-LABEL: store_atomic_vec2_i32_align:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movq %xmm0, %rax
+; CHECK-SSE-O3-NEXT: movq %rax, (%rdi)
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec2_i32_align:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovq %xmm0, %rax
+; CHECK-AVX-O3-NEXT: movq %rax, (%rdi)
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: store_atomic_vec2_i32_align:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movq %xmm0, %rax
+; CHECK-SSE-O0-NEXT: movq %rax, (%rdi)
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec2_i32_align:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovq %xmm0, %rax
+; CHECK-AVX-O0-NEXT: movq %rax, (%rdi)
+; CHECK-AVX-O0-NEXT: retq
+ store atomic <2 x i32> %v, ptr %x release, align 8
+ ret void
+}
+
+define void @store_atomic_vec2_float_align(ptr %x, <2 x float> %v) {
+; CHECK-SSE-O3-LABEL: store_atomic_vec2_float_align:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movq %xmm0, %rax
+; CHECK-SSE-O3-NEXT: movq %rax, (%rdi)
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec2_float_align:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovq %xmm0, %rax
+; CHECK-AVX-O3-NEXT: movq %rax, (%rdi)
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: store_atomic_vec2_float_align:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movq %xmm0, %rax
+; CHECK-SSE-O0-NEXT: movq %rax, (%rdi)
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec2_float_align:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovq %xmm0, %rax
+; CHECK-AVX-O0-NEXT: movq %rax, (%rdi)
+; CHECK-AVX-O0-NEXT: retq
+ store atomic <2 x float> %v, ptr %x release, align 8
+ ret void
+}
+
+define void @store_atomic_vec4_i8(ptr %x, <4 x i8> %v) nounwind {
+; CHECK-SSE-O3-LABEL: store_atomic_vec4_i8:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movd %xmm0, %eax
+; CHECK-SSE-O3-NEXT: movl %eax, (%rdi)
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec4_i8:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovd %xmm0, %eax
+; CHECK-AVX-O3-NEXT: movl %eax, (%rdi)
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: store_atomic_vec4_i8:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movd %xmm0, %eax
+; CHECK-SSE-O0-NEXT: movl %eax, (%rdi)
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec4_i8:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovd %xmm0, %eax
+; CHECK-AVX-O0-NEXT: movl %eax, (%rdi)
+; CHECK-AVX-O0-NEXT: retq
+ store atomic <4 x i8> %v, ptr %x release, align 4
+ ret void
+}
+
+define void @store_atomic_vec4_i16(ptr %x, <4 x i16> %v) nounwind {
+; CHECK-SSE-O3-LABEL: store_atomic_vec4_i16:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movq %xmm0, %rax
+; CHECK-SSE-O3-NEXT: movq %rax, (%rdi)
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec4_i16:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovq %xmm0, %rax
+; CHECK-AVX-O3-NEXT: movq %rax, (%rdi)
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: store_atomic_vec4_i16:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movq %xmm0, %rax
+; CHECK-SSE-O0-NEXT: movq %rax, (%rdi)
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec4_i16:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovq %xmm0, %rax
+; CHECK-AVX-O0-NEXT: movq %rax, (%rdi)
+; CHECK-AVX-O0-NEXT: retq
+ store atomic <4 x i16> %v, ptr %x release, align 8
+ ret void
+}
+
define <2 x i8> @atomic_vec2_i8(ptr %x) {
; CHECK-SSE-O3-LABEL: atomic_vec2_i8:
; CHECK-SSE-O3: # %bb.0:
|
|
|
|
|
||
| /// Inverse of coerceLoadedValue: pull a FirstVT-sized scalar/vector out of the | ||
| /// widened value so it can be issued in a single atomic store. | ||
| static SDValue coerceStoredValue(SDValue StVal, EVT FirstVT, EVT WidenVT, |
There was a problem hiding this comment.
Can you move this to be next to coerceLoadedValue?
| std::optional<EVT> FirstVT = | ||
| findMemType(DAG, TLI, StWidth.getKnownMinValue(), WidenVT, /*StAlign=*/0, | ||
| WidthDiff.getKnownMinValue()); | ||
|
|
There was a problem hiding this comment.
Why is this passing 0 for the align instead of the actual alignment?
There was a problem hiding this comment.
As with atomic loads, passing the actual alignment for atomic stores causes the store to be widened beyond the value's width, so that e.g. a <2 x i8> store writes beyond the object (e.g. %v of <2 x i8> %v). That is, if we pass the alignment, then we get movl %eax, (%rdi), which writes to unrelated / undefined bytes relative to %v.
There was a problem hiding this comment.
A brief comment explaining that would be useful for future reference
There was a problem hiding this comment.
Ok, added one! Thanks.
876a35f to
740f199
Compare
7637943 to
f6ebebc
Compare
47bca23 to
21e67f6
Compare
When lowering `atomic store <1 x T>` vector types with floats (i.e. during scalarization in the selection DAG), selection can fail since this pattern is unsupported. To support this, floats can be cast to an integer type of the same size. Store-side counterpart to #148895. Stacked on top of #197165 and below #197618.
e0ef9b7 to
63ef83c
Compare
| SDValue DAGTypeLegalizer::WidenVecOp_ATOMIC_STORE(AtomicSDNode *ST) { | ||
| EVT StVT = ST->getMemoryVT(); | ||
| SDLoc dl(ST); | ||
| assert(StVT.isVector() && "Expected vector"); |
There was a problem hiding this comment.
| assert(StVT.isVector() && "Expected vector"); |
Redundant
| assert(WidenVT.isVector() && "Expected vector"); | ||
| assert(StVT.isScalableVector() == WidenVT.isScalableVector() && | ||
| "Must be scalable"); | ||
| assert(StVT.getVectorElementType() == WidenVT.getVectorElementType() && | ||
| "Expected equivalent element types"); |
There was a problem hiding this comment.
All these asserts seem redundant (e.g., getVector* will assert on !isVector anyway), or belong in findMemType
There was a problem hiding this comment.
Removed. findMemType keeps track of the invariants well.
Vector types of 2 elements must be widened. This change does this for vector types of atomic store in SelectionDAG so that it can translate aligned vectors of >1 size.
63ef83c to
fc66de1
Compare
Vector types of 2 elements must be widened. This change does this
for vector types of atomic store in SelectionDAG so that it can
translate aligned vectors of >1 size.
Store-side counterpart to #148897. Stacked on top of #197166 and below #197619.