Skip to content

Commit

Permalink
[DAGCombine][X86] Pull one-use freeze out of extract_vector_elt vector operand
Browse files Browse the repository at this point in the history

This may allow us to further simplify the vector,
and freezing the extracted result is still fine:
```
----------------------------------------
define i8 @src(<2 x i8> %src, i64 %idx) {
%0:
  %i1 = freeze <2 x i8> %src
  %i2 = extractelement <2 x i8> %i1, i64 %idx
  ret i8 %i2
}
=>
define i8 @tgt(<2 x i8> %src, i64 %idx) {
%0:
  %i1 = extractelement <2 x i8> %src, i64 %idx
  %i2 = freeze i8 %i1
  ret i8 %i2
}
Transformation seems to be correct!
```

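However, the freeze must not have any other uses: every use of a single
freeze observes the same arbitrarily-chosen value for an undef/poison lane,
whereas a new freeze of the extracted element may pick a different value
than the one the remaining users of the original freeze see.
See `@freeze_extractelement_extra_use`.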

Also, it looks like we are missing some ISel-level handling for freeze: e.g. in `@freeze_extractelement`, `vpextrb` is no longer folded into the store.
  • Loading branch information
LebedevRI committed Dec 22, 2022
1 parent 2d756d2 commit f5700e7
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 14 deletions.
6 changes: 6 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Expand Up @@ -20281,6 +20281,12 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
return DAG.getUNDEF(ScalarVT);

// extract_vector_elt(freeze(x), idx) -> freeze(extract_vector_elt(x, idx))
// Only done when the freeze has a single use.
if (VecOp.hasOneUse() && VecOp.getOpcode() == ISD::FREEZE) {
return DAG.getFreeze(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
VecOp.getOperand(0), Index));
}

// extract_vector_elt (build_vector x, y), 1 -> y
if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
Expand Down
30 changes: 16 additions & 14 deletions llvm/test/CodeGen/X86/freeze-vector.ll
Expand Up @@ -65,13 +65,13 @@ define void @freeze_bitcast_from_wider_elt(ptr %origin, ptr %dst) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: vmovlps %xmm0, (%eax)
; X86-NEXT: vmovsd %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: freeze_bitcast_from_wider_elt:
; X64: # %bb.0:
; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: vmovlps %xmm0, (%rsi)
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq %rax, (%rsi)
; X64-NEXT: retq
%i0 = load <4 x i16>, ptr %origin
%i1 = bitcast <4 x i16> %i0 to <8 x i8>
Expand All @@ -88,15 +88,14 @@ define void @freeze_bitcast_from_wider_elt_escape(ptr %origin, ptr %escape, ptr
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: vmovsd %xmm0, (%ecx)
; X86-NEXT: vmovlps %xmm0, (%eax)
; X86-NEXT: vmovsd %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: freeze_bitcast_from_wider_elt_escape:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: vmovq %rax, %xmm0
; X64-NEXT: movq %rax, (%rsi)
; X64-NEXT: vmovq %xmm0, (%rdx)
; X64-NEXT: movq %rax, (%rdx)
; X64-NEXT: retq
%i0 = load <4 x i16>, ptr %origin
%i1 = bitcast <4 x i16> %i0 to <8 x i8>
Expand All @@ -113,13 +112,13 @@ define void @freeze_bitcast_to_wider_elt(ptr %origin, ptr %dst) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: vmovlps %xmm0, (%eax)
; X86-NEXT: vmovsd %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: freeze_bitcast_to_wider_elt:
; X64: # %bb.0:
; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: vmovlps %xmm0, (%rsi)
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq %rax, (%rsi)
; X64-NEXT: retq
%i0 = load <8 x i8>, ptr %origin
%i1 = bitcast <8 x i8> %i0 to <4 x i16>
Expand All @@ -136,15 +135,14 @@ define void @freeze_bitcast_to_wider_elt_escape(ptr %origin, ptr %escape, ptr %d
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: vmovsd %xmm0, (%ecx)
; X86-NEXT: vmovlps %xmm0, (%eax)
; X86-NEXT: vmovsd %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: freeze_bitcast_to_wider_elt_escape:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: vmovq %rax, %xmm0
; X64-NEXT: movq %rax, (%rsi)
; X64-NEXT: vmovq %xmm0, (%rdx)
; X64-NEXT: movq %rax, (%rdx)
; X64-NEXT: retq
%i0 = load <8 x i8>, ptr %origin
%i1 = bitcast <8 x i8> %i0 to <4 x i16>
Expand All @@ -163,14 +161,16 @@ define void @freeze_extractelement(ptr %origin0, ptr %origin1, ptr %dst) nounwin
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: vmovdqa (%edx), %xmm0
; X86-NEXT: vpand (%ecx), %xmm0, %xmm0
; X86-NEXT: vpextrb $6, %xmm0, (%eax)
; X86-NEXT: vpextrb $6, %xmm0, %ecx
; X86-NEXT: movb %cl, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: freeze_extractelement:
; X64: # %bb.0:
; X64-NEXT: vmovdqa (%rdi), %xmm0
; X64-NEXT: vpand (%rsi), %xmm0, %xmm0
; X64-NEXT: vpextrb $6, %xmm0, (%rdx)
; X64-NEXT: vpextrb $6, %xmm0, %eax
; X64-NEXT: movb %al, (%rdx)
; X64-NEXT: retq
%i0 = load <16 x i8>, ptr %origin0
%i1 = load <16 x i8>, ptr %origin1
Expand Down Expand Up @@ -211,6 +211,8 @@ define void @freeze_extractelement_escape(ptr %origin0, ptr %origin1, ptr %dst,
store i8 %i4, ptr %dst
ret void
}

; It would be a miscompilation to pull freeze out of extractelement here.
define void @freeze_extractelement_extra_use(ptr %origin0, ptr %origin1, i64 %idx0, i64 %idx1, ptr %dst, ptr %escape) nounwind {
; X86-LABEL: freeze_extractelement_extra_use:
; X86: # %bb.0:
Expand Down

0 comments on commit f5700e7

Please sign in to comment.