Skip to content

Commit

Permalink
[x86] extend machine combiner reassociation optimization to SSE scalar adds

Browse files Browse the repository at this point in the history

Extend the reassociation optimization of http://reviews.llvm.org/rL240361 (D10460)
to SSE scalar FP SP adds in addition to AVX scalar FP SP adds.

With the 'switch' in place, we can trivially add other opcodes and test cases in
future patches.

Differential Revision: http://reviews.llvm.org/D10975

llvm-svn: 241515
  • Loading branch information
rotateright committed Jul 6, 2015
1 parent 8fbf1c1 commit 681a56a
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 55 deletions.
32 changes: 19 additions & 13 deletions llvm/lib/Target/X86/X86InstrInfo.cpp
Expand Up @@ -6417,18 +6417,30 @@ static bool hasReassocSibling(const MachineInstr &Inst, bool &Commuted) {
return false;
}

// TODO: There are many more machine instruction opcodes to match:
//       1. Other data types (double, integer, vectors)
//       2. Other math / logic operations (mul, and, or)
/// Return true if \p Opcode is an operation that is both associative and
/// commutative, and therefore a candidate for operand reassociation by the
/// machine combiner.
static bool isAssociativeAndCommutative(unsigned Opcode) {
  // Scalar single-precision FP add, in its SSE and AVX register-register
  // forms, is the only operation matched so far.
  return Opcode == X86::ADDSSrr || Opcode == X86::VADDSSrr;
}

/// Return true if the input instruction is part of a chain of dependent ops
/// that are suitable for reassociation, otherwise return false.
/// If the instruction's operands must be commuted to have a previous
/// instruction of the same type define the first source operand, Commuted will
/// be set to true.
static bool isReassocCandidate(const MachineInstr &Inst, unsigned AssocOpcode,
bool &Commuted) {
// 1. The instruction must have the correct type.
static bool isReassocCandidate(const MachineInstr &Inst, bool &Commuted) {
// 1. The operation must be associative and commutative.
// 2. The instruction must have virtual register definitions for its
// operands in the same basic block.
// 3. The instruction must have a reassociatable sibling.
if (Inst.getOpcode() == AssocOpcode &&
// 3. The instruction must have a reassociable sibling.
if (isAssociativeAndCommutative(Inst.getOpcode()) &&
hasVirtualRegDefsInBasicBlock(Inst, Inst.getParent()) &&
hasReassocSibling(Inst, Commuted))
return true;
Expand All @@ -6455,14 +6467,8 @@ bool X86InstrInfo::getMachineCombinerPatterns(MachineInstr &Root,
// B = A op X (Prev)
// C = B op Y (Root)

// TODO: There are many more associative instruction types to match:
// 1. Other forms of scalar FP add (non-AVX)
// 2. Other data types (double, integer, vectors)
// 3. Other math / logic operations (mul, and, or)
unsigned AssocOpcode = X86::VADDSSrr;

bool Commute = false;
if (isReassocCandidate(Root, AssocOpcode, Commute)) {
bool Commute;
if (isReassocCandidate(Root, Commute)) {
// We found a sequence of instructions that may be suitable for a
// reassociation of operands to increase ILP. Specify each commutation
// possibility for the Prev instruction in the sequence and let the
Expand Down
131 changes: 89 additions & 42 deletions llvm/test/CodeGen/X86/machine-combiner.ll
@@ -1,54 +1,83 @@
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=AVX

; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.

define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds1:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds1:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds2:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds2:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds3:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds3:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %x3, %t1
  ret float %t2
}

define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds4:
; CHECK: # BB#0:
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
; SSE-LABEL: reassociate_adds4:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds4:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fadd float %x0, %x1
%t1 = fadd float %x2, %t0
%t2 = fadd float %x3, %t1
Expand All @@ -59,16 +88,27 @@ define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; produced because that would cost more compile time.

define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; CHECK-LABEL: reassociate_adds5:
; CHECK: # BB#0:
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddss %xmm5, %xmm4, %xmm1
; CHECK-NEXT: vaddss %xmm6, %xmm1, %xmm1
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddss %xmm7, %xmm0, %xmm0
; CHECK-NEXT: retq
; SSE-LABEL: reassociate_adds5:
; SSE: # BB#0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: addss %xmm5, %xmm4
; SSE-NEXT: addss %xmm6, %xmm4
; SSE-NEXT: addss %xmm4, %xmm0
; SSE-NEXT: addss %xmm7, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds5:
; AVX: # BB#0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm5, %xmm4, %xmm1
; AVX-NEXT: vaddss %xmm6, %xmm1, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm7, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fadd float %x0, %x1
%t1 = fadd float %t0, %x2
%t2 = fadd float %t1, %x3
Expand All @@ -83,14 +123,21 @@ define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, floa
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.

define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds6:
; CHECK: # BB#0:
; CHECK-NEXT: vdivss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
; SSE-LABEL: reassociate_adds6:
; SSE: # BB#0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds6:
; AVX: # BB#0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%t0 = fdiv float %x0, %x1
%t1 = fadd float %x2, %t0
%t2 = fadd float %x3, %t1
Expand Down

0 comments on commit 681a56a

Please sign in to comment.