476 changes: 466 additions & 10 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Large diffs are not rendered by default.

20 changes: 19 additions & 1 deletion llvm/lib/Target/AArch64/AArch64InstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "AArch64.h"
#include "AArch64RegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"

#define GET_INSTRINFO_HEADER
#include "AArch64GenInstrInfo.inc"
Expand Down Expand Up @@ -156,9 +157,26 @@ class AArch64InstrInfo : public AArch64GenInstrInfo {
bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
unsigned SrcReg2, int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const override;
/// hasPattern - return true when there is potentially a faster code sequence
/// for an instruction chain ending in <Root>. All potential patterns are
/// listed
/// in the <Pattern> array.
virtual bool hasPattern(
MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern) const;

/// genAlternativeCodeSequence - when hasPattern() finds a pattern
/// this function generates the instructions that could replace the
/// original code sequence
virtual void genAlternativeCodeSequence(
MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const;
/// useMachineCombiner - AArch64 supports MachineCombiner
virtual bool useMachineCombiner(void) const;

bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;

private:
void instantiateCondBranch(MachineBasicBlock &MBB, DebugLoc DL,
MachineBasicBlock *TBB,
Expand Down
42 changes: 42 additions & 0 deletions llvm/lib/Target/AArch64/AArch64MachineCombinerPattern.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
//===- AArch64MachineCombinerPattern.h -===//
//===- AArch64 instruction pattern supported by combiner -===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines instruction pattern supported by combiner
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TARGET_AArch64MACHINECOMBINERPATTERN_H
#define LLVM_TARGET_AArch64MACHINECOMBINERPATTERN_H

namespace llvm {

/// Enumeration of instruction pattern supported by machine combiner
///
///
namespace MachineCombinerPattern {
enum MC_PATTERN : int {
MC_NONE = 0,
MC_MULADDW_OP1 = 1,
MC_MULADDW_OP2 = 2,
MC_MULSUBW_OP1 = 3,
MC_MULSUBW_OP2 = 4,
MC_MULADDWI_OP1 = 5,
MC_MULSUBWI_OP1 = 6,
MC_MULADDX_OP1 = 7,
MC_MULADDX_OP2 = 8,
MC_MULSUBX_OP1 = 9,
MC_MULSUBX_OP2 = 10,
MC_MULADDXI_OP1 = 11,
MC_MULSUBXI_OP1 = 12
};
} // end namespace MachineCombinerPattern
} // end namespace llvm

#endif
6 changes: 6 additions & 0 deletions llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ static cl::opt<bool>
EnableCCMP("aarch64-ccmp", cl::desc("Enable the CCMP formation pass"),
cl::init(true), cl::Hidden);

static cl::opt<bool> EnableMCR("aarch64-mcr",
cl::desc("Enable the machine combiner pass"),
cl::init(true), cl::Hidden);

static cl::opt<bool>
EnableStPairSuppress("aarch64-stp-suppress", cl::desc("Suppress STP for AArch64"),
cl::init(true), cl::Hidden);
Expand Down Expand Up @@ -174,6 +178,8 @@ bool AArch64PassConfig::addInstSelector() {
bool AArch64PassConfig::addILPOpts() {
if (EnableCCMP)
addPass(createAArch64ConditionalCompares());
if (EnableMCR)
addPass(&MachineCombinerID);
addPass(&EarlyIfConverterID);
if (EnableStPairSuppress)
addPass(createAArch64StorePairSuppressPass());
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -mcpu=cyclone | FileCheck %s
; arm64 has its own copy of this because of the intrinsics

define <8 x i8> @mul8xi8(<8 x i8> %A, <8 x i8> %B) {
Expand Down Expand Up @@ -450,8 +450,8 @@ define <1 x i32> @srem1x32(<1 x i32> %A, <1 x i32> %B) {
define <2 x i32> @srem2x32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: srem2x32:
; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
%tmp3 = srem <2 x i32> %A, %B;
ret <2 x i32> %tmp3
Expand Down Expand Up @@ -482,8 +482,8 @@ define <1 x i64> @srem1x64(<1 x i64> %A, <1 x i64> %B) {
define <2 x i64> @srem2x64(<2 x i64> %A, <2 x i64> %B) {
; CHECK-LABEL: srem2x64:
; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
%tmp3 = srem <2 x i64> %A, %B;
ret <2 x i64> %tmp3
Expand Down Expand Up @@ -612,8 +612,8 @@ define <1 x i32> @urem1x32(<1 x i32> %A, <1 x i32> %B) {
define <2 x i32> @urem2x32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: urem2x32:
; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
%tmp3 = urem <2 x i32> %A, %B;
ret <2 x i32> %tmp3
Expand Down Expand Up @@ -644,8 +644,8 @@ define <1 x i64> @urem1x64(<1 x i64> %A, <1 x i64> %B) {
define <2 x i64> @urem2x64(<2 x i64> %A, <2 x i64> %B) {
; CHECK-LABEL: urem2x64:
; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
%tmp3 = urem <2 x i64> %A, %B;
ret <2 x i64> %tmp3
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/dp-3source.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 -mcpu=cyclone | FileCheck %s

define i32 @test_madd32(i32 %val0, i32 %val1, i32 %val2) {
; CHECK-LABEL: test_madd32:
Expand Down
19 changes: 19 additions & 0 deletions llvm/test/CodeGen/AArch64/madd-lohi.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
; RUN: llc -mtriple=arm64-apple-ios7.0 %s -o - | FileCheck %s
; RUN: llc -mtriple=aarch64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s

define i128 @test_128bitmul(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: test_128bitmul:
; CHECK-DAG: umulh [[CARRY:x[0-9]+]], x0, x2
; CHECK-DAG: madd [[PART1:x[0-9]+]], x0, x3, [[CARRY]]
; CHECK: madd x1, x1, x2, [[PART1]]
; CHECK: mul x0, x0, x2

; CHECK-BE-LABEL: test_128bitmul:
; CHECK-BE-DAG: umulh [[CARRY:x[0-9]+]], x1, x3
; CHECK-BE-DAG: madd [[PART1:x[0-9]+]], x1, x2, [[CARRY]]
; CHECK-BE: madd x0, x0, x3, [[PART1]]
; CHECK-BE: mul x1, x1, x3

%prod = mul i128 %lhs, %rhs
ret i128 %prod
}
13 changes: 6 additions & 7 deletions llvm/test/CodeGen/AArch64/mul-lohi.ll
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
; RUN: llc -mtriple=arm64-apple-ios7.0 %s -o - | FileCheck %s
; RUN: llc -mtriple=aarch64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s

; RUN: llc -mtriple=arm64-apple-ios7.0 -mcpu=cyclone %s -o - | FileCheck %s
; RUN: llc -mtriple=aarch64_be-linux-gnu -mcpu=cyclone %s -o - | FileCheck --check-prefix=CHECK-BE %s
define i128 @test_128bitmul(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: test_128bitmul:
; CHECK-DAG: mul [[PART1:x[0-9]+]], x0, x3
; CHECK-DAG: umulh [[CARRY:x[0-9]+]], x0, x2
; CHECK-DAG: madd [[PART1:x[0-9]+]], x0, x3, [[CARRY]]
; CHECK: madd x1, x1, x2, [[PART1]]
; CHECK: mul [[PART2:x[0-9]+]], x1, x2
; CHECK: mul x0, x0, x2

; CHECK-BE-LABEL: test_128bitmul:
; CHECK-BE-DAG: mul [[PART1:x[0-9]+]], x1, x2
; CHECK-BE-DAG: umulh [[CARRY:x[0-9]+]], x1, x3
; CHECK-BE-DAG: madd [[PART1:x[0-9]+]], x1, x2, [[CARRY]]
; CHECK-BE: madd x0, x0, x3, [[PART1]]
; CHECK-BE: mul [[PART2:x[0-9]+]], x0, x3
; CHECK-BE: mul x1, x1, x3

%prod = mul i128 %lhs, %rhs
Expand Down