diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 6648d3928c1f7..9abffae413d4e 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -531,6 +531,30 @@ bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
   return true;
 }
 
+bool VETargetLowering::hasAndNot(SDValue Y) const {
+  EVT VT = Y.getValueType();
+
+  // VE doesn't have a vector and-not instruction.
+  if (VT.isVector())
+    return false;
+
+  // VE allows different immediate values for X and Y in ~X & Y.
+  // Only simm7 works for X, and only mimm works for Y on VE.  However, this
+  // function is used to check whether an immediate value is OK for an
+  // and-not instruction as both X and Y.  Generating an additional
+  // instruction to materialize an immediate value is no good since the
+  // purpose of this function is to convert a series of 3 instructions to
+  // another series of 3 instructions with better parallelism.  Therefore,
+  // we return false for all immediate values for now.
+  // FIXME: Change hasAndNot to take two operands so that it works
+  //        correctly with Aurora VE.
+  if (isa<ConstantSDNode>(Y))
+    return false;
+
+  // It's ok for generic registers.
+  return true;
+}
+
 VETargetLowering::VETargetLowering(const TargetMachine &TM,
                                    const VESubtarget &STI)
     : TargetLowering(TM), Subtarget(&STI) {
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index 097960f05a830..4633220efaa18 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -101,6 +101,8 @@ class VETargetLowering : public TargetLowering {
 
   // Block s/udiv lowering for now
   bool isIntDivCheap(EVT VT, AttributeList Attr) const override { return true; }
+
+  bool hasAndNot(SDValue Y) const override;
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index e92cbb1e6ca21..76b3c657c124c 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -1025,7 +1025,9 @@ let isCodeGenOnly = 1 in
 defm XOR32 : RRm<"xor", 0x46, I32, i32, xor>;
 defm EQV : RRm<"eqv", 0x47, I64, i64>;
 // Section 8.5.5 - NND (Negate AND)
-defm NND : RRNCm<"nnd", 0x54, I64, i64>;
+def and_not : PatFrags<(ops node:$x, node:$y),
+                       [(and (not node:$x), node:$y)]>;
+defm NND : RRNCm<"nnd", 0x54, I64, i64, and_not>;
 
 // Section 8.5.6 - MRG (Merge)
 defm MRG : RRMRGm<"mrg", 0x56, I64, i64>;
diff --git a/llvm/test/CodeGen/VE/cttz.ll b/llvm/test/CodeGen/VE/cttz.ll
index 82df4ee109c51..4b79a0f988e86 100644
--- a/llvm/test/CodeGen/VE/cttz.ll
+++ b/llvm/test/CodeGen/VE/cttz.ll
@@ -4,8 +4,7 @@ define i64 @func1(i64 %p) {
 ; CHECK-LABEL: func1:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea %s1, -1(, %s0)
-; CHECK-NEXT: xor %s0, -1, %s0
-; CHECK-NEXT: and %s0, %s0, %s1
+; CHECK-NEXT: nnd %s0, %s0, %s1
 ; CHECK-NEXT: pcnt %s0, %s0
 ; CHECK-NEXT: or %s11, 0, %s9
   %r = tail call i64 @llvm.cttz.i64(i64 %p, i1 true)
diff --git a/llvm/test/CodeGen/VE/nnd.ll b/llvm/test/CodeGen/VE/nnd.ll
new file mode 100644
index 0000000000000..aea10d4834cdd
--- /dev/null
+++ b/llvm/test/CodeGen/VE/nnd.ll
@@ -0,0 +1,225 @@
+; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
+
+define signext i8 @func8s(i8 signext %a, i8 signext %b) {
+; CHECK-LABEL: func8s:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: xor %s0, -1, %s0
+; CHECK-NEXT: and %s0, %s0, %s1
+; CHECK-NEXT: or %s11, 0, %s9
+  %not = xor i8 %a, -1
+  %res = and i8 %not, %b
+  ret i8 %res
+}
+
+define zeroext i8 @func8z(i8 zeroext %a, i8 zeroext %b) {
+; CHECK-LABEL: func8z:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: xor %s0, -1, %s0
+; CHECK-NEXT: and %s0, %s1, %s0
+; CHECK-NEXT: or %s11, 0, %s9
+  %not = xor i8 %a, -1
+  %res = and i8 %b, %not
+  ret i8 %res
+}
+
+define signext i8 @funci8s(i8 signext %a) {
+; CHECK-LABEL: funci8s:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: xor %s0, -1, %s0
+; CHECK-NEXT: and %s0, 5, %s0
+; CHECK-NEXT: or %s11, 0, %s9
+  %not = xor i8 %a, -1
+  %res = and i8 %not, 5
+  ret i8 %res
+}
+
+define zeroext i8 @funci8z(i8 zeroext %a) {
+; CHECK-LABEL: funci8z:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: xor %s0, -1, %s0
+; CHECK-NEXT: lea %s1, 251
+; CHECK-NEXT: and %s0, %s0, %s1
+; CHECK-NEXT: or %s11, 0, %s9
+  %not = xor i8 %a, -1
+  %res = and i8 -5, %not
+  ret i8 %res
+}
+
+define signext i16 @func16s(i16 signext %a, i16 signext %b) {
+; CHECK-LABEL: func16s:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: xor %s0, -1, %s0
+; CHECK-NEXT: and %s0, %s0, %s1
+; CHECK-NEXT: or %s11, 0, %s9
+  %not = xor i16 %a, -1
+  %res = and i16 %not, %b
+  ret i16 %res
+}
+
+define zeroext i16 @func16z(i16 zeroext %a, i16 zeroext %b) {
+; CHECK-LABEL: func16z:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: xor %s0, -1, %s0
+; CHECK-NEXT: and %s0, %s1, %s0
+; CHECK-NEXT: or %s11, 0, %s9
+  %not = xor i16 %a, -1
+  %res = and i16 %b, %not
+  ret i16 %res
+}
+
+define signext i16 @funci16s(i16 signext %a) {
+; CHECK-LABEL: funci16s:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: xor %s0, -1, %s0
+; CHECK-NEXT: or %s11, 0, %s9
+  %not = xor i16 %a, -1
+  %res = and i16 %not, 65535
+  ret i16 %res
+}
+
+define zeroext i16 @funci16z(i16 zeroext %a) {
+; CHECK-LABEL: funci16z:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: xor %s0, -1, %s0
+; CHECK-NEXT: and %s0, %s0, (52)0
+; CHECK-NEXT: or %s11, 0, %s9
+  %not = xor i16 %a, -1
+  %res = and i16 4095, %not
+  ret i16 %res
+}
+
+define signext i32 @func32s(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: func32s:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: xor %s0, -1, %s0
+; CHECK-NEXT: and %s0, %s0, %s1
+; CHECK-NEXT: or %s11, 0, %s9
+  %not = xor i32 %a, -1
+  %res = and i32 %not, %b
+  ret i32 %res
+}
+
+define zeroext i32 @func32z(i32 zeroext %a, i32 zeroext %b) {
+; CHECK-LABEL: func32z:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: xor %s0, -1, %s0
+; CHECK-NEXT: and %s0, %s0, %s1
+; CHECK-NEXT: or %s11, 0, %s9
+  %not = xor i32 %a, -1
+  %res = and i32 %not, %b
+  ret i32 %res
+}
+
+define signext i32 @funci32s(i32 signext %a) {
+; CHECK-LABEL: funci32s:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: xor %s0, -1, %s0
+; CHECK-NEXT: and %s0, %s0, (36)0
+; CHECK-NEXT: or %s11, 0, %s9
+  %not = xor i32 %a, -1
+  %res = and i32 %not, 268435455
+  ret i32 %res
+}
+
+define zeroext i32 @funci32z(i32 zeroext %a) {
+; CHECK-LABEL: funci32z:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: xor %s0, -1, %s0
+; CHECK-NEXT: and %s0, %s0, (36)0
+; CHECK-NEXT: or %s11, 0, %s9
+  %not = xor i32 %a, -1
+  %res = and i32 %not, 268435455
+  ret i32 %res
+}
+
+define i64 @func64(i64 %a, i64 %b) {
+; CHECK-LABEL: func64:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: nnd %s0, %s0, %s1
+; CHECK-NEXT: or %s11, 0, %s9
+  %not = xor i64 %a, -1
+  %res = and i64 %not, %b
+  ret i64 %res
+}
+
+define i64 @func64_2(i64 %a, i64 %b) {
+; CHECK-LABEL: func64_2:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: nnd %s0, %s1, %s0
+; CHECK-NEXT: or %s11, 0, %s9
+  %not = xor i64 %b, -1
+  %res = and i64 %not, %a
+  ret i64 %res
+}
+
+define i64 @func64i(i64 %a) {
+; CHECK-LABEL: func64i:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: nnd %s0, %s0, (24)0
+; CHECK-NEXT: or %s11, 0, %s9
+  %not = xor i64 %a, -1
+  %res = and i64 %not, 1099511627775
+  ret i64 %res
+}
+
+define i128 @func128(i128 %a, i128 %b) {
+; CHECK-LABEL: func128:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: nnd %s0, %s0, %s2
+; CHECK-NEXT: nnd %s1, %s1, %s3
+; CHECK-NEXT: or %s11, 0, %s9
+  %not = xor i128 %a, -1
+  %res = and i128 %b, %not
+  ret i128 %res
+}
+
+define i128 @funci128(i128 %a) {
+; CHECK-LABEL: funci128:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s1, 5, (0)1
+; CHECK-NEXT: nnd %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+  %not = xor i128 %a, -1
+  %res = and i128 %not, 5
+  ret i128 %res
+}
+
+define i64 @func64_nnd_fold(i64 %x, i64 %y, i64 %m) {
+; CHECK-LABEL: func64_nnd_fold:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: nnd %s1, %s2, %s1
+; CHECK-NEXT: and %s0, %s0, %s2
+; CHECK-NEXT: or %s0, %s0, %s1
+; CHECK-NEXT: or %s11, 0, %s9
+  %D = xor i64 %x, %y
+  %A = and i64 %D, %m
+  %res = xor i64 %A, %y
+  ret i64 %res
+}
+
+define i64 @func64iy_nnd_fold(i64 %x, i64 %m) {
+; CHECK-LABEL: func64iy_nnd_fold:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: nnd %s0, %s0, %s1
+; CHECK-NEXT: or %s1, -64, %s1
+; CHECK-NEXT: nnd %s0, %s0, %s1
+; CHECK-NEXT: or %s11, 0, %s9
+  %D = xor i64 %x, -64
+  %A = and i64 %D, %m
+  %res = xor i64 %A, -64
+  ret i64 %res
+}
+
+define i64 @func64im_nnd_fold(i64 %x, i64 %y) {
+; CHECK-LABEL: func64im_nnd_fold:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: xor %s0, %s0, %s1
+; CHECK-NEXT: and %s0, 30, %s0
+; CHECK-NEXT: xor %s0, %s0, %s1
+; CHECK-NEXT: or %s11, 0, %s9
+  %D = xor i64 %x, %y
+  %A = and i64 %D, 30
+  %res = xor i64 %A, %y
+  ret i64 %res
+}
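
Note on how the new hook is exercised: generic DAGCombiner folds such as the
masked-merge unfold consult hasAndNot() before rewriting ((x ^ y) & m) ^ y
into (x & m) | (y & ~m); the *_nnd_fold tests above pin down that interaction.
A minimal sketch of the pattern, runnable with llc -mtriple=ve-unknown-unknown
(@masked_merge is illustrative, not one of the in-tree test names):

  define i64 @masked_merge(i64 %x, i64 %y, i64 %m) {
    ; Selects each bit from %x where %m is set, from %y elsewhere. Since
    ; hasAndNot() returns true for register operands, DAGCombiner unfolds
    ; this into (x & m) | (y & ~m), and (y & ~m) selects to a single NND.
    %d = xor i64 %x, %y
    %a = and i64 %d, %m
    %r = xor i64 %a, %y
    ret i64 %r
  }

Because hasAndNot() returns false for constants, the combine is declined when
the value that would be inverted is an immediate, which is why
func64im_nnd_fold (constant mask) still selects the plain xor/and/xor
sequence.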