diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 2da4431cf077e..75073accc8a54 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -294,6 +294,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // the atomic operations in order to exploit SystemZ instructions. setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom); setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom); + setOperationAction(ISD::ATOMIC_LOAD, MVT::f128, Custom); // Mark sign/zero extending atomic loads as legal, which will make // DAGCombiner fold extensions into atomic loads if possible. @@ -935,9 +936,6 @@ bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const TargetLowering::AtomicExpansionKind SystemZTargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const { - // Lower fp128 the same way as i128. - if (LI->getType()->isFP128Ty()) - return AtomicExpansionKind::CastToInteger; return AtomicExpansionKind::None; } @@ -4550,7 +4548,9 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op, SelectionDAG &DAG) const { auto *Node = cast(Op.getNode()); - assert(Node->getMemoryVT() == MVT::i128 && "Only custom lowering i128."); + assert( + (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) && + "Only custom lowering i128 or f128."); // Use same code to handle both legal and non-legal i128 types. SmallVector Results; LowerOperationWrapper(Node, Results, DAG); @@ -6249,6 +6249,26 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, } } +// Manually lower a bitcast to avoid introducing illegal types after type +// legalization. +static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src, + SDValue Chain, const SDLoc &SL) { + SDValue Hi = + DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::i64, Src); + SDValue Lo = + DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::i64, Src); + + Hi = DAG.getBitcast(MVT::f64, Hi); + Lo = DAG.getBitcast(MVT::f64, Lo); + + SDNode *Pair = DAG.getMachineNode( + SystemZ::REG_SEQUENCE, SL, MVT::f128, + {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo, + DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi, + DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)}); + return SDValue(Pair, 0); +} + // Lower operations with invalid operand or result types (currently used // only for 128-bit integer types). void @@ -6263,8 +6283,25 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N, MachineMemOperand *MMO = cast(N)->getMemOperand(); SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128, DL, Tys, Ops, MVT::i128, MMO); - Results.push_back(lowerGR128ToI128(DAG, Res)); - Results.push_back(Res.getValue(1)); + + EVT VT = N->getValueType(0); + + if (VT == MVT::i128 || isTypeLegal(MVT::i128)) { + SDValue Lowered = lowerGR128ToI128(DAG, Res); + Results.push_back(DAG.getBitcast(VT, Lowered)); + Results.push_back(Res.getValue(1)); + } else { + // For the f128 case, after type legalization, we cannot produce a bitcast + // with an illegal type (i.e. i128), so manually lower it. + // + // FIXME: Really v2i64 should be legal, and should be used in place of + // unttyped. Then we could emit the bitcast which will potentially fold + // into the use. + SDValue Cast = expandBitCastI128ToF128(DAG, Res, Res.getValue(1), DL); + Results.push_back(Cast); + Results.push_back(Res.getValue(1)); + } + break; } case ISD::ATOMIC_STORE: { diff --git a/llvm/test/CodeGen/SystemZ/atomic-load-08.ll b/llvm/test/CodeGen/SystemZ/atomic-load-08.ll index c4f684ae718e8..15ff2daa0a384 100644 --- a/llvm/test/CodeGen/SystemZ/atomic-load-08.ll +++ b/llvm/test/CodeGen/SystemZ/atomic-load-08.ll @@ -1,20 +1,27 @@ -; Test long double atomic loads. These are emitted by the Clang FE as i128 -; loads with a bitcast, and this test case gets converted into that form as -; well by the AtomicExpand pass. +; Test long double atomic loads. ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefixes=CHECK,BASE %s ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck -check-prefixes=CHECK,Z13 %s - ; TODO: Is it worth testing softfp with vector? ; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+soft-float | FileCheck -check-prefixes=SOFTFP %s +; FIXME: Without vector support, v2i64 should be legal and we should +; introduce a simple bitcast, which could fold into the store use +; avoid the intermediate f registers. define void @f1(ptr %ret, ptr %src) { ; CHECK-LABEL: f1: ; CHECK: # %bb.0: -; CHECK-NEXT: lpq %r0, 0(%r3) -; CHECK-NEXT: stg %r1, 8(%r2) -; CHECK-NEXT: stg %r0, 0(%r2) -; CHECK-NEXT: br %r14 +; Z13-NEXT: lpq %r0, 0(%r3) +; Z13-NEXT: stg %r1, 8(%r2) +; Z13-NEXT: stg %r0, 0(%r2) +; Z13-NEXT: br %r14 + +; BASE: lpq %r0, 0(%r3) +; BASE-NEXT: ldgr %f0, %r0 +; BASE-NEXT: ldgr %f2, %r1 +; BASE-NEXT: std %f0, 0(%r2) +; BASE-NEXT: std %f2, 8(%r2) +; BASE-NEXT: br %r14 ; SOFTFP-LABEL: f1: ; SOFTFP: # %bb.0: @@ -30,15 +37,10 @@ define void @f1(ptr %ret, ptr %src) { define void @f1_fpuse(ptr %ret, ptr %src) { ; CHECK-LABEL: f1_fpuse: ; CHECK: # %bb.0: -; BASE-NEXT: aghi %r15, -176 -; BASE-NEXT: .cfi_def_cfa_offset 336 - ; CHECK-NEXT: lpq %r0, 0(%r3) -; BASE-NEXT: stg %r1, 168(%r15) -; BASE-NEXT: stg %r0, 160(%r15) -; BASE-NEXT: ld %f0, 160(%r15) -; BASE-NEXT: ld %f2, 168(%r15) +; BASE-NEXT: ldgr %f0, %r0 +; BASE-NEXT: ldgr %f2, %r1 ; Z13-NEXT: vlvgp %v0, %r0, %r1 ; Z13-NEXT: vrepg %v2, %v0, 1 @@ -46,7 +48,6 @@ define void @f1_fpuse(ptr %ret, ptr %src) { ; CHECK-NEXT: axbr %f0, %f0 ; CHECK-NEXT: std %f0, 0(%r2) ; CHECK-NEXT: std %f2, 8(%r2) -; BASE-NEXT: aghi %r15, 176 ; CHECK-NEXT: br %r14