Skip to content

Commit

Permalink
[SystemZ] Allow fp/int casting with inline assembly operands.
Browse files Browse the repository at this point in the history
Support bitcasting between int/fp/vector values and 'r'/'f'/'v' inline
assembly operands. This is intended to match GCC's behavior.

Reviewed By: Ulrich Weigand

Differential Revision: https://reviews.llvm.org/D146059
  • Loading branch information
JonPsson committed Mar 24, 2023
1 parent d820772 commit b4b4950
Show file tree
Hide file tree
Showing 7 changed files with 994 additions and 31 deletions.
65 changes: 34 additions & 31 deletions llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1105,19 +1105,18 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info,
case 'd': // Data register (equivalent to 'r')
case 'h': // High-part register
case 'r': // General-purpose register
if (CallOperandVal->getType()->isIntegerTy())
weight = CW_Register;
weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
break;

case 'f': // Floating-point register
if (type->isFloatingPointTy())
weight = CW_Register;
if (!useSoftFloat())
weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
break;

case 'v': // Vector register
if ((type->isVectorTy() || type->isFloatingPointTy()) &&
Subtarget.hasVector())
weight = CW_Register;
if (Subtarget.hasVector())
weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
: CW_Default;
break;

case 'I': // Unsigned 8-bit constant
Expand Down Expand Up @@ -1179,9 +1178,9 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
default: break;
case 'd': // Data register (equivalent to 'r')
case 'r': // General-purpose register
if (VT == MVT::i64)
if (VT.getSizeInBits() == 64)
return std::make_pair(0U, &SystemZ::GR64BitRegClass);
else if (VT == MVT::i128)
else if (VT.getSizeInBits() == 128)
return std::make_pair(0U, &SystemZ::GR128BitRegClass);
return std::make_pair(0U, &SystemZ::GR32BitRegClass);

Expand All @@ -1197,34 +1196,42 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(

case 'f': // Floating-point register
if (!useSoftFloat()) {
if (VT == MVT::f64)
if (VT.getSizeInBits() == 64)
return std::make_pair(0U, &SystemZ::FP64BitRegClass);
else if (VT == MVT::f128)
else if (VT.getSizeInBits() == 128)
return std::make_pair(0U, &SystemZ::FP128BitRegClass);
return std::make_pair(0U, &SystemZ::FP32BitRegClass);
}
break;

case 'v': // Vector register
if (Subtarget.hasVector()) {
if (VT == MVT::f32)
if (VT.getSizeInBits() == 32)
return std::make_pair(0U, &SystemZ::VR32BitRegClass);
if (VT == MVT::f64)
if (VT.getSizeInBits() == 64)
return std::make_pair(0U, &SystemZ::VR64BitRegClass);
return std::make_pair(0U, &SystemZ::VR128BitRegClass);
}
break;
}
}
if (Constraint.size() > 0 && Constraint[0] == '{') {

// A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
// to check the size on.
auto getVTSizeInBits = [&VT]() {
return VT == MVT::Other ? 0 : VT.getSizeInBits();
};

// We need to override the default register parsing for GPRs and FPRs
// because the interpretation depends on VT. The internal names of
// the registers are also different from the external names
// (F0D and F0S instead of F0, etc.).
if (Constraint[1] == 'r') {
if (VT == MVT::i32)
if (getVTSizeInBits() == 32)
return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
SystemZMC::GR32Regs, 16);
if (VT == MVT::i128)
if (getVTSizeInBits() == 128)
return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
SystemZMC::GR128Regs, 16);
return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
Expand All @@ -1234,10 +1241,10 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
if (useSoftFloat())
return std::make_pair(
0u, static_cast<const TargetRegisterClass *>(nullptr));
if (VT == MVT::f32)
if (getVTSizeInBits() == 32)
return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
SystemZMC::FP32Regs, 16);
if (VT == MVT::f128)
if (getVTSizeInBits() == 128)
return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
SystemZMC::FP128Regs, 16);
return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
Expand All @@ -1247,10 +1254,10 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
if (!Subtarget.hasVector())
return std::make_pair(
0u, static_cast<const TargetRegisterClass *>(nullptr));
if (VT == MVT::f32)
if (getVTSizeInBits() == 32)
return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
SystemZMC::VR32Regs, 32);
if (VT == MVT::f64)
if (getVTSizeInBits() == 64)
return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
SystemZMC::VR64Regs, 32);
return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
Expand Down Expand Up @@ -1453,28 +1460,24 @@ bool SystemZTargetLowering::splitValueIntoRegisterParts(
SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
EVT ValueVT = Val.getValueType();
assert((ValueVT != MVT::i128 ||
((NumParts == 1 && PartVT == MVT::Untyped) ||
(NumParts == 2 && PartVT == MVT::i64))) &&
"Unknown handling of i128 value.");
if (ValueVT == MVT::i128 && NumParts == 1) {
if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
// Inline assembly operand.
Parts[0] = lowerI128ToGR128(DAG, Val);
Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
return true;
}

return false;
}

SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
assert((ValueVT != MVT::i128 ||
((NumParts == 1 && PartVT == MVT::Untyped) ||
(NumParts == 2 && PartVT == MVT::i64))) &&
"Unknown handling of i128 value.");
if (ValueVT == MVT::i128 && NumParts == 1)
if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
// Inline assembly operand.
return lowerGR128ToI128(DAG, Parts[0]);
SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
return DAG.getBitcast(ValueVT, Res);
}

return SDValue();
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
; RUN: not llc -mtriple=s390x-linux-gnu -mcpu=z15 -mattr=soft-float < %s 2>&1 | FileCheck %s

; CHECK: error: couldn't allocate output register for constraint 'f'

; Ties an i32 input to an output with the 'f' (floating-point register)
; constraint. This file's RUN line enables soft-float and runs `not llc`, and
; the CHECK line expects the "couldn't allocate output register" error.
define signext i32 @int_and_f(i32 signext %cc_dep1) {
entry:
%0 = tail call i32 asm sideeffect "", "=f,0"(i32 %cc_dep1)
ret i32 %0
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=s390x-linux-gnu -mcpu=zEC12 < %s | FileCheck %s
;
; Test inline assembly where the operand is bitcasted.

; An i32 is tied to an output constrained to the explicit register f1. The
; CHECK lines expect a GPR-to-FPR move (ldgr) before the asm block and an
; FPR-to-GPR move (lgdr) after it.
define signext i32 @int_and_f(i32 signext %cc_dep1) {
; CHECK-LABEL: int_and_f:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: risbhg %r0, %r2, 0, 159, 32
; CHECK-NEXT: ldgr %f1, %r0
; CHECK-NEXT: # kill: def $f1s killed $f1s killed $f1d
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: # kill: def $f1s killed $f1s def $f1d
; CHECK-NEXT: lgdr %r0, %f1
; CHECK-NEXT: risblg %r0, %r0, 0, 159, 32
; CHECK-NEXT: lgfr %r2, %r0
; CHECK-NEXT: br %r14
entry:
%0 = tail call i32 asm sideeffect "", "={f1},0"(i32 %cc_dep1)
ret i32 %0
}

; An i64 is tied to an output constrained to the explicit register f2. The
; CHECK lines expect a single ldgr into f2 before the asm block and a single
; lgdr back into r2 after it.
define i64 @long_and_f(i64 %cc_dep1) {
; CHECK-LABEL: long_and_f:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ldgr %f2, %r2
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: lgdr %r2, %f2
; CHECK-NEXT: br %r14
entry:
%0 = tail call i64 asm sideeffect "", "={f2},0"(i64 %cc_dep1)
ret i64 %0
}

; An i128 loaded from memory is tied to an output constrained to the explicit
; register f1. The CHECK lines expect the two 8-byte halves to be loaded into
; f1 and f3 before the asm block and stored from f1 and f3 after it.
define void @__int128_and_f(ptr noalias nocapture writeonly sret(i128) align 8 %agg.result, ptr %0) {
; CHECK-LABEL: __int128_and_f:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ld %f1, 0(%r3)
; CHECK-NEXT: ld %f3, 8(%r3)
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: std %f1, 0(%r2)
; CHECK-NEXT: std %f3, 8(%r2)
; CHECK-NEXT: br %r14
entry:
%cc_dep1 = load i128, ptr %0, align 8
%1 = tail call i128 asm sideeffect "", "={f1},0"(i128 %cc_dep1)
store i128 %1, ptr %agg.result, align 8
ret void
}

; A float is tied to an output constrained to the explicit register r2. The
; CHECK lines expect an FPR-to-GPR move (lgdr) before the asm block and a
; GPR-to-FPR move (ldgr) after it.
define float @float_and_r(float %cc_dep1) {
; CHECK-LABEL: float_and_r:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $f0s killed $f0s def $f0d
; CHECK-NEXT: lgdr %r0, %f0
; CHECK-NEXT: risblg %r2, %r0, 0, 159, 32
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: risbhg %r0, %r2, 0, 159, 32
; CHECK-NEXT: ldgr %f0, %r0
; CHECK-NEXT: # kill: def $f0s killed $f0s killed $f0d
; CHECK-NEXT: br %r14
entry:
%0 = tail call float asm sideeffect "", "={r2},0"(float %cc_dep1)
ret float %0
}

; A double is tied to an output constrained to the explicit register r3. The
; CHECK lines expect a single lgdr into r3 before the asm block and a single
; ldgr back into f0 after it.
define double @double_and_r(double %cc_dep1) {
; CHECK-LABEL: double_and_r:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lgdr %r3, %f0
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: ldgr %f0, %r3
; CHECK-NEXT: br %r14
entry:
%0 = tail call double asm sideeffect "", "={r3},0"(double %cc_dep1)
ret double %0
}

; An fp128 loaded from memory is tied to an output constrained to the explicit
; register r4. The CHECK lines expect the two 8-byte halves to be loaded into
; r4 and r5 before the asm block and stored from r4 and r5 after it.
define void @longdouble_and_r(ptr noalias nocapture writeonly sret(fp128) align 8 %agg.result, ptr %0) {
; CHECK-LABEL: longdouble_and_r:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lg %r5, 8(%r3)
; CHECK-NEXT: lg %r4, 0(%r3)
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: stg %r5, 8(%r2)
; CHECK-NEXT: stg %r4, 0(%r2)
; CHECK-NEXT: br %r14
entry:
%cc_dep1 = load fp128, ptr %0, align 8
%1 = tail call fp128 asm sideeffect "", "={r4},0"(fp128 %cc_dep1)
store fp128 %1, ptr %agg.result, align 8
ret void
}

; A 32-bit vector (<2 x i16>) is tied to an output constrained to the explicit
; register r3. The CHECK lines expect the two elements, arriving in r2 and r3,
; to be combined into r3 before the asm block and split again after it.
define <2 x i16> @vec32_and_r(<2 x i16> %cc_dep1) {
; CHECK-LABEL: vec32_and_r:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $r3l killed $r3l def $r3d
; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
; CHECK-NEXT: risbgn %r3, %r2, 32, 47, 16
; CHECK-NEXT: # kill: def $r3l killed $r3l killed $r3d
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: srlk %r2, %r3, 16
; CHECK-NEXT: br %r14
entry:
%0 = tail call <2 x i16> asm sideeffect "", "={r3},0"(<2 x i16> %cc_dep1)
ret <2 x i16> %0
}

; A 64-bit vector (<2 x i32>) is tied to an output constrained to the explicit
; register r5. The CHECK lines expect the two elements, arriving in r2 and r3,
; to be packed into r5 before the asm block and unpacked into r2/r3 after it.
define <2 x i32> @vec64_and_r(<2 x i32> %cc_dep1) {
; CHECK-LABEL: vec64_and_r:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
; CHECK-NEXT: sllg %r5, %r2, 32
; CHECK-NEXT: lr %r5, %r3
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: lgr %r3, %r5
; CHECK-NEXT: srlg %r2, %r5, 32
; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT: # kill: def $r3l killed $r3l killed $r3d
; CHECK-NEXT: br %r14
entry:
%0 = tail call <2 x i32> asm sideeffect "", "={r5},0"(<2 x i32> %cc_dep1)
ret <2 x i32> %0
}

; A 32-bit vector (<2 x i16>) is tied to an output constrained to the explicit
; register f3. The CHECK lines expect the elements to be combined in a GPR and
; moved into f3 (ldgr) before the asm block, then moved back (lgdr) and split
; after it.
define <2 x i16> @vec32_and_f(<2 x i16> %cc_dep1) {
; CHECK-LABEL: vec32_and_f:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $r3l killed $r3l def $r3d
; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
; CHECK-NEXT: risbgn %r3, %r2, 32, 47, 16
; CHECK-NEXT: risbhg %r0, %r3, 0, 159, 32
; CHECK-NEXT: ldgr %f3, %r0
; CHECK-NEXT: # kill: def $f3s killed $f3s killed $f3d
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: # kill: def $f3s killed $f3s def $f3d
; CHECK-NEXT: lgdr %r0, %f3
; CHECK-NEXT: risblg %r3, %r0, 0, 159, 32
; CHECK-NEXT: srlk %r2, %r3, 16
; CHECK-NEXT: br %r14
entry:
%0 = tail call <2 x i16> asm sideeffect "", "={f3},0"(<2 x i16> %cc_dep1)
ret <2 x i16> %0
}

; A 64-bit vector (<2 x i32>) is tied to an output constrained to the explicit
; register f4. The CHECK lines expect the elements to be packed into a GPR and
; moved into f4 (ldgr) before the asm block, then moved back (lgdr) and
; unpacked into r2/r3 after it.
define <2 x i32> @vec64_and_f(<2 x i32> %cc_dep1) {
; CHECK-LABEL: vec64_and_f:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
; CHECK-NEXT: sllg %r0, %r2, 32
; CHECK-NEXT: lr %r0, %r3
; CHECK-NEXT: ldgr %f4, %r0
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: lgdr %r3, %f4
; CHECK-NEXT: srlg %r2, %r3, 32
; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT: # kill: def $r3l killed $r3l killed $r3d
; CHECK-NEXT: br %r14
entry:
%0 = tail call <2 x i32> asm sideeffect "", "={f4},0"(<2 x i32> %cc_dep1)
ret <2 x i32> %0
}

; A 128-bit vector (<4 x i32>) is tied to an output constrained to the explicit
; register f0. The CHECK lines expect the four elements to be packed into two
; 8-byte stack slots and loaded into f0 and f2 before the asm block, then moved
; back to GPRs (lgdr) and unpacked into r2-r5 after it.
define <4 x i32> @vec128_and_f(<4 x i32> %cc_dep1) {
; CHECK-LABEL: vec128_and_f:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: aghi %r15, -176
; CHECK-NEXT: .cfi_def_cfa_offset 336
; CHECK-NEXT: # kill: def $r4l killed $r4l def $r4d
; CHECK-NEXT: sllg %r0, %r4, 32
; CHECK-NEXT: lr %r0, %r5
; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
; CHECK-NEXT: stg %r0, 168(%r15)
; CHECK-NEXT: sllg %r0, %r2, 32
; CHECK-NEXT: lr %r0, %r3
; CHECK-NEXT: stg %r0, 160(%r15)
; CHECK-NEXT: ld %f0, 160(%r15)
; CHECK-NEXT: ld %f2, 168(%r15)
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: lgdr %r3, %f0
; CHECK-NEXT: lgdr %r5, %f2
; CHECK-NEXT: srlg %r2, %r3, 32
; CHECK-NEXT: srlg %r4, %r5, 32
; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT: # kill: def $r3l killed $r3l killed $r3d
; CHECK-NEXT: # kill: def $r4l killed $r4l killed $r4d
; CHECK-NEXT: # kill: def $r5l killed $r5l killed $r5d
; CHECK-NEXT: aghi %r15, 176
; CHECK-NEXT: br %r14
entry:
%0 = tail call <4 x i32> asm sideeffect "", "={f0},0"(<4 x i32> %cc_dep1)
ret <4 x i32> %0
}

Loading

0 comments on commit b4b4950

Please sign in to comment.