Skip to content

Commit

Permalink
[ARM] Switch to soft promoting half types. (#80440)
Browse files Browse the repository at this point in the history
The traditional promotion of half types (TypePromoteFloat) is known to generate wrong code.

Fixes #73805.
  • Loading branch information
hvdijk committed Feb 2, 2024
1 parent a4cd981 commit 52864d9
Show file tree
Hide file tree
Showing 16 changed files with 929 additions and 1,048 deletions.
4 changes: 2 additions & 2 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9055,7 +9055,7 @@ SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);

if (getTypeAction(*DAG.getContext(), EltVT) ==
TargetLowering::TypePromoteFloat) {
TargetLowering::TypeSoftPromoteHalf) {
// INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
// but the type system will try to do that if we don't intervene.
// Reinterpret any such vector-element insertion as one with the
Expand All @@ -9065,7 +9065,7 @@ SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,

EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
assert(getTypeAction(*DAG.getContext(), IEltVT) !=
TargetLowering::TypePromoteFloat);
TargetLowering::TypeSoftPromoteHalf);

SDValue VecIn = Op.getOperand(0);
EVT VecVT = VecIn.getValueType();
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/ARM/ARMISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -762,6 +762,10 @@ class VectorType;
ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
Value *Accumulator = nullptr) const override;

/// Always returns true: ARM opts in to soft promotion of half types.
/// NOTE(review): presumably this is what makes getTypeAction() report
/// TypeSoftPromoteHalf instead of TypePromoteFloat for f16 -- confirm against
/// the TargetLowering base-class documentation.
bool softPromoteHalfType() const override { return true; }

/// Always returns true for ARM.
/// NOTE(review): the name suggests soft-promoted half values are kept in
/// floating-point registers rather than integer registers -- confirm against
/// the TargetLowering base-class documentation.
bool useFPRegsForHalfType() const override { return true; }

protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
Expand Down
1,098 changes: 490 additions & 608 deletions llvm/test/CodeGen/ARM/aes-erratum-fix.ll

Large diffs are not rendered by default.

155 changes: 95 additions & 60 deletions llvm/test/CodeGen/ARM/arm-half-promote.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,78 +2,113 @@

define arm_aapcs_vfpcc { <8 x half>, <8 x half> } @f1() {
; CHECK-LABEL: _f1
; CHECK: vpush {d8}
; CHECK-NEXT: vmov.f64 d8, #5.000000e-01
; CHECK-NEXT: vmov.i32 d8, #0x0
; CHECK-NEXT: vmov.i32 d0, #0x0
; CHECK-NEXT: vmov.i32 d1, #0x0
; CHECK-NEXT: vmov.i32 d2, #0x0
; CHECK-NEXT: vmov.i32 d3, #0x0
; CHECK-NEXT: vmov.i32 d4, #0x0
; CHECK-NEXT: vmov.i32 d5, #0x0
; CHECK-NEXT: vmov.i32 d6, #0x0
; CHECK-NEXT: vmov.i32 d7, #0x0
; CHECK-NEXT: vmov.f32 s1, s16
; CHECK-NEXT: vmov.f32 s3, s16
; CHECK-NEXT: vmov.f32 s5, s16
; CHECK-NEXT: vmov.f32 s7, s16
; CHECK-NEXT: vmov.f32 s9, s16
; CHECK-NEXT: vmov.f32 s11, s16
; CHECK-NEXT: vmov.f32 s13, s16
; CHECK-NEXT: vmov.f32 s15, s16
; CHECK-NEXT: vpop {d8}
; CHECK: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov.i32 q8, #0x0
; CHECK-NEXT: vmov.u16 r0, d16[0]
; CHECK-NEXT: vmov d4, r0, r0
; CHECK-NEXT: vmov.u16 r0, d16[1]
; CHECK-NEXT: vmov d8, r0, r0
; CHECK-NEXT: vmov.u16 r0, d16[2]
; CHECK-NEXT: vmov d5, r0, r0
; CHECK-NEXT: vmov.u16 r0, d16[3]
; CHECK-NEXT: vmov d9, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[0]
; CHECK-NEXT: vmov d6, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[1]
; CHECK-NEXT: vmov d10, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[2]
; CHECK-NEXT: vmov d7, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[3]
; CHECK-NEXT: vmov d11, r0, r0
; CHECK: vmov.f32 s0, s8
; CHECK: vmov.f32 s1, s16
; CHECK: vmov.f32 s2, s10
; CHECK: vmov.f32 s3, s18
; CHECK: vmov.f32 s4, s12
; CHECK: vmov.f32 s5, s20
; CHECK: vmov.f32 s6, s14
; CHECK: vmov.f32 s7, s22
; CHECK: vmov.f32 s9, s16
; CHECK: vmov.f32 s11, s18
; CHECK: vmov.f32 s13, s20
; CHECK: vmov.f32 s15, s22
; CHECK: vpop {d8, d9, d10, d11}
; CHECK-NEXT: bx lr

ret { <8 x half>, <8 x half> } zeroinitializer
}

define swiftcc { <8 x half>, <8 x half> } @f2() {
; CHECK-LABEL: _f2
; CHECK: vpush {d8}
; CHECK-NEXT: vmov.f64 d8, #5.000000e-01
; CHECK-NEXT: vmov.i32 d8, #0x0
; CHECK-NEXT: vmov.i32 d0, #0x0
; CHECK-NEXT: vmov.i32 d1, #0x0
; CHECK-NEXT: vmov.i32 d2, #0x0
; CHECK-NEXT: vmov.i32 d3, #0x0
; CHECK-NEXT: vmov.i32 d4, #0x0
; CHECK-NEXT: vmov.i32 d5, #0x0
; CHECK-NEXT: vmov.i32 d6, #0x0
; CHECK-NEXT: vmov.i32 d7, #0x0
; CHECK-NEXT: vmov.f32 s1, s16
; CHECK-NEXT: vmov.f32 s3, s16
; CHECK-NEXT: vmov.f32 s5, s16
; CHECK-NEXT: vmov.f32 s7, s16
; CHECK-NEXT: vmov.f32 s9, s16
; CHECK-NEXT: vmov.f32 s11, s16
; CHECK-NEXT: vmov.f32 s13, s16
; CHECK-NEXT: vmov.f32 s15, s16
; CHECK-NEXT: vpop {d8}
; CHECK: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov.i32 q8, #0x0
; CHECK-NEXT: vmov.u16 r0, d16[0]
; CHECK-NEXT: vmov d4, r0, r0
; CHECK-NEXT: vmov.u16 r0, d16[1]
; CHECK-NEXT: vmov d8, r0, r0
; CHECK-NEXT: vmov.u16 r0, d16[2]
; CHECK-NEXT: vmov d5, r0, r0
; CHECK-NEXT: vmov.u16 r0, d16[3]
; CHECK-NEXT: vmov d9, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[0]
; CHECK-NEXT: vmov d6, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[1]
; CHECK-NEXT: vmov d10, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[2]
; CHECK-NEXT: vmov d7, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[3]
; CHECK-NEXT: vmov d11, r0, r0
; CHECK: vmov.f32 s0, s8
; CHECK: vmov.f32 s1, s16
; CHECK: vmov.f32 s2, s10
; CHECK: vmov.f32 s3, s18
; CHECK: vmov.f32 s4, s12
; CHECK: vmov.f32 s5, s20
; CHECK: vmov.f32 s6, s14
; CHECK: vmov.f32 s7, s22
; CHECK: vmov.f32 s9, s16
; CHECK: vmov.f32 s11, s18
; CHECK: vmov.f32 s13, s20
; CHECK: vmov.f32 s15, s22
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: bx lr

ret { <8 x half>, <8 x half> } zeroinitializer
}

define fastcc { <8 x half>, <8 x half> } @f3() {
; CHECK-LABEL: _f3
; CHECK: vpush {d8}
; CHECK-NEXT: vmov.f64 d8, #5.000000e-01
; CHECK-NEXT: vmov.i32 d8, #0x0
; CHECK-NEXT: vmov.i32 d0, #0x0
; CHECK-NEXT: vmov.i32 d1, #0x0
; CHECK-NEXT: vmov.i32 d2, #0x0
; CHECK-NEXT: vmov.i32 d3, #0x0
; CHECK-NEXT: vmov.i32 d4, #0x0
; CHECK-NEXT: vmov.i32 d5, #0x0
; CHECK-NEXT: vmov.i32 d6, #0x0
; CHECK-NEXT: vmov.i32 d7, #0x0
; CHECK-NEXT: vmov.f32 s1, s16
; CHECK-NEXT: vmov.f32 s3, s16
; CHECK-NEXT: vmov.f32 s5, s16
; CHECK-NEXT: vmov.f32 s7, s16
; CHECK-NEXT: vmov.f32 s9, s16
; CHECK-NEXT: vmov.f32 s11, s16
; CHECK-NEXT: vmov.f32 s13, s16
; CHECK-NEXT: vmov.f32 s15, s16
; CHECK-NEXT: vpop {d8}
; CHECK: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov.i32 q8, #0x0
; CHECK-NEXT: vmov.u16 r0, d16[0]
; CHECK-NEXT: vmov d4, r0, r0
; CHECK-NEXT: vmov.u16 r0, d16[1]
; CHECK-NEXT: vmov d8, r0, r0
; CHECK-NEXT: vmov.u16 r0, d16[2]
; CHECK-NEXT: vmov d5, r0, r0
; CHECK-NEXT: vmov.u16 r0, d16[3]
; CHECK-NEXT: vmov d9, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[0]
; CHECK-NEXT: vmov d6, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[1]
; CHECK-NEXT: vmov d10, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[2]
; CHECK-NEXT: vmov d7, r0, r0
; CHECK-NEXT: vmov.u16 r0, d17[3]
; CHECK-NEXT: vmov d11, r0, r0
; CHECK: vmov.f32 s0, s8
; CHECK: vmov.f32 s1, s16
; CHECK: vmov.f32 s2, s10
; CHECK: vmov.f32 s3, s18
; CHECK: vmov.f32 s4, s12
; CHECK: vmov.f32 s5, s20
; CHECK: vmov.f32 s6, s14
; CHECK: vmov.f32 s7, s22
; CHECK: vmov.f32 s9, s16
; CHECK: vmov.f32 s11, s18
; CHECK: vmov.f32 s13, s20
; CHECK: vmov.f32 s15, s22
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: bx lr

ret { <8 x half>, <8 x half> } zeroinitializer
Expand Down
40 changes: 0 additions & 40 deletions llvm/test/CodeGen/ARM/fp16-args.ll
Original file line number Diff line number Diff line change
Expand Up @@ -46,46 +46,6 @@ entry:
}

define <4 x half> @foo_vec(<4 x half> %a) {
; SOFT-LABEL: foo_vec:
; SOFT: @ %bb.0: @ %entry
; SOFT-NEXT: vmov s0, r3
; SOFT-NEXT: vmov s2, r1
; SOFT-NEXT: vcvtb.f32.f16 s0, s0
; SOFT-NEXT: vmov s4, r0
; SOFT-NEXT: vcvtb.f32.f16 s2, s2
; SOFT-NEXT: vmov s6, r2
; SOFT-NEXT: vcvtb.f32.f16 s4, s4
; SOFT-NEXT: vcvtb.f32.f16 s6, s6
; SOFT-NEXT: vadd.f32 s0, s0, s0
; SOFT-NEXT: vadd.f32 s2, s2, s2
; SOFT-NEXT: vcvtb.f16.f32 s0, s0
; SOFT-NEXT: vadd.f32 s4, s4, s4
; SOFT-NEXT: vcvtb.f16.f32 s2, s2
; SOFT-NEXT: vadd.f32 s6, s6, s6
; SOFT-NEXT: vcvtb.f16.f32 s4, s4
; SOFT-NEXT: vcvtb.f16.f32 s6, s6
; SOFT-NEXT: vmov r0, s4
; SOFT-NEXT: vmov r1, s2
; SOFT-NEXT: vmov r2, s6
; SOFT-NEXT: vmov r3, s0
; SOFT-NEXT: bx lr
;
; HARD-LABEL: foo_vec:
; HARD: @ %bb.0: @ %entry
; HARD-NEXT: vcvtb.f32.f16 s4, s3
; HARD-NEXT: vcvtb.f32.f16 s2, s2
; HARD-NEXT: vcvtb.f32.f16 s6, s1
; HARD-NEXT: vcvtb.f32.f16 s0, s0
; HARD-NEXT: vadd.f32 s2, s2, s2
; HARD-NEXT: vadd.f32 s0, s0, s0
; HARD-NEXT: vcvtb.f16.f32 s2, s2
; HARD-NEXT: vadd.f32 s4, s4, s4
; HARD-NEXT: vcvtb.f16.f32 s0, s0
; HARD-NEXT: vadd.f32 s6, s6, s6
; HARD-NEXT: vcvtb.f16.f32 s3, s4
; HARD-NEXT: vcvtb.f16.f32 s1, s6
; HARD-NEXT: bx lr
;
; FULL-SOFT-LE-LABEL: foo_vec:
; FULL-SOFT-LE: @ %bb.0: @ %entry
; FULL-SOFT-LE-NEXT: vmov d16, r0, r1
Expand Down

0 comments on commit 52864d9

Please sign in to comment.