Skip to content

Commit

Permalink
[ARM] FP16 codegen support for VSEL
Browse files Browse the repository at this point in the history
This implements lowering of SELECT_CC for f16s, which enables
codegen of VSEL with f16 types.

Differential Revision: https://reviews.llvm.org/D44518

llvm-svn: 327695
  • Loading branch information
Sjoerd Meijer committed Mar 16, 2018
1 parent 9f373a3 commit d391a1a
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 3 deletions.
1 change: 1 addition & 0 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Expand Up @@ -1048,6 +1048,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/ARM/ARMInstrVFP.td
Expand Up @@ -451,9 +451,9 @@ multiclass vsel_inst<string op, bits<2> opc, int CC> {
let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
Uses = [CPSR], AddedComplexity = 4 in {
def H : AHbInp<0b11100, opc, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
NoItinerary, !strconcat("vsel", op, ".f16\t$Sd, $Sn, $Sm"),
[]>,
[(set HPR:$Sd, (ARMcmov HPR:$Sm, HPR:$Sn, CC))]>,
Requires<[HasFullFP16]>;

def S : ASbInp<0b11100, opc, 0,
Expand Down
40 changes: 39 additions & 1 deletion llvm/test/CodeGen/ARM/fp16-instructions.ll
Expand Up @@ -687,18 +687,56 @@ entry:
; CHECK-HARDFP-FULLFP16: vnmul.f16 s0, s0, s1
}

; TODO:
; 28. VRINTA
; 29. VRINTM
; 30. VRINTN
; 31. VRINTP
; 32. VRINTR
; 33. VRINTX
; 34. VRINTZ

; 35. VSELEQ
; Selects between two half constants based on an ordered-equal (oeq) compare.
; The check line below expects a vseleq.f16 instruction in the output.
define half @select_cc1() {
; Compare an undef half operand against the constant 0xH0001, with the nsz
; fast-math flag set on the compare.
%1 = fcmp nsz oeq half undef, 0xH0001
; Yield 0xHC000 when the compare is true and 0xH0002 otherwise.
%2 = select i1 %1, half 0xHC000, half 0xH0002
ret half %2

; CHECK-LABEL: select_cc1:
; The destination is pinned to s0; each source operand is matched loosely as
; "s" followed by any single character.
; CHECK-HARDFP-FULLFP16: vseleq.f16 s0, s{{.}}, s{{.}}
}

; 36. VSELGE
; Selects between two half constants based on an ordered greater-or-equal
; (oge) compare. The check line below expects a vselge.f16 instruction.
define half @select_cc2() {
; Compare an undef half operand against the constant 0xH0001, with the nsz
; fast-math flag set on the compare.
%1 = fcmp nsz oge half undef, 0xH0001
; Yield 0xHC000 when the compare is true and 0xH0002 otherwise.
%2 = select i1 %1, half 0xHC000, half 0xH0002
ret half %2

; CHECK-LABEL: select_cc2:
; The destination is pinned to s0; each source operand is matched loosely as
; "s" followed by any single character.
; CHECK-HARDFP-FULLFP16: vselge.f16 s0, s{{.}}, s{{.}}
}

; 37. VSELGT
; Selects between two half constants based on an ordered greater-than (ogt)
; compare. The check line below expects a vselgt.f16 instruction.
define half @select_cc3() {
; Compare an undef half operand against the constant 0xH0001, with the nsz
; fast-math flag set on the compare.
%1 = fcmp nsz ogt half undef, 0xH0001
; Yield 0xHC000 when the compare is true and 0xH0002 otherwise.
%2 = select i1 %1, half 0xHC000, half 0xH0002
ret half %2

; CHECK-LABEL: select_cc3:
; The destination is pinned to s0; each source operand is matched loosely as
; "s" followed by any single character.
; CHECK-HARDFP-FULLFP16: vselgt.f16 s0, s{{.}}, s{{.}}
}

; 38. VSELVS
; 39. VSQRT
; Selects between two half constants based on an unordered-or-equal (ueq)
; compare. The check line below expects a vselvs.f16 instruction.
; NOTE(review): only the vselvs.f16 is checked here; whether other select
; instructions are also emitted for ueq is not visible from this test - confirm
; against the generated assembly.
define half @select_cc4() {
; Compare an undef half operand against the constant 0xH0001, with the nsz
; fast-math flag set on the compare.
%1 = fcmp nsz ueq half undef, 0xH0001
; Yield 0xHC000 when the compare is true and 0xH0002 otherwise.
%2 = select i1 %1, half 0xHC000, half 0xH0002
ret half %2

; CHECK-LABEL: select_cc4:
; The destination is pinned to s0; each source operand is matched loosely as
; "s" followed by any single character.
; CHECK-HARDFP-FULLFP16: vselvs.f16 s0, s{{.}}, s{{.}}
}

; 39. VSQRT - TODO

; 40. VSUB
define float @Sub(float %a.coerce, float %b.coerce) {
Expand Down

0 comments on commit d391a1a

Please sign in to comment.