diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index f5f732da99349..2325363bc2e0d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -19988,7 +19988,9 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG, return Res; EVT VT = N->getValueType(0); - if (VT != MVT::f32 && VT != MVT::f64) + if (VT != MVT::f16 && VT != MVT::f32 && VT != MVT::f64) + return SDValue(); + if (VT == MVT::f16 && !Subtarget->hasFullFP16()) return SDValue(); // Only optimize when the source and destination types have the same width. diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 84f0d47c02bad..7297ffc80d3a8 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -7014,6 +7014,19 @@ multiclass UIntToFPROLoadPat; } +let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in { +defm : UIntToFPROLoadPat; +def : Pat <(f16 (uint_to_fp (i32 + (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), + (UCVTFv1i16 (INSERT_SUBREG (f16 (IMPLICIT_DEF)), + (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>; +def : Pat <(f16 (uint_to_fp (i32 + (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))), + (UCVTFv1i16 (INSERT_SUBREG (f16 (IMPLICIT_DEF)), + (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>; +} + defm : UIntToFPROLoadPat; def : Pat <(f32 (uint_to_fp (i32 diff --git a/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll b/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll index 478ccf58f32c5..56e4b1988b8d1 100644 --- a/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll +++ b/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll @@ -43,11 +43,17 @@ entry: } define half @ui8_to_half(ptr %i, ptr %f) { -; CHECK-LABEL: ui8_to_half: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: ucvtf h0, w8 -; CHECK-NEXT: ret +; NEON-ENABLED-LABEL: ui8_to_half: +; NEON-ENABLED: // %bb.0: // %entry +; NEON-ENABLED-NEXT: ldr b0, [x0] +; NEON-ENABLED-NEXT: ucvtf h0, h0 +; NEON-ENABLED-NEXT: ret +; +; NEON-DISABLED-LABEL: ui8_to_half: +; NEON-DISABLED: // %bb.0: // %entry +; NEON-DISABLED-NEXT: ldrb w8, [x0] +; NEON-DISABLED-NEXT: ucvtf h0, w8 +; NEON-DISABLED-NEXT: ret entry: %ld = load i8, ptr %i, align 1 %conv = uitofp i8 %ld to half @@ -91,11 +97,17 @@ entry: } define half @ui16_to_half(ptr %i, ptr %f) { -; CHECK-LABEL: ui16_to_half: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ucvtf h0, w8 -; CHECK-NEXT: ret +; NEON-ENABLED-LABEL: ui16_to_half: +; NEON-ENABLED: // %bb.0: // %entry +; NEON-ENABLED-NEXT: ldr h0, [x0] +; NEON-ENABLED-NEXT: ucvtf h0, h0 +; NEON-ENABLED-NEXT: ret +; +; NEON-DISABLED-LABEL: ui16_to_half: +; NEON-DISABLED: // %bb.0: // %entry +; NEON-DISABLED-NEXT: ldrh w8, [x0] +; NEON-DISABLED-NEXT: ucvtf h0, w8 +; NEON-DISABLED-NEXT: ret entry: %ld = load i16, ptr %i, align 1 %conv = uitofp i16 %ld to half @@ -277,11 +289,17 @@ entry: } define half @si16_to_half(ptr %i, ptr %f) { -; CHECK-LABEL: si16_to_half: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrsh w8, [x0] -; CHECK-NEXT: scvtf h0, w8 -; CHECK-NEXT: ret +; NEON-ENABLED-LABEL: si16_to_half: +; NEON-ENABLED: // %bb.0: // %entry +; NEON-ENABLED-NEXT: ldr h0, [x0] +; NEON-ENABLED-NEXT: scvtf h0, h0 +; NEON-ENABLED-NEXT: ret +; +; NEON-DISABLED-LABEL: si16_to_half: +; NEON-DISABLED: // %bb.0: // %entry +; NEON-DISABLED-NEXT: ldrsh w8, [x0] +; NEON-DISABLED-NEXT: scvtf h0, w8 +; NEON-DISABLED-NEXT: ret entry: %ld = load i16, ptr %i, align 1 %conv = sitofp i16 %ld to half diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll index caf87a13f283b..56dd08feb2e15 100644 --- a/llvm/test/CodeGen/AArch64/itofp.ll +++ b/llvm/test/CodeGen/AArch64/itofp.ll @@ -1090,11 +1090,17 @@ define half @stofp_load_i16_f16(ptr %p) { ; CHECK-NOFP16-NEXT: fcvt h0, s0 ; CHECK-NOFP16-NEXT: ret ; -; CHECK-FP16-LABEL: stofp_load_i16_f16: -; CHECK-FP16: // %bb.0: // %entry -; CHECK-FP16-NEXT: ldrsh w8, [x0] -; CHECK-FP16-NEXT: scvtf h0, w8 -; CHECK-FP16-NEXT: ret +; CHECK-FP16-SD-LABEL: stofp_load_i16_f16: +; CHECK-FP16-SD: // %bb.0: // %entry +; CHECK-FP16-SD-NEXT: ldr h0, [x0] +; CHECK-FP16-SD-NEXT: scvtf h0, h0 +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-FP16-GI-LABEL: stofp_load_i16_f16: +; CHECK-FP16-GI: // %bb.0: // %entry +; CHECK-FP16-GI-NEXT: ldrsh w8, [x0] +; CHECK-FP16-GI-NEXT: scvtf h0, w8 +; CHECK-FP16-GI-NEXT: ret entry: %a = load i16, ptr %p %c = sitofp i16 %a to half @@ -1109,11 +1115,17 @@ define half @utofp_load_i16_f16(ptr %p) { ; CHECK-NOFP16-NEXT: fcvt h0, s0 ; CHECK-NOFP16-NEXT: ret ; -; CHECK-FP16-LABEL: utofp_load_i16_f16: -; CHECK-FP16: // %bb.0: // %entry -; CHECK-FP16-NEXT: ldrh w8, [x0] -; CHECK-FP16-NEXT: ucvtf h0, w8 -; CHECK-FP16-NEXT: ret +; CHECK-FP16-SD-LABEL: utofp_load_i16_f16: +; CHECK-FP16-SD: // %bb.0: // %entry +; CHECK-FP16-SD-NEXT: ldr h0, [x0] +; CHECK-FP16-SD-NEXT: ucvtf h0, h0 +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-FP16-GI-LABEL: utofp_load_i16_f16: +; CHECK-FP16-GI: // %bb.0: // %entry +; CHECK-FP16-GI-NEXT: ldrh w8, [x0] +; CHECK-FP16-GI-NEXT: ucvtf h0, w8 +; CHECK-FP16-GI-NEXT: ret entry: %a = load i16, ptr %p %c = uitofp i16 %a to half @@ -1149,8 +1161,8 @@ define half @utofp_load_i8_f16(ptr %p) { ; ; CHECK-FP16-LABEL: utofp_load_i8_f16: ; CHECK-FP16: // %bb.0: // %entry -; CHECK-FP16-NEXT: ldrb w8, [x0] -; CHECK-FP16-NEXT: ucvtf h0, w8 +; CHECK-FP16-NEXT: ldr b0, [x0] +; CHECK-FP16-NEXT: ucvtf h0, h0 ; CHECK-FP16-NEXT: ret entry: %a = load i8, ptr %p