Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18570,7 +18570,7 @@ bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
case MVT::f64:
return true;
case MVT::bf16:
return VT.isScalableVector() && Subtarget->hasSVEB16B16() &&
return VT.isScalableVector() && Subtarget->hasBF16() &&
Subtarget->isNonStreamingSVEorSME2Available();
default:
break;
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -2578,6 +2578,11 @@ let Predicates = [HasBF16, HasSVE_or_SME] in {
defm BFMLALB_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b100, "bfmlalb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalb_lane_v2>;
defm BFMLALT_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt_lane_v2>;

// Select BFMLALB for an all-active nxv4f32 AArch64fmla_p whose two
// multiplicands are nxv4bf16 values extended to nxv4f32 by all-active
// AArch64fcvte_mt nodes. The extends of $Zn and $Zm are absorbed by the
// instruction, which takes the original bfloat registers directly; $acc is
// passed through as the f32 accumulator.
// NOTE(review): presumably relies on unpacked nxv4bf16 elements sitting in the
// 16-bit lanes that BFMLALB reads from each 32-bit container -- confirm.
def : Pat<(nxv4f32 (AArch64fmla_p (SVEAllActive), nxv4f32:$acc,
(nxv4f32 (AArch64fcvte_mt (SVEAllActive), nxv4bf16:$Zn, (undef))),
(nxv4f32 (AArch64fcvte_mt (SVEAllActive), nxv4bf16:$Zm, (undef))))),
(BFMLALB_ZZZ nxv4f32:$acc, ZPR:$Zn, ZPR:$Zm)>;

defm BFCVT_ZPmZ : sve_bfloat_convert<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2, AArch64fcvtr_mt>;
defm BFCVTNT_ZPmZ : sve_bfloat_convert_top<"bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32_v2>;
} // End HasBF16, HasSVE_or_SME
Expand Down
28 changes: 11 additions & 17 deletions llvm/test/CodeGen/AArch64/sve-bf16-arith.ll
Original file line number Diff line number Diff line change
Expand Up @@ -466,12 +466,10 @@ define <vscale x 2 x bfloat> @fmla_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x
define <vscale x 4 x bfloat> @fmla_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c) {
; NOB16B16-LABEL: fmla_nxv4bf16:
; NOB16B16: // %bb.0:
; NOB16B16-NEXT: lsl z1.s, z1.s, #16
; NOB16B16-NEXT: lsl z0.s, z0.s, #16
; NOB16B16-NEXT: lsl z2.s, z2.s, #16
; NOB16B16-NEXT: ptrue p0.s
; NOB16B16-NEXT: fmad z0.s, p0/m, z1.s, z2.s
; NOB16B16-NEXT: bfcvt z0.h, p0/m, z0.s
; NOB16B16-NEXT: bfmlalb z2.s, z0.h, z1.h
; NOB16B16-NEXT: bfcvt z0.h, p0/m, z2.s
; NOB16B16-NEXT: ret
;
; B16B16-LABEL: fmla_nxv4bf16:
Expand All @@ -486,24 +484,20 @@ define <vscale x 4 x bfloat> @fmla_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x
define <vscale x 8 x bfloat> @fmla_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) {
; NOB16B16-LABEL: fmla_nxv8bf16:
; NOB16B16: // %bb.0:
; NOB16B16-NEXT: uunpkhi z3.s, z1.h
; NOB16B16-NEXT: uunpkhi z4.s, z0.h
; NOB16B16-NEXT: uunpkhi z5.s, z2.h
; NOB16B16-NEXT: uunpkhi z3.s, z2.h
; NOB16B16-NEXT: uunpklo z2.s, z2.h
; NOB16B16-NEXT: uunpkhi z4.s, z1.h
; NOB16B16-NEXT: uunpkhi z5.s, z0.h
; NOB16B16-NEXT: uunpklo z1.s, z1.h
; NOB16B16-NEXT: uunpklo z0.s, z0.h
; NOB16B16-NEXT: uunpklo z2.s, z2.h
; NOB16B16-NEXT: ptrue p0.s
; NOB16B16-NEXT: lsl z3.s, z3.s, #16
; NOB16B16-NEXT: lsl z4.s, z4.s, #16
; NOB16B16-NEXT: lsl z5.s, z5.s, #16
; NOB16B16-NEXT: lsl z1.s, z1.s, #16
; NOB16B16-NEXT: lsl z0.s, z0.s, #16
; NOB16B16-NEXT: lsl z2.s, z2.s, #16
; NOB16B16-NEXT: fmad z3.s, p0/m, z4.s, z5.s
; NOB16B16-NEXT: fmad z0.s, p0/m, z1.s, z2.s
; NOB16B16-NEXT: bfcvt z1.h, p0/m, z3.s
; NOB16B16-NEXT: bfcvt z0.h, p0/m, z0.s
; NOB16B16-NEXT: uzp1 z0.h, z0.h, z1.h
; NOB16B16-NEXT: bfmlalb z3.s, z5.h, z4.h
; NOB16B16-NEXT: bfmlalb z2.s, z0.h, z1.h
; NOB16B16-NEXT: bfcvt z0.h, p0/m, z3.s
; NOB16B16-NEXT: bfcvt z1.h, p0/m, z2.s
; NOB16B16-NEXT: uzp1 z0.h, z1.h, z0.h
; NOB16B16-NEXT: ret
;
; B16B16-LABEL: fmla_nxv8bf16:
Expand Down
Loading