@@ -540,7 +540,7 @@ define <vscale x 2 x bfloat> @fmul_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x
540540; NOB16B16-STREAMING-NEXT: bfmlalb z2.s, z0.h, z1.h
541541; NOB16B16-STREAMING-NEXT: bfcvt z0.h, p0/m, z2.s
542542; NOB16B16-STREAMING-NEXT: ret
543- %res = fmul <vscale x 2 x bfloat> %a , %b
543+ %res = fmul nsz <vscale x 2 x bfloat> %a , %b
544544 ret <vscale x 2 x bfloat> %res
545545}
546546
@@ -565,7 +565,7 @@ define <vscale x 4 x bfloat> @fmul_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x
565565; NOB16B16-STREAMING-NEXT: bfmlalb z2.s, z0.h, z1.h
566566; NOB16B16-STREAMING-NEXT: bfcvt z0.h, p0/m, z2.s
567567; NOB16B16-STREAMING-NEXT: ret
568- %res = fmul <vscale x 4 x bfloat> %a , %b
568+ %res = fmul nsz <vscale x 4 x bfloat> %a , %b
569569 ret <vscale x 4 x bfloat> %res
570570}
571571
@@ -596,6 +596,27 @@ define <vscale x 8 x bfloat> @fmul_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x
596596; NOB16B16-STREAMING-NEXT: bfcvt z0.h, p0/m, z2.s
597597; NOB16B16-STREAMING-NEXT: bfcvtnt z0.h, p0/m, z3.s
598598; NOB16B16-STREAMING-NEXT: ret
599+ %res = fmul nsz <vscale x 8 x bfloat> %a , %b
600+ ret <vscale x 8 x bfloat> %res
601+ }
602+
; fmul of two <vscale x 8 x bfloat> operands with no fast-math flags (in
; particular, no nsz on the fmul below).
;
; The check lines for the NOB16B16 prefix expect a widening multiply-accumulate
; sequence: the constant 0x80000000 is splatted into z2 and copied to z3, the
; bottom/top bf16 lanes are accumulated with bfmlalb/bfmlalt, and the two
; results are narrowed back with bfcvt/bfcvtnt. 0x80000000 is the IEEE-754
; single-precision bit pattern of -0.0.
; NOTE(review): presumably the -0.0 accumulator is used so that a zero-valued
; product keeps its sign when nsz is absent -- confirm against the lowering.
;
; The check lines for the B16B16 prefix expect a single bfmul instruction.
603+ define <vscale x 8 x bfloat> @fmul_nxv8bf16_no_nsz (<vscale x 8 x bfloat> %a , <vscale x 8 x bfloat> %b ) {
604+ ; NOB16B16-LABEL: fmul_nxv8bf16_no_nsz:
605+ ; NOB16B16: // %bb.0:
606+ ; NOB16B16-NEXT: mov w8, #-2147483648 // =0x80000000
607+ ; NOB16B16-NEXT: ptrue p0.s
608+ ; NOB16B16-NEXT: mov z2.s, w8
609+ ; NOB16B16-NEXT: mov z3.d, z2.d
610+ ; NOB16B16-NEXT: bfmlalb z2.s, z0.h, z1.h
611+ ; NOB16B16-NEXT: bfmlalt z3.s, z0.h, z1.h
612+ ; NOB16B16-NEXT: bfcvt z0.h, p0/m, z2.s
613+ ; NOB16B16-NEXT: bfcvtnt z0.h, p0/m, z3.s
614+ ; NOB16B16-NEXT: ret
615+ ;
616+ ; B16B16-LABEL: fmul_nxv8bf16_no_nsz:
617+ ; B16B16: // %bb.0:
618+ ; B16B16-NEXT: bfmul z0.h, z0.h, z1.h
619+ ; B16B16-NEXT: ret
599620 %res = fmul <vscale x 8 x bfloat> %a , %b
600621 ret <vscale x 8 x bfloat> %res
601622}
0 commit comments