[AArch64][SVE] Add vector_extract patterns for scalable bf16 vectors

Adds instruction-selection patterns for extracting a bf16 element from
nxv8bf16, nxv4bf16 and nxv2bf16 vectors, alongside the existing f16 ones:

  * register index:  WHILELS_PXX_{H,S,D} builds the predicate, LASTB_VPZ_H
    reads the element;
  * immediate index: DUP_ZZI_{H,S,D} followed by an hsub sub-register
    extract;
  * lane 0:          a plain hsub sub-register extract.

sve-extract-element.ll gains bf16 variants of the lane0 / in-range /
out-of-minimum-range / variable-index tests, and its target-features
attribute is extended with +bf16.

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index a2dcf9dde47b6..f87b9e38fbaae 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3100,6 +3100,12 @@ let Predicates = [HasSVEorSME] in {
             (LASTB_VPZ_H (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
   def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), GPR64:$index)),
             (LASTB_VPZ_H (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
+  def : Pat<(bf16 (vector_extract (nxv8bf16 ZPR:$vec), GPR64:$index)),
+            (LASTB_VPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>;
+  def : Pat<(bf16 (vector_extract (nxv4bf16 ZPR:$vec), GPR64:$index)),
+            (LASTB_VPZ_H (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
+  def : Pat<(bf16 (vector_extract (nxv2bf16 ZPR:$vec), GPR64:$index)),
+            (LASTB_VPZ_H (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
   def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), GPR64:$index)),
             (LASTB_VPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
   def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), GPR64:$index)),
@@ -3122,6 +3128,12 @@ let Predicates = [HasSVEorSME] in {
             (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), hsub)>;
   def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
             (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), hsub)>;
+  def : Pat<(bf16 (vector_extract (nxv8bf16 ZPR:$vec), sve_elm_idx_extdup_h:$index)),
+            (EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>;
+  def : Pat<(bf16 (vector_extract (nxv4bf16 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
+            (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), hsub)>;
+  def : Pat<(bf16 (vector_extract (nxv2bf16 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
+            (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), hsub)>;
   def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
             (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), ssub)>;
   def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
@@ -3172,6 +3184,12 @@ let Predicates = [HasSVEorSME] in {
             (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
   def : Pat<(vector_extract (nxv2f16 ZPR:$Zs), (i64 0)),
             (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
+  def : Pat<(vector_extract (nxv8bf16 ZPR:$Zs), (i64 0)),
+            (bf16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
+  def : Pat<(vector_extract (nxv4bf16 ZPR:$Zs), (i64 0)),
+            (bf16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
+  def : Pat<(vector_extract (nxv2bf16 ZPR:$Zs), (i64 0)),
+            (bf16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
   def : Pat<(vector_extract (nxv4f32 ZPR:$Zs), (i64 0)),
             (f32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
   def : Pat<(vector_extract (nxv2f32 ZPR:$Zs), (i64 0)),
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-element.ll b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
index 6ca9934a21caf..3385ad525c7ed 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-element.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
@@ -202,6 +202,93 @@ define half @test_lane2_2xf16(<vscale x 2 x half> %a) #0 {
   ret half %b
 }
 
+define bfloat @test_lane0_8xbf16(<vscale x 8 x bfloat> %a) #0 {
+; CHECK-LABEL: test_lane0_8xbf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 8 x bfloat> %a, i32 0
+  ret bfloat %b
+}
+
+define bfloat @test_lane7_8xbf16(<vscale x 8 x bfloat> %a) #0 {
+; CHECK-LABEL: test_lane7_8xbf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, z0.h[7]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 8 x bfloat> %a, i32 7
+  ret bfloat %b
+}
+
+define bfloat @test_lane8_8xbf16(<vscale x 8 x bfloat> %a) #0 {
+; CHECK-LABEL: test_lane8_8xbf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, z0.h[8]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 8 x bfloat> %a, i32 8
+  ret bfloat %b
+}
+
+define bfloat @test_lane0_4xbf16(<vscale x 4 x bfloat> %a) #0 {
+; CHECK-LABEL: test_lane0_4xbf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 4 x bfloat> %a, i32 0
+  ret bfloat %b
+}
+
+define bfloat @test_lane3_4xbf16(<vscale x 4 x bfloat> %a) #0 {
+; CHECK-LABEL: test_lane3_4xbf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, z0.s[3]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 4 x bfloat> %a, i32 3
+  ret bfloat %b
+}
+
+define bfloat @test_lane4_4xbf16(<vscale x 4 x bfloat> %a) #0 {
+; CHECK-LABEL: test_lane4_4xbf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, z0.s[4]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 4 x bfloat> %a, i32 4
+  ret bfloat %b
+}
+
+define bfloat @test_lane0_2xbf16(<vscale x 2 x bfloat> %a) #0 {
+; CHECK-LABEL: test_lane0_2xbf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 2 x bfloat> %a, i32 0
+  ret bfloat %b
+}
+
+define bfloat @test_lane1_2xbf16(<vscale x 2 x bfloat> %a) #0 {
+; CHECK-LABEL: test_lane1_2xbf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z0.d[1]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 2 x bfloat> %a, i32 1
+  ret bfloat %b
+}
+
+define bfloat @test_lane2_2xbf16(<vscale x 2 x bfloat> %a) #0 {
+; CHECK-LABEL: test_lane2_2xbf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z0.d[2]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 2 x bfloat> %a, i32 2
+  ret bfloat %b
+}
+
 define float @test_lane0_4xf32(<vscale x 4 x float> %a) #0 {
 ; CHECK-LABEL: test_lane0_4xf32:
 ; CHECK:       // %bb.0:
@@ -366,6 +453,39 @@ define half @test_lanex_2xf16(<vscale x 2 x half> %a, i32 %x) #0 {
   ret half %b
 }
 
+define bfloat @test_lanex_8xbf16(<vscale x 8 x bfloat> %a, i32 %x) #0 {
+; CHECK-LABEL: test_lanex_8xbf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    whilels p0.h, xzr, x8
+; CHECK-NEXT:    lastb h0, p0, z0.h
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 8 x bfloat> %a, i32 %x
+  ret bfloat %b
+}
+
+define bfloat @test_lanex_4xbf16(<vscale x 4 x bfloat> %a, i32 %x) #0 {
+; CHECK-LABEL: test_lanex_4xbf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    whilels p0.s, xzr, x8
+; CHECK-NEXT:    lastb h0, p0, z0.h
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 4 x bfloat> %a, i32 %x
+  ret bfloat %b
+}
+
+define bfloat @test_lanex_2xbf16(<vscale x 2 x bfloat> %a, i32 %x) #0 {
+; CHECK-LABEL: test_lanex_2xbf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    whilels p0.d, xzr, x8
+; CHECK-NEXT:    lastb h0, p0, z0.h
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 2 x bfloat> %a, i32 %x
+  ret bfloat %b
+}
+
 define float @test_lanex_4xf32(<vscale x 4 x float> %a, i32 %x) #0 {
 ; CHECK-LABEL: test_lanex_4xf32:
 ; CHECK:       // %bb.0:
@@ -532,4 +652,4 @@ define i1 @test_lane4_2xi1(<vscale x 2 x i1> %a) #0 {
 
 declare i64 @llvm.vscale.i64()
 
-attributes #0 = { "target-features"="+sve" }
+attributes #0 = { "target-features"="+sve,+bf16" }