From 78cf616629af0edde9cb932dbe9a2670d124620f Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 16 Sep 2025 19:18:46 +0100 Subject: [PATCH 1/2] [AArch64] Add patterns for extending loads bitcast to a vector. Similar to <>, we can convert an extending load + bitcast to a vector into a b/h/s register load, which also zeros the top parts of the register. --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 29 +++- llvm/test/CodeGen/AArch64/aarch64-addv.ll | 9 +- llvm/test/CodeGen/AArch64/bitcast-extend.ll | 144 +++++++++++--------- llvm/test/CodeGen/AArch64/dp1.ll | 6 +- 4 files changed, 108 insertions(+), 80 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 6cea453f271be..c1570ba5be7d8 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4026,6 +4026,27 @@ def : Pat <(f32 (bitconvert (i32 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s def : Pat <(f32 (bitconvert (i32 (zextloadi8 (am_indexed16 GPR64sp:$Rn, uimm12s1:$offset))))), (SUBREG_TO_REG (i32 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; +// Similar to the patterns above we can turn a bitcast zextload to a vector type into a FPR load. 
+multiclass BitcastLoad { + def : Pat <(VT (bitconvert (i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), + (SUBREG_TO_REG (i64 0), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; + def : Pat <(VT (bitconvert (i64 (zextloadi16 (am_indexed32 GPR64sp:$Rn, uimm12s2:$offset))))), + (SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; + def : Pat <(VT (bitconvert (i64 (zextloadi8 (am_indexed32 GPR64sp:$Rn, uimm12s1:$offset))))), + (SUBREG_TO_REG (i64 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; +} +let Predicates = [IsLE] in { + defm : BitcastLoad; + defm : BitcastLoad; + defm : BitcastLoad; + defm : BitcastLoad; + defm : BitcastLoad; + defm : BitcastLoad; + defm : BitcastLoad; + defm : BitcastLoad; +} + + // Pre-fetch. def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm", [(AArch64Prefetch timm:$Rt, @@ -4172,13 +4193,13 @@ def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), (LDURBBi GPR64sp:$Rn, simm9:$offset)>; def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))), - (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>; + (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>; def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), - (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; + (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), - (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; + (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), - (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; + (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; //--- diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll 
b/llvm/test/CodeGen/AArch64/aarch64-addv.ll index d9180a28bd40b..4c168e3306500 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll @@ -501,8 +501,7 @@ define i16 @addv_zero_lanes_v4i16(ptr %arr) { ; ; CHECK-GI-LABEL: addv_zero_lanes_v4i16: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldrb w8, [x0] -; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ldr b0, [x0] ; CHECK-GI-NEXT: addv h0, v0.4h ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret @@ -521,8 +520,7 @@ define i8 @addv_zero_lanes_v8i8(ptr %arr) { ; ; CHECK-GI-LABEL: addv_zero_lanes_v8i8: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldrb w8, [x0] -; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ldr b0, [x0] ; CHECK-GI-NEXT: addv b0, v0.8b ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret @@ -633,8 +631,7 @@ define i32 @addv_zero_lanes_v2i32(ptr %arr) { ; ; CHECK-GI-LABEL: addv_zero_lanes_v2i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr w8, [x0] -; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ldr s0, [x0] ; CHECK-GI-NEXT: addp v0.2s, v0.2s, v0.2s ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/bitcast-extend.ll b/llvm/test/CodeGen/AArch64/bitcast-extend.ll index 741dcf3ad4c2f..1ecc945eb107c 100644 --- a/llvm/test/CodeGen/AArch64/bitcast-extend.ll +++ b/llvm/test/CodeGen/AArch64/bitcast-extend.ll @@ -291,8 +291,7 @@ define void @extractbitcastext_s(i32 %bytes, ptr %output) { define <8 x i8> @load_zext_i8_v8i8(ptr %p) { ; CHECK-LABEL: load_zext_i8_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr b0, [x0] ; CHECK-NEXT: ret %l = load i8, ptr %p %z = zext i8 %l to i64 @@ -303,8 +302,7 @@ define <8 x i8> @load_zext_i8_v8i8(ptr %p) { define <8 x i8> @load_zext_i16_v8i8(ptr %p) { ; CHECK-LABEL: load_zext_i16_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr h0, [x0] ; CHECK-NEXT: ret %l = load i16, ptr %p %z = zext i16 %l to i64 @@ -315,8 
+313,7 @@ define <8 x i8> @load_zext_i16_v8i8(ptr %p) { define <8 x i8> @load_zext_i32_v8i8(ptr %p) { ; CHECK-LABEL: load_zext_i32_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ret %l = load i32, ptr %p %z = zext i32 %l to i64 @@ -360,8 +357,7 @@ define <16 x i8> @load_zext_v16i8(ptr %p) { define <4 x i16> @load_zext_i8_v4i16(ptr %p) { ; CHECK-LABEL: load_zext_i8_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr b0, [x0] ; CHECK-NEXT: ret %l = load i8, ptr %p %z = zext i8 %l to i64 @@ -372,8 +368,7 @@ define <4 x i16> @load_zext_i8_v4i16(ptr %p) { define <4 x i16> @load_zext_i16_v4i16(ptr %p) { ; CHECK-LABEL: load_zext_i16_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr h0, [x0] ; CHECK-NEXT: ret %l = load i16, ptr %p %z = zext i16 %l to i64 @@ -384,8 +379,7 @@ define <4 x i16> @load_zext_i16_v4i16(ptr %p) { define <4 x i16> @load_zext_i32_v4i16(ptr %p) { ; CHECK-LABEL: load_zext_i32_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ret %l = load i32, ptr %p %z = zext i32 %l to i64 @@ -396,8 +390,7 @@ define <4 x i16> @load_zext_i32_v4i16(ptr %p) { define <2 x i32> @load_zext_i8_v2i32(ptr %p) { ; CHECK-LABEL: load_zext_i8_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr b0, [x0] ; CHECK-NEXT: ret %l = load i8, ptr %p %z = zext i8 %l to i64 @@ -408,8 +401,7 @@ define <2 x i32> @load_zext_i8_v2i32(ptr %p) { define <2 x i32> @load_zext_i16_v2i32(ptr %p) { ; CHECK-LABEL: load_zext_i16_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr h0, [x0] ; CHECK-NEXT: ret %l = load i16, ptr %p %z = zext i16 %l to i64 @@ -420,8 +412,7 @@ define <2 x i32> @load_zext_i16_v2i32(ptr %p) { define <2 x i32> @load_zext_i32_v2i32(ptr %p) { 
; CHECK-LABEL: load_zext_i32_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ret %l = load i32, ptr %p %z = zext i32 %l to i64 @@ -430,11 +421,16 @@ define <2 x i32> @load_zext_i32_v2i32(ptr %p) { } define <1 x i64> @load_zext_i8_v1i64(ptr %p) { -; CHECK-LABEL: load_zext_i8_v1i64: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i8_v1i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr b0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i8_v1i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrb w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i8, ptr %p %z = zext i8 %l to i64 %b = bitcast i64 %z to <1 x i64> @@ -442,11 +438,16 @@ define <1 x i64> @load_zext_i8_v1i64(ptr %p) { } define <1 x i64> @load_zext_i16_v1i64(ptr %p) { -; CHECK-LABEL: load_zext_i16_v1i64: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i16_v1i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i16_v1i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrh w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i16, ptr %p %z = zext i16 %l to i64 %b = bitcast i64 %z to <1 x i64> @@ -454,11 +455,16 @@ define <1 x i64> @load_zext_i16_v1i64(ptr %p) { } define <1 x i64> @load_zext_i32_v1i64(ptr %p) { -; CHECK-LABEL: load_zext_i32_v1i64: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i32_v1i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i32_v1i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i32, ptr %p %z = zext i32 %l to i64 %b = bitcast i64 %z to <1 x i64> @@ -469,8 
+475,7 @@ define <1 x i64> @load_zext_i32_v1i64(ptr %p) { define <4 x half> @load_zext_i8_v4f16(ptr %p) { ; CHECK-LABEL: load_zext_i8_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr b0, [x0] ; CHECK-NEXT: ret %l = load i8, ptr %p %z = zext i8 %l to i64 @@ -481,8 +486,7 @@ define <4 x half> @load_zext_i8_v4f16(ptr %p) { define <4 x half> @load_zext_i16_v4f16(ptr %p) { ; CHECK-LABEL: load_zext_i16_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr h0, [x0] ; CHECK-NEXT: ret %l = load i16, ptr %p %z = zext i16 %l to i64 @@ -493,8 +497,7 @@ define <4 x half> @load_zext_i16_v4f16(ptr %p) { define <4 x half> @load_zext_i32_v4f16(ptr %p) { ; CHECK-LABEL: load_zext_i32_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ret %l = load i32, ptr %p %z = zext i32 %l to i64 @@ -505,8 +508,7 @@ define <4 x half> @load_zext_i32_v4f16(ptr %p) { define <4 x bfloat> @load_zext_i8_v4bf16(ptr %p) { ; CHECK-LABEL: load_zext_i8_v4bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr b0, [x0] ; CHECK-NEXT: ret %l = load i8, ptr %p %z = zext i8 %l to i64 @@ -517,8 +519,7 @@ define <4 x bfloat> @load_zext_i8_v4bf16(ptr %p) { define <4 x bfloat> @load_zext_i16_v4bf16(ptr %p) { ; CHECK-LABEL: load_zext_i16_v4bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr h0, [x0] ; CHECK-NEXT: ret %l = load i16, ptr %p %z = zext i16 %l to i64 @@ -529,8 +530,7 @@ define <4 x bfloat> @load_zext_i16_v4bf16(ptr %p) { define <4 x bfloat> @load_zext_i32_v4bf16(ptr %p) { ; CHECK-LABEL: load_zext_i32_v4bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ret %l = load i32, ptr %p %z = zext i32 %l to i64 @@ -541,8 +541,7 @@ define <4 x bfloat> @load_zext_i32_v4bf16(ptr %p) { define 
<2 x float> @load_zext_i8_v2f32(ptr %p) { ; CHECK-LABEL: load_zext_i8_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr b0, [x0] ; CHECK-NEXT: ret %l = load i8, ptr %p %z = zext i8 %l to i64 @@ -553,8 +552,7 @@ define <2 x float> @load_zext_i8_v2f32(ptr %p) { define <2 x float> @load_zext_i16_v2f32(ptr %p) { ; CHECK-LABEL: load_zext_i16_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr h0, [x0] ; CHECK-NEXT: ret %l = load i16, ptr %p %z = zext i16 %l to i64 @@ -565,8 +563,7 @@ define <2 x float> @load_zext_i16_v2f32(ptr %p) { define <2 x float> @load_zext_i32_v2f32(ptr %p) { ; CHECK-LABEL: load_zext_i32_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ret %l = load i32, ptr %p %z = zext i32 %l to i64 @@ -575,11 +572,16 @@ define <2 x float> @load_zext_i32_v2f32(ptr %p) { } define <1 x double> @load_zext_i8_v1f64(ptr %p) { -; CHECK-LABEL: load_zext_i8_v1f64: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i8_v1f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr b0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i8_v1f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrb w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i8, ptr %p %z = zext i8 %l to i64 %b = bitcast i64 %z to <1 x double> @@ -587,11 +589,16 @@ define <1 x double> @load_zext_i8_v1f64(ptr %p) { } define <1 x double> @load_zext_i16_v1f64(ptr %p) { -; CHECK-LABEL: load_zext_i16_v1f64: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i16_v1f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i16_v1f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrh w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; 
CHECK-GI-NEXT: ret %l = load i16, ptr %p %z = zext i16 %l to i64 %b = bitcast i64 %z to <1 x double> @@ -599,11 +606,16 @@ define <1 x double> @load_zext_i16_v1f64(ptr %p) { } define <1 x double> @load_zext_i32_v1f64(ptr %p) { -; CHECK-LABEL: load_zext_i32_v1f64: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i32_v1f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i32_v1f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i32, ptr %p %z = zext i32 %l to i64 %b = bitcast i64 %z to <1 x double> diff --git a/llvm/test/CodeGen/AArch64/dp1.ll b/llvm/test/CodeGen/AArch64/dp1.ll index e904f4b6d247a..c761e0ec578e9 100644 --- a/llvm/test/CodeGen/AArch64/dp1.ll +++ b/llvm/test/CodeGen/AArch64/dp1.ll @@ -201,8 +201,7 @@ define void @ctpop_i32() { ; CHECK-SDAG: // %bb.0: ; CHECK-SDAG-NEXT: adrp x8, :got:var32 ; CHECK-SDAG-NEXT: ldr x8, [x8, :got_lo12:var32] -; CHECK-SDAG-NEXT: ldr w9, [x8] -; CHECK-SDAG-NEXT: fmov d0, x9 +; CHECK-SDAG-NEXT: ldr s0, [x8] ; CHECK-SDAG-NEXT: cnt v0.8b, v0.8b ; CHECK-SDAG-NEXT: addv b0, v0.8b ; CHECK-SDAG-NEXT: str s0, [x8] @@ -212,8 +211,7 @@ define void @ctpop_i32() { ; CHECK-GISEL: // %bb.0: ; CHECK-GISEL-NEXT: adrp x8, :got:var32 ; CHECK-GISEL-NEXT: ldr x8, [x8, :got_lo12:var32] -; CHECK-GISEL-NEXT: ldr w9, [x8] -; CHECK-GISEL-NEXT: fmov d0, x9 +; CHECK-GISEL-NEXT: ldr s0, [x8] ; CHECK-GISEL-NEXT: cnt v0.8b, v0.8b ; CHECK-GISEL-NEXT: uaddlv h0, v0.8b ; CHECK-GISEL-NEXT: str s0, [x8] From 962663268aa2cb98e766326bd60da8f9b3e7b2f8 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 22 Sep 2025 07:21:14 +0100 Subject: [PATCH 2/2] Preprocess bitcast(load) --- .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 68 ++++++ llvm/lib/Target/AArch64/AArch64InstrInfo.td | 29 +-- llvm/test/CodeGen/AArch64/aarch64-addv.ll | 9 +- 
llvm/test/CodeGen/AArch64/bitcast-extend.ll   | 210 +++++++++++++-----
 llvm/test/CodeGen/AArch64/dp1.ll              |   3 +-
 5 files changed, 230 insertions(+), 89 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 54bdb8750f709..b2483043136a4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -60,6 +60,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
     return SelectionDAGISel::runOnMachineFunction(MF);
   }
 
+  void PreprocessISelDAG() override;
+
   void Select(SDNode *Node) override;
 
   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
@@ -529,6 +531,72 @@ char AArch64DAGToDAGISelLegacy::ID = 0;
 
 INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
 
+/// Before instruction selection, rewrite bitcast(extending load) to a vector
+/// type into a vector built from a narrower load (little-endian only).
+void AArch64DAGToDAGISel::PreprocessISelDAG() {
+  bool MadeChange = false;
+  for (auto I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E;) {
+    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
+
+    switch (N->getOpcode()) {
+    case ISD::BITCAST: {
+      // Canonicalize bitcast(extload) or bitcast(zextload) into
+      // scalar_to_vector(load) or insert(zero, load), to help generate the
+      // canonical patterns that tablegen expects. This helps generate extending
+      // loads that zero the top data implicitly.
+      EVT VT = N->getValueType(0);
+      if (Subtarget->isLittleEndian() &&
+          (/*VT == MVT::f32 || VT == MVT::f64 ||*/ VT.isVector())) {
+        auto *Ld = dyn_cast<LoadSDNode>(N->getOperand(0));
+        // Require the load to feed only this bitcast; otherwise the old load
+        // stays live but its chain users are redirected to the new load below.
+        if (Ld && Ld->isSimple() && !Ld->isIndexed() &&
+            N->getOperand(0).hasOneUse() &&
+            (Ld->getExtensionType() == ISD::EXTLOAD ||
+             Ld->getExtensionType() == ISD::ZEXTLOAD)) {
+          LLVM_DEBUG(dbgs() << "Found an extending load "; Ld->dump());
+
+          EVT MemVT = Ld->getMemoryVT();
+          assert(VT.is64BitVector() || VT.is128BitVector() || VT == MVT::f32 ||
+                 VT == MVT::f64);
+          assert(
+              VT.getScalarSizeInBits() == 8 || VT.getScalarSizeInBits() == 16 ||
+              VT.getScalarSizeInBits() == 32 || VT.getScalarSizeInBits() == 64);
+          assert(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32);
+          EVT ScalarVT = MemVT.getSizeInBits() < 32 ? MVT::i32 : MemVT;
+          EVT ExtVT =
+              EVT::getVectorVT(*CurDAG->getContext(), MemVT,
+                               VT.getSizeInBits() / MemVT.getSizeInBits());
+
+          SDLoc DL(N);
+          SDValue NewLd =
+              CurDAG->getExtLoad(ISD::EXTLOAD, DL, ScalarVT, Ld->getChain(),
+                                 Ld->getBasePtr(), MemVT, Ld->getMemOperand());
+          SDValue Ext;
+          if (Ld->getExtensionType() == ISD::EXTLOAD)
+            Ext = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, NewLd);
+          else
+            Ext = CurDAG->getNode(ISD::INSERT_VECTOR_ELT, DL, ExtVT,
+                                  CurDAG->getConstant(0, DL, ExtVT), NewLd,
+                                  CurDAG->getConstant(0, DL, MVT::i64));
+          Ext = CurDAG->getBitcast(VT, Ext);
+
+          --I;
+          CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Ext);
+          CurDAG->ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewLd.getValue(1));
+          ++I;
+          MadeChange = true;
+        }
+      }
+      break;
+    }
+    }
+  }
+
+  if (MadeChange)
+    CurDAG->RemoveDeadNodes();
+}
+
 /// isIntImmediate - This method tests to see if the node is a constant
 /// operand. If so Imm will receive the 32-bit value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) { diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index c1570ba5be7d8..6cea453f271be 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4026,27 +4026,6 @@ def : Pat <(f32 (bitconvert (i32 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s def : Pat <(f32 (bitconvert (i32 (zextloadi8 (am_indexed16 GPR64sp:$Rn, uimm12s1:$offset))))), (SUBREG_TO_REG (i32 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; -// Similar to the patterns above we can turn a bitcast zextload to a vector type into a FPR load. -multiclass BitcastLoad { - def : Pat <(VT (bitconvert (i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), - (SUBREG_TO_REG (i64 0), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; - def : Pat <(VT (bitconvert (i64 (zextloadi16 (am_indexed32 GPR64sp:$Rn, uimm12s2:$offset))))), - (SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; - def : Pat <(VT (bitconvert (i64 (zextloadi8 (am_indexed32 GPR64sp:$Rn, uimm12s1:$offset))))), - (SUBREG_TO_REG (i64 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; -} -let Predicates = [IsLE] in { - defm : BitcastLoad; - defm : BitcastLoad; - defm : BitcastLoad; - defm : BitcastLoad; - defm : BitcastLoad; - defm : BitcastLoad; - defm : BitcastLoad; - defm : BitcastLoad; -} - - // Pre-fetch. 
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm", [(AArch64Prefetch timm:$Rt, @@ -4193,13 +4172,13 @@ def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), (LDURBBi GPR64sp:$Rn, simm9:$offset)>; def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))), - (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>; + (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>; def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), - (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; + (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), - (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; + (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), - (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; + (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; //--- diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll index 4c168e3306500..d9180a28bd40b 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll @@ -501,7 +501,8 @@ define i16 @addv_zero_lanes_v4i16(ptr %arr) { ; ; CHECK-GI-LABEL: addv_zero_lanes_v4i16: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: ldrb w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 ; CHECK-GI-NEXT: addv h0, v0.4h ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret @@ -520,7 +521,8 @@ define i8 @addv_zero_lanes_v8i8(ptr %arr) { ; ; CHECK-GI-LABEL: addv_zero_lanes_v8i8: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: ldrb w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 ; CHECK-GI-NEXT: addv b0, v0.8b ; CHECK-GI-NEXT: fmov w0, s0 ; 
CHECK-GI-NEXT: ret @@ -631,7 +633,8 @@ define i32 @addv_zero_lanes_v2i32(ptr %arr) { ; ; CHECK-GI-LABEL: addv_zero_lanes_v2i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr s0, [x0] +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 ; CHECK-GI-NEXT: addp v0.2s, v0.2s, v0.2s ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/bitcast-extend.ll b/llvm/test/CodeGen/AArch64/bitcast-extend.ll index 1ecc945eb107c..b6b1d75207123 100644 --- a/llvm/test/CodeGen/AArch64/bitcast-extend.ll +++ b/llvm/test/CodeGen/AArch64/bitcast-extend.ll @@ -289,10 +289,16 @@ define void @extractbitcastext_s(i32 %bytes, ptr %output) { } define <8 x i8> @load_zext_i8_v8i8(ptr %p) { -; CHECK-LABEL: load_zext_i8_v8i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i8_v8i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr b0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i8_v8i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrb w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i8, ptr %p %z = zext i8 %l to i64 %b = bitcast i64 %z to <8 x i8> @@ -300,10 +306,16 @@ define <8 x i8> @load_zext_i8_v8i8(ptr %p) { } define <8 x i8> @load_zext_i16_v8i8(ptr %p) { -; CHECK-LABEL: load_zext_i16_v8i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i16_v8i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i16_v8i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrh w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i16, ptr %p %z = zext i16 %l to i64 %b = bitcast i64 %z to <8 x i8> @@ -311,10 +323,16 @@ define <8 x i8> @load_zext_i16_v8i8(ptr %p) { } define <8 x i8> @load_zext_i32_v8i8(ptr %p) { -; CHECK-LABEL: load_zext_i32_v8i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i32_v8i8: +; CHECK-SD: // %bb.0: +; 
CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i32_v8i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i32, ptr %p %z = zext i32 %l to i64 %b = bitcast i64 %z to <8 x i8> @@ -355,10 +373,16 @@ define <16 x i8> @load_zext_v16i8(ptr %p) { define <4 x i16> @load_zext_i8_v4i16(ptr %p) { -; CHECK-LABEL: load_zext_i8_v4i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i8_v4i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr b0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i8_v4i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrb w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i8, ptr %p %z = zext i8 %l to i64 %b = bitcast i64 %z to <4 x i16> @@ -366,10 +390,16 @@ define <4 x i16> @load_zext_i8_v4i16(ptr %p) { } define <4 x i16> @load_zext_i16_v4i16(ptr %p) { -; CHECK-LABEL: load_zext_i16_v4i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i16_v4i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i16_v4i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrh w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i16, ptr %p %z = zext i16 %l to i64 %b = bitcast i64 %z to <4 x i16> @@ -377,10 +407,16 @@ define <4 x i16> @load_zext_i16_v4i16(ptr %p) { } define <4 x i16> @load_zext_i32_v4i16(ptr %p) { -; CHECK-LABEL: load_zext_i32_v4i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i32_v4i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i32_v4i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i32, ptr %p %z = zext i32 %l to i64 %b = bitcast i64 %z to <4 x i16> @@ -388,10 +424,16 @@ define 
<4 x i16> @load_zext_i32_v4i16(ptr %p) { } define <2 x i32> @load_zext_i8_v2i32(ptr %p) { -; CHECK-LABEL: load_zext_i8_v2i32: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i8_v2i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr b0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i8_v2i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrb w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i8, ptr %p %z = zext i8 %l to i64 %b = bitcast i64 %z to <2 x i32> @@ -399,10 +441,16 @@ define <2 x i32> @load_zext_i8_v2i32(ptr %p) { } define <2 x i32> @load_zext_i16_v2i32(ptr %p) { -; CHECK-LABEL: load_zext_i16_v2i32: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i16_v2i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i16_v2i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrh w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i16, ptr %p %z = zext i16 %l to i64 %b = bitcast i64 %z to <2 x i32> @@ -410,10 +458,16 @@ define <2 x i32> @load_zext_i16_v2i32(ptr %p) { } define <2 x i32> @load_zext_i32_v2i32(ptr %p) { -; CHECK-LABEL: load_zext_i32_v2i32: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i32_v2i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i32_v2i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i32, ptr %p %z = zext i32 %l to i64 %b = bitcast i64 %z to <2 x i32> @@ -473,10 +527,16 @@ define <1 x i64> @load_zext_i32_v1i64(ptr %p) { define <4 x half> @load_zext_i8_v4f16(ptr %p) { -; CHECK-LABEL: load_zext_i8_v4f16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i8_v4f16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr b0, [x0] +; 
CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i8_v4f16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrb w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i8, ptr %p %z = zext i8 %l to i64 %b = bitcast i64 %z to <4 x half> @@ -484,10 +544,16 @@ define <4 x half> @load_zext_i8_v4f16(ptr %p) { } define <4 x half> @load_zext_i16_v4f16(ptr %p) { -; CHECK-LABEL: load_zext_i16_v4f16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i16_v4f16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i16_v4f16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrh w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i16, ptr %p %z = zext i16 %l to i64 %b = bitcast i64 %z to <4 x half> @@ -495,10 +561,16 @@ define <4 x half> @load_zext_i16_v4f16(ptr %p) { } define <4 x half> @load_zext_i32_v4f16(ptr %p) { -; CHECK-LABEL: load_zext_i32_v4f16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i32_v4f16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i32_v4f16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i32, ptr %p %z = zext i32 %l to i64 %b = bitcast i64 %z to <4 x half> @@ -539,10 +611,16 @@ define <4 x bfloat> @load_zext_i32_v4bf16(ptr %p) { } define <2 x float> @load_zext_i8_v2f32(ptr %p) { -; CHECK-LABEL: load_zext_i8_v2f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i8_v2f32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr b0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i8_v2f32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrb w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i8, ptr %p %z = zext i8 %l to i64 %b = bitcast i64 %z to <2 x float> @@ -550,10 +628,16 @@ define <2 x float> 
@load_zext_i8_v2f32(ptr %p) { } define <2 x float> @load_zext_i16_v2f32(ptr %p) { -; CHECK-LABEL: load_zext_i16_v2f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i16_v2f32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i16_v2f32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrh w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i16, ptr %p %z = zext i16 %l to i64 %b = bitcast i64 %z to <2 x float> @@ -561,10 +645,16 @@ define <2 x float> @load_zext_i16_v2f32(ptr %p) { } define <2 x float> @load_zext_i32_v2f32(ptr %p) { -; CHECK-LABEL: load_zext_i32_v2f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i32_v2f32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i32_v2f32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i32, ptr %p %z = zext i32 %l to i64 %b = bitcast i64 %z to <2 x float> diff --git a/llvm/test/CodeGen/AArch64/dp1.ll b/llvm/test/CodeGen/AArch64/dp1.ll index c761e0ec578e9..e52fec96d51e3 100644 --- a/llvm/test/CodeGen/AArch64/dp1.ll +++ b/llvm/test/CodeGen/AArch64/dp1.ll @@ -211,7 +211,8 @@ define void @ctpop_i32() { ; CHECK-GISEL: // %bb.0: ; CHECK-GISEL-NEXT: adrp x8, :got:var32 ; CHECK-GISEL-NEXT: ldr x8, [x8, :got_lo12:var32] -; CHECK-GISEL-NEXT: ldr s0, [x8] +; CHECK-GISEL-NEXT: ldr w9, [x8] +; CHECK-GISEL-NEXT: fmov d0, x9 ; CHECK-GISEL-NEXT: cnt v0.8b, v0.8b ; CHECK-GISEL-NEXT: uaddlv h0, v0.8b ; CHECK-GISEL-NEXT: str s0, [x8]