diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 95c4fafba8640..a65fafc4d2db1 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -240,8 +240,10 @@ bool InterleavedAccess::lowerInterleavedLoad( continue; } if (auto *BI = dyn_cast(User)) { - if (all_of(BI->users(), - [](auto *U) { return isa(U); })) { + if (all_of(BI->users(), [](auto *U) { + auto *SVI = dyn_cast(U); + return SVI && isa(SVI->getOperand(1)); + })) { for (auto *SVI : BI->users()) BinOpShuffles.insert(cast(SVI)); continue; diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll index 2d113c2e6f046..67910305f56d6 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll @@ -8,12 +8,12 @@ define <4 x float> @vld2(ptr %pSrc) { ; CHECK-LABEL: @vld2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LDN:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr [[PSRC:%.*]]) +; CHECK-NEXT: [[TMP0:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0 -; CHECK-NEXT: [[L26:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[L43:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[L26:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[L43:%.*]] = fmul fast <4 x float> [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L43]], [[L26]] ; CHECK-NEXT: ret <4 x float> [[L6]] ; @@ -31,16 +31,16 @@ define <4 x float> @vld3(ptr %pSrc) { ; CHECK-LABEL: @vld3( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LDN:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0(ptr [[PSRC:%.*]]) +; CHECK-NEXT: [[TMP0:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 2 ; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 -; CHECK-NEXT: [[L29:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[L46:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[L29:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[L46:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP3]] ; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L46]], [[L29]] -; CHECK-NEXT: [[L73:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[L73:%.*]] = fmul fast <4 x float> [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[L9:%.*]] = fadd fast <4 x float> [[L6]], [[L73]] ; CHECK-NEXT: ret <4 x float> [[L9]] ; @@ -61,19 +61,19 @@ define <4 x float> @vld4(ptr %pSrc) { ; CHECK-LABEL: @vld4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LDN:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0(ptr [[PSRC:%.*]]) +; CHECK-NEXT: [[TMP0:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 3 ; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 -; CHECK-NEXT: [[L312:%.*]] = fmul fast <4 x float> [[TMP7]], [[TMP8]] -; CHECK-NEXT: [[L59:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[L312:%.*]] = fmul fast <4 x float> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[L59:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[L7:%.*]] = fadd fast <4 x float> [[L59]], [[L312]] -; CHECK-NEXT: [[L86:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[L103:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[L86:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[L103:%.*]] = fmul fast <4 x float> [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[L12:%.*]] = fadd fast <4 x float> [[L103]], [[L86]] ; CHECK-NEXT: ret <4 x float> [[L12]] ; @@ -96,13 +96,13 @@ define <4 x float> @twosrc(ptr %pSrc1, ptr %pSrc2) { ; CHECK-LABEL: @twosrc( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LDN:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr [[PSRC1:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0 ; CHECK-NEXT: [[LDN7:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr [[PSRC2:%.*]]) -; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 1 -; CHECK-NEXT: [[L46:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]] -; CHECK-NEXT: [[L63:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 1 +; CHECK-NEXT: [[L46:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[L63:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP0]] ; CHECK-NEXT: [[L8:%.*]] = fadd fast <4 x float> [[L63]], [[L46]] ; CHECK-NEXT: ret <4 x float> [[L8]] ; @@ -121,13 +121,13 @@ define <4 x float> @twosrc2(ptr %pSrc1, ptr %pSrc2) { ; CHECK-LABEL: @twosrc2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LDN:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr [[PSRC1:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0 ; CHECK-NEXT: [[LDN4:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr [[PSRC2:%.*]]) -; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 1 -; CHECK-NEXT: [[L43:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]] -; CHECK-NEXT: [[L6:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 1 +; CHECK-NEXT: [[L43:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[L6:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP0]] ; CHECK-NEXT: [[L8:%.*]] = fadd fast <4 x float> [[L6]], [[L43]] ; CHECK-NEXT: ret <4 x float> [[L8]] ; @@ -142,3 +142,81 @@ entry: %l8 = fadd fast <4 x float> %l6, %l5 ret <4 x float> %l8 } + +define void @noncanonical(ptr %p0, ptr %p1, ptr %p2) { +; CHECK-LABEL: @noncanonical( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V0:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 8 +; CHECK-NEXT: [[V1:%.*]] = add <8 x i8> [[V0]], +; CHECK-NEXT: [[V2:%.*]] = load <8 x i8>, ptr [[P1:%.*]], align 8 +; CHECK-NEXT: [[SHUFFLED:%.*]] = shufflevector <8 x i8> [[V2]], <8 x i8> [[V1]], <4 x i32> +; CHECK-NEXT: store <4 x i8> [[SHUFFLED]], ptr [[P2:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %v0 = load <8 x i8>, ptr %p0 + %v1 = add <8 x i8> %v0, + %v2 = load <8 x i8>, ptr %p1 + %shuffled = shufflevector <8 x i8> %v2, <8 x i8> %v1, <4 x i32> + store <4 x i8> %shuffled, ptr %p2 + ret void +} + +define void @noncanonical2(ptr %p0, ptr %p1, ptr %p2) { +; CHECK-LABEL: @noncanonical2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V0:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 8 +; CHECK-NEXT: [[V1:%.*]] = load <8 x i8>, ptr [[P1:%.*]], align 8 +; CHECK-NEXT: [[V2:%.*]] = add <8 x i8> [[V0]], [[V1]] +; CHECK-NEXT: [[SHUFFLED:%.*]] = shufflevector <8 x i8> undef, <8 x i8> [[V2]], <4 x i32> +; CHECK-NEXT: store <4 x i8> [[SHUFFLED]], ptr [[P2:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %v0 = load <8 x i8>, ptr %p0 + %v1 = load <8 x i8>, ptr %p1 + %v2 = add <8 x i8> %v0, %v1 + %shuffled = shufflevector <8 x i8> undef, <8 x i8> %v2, <4 x i32> + store <4 x i8> %shuffled, ptr %p2 + ret void +} + +define <4 x float> @noncanonical3(ptr %pSrc) { +; CHECK-LABEL: @noncanonical3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x float>, ptr [[PSRC:%.*]], align 4 +; CHECK-NEXT: [[L2:%.*]] = fmul fast <8 x float> [[WIDE_VEC]], [[WIDE_VEC]] +; CHECK-NEXT: [[L3:%.*]] = shufflevector <8 x float> undef, <8 x float> [[L2]], <4 x i32> +; CHECK-NEXT: [[L4:%.*]] = fmul fast <8 x float> [[WIDE_VEC]], [[WIDE_VEC]] +; CHECK-NEXT: [[L5:%.*]] = shufflevector <8 x float> [[L4]], <8 x float> undef, <4 x i32> +; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L5]], [[L3]] +; CHECK-NEXT: ret <4 x float> [[L6]] +; +entry: + %wide.vec = load <8 x float>, ptr %pSrc, align 4 + %l2 = fmul fast <8 x float> %wide.vec, %wide.vec + %l3 = shufflevector <8 x float> undef, <8 x float> %l2, <4 x i32> + %l4 = fmul fast <8 x float> %wide.vec, %wide.vec + %l5 = shufflevector <8 x float> %l4, <8 x float> undef, <4 x i32> + %l6 = fadd fast <4 x float> %l5, %l3 + ret <4 x float> %l6 +} + +define void @noncanonical_extmask(ptr %p0, ptr %p1, ptr %p2) { +; CHECK-LABEL: @noncanonical_extmask( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V0:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 8 +; CHECK-NEXT: [[V1:%.*]] = add <8 x i8> [[V0]], +; CHECK-NEXT: [[V2:%.*]] = load <8 x i8>, ptr [[P1:%.*]], align 8 +; CHECK-NEXT: [[SHUFFLED:%.*]] = shufflevector <8 x i8> [[V2]], <8 x i8> [[V1]], <8 x i32> +; CHECK-NEXT: store <8 x i8> [[SHUFFLED]], ptr [[P2:%.*]], align 8 +; CHECK-NEXT: ret void +; +entry: + %v0 = load <8 x i8>, ptr %p0 + %v1 = add <8 x i8> %v0, + %v2 = load <8 x i8>, ptr %p1 + %shuffled = shufflevector <8 x i8> %v2, <8 x i8> %v1, <8 x i32> + store <8 x i8> %shuffled, ptr %p2 + ret void +}