diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 979ba31b0431b..fc00883528dc2 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -3613,6 +3613,10 @@ def : Pat<(v4i32 (build_vector immSExt5NonZero:$A, immSExt5NonZero:$A,
                                immSExt5NonZero:$A, immSExt5NonZero:$A)),
           (v4i32 (VSPLTISW imm:$A))>;
 
+// Lower add of a v4i32 splat of 1 as a subtract of the all-ones vector (-1).
+def : Pat<(add v4i32:$A, (build_vector (i32 1), (i32 1), (i32 1), (i32 1))),
+          (VSUBUWM $A, (v4i32 (COPY_TO_REGCLASS (XXLEQVOnes), VSRC)))>;
+
 // Splat loads.
 def : Pat<(v8i16 (PPCldsplat ForceXForm:$A)),
           (v8i16 (VSPLTHs 3, (MTVSRWZ (LHZX ForceXForm:$A))))>;
diff --git a/llvm/test/CodeGen/PowerPC/vector-all-ones.ll b/llvm/test/CodeGen/PowerPC/vector-all-ones.ll
index e4c93adcf50a6..49c46d8eff726 100644
--- a/llvm/test/CodeGen/PowerPC/vector-all-ones.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-all-ones.ll
@@ -8,14 +8,12 @@
 ; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr9 -mtriple=powerpc-ibm-aix \
 ; RUN:   -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
 
-; Currently the generated code uses `vspltisw` to generate vector of 1s followed by add operation.
-; This pattern is expected to be optimized in a future patch by using `xxleqv` to generate vector of -1s
-; followed by subtraction operation.
+; Vector addition with {1,1,1,1} is optimized by replacing `vspltisw + vadduwm` with `xxleqv + vsubuwm`.
 define dso_local noundef <4 x i32> @test1(<4 x i32> %a) {
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vspltisw v3, 1
-; CHECK-NEXT:    vadduwm v2, v2, v3
+; CHECK-NEXT:    xxleqv v3, v3, v3
+; CHECK-NEXT:    vsubuwm v2, v2, v3
 ; CHECK-NEXT:    blr
 entry:
   %add = add <4 x i32> %a, splat (i32 1)