diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 779ec49f4d13a..e8a5c6fedc395 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1199,6 +1199,18 @@ bool CombinerHelper::matchCombineExtractedVectorLoad(MachineInstr &MI,
   if (!VecEltTy.isByteSized())
     return false;
 
+  // Check for load fold barriers between the extraction and the load.
+  if (MI.getParent() != LoadMI->getParent())
+    return false;
+  const unsigned MaxIter = 20;
+  unsigned Iter = 0;
+  for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
+    if (II->isLoadFoldBarrier())
+      return false;
+    if (Iter++ == MaxIter)
+      return false;
+  }
+
   // Check if the new load that we are going to create is legal
   // if we are in the post-legalization phase.
   MachineMemOperand MMO = LoadMI->getMMO();
diff --git a/llvm/test/CodeGen/AArch64/extractvector-of-load.mir b/llvm/test/CodeGen/AArch64/extractvector-of-load.mir
new file mode 100644
index 0000000000000..43051232b436d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/extractvector-of-load.mir
@@ -0,0 +1,46 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name:            f
+alignment:       4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+liveins:
+  - { reg: '$x0' }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: f
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p0) :: (store (<4 x s32>))
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C1]](s64)
+    ; CHECK-NEXT: $w0 = COPY [[EVEC]](s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %3:_(s32) = G_CONSTANT i32 0
+    %2:_(<4 x s32>) = G_BUILD_VECTOR %3(s32), %3(s32), %3(s32), %3(s32)
+    %5:_(s64) = G_CONSTANT i64 0
+    %1:_(<2 x s32>) = G_LOAD %0(p0) :: (load (<2 x s32>))
+    G_STORE %2(<4 x s32>), %0(p0) :: (store (<4 x s32>))
+    %4:_(s32) = G_EXTRACT_VECTOR_ELT %1(<2 x s32>), %5(s64)
+    $w0 = COPY %4(s32)
+    RET_ReallyLR implicit $w0
+
+...