LoadStoreVectorizer: use VecInst directly when VecTy is not a vector type

In the per-element rewrite inside Vectorizer::vectorizeChain, the element's
own type T used to be tested first, so an element whose T is a fixed vector
always took the shufflevector path, whatever VecTy was. The new ordering
checks VecTy first: when VecTy is not a vector type the element uses VecInst
as-is, and the existing CreateBitOrPointerCast below reconciles any remaining
type difference (e.g. i32 vs <1 x i32>). The "VecTy != VecElemTy" test is
thereby replaced by the leading "!VecTy->isVectorTy()" test.

The new @onevec test mixes i32 and <1 x i32> loads of the same address and
expects one scalar load per address followed by bitcasts.

diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 6d24c407eb5f4..c28314f6ab124 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -953,15 +953,15 @@ bool Vectorizer::vectorizeChain(Chain &C) {
       unsigned EOffset =
           (E.OffsetFromLeader - C[0].OffsetFromLeader).getZExtValue();
       unsigned VecIdx = 8 * EOffset / DL.getTypeSizeInBits(VecElemTy);
-      if (auto *VT = dyn_cast<FixedVectorType>(T)) {
+      if (!VecTy->isVectorTy()) {
+        V = VecInst;
+      } else if (auto *VT = dyn_cast<FixedVectorType>(T)) {
         auto Mask = llvm::to_vector<8>(
             llvm::seq<int>(VecIdx, VecIdx + VT->getNumElements()));
         V = Builder.CreateShuffleVector(VecInst, Mask, I->getName());
-      } else if (VecTy != VecElemTy) {
+      } else {
         V = Builder.CreateExtractElement(VecInst, Builder.getInt32(VecIdx),
                                          I->getName());
-      } else {
-        V = VecInst;
       }
       if (V->getType() != I->getType())
         V = Builder.CreateBitOrPointerCast(V, I->getType());
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vectorize-redund-loads.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vectorize-redund-loads.ll
index 55b511fd51a2b..802795da47894 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vectorize-redund-loads.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vectorize-redund-loads.ll
@@ -1,6 +1,33 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck %s
 
+define void @onevec(ptr %ptr) {
+; CHECK-LABEL: define void @onevec(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[TMP1]] to <1 x i32>
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i32 16
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[GEP1]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32>
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i32 32
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[GEP2]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32 [[TMP5]] to <1 x i32>
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32 [[TMP5]] to <1 x i32>
+; CHECK-NEXT:    ret void
+;
+  %ld0 = load <1 x i32>, ptr %ptr, align 4
+  %ld1 = load i32, ptr %ptr, align 4
+
+  %gep1 = getelementptr inbounds i8, ptr %ptr, i32 16
+  %ld2 = load i32, ptr %gep1, align 4
+  %ld3 = load <1 x i32>, ptr %gep1, align 4
+
+  %gep2 = getelementptr inbounds i8, ptr %ptr, i32 32
+  %ld4 = load <1 x i32>, ptr %gep2, align 4
+  %ld5 = load <1 x i32>, ptr %gep2, align 4
+  ret void
+}
+
 define void @test(ptr %ptr) {
 ; CHECK-LABEL: define void @test(
 ; CHECK-SAME: ptr [[PTR:%.*]]) {