VectorCombine: keep AA metadata on loads created by scalarizeLoadExtract

scalarizeLoadExtract() replaces each extractelement user of a vector load
with a narrow scalar load. With this change, when the extract index is a
ConstantInt, the new scalar load is given the vector load's AA metadata
(LI->getAAMetadata()), adjusted with AAMDNodes::adjustForAccess() for the
element's byte offset, computed as index * store size of the element type.
For a non-constant index no AA metadata is copied. The element type is also
computed once (ElemType) and reused for the load type and the alignment
computation.

The new test alias.ll checks that !tbaa, !alias.scope and !noalias are
present on the four scalar i8 loads produced from one <4 x i8> load.

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 4e2a5c78e0ac8..1275d53a075b5 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1812,6 +1812,8 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
   // erased in the correct order.
   Worklist.push(LI);
 
+  Type *ElemType = VecTy->getElementType();
+
   // Replace extracts with narrow scalar loads.
   for (User *U : LI->users()) {
     auto *EI = cast<ExtractElementInst>(U);
@@ -1825,13 +1827,19 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
     Builder.SetInsertPoint(EI);
     Value *GEP =
         Builder.CreateInBoundsGEP(VecTy, Ptr, {Builder.getInt32(0), Idx});
-    auto *NewLoad = cast<LoadInst>(Builder.CreateLoad(
-        VecTy->getElementType(), GEP, EI->getName() + ".scalar"));
+    auto *NewLoad = cast<LoadInst>(
+        Builder.CreateLoad(ElemType, GEP, EI->getName() + ".scalar"));
 
-    Align ScalarOpAlignment = computeAlignmentAfterScalarization(
-        LI->getAlign(), VecTy->getElementType(), Idx, *DL);
+    Align ScalarOpAlignment =
+        computeAlignmentAfterScalarization(LI->getAlign(), ElemType, Idx, *DL);
     NewLoad->setAlignment(ScalarOpAlignment);
 
+    if (auto *ConstIdx = dyn_cast<ConstantInt>(Idx)) {
+      size_t Offset = ConstIdx->getZExtValue() * DL->getTypeStoreSize(ElemType);
+      AAMDNodes OldAAMD = LI->getAAMetadata();
+      NewLoad->setAAMetadata(OldAAMD.adjustForAccess(Offset, ElemType, *DL));
+    }
+
     replaceValue(*EI, *NewLoad, false);
   }
 
diff --git a/llvm/test/Transforms/VectorCombine/alias.ll b/llvm/test/Transforms/VectorCombine/alias.ll
new file mode 100644
index 0000000000000..459956cd997d8
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/alias.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=vector-combine -S | FileCheck %s --check-prefixes=CHECK
+
+define <4 x i32> @quux(ptr addrspace(3) %arg) {
+; CHECK-LABEL: define <4 x i32> @quux(
+; CHECK-SAME: ptr addrspace(3) [[ARG:%.*]]) {
+; CHECK-NEXT:  [[BB:.*:]]
+; CHECK-NEXT:    [[EXTRACTELEMENT:%.*]] = load i8, ptr addrspace(3) [[ARG]], align 4, !tbaa [[TBAA0:![0-9]+]], !alias.scope [[META0:![0-9]+]], !noalias [[META0]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <4 x i8>, ptr addrspace(3) [[ARG]], i32 0, i64 1
+; CHECK-NEXT:    [[EXTRACTELEMENT1:%.*]] = load i8, ptr addrspace(3) [[TMP0]], align 1, !tbaa [[TBAA0]], !alias.scope [[META0]], !noalias [[META0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <4 x i8>, ptr addrspace(3) [[ARG]], i32 0, i64 2
+; CHECK-NEXT:    [[EXTRACTELEMENT2:%.*]] = load i8, ptr addrspace(3) [[TMP1]], align 2, !tbaa [[TBAA0]], !alias.scope [[META0]], !noalias [[META0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <4 x i8>, ptr addrspace(3) [[ARG]], i32 0, i64 3
+; CHECK-NEXT:    [[EXTRACTELEMENT3:%.*]] = load i8, ptr addrspace(3) [[TMP2]], align 1, !tbaa [[TBAA0]], !alias.scope [[META0]], !noalias [[META0]]
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[EXTRACTELEMENT]] to i32
+; CHECK-NEXT:    [[ZEXT4:%.*]] = zext i8 [[EXTRACTELEMENT1]] to i32
+; CHECK-NEXT:    [[ZEXT5:%.*]] = zext i8 [[EXTRACTELEMENT2]] to i32
+; CHECK-NEXT:    [[ZEXT6:%.*]] = zext i8 [[EXTRACTELEMENT3]] to i32
+; CHECK-NEXT:    [[INSERTELEMENT:%.*]] = insertelement <4 x i32> poison, i32 [[ZEXT]], i64 0
+; CHECK-NEXT:    [[INSERTELEMENT7:%.*]] = insertelement <4 x i32> [[INSERTELEMENT]], i32 [[ZEXT4]], i64 1
+; CHECK-NEXT:    [[INSERTELEMENT8:%.*]] = insertelement <4 x i32> [[INSERTELEMENT7]], i32 [[ZEXT5]], i64 2
+; CHECK-NEXT:    [[INSERTELEMENT9:%.*]] = insertelement <4 x i32> [[INSERTELEMENT8]], i32 [[ZEXT6]], i64 3
+; CHECK-NEXT:    ret <4 x i32> [[INSERTELEMENT9]]
+;
+bb:
+  %load = load <4 x i8>, ptr addrspace(3) %arg, align 4, !alias.scope !0, !noalias !0, !tbaa !5
+  %extractelement = extractelement <4 x i8> %load, i64 0
+  %extractelement1 = extractelement <4 x i8> %load, i64 1
+  %extractelement2 = extractelement <4 x i8> %load, i64 2
+  %extractelement3 = extractelement <4 x i8> %load, i64 3
+  %zext = zext i8 %extractelement to i32
+  %zext4 = zext i8 %extractelement1 to i32
+  %zext5 = zext i8 %extractelement2 to i32
+  %zext6 = zext i8 %extractelement3 to i32
+  %insertelement = insertelement <4 x i32> poison, i32 %zext, i64 0
+  %insertelement7 = insertelement <4 x i32> %insertelement, i32 %zext4, i64 1
+  %insertelement8 = insertelement <4 x i32> %insertelement7, i32 %zext5, i64 2
+  %insertelement9 = insertelement <4 x i32> %insertelement8, i32 %zext6, i64 3
+  ret <4 x i32> %insertelement9
+}
+
+!0 = !{!1}
+!1 = distinct !{!1, !2}
+!2 = distinct !{!2}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = !{!"omnipotent char", !3, i64 0}
+!5 = !{!"i8", !4, i64 0}
+;.
+; CHECK: [[TBAA0]] = !{[[META3:![0-9]+]], [[META3]], i64 0, i64 0}
+; CHECK: [[META3]] = !{!"i8", [[META4:![0-9]+]]}
+; CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+; CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
+; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]}
+; CHECK: [[META2]] = distinct !{[[META2]]}
+;.
\ No newline at end of file