diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index 043892c7990740..6e1f157a23f314 100644 --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -1501,9 +1501,12 @@ std::optional Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB, if (DistScev != SE.getCouldNotCompute()) { LLVM_DEBUG(dbgs() << "LSV: SCEV PtrB - PtrA =" << *DistScev << "\n"); ConstantRange DistRange = SE.getSignedRange(DistScev); - if (DistRange.isSingleElement()) - return (OffsetB - OffsetA + *DistRange.getSingleElement()) - .sextOrTrunc(OrigBitWidth); + if (DistRange.isSingleElement()) { + // Handle index width (the width of Dist) != pointer width (the width of + // the Offset*s at this point). + APInt Dist = DistRange.getSingleElement()->sextOrTrunc(NewPtrBitWidth); + return (OffsetB - OffsetA + Dist).sextOrTrunc(OrigBitWidth); + } } std::optional Diff = getConstantOffsetComplexAddrs(PtrA, PtrB, ContextInst, Depth); diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/addrspace-7.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/addrspace-7.ll index 8e4fd575345625..9a57237916994b 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/addrspace-7.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/addrspace-7.ll @@ -1,10 +1,18 @@ -; REQUIRES: asserts -; RUN: not --crash opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s -; RUN: not --crash opt -mtriple=amdgcn-amd-amdhsa -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -S -o - %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -S -o - %s | FileCheck %s target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" define { float, float } @f() { +; CHECK-LABEL: define { float, float } @f() { +; CHECK-NEXT: bb: +; CHECK-NEXT: [[L1:%.*]] = load float, ptr addrspace(7) null, align 4 +; CHECK-NEXT: [[L2:%.*]] = load float, ptr addrspace(7) getelementptr (i8, ptr addrspace(7) null, i64 24), align 4 +; CHECK-NEXT: [[IV1:%.*]] = insertvalue { float, float } zeroinitializer, float [[L1]], 0 +; CHECK-NEXT: [[IV2:%.*]] = insertvalue { float, float } [[IV1]], float [[L2]], 1 +; CHECK-NEXT: ret { float, float } [[IV2]] +; bb: %l1 = load float, ptr addrspace(7) null %l2 = load float, ptr addrspace(7) getelementptr (i8, ptr addrspace(7) null, i64 24)