diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp index 4de2483169ae9..7e4651627d65e 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp @@ -192,7 +192,7 @@ class AlignVectors { private: using InstList = std::vector<Instruction *>; - using InstMap = DenseMap<Instruction*, Instruction*>; + using InstMap = DenseMap<Instruction *, Instruction *>; struct AddrInfo { AddrInfo(const AddrInfo &) = default; @@ -299,9 +299,11 @@ class AlignVectors { Value *getPassThrough(Value *Val) const; Value *createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr, Type *ValTy, - int Adjust) const; + int Adjust, + const InstMap &CloneMap = InstMap()) const; Value *createAlignedPointer(IRBuilderBase &Builder, Value *Ptr, Type *ValTy, - int Alignment) const; + int Alignment, + const InstMap &CloneMap = InstMap()) const; Value *createLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr, Value *Predicate, int Alignment, Value *Mask, @@ -662,8 +664,17 @@ auto AlignVectors::getPassThrough(Value *Val) const -> Value * { } auto AlignVectors::createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr, - Type *ValTy, int Adjust) const + Type *ValTy, int Adjust, + const InstMap &CloneMap) const -> Value * { + auto remap = [&](Value *V) -> Value * { + if (auto *I = dyn_cast<Instruction>(V)) { + for (auto [Old, New] : CloneMap) + I->replaceUsesOfWith(Old, New); + return I; + } + return V; + }; // The adjustment is in bytes, but if it's a multiple of the type size, // we don't need to do pointer casts. 
auto *PtrTy = cast<PointerType>(Ptr->getType()); @@ -673,23 +684,33 @@ auto AlignVectors::createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr, if (Adjust % ElemSize == 0 && Adjust != 0) { Value *Tmp0 = Builder.CreateGEP( ElemTy, Ptr, HVC.getConstInt(Adjust / ElemSize), "gep"); - return Builder.CreatePointerCast(Tmp0, ValTy->getPointerTo(), "cst"); + return Builder.CreatePointerCast(remap(Tmp0), ValTy->getPointerTo(), + "cst"); } } PointerType *CharPtrTy = Type::getInt8PtrTy(HVC.F.getContext()); Value *Tmp0 = Builder.CreatePointerCast(Ptr, CharPtrTy, "cst"); - Value *Tmp1 = Builder.CreateGEP(Type::getInt8Ty(HVC.F.getContext()), Tmp0, - HVC.getConstInt(Adjust), "gep"); - return Builder.CreatePointerCast(Tmp1, ValTy->getPointerTo(), "cst"); + Value *Tmp1 = Builder.CreateGEP(Type::getInt8Ty(HVC.F.getContext()), + remap(Tmp0), HVC.getConstInt(Adjust), "gep"); + return Builder.CreatePointerCast(remap(Tmp1), ValTy->getPointerTo(), "cst"); } auto AlignVectors::createAlignedPointer(IRBuilderBase &Builder, Value *Ptr, - Type *ValTy, int Alignment) const + Type *ValTy, int Alignment, + const InstMap &CloneMap) const -> Value * { + auto remap = [&](Value *V) -> Value * { + if (auto *I = dyn_cast<Instruction>(V)) { + for (auto [Old, New] : CloneMap) + I->replaceUsesOfWith(Old, New); + return I; + } + return V; + }; Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy(), "pti"); Value *Mask = HVC.getConstInt(-Alignment); - Value *And = Builder.CreateAnd(AsInt, Mask, "add"); + Value *And = Builder.CreateAnd(remap(AsInt), Mask, "and"); return Builder.CreateIntToPtr(And, ValTy->getPointerTo(), "itp"); } @@ -1028,7 +1049,7 @@ auto AlignVectors::moveTogether(MoveGroup &Move) const -> bool { for (Instruction *M : Main) { if (M != Where) M->moveAfter(Where); - for (auto [Old, New]: Move.Clones) + for (auto [Old, New] : Move.Clones) M->replaceUsesOfWith(Old, New); Where = M; } @@ -1431,7 +1452,7 @@ auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool { // of potential bitcasts to i8*. 
int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value()); AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr, - WithMaxAlign.ValTy, Adjust); + WithMaxAlign.ValTy, Adjust, Move.Clones); int Diff = Start - (OffAtMax + Adjust); AlignVal = HVC.getConstInt(Diff); assert(Diff >= 0); @@ -1444,10 +1465,15 @@ auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool { // the alignment amount. // Do an explicit down-alignment of the address to avoid creating an // aligned instruction with an address that is not really aligned. - AlignAddr = createAlignedPointer(Builder, WithMinOffset.Addr, - WithMinOffset.ValTy, MinNeeded.value()); + AlignAddr = + createAlignedPointer(Builder, WithMinOffset.Addr, WithMinOffset.ValTy, + MinNeeded.value(), Move.Clones); AlignVal = Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy(), "pti"); + if (auto *I = dyn_cast<Instruction>(AlignVal)) { + for (auto [Old, New] : Move.Clones) + I->replaceUsesOfWith(Old, New); + } } ByteSpan VSpan; @@ -1456,16 +1482,6 @@ auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool { AI.Offset - WithMinOffset.Offset); } - // Update the AlignAddr/AlignVal to use cloned dependencies. - if (auto *I = dyn_cast<Instruction>(AlignAddr)) { - for (auto [Old, New] : Move.Clones) - I->replaceUsesOfWith(Old, New); - } - if (auto *I = dyn_cast<Instruction>(AlignVal)) { - for (auto [Old, New] : Move.Clones) - I->replaceUsesOfWith(Old, New); - } - // The aligned loads/stores will use blocks that are either scalars, // or HVX vectors. Let "sector" be the unified term for such a block. // blend(scalar, vector) -> sector... diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-align-bad-move3.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-bad-move3.ll new file mode 100644 index 0000000000000..cd075c9a9953b --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-bad-move3.ll @@ -0,0 +1,62 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Check that this doesn't crash. 
+; CHECK: vmem + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +define dso_local void @f0(<64 x i16> %a0, <64 x i16> %a1, ptr %a2, ptr %a3) local_unnamed_addr #0 { +b0: + %v8 = getelementptr inbounds i16, ptr %a2, i32 -1 + %v9 = load i16, ptr %v8, align 2 + %v10 = sext i16 %v9 to i32 + %v145 = xor i32 0, -1 + %tt0 = getelementptr inbounds i16, ptr %a2, i32 1 + %v469 = load i16, ptr %tt0, align 2 + %v533 = insertelement <64 x i16> %a0, i16 %v469, i64 63 + %tt1 = getelementptr inbounds i16, ptr %a2, i32 3 + %v597 = load i16, ptr %tt1, align 2 + %v661 = insertelement <64 x i16> %a1, i16 %v597, i64 63 + %v662 = sext <64 x i16> %v533 to <64 x i32> + %v663 = sext <64 x i16> %v661 to <64 x i32> + %v1837 = getelementptr inbounds i16, ptr %a2, i32 %v145 + %v1838 = getelementptr inbounds i16, ptr %v1837, i32 -63 + %v1839 = load <64 x i16>, ptr %v1838, align 2 + %v1840 = shufflevector <64 x i16> %v1839, <64 x i16> poison, <64 x i32> + %v1841 = getelementptr inbounds i16, ptr %v1837, i32 -127 + %v1842 = load <64 x i16>, ptr %v1841, align 2 + %v1843 = shufflevector <64 x i16> %v1842, <64 x i16> poison, <64 x i32> + %v1844 = sext <64 x i16> %v1840 to <64 x i32> + %v1845 = sext <64 x i16> %v1843 to <64 x i32> + %v1846 = mul nsw <64 x i32> %v1844, %v662 + %v1847 = mul nsw <64 x i32> %v1845, %v663 + %v1848 = add <64 x i32> %v1846, zeroinitializer + %v1849 = add <64 x i32> %v1847, zeroinitializer + %v1930 = add <64 x i32> %v1849, %v1848 + %v1932 = add <64 x i32> %v1930, zeroinitializer + %v1934 = add <64 x i32> %v1932, zeroinitializer + %v1936 = add <64 x i32> %v1934, zeroinitializer + %v1938 = add <64 x i32> %v1936, zeroinitializer + %v1940 = add <64 x i32> %v1938, zeroinitializer + %v1942 = add <64 x i32> %v1940, zeroinitializer + %v1943 = extractelement <64 x i32> %v1942, i32 0 + %v2515 = load i16, ptr %a3, align 2 + %v2516 = 
sext i16 %v2515 to i32 + %v2524 = mul nsw i32 %v10, %v2516 + %v2525 = sub nsw i32 %v1943, %v2524 + %v2527 = add nsw i32 %v2525, 0 + %v2572 = getelementptr inbounds i32, ptr %a3, i32 0 + store i32 %v2527, ptr %v2572, align 4 + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) +declare void @llvm.assume(i1 noundef) #1 + +attributes #0 = { "target-features"="+hvx-ieee-fp,+hvx-length128b,+hvxv73,-long-calls" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } + +!llvm.module.flags = !{!0} + +!0 = !{i32 7, !"frame-pointer", i32 2}