diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index b2e173dec9fe1a..bd4fd84441d891 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -288,16 +288,20 @@ Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
 
   // If the mask is all ones or undefs, this is a plain vector load of the 1st
   // argument.
-  if (maskIsAllOneOrUndef(II.getArgOperand(2)))
-    return Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
-                                     "unmaskedload");
+  if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
+    LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
+                                            "unmaskedload");
+    L->copyMetadata(II);
+    return L;
+  }
 
   // If we can unconditionally load from this address, replace with a
   // load/select idiom. TODO: use DT for context sensitive query
   if (isDereferenceablePointer(LoadPtr, II.getType(),
                                II.getModule()->getDataLayout(), &II, nullptr)) {
-    Value *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
-                                          "unmaskedload");
+    LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
+                                             "unmaskedload");
+    LI->copyMetadata(II);
     return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
   }
 
@@ -320,7 +324,10 @@ Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
   if (ConstMask->isAllOnesValue()) {
     Value *StorePtr = II.getArgOperand(1);
     Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
-    return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
+    StoreInst *S =
+        new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
+    S->copyMetadata(II);
+    return S;
   }
 
   if (isa<ScalableVectorType>(ConstMask->getType()))
diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics_keep_metadata.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics_keep_metadata.ll
new file mode 100644
index 00000000000000..d2ab4e8cc59108
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/masked_intrinsics_keep_metadata.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+@g0 = global <4 x i32> zeroinitializer, align 16
+
+define inreg <4 x i32> @mload1(<4 x i32>* nocapture readonly %a0) #0 {
+; CHECK-LABEL: @mload1(
+; CHECK-NEXT:  b0:
+; CHECK-NEXT:    [[UNMASKEDLOAD:%.*]] = load <4 x i32>, <4 x i32>* [[A0:%.*]], align 16, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    ret <4 x i32> [[UNMASKEDLOAD]]
+;
+b0:
+  %v0 = call <4 x i32> @llvm.masked.load.v4i1.p0v4i1(<4 x i32>* %a0, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), !tbaa !0
+  ret <4 x i32> %v0
+}
+
+define inreg <4 x i32> @mload2() #0 {
+; CHECK-LABEL: @mload2(
+; CHECK-NEXT:  b0:
+; CHECK-NEXT:    [[UNMASKEDLOAD:%.*]] = load <4 x i32>, <4 x i32>* @g0, align 16, !tbaa [[TBAA0]]
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> [[UNMASKEDLOAD]], i32 0, i32 0
+; CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+;
+b0:
+  %v0 = call <4 x i32> @llvm.masked.load.v4i1.p0v4i1(<4 x i32>* @g0, i32 16, <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> zeroinitializer), !tbaa !0
+  ret <4 x i32> %v0
+}
+
+define void @mstore(<4 x i32> %a0, <4 x i32>* nocapture readonly %a1) #0 {
+; CHECK-LABEL: @mstore(
+; CHECK-NEXT:  b0:
+; CHECK-NEXT:    store <4 x i32> [[A0:%.*]], <4 x i32>* [[A1:%.*]], align 16, !tbaa [[TBAA0]]
+; CHECK-NEXT:    ret void
+;
+b0:
+  call void @llvm.masked.store.v4i1.p0v4i1(<4 x i32> %a0, <4 x i32>* %a1, i32 16, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !tbaa !0
+  ret void
+}
+
+attributes #0 = { norecurse nounwind }
+
+declare <4 x i32> @llvm.masked.load.v4i1.p0v4i1(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
+declare void @llvm.masked.store.v4i1.p0v4i1(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"omnipotent char", !2, i64 0}
+!2 = !{!"Simple C/C++ TBAA"}