diff --git a/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll
new file mode 100644
index 00000000000000..03d88b1757dee7
--- /dev/null
+++ b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -tbaa -dse -S < %s | FileCheck %s
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+
+define dllexport i32 @f0(i8** %a0, i8** %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) #0 { ; DSE test: three masked stores to %v3 interleaved with two masked loads; the CHECK lines expect all three stores to remain.
+; CHECK-LABEL: @f0(
+; CHECK-NEXT: b0:
+; CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds i8*, i8** [[A0:%.*]], i32 [[A2:%.*]]
+; CHECK-NEXT: [[V1:%.*]] = load i8*, i8** [[V0]], align 4, [[TBAA0:!tbaa !.*]]
+; CHECK-NEXT: [[V2:%.*]] = getelementptr i8, i8* [[V1]], i32 [[A3:%.*]]
+; CHECK-NEXT: [[V3:%.*]] = bitcast i8* [[V2]] to <128 x i8>*
+; CHECK-NEXT: tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> , <128 x i8>* [[V3]], i32 32, <128 x i1> ), [[TBAA3:!tbaa !.*]]
+; CHECK-NEXT: [[V6:%.*]] = getelementptr inbounds i8*, i8** [[A1:%.*]], i32 [[A4:%.*]]
+; CHECK-NEXT: [[V7:%.*]] = load i8*, i8** [[V6]], align 4, [[TBAA6:!tbaa !.*]]
+; CHECK-NEXT: [[V8:%.*]] = getelementptr i8, i8* [[V7]], i32 [[A5:%.*]]
+; CHECK-NEXT: [[V9:%.*]] = bitcast i8* [[V8]] to <128 x i8>*
+; CHECK-NEXT: [[V10:%.*]] = tail call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[V9]], i32 32, <128 x i1> , <128 x i8> undef), [[TBAA8:!tbaa !.*]]
+; CHECK-NEXT: [[V11:%.*]] = shufflevector <128 x i8> [[V10]], <128 x i8> undef, <32 x i32>
+; CHECK-NEXT: [[V14:%.*]] = shufflevector <32 x i8> [[V11]], <32 x i8> undef, <128 x i32>
+; CHECK-NEXT: tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[V14]], <128 x i8>* [[V3]], i32 32, <128 x i1> ), [[TBAA3]]
+; CHECK-NEXT: [[V16:%.*]] = shufflevector <128 x i8> [[V14]], <128 x i8> undef, <32 x i32>
+; CHECK-NEXT: [[V17:%.*]] = getelementptr inbounds i8*, i8** [[A1]], i32 [[A6:%.*]]
+; CHECK-NEXT: [[V18:%.*]] = load i8*, i8** [[V17]], align 4, [[TBAA6]]
+; CHECK-NEXT: [[V19:%.*]] = getelementptr i8, i8* [[V18]], i32 [[A7:%.*]]
+; CHECK-NEXT: [[V20:%.*]] = bitcast i8* [[V19]] to <128 x i8>*
+; CHECK-NEXT: [[V21:%.*]] = tail call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[V20]], i32 32, <128 x i1> , <128 x i8> undef), [[TBAA8]]
+; CHECK-NEXT: [[V22:%.*]] = shufflevector <128 x i8> [[V21]], <128 x i8> undef, <32 x i32>
+; CHECK-NEXT: [[V23:%.*]] = icmp ugt <32 x i8> [[V16]], [[V22]]
+; CHECK-NEXT: [[V24:%.*]] = select <32 x i1> [[V23]], <32 x i8> [[V16]], <32 x i8> [[V22]]
+; CHECK-NEXT: [[V25:%.*]] = shufflevector <32 x i8> [[V24]], <32 x i8> undef, <128 x i32>
+; CHECK-NEXT: tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[V25]], <128 x i8>* [[V3]], i32 32, <128 x i1> ), [[TBAA3]]
+; CHECK-NEXT: ret i32 0
+;
+b0:
+  %v0 = getelementptr inbounds i8*, i8** %a0, i32 %a2 ; address of the %a2-th pointer in the %a0 table
+  %v1 = load i8*, i8** %v0, align 4, !tbaa !0 ; base pointer of the store destination
+  %v2 = getelementptr i8, i8* %v1, i32 %a3 ; advance the base by %a3 bytes
+  %v3 = bitcast i8* %v2 to <128 x i8>* ; destination shared by all three masked stores below
+  tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> , <128 x i8>* %v3, i32 32, <128 x i1> ), !tbaa !3 ; store #1. NOTE(review): the value and mask vector constants are missing in this copy (empty operand slots here and in the CHECK lines) - restore them before running the test
+  %v6 = getelementptr inbounds i8*, i8** %a1, i32 %a4 ; address of the %a4-th pointer in the %a1 table
+  %v7 = load i8*, i8** %v6, align 4, !tbaa !6 ; base pointer of the first load source
+  %v8 = getelementptr i8, i8* %v7, i32 %a5 ; advance the base by %a5 bytes
+  %v9 = bitcast i8* %v8 to <128 x i8>*
+  %v10 = tail call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %v9, i32 32, <128 x i1> , <128 x i8> undef), !tbaa !8 ; masked load between store #1 and store #2; NOTE(review): mask constant missing in this copy
+  %v11 = shufflevector <128 x i8> %v10, <128 x i8> undef, <32 x i32> ; narrow to 32 lanes; NOTE(review): shuffle mask constant missing in this copy
+  %v14 = shufflevector <32 x i8> %v11, <32 x i8> undef, <128 x i32> ; widen back to 128 lanes; NOTE(review): shuffle mask constant missing in this copy
+  tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> %v14, <128 x i8>* %v3, i32 32, <128 x i1> ), !tbaa !3 ; store #2 to the same %v3; NOTE(review): mask constant missing in this copy
+  %v16 = shufflevector <128 x i8> %v14, <128 x i8> undef, <32 x i32> ; narrow %v14 to 32 lanes again for the compare below
+  %v17 = getelementptr inbounds i8*, i8** %a1, i32 %a6 ; address of the %a6-th pointer in the %a1 table
+  %v18 = load i8*, i8** %v17, align 4, !tbaa !6 ; base pointer of the second load source
+  %v19 = getelementptr i8, i8* %v18, i32 %a7 ; advance the base by %a7 bytes
+  %v20 = bitcast i8* %v19 to <128 x i8>*
+  %v21 = tail call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %v20, i32 32, <128 x i1> , <128 x i8> undef), !tbaa !8 ; masked load between store #2 and store #3; NOTE(review): mask constant missing in this copy
+  %v22 = shufflevector <128 x i8> %v21, <128 x i8> undef, <32 x i32> ; narrow to 32 lanes
+  %v23 = icmp ugt <32 x i8> %v16, %v22 ; lane-wise unsigned %v16 > %v22
+  %v24 = select <32 x i1> %v23, <32 x i8> %v16, <32 x i8> %v22 ; together with %v23: lane-wise unsigned maximum of %v16 and %v22
+  %v25 = shufflevector <32 x i8> %v24, <32 x i8> undef, <128 x i32> ; widen the result to 128 lanes
+  tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> %v25, <128 x i8>* %v3, i32 32, <128 x i1> ), !tbaa !3 ; store #3 to the same %v3; NOTE(review): mask constant missing in this copy
+  ret i32 0
+}
+
+declare void @llvm.masked.store.v128i8.p0v128i8(<128 x i8>, <128 x i8>*, i32 immarg, <128 x i1>) #1 ; operands: value, pointer, alignment (immediate), mask
+declare <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>*, i32 immarg, <128 x i1>, <128 x i8>) #2 ; operands: pointer, alignment (immediate), mask, pass-through value
+
+attributes #0 = { nounwind willreturn }
+attributes #1 = { argmemonly nounwind willreturn }
+attributes #2 = { argmemonly nounwind readonly willreturn }
+
+!0 = !{!1, !1, i64 0} ; access tag on the load of %v1
+!1 = !{!"0x2cf74d0", !2, i64 0}
+!2 = !{!"tvm-tbaa"} ; node that every type node below ultimately chains up to
+!3 = !{!4, !4, i64 0} ; access tag on all three masked stores
+!4 = !{!"i8", !5, i64 0}
+!5 = !{!"0x2c6ebb0", !2, i64 0}
+!6 = !{!7, !7, i64 0} ; access tag on the loads of %v7 and %v18
+!7 = !{!"0x2cff870", !2, i64 0}
+!8 = !{!9, !9, i64 0} ; access tag on both masked loads; its "i8" node (!9) hangs under !10, a different parent than the stores' "i8" node (!4, under !5)
+!9 = !{!"i8", !10, i64 0}
+!10 = !{!"0x2c6c3c0", !2, i64 0}