Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[DSE] Merge stores when the later store only writes to memory locations the early store also wrote to (2nd try)
This is a 2nd attempt at: https://reviews.llvm.org/rL310055 ...which was reverted at rL310123 because of PR34074: https://bugs.llvm.org/show_bug.cgi?id=34074 In this version, we break out of the inner loop after we successfully merge and kill a pair of stores. In the earlier rev, we were continuing instead, which meant we could process the invalid info from a now dead store. Original commit message (authored by Filipe Cabecinhas): This fixes PR31777. If both stores' values are ConstantInt, we merge the two stores (shifting the smaller store appropriately) and replace the earlier (and larger) store with an updated constant. In the future we should also support vectors of integers. And maybe float/double if we can. Differential Revision: https://reviews.llvm.org/D30703 llvm-svn: 314206
- Loading branch information
1 parent
f47c4b4
commit 1d04b5b
Showing
5 changed files
with
494 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2 changes: 1 addition & 1 deletion
2
llvm/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
173 changes: 173 additions & 0 deletions
173
llvm/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
; RUN: opt -dse -enable-dse-partial-store-merging -S < %s | FileCheck %s | ||
;; The leading "E" in the layout string selects big-endian byte order, so byte | ||
;; offset 0 of an integer store holds that integer's most significant byte. | ||
;; Every expected merged constant in this file is derived from that ordering. | ||
target datalayout = "E-m:e-i64:64-i128:128-n32:64-S128" | ||
|
||
;; Four i8 stores at byte offsets 0, 1, 2 and 3 rewrite every byte of the | ||
;; preceding i32 store. The expected output is a single i32 store whose | ||
;; constant is built from those four bytes in memory order (09 0A 0B 0C). | ||
define void @byte_by_byte_replacement(i32 *%ptr) { | ||
; CHECK-LABEL: @byte_by_byte_replacement( | ||
; CHECK-NEXT: entry: | ||
; CHECK-NEXT: store i32 151653132, i32* [[PTR:%.*]] | ||
; CHECK-NEXT: ret void | ||
; | ||
entry: | ||
;; This store's value should be modified as it should be better to use one | ||
;; larger store than several smaller ones. | ||
;; store will turn into 0x090A0B0C == 151653132 | ||
store i32 305419896, i32* %ptr ; 0x12345678 | ||
;; View the same address as bytes; %bptr itself is byte offset 0. | ||
%bptr = bitcast i32* %ptr to i8* | ||
%bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1 | ||
%bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2 | ||
%bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 | ||
|
||
;; We should be able to merge these four stores with the i32 above | ||
; value (and bytes) stored before ; 0x12345678 | ||
store i8 9, i8* %bptr ; 09 | ||
store i8 10, i8* %bptr1 ; 0A | ||
store i8 11, i8* %bptr2 ; 0B | ||
store i8 12, i8* %bptr3 ; 0C | ||
; 0x090A0B0C | ||
|
||
ret void | ||
} | ||
|
||
;; Two i16 stores at i16 indices 1 and 3 (byte offsets 2 and 6) overwrite parts | ||
;; of the preceding i64 store. The expected output is a single i64 store of | ||
;; 0x0102102005067080 == 72638273700655232. | ||
define void @word_replacement(i64 *%ptr) { | ||
; CHECK-LABEL: @word_replacement( | ||
; CHECK-NEXT: entry: | ||
; CHECK-NEXT: store i64 72638273700655232, i64* [[PTR:%.*]] | ||
; CHECK-NEXT: ret void | ||
; | ||
entry: | ||
store i64 72623859790382856, i64* %ptr ; 0x0102030405060708 | ||
|
||
;; View the same address as i16 elements. %wptr2 is computed here but no | ||
;; store in this function goes through it. | ||
%wptr = bitcast i64* %ptr to i16* | ||
%wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1 | ||
%wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2 | ||
%wptr3 = getelementptr inbounds i16, i16* %wptr, i64 3 | ||
|
||
;; We should be able to merge these two stores with the i64 one above | ||
; value (and bytes) stored before ; 0x0102030405060708 | ||
store i16 4128, i16* %wptr1 ; 1020 | ||
store i16 28800, i16* %wptr3 ; 7080 | ||
; 0x0102102005067080 | ||
|
||
ret void | ||
} | ||
|
||
|
||
;; Three later stores of different widths hit disjoint byte ranges of the | ||
;; preceding i64 store: an i8 at byte 6, an i16 at bytes 4-5 and an i32 at | ||
;; bytes 0-3. Byte 7 (0x0f) is never rewritten. The expected output is a | ||
;; single i64 store of 0x040302010605070f == 289077004501059343. | ||
define void @differently_sized_replacements(i64 *%ptr) { | ||
; CHECK-LABEL: @differently_sized_replacements( | ||
; CHECK-NEXT: entry: | ||
; CHECK-NEXT: store i64 289077004501059343, i64* [[PTR:%.*]] | ||
; CHECK-NEXT: ret void | ||
; | ||
entry: | ||
store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f | ||
|
||
;; Byte, i16 and i32 views of the same address. | ||
%bptr = bitcast i64* %ptr to i8* | ||
%bptr6 = getelementptr inbounds i8, i8* %bptr, i64 6 | ||
%wptr = bitcast i64* %ptr to i16* | ||
%wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2 | ||
%dptr = bitcast i64* %ptr to i32* | ||
|
||
;; We should be able to merge all these stores with the i64 one above | ||
; value (and bytes) stored before ; 0x08090a0b0c0d0e0f | ||
store i8 7, i8* %bptr6 ; 07 | ||
store i16 1541, i16* %wptr2 ; 0605 | ||
store i32 67305985, i32* %dptr ; 04030201 | ||
; 0x040302010605070f | ||
ret void | ||
} | ||
|
||
|
||
;; Three later stores hit nested byte ranges, narrowest first: an i8 at byte 3, | ||
;; an i16 at bytes 2-3, then an i32 at bytes 0-3. Each store covers everything | ||
;; the previous one wrote, so only the i32's bytes remain in the upper half. | ||
;; The expected output is a single i64 store of | ||
;; 0x040302010c0d0e0f == 289077004602248719. | ||
define void @multiple_replacements_to_same_byte(i64 *%ptr) { | ||
; CHECK-LABEL: @multiple_replacements_to_same_byte( | ||
; CHECK-NEXT: entry: | ||
; CHECK-NEXT: store i64 289077004602248719, i64* [[PTR:%.*]] | ||
; CHECK-NEXT: ret void | ||
; | ||
entry: | ||
store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f | ||
|
||
;; Byte, i16 and i32 views of the same address. | ||
%bptr = bitcast i64* %ptr to i8* | ||
%bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 | ||
%wptr = bitcast i64* %ptr to i16* | ||
%wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1 | ||
%dptr = bitcast i64* %ptr to i32* | ||
|
||
;; We should be able to merge all these stores with the i64 one above | ||
; value (and bytes) stored before ; 0x08090a0b0c0d0e0f | ||
store i8 7, i8* %bptr3 ; 07 | ||
store i16 1541, i16* %wptr1 ; 0605 | ||
store i32 67305985, i32* %dptr ; 04030201 | ||
; 0x040302010c0d0e0f | ||
ret void | ||
} | ||
|
||
;; Three later stores hit nested byte ranges, widest first: an i32 at bytes | ||
;; 0-3, an i16 at bytes 2-3, then an i8 at byte 3. Each store overwrites part | ||
;; of what the previous one wrote, leaving bytes 04 03 06 07 in the upper half. | ||
;; The expected output is a single i64 store of | ||
;; 0x040306070c0d0e0f == 289081428418563599. | ||
define void @merged_merges(i64 *%ptr) { | ||
; CHECK-LABEL: @merged_merges( | ||
; CHECK-NEXT: entry: | ||
; CHECK-NEXT: store i64 289081428418563599, i64* [[PTR:%.*]] | ||
; CHECK-NEXT: ret void | ||
; | ||
entry: | ||
store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f | ||
|
||
;; Byte, i16 and i32 views of the same address. | ||
%bptr = bitcast i64* %ptr to i8* | ||
%bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 | ||
%wptr = bitcast i64* %ptr to i16* | ||
%wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1 | ||
%dptr = bitcast i64* %ptr to i32* | ||
|
||
;; We should be able to merge all these stores with the i64 one above | ||
; value (and bytes) stored before ; 0x08090a0b0c0d0e0f | ||
store i32 67305985, i32* %dptr ; 04030201 | ||
store i16 1541, i16* %wptr1 ; 0605 | ||
store i8 7, i8* %bptr3 ; 07 | ||
; 0x040306070c0d0e0f | ||
ret void | ||
} | ||
|
||
;; The i64 store of 0 writes bytes 0..7 relative to %ptr. The two later stores | ||
;; write bytes -1..2 (the i32) and bytes 3..10 (the i64), which together cover | ||
;; all eight of those bytes. The expected output has no store of 0 left and | ||
;; keeps both later stores with their original constants 1234 and 5678. | ||
define signext i8 @shouldnt_merge_since_theres_a_full_overlap(i64 *%ptr) { | ||
; CHECK-LABEL: @shouldnt_merge_since_theres_a_full_overlap( | ||
; CHECK-NEXT: entry: | ||
; CHECK-NEXT: [[BPTR:%.*]] = bitcast i64* [[PTR:%.*]] to i8* | ||
; CHECK-NEXT: [[BPTRM1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 -1 | ||
; CHECK-NEXT: [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3 | ||
; CHECK-NEXT: [[DPTR:%.*]] = bitcast i8* [[BPTRM1]] to i32* | ||
; CHECK-NEXT: [[QPTR:%.*]] = bitcast i8* [[BPTR3]] to i64* | ||
; CHECK-NEXT: store i32 1234, i32* [[DPTR]], align 1 | ||
; CHECK-NEXT: store i64 5678, i64* [[QPTR]], align 1 | ||
; CHECK-NEXT: ret i8 0 | ||
; | ||
entry: | ||
|
||
;; Earlier store: bytes 0..7 relative to %ptr. | ||
store i64 0, i64* %ptr | ||
|
||
;; %dptr starts one byte before %ptr; %qptr starts three bytes after it. | ||
%bptr = bitcast i64* %ptr to i8* | ||
%bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1 | ||
%bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3 | ||
%dptr = bitcast i8* %bptrm1 to i32* | ||
%qptr = bitcast i8* %bptr3 to i64* | ||
|
||
;; Both stores are marked align 1 because their addresses are offset by an | ||
;; odd number of bytes from %ptr. | ||
store i32 1234, i32* %dptr, align 1 | ||
store i64 5678, i64* %qptr, align 1 | ||
|
||
ret i8 0 | ||
} | ||
|
||
;; Test case from PR31777 | ||
;; An i64 store of 0 to the union's only field is followed by an i16 store of | ||
;; 42 to the same address. With big-endian byte order the i16 occupies the two | ||
;; most significant bytes of the i64, so the expected merged constant is | ||
;; 0x002A000000000000 == 42 << 48 == 11821949021847552. | ||
%union.U = type { i64 } | ||
|
||
define void @foo(%union.U* nocapture %u) { | ||
; CHECK-LABEL: @foo( | ||
; CHECK-NEXT: entry: | ||
; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds [[UNION_U:%.*]], %union.U* [[U:%.*]], i64 0, i32 0 | ||
; CHECK-NEXT: store i64 11821949021847552, i64* [[I]], align 8 | ||
; CHECK-NEXT: ret void | ||
; | ||
entry: | ||
;; %i addresses field 0 of the union, i.e. the same address as %u. | ||
%i = getelementptr inbounds %union.U, %union.U* %u, i64 0, i32 0 | ||
store i64 0, i64* %i, align 8 | ||
;; Reinterpret the union pointer as i16* to write its first two bytes. | ||
%s = bitcast %union.U* %u to i16* | ||
store i16 42, i16* %s, align 8 | ||
ret void | ||
} |
Oops, something went wrong.