Skip to content

Commit

Permalink
[PGO][PGSO] Enable size optimizations in code gen / target passes for…
Browse files Browse the repository at this point in the history
… cold code.

Summary: Split off of D67120.

Reviewers: davidxl

Subscribers: hiraditya, asb, rbar, johnrusso, simoncook, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, rogfer01, MartinMosbeck, brucehoult, the_o, PkmX, jocewei, lenary, s.egerton, pzheng, sameer.abuasal, apazos, luismarques, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D71288
  • Loading branch information
hjyamauchi committed Dec 13, 2019
1 parent d6c445e commit ed50e60
Show file tree
Hide file tree
Showing 36 changed files with 4,208 additions and 206 deletions.
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Utils/SizeOpts.cpp
Expand Up @@ -29,7 +29,7 @@ cl::opt<bool> PGSOColdCodeOnly(
"to cold code."));

cl::opt<bool> PGSOIRPassOrTestOnly(
"pgso-ir-pass-or-test-only", cl::Hidden, cl::init(true),
"pgso-ir-pass-or-test-only", cl::Hidden, cl::init(false),
cl::desc("Apply the profile guided size optimizations only"
"to the IR passes or tests."));

Expand Down
128 changes: 128 additions & 0 deletions llvm/test/CodeGen/AArch64/arm64-memset-to-bzero-pgso.ll
@@ -0,0 +1,128 @@
; RUN: llc %s -enable-machine-outliner=never -mtriple=arm64-linux-gnu -o - | \
; RUN: FileCheck --check-prefixes=CHECK,CHECK-LINUX %s
; <rdar://problem/14199482> ARM64: Calls to bzero() replaced with calls to memset()

; CHECK-LABEL: fct1:
; For small size (<= 256), we do not change memset to bzero.
; CHECK-DARWIN: {{b|bl}} _memset
; CHECK-LINUX: {{b|bl}} memset
define void @fct1(i8* nocapture %ptr) !prof !14 {
entry:
tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 256, i1 false)
ret void
}

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)

; CHECK-LABEL: fct2:
; When the size is bigger than 256, change into bzero.
; CHECK-DARWIN: {{b|bl}} _bzero
; CHECK-LINUX: {{b|bl}} memset
define void @fct2(i8* nocapture %ptr) !prof !14 {
entry:
tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 257, i1 false)
ret void
}

; CHECK-LABEL: fct3:
; For unknown size, change to bzero.
; CHECK-DARWIN: {{b|bl}} _bzero
; CHECK-LINUX: {{b|bl}} memset
define void @fct3(i8* nocapture %ptr, i32 %unknown) !prof !14 {
entry:
%conv = sext i32 %unknown to i64
tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 %conv, i1 false)
ret void
}

; CHECK-LABEL: fct4:
; Size <= 256, no change.
; CHECK-DARWIN: {{b|bl}} _memset
; CHECK-LINUX: {{b|bl}} memset
define void @fct4(i8* %ptr) !prof !14 {
entry:
%tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
%call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 256, i64 %tmp)
ret void
}

declare i8* @__memset_chk(i8*, i32, i64, i64)

declare i64 @llvm.objectsize.i64(i8*, i1)

; CHECK-LABEL: fct5:
; Size > 256, change.
; CHECK-DARWIN: {{b|bl}} _bzero
; CHECK-LINUX: {{b|bl}} memset
define void @fct5(i8* %ptr) !prof !14 {
entry:
%tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
%call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 257, i64 %tmp)
ret void
}

; CHECK-LABEL: fct6:
; Size = unknown, change.
; CHECK-DARWIN: {{b|bl}} _bzero
; CHECK-LINUX: {{b|bl}} memset
define void @fct6(i8* %ptr, i32 %unknown) !prof !14 {
entry:
%conv = sext i32 %unknown to i64
%tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
%call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 %conv, i64 %tmp)
ret void
}

; Next functions check that memset is not turned into bzero
; when the set constant is non-zero, whatever the given size.

; CHECK-LABEL: fct7:
; memset with something that is not a zero, no change.
; CHECK-DARWIN: {{b|bl}} _memset
; CHECK-LINUX: {{b|bl}} memset
define void @fct7(i8* %ptr) !prof !14 {
entry:
%tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
%call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 256, i64 %tmp)
ret void
}

; CHECK-LABEL: fct8:
; memset with something that is not a zero, no change.
; CHECK-DARWIN: {{b|bl}} _memset
; CHECK-LINUX: {{b|bl}} memset
define void @fct8(i8* %ptr) !prof !14 {
entry:
%tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
%call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 257, i64 %tmp)
ret void
}

; CHECK-LABEL: fct9:
; memset with something that is not a zero, no change.
; CHECK-DARWIN: {{b|bl}} _memset
; CHECK-LINUX: {{b|bl}} memset
define void @fct9(i8* %ptr, i32 %unknown) !prof !14 {
entry:
%conv = sext i32 %unknown to i64
%tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
%call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 %conv, i64 %tmp)
ret void
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}
133 changes: 133 additions & 0 deletions llvm/test/CodeGen/AArch64/max-jump-table.ll
Expand Up @@ -203,3 +203,136 @@ default: unreachable

return: ret void
}

define i32 @jt1_optsize(i32 %a, i32 %b) optsize {
entry:
switch i32 %a, label %return [
i32 1, label %bb1
i32 2, label %bb2
i32 3, label %bb3
i32 4, label %bb4
i32 5, label %bb5
i32 6, label %bb6
i32 7, label %bb7
i32 8, label %bb8
i32 9, label %bb9
i32 10, label %bb10
i32 11, label %bb11
i32 12, label %bb12
i32 13, label %bb13
i32 14, label %bb14
i32 15, label %bb15
i32 16, label %bb16
i32 17, label %bb17
]
; CHECK-LABEL: function jt1_optsize:
; CHECK-NEXT: Jump Tables:
; CHECK0-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECK0-NOT: %jump-table.1:
; CHECK4-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECK4-NOT: %jump-table.1:
; CHECK8-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECK8-NOT: %jump-table.1:
; CHECK16-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECK16-NOT: %jump-table.1:
; CHECKM1-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECKM1-NOT: %jump-table.1:
; CHECKM3-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECKM3-NOT: %jump-table.1:
; CHECK-DAG: End machine code for function jt1_optsize.

bb1: tail call void @ext(i32 1, i32 0) br label %return
bb2: tail call void @ext(i32 2, i32 2) br label %return
bb3: tail call void @ext(i32 3, i32 4) br label %return
bb4: tail call void @ext(i32 4, i32 6) br label %return
bb5: tail call void @ext(i32 5, i32 8) br label %return
bb6: tail call void @ext(i32 6, i32 10) br label %return
bb7: tail call void @ext(i32 7, i32 12) br label %return
bb8: tail call void @ext(i32 8, i32 14) br label %return
bb9: tail call void @ext(i32 9, i32 16) br label %return
bb10: tail call void @ext(i32 1, i32 18) br label %return
bb11: tail call void @ext(i32 2, i32 20) br label %return
bb12: tail call void @ext(i32 3, i32 22) br label %return
bb13: tail call void @ext(i32 4, i32 24) br label %return
bb14: tail call void @ext(i32 5, i32 26) br label %return
bb15: tail call void @ext(i32 6, i32 28) br label %return
bb16: tail call void @ext(i32 7, i32 30) br label %return
bb17: tail call void @ext(i32 8, i32 32) br label %return

return: ret i32 %b
}

define i32 @jt1_pgso(i32 %a, i32 %b) !prof !14 {
entry:
switch i32 %a, label %return [
i32 1, label %bb1
i32 2, label %bb2
i32 3, label %bb3
i32 4, label %bb4
i32 5, label %bb5
i32 6, label %bb6
i32 7, label %bb7
i32 8, label %bb8
i32 9, label %bb9
i32 10, label %bb10
i32 11, label %bb11
i32 12, label %bb12
i32 13, label %bb13
i32 14, label %bb14
i32 15, label %bb15
i32 16, label %bb16
i32 17, label %bb17
]
; CHECK-LABEL: function jt1_pgso:
; CHECK-NEXT: Jump Tables:
; CHECK0-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECK0-NOT: %jump-table.1:
; CHECK4-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECK4-NOT: %jump-table.1:
; CHECK8-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECK8-NOT: %jump-table.1:
; CHECK16-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECK16-NOT: %jump-table.1:
; CHECKM1-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECKM1-NOT: %jump-table.1:
; CHECKM3-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.5 %bb.6 %bb.7 %bb.8 %bb.9 %bb.10 %bb.11 %bb.12 %bb.13 %bb.14 %bb.15 %bb.16 %bb.17
; CHECKM3-NOT: %jump-table.1:
; CHECK-DAG: End machine code for function jt1_pgso.

bb1: tail call void @ext(i32 1, i32 0) br label %return
bb2: tail call void @ext(i32 2, i32 2) br label %return
bb3: tail call void @ext(i32 3, i32 4) br label %return
bb4: tail call void @ext(i32 4, i32 6) br label %return
bb5: tail call void @ext(i32 5, i32 8) br label %return
bb6: tail call void @ext(i32 6, i32 10) br label %return
bb7: tail call void @ext(i32 7, i32 12) br label %return
bb8: tail call void @ext(i32 8, i32 14) br label %return
bb9: tail call void @ext(i32 9, i32 16) br label %return
bb10: tail call void @ext(i32 1, i32 18) br label %return
bb11: tail call void @ext(i32 2, i32 20) br label %return
bb12: tail call void @ext(i32 3, i32 22) br label %return
bb13: tail call void @ext(i32 4, i32 24) br label %return
bb14: tail call void @ext(i32 5, i32 26) br label %return
bb15: tail call void @ext(i32 6, i32 28) br label %return
bb16: tail call void @ext(i32 7, i32 30) br label %return
bb17: tail call void @ext(i32 8, i32 32) br label %return

return: ret i32 %b
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}
25 changes: 25 additions & 0 deletions llvm/test/CodeGen/ARM/constantpool-align.ll
Expand Up @@ -17,3 +17,28 @@ define void @f_optsize(<4 x i32>* %p) optsize {
store <4 x i32> <i32 -1, i32 0, i32 0, i32 -1>, <4 x i32>* %p, align 4
ret void
}

; CHECK-LABEL: f_pgso:
; CHECK: vld1.64 {{.*}}, [r1]
; CHECK: .p2align 3
define void @f_pgso(<4 x i32>* %p) !prof !14 {
store <4 x i32> <i32 -1, i32 0, i32 0, i32 -1>, <4 x i32>* %p, align 4
ret void
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}
28 changes: 28 additions & 0 deletions llvm/test/CodeGen/RISCV/tail-calls.ll
Expand Up @@ -23,6 +23,17 @@ entry:
ret void
}

; Perform tail call optimization for external symbol.
@dest_pgso = global [2 x i8] zeroinitializer
define void @caller_extern_pgso(i8* %src) !prof !14 {
entry:
; CHECK: caller_extern_pgso
; CHECK-NOT: call memcpy
; CHECK: tail memcpy
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @dest_pgso, i32 0, i32 0), i8* %src, i32 7, i1 false)
ret void
}

; Perform indirect tail call optimization (for function pointer call).
declare void @callee_indirect1()
declare void @callee_indirect2()
Expand Down Expand Up @@ -146,3 +157,20 @@ entry:
tail call void @callee_nostruct()
ret void
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}
23 changes: 23 additions & 0 deletions llvm/test/CodeGen/X86/atom-pad-short-functions.ll
Expand Up @@ -29,6 +29,13 @@ define i32 @test_minsize(i32 %a) nounwind minsize {
ret i32 %a
}

define i32 @test_pgso(i32 %a) nounwind !prof !14 {
; CHECK: test_pgso
; CHECK: movl
; CHECK-NEXT: ret
ret i32 %a
}

define i32 @test_add(i32 %a, i32 %b) nounwind {
; CHECK: test_add
; CHECK: addl
Expand Down Expand Up @@ -101,3 +108,19 @@ while.end:
ret void
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}

0 comments on commit ed50e60

Please sign in to comment.