-
Notifications
You must be signed in to change notification settings - Fork 15.1k
Closed
Labels
Description
Godbolt example:
https://godbolt.org/z/bTGT3Mdxn
opt -passes=licm -debug-only=licm -S
; test_01: baseline reproducer (no freeze on the branch condition).
; The Output section of this report shows LICM hoisting the %tmp157 load
; out of loop %bb150 into %bb149 for this function.
define void @test_01(i8 addrspace(1)* addrspace(1)* %arg, i32 %arg2) {
bb:
; Unordered atomic load of a pointer from %arg; the result carries
; !dereferenceable_or_null !5 and !align !6 metadata.
%tmp103 = load atomic i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %arg unordered, align 8, !dereferenceable_or_null !5, !align !6
; Null check on the loaded pointer; this is the loop-invariant branch condition.
%tmp117 = icmp eq i8 addrspace(1)* %tmp103, null
; Address 8 bytes past the loaded pointer, reinterpreted as an i32 pointer.
%tmp118 = getelementptr inbounds i8, i8 addrspace(1)* %tmp103, i64 8
%tmp119 = bitcast i8 addrspace(1)* %tmp118 to i32 addrspace(1)*
; Null -> return (%bb122); non-null -> enter the loop via %bb149.
br i1 %tmp117, label %bb122, label %bb149
bb122: ; preds = %bb
ret void
; %bb149 is the only non-backedge predecessor of the loop header %bb150.
bb149: ; preds = %bb
br label %bb150
; Loop body: the only terminator is an unconditional branch back to %bb150.
bb150: ; preds = %bb150, %bb149
; Counter starting at 0 and incremented by 1 each iteration (%tmp163).
%tmp151 = phi i32 [ 0, %bb149 ], [ %tmp163, %bb150 ]
; First guard: counter must be unsigned-less-than %arg2.
%tmp152 = icmp ult i32 %tmp151, %arg2
call void (i1, ...) @llvm.experimental.guard(i1 %tmp152, i32 12) [ "deopt"() ]
; Load from %tmp119, which is computed outside the loop; this is the
; load the report is about. It sits after the first guard.
%tmp157 = load atomic i32, i32 addrspace(1)* %tmp119 unordered, align 8
; Second guard: counter must be unsigned-less-than the loaded value.
%tmp158 = icmp ult i32 %tmp151, %tmp157
call void (i1, ...) @llvm.experimental.guard(i1 %tmp158, i32 12) [ "deopt"() ]
%tmp163 = add i32 %tmp151, 1
br label %bb150
}
; test_02: same IR as test_01 except that the null-check result is passed
; through `freeze` before the branch. The Output section of this report
; shows the %tmp157 load staying inside loop %bb150 for this function.
define void @test_02(i8 addrspace(1)* addrspace(1)* %arg, i32 %arg2) {
bb:
; Unordered atomic load of a pointer from %arg; the result carries
; !dereferenceable_or_null !5 and !align !6 metadata.
%tmp103 = load atomic i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %arg unordered, align 8, !dereferenceable_or_null !5, !align !6
; Null check on the loaded pointer.
%tmp117 = icmp eq i8 addrspace(1)* %tmp103, null
; Address 8 bytes past the loaded pointer, reinterpreted as an i32 pointer.
%tmp118 = getelementptr inbounds i8, i8 addrspace(1)* %tmp103, i64 8
%tmp119 = bitcast i8 addrspace(1)* %tmp118 to i32 addrspace(1)*
; The only difference from test_01: the branch uses the frozen condition.
; NOTE(review): presumably the freeze hides the null check from whatever
; analysis lets LICM treat the load as safe to hoist - confirm in LICM.
%freeze = freeze i1 %tmp117
br i1 %freeze, label %bb122, label %bb149
bb122: ; preds = %bb
ret void
; %bb149 is the only non-backedge predecessor of the loop header %bb150.
bb149: ; preds = %bb
br label %bb150
; Loop body: the only terminator is an unconditional branch back to %bb150.
bb150: ; preds = %bb150, %bb149
; Counter starting at 0 and incremented by 1 each iteration (%tmp163).
%tmp151 = phi i32 [ 0, %bb149 ], [ %tmp163, %bb150 ]
; First guard: counter must be unsigned-less-than %arg2.
%tmp152 = icmp ult i32 %tmp151, %arg2
call void (i1, ...) @llvm.experimental.guard(i1 %tmp152, i32 12) [ "deopt"() ]
; Load from %tmp119, which is computed outside the loop; expected to be
; hoisted as in test_01, but it is not.
%tmp157 = load atomic i32, i32 addrspace(1)* %tmp119 unordered, align 8
; Second guard: counter must be unsigned-less-than the loaded value.
%tmp158 = icmp ult i32 %tmp151, %tmp157
call void (i1, ...) @llvm.experimental.guard(i1 %tmp158, i32 12) [ "deopt"() ]
%tmp163 = add i32 %tmp151, 1
br label %bb150
}
; Declaration of the guard intrinsic called in both loops: an i1 condition
; followed by variadic arguments.
; NOTE(review): attribute group #2 is referenced here but no
; `attributes #2 = ...` line is shown in this input snippet.
; Function Attrs: nocallback nofree nosync willreturn
declare void @llvm.experimental.guard(i1, ...) #2
; Operand of !dereferenceable_or_null on the %tmp103 loads.
!5 = !{i64 16}
; Operand of !align on the %tmp103 loads.
!6 = !{i64 8}
Output:
LICM hoisting to bb149: %tmp157 = load atomic i32, i32 addrspace(1)* %tmp119 unordered, align 8
; ModuleID = 'reduced.ll'
source_filename = "reduced.ll"
define void @test_01(i8 addrspace(1)* addrspace(1)* %arg, i32 %arg2) {
bb:
%tmp103 = load atomic i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %arg unordered, align 8, !dereferenceable_or_null !0, !align !1
%tmp117 = icmp eq i8 addrspace(1)* %tmp103, null
%tmp118 = getelementptr inbounds i8, i8 addrspace(1)* %tmp103, i64 8
%tmp119 = bitcast i8 addrspace(1)* %tmp118 to i32 addrspace(1)*
br i1 %tmp117, label %bb122, label %bb149
bb122: ; preds = %bb
ret void
bb149: ; preds = %bb
%tmp157 = load atomic i32, i32 addrspace(1)* %tmp119 unordered, align 8
br label %bb150
bb150: ; preds = %bb150, %bb149
%tmp151 = phi i32 [ 0, %bb149 ], [ %tmp163, %bb150 ]
%tmp152 = icmp ult i32 %tmp151, %arg2
call void (i1, ...) @llvm.experimental.guard(i1 %tmp152, i32 12) [ "deopt"() ]
%tmp158 = icmp ult i32 %tmp151, %tmp157
call void (i1, ...) @llvm.experimental.guard(i1 %tmp158, i32 12) [ "deopt"() ]
%tmp163 = add i32 %tmp151, 1
br label %bb150
}
define void @test_02(i8 addrspace(1)* addrspace(1)* %arg, i32 %arg2) {
bb:
%tmp103 = load atomic i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %arg unordered, align 8, !dereferenceable_or_null !0, !align !1
%tmp117 = icmp eq i8 addrspace(1)* %tmp103, null
%tmp118 = getelementptr inbounds i8, i8 addrspace(1)* %tmp103, i64 8
%tmp119 = bitcast i8 addrspace(1)* %tmp118 to i32 addrspace(1)*
%freeze = freeze i1 %tmp117
br i1 %freeze, label %bb122, label %bb149
bb122: ; preds = %bb
ret void
bb149: ; preds = %bb
br label %bb150
bb150: ; preds = %bb150, %bb149
%tmp151 = phi i32 [ 0, %bb149 ], [ %tmp163, %bb150 ]
%tmp152 = icmp ult i32 %tmp151, %arg2
call void (i1, ...) @llvm.experimental.guard(i1 %tmp152, i32 12) [ "deopt"() ]
%tmp157 = load atomic i32, i32 addrspace(1)* %tmp119 unordered, align 8
%tmp158 = icmp ult i32 %tmp151, %tmp157
call void (i1, ...) @llvm.experimental.guard(i1 %tmp158, i32 12) [ "deopt"() ]
%tmp163 = add i32 %tmp151, 1
br label %bb150
}
; Function Attrs: nocallback nofree nosync willreturn
declare void @llvm.experimental.guard(i1, ...) #0
attributes #0 = { nocallback nofree nosync willreturn }
!0 = !{i64 16}
!1 = !{i64 8}
The only difference between test_01 and test_02 is the freeze on the loop-invariant branch condition. In the former case the invariant load gets hoisted, while in the presence of the freeze it does not. For us, this has caused a massive negative performance impact since freeze instructions started being generated with -freeze-loop-unswitch-cond set to true.