-
Notifications
You must be signed in to change notification settings - Fork 10.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Fix incorrect stepping in gdb for amdgcn.end.cf intrinsic. #83010
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Valery Pykhtin (vpykhtin) ChangesAfter #73958 gdb.rocm/lane-execution.exp test started to fail due to incorrect dbg location. This is kind of a revert patch. Full diff: https://github.com/llvm/llvm-project/pull/83010.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index 58214f30bb8d67..2ccdc496f36de8 100644
--- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -207,6 +207,7 @@ bool SIAnnotateControlFlow::openIf(BranchInst *Term) {
return false;
IRBuilder<> IRB(Term);
+ IRB.SetCurrentDebugLocation(DebugLoc());
Value *IfCall = IRB.CreateCall(If, {Term->getCondition()});
Value *Cond = IRB.CreateExtractValue(IfCall, {0});
Value *Mask = IRB.CreateExtractValue(IfCall, {1});
@@ -222,6 +223,7 @@ bool SIAnnotateControlFlow::insertElse(BranchInst *Term) {
}
IRBuilder<> IRB(Term);
+ IRB.SetCurrentDebugLocation(DebugLoc());
Value *ElseCall = IRB.CreateCall(Else, {popSaved()});
Value *Cond = IRB.CreateExtractValue(ElseCall, {0});
Value *Mask = IRB.CreateExtractValue(ElseCall, {1});
@@ -235,7 +237,9 @@ Value *SIAnnotateControlFlow::handleLoopCondition(
Value *Cond, PHINode *Broken, llvm::Loop *L, BranchInst *Term) {
auto CreateBreak = [this, Cond, Broken](Instruction *I) -> CallInst * {
- return IRBuilder<>(I).CreateCall(IfBreak, {Cond, Broken});
+ IRBuilder<> IRB(I);
+ IRB.SetCurrentDebugLocation(DebugLoc());
+ return IRB.CreateCall(IfBreak, {Cond, Broken});
};
if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
@@ -296,7 +300,9 @@ bool SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
Broken->addIncoming(PHIValue, Pred);
}
- CallInst *LoopCall = IRBuilder<>(Term).CreateCall(Loop, {Arg});
+ IRBuilder<> IRB(Term);
+ IRB.SetCurrentDebugLocation(DebugLoc());
+ CallInst *LoopCall = IRB.CreateCall(Loop, {Arg});
Term->setCondition(LoopCall);
push(Term->getSuccessor(0), Arg);
@@ -336,8 +342,9 @@ bool SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
// Split edge to make Def dominate Use
FirstInsertionPt = SplitEdge(DefBB, BB, DT, LI)->getFirstInsertionPt();
}
- IRBuilder<>(FirstInsertionPt->getParent(), FirstInsertionPt)
- .CreateCall(EndCf, {Exec});
+ IRBuilder<> IRB(FirstInsertionPt->getParent(), FirstInsertionPt);
+ IRB.SetCurrentDebugLocation(DebugLoc());
+ IRB.CreateCall(EndCf, {Exec});
}
return true;
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-dbg-info.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-dbg-info.ll
index a7af02017001fb..41828d163fba72 100644
--- a/llvm/test/CodeGen/AMDGPU/si-annotate-dbg-info.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-annotate-dbg-info.ll
@@ -7,15 +7,15 @@ define amdgpu_ps i32 @if_else(i32 %0) !dbg !5 {
; OPT-SAME: i32 [[TMP0:%.*]]) !dbg [[DBG5:![0-9]+]] {
; OPT-NEXT: [[C:%.*]] = icmp ne i32 [[TMP0]], 0, !dbg [[DBG13:![0-9]+]]
; OPT-NEXT: tail call void @llvm.dbg.value(metadata i1 [[C]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG13]]
-; OPT-NEXT: [[TMP2:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[C]]), !dbg [[DBG14:![0-9]+]]
-; OPT-NEXT: [[TMP3:%.*]] = extractvalue { i1, i64 } [[TMP2]], 0, !dbg [[DBG14]]
-; OPT-NEXT: [[TMP4:%.*]] = extractvalue { i1, i64 } [[TMP2]], 1, !dbg [[DBG14]]
-; OPT-NEXT: br i1 [[TMP3]], label [[FALSE:%.*]], label [[FLOW:%.*]], !dbg [[DBG14]]
+; OPT-NEXT: [[TMP2:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[C]])
+; OPT-NEXT: [[TMP3:%.*]] = extractvalue { i1, i64 } [[TMP2]], 0
+; OPT-NEXT: [[TMP4:%.*]] = extractvalue { i1, i64 } [[TMP2]], 1
+; OPT-NEXT: br i1 [[TMP3]], label [[FALSE:%.*]], label [[FLOW:%.*]], !dbg [[DBG14:![0-9]+]]
; OPT: Flow:
; OPT-NEXT: [[TMP5:%.*]] = phi i32 [ 33, [[FALSE]] ], [ undef, [[TMP1:%.*]] ]
-; OPT-NEXT: [[TMP6:%.*]] = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 [[TMP4]]), !dbg [[DBG14]]
-; OPT-NEXT: [[TMP7:%.*]] = extractvalue { i1, i64 } [[TMP6]], 0, !dbg [[DBG14]]
-; OPT-NEXT: [[TMP8:%.*]] = extractvalue { i1, i64 } [[TMP6]], 1, !dbg [[DBG14]]
+; OPT-NEXT: [[TMP6:%.*]] = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 [[TMP4]])
+; OPT-NEXT: [[TMP7:%.*]] = extractvalue { i1, i64 } [[TMP6]], 0
+; OPT-NEXT: [[TMP8:%.*]] = extractvalue { i1, i64 } [[TMP6]], 1
; OPT-NEXT: br i1 [[TMP7]], label [[TRUE:%.*]], label [[EXIT:%.*]], !dbg [[DBG14]]
; OPT: true:
; OPT-NEXT: br label [[EXIT]], !dbg [[DBG15:![0-9]+]]
@@ -23,9 +23,9 @@ define amdgpu_ps i32 @if_else(i32 %0) !dbg !5 {
; OPT-NEXT: br label [[FLOW]], !dbg [[DBG16:![0-9]+]]
; OPT: exit:
; OPT-NEXT: [[RET:%.*]] = phi i32 [ [[TMP5]], [[FLOW]] ], [ 42, [[TRUE]] ], !dbg [[DBG17:![0-9]+]]
-; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP8]]), !dbg [[DBG18:![0-9]+]]
+; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP8]])
; OPT-NEXT: tail call void @llvm.dbg.value(metadata i32 [[RET]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG17]]
-; OPT-NEXT: ret i32 [[RET]], !dbg [[DBG18]]
+; OPT-NEXT: ret i32 [[RET]], !dbg [[DBG18:![0-9]+]]
;
%c = icmp eq i32 %0, 0, !dbg !13
tail call void @llvm.dbg.value(metadata i1 %c, metadata !9, metadata !DIExpression()), !dbg !13
@@ -54,10 +54,10 @@ define amdgpu_ps void @loop_if_break(i32 %n) !dbg !19 {
; OPT-NEXT: tail call void @llvm.dbg.value(metadata i32 [[I]], metadata [[META21:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25]]
; OPT-NEXT: [[C:%.*]] = icmp ugt i32 [[I]], 0, !dbg [[DBG26:![0-9]+]]
; OPT-NEXT: tail call void @llvm.dbg.value(metadata i1 [[C]], metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26]]
-; OPT-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[C]]), !dbg [[DBG27:![0-9]+]]
-; OPT-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0, !dbg [[DBG27]]
-; OPT-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1, !dbg [[DBG27]]
-; OPT-NEXT: br i1 [[TMP1]], label [[LOOP_BODY:%.*]], label [[FLOW]], !dbg [[DBG27]]
+; OPT-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[C]])
+; OPT-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0
+; OPT-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1
+; OPT-NEXT: br i1 [[TMP1]], label [[LOOP_BODY:%.*]], label [[FLOW]], !dbg [[DBG27:![0-9]+]]
; OPT: loop_body:
; OPT-NEXT: [[I_NEXT:%.*]] = sub i32 [[I]], 1, !dbg [[DBG28:![0-9]+]]
; OPT-NEXT: tail call void @llvm.dbg.value(metadata i32 [[I_NEXT]], metadata [[META23:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28]]
@@ -65,13 +65,13 @@ define amdgpu_ps void @loop_if_break(i32 %n) !dbg !19 {
; OPT: Flow:
; OPT-NEXT: [[TMP3]] = phi i32 [ [[I_NEXT]], [[LOOP_BODY]] ], [ undef, [[LOOP]] ]
; OPT-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[LOOP_BODY]] ], [ true, [[LOOP]] ]
-; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]]), !dbg [[DBG27]]
-; OPT-NEXT: [[TMP5]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP4]], i64 [[PHI_BROKEN]]), !dbg [[DBG27]]
-; OPT-NEXT: [[TMP6:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP5]]), !dbg [[DBG27]]
+; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
+; OPT-NEXT: [[TMP5]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP4]], i64 [[PHI_BROKEN]])
+; OPT-NEXT: [[TMP6:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP5]])
; OPT-NEXT: br i1 [[TMP6]], label [[EXIT:%.*]], label [[LOOP]], !dbg [[DBG27]]
; OPT: exit:
-; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP5]]), !dbg [[DBG30:![0-9]+]]
-; OPT-NEXT: ret void, !dbg [[DBG30]]
+; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP5]])
+; OPT-NEXT: ret void, !dbg [[DBG30:![0-9]+]]
;
entry:
br label %loop, !dbg !24
|
IRBuilder<>(FirstInsertionPt->getParent(), FirstInsertionPt) | ||
.CreateCall(EndCf, {Exec}); | ||
IRBuilder<> IRB(FirstInsertionPt->getParent(), FirstInsertionPt); | ||
IRB.SetCurrentDebugLocation(DebugLoc()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the only DebugLoc that is causing GDB test failures, where control flow joins in the 'Flow' block. Could you remove the other changes in this PR? I think the other debug locations should all be correct.
A comment might be nice here too, maybe something like: "StructurizeCFG 'Flow' blocks have debug locations from the condition, for now just avoid copying these DebugLocs so that stepping out of the then/else block in a debugger doesn't step to the condition."?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you Emma, done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure I follow. Isn't the location we are now dropping with this patch associated with the return, not the condition?
The locations in the if_else
test are:
[[DBG13]] = !DILocation(line: 1, column: 1, scope: [[DBG5]])
[[DBG14]] = !DILocation(line: 2, column: 1, scope: [[DBG5]])
[[DBG15]] = !DILocation(line: 3, column: 1, scope: [[DBG5]])
[[DBG16]] = !DILocation(line: 4, column: 1, scope: [[DBG5]])
[[DBG17]] = !DILocation(line: 5, column: 1, scope: [[DBG5]])
[[DBG18]] = !DILocation(line: 6, column: 1, scope: [[DBG5]])
Where DBG18
is associated with the return
instruction.
This change drops , !dbg [[DBG18:![0-9]+]]
from the end.cf
call in the exit
block. If the description in the comment applied wouldn't we have been emitting , !dbg [[DBG13]]
or , !dbg [[DBG14]]
before?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@slinder1 good point, probably this testcase doesn't reflect what's happening with the failing gdb test.
I'm not the right person to work on this particular issue, I just tried to set dbg location on end.cf intrinsic because it's the next instruction after asan_report call and the call uses return address (that is end.cf intrinsic) to report faulting location. I have a workaround for this - insert dummy instruction with proper dbg loc between asan_report call and end.cf intrinsic.
I suggest to submit this fix to restore original behaviour and continue to work on this later. Maybe it worth to edit the TODO comment.
Alas, I don't think I'm rated to review this (I'm unfamiliar with what the pass does). I was likely the last person to touch that line, but due to unrelated instruction-insertion maintenance for the RemoveDIs project. In general, there's advice on maintaining source locations here: https://llvm.org/docs/HowToUpdateDebugInfo.html . We also usually try to avoid source-locations appearing on paths through the function where they didn't previously appear before optimisation. |
// TODO: StructurizeCFG 'Flow' blocks have debug locations from the | ||
// condition, for now just avoid copying these DebugLocs so that stepping | ||
// out of the then/else block in a debugger doesn't step to the condition. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should StructurizeCFG have not preserved these locations then?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe, but let's "revert" the change first and then what the structurizer should do there.
I suggest submitting this patch as we have test failures and find a better solution later. |
395f70b
to
63ec93f
Compare
After #73958 gdb.rocm/lane-execution.exp test started to fail due to incorrect dbg location. This is kind of a revert patch.