-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[RISCV] Fix RISCVInsertVSETVLI coalescing clobbering VL def segment #167712
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -879,3 +879,46 @@ entry: | |
| %3 = tail call { <vscale x 8 x i8>, i64 } @llvm.riscv.vleff(<vscale x 8 x i8> poison, ptr %p, i64 %2) | ||
| ret void | ||
| } | ||
|
|
||
| ; This will create a live interval in such a way we can't coalesce two vsetvlis, | ||
| ; see the corresponding .mir test for more details. Make sure we check for this | ||
| ; and don't crash. | ||
| define void @coalesce_vl_clobber(ptr %p) { | ||
| ; CHECK-LABEL: coalesce_vl_clobber: | ||
| ; CHECK: # %bb.0: # %entry | ||
| ; CHECK-NEXT: li a2, 0 | ||
| ; CHECK-NEXT: li a1, 0 | ||
| ; CHECK-NEXT: vsetivli zero, 0, e8, mf2, ta, ma | ||
| ; CHECK-NEXT: vmclr.m v8 | ||
| ; CHECK-NEXT: vmv.v.i v9, 0 | ||
| ; CHECK-NEXT: vmv1r.v v0, v8 | ||
| ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 | ||
| ; CHECK-NEXT: .LBB43_1: # %vector.body | ||
| ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 | ||
| ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma | ||
| ; CHECK-NEXT: vmv1r.v v0, v8 | ||
| ; CHECK-NEXT: slli a3, a1, 32 | ||
| ; CHECK-NEXT: vsetvli a1, a2, e8, mf8, ta, ma | ||
| ; CHECK-NEXT: vsetivli zero, 0, e8, mf2, ta, mu | ||
| ; CHECK-NEXT: vmv.v.i v10, 0 | ||
| ; CHECK-NEXT: srli a3, a3, 32 | ||
| ; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 | ||
| ; CHECK-NEXT: vslideup.vx v10, v9, a3, v0.t | ||
| ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma | ||
| ; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t | ||
| ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma | ||
| ; CHECK-NEXT: vmv.v.i v10, 0 | ||
| ; CHECK-NEXT: vse32.v v10, (a0), v0.t | ||
| ; CHECK-NEXT: li a2, 1 | ||
| ; CHECK-NEXT: j .LBB43_1 | ||
| entry: | ||
| br label %vector.body | ||
|
|
||
| vector.body: | ||
| %avl = phi i64 [ 0, %entry ], [ 1, %vector.body ] | ||
| %prev.evl = phi i32 [ 0, %entry ], [ %0, %vector.body ] | ||
| %0 = tail call i32 @llvm.experimental.get.vector.length(i64 %avl, i32 1, i1 true) | ||
| %1 = tail call <vscale x 4 x i1> @llvm.experimental.vp.splice(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 0, <vscale x 4 x i1> zeroinitializer, i32 %prev.evl, i32 0) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test case looks fragile. Adding constant folding to experimental.vp.splice in SelectionDAG would break this. |
||
| tail call void @llvm.vp.store(<vscale x 4 x float> zeroinitializer, ptr %p, <vscale x 4 x i1> %1, i32 %0) | ||
| br label %vector.body | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -100,6 +100,10 @@ | |
| ret void | ||
| } | ||
|
|
||
| define void @coalesce_vl_clobber() { | ||
| ret void | ||
| } | ||
|
|
||
| define void @vsetvli_vleff() { | ||
| ret void | ||
| } | ||
|
|
@@ -624,10 +628,33 @@ body: | | |
| ; CHECK: liveins: $x8, $v8 | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: %x:gprnox0 = COPY $x8 | ||
| ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 1, 208 /* e32, m1, ta, ma */, implicit-def $vl, implicit-def $vtype | ||
| ; CHECK-NEXT: dead %v:vr = COPY $v8, implicit $vtype | ||
| ; CHECK-NEXT: dead %x:gprnox0 = PseudoVSETVLI %x, 208 /* e32, m1, ta, ma */, implicit-def $vl, implicit-def $vtype | ||
| %x:gprnox0 = COPY $x8 | ||
| dead $x0 = PseudoVSETIVLI 1, 208, implicit-def $vl, implicit-def $vtype | ||
| %v:vr = COPY $v8, implicit $vtype | ||
| %x = PseudoVSETVLI %x, 208, implicit-def $vl, implicit-def $vtype | ||
| ... | ||
| --- | ||
| # Because of the %y:gprnox0 = COPY %x, we can't extend the live range of %x from | ||
| # the second vsetvli to the first vsetvli when coalescing. | ||
| name: coalesce_vl_clobber | ||
| tracksRegLiveness: true | ||
| body: | | ||
| bb.0: | ||
| liveins: $x8, $v8 | ||
| ; CHECK-LABEL: name: coalesce_vl_clobber | ||
| ; CHECK: liveins: $x8, $v8 | ||
| ; CHECK-NEXT: {{ $}} | ||
| ; CHECK-NEXT: %x:gprnox0 = COPY $x8 | ||
| ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 1, 208 /* e32, m1, ta, ma */, implicit-def $vl, implicit-def $vtype | ||
| ; CHECK-NEXT: dead %y:gprnox0 = COPY %x | ||
| ; CHECK-NEXT: dead %v:vr = COPY $v8, implicit $vtype | ||
| ; CHECK-NEXT: dead %x:gprnox0 = PseudoVSETVLI %x, 208 /* e32, m1, ta, ma */, implicit-def $vl, implicit-def $vtype | ||
| %x:gprnox0 = COPY $x8 | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it bails out in this case because we're not in SSA form at this point, and thus the interval of
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think the segment between It's more that we have two segments for %x, Because the two segments have different value nos from the defs at 16r and 80r, when we go to coalesce the vsetvlis we can't move the 80r definition of %x upwards, since it overlaps with The We could try to be really smart and here and figure out that because we're going to move the use of the 16r def up to 32r, the segment could be shrunk to 48r). That would fix the regression in coalesce_vl_avl_same_reg, but it seems like a lot of effort for something that doesn't seem to show up in practice |
||
| dead $x0 = PseudoVSETIVLI 1, 208, implicit-def $vl, implicit-def $vtype | ||
| %y:gprnox0 = COPY %x | ||
| %v:vr = COPY $v8, implicit $vtype | ||
| %x = PseudoVSETVLI %x, 208, implicit-def $vl, implicit-def $vtype | ||
| ... | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This vsetivli seems unnecessary. It only exists because of the vmv1r.v right below it, but vtype is valid for all paths that can get here which is all the vmv1r.v should care about.