Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Invalid lifetime intrinsic after MemCpyOpt #66403

Open
vitalybuka opened this issue Sep 14, 2023 · 3 comments · May be fixed by #68990
Open

Invalid lifetime intrinsic after MemCpyOpt #66403

vitalybuka opened this issue Sep 14, 2023 · 3 comments · May be fixed by #68990
Assignees

Comments

@vitalybuka
Copy link
Collaborator

This is followup to https://reviews.llvm.org/D155406

Here is reproducer for issues caused revert e0f9cc7.

Thanks to @joanahalili for reproducer.

Cpp: https://gist.github.com/vitalybuka/b6361a44762ad3cf0c3e7ce47a09512f
IR before transformation: https://gist.github.com/vitalybuka/86a2bdc17d8e074da8839baacea4ec94

 define void @_ZN6rClass6refineERKNS_6paramsE(ptr nocapture noundef nonnull readnone align 1 dereferenceable(1) %this, ptr nocapture noundef nonnull readonly align 1 dereferenceable(1) %p) local_unnamed_addr #0 align 2 {
 entry:
   %c = alloca %class.vec, align 8
-  %b = alloca %class.vec, align 8
-  %ref.tmp = alloca %class.vec, align 8
   %ref.tmp5 = alloca %class.vec, align 8
   %h = alloca %class.vec.0, align 8
   %0 = load i8, ptr getelementptr inbounds ([8 x i8], ptr @linearVal, i64 0, i64 3), align 1
@@ -3506,18 +3504,17 @@
 
 _ZN3vecILj3EdEC2Ed.exit:                          ; preds = %cont2.i.i
   %conv4 = fpext float %sub to double
-  call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %b) #9
+  call void @llvm.lifetime.start.p0(i64 24, ptr %ref.tmp5)
   %cmp = fcmp oeq float %sub, 0.000000e+00
   br i1 %cmp, label %if.then, label %if.else
 
 if.then:                                          ; preds = %_ZN3vecILj3EdEC2Ed.exit
-  call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %ref.tmp) #9
   call void @llvm.experimental.noalias.scope.decl(metadata !7)
   br label %cont.i
 
 cont.i:                                           ; preds = %cont.i, %if.then
   %indvars.iv.i = phi i64 [ %indvars.iv.next.i, %cont.i ], [ 0, %if.then ]
-  %arrayidx3.i = getelementptr inbounds [3 x double], ptr %ref.tmp, i64 0, i64 %indvars.iv.i
+  %arrayidx3.i = getelementptr inbounds [3 x double], ptr %ref.tmp5, i64 0, i64 %indvars.iv.i
   %arrayidx.i = getelementptr inbounds [3 x double], ptr %c, i64 0, i64 %indvars.iv.i
   %1 = load double, ptr %arrayidx.i, align 8, !noalias !7
   %div.i = fdiv double %1, %conv4
@@ -3527,12 +3524,9 @@
   br i1 %exitcond.not.i, label %_ZdvRK3vecILj3EdEd.exit, label %cont.i, !llvm.loop !10
 
 _ZdvRK3vecILj3EdEd.exit:                          ; preds = %cont.i
-  call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(24) %b, ptr noundef nonnull align 8 dereferenceable(24) %ref.tmp, i64 24, i1 false)
-  call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %ref.tmp) #9
   br label %if.end
 
 if.else:                                          ; preds = %_ZN3vecILj3EdEC2Ed.exit
-  call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %ref.tmp5) #9
   call void @llvm.experimental.noalias.scope.decl(metadata !11)
   br label %cont.i12
 
@@ -3548,8 +3542,6 @@
   br i1 %exitcond.not.i17, label %_ZmlRK3vecILj3EdEd.exit, label %cont.i12, !llvm.loop !14
 
 _ZmlRK3vecILj3EdEd.exit:                          ; preds = %cont.i12
-  call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(24) %b, ptr noundef nonnull align 8 dereferenceable(24) %ref.tmp5, i64 24, i1 false)
-  call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %ref.tmp5) #9
   br label %if.end
 
 if.end:                                           ; preds = %_ZmlRK3vecILj3EdEd.exit, %_ZdvRK3vecILj3EdEd.exit
@@ -3562,8 +3554,9 @@
 
 cont4.i:                                          ; preds = %cont4.i.preheader, %cont4.i
   %indvars.iv.i23 = phi i64 [ %indvars.iv.next.i26, %cont4.i ], [ 0, %cont4.i.preheader ]
-  %arrayidx.i.i24 = getelementptr inbounds [3 x double], ptr %b, i64 0, i64 %indvars.iv.i23
+  %arrayidx.i.i24 = getelementptr inbounds [3 x double], ptr %ref.tmp5, i64 0, i64 %indvars.iv.i23
   %3 = load double, ptr %arrayidx.i.i24, align 8
+  call void @llvm.lifetime.end.p0(i64 24, ptr %ref.tmp5)  # BAD BAD BAD BAD
   %arrayidx.i25 = getelementptr inbounds [3 x float], ptr %h, i64 0, i64 %indvars.iv.i23
   %conv.i = fptrunc double %3 to float
   store float %conv.i, ptr %arrayidx.i25, align 4
@@ -3599,7 +3592,6 @@
 
 cont7:                                            ; preds = %_ZN3vecILj3EfE5clampEff.exit
   call void @llvm.lifetime.end.p0(i64 12, ptr nonnull %h) #9
-  call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %b) #9
   call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %c) #9
   ret void
 }

So the transformation eliminates 2 allocas and keeps %ref.tmp5.
Before transofmation: %ref.tmp5 was alive for the loop on "cont.i12".

But after transfomration we start lifetime of tmp5 at _ZN3vecILj3EdEC2Ed.exit:
and end lifetime inside of the loop cont4.i:
And the next iteration of the loop will be bad.

@vitalybuka
Copy link
Collaborator Author

cc @nikic

@vitalybuka vitalybuka pinned this issue Sep 15, 2023
@vitalybuka vitalybuka unpinned this issue Sep 15, 2023
@khei4
Copy link
Contributor

khei4 commented Sep 18, 2023

@vitalybuka @joanahalili
Thank you for the reproducer! I could see the problem, although the order may be slightly different.

  1. merging b with tmp.ref removes last lifetime.end for b, because no post-dominators exist with the bailing out branch handler.load_invalid_value.
  2. merging tmp.ref, with tmp.ref5 by widening the tmp.ref5's lifetime, inserts lifetime.end to the post-dominator of all uses, which is in the loop.

I'll reduce this case.

@khei4
Copy link
Contributor

khei4 commented Sep 19, 2023

This is the reduced problematic case. The lifetime.end insertion point is in the loop.

define void @multi_bb_loop_post_dominator(i32 %n) {
entry:
  %nlt1 = icmp slt i32 %n, 1
  %src = alloca %struct.Foo, align 8
  %dest = alloca %struct.Foo, align 8
  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src)
  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
  store %struct.Foo { i32 0, i32 1, i32 42 }, ptr %src
  br label %pre

pre:
  br label %loop_body

loop_body:
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 12, i1 false)
  %v = load i32, ptr %src
  %new_v = add i32 %v, 1
  store i32 %new_v, ptr %src
  %igtn = icmp sgt i32 %new_v, %n
  br i1 %igtn, label %loop_exit, label %pre

loop_exit:
  ret void
}

So, before inserting lifetime.end, we need to check whether it's in the loop or not. If so, we need to insert lifetime.end into the immediate post-dominator for that if it exists. This can be done by introducing LoopInfo to the MemCpyOpt.

Moreover, to avoid the lifetime marker dropping, we can insert lifetime.end for all the last uses for each execution path.

define void @multi_bb_unreachable_modref(i1 %b0) {
  %src = alloca %struct.Foo, align 4
  %dest = alloca %struct.Foo, align 4
  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src)
  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
  store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
  %1 = call i32 @use_nocapture(ptr noundef nocapture %src)
  br i1 %b0, label %bb0, label %exit

exit:
  %2 = call i32 @use_nocapture(ptr noundef nocapture %src)
  ret void

bb0:
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false)
  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %src)
  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest)
  ret void
}

Then, we can keep a lifetime range like the above. These can be obtained by calculating each leaf for memory location uses.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging a pull request may close this issue.

2 participants