[ArgPromotion] Miscompile at -O3 #84807

dtcxzyw · 2024-03-11T18:15:28Z

Reduced test case: https://godbolt.org/z/5z5sYv4nq

#include <stdint.h>
#include <stdio.h>
struct a {
  int16_t b;
  uint64_t c;
} f = {1};
uint8_t d, h;
static struct a *g[] = {&f, &f, &f, &f};
int32_t i;
static struct a j(struct a *q) {
  for (d = 5; d < 6; d = d + 1)
    f.b = h;
  return *q;
}
int main() {
  struct a *l = g[3];
  *l = j(l);
  i = f.b;
  printf("%d\n", i);
}

Bisected to ArgumentPromotionPass.

Before:

; ModuleID = 'test.c'
source_filename = "test.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

%struct.a = type { i16, i64 }

@f = dso_local global %struct.a { i16 1, i64 0 }, align 8
@i = dso_local local_unnamed_addr global i32 0, align 4
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
@d = dso_local local_unnamed_addr global i8 0, align 1
@h = dso_local local_unnamed_addr global i8 0, align 1

; Function Attrs: nounwind uwtable
define dso_local i32 @main() local_unnamed_addr #0 {
entry:
  %tmp.sroa.4 = alloca [6 x i8], align 2
  call void @llvm.lifetime.start.p0(i64 6, ptr nonnull %tmp.sroa.4)
  %call = call fastcc { i16, i64 } @j(ptr noundef nonnull @f)
  %0 = extractvalue { i16, i64 } %call, 0
  %1 = extractvalue { i16, i64 } %call, 1
  store i16 %0, ptr @f, align 8, !tbaa !5
  call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 2 dereferenceable(6) getelementptr inbounds (i8, ptr @f, i64 2), ptr noundef nonnull align 2 dereferenceable(6) %tmp.sroa.4, i64 6, i1 false), !tbaa.struct !9
  store i64 %1, ptr getelementptr inbounds (%struct.a, ptr @f, i64 0, i32 1), align 8, !tbaa !10
  call void @llvm.lifetime.end.p0(i64 6, ptr nonnull %tmp.sroa.4)
  %conv = sext i16 %0 to i32
  store i32 %conv, ptr @i, align 4, !tbaa !12
  %call1 = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str, i32 noundef %conv)
  ret i32 0
}

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1

; Function Attrs: nounwind uwtable
define internal fastcc { i16, i64 } @j(ptr nocapture noundef readonly %q) unnamed_addr #0 {
entry:
  br label %for.cond

for.cond:                                         ; preds = %for.body, %entry
  %storemerge = phi i8 [ 5, %entry ], [ %add, %for.body ]
  store i8 %storemerge, ptr @d, align 1, !tbaa !14
  %cmp = icmp ult i8 %storemerge, 6
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  %0 = load i8, ptr @h, align 1, !tbaa !14
  %conv2 = zext i8 %0 to i16
  store i16 %conv2, ptr @f, align 8, !tbaa !15
  %1 = load i8, ptr @d, align 1, !tbaa !14
  %add = add i8 %1, 1
  br label %for.cond, !llvm.loop !17

for.end:                                          ; preds = %for.cond
  %retval.sroa.0.0.copyload = load i16, ptr %q, align 8, !tbaa !5
  %retval.sroa.25.0.q.addr.0..sroa_idx = getelementptr inbounds i8, ptr %q, i64 8
  %retval.sroa.25.0.copyload = load i64, ptr %retval.sroa.25.0.q.addr.0..sroa_idx, align 8, !tbaa !10
  %.fca.0.insert = insertvalue { i16, i64 } poison, i16 %retval.sroa.0.0.copyload, 0
  %.fca.1.insert = insertvalue { i16, i64 } %.fca.0.insert, i64 %retval.sroa.25.0.copyload, 1
  ret { i16, i64 } %.fca.1.insert
}

; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #2

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1

; Function Attrs: nofree nounwind
declare noundef i32 @printf(ptr nocapture noundef readonly, ...) local_unnamed_addr #3

attributes #0 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #2 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: readwrite) }
attributes #3 = { nofree nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }

!llvm.module.flags = !{!0, !1, !2, !3}
!llvm.ident = !{!4}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{!"clang version 19.0.0git"}
!5 = !{!6, !6, i64 0}
!6 = !{!"short", !7, i64 0}
!7 = !{!"omnipotent char", !8, i64 0}
!8 = !{!"Simple C/C++ TBAA"}
!9 = !{i64 6, i64 8, !10}
!10 = !{!11, !11, i64 0}
!11 = !{!"long", !7, i64 0}
!12 = !{!13, !13, i64 0}
!13 = !{!"int", !7, i64 0}
!14 = !{!7, !7, i64 0}
!15 = !{!16, !6, i64 0}
!16 = !{!"a", !6, i64 0, !11, i64 8}
!17 = distinct !{!17, !18}
!18 = !{!"llvm.loop.mustprogress"}

After:

; ModuleID = 'test.c'
source_filename = "test.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

%struct.a = type { i16, i64 }

@f = dso_local global %struct.a { i16 1, i64 0 }, align 8
@i = dso_local local_unnamed_addr global i32 0, align 4
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
@d = dso_local local_unnamed_addr global i8 0, align 1
@h = dso_local local_unnamed_addr global i8 0, align 1

; Function Attrs: nounwind uwtable
define dso_local i32 @main() local_unnamed_addr #0 {
entry:
  %tmp.sroa.4 = alloca [6 x i8], align 2
  call void @llvm.lifetime.start.p0(i64 6, ptr nonnull %tmp.sroa.4)
  %f.val = load i16, ptr @f, align 8
  %0 = getelementptr i8, ptr @f, i64 8
  %f.val4 = load i64, ptr %0, align 8
  %call = call fastcc { i16, i64 } @j(i16 %f.val, i64 %f.val4)
  %1 = extractvalue { i16, i64 } %call, 0
  %2 = extractvalue { i16, i64 } %call, 1
  store i16 %1, ptr @f, align 8, !tbaa !5
  call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 2 dereferenceable(6) getelementptr inbounds (i8, ptr @f, i64 2), ptr noundef nonnull align 2 dereferenceable(6) %tmp.sroa.4, i64 6, i1 false), !tbaa.struct !9
  store i64 %2, ptr getelementptr inbounds (%struct.a, ptr @f, i64 0, i32 1), align 8, !tbaa !10
  call void @llvm.lifetime.end.p0(i64 6, ptr nonnull %tmp.sroa.4)
  %conv = sext i16 %1 to i32
  store i32 %conv, ptr @i, align 4, !tbaa !12
  %call1 = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str, i32 noundef %conv)
  ret i32 0
}

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1

; Function Attrs: nounwind uwtable
define internal fastcc { i16, i64 } @j(i16 %q.0.val, i64 %q.8.val) unnamed_addr #0 {
entry:
  br label %for.cond

for.cond:                                         ; preds = %for.body, %entry
  %storemerge = phi i8 [ 5, %entry ], [ %add, %for.body ]
  store i8 %storemerge, ptr @d, align 1, !tbaa !14
  %cmp = icmp ult i8 %storemerge, 6
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  %0 = load i8, ptr @h, align 1, !tbaa !14
  %conv2 = zext i8 %0 to i16
  store i16 %conv2, ptr @f, align 8, !tbaa !15
  %1 = load i8, ptr @d, align 1, !tbaa !14
  %add = add i8 %1, 1
  br label %for.cond, !llvm.loop !17

for.end:                                          ; preds = %for.cond
  %.fca.0.insert = insertvalue { i16, i64 } poison, i16 %q.0.val, 0
  %.fca.1.insert = insertvalue { i16, i64 } %.fca.0.insert, i64 %q.8.val, 1
  ret { i16, i64 } %.fca.1.insert
}

; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #2

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1

; Function Attrs: nofree nounwind
declare noundef i32 @printf(ptr nocapture noundef readonly, ...) local_unnamed_addr #3

attributes #0 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #2 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: readwrite) }
attributes #3 = { nofree nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }

!llvm.module.flags = !{!0, !1, !2, !3}
!llvm.ident = !{!4}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{!"clang version 19.0.0git"}
!5 = !{!6, !6, i64 0}
!6 = !{!"short", !7, i64 0}
!7 = !{!"omnipotent char", !8, i64 0}
!8 = !{!"Simple C/C++ TBAA"}
!9 = !{i64 6, i64 8, !10}
!10 = !{!11, !11, i64 0}
!11 = !{!"long", !7, i64 0}
!12 = !{!13, !13, i64 0}
!13 = !{!"int", !7, i64 0}
!14 = !{!7, !7, i64 0}
!15 = !{!16, !6, i64 0}
!16 = !{!"a", !6, i64 0, !11, i64 8}
!17 = distinct !{!17, !18}
!18 = !{!"llvm.loop.mustprogress"}

We cannot promote the loads because %q aliases with @f.

LLVM version: 0f501c3

The text was updated successfully, but these errors were encountered:

dtcxzyw · 2024-03-11T18:21:34Z

cc @nikic @fhahn

fhahn · 2024-03-11T19:08:47Z

Hmm, cannot reproduce with plain opt.

fhahn · 2024-03-11T19:40:09Z

Ok reproduced with custom use list order. Will prepare a fix soon

Test case for #84807, showing a mis-compile in ArgPromotion.

The TranspBlocks set was used to cache aliasing decision for all processed loads in the parent loop. This is incorrect, because each load can access a different location, which means one load not being modified in a block doesn't translate to another load not being modified in the same block. All loads access the same underlying object, so we could perhaps use a location without size for all loads and retain the cache, but that would mean we loose precision. For now, just drop the cache. Fixes llvm#84807

antoniofrighetto · 2024-03-11T21:25:33Z

Just out of curiosity, when querying AA, we are expecting canInstructionRangeModRef to return true on the first load popped out (%retval.sroa.25.0.copyload) here, correct?

fhahn · 2024-03-11T21:27:24Z

Just out of curiosity, when querying AA, we are expecting canInstructionRangeModRef to return true on the first load popped out (%retval.sroa.25.0.copyload) here, correct?

No, AFAICT AA is working correctly, but the results are used for incorrect caching, see #84835

antoniofrighetto · 2024-03-11T21:38:13Z

Just out of curiosity, when querying AA, we are expecting canInstructionRangeModRef to return true on the first load popped out (%retval.sroa.25.0.copyload) here, correct?

No, AFAICT AA is working correctly, but the results are used for incorrect caching, see #84835

Makes sense, thanks (didn't see the PR opened by the time had commented).

fhahn · 2024-03-11T21:49:38Z

Makes sense, thanks (didn't see the PR opened by the time had commented).

Unfortunately Github does not seem to notify when a PR mentions an issue :(

The TranspBlocks set was used to cache aliasing decision for all processed loads in the parent loop. This is incorrect, because each load can access a different location, which means one load not being modified in a block doesn't translate to another load not being modified in the same block. All loads access the same underlying object, so we could perhaps use a location without size for all loads and retain the cache, but that would mean we loose precision. For now, just drop the cache. Fixes #84807 PR: #84835

nikic · 2024-03-12T16:40:58Z

/cherry-pick 31ffdb5 bba4a1d

Test case for llvm#84807, showing a mis-compile in ArgPromotion. (cherry picked from commit 31ffdb5)

The TranspBlocks set was used to cache aliasing decision for all processed loads in the parent loop. This is incorrect, because each load can access a different location, which means one load not being modified in a block doesn't translate to another load not being modified in the same block. All loads access the same underlying object, so we could perhaps use a location without size for all loads and retain the cache, but that would mean we loose precision. For now, just drop the cache. Fixes llvm#84807 PR: llvm#84835 (cherry picked from commit bba4a1d)

llvmbot · 2024-03-12T16:46:01Z

/pull-request #84945

Test case for llvm#84807, showing a mis-compile in ArgPromotion. (cherry picked from commit 31ffdb5)

The TranspBlocks set was used to cache aliasing decision for all processed loads in the parent loop. This is incorrect, because each load can access a different location, which means one load not being modified in a block doesn't translate to another load not being modified in the same block. All loads access the same underlying object, so we could perhaps use a location without size for all loads and retain the cache, but that would mean we loose precision. For now, just drop the cache. Fixes llvm#84807 PR: llvm#84835 (cherry picked from commit bba4a1d)

dtcxzyw added miscompilation llvm:optimizations llvm:analysis labels Mar 11, 2024

fhahn added a commit that referenced this issue Mar 11, 2024

[ArgPromotion] Add test case for #84807.

31ffdb5

Test case for #84807, showing a mis-compile in ArgPromotion.

fhahn mentioned this issue Mar 11, 2024

[ArgPromotion] Remove incorrect TranspBlocks set for loads. #84835

Merged

fhahn closed this as completed in #84835 Mar 12, 2024

nikic added this to the LLVM 18.X Release milestone Mar 12, 2024

EugeneZelenko added ipo Interprocedural optimizations and removed llvm:optimizations llvm:analysis labels Mar 12, 2024

llvmbot pushed a commit to llvmbot/llvm-project that referenced this issue Mar 12, 2024

[ArgPromotion] Add test case for llvm#84807.

4e79eba

Test case for llvm#84807, showing a mis-compile in ArgPromotion. (cherry picked from commit 31ffdb5)

EugeneZelenko added the release:backport label Mar 12, 2024

llvmbot pushed a commit to llvmbot/llvm-project that referenced this issue Mar 13, 2024

[ArgPromotion] Add test case for llvm#84807.

25a989c

Test case for llvm#84807, showing a mis-compile in ArgPromotion. (cherry picked from commit 31ffdb5)

pointhex mentioned this issue May 7, 2024

getStyleDiagHandler #91314

Closed

aemerson mentioned this issue May 9, 2024

release/18.x: [AArc64][GlobalISel] Fix legalizer assert for G_INSERT_VECTOR_ELT - manual merge #91672

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[ArgPromotion] Miscompile at -O3 #84807

[ArgPromotion] Miscompile at -O3 #84807

dtcxzyw commented Mar 11, 2024 •

edited

dtcxzyw commented Mar 11, 2024

fhahn commented Mar 11, 2024

fhahn commented Mar 11, 2024

antoniofrighetto commented Mar 11, 2024

fhahn commented Mar 11, 2024

antoniofrighetto commented Mar 11, 2024

fhahn commented Mar 11, 2024

nikic commented Mar 12, 2024

llvmbot commented Mar 12, 2024

[ArgPromotion] Miscompile at -O3 #84807

[ArgPromotion] Miscompile at -O3 #84807

Comments

dtcxzyw commented Mar 11, 2024 • edited

dtcxzyw commented Mar 11, 2024

fhahn commented Mar 11, 2024

fhahn commented Mar 11, 2024

antoniofrighetto commented Mar 11, 2024

fhahn commented Mar 11, 2024

antoniofrighetto commented Mar 11, 2024

fhahn commented Mar 11, 2024

nikic commented Mar 12, 2024

llvmbot commented Mar 12, 2024

dtcxzyw commented Mar 11, 2024 •

edited