Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Miscompilation after InstCombinePass #74890

Closed
cbeuw opened this issue Dec 8, 2023 · 4 comments
Closed

Miscompilation after InstCombinePass #74890

cbeuw opened this issue Dec 8, 2023 · 4 comments

Comments

@cbeuw
Copy link

cbeuw commented Dec 8, 2023

Reproduction:

; ModuleID = 'repro.48d8976a07162e6d-cgu.0'
source_filename = "repro.48d8976a07162e6d-cgu.0"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

%Adt58 = type { i8, [103 x i8] }
%"Adt58::Variant1" = type { [1 x i64], { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr } }
%"Adt58::Variant2" = type { [1 x i8], [8 x i8], [1 x i8], [8 x i16], [3 x i16], [2 x i64] }
%Adt65 = type { { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, [2 x i64] }

@vtable.0 = private unnamed_addr constant <{ ptr, [16 x i8], ptr, ptr, ptr }> <{ ptr @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h9468bf8cd9285ca4E", [16 x i8] c"\08\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00", ptr @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17hb49b164c8a336846E", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h2ae8767ae801fedeE", ptr @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h2ae8767ae801fedeE" }>, align 8
@alloc_87551382a9de3243abbfdbda2f0b586b = private unnamed_addr constant <{ [4 x i8] }> <{ [4 x i8] c"%d\0A\00" }>, align 1

; std::sys_common::backtrace::__rust_begin_short_backtrace
; Function Attrs: noinline nonlazybind uwtable
; Forwards %f to the FnOnce::call_once shim below, then executes an empty
; side-effecting inline asm that clobbers memory before returning.
define internal void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h7f191982774c7ca6E(ptr %f) unnamed_addr #0 {
start:
; call core::ops::function::FnOnce::call_once
  call void @_ZN4core3ops8function6FnOnce9call_once17h5bea907e86c354bfE(ptr %f)
  call void asm sideeffect "", "~{memory}"(), !srcloc !4
  ret void
}

; std::rt::lang_start::{{closure}}
; Function Attrs: inlinehint nonlazybind uwtable
; Loads a pointer from %_1, runs it via __rust_begin_short_backtrace, then
; returns the i8 produced by Termination::report zero-extended to i32.
define internal i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h2ae8767ae801fedeE"(ptr align 8 %_1) unnamed_addr #1 {
start:
  %self = alloca i8, align 1
  %_4 = load ptr, ptr %_1, align 8, !nonnull !5, !noundef !5
; call std::sys_common::backtrace::__rust_begin_short_backtrace
  call void @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h7f191982774c7ca6E(ptr %_4)
; call <() as std::process::Termination>::report
  %0 = call i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hb7858f79eb84fda8E"()
; The report value is spilled to %self and immediately reloaded.
  store i8 %0, ptr %self, align 1
  %_6 = load i8, ptr %self, align 1, !noundef !5
  %_0 = zext i8 %_6 to i32
  ret i32 %_0
}

; core::ops::function::FnOnce::call_once{{vtable.shim}}
; Function Attrs: inlinehint nonlazybind uwtable
; Loads the pointer stored at %_1 and forwards it to the i32-returning
; call_once below, returning that result unchanged.
define internal i32 @"_ZN4core3ops8function6FnOnce40call_once$u7b$$u7b$vtable.shim$u7d$$u7d$17hb49b164c8a336846E"(ptr %_1) unnamed_addr #1 {
start:
; %_2 is a zero-sized alloca that is never used in this function.
  %_2 = alloca {}, align 1
  %0 = load ptr, ptr %_1, align 8, !nonnull !5, !noundef !5
; call core::ops::function::FnOnce::call_once
  %_0 = call i32 @_ZN4core3ops8function6FnOnce9call_once17h27b28c7115d0cffdE(ptr %0)
  ret i32 %_0
}

; core::ops::function::FnOnce::call_once
; Function Attrs: inlinehint nonlazybind uwtable
; Stores %0 into a stack slot and invokes the lang_start closure with a
; pointer to that slot. The normal path returns the closure's i32; the unwind
; path spills the landing-pad value to %1, reloads it and resumes unwinding.
define internal i32 @_ZN4core3ops8function6FnOnce9call_once17h27b28c7115d0cffdE(ptr %0) unnamed_addr #1 personality ptr @rust_eh_personality {
start:
  %1 = alloca { ptr, i32 }, align 8
  %_2 = alloca {}, align 1
  %_1 = alloca ptr, align 8
  store ptr %0, ptr %_1, align 8
; invoke std::rt::lang_start::{{closure}}
  %_0 = invoke i32 @"_ZN3std2rt10lang_start28_$u7b$$u7b$closure$u7d$$u7d$17h2ae8767ae801fedeE"(ptr align 8 %_1)
          to label %bb1 unwind label %cleanup

; Rebuilds the { ptr, i32 } exception pair from the %1 slot and resumes.
bb3:                                              ; preds = %cleanup
  %2 = load ptr, ptr %1, align 8, !noundef !5
  %3 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
  %4 = load i32, ptr %3, align 8, !noundef !5
  %5 = insertvalue { ptr, i32 } poison, ptr %2, 0
  %6 = insertvalue { ptr, i32 } %5, i32 %4, 1
  resume { ptr, i32 } %6

; Cleanup-only landing pad: saves both components of the pair into %1.
cleanup:                                          ; preds = %start
  %7 = landingpad { ptr, i32 }
          cleanup
  %8 = extractvalue { ptr, i32 } %7, 0
  %9 = extractvalue { ptr, i32 } %7, 1
  %10 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 0
  store ptr %8, ptr %10, align 8
  %11 = getelementptr inbounds { ptr, i32 }, ptr %1, i32 0, i32 1
  store i32 %9, ptr %11, align 8
  br label %bb3

bb1:                                              ; preds = %start
  ret i32 %_0
}

; core::ops::function::FnOnce::call_once
; Function Attrs: inlinehint nonlazybind uwtable
; Treats %_1 as a function pointer and calls it indirectly with no arguments.
define internal void @_ZN4core3ops8function6FnOnce9call_once17h5bea907e86c354bfE(ptr %_1) unnamed_addr #1 {
start:
; %_2 is a zero-sized alloca that is never used in this function.
  %_2 = alloca {}, align 1
  call void %_1()
  ret void
}

; core::ptr::drop_in_place<std::rt::lang_start<()>::{{closure}}>
; Function Attrs: inlinehint nonlazybind uwtable
; No-op: returns immediately without touching %_1. Referenced as the first
; entry of @vtable.0.
define internal void @"_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h9468bf8cd9285ca4E"(ptr align 8 %_1) unnamed_addr #1 {
start:
  ret void
}

; core::hint::black_box
; Function Attrs: inlinehint nonlazybind uwtable
; Copies 24 bytes from %dummy into the sret slot %_0, then passes %_0 as a
; register operand to an empty side-effecting inline asm with a memory
; clobber.
define internal void @_ZN4core4hint9black_box17h3a52c818116069c6E(ptr sret({ [2 x i64], i16, [1 x i16], i32 }) align 8 %_0, ptr align 8 %dummy) unnamed_addr #1 {
start:
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %_0, ptr align 8 %dummy, i64 24, i1 false)
  call void asm sideeffect "", "r,~{memory}"(ptr %_0), !srcloc !4
  ret void
}

; core::hint::black_box
; Function Attrs: inlinehint nonlazybind uwtable
; [7 x i64] instantiation: copies 56 bytes from %dummy into the sret slot
; %_0, then passes %_0 as a register operand to an empty side-effecting
; inline asm with a memory clobber.
define internal void @_ZN4core4hint9black_box17h43a8b18395b882e6E(ptr sret([7 x i64]) align 8 %_0, ptr align 8 %dummy) unnamed_addr #1 {
start:
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %_0, ptr align 8 %dummy, i64 56, i1 false)
  call void asm sideeffect "", "r,~{memory}"(ptr %_0), !srcloc !4
  ret void
}

; <() as std::process::Termination>::report
; Function Attrs: inlinehint nonlazybind uwtable
; Always returns the constant 0.
define internal i8 @"_ZN54_$LT$$LP$$RP$$u20$as$u20$std..process..Termination$GT$6report17hb7858f79eb84fda8E"() unnamed_addr #1 {
start:
  ret i8 0
}

; repro::dump_var
; Function Attrs: nonlazybind uwtable
; Prints the i16 stored at the start of %x: the value is sign-extended to
; i32 and passed to printf with the "%d\n" format string held in
; @alloc_87551382a9de3243abbfdbda2f0b586b. This is the only output of the
; reproducer.
define internal void @_ZN5repro8dump_var17h5f90de920b6ee1d3E(ptr align 8 %x) unnamed_addr #2 {
start:
  %_8 = load i16, ptr %x, align 8, !noundef !5
  %_7 = sext i16 %_8 to i32
  %_2 = call i32 (ptr, ...) @printf(ptr @alloc_87551382a9de3243abbfdbda2f0b586b, i32 %_7)
  ret void
}

; repro::black_box
; Function Attrs: nonlazybind uwtable
; Hands the aggregate at %v to core::hint::black_box, using a local
; temporary as the sret destination. The result is not read afterwards.
define internal void @_ZN5repro9black_box17h58109cf990c39389E(ptr align 8 %v) unnamed_addr #2 {
start:
  %_2 = alloca { [2 x i64], i16, [1 x i16], i32 }, align 8
; call core::hint::black_box
  call void @_ZN4core4hint9black_box17h3a52c818116069c6E(ptr sret({ [2 x i64], i16, [1 x i16], i32 }) align 8 %_2, ptr align 8 %v)
  ret void
}

; repro::fn4
; Function Attrs: nonlazybind uwtable
; Builds the argument aggregate
;   { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }
; from the inside out in stack temporaries and passes it to fn5.
; Note: only the i16 and the [7 x i64] of the innermost struct are written
; here; nothing in this function stores to its [3 x i16] middle field, yet
; the whole 64-byte struct is copied onward by memcpy.
define internal void @_ZN5repro3fn417hbcad5aa86ffc8eb2E() unnamed_addr #2 {
start:
  %_8 = alloca [7 x i64], align 8
  %_7 = alloca { i16, [3 x i16], [7 x i64] }, align 8
  %_6 = alloca { double, { i16, [3 x i16], [7 x i64] } }, align 8
  %_5 = alloca [2 x i64], align 8
  %_4 = alloca { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, align 8
  %_3 = alloca [2 x i64], align 8
  %i = alloca i128, align 8
; %i is the i128 whose address ends up in the aggregate's pointer field.
  store i128 0, ptr %i, align 8
; Zero-fill the [2 x i64] (%_5) and the [7 x i64] (%_8).
  %0 = getelementptr inbounds [2 x i64], ptr %_5, i64 0, i64 0
  call void @llvm.memset.p0.i64(ptr align 8 %0, i8 0, i64 16, i1 false)
  %1 = getelementptr inbounds [7 x i64], ptr %_8, i64 0, i64 0
  call void @llvm.memset.p0.i64(ptr align 8 %1, i8 0, i64 56, i1 false)
; %_7 = { i16 0, <unwritten [3 x i16]>, copy of %_8 }.
  store i16 0, ptr %_7, align 8
  %2 = getelementptr inbounds { i16, [3 x i16], [7 x i64] }, ptr %_7, i32 0, i32 2
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %2, ptr align 8 %_8, i64 56, i1 false)
; %_6 = { double 0.0, copy of %_7 }.
  store double 0.000000e+00, ptr %_6, align 8
  %3 = getelementptr inbounds { double, { i16, [3 x i16], [7 x i64] } }, ptr %_6, i32 0, i32 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %3, ptr align 8 %_7, i64 64, i1 false)
; %_4 = { copy of %_5 (moved as one <2 x i64>), copy of %_6, ptr to %i }.
  %4 = load <2 x i64>, ptr %_5, align 8
  store <2 x i64> %4, ptr %_4, align 8
  %5 = getelementptr inbounds { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, ptr %_4, i32 0, i32 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %5, ptr align 8 %_6, i64 72, i1 false)
  %6 = getelementptr inbounds { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, ptr %_4, i32 0, i32 2
  store ptr %i, ptr %6, align 8
; call repro::fn5
; The [2 x i64] written through the sret slot %_3 is not read afterwards.
  call void @_ZN5repro3fn517h5d2910b0843c33f6E(ptr sret([2 x i64]) align 8 %_3, ptr align 8 %_4)
  ret void
}

; repro::fn5
; Function Attrs: nonlazybind uwtable
; This is the function named in the opt-bisect output of the report
; (InstCombinePass on fn5 is the first pass that is "NOT running").
; %_0 is the sret [2 x i64] result; %_1 is the incoming aggregate built by
; fn4. The function assembles a local aggregate %_5, calls fn6 with copies of
; %_1 and of %_5's middle field, then stores %_5 into the enum slot %_2 and
; builds the value passed to repro::black_box.
define internal void @_ZN5repro3fn517h5d2910b0843c33f6E(ptr sret([2 x i64]) align 8 %_0, ptr align 8 %_1) unnamed_addr #2 {
start:
  %0 = alloca { double, { i16, [3 x i16], [7 x i64] } }, align 8
  %1 = alloca { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, align 8
  %_6 = alloca [3 x i64], align 8
  %_5 = alloca { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, align 8
  %_3 = alloca { [2 x i64], i16, [1 x i16], i32 }, align 8
  %_2 = alloca %Adt58, align 8
; Write the two constant result words into the sret slot.
  %2 = getelementptr inbounds [2 x i64], ptr %_0, i64 0, i64 0
  store i64 -3700765774516778503, ptr %2, align 8
  %3 = getelementptr inbounds [2 x i64], ptr %_0, i64 0, i64 1
  store i64 -3642890134137765099, ptr %3, align 8
; %_5's innermost struct: i16 = 0, [7 x i64] copied from the same field of
; %_1. The [3 x i16] between them is not written by this function.
  %4 = getelementptr inbounds { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, ptr %_5, i32 0, i32 1
  %5 = getelementptr inbounds { double, { i16, [3 x i16], [7 x i64] } }, ptr %4, i32 0, i32 1
  store i16 0, ptr %5, align 8
  %6 = getelementptr inbounds { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, ptr %_1, i32 0, i32 1
  %7 = getelementptr inbounds { double, { i16, [3 x i16], [7 x i64] } }, ptr %6, i32 0, i32 1
  %8 = getelementptr inbounds { i16, [3 x i16], [7 x i64] }, ptr %7, i32 0, i32 2
  %9 = getelementptr inbounds { i16, [3 x i16], [7 x i64] }, ptr %5, i32 0, i32 2
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %9, ptr align 8 %8, i64 56, i1 false)
; %_5's double = 0.0.
  %10 = getelementptr inbounds { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, ptr %_5, i32 0, i32 1
  store double 0.000000e+00, ptr %10, align 8
; %_5's leading [2 x i64] = the same constant twice.
  %11 = getelementptr inbounds [2 x i64], ptr %_5, i64 0, i64 0
  store i64 -8075721443188218256, ptr %11, align 8
  %12 = getelementptr inbounds [2 x i64], ptr %_5, i64 0, i64 1
  store i64 -8075721443188218256, ptr %12, align 8
; fn6 receives by-pointer copies: %1 = all 96 bytes of %_1, %0 = the 72-byte
; middle field of %_5.
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %1, ptr align 8 %_1, i64 96, i1 false)
  %13 = getelementptr inbounds { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, ptr %_5, i32 0, i32 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %0, ptr align 8 %13, i64 72, i1 false)
; call repro::fn6
  call void @_ZN5repro3fn617ha4f26e5581c766d6E(ptr sret([3 x i64]) align 8 %_6, ptr align 8 %1, ptr align 8 %0)
; Copy the trailing pointer field from %_1 into %_5.
  %14 = getelementptr inbounds { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, ptr %_5, i32 0, i32 2
  %15 = getelementptr inbounds { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, ptr %_1, i32 0, i32 2
  %16 = load ptr, ptr %15, align 8, !noundef !5
  store ptr %16, ptr %14, align 8
; %_7 = bitwise NOT of the i16 stored into %_5 above.
  %17 = getelementptr inbounds { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, ptr %_5, i32 0, i32 1
  %18 = getelementptr inbounds { double, { i16, [3 x i16], [7 x i64] } }, ptr %17, i32 0, i32 1
  %19 = load i16, ptr %18, align 8, !noundef !5
  %_7 = xor i16 %19, -1
; Store %_5 (96 bytes) as the Variant1 payload of %_2 and set the leading
; tag byte to 1.
  %20 = getelementptr inbounds %"Adt58::Variant1", ptr %_2, i32 0, i32 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %20, ptr align 8 %_5, i64 96, i1 false)
  store i8 1, ptr %_2, align 8
; Write, then read back as one <2 x i64>, the [2 x i64] field of the
; Variant2 view of the same %_2 storage.
  %21 = getelementptr inbounds %"Adt58::Variant2", ptr %_2, i32 0, i32 5
  %22 = getelementptr inbounds [2 x i64], ptr %21, i64 0, i64 0
  store i64 -8075721443188218256, ptr %22, align 8
  %23 = getelementptr inbounds [2 x i64], ptr %21, i64 0, i64 1
  store i64 -8075721443188218256, ptr %23, align 8
  %24 = getelementptr inbounds %"Adt58::Variant2", ptr %_2, i32 0, i32 5
  %25 = load <2 x i64>, ptr %24, align 8
; %_3 = { that [2 x i64], %_7, <unwritten [1 x i16]>, i32 396128 }.
  store <2 x i64> %25, ptr %_3, align 8
  %26 = getelementptr inbounds { [2 x i64], i16, [1 x i16], i32 }, ptr %_3, i32 0, i32 1
  store i16 %_7, ptr %26, align 8
  %27 = getelementptr inbounds { [2 x i64], i16, [1 x i16], i32 }, ptr %_3, i32 0, i32 3
  store i32 396128, ptr %27, align 4
; call repro::black_box
  call void @_ZN5repro9black_box17h58109cf990c39389E(ptr align 8 %_3)
  ret void
}

; repro::fn6
; Function Attrs: nonlazybind uwtable
; %_0 is the sret [3 x i64] result; %_1 is a 96-byte aggregate and %_2 a
; 72-byte { double, { i16, [3 x i16], [7 x i64] } }, both passed by pointer.
; The body shuffles sub-fields between %_1, %_2 and the locals %_3 / %_5,
; then calls fn13 with fresh stack copies and interior pointers.
define internal void @_ZN5repro3fn617ha4f26e5581c766d6E(ptr sret([3 x i64]) align 8 %_0, ptr align 8 %_1, ptr align 8 %_2) unnamed_addr #2 {
start:
  %0 = alloca { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, align 8
  %1 = alloca { i16, [3 x i16], [7 x i64] }, align 8
  %2 = alloca { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, align 8
  %3 = alloca { i16, [3 x i16], [7 x i64] }, align 8
  %_5 = alloca { i16, [3 x i16], [7 x i64] }, align 8
  %_3 = alloca %Adt65, align 8
; Write the three constant result words into the sret slot.
  %4 = getelementptr inbounds [3 x i64], ptr %_0, i64 0, i64 0
  store i64 -7316204373706580326, ptr %4, align 8
  %5 = getelementptr inbounds [3 x i64], ptr %_0, i64 0, i64 1
  store i64 7601018311436077242, ptr %5, align 8
  %6 = getelementptr inbounds [3 x i64], ptr %_0, i64 0, i64 2
  store i64 4662124226323273036, ptr %6, align 8
; The trailing [2 x i64] of %_3 (field 1 of %Adt65) = [1, 3].
  %7 = getelementptr inbounds %Adt65, ptr %_3, i32 0, i32 1
  %8 = getelementptr inbounds [2 x i64], ptr %7, i64 0, i64 0
  store i64 1, ptr %8, align 8
  %9 = getelementptr inbounds [2 x i64], ptr %7, i64 0, i64 1
  store i64 3, ptr %9, align 8
; Innermost struct of %_3's aggregate: i16 from %_1, [7 x i64] from %_2.
  %10 = getelementptr inbounds { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, ptr %_3, i32 0, i32 1
  %11 = getelementptr inbounds { double, { i16, [3 x i16], [7 x i64] } }, ptr %10, i32 0, i32 1
  %12 = getelementptr inbounds { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, ptr %_1, i32 0, i32 1
  %13 = getelementptr inbounds { double, { i16, [3 x i16], [7 x i64] } }, ptr %12, i32 0, i32 1
  %14 = load i16, ptr %13, align 8, !noundef !5
  store i16 %14, ptr %11, align 8
  %15 = getelementptr inbounds { double, { i16, [3 x i16], [7 x i64] } }, ptr %_2, i32 0, i32 1
  %16 = getelementptr inbounds { i16, [3 x i16], [7 x i64] }, ptr %15, i32 0, i32 2
  %17 = getelementptr inbounds { i16, [3 x i16], [7 x i64] }, ptr %11, i32 0, i32 2
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %17, ptr align 8 %16, i64 56, i1 false)
; Overwrite the whole 96-byte aggregate in %_3 with %_1 (this replaces the
; two field writes just above).
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %_3, ptr align 8 %_1, i64 96, i1 false)
; %_3's middle field = { double from %_1, 64-byte inner struct from %_2 }.
  %18 = getelementptr inbounds { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, ptr %_3, i32 0, i32 1
  %19 = getelementptr inbounds { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, ptr %_1, i32 0, i32 1
  %20 = load double, ptr %19, align 8, !noundef !5
  store double %20, ptr %18, align 8
  %21 = getelementptr inbounds { double, { i16, [3 x i16], [7 x i64] } }, ptr %_2, i32 0, i32 1
  %22 = getelementptr inbounds { double, { i16, [3 x i16], [7 x i64] } }, ptr %18, i32 0, i32 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %22, ptr align 8 %21, i64 64, i1 false)
; Copy that 64-byte inner struct back from %_3 into %_1.
  %23 = getelementptr inbounds { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, ptr %_1, i32 0, i32 1
  %24 = getelementptr inbounds { double, { i16, [3 x i16], [7 x i64] } }, ptr %23, i32 0, i32 1
  %25 = getelementptr inbounds { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, ptr %_3, i32 0, i32 1
  %26 = getelementptr inbounds { double, { i16, [3 x i16], [7 x i64] } }, ptr %25, i32 0, i32 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %24, ptr align 8 %26, i64 64, i1 false)
; %_5 = { i16 from %_2's inner struct, <unwritten [3 x i16]>, [7 x i64] from
; %_3 }. Only the first and last fields are stored here.
  %27 = getelementptr inbounds { double, { i16, [3 x i16], [7 x i64] } }, ptr %_2, i32 0, i32 1
  %28 = load i16, ptr %27, align 8, !noundef !5
  store i16 %28, ptr %_5, align 8
  %29 = getelementptr inbounds { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, ptr %_3, i32 0, i32 1
  %30 = getelementptr inbounds { double, { i16, [3 x i16], [7 x i64] } }, ptr %29, i32 0, i32 1
  %31 = getelementptr inbounds { i16, [3 x i16], [7 x i64] }, ptr %30, i32 0, i32 2
  %32 = getelementptr inbounds { i16, [3 x i16], [7 x i64] }, ptr %_5, i32 0, i32 2
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %32, ptr align 8 %31, i64 56, i1 false)
; Materialise the by-pointer arguments for fn13:
;   %3 = copy of %_5, %2 = copy of %_1, %1 = copy of %_2's inner struct,
;   %0 = second copy of %_1.
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %3, ptr align 8 %_5, i64 64, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %2, ptr align 8 %_1, i64 96, i1 false)
  %33 = getelementptr inbounds { double, { i16, [3 x i16], [7 x i64] } }, ptr %_2, i32 0, i32 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %1, ptr align 8 %33, i64 64, i1 false)
  %34 = getelementptr inbounds { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, ptr %_1, i32 0, i32 1
  %35 = load double, ptr %34, align 8, !noundef !5
  %36 = getelementptr inbounds { i16, [3 x i16], [7 x i64] }, ptr %_5, i32 0, i32 2
  %37 = load double, ptr %_2, align 8, !noundef !5
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %0, ptr align 8 %_1, i64 96, i1 false)
  %38 = getelementptr inbounds { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, ptr %_1, i32 0, i32 1
; call repro::fn13
; %_3, %36 and %38 are passed as pointers into live storage rather than as
; fresh copies; the i8 result %_4 is unused.
  %_4 = call i8 @_ZN5repro4fn1317h60c8754b5808aaf4E(ptr align 8 %3, ptr align 8 %_3, ptr align 8 %2, ptr align 8 %1, double %35, ptr align 8 %36, double %37, ptr align 8 %0, ptr align 8 %38)
  ret void
}

; repro::black_box2
; Function Attrs: nonlazybind uwtable
; Hands the [7 x i64] at %v to the 56-byte core::hint::black_box, using a
; local temporary as the sret destination. The result is not read afterwards.
define internal void @_ZN5repro10black_box217he05ac446572f7e7dE(ptr align 8 %v) unnamed_addr #2 {
start:
  %_2 = alloca [7 x i64], align 8
; call core::hint::black_box
  call void @_ZN4core4hint9black_box17h43a8b18395b882e6E(ptr sret([7 x i64]) align 8 %_2, ptr align 8 %v)
  ret void
}

; repro::fn13
; Function Attrs: nonlazybind uwtable
; Only %_1, %_3 and %_4 are used in the body; the other six parameters are
; ignored. Overwrites *%_4 with the innermost struct of *%_3, replaces its
; [7 x i64] with constants, and prints the leading i16 of *%_4 via dump_var.
; According to the report, the correct output is 0 (as printed at -O1).
define internal i8 @_ZN5repro4fn1317h60c8754b5808aaf4E(ptr align 8 %_1, ptr align 8 %_2, ptr align 8 %_3, ptr align 8 %_4, double %_5, ptr align 8 %_6, double %_7, ptr align 8 %_8, ptr align 8 %_9) unnamed_addr #2 {
start:
; *%_4 = 64-byte { i16, [3 x i16], [7 x i64] } taken from inside *%_3.
  %0 = getelementptr inbounds { [2 x i64], { double, { i16, [3 x i16], [7 x i64] } }, ptr }, ptr %_3, i32 0, i32 1
  %1 = getelementptr inbounds { double, { i16, [3 x i16], [7 x i64] } }, ptr %0, i32 0, i32 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %_4, ptr align 8 %1, i64 64, i1 false)
; Overwrite all seven i64 elements of %_4's array field with constants; the
; leading i16 copied above is left untouched.
  %2 = getelementptr inbounds { i16, [3 x i16], [7 x i64] }, ptr %_4, i32 0, i32 2
  %3 = getelementptr inbounds [7 x i64], ptr %2, i64 0, i64 0
  store i64 9223372036854775807, ptr %3, align 8
  %4 = getelementptr inbounds [7 x i64], ptr %2, i64 0, i64 1
  store i64 -9223372036854775808, ptr %4, align 8
  %5 = getelementptr inbounds [7 x i64], ptr %2, i64 0, i64 2
  store i64 -9223372036854775808, ptr %5, align 8
  %6 = getelementptr inbounds [7 x i64], ptr %2, i64 0, i64 3
  store i64 -9223372036854775808, ptr %6, align 8
  %7 = getelementptr inbounds [7 x i64], ptr %2, i64 0, i64 4
  store i64 79, ptr %7, align 8
  %8 = getelementptr inbounds [7 x i64], ptr %2, i64 0, i64 5
  store i64 -75, ptr %8, align 8
  %9 = getelementptr inbounds [7 x i64], ptr %2, i64 0, i64 6
  store i64 -9223372036854775808, ptr %9, align 8
; Pass the [7 x i64] field of *%_1 to black_box2.
  %10 = getelementptr inbounds { i16, [3 x i16], [7 x i64] }, ptr %_1, i32 0, i32 2
; call repro::black_box2
  call void @_ZN5repro10black_box217he05ac446572f7e7dE(ptr align 8 %10)
; call repro::dump_var
  call void @_ZN5repro8dump_var17h5f90de920b6ee1d3E(ptr align 8 %_4)
  ret i8 -126
}

; repro::main
; Function Attrs: nonlazybind uwtable
; Rust-level main: just calls fn4. Nothing visible in this module calls it;
; the exported @main below invokes fn4 directly.
define internal void @_ZN5repro4main17h9b5e493692628116E() unnamed_addr #2 {
start:
; call repro::fn4
  call void @_ZN5repro3fn417hbcad5aa86ffc8eb2E()
  ret void
}

; Function Attrs: nonlazybind uwtable
; Stub personality routine defined inside the module: ignores its five
; arguments and returns 0. It is referenced as the personality of the
; invoke-based call_once above.
define i32 @rust_eh_personality(i32, i32, i64, ptr, ptr) unnamed_addr #2 {
start:
  ret i32 0
}

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #3

; Function Attrs: nonlazybind uwtable
declare i32 @printf(ptr, ...) unnamed_addr #2

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4

; Function Attrs: nonlazybind
; Exported entry point of the reproducer: ignores both of its arguments,
; calls repro::fn4 directly and returns 0.
define i32 @main(i32 %0, ptr %1) unnamed_addr #5 {
start:
  call void @_ZN5repro3fn417hbcad5aa86ffc8eb2E()
  ret i32 0
}

attributes #0 = { noinline nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #1 = { inlinehint nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #2 = { nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #5 = { nonlazybind "target-cpu"="x86-64" }

!llvm.module.flags = !{!0, !1, !2}
!llvm.ident = !{!3}

!0 = !{i32 8, !"PIC Level", i32 2}
!1 = !{i32 7, !"PIE Level", i32 2}
!2 = !{i32 2, !"RtLibUseGOT", i32 1}
!3 = !{!"rustc version 1.76.0-dev"}
!4 = !{i32 1623106}
!5 = !{}

clang -O1 prints 0, clang -O2 prints a random number https://godbolt.org/z/jq5sfYGnb (on trunk 7003e25)

Bisects down to InstCombinePass

BISECT: running pass (456) CorrelatedValuePropagationPass on _ZN5repro3fn517h5d2910b0843c33f6E
BISECT: running pass (457) SimplifyCFGPass on _ZN5repro3fn517h5d2910b0843c33f6E
BISECT: NOT running pass (458) InstCombinePass on _ZN5repro3fn517h5d2910b0843c33f6E
BISECT: NOT running pass (459) AggressiveInstCombinePass on _ZN5repro3fn517h5d2910b0843c33f6E
BISECT: NOT running pass (460) LibCallsShrinkWrapPass on _ZN5repro3fn517h5d2910b0843c33f6E
From Rust
#![feature(custom_mir, core_intrinsics)]
#![allow(unused_parens, unused_assignments, overflowing_literals)]
extern crate core;
use core::intrinsics::mir::*;

// Non-Miri build: prints the i16 component of `x` followed by a newline by
// calling C `printf` directly with a NUL-terminated "%d\n" format string.
#[cfg(not(miri))]
fn dump_var(x: (i16, [isize; 7])) {
  extern "C" {
      fn printf(fmt: *const core::ffi::c_char, ...) -> core::ffi::c_int;
  }

  // `x.0` is cast to c_int to match the "%d" conversion.
  unsafe { printf(b"%d\n\0".as_ptr().cast(), x.0 as core::ffi::c_int); }
}

// Miri build: same output as the printf variant, but via `println!`.
#[cfg(miri)]
fn dump_var(x: (i16, [isize; 7])) {
    println!("{}", x.0);
}

// Passes `v` to `core::hint::black_box` and discards the result.
fn black_box(v: ([u64; 2], i16, char)) {
    core::hint::black_box(v);
}

// Entry point of the test case: calls `fn5` with an all-zero tuple whose
// last component is a raw pointer to the local `i`. The array returned by
// `fn5` is discarded.
fn fn4() {
    let mut i = 0_u128;
    let p = core::ptr::addr_of_mut!(i);
    fn5(([0; 2], (0., (0, [0; 7])), p));
}
// Custom-MIR body. Fills `_6.fld1` field by field, calls `fn6`, then stores
// `_6.fld1` into an `Adt58::Variant1`, writes through the `Variant2` view of
// the same enum, and passes the resulting tuple to `black_box`.
#[custom_mir(dialect = "runtime", phase = "initial")]
fn fn5(mut _2: ([u64; 2], (f64, (i16, [isize; 7])), *mut u128)) -> [u64; 2] {
    mir! {
    let _3: u64;
    let _6: Adt65;
    let _9: ([u64; 3],);
    let _11: char;
    let _14: ([u64; 2], i16, char);
    let _15: Adt58;
    let _19: ([u64; 2], i16, char);
    let _34: ();
    {
    RET = [14745978299192773113_u64,14803853939571786517_u64];
    _3 = 10371022630521333360_u64;
    // `_6.fld1` is initialised piecewise; `_6.fld0` is never assigned and
    // `_6.fld1.2` is only assigned in bb4, after the call.
    _6.fld1.1.1 = (0, _2.1.1.1);
    _6.fld1.1.0 = 0.;
    _6.fld1.0 = [_3,_3];
    Call(_9.0 = fn6(_2, _6.fld1.1), bb4, UnwindUnreachable())
    }
    bb4 = {
    _6.fld1.2 = _2.2;
    _11 = '\u{60b60}';
    // `_6.fld1.1.1.0` was set to 0 above, so this stores `!0`.
    _14.1 = !_6.fld1.1.1.0;
    _15 = Adt58::Variant1 { fld0: _6.fld1 };
    // Writes, then reads, `fld1` of Variant2 although `_15` was just
    // assigned as Variant1.
    place!(Field::<[u64; 2]>(Variant(_15, 2), 1)) = [_3,_3];
    _19 = (Field::<[u64; 2]>(Variant(_15, 2), 1), _14.1, _11);
    Call(_34 = black_box(Move(_19)), bb16, UnwindUnreachable())
    }
    bb16 = {
    Return()
    }
    }
}
// Custom-MIR body. Shuffles sub-fields between `_1`, `_2` and the locals
// `_3` / `_13`, then calls `fn13` with nine arguments derived from them.
#[custom_mir(dialect = "runtime", phase = "initial")]
pub fn fn6(
    mut _1: ([u64; 2], (f64, (i16, [isize; 7])), *mut u128),
    mut _2: (f64, (i16, [isize; 7])),
) -> [u64; 3] {
    mir! {
    let _3: Adt65;
    let _8: i8;
    let _13: ([u64; 2], (f64, (i16, [isize; 7])), *mut u128);
    {
    RET = [11130539700002971290_u64,7601018311436077242_u64,4662124226323273036_u64];
    _3.fld0 = [1_usize,3_usize];
    _3.fld1.1.1 = (_1.1.1.0, _2.1.1);
    // Replaces all of `_3.fld1`, including the field assigned just above.
    _3.fld1 = _1;
    _3.fld1.1 = (_1.1.0, _2.1);
    _1.1.1 = _3.fld1.1.1;
    // Only `_13.1.1` is ever assigned; the rest of `_13` stays unset.
    _13.1.1 = (_2.1.0, _3.fld1.1.1.1);
    // `_1` is passed twice, alongside `_1.1.0` and `_1.1`.
    Call(_8 = fn13(_13.1.1, Move(_3), _1, _2.1, _1.1.0, _13.1.1.1, _2.0,  _1, _1.1), bb6, UnwindUnreachable())
    }
    bb6 = {
    Return()
    }

    }
}

// Passes `v` to `core::hint::black_box` and discards the result.
fn black_box2(v: [isize; 7]) {
    core::hint::black_box(v);
}
// Custom-MIR body. Of the nine parameters only `_2`, `_7` and `_8` are used:
// `_8` is overwritten from `_7.1.1`, its array is replaced with constants,
// and `_8` is then printed through `dump_var` (which shows `_8.0`).
#[custom_mir(dialect = "runtime", phase = "initial")]
pub fn fn13(
    mut _2: (i16, [isize; 7]),
    mut _6: Adt65,
    mut _7: ([u64; 2], (f64, (i16, [isize; 7])), *mut u128),
    mut _8: (i16, [isize; 7]),
    mut _9: f64,
    mut _10: [isize; 7],
    mut _11: f64,
    mut _13: ([u64; 2], (f64, (i16, [isize; 7])), *mut u128),
    mut _14: (f64, (i16, [isize; 7])),
) -> i8 {
    mir! {
    let _41: ();
    {
    RET = 5275127988543426690_u64 as i8;
    _8 = _7.1.1;
    // Only the array half of `_8` is replaced; `_8.0` keeps the value copied
    // from `_7.1.1.0`.
    _8.1 = [9223372036854775807_isize,(-9223372036854775808_isize),(-9223372036854775808_isize),(-9223372036854775808_isize),79_isize,(-75_isize),(-9223372036854775808_isize)];
    Goto(bb5)
    }
    bb5 = {
    Call(_41 = black_box2(Move(_2.1)), bb6, UnwindUnreachable())
    }
    bb6 = {
    Call(_41 = dump_var(Move(_8)), bb8, UnwindUnreachable())
    }
    bb8 = {
    Return()
    }

    }
}
// Program entry point: runs the test case.
pub fn main() {
    fn4();
}
// Four-variant enum. `fn5` assigns a `Variant1` value and then accesses
// `fld1` of `Variant2` on the same place.
#[derive(Debug, Copy, Clone)]
pub enum Adt58 {
    Variant0 {},
    Variant1 {
        // Same tuple type as the argument of `fn5`.
        fld0: ([u64; 2], (f64, (i16, [isize; 7])), *mut u128),
    },
    Variant2 {
        fld0: [u16; 8],
        fld1: [u64; 2],
        fld2: [i8; 8],
    },
    Variant3 {},
}
// Struct pairing a two-element usize array with the same tuple type that
// `fn5` and `fn6` take as their first argument.
#[derive(Debug)]
pub struct Adt65 {
    fld0: [usize; 2],
    fld1: ([u64; 2], (f64, (i16, [isize; 7])), *mut u128),
}
@antoniofrighetto
Copy link
Contributor

I reduced this to two related miscompilations, which occur in different parts of SimplifyDemandedVectorElts.
1:

; Reduced case 1. Loads a <3 x i16> from %_0 and blends it with the constant
; <0, undef, undef>. With 3-element operands the mask <0, 1, 3, 5> selects
; lanes 0 and 1 of the constant (0 and undef) followed by lanes 0 and 2 of
; the loaded vector.
define <4 x i16> @src(ptr align 8 %_0) {
start:
  %_5.sroa.5.32.copyload = load <3 x i16>, ptr %_0, align 8
  %_5.sroa.5.32.vecblend = shufflevector <3 x i16> <i16 0, i16 undef, i16 undef>, <3 x i16> %_5.sroa.5.32.copyload, <4 x i32> <i32 0, i32 1, i32 3, i32 5>
  ret <4 x i16> %_5.sroa.5.32.vecblend
}

Alive2: https://alive2.llvm.org/ce/z/3AMDi4

2:

; Reduced case 2. Loads an <8 x i16> from %_1 and blends it with a constant
; whose lanes 1-3 are undef and whose other lanes are poison. The mask makes
; result lane 0 poison, takes lanes 1-3 from the constant (indices 9-11, the
; undef lanes) and lanes 4-7 from the loaded vector. %_0 is unused.
; NOTE(review): attribute group #0 is referenced but not defined in this
; snippet.
define <8 x i16> @src(ptr align 8 %_0, ptr align 8 %_1) unnamed_addr #0 {
start:
  %_5.sroa.5.32.copyload = load <8 x i16>, ptr %_1, align 8
  %_5.sroa.5.32.vecblend = shufflevector <8 x i16> %_5.sroa.5.32.copyload, <8 x i16> <i16 poison, i16 undef, i16 undef, i16 undef, i16 poison, i16 poison, i16 poison, i16 poison>, <8 x i32> <i32 poison, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %_5.sroa.5.32.vecblend
}

Alive2: https://alive2.llvm.org/ce/z/Ha-Sga

There seems to be no out-of-bounds access in either shufflevector constant, so I don't think we are free to miscompile here (which I think would be the case if we were accessing it out of bounds). @nikic, could you kindly confirm this too?

@nikic
Copy link
Contributor

nikic commented Dec 16, 2023

I don't really understand your question, but shufflevector semantics have been changed to use poison rather than undef, and we haven't really propagated this change to transforms yet. The UndefElts in SimplifyDemandedVectorElts need to be switched to PoisonElts.

@antoniofrighetto
Copy link
Contributor

@nikic, sorry, my doubt was about the extent to which we may allow miscompilation (if at all) when the concatenated shufflevector input is accessed out of bounds (via an OOB index in the mask). The UndefElts in SimplifyDemandedVectorElts do indeed need to be switched to PoisonElts, but SimplifyDemandedVectorElts already creates a poison when encountering an undef element, if I'm not wrong. I think the issue here may stem from being too aggressive in how we move from undef to poison. Conservatively not setting UndefElts when handling undef demanded elements (in order to prevent them from being poisoned later):

@@ -1378,8 +1378,10 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
       if (!Elt) return nullptr;
 
       Elts.push_back(Elt);
+#if 0
       if (isa<UndefValue>(Elt))   // Already undef or poison.
         UndefElts.setBit(i);
+#endif
     }

This seems to solve 1) and fixes the https://github.com/llvm/llvm-project/blob/main/llvm/test/Transforms/InstCombine/insert-const-shuf.ll#L95-L104 test (which is currently wrong: https://alive2.llvm.org/ce/z/5TUcHp).

@antoniofrighetto
Copy link
Contributor

Thanks for reporting. Closing this as it has been fixed on latest clang via @nikic's shufflevector semantics rework and 151ddf0.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

4 participants