-
Notifications
You must be signed in to change notification settings - Fork 15.5k
Description
| Bugzilla Link | 37240 |
| Version | trunk |
| OS | All |
| Blocks | #37076 |
| Reporter | LLVM Bugzilla Contributor |
| CC | @Arnaud-de-Grandmaison-ARM,@francisvm,@froydnj,@hiraditya,@jmorse,@OCHyams,@pogo59,@smithp35,@yuanfang-chen |
Extended Description
When -g (or just -gline-tables-only) is passed to clang, CFI_INSTRUCTIONs are added to the function prologue by the Prologue/Epilogue insertion pass. These instructions act as scheduling region barriers when the PostRA scheduler is run. This can lead to different schedules for the prologue block, which can further lead to differences in tail merging/duplication.
A simple example that illustrates the problem:
target triple = "aarch64-linux-gnu"
@X1 = global i32 0, align 4
@X2 = global i32 0, align 4
@X3 = global i32 0, align 4
@X4 = global i32 0, align 4
define void @test(i32 %i) #0 {
entry:
%0 = load i32, i32* @X1, align 4
%x1 = add i32 %0, 1
%x2 = add i32 %0, 2
%x3 = add i32 %0, 3
%x4 = add i32 %0, 4
tail call void @foo()
store i32 %x1, i32* @X1, align 4
store i32 %x2, i32* @X2, align 4
store i32 %x3, i32* @X3, align 4
store i32 %x4, i32* @X4, align 4
ret void
}
declare void @foo()
attributes #0 = { nounwind }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 7.0.0 (trunk 330790) (llvm/trunk 330787)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2)
!1 = !DIFile(filename: "test.c", directory: "")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"wchar_size", i32 4}
when compiled as is (i.e. with debug info), produces the following code:
stp x23, x22, [sp, #-48]! // 8-byte Folded Spill
stp x21, x20, [sp, #16] // 8-byte Folded Spill
stp x19, x30, [sp, #32] // 8-byte Folded Spill
.cfi_def_cfa_offset 48
.cfi_offset w30, -8
.cfi_offset w19, -16
.cfi_offset w20, -24
.cfi_offset w21, -32
.cfi_offset w22, -40
.cfi_offset w23, -48
adrp x19, X1
ldr w8, [x19, :lo12:X1]
add w20, w8, #1 // =1
add w21, w8, #2 // =2
add w22, w8, #3 // =3
add w23, w8, #4 // =4
bl foo
adrp x8, X2
str w20, [x19, :lo12:X1]
str w21, [x8, :lo12:X2]
ldp x19, x30, [sp, #32] // 8-byte Folded Reload
ldp x21, x20, [sp, #16] // 8-byte Folded Reload
adrp x9, X3
adrp x10, X4
str w22, [x9, :lo12:X3]
str w23, [x10, :lo12:X4]
ldp x23, x22, [sp], #48 // 8-byte Folded Reload
ret
but when the debug metadata is commented out, it produces the following code instead:
stp x23, x22, [sp, #-48]! // 8-byte Folded Spill
stp x19, x30, [sp, #32] // 8-byte Folded Spill
adrp x19, X1
ldr w8, [x19, :lo12:X1]
stp x21, x20, [sp, #16] // 8-byte Folded Spill
add w20, w8, #1 // =1
add w21, w8, #2 // =2
add w22, w8, #3 // =3
add w23, w8, #4 // =4
bl foo
adrp x8, X2
str w20, [x19, :lo12:X1]
str w21, [x8, :lo12:X2]
ldp x19, x30, [sp, #32] // 8-byte Folded Reload
ldp x21, x20, [sp, #16] // 8-byte Folded Reload
adrp x9, X3
adrp x10, X4
str w22, [x9, :lo12:X3]
str w23, [x10, :lo12:X4]
ldp x23, x22, [sp], #48 // 8-byte Folded Reload
ret
Note the different position in the schedule of the first adrp and ldr instructions.