Skip to content

Commit

Permalink
[MC] Omit fill value if it's zero when emitting code alignment
Browse files Browse the repository at this point in the history
Previously, code alignment directives for AArch64 were emitted with an explicit zero fill value. Now the fill value is omitted when it is zero, which lets the assembler choose whether to pad with nops or zeroes.

Reviewed By: efriedma, MaskRay

Differential Revision: https://reviews.llvm.org/D132508
  • Loading branch information
steplong committed Aug 25, 2022
1 parent ec495b5 commit 525af9f
Show file tree
Hide file tree
Showing 10 changed files with 89 additions and 61 deletions.
38 changes: 29 additions & 9 deletions llvm/lib/MC/MCAsmStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,9 @@ class MCAsmStreamer final : public MCStreamer {
void emitFill(const MCExpr &NumValues, int64_t Size, int64_t Expr,
SMLoc Loc = SMLoc()) override;

void emitAlignmentDirective(unsigned ByteAlignment, Optional<int64_t> Value,
unsigned ValueSize, unsigned MaxBytesToEmit);

void emitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
unsigned ValueSize = 1,
unsigned MaxBytesToEmit = 0) override;
Expand Down Expand Up @@ -1394,9 +1397,10 @@ void MCAsmStreamer::emitFill(const MCExpr &NumValues, int64_t Size,
EmitEOL();
}

void MCAsmStreamer::emitValueToAlignment(unsigned ByteAlignment, int64_t Value,
unsigned ValueSize,
unsigned MaxBytesToEmit) {
void MCAsmStreamer::emitAlignmentDirective(unsigned ByteAlignment,
Optional<int64_t> Value,
unsigned ValueSize,
unsigned MaxBytesToEmit) {
if (MAI->useDotAlignForAlignment()) {
if (!isPowerOf2_32(ByteAlignment))
report_fatal_error("Only power-of-two alignments are supported "
Expand Down Expand Up @@ -1428,9 +1432,13 @@ void MCAsmStreamer::emitValueToAlignment(unsigned ByteAlignment, int64_t Value,

OS << Log2_32(ByteAlignment);

if (Value || MaxBytesToEmit) {
OS << ", 0x";
OS.write_hex(truncateToSize(Value, ValueSize));
if (Value.has_value() || MaxBytesToEmit) {
if (Value.has_value()) {
OS << ", 0x";
OS.write_hex(truncateToSize(Value.value(), ValueSize));
} else {
OS << ", ";
}

if (MaxBytesToEmit)
OS << ", " << MaxBytesToEmit;
Expand All @@ -1450,18 +1458,30 @@ void MCAsmStreamer::emitValueToAlignment(unsigned ByteAlignment, int64_t Value,
}

OS << ' ' << ByteAlignment;
OS << ", " << truncateToSize(Value, ValueSize);
if (Value.has_value())
OS << ", " << truncateToSize(Value.value(), ValueSize);
else if (MaxBytesToEmit)
OS << ", ";
if (MaxBytesToEmit)
OS << ", " << MaxBytesToEmit;
EmitEOL();
}

// Emits an alignment directive with an explicit fill value by forwarding to
// emitAlignmentDirective. The int64_t Value is implicitly converted to an
// engaged Optional<int64_t>, so emitAlignmentDirective always receives a fill
// value here, including when Value is zero.
void MCAsmStreamer::emitValueToAlignment(unsigned ByteAlignment, int64_t Value,
unsigned ValueSize,
unsigned MaxBytesToEmit) {
emitAlignmentDirective(ByteAlignment, Value, ValueSize, MaxBytesToEmit);
}

void MCAsmStreamer::emitCodeAlignment(unsigned ByteAlignment,
const MCSubtargetInfo *STI,
unsigned MaxBytesToEmit) {
// Emit with a text fill value.
emitValueToAlignment(ByteAlignment, MAI->getTextAlignFillValue(),
1, MaxBytesToEmit);
if (MAI->getTextAlignFillValue())
emitAlignmentDirective(ByteAlignment, MAI->getTextAlignFillValue(), 1,
MaxBytesToEmit);
else
emitAlignmentDirective(ByteAlignment, None, 1, MaxBytesToEmit);
}

void MCAsmStreamer::emitValueToOffset(const MCExpr *Offset,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@

define i32 @a(i32 %x, i32* nocapture readonly %y, i32* nocapture readonly %z) {
; CHECK-DEFAULT: .p2align 5
; CHECK-8: .p2align 4, 0x0, 8
; CHECK-16: .p2align 5, 0x0, 16
; CHECK-8: .p2align 4, , 8
; CHECK-16: .p2align 5, , 16
; CHECK-NEXT: .LBB0_5: // %vector.body
; CHECK-DEFAULT: .p2align 5
; CHECK-8: .p2align 4, 0x0, 8
; CHECK-16: .p2align 5, 0x0, 16
; CHECK-8: .p2align 4, , 8
; CHECK-16: .p2align 5, , 16
; CHECK-NEXT: .LBB0_8: // %for.body
entry:
%cmp10 = icmp sgt i32 %x, 0
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/aarch64-p2align-max-bytes.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
; should be the same, at different locations)
define i32 @a(i32 %x, i32* nocapture readonly %y, i32* nocapture readonly %z) {
; CHECK-LABEL: a:
; CHECK-EXPLICIT: .p2align 5, 0x0, 8
; CHECK-EXPLICIT: .p2align 5, , 8
; CHECK-IMPLICIT: .p2align 5
; CHECK-NEXT: .LBB0_5: // %vector.body
; CHECK-EXPLICIT: .p2align 5, 0x0, 8
; CHECK-EXPLICIT: .p2align 5, , 8
; CHECK-IMPLICIT: .p2align 5
; CHECK-NEXT: .LBB0_8: // %for.body
; CHECK-OBJ;Disassembly of section .text:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/aarch64-sched-store.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ define dso_local void @memset_unroll2(double* nocapture %array, i64 %size) {
; DEFAULT: // %bb.0: // %entry
; DEFAULT-NEXT: fmov v0.2d, #2.00000000
; DEFAULT-NEXT: add x8, x0, #64
; DEFAULT-NEXT: .p2align 4, 0x0, 8
; DEFAULT-NEXT: .p2align 4, , 8
; DEFAULT-NEXT: .LBB0_1: // %vector.body
; DEFAULT-NEXT: // =>This Inner Loop Header: Depth=1
; DEFAULT-NEXT: stur q0, [x8, #-64]
Expand All @@ -30,7 +30,7 @@ define dso_local void @memset_unroll2(double* nocapture %array, i64 %size) {
; ASCEND: // %bb.0: // %entry
; ASCEND-NEXT: fmov v0.2d, #2.00000000
; ASCEND-NEXT: add x8, x0, #64
; ASCEND-NEXT: .p2align 4, 0x0, 8
; ASCEND-NEXT: .p2align 4, , 8
; ASCEND-NEXT: .LBB0_1: // %vector.body
; ASCEND-NEXT: // =>This Inner Loop Header: Depth=1
; ASCEND-NEXT: stur q0, [x8, #-64]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-ios14.0.0"

define [1 x <4 x float>] @test1() {
; CHECK-LABEL: .p2align 4 ; -- Begin function test1
; CHECK-LABEL: .p2align 4, 0x0 ; -- Begin function test1
; CHECK-NEXT: lCPI0_0:
; CHECK-NEXT: .quad 0 ; 0x0
; CHECK-NEXT: .quad 4575657221408423936 ; 0x3f80000000000000
Expand All @@ -28,7 +28,7 @@ define [1 x <4 x float>] @test1() {
}

define [1 x <4 x float>] @test2() {
; CHECK-LABEL: .p2align 4 ; -- Begin function test2
; CHECK-LABEL: .p2align 4, 0x0 ; -- Begin function test2
; CHECK-NEXT: lCPI1_0:
; CHECK-NEXT: .long 0x00000000 ; float 0
; CHECK-NEXT: .long 0x00000000 ; float 0
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/merge-store-dependency.ll
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ define void @test(%struct1* %fde, i32 %fd, void (i32, i32, i8*)* %func, i8* %arg
; A53-NEXT: // %bb.1:
; A53-NEXT: ldr w8, [x19]
; A53-NEXT: ldr w9, [x9]
; A53-NEXT: .p2align 4, 0x0, 8
; A53-NEXT: .p2align 4, , 8
; A53-NEXT: .LBB0_2: // %while.body.i.split.ver.us
; A53-NEXT: // =>This Inner Loop Header: Depth=1
; A53-NEXT: lsl w9, w9, #1
Expand All @@ -51,7 +51,7 @@ define void @test(%struct1* %fde, i32 %fd, void (i32, i32, i8*)* %func, i8* %arg
; A53-NEXT: .cfi_restore w19
; A53-NEXT: .cfi_restore w30
; A53-NEXT: ret
; A53-NEXT: .p2align 4, 0x0, 8
; A53-NEXT: .p2align 4, , 8
; A53-NEXT: .LBB0_4: // %while.body.i.split
; A53-NEXT: // =>This Inner Loop Header: Depth=1
; A53-NEXT: .cfi_restore_state
Expand Down
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/AArch64/p2align-zero-fillvalue.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
; RUN: llc -mtriple=aarch64 < %s -o - | FileCheck %s

%struct.A = type { i8, i32 }
@foo = dso_local global %struct.A zeroinitializer, align 4

; CHECK: .bss
; CHECK-NEXT: .globl foo
; CHECK-NEXT: .p2align 2, 0x0{{$}}
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/ARM/byval_load_align.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
; CHECK: ldr r2, [r[[REG]], #4]
; CHECK: ldr r3, [r[[REG]], #8]
; CHECK-NOT: ldm
; CHECK: .p2align 1 @ @sID
; CHECK: .p2align 1, 0x0 @ @sID

%struct.ModuleID = type { [32 x i8], [32 x i8], i16 }

Expand Down
46 changes: 23 additions & 23 deletions llvm/test/CodeGen/WebAssembly/global.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ define i8* @call_memcpy(i8* %p, i8* nocapture readonly %q, i32 %n) {
}

; CHECK: .type .Lg,@object
; CHECK: .p2align 2{{$}}
; CHECK: .p2align 2, 0x0{{$}}
; CHECK-NEXT: .Lg:
; CHECK-NEXT: .int32 1337{{$}}
; CHECK-NEXT: .size .Lg, 4{{$}}
Expand All @@ -41,140 +41,140 @@ define i8* @call_memcpy(i8* %p, i8* nocapture readonly %q, i32 %n) {
@ud = internal global i32 undef

; CHECK: .type nil,@object
; CHECK: .p2align 2
; CHECK: .p2align 2, 0x0
; CHECK: nil:
; CHECK: .int32 0
; CHECK: .size nil, 4
@nil = internal global i32 zeroinitializer

; CHECK: .type z,@object
; CHECK: .p2align 2
; CHECK: .p2align 2, 0x0
; CHECK: z:
; CHECK: .int32 0
; CHECK: .size z, 4
@z = internal global i32 0

; CHECK: .type one,@object
; CHECK: .p2align 2{{$}}
; CHECK: .p2align 2, 0x0{{$}}
; CHECK-NEXT: one:
; CHECK-NEXT: .int32 1{{$}}
; CHECK-NEXT: .size one, 4{{$}}
@one = internal global i32 1

; CHECK: .type answer,@object
; CHECK: .p2align 2{{$}}
; CHECK: .p2align 2, 0x0{{$}}
; CHECK-NEXT: answer:
; CHECK-NEXT: .int32 42{{$}}
; CHECK-NEXT: .size answer, 4{{$}}
@answer = internal global i32 42

; CHECK: .type u32max,@object
; CHECK: .p2align 2{{$}}
; CHECK: .p2align 2, 0x0{{$}}
; CHECK-NEXT: u32max:
; CHECK-NEXT: .int32 4294967295{{$}}
; CHECK-NEXT: .size u32max, 4{{$}}
@u32max = internal global i32 -1

; CHECK: .type ud64,@object
; CHECK: .p2align 3{{$}}
; CHECK: .p2align 3, 0x0{{$}}
; CHECK-NEXT: ud64:
; CHECK-NEXT: .skip 8{{$}}
; CHECK-NEXT: .size ud64, 8{{$}}
@ud64 = internal global i64 undef

; CHECK: .type nil64,@object
; CHECK: .p2align 3{{$}}
; CHECK: .p2align 3, 0x0{{$}}
; CHECK-NEXT: nil64:
; CHECK-NEXT: .int64 0{{$}}
; CHECK-NEXT: .size nil64, 8{{$}}
@nil64 = internal global i64 zeroinitializer

; CHECK: .type z64,@object
; CHECK: .p2align 3{{$}}
; CHECK: .p2align 3, 0x0{{$}}
; CHECK-NEXT: z64:
; CHECK-NEXT: .int64 0{{$}}
; CHECK-NEXT: .size z64, 8{{$}}
@z64 = internal global i64 0

; CHECK: .type twoP32,@object
; CHECK: .p2align 3{{$}}
; CHECK: .p2align 3, 0x0{{$}}
; CHECK-NEXT: twoP32:
; CHECK-NEXT: .int64 4294967296{{$}}
; CHECK-NEXT: .size twoP32, 8{{$}}
@twoP32 = internal global i64 4294967296

; CHECK: .type u64max,@object
; CHECK: .p2align 3{{$}}
; CHECK: .p2align 3, 0x0{{$}}
; CHECK-NEXT: u64max:
; CHECK-NEXT: .int64 -1{{$}}
; CHECK-NEXT: .size u64max, 8{{$}}
@u64max = internal global i64 -1

; CHECK: .type f32ud,@object
; CHECK: .p2align 2{{$}}
; CHECK: .p2align 2, 0x0{{$}}
; CHECK-NEXT: f32ud:
; CHECK-NEXT: .skip 4{{$}}
; CHECK-NEXT: .size f32ud, 4{{$}}
@f32ud = internal global float undef

; CHECK: .type f32nil,@object
; CHECK: .p2align 2{{$}}
; CHECK: .p2align 2, 0x0{{$}}
; CHECK-NEXT: f32nil:
; CHECK-NEXT: .int32 0x00000000{{$}}
; CHECK-NEXT: .size f32nil, 4{{$}}
@f32nil = internal global float zeroinitializer

; CHECK: .type f32z,@object
; CHECK: .p2align 2{{$}}
; CHECK: .p2align 2, 0x0{{$}}
; CHECK-NEXT: f32z:
; CHECK-NEXT: .int32 0x00000000{{$}}
; CHECK-NEXT: .size f32z, 4{{$}}
@f32z = internal global float 0.0

; CHECK: .type f32nz,@object
; CHECK: .p2align 2{{$}}
; CHECK: .p2align 2, 0x0{{$}}
; CHECK: f32nz:
; CHECK: .int32 0x80000000{{$}}
; CHECK: .size f32nz, 4{{$}}
@f32nz = internal global float -0.0

; CHECK: .type f32two,@object
; CHECK: .p2align 2{{$}}
; CHECK: .p2align 2, 0x0{{$}}
; CHECK-NEXT: f32two:
; CHECK-NEXT: .int32 0x40000000{{$}}
; CHECK-NEXT: .size f32two, 4{{$}}
@f32two = internal global float 2.0

; CHECK: .type f64ud,@object
; CHECK: .p2align 3{{$}}
; CHECK: .p2align 3, 0x0{{$}}
; CHECK-NEXT: f64ud:
; CHECK-NEXT: .skip 8{{$}}
; CHECK-NEXT: .size f64ud, 8{{$}}
@f64ud = internal global double undef

; CHECK: .type f64nil,@object
; CHECK: .p2align 3{{$}}
; CHECK: .p2align 3, 0x0{{$}}
; CHECK-NEXT: f64nil:
; CHECK-NEXT: .int64 0x0000000000000000{{$}}
; CHECK-NEXT: .size f64nil, 8{{$}}
@f64nil = internal global double zeroinitializer

; CHECK: .type f64z,@object
; CHECK: .p2align 3{{$}}
; CHECK: .p2align 3, 0x0{{$}}
; CHECK-NEXT: f64z:
; CHECK-NEXT: .int64 0x0000000000000000{{$}}
; CHECK-NEXT: .size f64z, 8{{$}}
@f64z = internal global double 0.0

; CHECK: .type f64nz,@object
; CHECK: .p2align 3{{$}}
; CHECK: .p2align 3, 0x0{{$}}
; CHECK-NEXT: f64nz:
; CHECK-NEXT: .int64 0x8000000000000000{{$}}
; CHECK-NEXT: .size f64nz, 8{{$}}
@f64nz = internal global double -0.0

; CHECK: .type f64two,@object
; CHECK: .p2align 3{{$}}
; CHECK: .p2align 3, 0x0{{$}}
; CHECK-NEXT: f64two:
; CHECK-NEXT: .int64 0x4000000000000000{{$}}
; CHECK-NEXT: .size f64two, 8{{$}}
Expand All @@ -193,7 +193,7 @@ define i8* @call_memcpy(i8* %p, i8* nocapture readonly %q, i32 %n) {
; CHECK: .type rom,@object{{$}}
; CHECK: .section .rodata.rom,""
; CHECK: .globl rom{{$}}
; CHECK: .p2align 4{{$}}
; CHECK: .p2align 4, 0x0{{$}}
; CHECK: rom:
; CHECK: .skip 512{{$}}
; CHECK: .size rom, 512{{$}}
Expand All @@ -206,7 +206,7 @@ define i8* @call_memcpy(i8* %p, i8* nocapture readonly %q, i32 %n) {
; CHECK: .type pointer_to_array,@object
; CHECK-NEXT: .section .rodata.pointer_to_array,""
; CHECK-NEXT: .globl pointer_to_array
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: .p2align 2, 0x0
; CHECK-NEXT: pointer_to_array:
; CHECK-NEXT: .int32 array+4
; CHECK-NEXT: .size pointer_to_array, 4
Expand Down
Loading

0 comments on commit 525af9f

Please sign in to comment.