Skip to content

Commit

Permalink
[AArch64][GlobalISel] Reorder stack up-adjustment and register copies
Browse files Browse the repository at this point in the history
This change reorders the stack up-adjustment and return value copying phases of
MachineIR generation on AArch64. Doing so prevents a bug observed for fastcc
calls with more than 8 arguments, where the stack up-adjustment required by such
a call is emitted in the wrong position relative to spill and reload code.

See issue #60972 for the full reproduction and context.

Patch contributed by Bruce Collie

Differential Revision: https://reviews.llvm.org/D144791
  • Loading branch information
aemerson committed Feb 27, 2023
1 parent 06daa51 commit 31d6a57
Show file tree
Hide file tree
Showing 24 changed files with 166 additions and 117 deletions.
22 changes: 11 additions & 11 deletions llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
Expand Up @@ -1312,6 +1312,17 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// Now we can add the actual call instruction to the correct basic block.
MIRBuilder.insertInstr(MIB);

uint64_t CalleePopBytes =
doesCalleeRestoreStack(Info.CallConv,
MF.getTarget().Options.GuaranteedTailCallOpt)
? alignTo(Assigner.StackOffset, 16)
: 0;

CallSeqStart.addImm(Assigner.StackOffset).addImm(0);
MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
.addImm(Assigner.StackOffset)
.addImm(CalleePopBytes);

// If Callee is a reg, since it is used by a target specific
// instruction, it must have a register class matching the
// constraint of that instruction.
Expand Down Expand Up @@ -1344,17 +1355,6 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
}

uint64_t CalleePopBytes =
doesCalleeRestoreStack(Info.CallConv,
MF.getTarget().Options.GuaranteedTailCallOpt)
? alignTo(Assigner.StackOffset, 16)
: 0;

CallSeqStart.addImm(Assigner.StackOffset).addImm(0);
MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
.addImm(Assigner.StackOffset)
.addImm(CalleePopBytes);

if (!Info.CanLowerReturn) {
insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
Info.DemoteRegister, Info.DemoteStackIndex);
Expand Down
Expand Up @@ -90,8 +90,8 @@ define i32 @i8i16caller() nounwind readnone {
; CHECK-NEXT: $x6 = COPY [[C6]](s64)
; CHECK-NEXT: $x7 = COPY [[C7]](s64)
; CHECK-NEXT: BL @i8i16callee, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $w3, implicit $w4, implicit $x5, implicit $x6, implicit $x7, implicit-def $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: ADJCALLSTACKUP 6, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll
Expand Up @@ -219,8 +219,8 @@ define i32 @i8i16caller() nounwind readnone {
; CHECK-NEXT: $x6 = COPY [[C6]](s64)
; CHECK-NEXT: $x7 = COPY [[C7]](s64)
; CHECK-NEXT: BL @i8i16callee, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $w3, implicit $w4, implicit $x5, implicit $x6, implicit $x7, implicit-def $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: ADJCALLSTACKUP 32, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
Expand Down
Expand Up @@ -802,17 +802,17 @@ define void @jt_multiple_jump_tables(ptr %arg, i32 %arg1, ptr %arg2) {
; CHECK-NEXT: $x0 = COPY [[COPY]](p0)
; CHECK-NEXT: $x1 = COPY [[LOAD]](p0)
; CHECK-NEXT: BL @wibble, csr_aarch64_aapcs_thisreturn, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY [[COPY]](p0)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY [[COPY]](p0)
; CHECK-NEXT: G_BR %bb.59
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.57.bb62:
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: $x0 = COPY [[COPY]](p0)
; CHECK-NEXT: $x1 = COPY [[COPY2]](p0)
; CHECK-NEXT: BL @wibble, csr_aarch64_aapcs_thisreturn, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p0) = COPY [[COPY]](p0)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p0) = COPY [[COPY]](p0)
; CHECK-NEXT: G_BR %bb.59
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.58.bb64:
Expand All @@ -825,8 +825,8 @@ define void @jt_multiple_jump_tables(ptr %arg, i32 %arg1, ptr %arg2) {
; CHECK-NEXT: $x0 = COPY [[COPY]](p0)
; CHECK-NEXT: $x1 = COPY [[FRAME_INDEX]](p0)
; CHECK-NEXT: BL @wibble, csr_aarch64_aapcs_thisreturn, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p0) = COPY [[COPY]](p0)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p0) = COPY [[COPY]](p0)
; CHECK-NEXT: G_BR %bb.59
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.59.bb68:
Expand Down Expand Up @@ -1414,8 +1414,8 @@ define ptr @test_range_phi_switch_cycle() {
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: $w0 = COPY [[PHI]](s32)
; CHECK-NEXT: BL @ham, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit-def $x0
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
bb:
br label %bb1

Expand Down
Expand Up @@ -147,8 +147,8 @@ define i32 @caller_signext_i1() {
; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[SEXT]](s8)
; CHECK-NEXT: $w0 = COPY [[SEXT1]](s32)
; CHECK-NEXT: BL @callee_signext_i1, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit-def $w0
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%r = call i32 @callee_signext_i1(i1 signext true)
Expand Down
Expand Up @@ -56,11 +56,11 @@ define void @test_return_v3f32() {
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: $s0 = COPY [[DEF]](s32)
; CHECK-NEXT: BL @bar, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $s0, implicit-def $q0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: RET_ReallyLR
%call = call <3 x float> @bar(float undef)
ret void
Expand Down
Expand Up @@ -141,8 +141,8 @@ define i32 @caller_zeroext_i1() {
; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ZEXT]](s8)
; CHECK-NEXT: $w0 = COPY [[ZEXT1]](s32)
; CHECK-NEXT: BL @callee_zeroext_i1, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit-def $w0
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%r = call i32 @callee_zeroext_i1(i1 zeroext true)
Expand Down
Expand Up @@ -120,8 +120,8 @@ define i32 @test_too_big_stack() {
; DARWIN-NEXT: $x6 = COPY [[DEF]](s64)
; DARWIN-NEXT: $x7 = COPY [[DEF]](s64)
; DARWIN-NEXT: BL @too_big_stack, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit-def $w0
; DARWIN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
; DARWIN-NEXT: ADJCALLSTACKUP 4, 0, implicit-def $sp, implicit $sp
; DARWIN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
; DARWIN-NEXT: $w0 = COPY [[COPY1]](s32)
; DARWIN-NEXT: RET_ReallyLR implicit $w0
; WINDOWS-LABEL: name: test_too_big_stack
Expand All @@ -146,8 +146,8 @@ define i32 @test_too_big_stack() {
; WINDOWS-NEXT: $x6 = COPY [[DEF]](s64)
; WINDOWS-NEXT: $x7 = COPY [[DEF]](s64)
; WINDOWS-NEXT: BL @too_big_stack, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit-def $w0
; WINDOWS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
; WINDOWS-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
; WINDOWS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
; WINDOWS-NEXT: $w0 = COPY [[COPY1]](s32)
; WINDOWS-NEXT: RET_ReallyLR implicit $w0
entry:
Expand Down Expand Up @@ -420,8 +420,8 @@ define hidden swiftcc i64 @swiftself_indirect_tail(ptr swiftself %arg) {
; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x20
; DARWIN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; DARWIN-NEXT: BL @pluto, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0
; DARWIN-NEXT: [[COPY1:%[0-9]+]]:tcgpr64(p0) = COPY $x0
; DARWIN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; DARWIN-NEXT: [[COPY1:%[0-9]+]]:tcgpr64(p0) = COPY $x0
; DARWIN-NEXT: $x20 = COPY [[COPY]](p0)
; DARWIN-NEXT: TCRETURNri [[COPY1]](p0), 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $x20
; WINDOWS-LABEL: name: swiftself_indirect_tail
Expand All @@ -431,8 +431,8 @@ define hidden swiftcc i64 @swiftself_indirect_tail(ptr swiftself %arg) {
; WINDOWS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x20
; WINDOWS-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; WINDOWS-NEXT: BL @pluto, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $x0
; WINDOWS-NEXT: [[COPY1:%[0-9]+]]:tcgpr64(p0) = COPY $x0
; WINDOWS-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; WINDOWS-NEXT: [[COPY1:%[0-9]+]]:tcgpr64(p0) = COPY $x0
; WINDOWS-NEXT: $x20 = COPY [[COPY]](p0)
; WINDOWS-NEXT: TCRETURNri [[COPY1]](p0), 0, csr_aarch64_aapcs, implicit $sp, implicit $x20
%tmp = call ptr @pluto()
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll
Expand Up @@ -372,10 +372,10 @@ define i32 @test_zext_return_from_callee() {
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: BL @has_zext_return, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $w0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
; CHECK-NEXT: $w0 = COPY [[ZEXT]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
Expand All @@ -391,10 +391,10 @@ define i32 @test_zext_return_from_callee2() {
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: BL @has_zext_return, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $w0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
; CHECK-NEXT: $w0 = COPY [[ZEXT]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
Expand All @@ -411,10 +411,10 @@ define i32 @test_sext_return_from_callee() {
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: BL @has_sext_return, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $w0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 16
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
; CHECK-NEXT: $w0 = COPY [[SEXT]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
Expand All @@ -430,10 +430,10 @@ define i32 @test_sext_return_from_callee2() {
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: BL @has_sext_return, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def $w0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 16
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
; CHECK-NEXT: $w0 = COPY [[SEXT]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
Expand Down
Expand Up @@ -20,8 +20,8 @@ define { ptr, i32 } @bar() personality ptr @__gxx_personality_v0 {
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: $w0 = COPY [[C]](s32)
; CHECK-NEXT: BL @foo, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit-def $w0
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: EH_LABEL <mcsymbol >
; CHECK-NEXT: G_BR %bb.3
; CHECK-NEXT: {{ $}}
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-cos.mir
Expand Up @@ -19,32 +19,32 @@ body: |
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-NEXT: $s0 = COPY [[V1_S32]](s32)
; CHECK-NEXT: BL &cosf
; CHECK-NEXT: [[ELT1_S32:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: ADJCALLSTACKUP
; CHECK-NEXT: [[ELT1_S32:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: [[ELT1:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT1_S32]](s32)
; CHECK-DAG: [[V2_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V2]](s16)
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-NEXT: $s0 = COPY [[V2_S32]](s32)
; CHECK-NEXT: BL &cosf
; CHECK-NEXT: [[ELT2_S32:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: ADJCALLSTACKUP
; CHECK-NEXT: [[ELT2_S32:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: [[ELT2:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT2_S32]](s32)
; CHECK-DAG: [[V3_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V3]](s16)
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-NEXT: $s0 = COPY [[V3_S32]](s32)
; CHECK-NEXT: BL &cosf
; CHECK-NEXT: [[ELT3_S32:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: ADJCALLSTACKUP
; CHECK-NEXT: [[ELT3_S32:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: [[ELT3:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT3_S32]](s32)
; CHECK-DAG: [[V4_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V4]](s16)
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-NEXT: $s0 = COPY [[V4_S32]](s32)
; CHECK-NEXT: BL &cosf
; CHECK-NEXT: [[ELT4_S32:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: ADJCALLSTACKUP
; CHECK-NEXT: [[ELT4_S32:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: [[ELT4:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT4_S32]](s32)
; CHECK-DAG: %{{[0-9]+}}:_(<4 x s16>) = G_BUILD_VECTOR [[ELT1]](s16), [[ELT2]](s16), [[ELT3]](s16), [[ELT4]](s16)
Expand Down Expand Up @@ -217,8 +217,8 @@ body: |
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-NEXT: $s0 = COPY [[REG1]](s32)
; CHECK-NEXT: BL &cosf
; CHECK-NEXT: [[REG2:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: ADJCALLSTACKUP
; CHECK-NEXT: [[REG2:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: [[RES:%[0-9]+]]:_(s16) = G_FPTRUNC [[REG2]](s32)
%0:_(s16) = COPY $h0
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-exp.mir
Expand Up @@ -19,32 +19,32 @@ body: |
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-NEXT: $s0 = COPY [[V1_S32]](s32)
; CHECK-NEXT: BL &expf
; CHECK-NEXT: [[ELT1_S32:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: ADJCALLSTACKUP
; CHECK-NEXT: [[ELT1_S32:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: [[ELT1:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT1_S32]](s32)
; CHECK-DAG: [[V2_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V2]](s16)
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-NEXT: $s0 = COPY [[V2_S32]](s32)
; CHECK-NEXT: BL &expf
; CHECK-NEXT: [[ELT2_S32:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: ADJCALLSTACKUP
; CHECK-NEXT: [[ELT2_S32:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: [[ELT2:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT2_S32]](s32)
; CHECK-DAG: [[V3_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V3]](s16)
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-NEXT: $s0 = COPY [[V3_S32]](s32)
; CHECK-NEXT: BL &expf
; CHECK-NEXT: [[ELT3_S32:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: ADJCALLSTACKUP
; CHECK-NEXT: [[ELT3_S32:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: [[ELT3:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT3_S32]](s32)
; CHECK-DAG: [[V4_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V4]](s16)
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-NEXT: $s0 = COPY [[V4_S32]](s32)
; CHECK-NEXT: BL &expf
; CHECK-NEXT: [[ELT4_S32:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: ADJCALLSTACKUP
; CHECK-NEXT: [[ELT4_S32:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: [[ELT4:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT4_S32]](s32)
; CHECK-DAG: %{{[0-9]+}}:_(<4 x s16>) = G_BUILD_VECTOR [[ELT1]](s16), [[ELT2]](s16), [[ELT3]](s16), [[ELT4]](s16)
Expand Down Expand Up @@ -217,8 +217,8 @@ body: |
; CHECK-NEXT: ADJCALLSTACKDOWN
; CHECK-NEXT: $s0 = COPY [[REG1]](s32)
; CHECK-NEXT: BL &expf
; CHECK-NEXT: [[REG2:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: ADJCALLSTACKUP
; CHECK-NEXT: [[REG2:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: [[RES:%[0-9]+]]:_(s16) = G_FPTRUNC [[REG2]](s32)
%0:_(s16) = COPY $h0
Expand Down

0 comments on commit 31d6a57

Please sign in to comment.