diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index fe06bb8b3e5f0e..7e84dba6a2b643 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -31684,17 +31684,28 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI, testMBB->addSuccessor(blockMBB); testMBB->addSuccessor(tailMBB); - // allocate a block and touch it + // Touch the block then extend it. This is done on the opposite side of + // static probe where we allocate then touch, to avoid the need of probing the + // tail of the static alloca. Possible scenarios are: + // + // + ---- <- ------------ <- ------------- <- ------------ + + // | | + // [free probe] -> [page alloc] -> [alloc probe] -> [tail alloc] + -> [dyn probe] -> [page alloc] -> [dyn probe] -> [tail alloc] + + // | | + // + <- ----------- <- ------------ <- ----------- <- ------------ + + // + // The property we want to enforce is to never have more than [page alloc] between two probes. + + const unsigned MovMIOpc = + TFI.Uses64BitFramePtr ? X86::MOV64mi32 : X86::MOV32mi; + addRegOffset(BuildMI(blockMBB, DL, TII->get(MovMIOpc)), physSPReg, false, 0) + .addImm(0); BuildMI(blockMBB, DL, TII->get(getSUBriOpcode(TFI.Uses64BitFramePtr, ProbeSize)), physSPReg) .addReg(physSPReg) .addImm(ProbeSize); - const unsigned MovMIOpc = - TFI.Uses64BitFramePtr ? X86::MOV64mi32 : X86::MOV32mi; - addRegOffset(BuildMI(blockMBB, DL, TII->get(MovMIOpc)), physSPReg, false, 0) - .addImm(0); BuildMI(blockMBB, DL, TII->get(X86::JMP_1)).addMBB(testMBB); blockMBB->addSuccessor(testMBB); diff --git a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll index 140da42fc6fbbc..928b7a9cba024d 100644 --- a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll +++ b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll @@ -26,8 +26,8 @@ attributes #0 = {"probe-stack"="inline-asm"} ; CHECK-X86-64-NEXT: cmpq %rax, %rsp ; CHECK-X86-64-NEXT: jl .LBB0_3 ; CHECK-X86-64-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 -; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000 ; CHECK-X86-64-NEXT: movq $0, (%rsp) +; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000 ; CHECK-X86-64-NEXT: cmpq %rax, %rsp ; CHECK-X86-64-NEXT: jge .LBB0_2 ; CHECK-X86-64-NEXT: .LBB0_3: @@ -56,8 +56,8 @@ attributes #0 = {"probe-stack"="inline-asm"} ; CHECK-X86-32-NEXT: cmpl %eax, %esp ; CHECK-X86-32-NEXT: jl .LBB0_3 ; CHECK-X86-32-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 -; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000 ; CHECK-X86-32-NEXT: movl $0, (%esp) +; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000 ; CHECK-X86-32-NEXT: cmpl %eax, %esp ; CHECK-X86-32-NEXT: jge .LBB0_2 ; CHECK-X86-32-NEXT: .LBB0_3: