Skip to content

Commit

Permalink
[ARM] Deliberately prevent inline asm in low overhead loops. NFC
Browse files Browse the repository at this point in the history
This was already handled by one of the "else" branches in
maybeLoweredToCall, so this patch is NFC, but it makes the check
explicit and adds a test. We may want to support this in certain
situations in the future, but for the moment just don't try to create
low overhead loops with inline asm in them.

Differential Revision: https://reviews.llvm.org/D91257
  • Loading branch information
davemgreen committed Nov 19, 2020
1 parent 1407833 commit 006b3bd
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 1 deletion.
3 changes: 2 additions & 1 deletion llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
Expand Up @@ -1694,7 +1694,8 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
auto ScanLoop = [&](Loop *L) {
for (auto *BB : L->getBlocks()) {
for (auto &I : *BB) {
if (maybeLoweredToCall(I) || IsHardwareLoopIntrinsic(I)) {
if (maybeLoweredToCall(I) || IsHardwareLoopIntrinsic(I) ||
isa<InlineAsm>(I)) {
LLVM_DEBUG(dbgs() << "ARMHWLoops: Bad instruction: " << I << "\n");
return false;
}
Expand Down
96 changes: 96 additions & 0 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll
@@ -0,0 +1,96 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s

; Reduction loop: each iteration loads one i16 from %x and one from %y,
; passes both to an inline asm "add", and accumulates the i32 result.
; The expected assembly below keeps an ordinary subs/bne loop around the
; inline asm block.
define i32 @test(i16* nocapture readonly %x, i16* nocapture readonly %y, i32 %n) {
; CHECK-LABEL: test:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: blt .LBB0_4
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: mov lr, r0
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: .LBB0_2: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrh r3, [r1], #2
; CHECK-NEXT: subs r2, #1
; CHECK-NEXT: ldrh r12, [lr], #2
; CHECK-NEXT: @APP
; CHECK-NEXT: add r3, r12
; CHECK-NEXT: @NO_APP
; CHECK-NEXT: add r0, r3
; CHECK-NEXT: bne .LBB0_2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: pop {r7, pc}
entry:
%cmp9 = icmp sgt i32 %n, 0
br i1 %cmp9, label %for.body, label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.body, %entry
%s.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
ret i32 %s.0.lcssa

for.body: ; preds = %entry, %for.body
%s.011 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i16, i16* %x, i32 %i.010
%0 = load i16, i16* %arrayidx, align 2
%arrayidx1 = getelementptr inbounds i16, i16* %y, i32 %i.010
%1 = load i16, i16* %arrayidx1, align 2
; Inline asm inside the loop body: one register output, two register inputs,
; no clobbers listed.
%2 = tail call i32 asm "add $0, $1, $2", "=r,r,r"(i16 %0, i16 %1) #1
%add = add nsw i32 %2, %s.011
%inc = add nuw nsw i32 %i.010, 1
%exitcond.not = icmp eq i32 %inc, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}

; Same reduction loop as @test, except that the inline asm additionally lists
; lr as clobbered (~{lr}). The expected assembly below still uses a subs/bne
; loop, and uses r3/r4 where the expected output of @test uses lr/r3.
define i32 @testlr(i16* nocapture readonly %x, i16* nocapture readonly %y, i32 %n) {
; CHECK-LABEL: testlr:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: blt .LBB1_4
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: mov r3, r0
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: .LBB1_2: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrh r4, [r1], #2
; CHECK-NEXT: subs r2, #1
; CHECK-NEXT: ldrh r12, [r3], #2
; CHECK-NEXT: @APP
; CHECK-NEXT: add r4, r12
; CHECK-NEXT: @NO_APP
; CHECK-NEXT: add r0, r4
; CHECK-NEXT: bne .LBB1_2
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT: pop {r4, pc}
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: pop {r4, pc}
entry:
%cmp9 = icmp sgt i32 %n, 0
br i1 %cmp9, label %for.body, label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.body, %entry
%s.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
ret i32 %s.0.lcssa

for.body: ; preds = %entry, %for.body
%s.011 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i16, i16* %x, i32 %i.010
%0 = load i16, i16* %arrayidx, align 2
%arrayidx1 = getelementptr inbounds i16, i16* %y, i32 %i.010
%1 = load i16, i16* %arrayidx1, align 2
; Inline asm inside the loop body: one register output, two register inputs,
; and lr listed as a clobber.
%2 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{lr}"(i16 %0, i16 %1) #1
%add = add nsw i32 %2, %s.011
%inc = add nuw nsw i32 %i.010, 1
%exitcond.not = icmp eq i32 %inc, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}

0 comments on commit 006b3bd

Please sign in to comment.