Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[ARM] Deliberately prevent inline asm in low overhead loops. NFC
This was already handled by one of the "else" branches in maybeLoweredToCall, so this patch is NFC (no functional change), but it makes the behaviour explicit and adds a test. We may want to support this in certain situations in the future, but for the moment just don't try to create low overhead loops with inline asm in them. Differential Revision: https://reviews.llvm.org/D91257
- Loading branch information
1 parent
1407833
commit 006b3bd
Showing
2 changed files
with
98 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s | ||
|
||
; @test: a counted loop whose body contains an inline-asm call.
; When %n > 0 the loop runs once per index: it loads an i16 from %x and from
; %y, passes both to the inline asm "add $0, $1, $2" (constraints "=r,r,r"),
; and adds the i32 asm result to a running sum. The sum is returned, or 0 when
; the loop is skipped.
; The autogenerated assertions below expect the loop to be closed by an
; ordinary subs/bne pair, i.e. the inline asm is expected to keep this loop
; from becoming a low overhead loop (as stated in the commit description).
define i32 @test(i16* nocapture readonly %x, i16* nocapture readonly %y, i32 %n) { | ||
; CHECK-LABEL: test: | ||
; CHECK: @ %bb.0: @ %entry | ||
; CHECK-NEXT: .save {r7, lr} | ||
; CHECK-NEXT: push {r7, lr} | ||
; CHECK-NEXT: cmp r2, #1 | ||
; CHECK-NEXT: blt .LBB0_4 | ||
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader | ||
; CHECK-NEXT: mov lr, r0 | ||
; CHECK-NEXT: movs r0, #0 | ||
; CHECK-NEXT: .LBB0_2: @ %for.body | ||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 | ||
; CHECK-NEXT: ldrh r3, [r1], #2 | ||
; CHECK-NEXT: subs r2, #1 | ||
; CHECK-NEXT: ldrh r12, [lr], #2 | ||
; CHECK-NEXT: @APP | ||
; CHECK-NEXT: add r3, r12 | ||
; CHECK-NEXT: @NO_APP | ||
; CHECK-NEXT: add r0, r3 | ||
; CHECK-NEXT: bne .LBB0_2 | ||
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup | ||
; CHECK-NEXT: pop {r7, pc} | ||
; CHECK-NEXT: .LBB0_4: | ||
; CHECK-NEXT: movs r0, #0 | ||
; CHECK-NEXT: pop {r7, pc} | ||
entry: | ||
; Guard: only enter the loop when %n is strictly positive.
%cmp9 = icmp sgt i32 %n, 0 | ||
br i1 %cmp9, label %for.body, label %for.cond.cleanup | ||
|
||
; Exit block: the result is 0 when coming straight from %entry, otherwise the
; last value of the running sum.
for.cond.cleanup: ; preds = %for.body, %entry | ||
%s.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] | ||
ret i32 %s.0.lcssa | ||
|
||
; Loop body: %s.011 is the running sum and %i.010 the element index, both
; starting at 0.
for.body: ; preds = %entry, %for.body | ||
%s.011 = phi i32 [ %add, %for.body ], [ 0, %entry ] | ||
%i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ] | ||
%arrayidx = getelementptr inbounds i16, i16* %x, i32 %i.010 | ||
%0 = load i16, i16* %arrayidx, align 2 | ||
%arrayidx1 = getelementptr inbounds i16, i16* %y, i32 %i.010 | ||
%1 = load i16, i16* %arrayidx1, align 2 | ||
; Inline asm under test; it takes the two loaded i16 values and yields an i32.
; NOTE(review): attribute group #1 has no definition in the visible file -
; confirm that leaving it dangling is intentional.
%2 = tail call i32 asm "add $0, $1, $2", "=r,r,r"(i16 %0, i16 %1) #1 | ||
%add = add nsw i32 %2, %s.011 | ||
; Advance the index and leave the loop once it reaches %n.
%inc = add nuw nsw i32 %i.010, 1 | ||
%exitcond.not = icmp eq i32 %inc, %n | ||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body | ||
} | ||
|
||
; @testlr: a counted loop whose body contains an inline-asm call that lists lr
; as clobbered.
; The IR matches @test except for the extra "~{lr}" entry in the asm
; constraint string. When %n > 0 the loop loads an i16 from %x and from %y,
; passes both to the inline asm "add $0, $1, $2", and adds the i32 asm result
; to a running sum. The sum is returned, or 0 when the loop is skipped.
; The autogenerated assertions below expect an ordinary subs/bne loop in which
; lr appears only in the prologue save/push: the %x pointer is kept in r3 and
; r4 is saved alongside lr.
define i32 @testlr(i16* nocapture readonly %x, i16* nocapture readonly %y, i32 %n) { | ||
; CHECK-LABEL: testlr: | ||
; CHECK: @ %bb.0: @ %entry | ||
; CHECK-NEXT: .save {r4, lr} | ||
; CHECK-NEXT: push {r4, lr} | ||
; CHECK-NEXT: cmp r2, #1 | ||
; CHECK-NEXT: blt .LBB1_4 | ||
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader | ||
; CHECK-NEXT: mov r3, r0 | ||
; CHECK-NEXT: movs r0, #0 | ||
; CHECK-NEXT: .LBB1_2: @ %for.body | ||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 | ||
; CHECK-NEXT: ldrh r4, [r1], #2 | ||
; CHECK-NEXT: subs r2, #1 | ||
; CHECK-NEXT: ldrh r12, [r3], #2 | ||
; CHECK-NEXT: @APP | ||
; CHECK-NEXT: add r4, r12 | ||
; CHECK-NEXT: @NO_APP | ||
; CHECK-NEXT: add r0, r4 | ||
; CHECK-NEXT: bne .LBB1_2 | ||
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup | ||
; CHECK-NEXT: pop {r4, pc} | ||
; CHECK-NEXT: .LBB1_4: | ||
; CHECK-NEXT: movs r0, #0 | ||
; CHECK-NEXT: pop {r4, pc} | ||
entry: | ||
; Guard: only enter the loop when %n is strictly positive.
%cmp9 = icmp sgt i32 %n, 0 | ||
br i1 %cmp9, label %for.body, label %for.cond.cleanup | ||
|
||
; Exit block: the result is 0 when coming straight from %entry, otherwise the
; last value of the running sum.
for.cond.cleanup: ; preds = %for.body, %entry | ||
%s.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] | ||
ret i32 %s.0.lcssa | ||
|
||
; Loop body: %s.011 is the running sum and %i.010 the element index, both
; starting at 0.
for.body: ; preds = %entry, %for.body | ||
%s.011 = phi i32 [ %add, %for.body ], [ 0, %entry ] | ||
%i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ] | ||
%arrayidx = getelementptr inbounds i16, i16* %x, i32 %i.010 | ||
%0 = load i16, i16* %arrayidx, align 2 | ||
%arrayidx1 = getelementptr inbounds i16, i16* %y, i32 %i.010 | ||
%1 = load i16, i16* %arrayidx1, align 2 | ||
; Inline asm under test; the "~{lr}" constraint marks lr as clobbered by the
; asm.
; NOTE(review): attribute group #1 has no definition in the visible file -
; confirm that leaving it dangling is intentional.
%2 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{lr}"(i16 %0, i16 %1) #1 | ||
%add = add nsw i32 %2, %s.011 | ||
; Advance the index and leave the loop once it reaches %n.
%inc = add nuw nsw i32 %i.010, 1 | ||
%exitcond.not = icmp eq i32 %inc, %n | ||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body | ||
} |