190 changes: 95 additions & 95 deletions llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
Original file line number Diff line number Diff line change
Expand Up @@ -261,41 +261,41 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind {
;
; RV32IM-LABEL: fold_urem_vec_2:
; RV32IM: # %bb.0:
; RV32IM-NEXT: lhu a7, 12(a1)
; RV32IM-NEXT: lhu a3, 8(a1)
; RV32IM-NEXT: lhu a6, 12(a1)
; RV32IM-NEXT: lhu a7, 8(a1)
; RV32IM-NEXT: lhu a4, 0(a1)
; RV32IM-NEXT: lhu a1, 4(a1)
; RV32IM-NEXT: lui a5, 364242
; RV32IM-NEXT: addi a6, a5, 777
; RV32IM-NEXT: mulhu a5, a4, a6
; RV32IM-NEXT: sub a2, a4, a5
; RV32IM-NEXT: srli a2, a2, 1
; RV32IM-NEXT: add a2, a2, a5
; RV32IM-NEXT: addi a5, a5, 777
; RV32IM-NEXT: mulhu a2, a4, a5
; RV32IM-NEXT: sub a3, a4, a2
; RV32IM-NEXT: srli a3, a3, 1
; RV32IM-NEXT: add a2, a3, a2
; RV32IM-NEXT: srli a2, a2, 6
; RV32IM-NEXT: addi a5, zero, 95
; RV32IM-NEXT: mul a2, a2, a5
; RV32IM-NEXT: addi a3, zero, 95
; RV32IM-NEXT: mul a2, a2, a3
; RV32IM-NEXT: sub t0, a4, a2
; RV32IM-NEXT: mulhu a4, a1, a6
; RV32IM-NEXT: mulhu a4, a1, a5
; RV32IM-NEXT: sub a2, a1, a4
; RV32IM-NEXT: srli a2, a2, 1
; RV32IM-NEXT: add a2, a2, a4
; RV32IM-NEXT: srli a2, a2, 6
; RV32IM-NEXT: mul a2, a2, a5
; RV32IM-NEXT: mul a2, a2, a3
; RV32IM-NEXT: sub a1, a1, a2
; RV32IM-NEXT: mulhu a2, a3, a6
; RV32IM-NEXT: sub a4, a3, a2
; RV32IM-NEXT: mulhu a2, a7, a5
; RV32IM-NEXT: sub a4, a7, a2
; RV32IM-NEXT: srli a4, a4, 1
; RV32IM-NEXT: add a2, a4, a2
; RV32IM-NEXT: srli a2, a2, 6
; RV32IM-NEXT: mul a2, a2, a5
; RV32IM-NEXT: sub a2, a3, a2
; RV32IM-NEXT: mulhu a3, a7, a6
; RV32IM-NEXT: sub a4, a7, a3
; RV32IM-NEXT: srli a4, a4, 1
; RV32IM-NEXT: add a3, a4, a3
; RV32IM-NEXT: srli a3, a3, 6
; RV32IM-NEXT: mul a3, a3, a5
; RV32IM-NEXT: sub a3, a7, a3
; RV32IM-NEXT: mul a2, a2, a3
; RV32IM-NEXT: sub a2, a7, a2
; RV32IM-NEXT: mulhu a4, a6, a5
; RV32IM-NEXT: sub a5, a6, a4
; RV32IM-NEXT: srli a5, a5, 1
; RV32IM-NEXT: add a4, a5, a4
; RV32IM-NEXT: srli a4, a4, 6
; RV32IM-NEXT: mul a3, a4, a3
; RV32IM-NEXT: sub a3, a6, a3
; RV32IM-NEXT: sh a3, 6(a0)
; RV32IM-NEXT: sh a2, 4(a0)
; RV32IM-NEXT: sh a1, 2(a0)
Expand Down Expand Up @@ -348,8 +348,8 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind {
;
; RV64IM-LABEL: fold_urem_vec_2:
; RV64IM: # %bb.0:
; RV64IM-NEXT: lhu a7, 24(a1)
; RV64IM-NEXT: lhu a3, 16(a1)
; RV64IM-NEXT: lhu a6, 24(a1)
; RV64IM-NEXT: lhu a7, 16(a1)
; RV64IM-NEXT: lhu a4, 8(a1)
; RV64IM-NEXT: lhu a1, 0(a1)
; RV64IM-NEXT: lui a5, 1423
Expand All @@ -359,36 +359,36 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind {
; RV64IM-NEXT: slli a5, a5, 13
; RV64IM-NEXT: addi a5, a5, -1811
; RV64IM-NEXT: slli a5, a5, 12
; RV64IM-NEXT: addi a6, a5, 561
; RV64IM-NEXT: mulhu a5, a1, a6
; RV64IM-NEXT: sub a2, a1, a5
; RV64IM-NEXT: srli a2, a2, 1
; RV64IM-NEXT: add a2, a2, a5
; RV64IM-NEXT: addi a5, a5, 561
; RV64IM-NEXT: mulhu a2, a1, a5
; RV64IM-NEXT: sub a3, a1, a2
; RV64IM-NEXT: srli a3, a3, 1
; RV64IM-NEXT: add a2, a3, a2
; RV64IM-NEXT: srli a2, a2, 6
; RV64IM-NEXT: addi a5, zero, 95
; RV64IM-NEXT: mul a2, a2, a5
; RV64IM-NEXT: addi a3, zero, 95
; RV64IM-NEXT: mul a2, a2, a3
; RV64IM-NEXT: sub t0, a1, a2
; RV64IM-NEXT: mulhu a2, a4, a6
; RV64IM-NEXT: mulhu a2, a4, a5
; RV64IM-NEXT: sub a1, a4, a2
; RV64IM-NEXT: srli a1, a1, 1
; RV64IM-NEXT: add a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 6
; RV64IM-NEXT: mul a1, a1, a5
; RV64IM-NEXT: mul a1, a1, a3
; RV64IM-NEXT: sub a1, a4, a1
; RV64IM-NEXT: mulhu a2, a3, a6
; RV64IM-NEXT: sub a4, a3, a2
; RV64IM-NEXT: mulhu a2, a7, a5
; RV64IM-NEXT: sub a4, a7, a2
; RV64IM-NEXT: srli a4, a4, 1
; RV64IM-NEXT: add a2, a4, a2
; RV64IM-NEXT: srli a2, a2, 6
; RV64IM-NEXT: mul a2, a2, a5
; RV64IM-NEXT: sub a2, a3, a2
; RV64IM-NEXT: mulhu a3, a7, a6
; RV64IM-NEXT: sub a4, a7, a3
; RV64IM-NEXT: srli a4, a4, 1
; RV64IM-NEXT: add a3, a4, a3
; RV64IM-NEXT: srli a3, a3, 6
; RV64IM-NEXT: mul a3, a3, a5
; RV64IM-NEXT: sub a3, a7, a3
; RV64IM-NEXT: mul a2, a2, a3
; RV64IM-NEXT: sub a2, a7, a2
; RV64IM-NEXT: mulhu a4, a6, a5
; RV64IM-NEXT: sub a5, a6, a4
; RV64IM-NEXT: srli a5, a5, 1
; RV64IM-NEXT: add a4, a5, a4
; RV64IM-NEXT: srli a4, a4, 6
; RV64IM-NEXT: mul a3, a4, a3
; RV64IM-NEXT: sub a3, a6, a3
; RV64IM-NEXT: sh a3, 6(a0)
; RV64IM-NEXT: sh a2, 4(a0)
; RV64IM-NEXT: sh a1, 2(a0)
Expand Down Expand Up @@ -475,44 +475,44 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind {
;
; RV32IM-LABEL: combine_urem_udiv:
; RV32IM: # %bb.0:
; RV32IM-NEXT: lhu a7, 0(a1)
; RV32IM-NEXT: lhu a3, 4(a1)
; RV32IM-NEXT: lhu a6, 0(a1)
; RV32IM-NEXT: lhu a7, 4(a1)
; RV32IM-NEXT: lhu a4, 12(a1)
; RV32IM-NEXT: lhu a1, 8(a1)
; RV32IM-NEXT: lui a5, 364242
; RV32IM-NEXT: addi a6, a5, 777
; RV32IM-NEXT: mulhu a5, a4, a6
; RV32IM-NEXT: sub a2, a4, a5
; RV32IM-NEXT: srli a2, a2, 1
; RV32IM-NEXT: add a2, a2, a5
; RV32IM-NEXT: addi a5, a5, 777
; RV32IM-NEXT: mulhu a2, a4, a5
; RV32IM-NEXT: sub a3, a4, a2
; RV32IM-NEXT: srli a3, a3, 1
; RV32IM-NEXT: add a2, a3, a2
; RV32IM-NEXT: srli t3, a2, 6
; RV32IM-NEXT: addi t0, zero, 95
; RV32IM-NEXT: mul a5, t3, t0
; RV32IM-NEXT: sub t1, a4, a5
; RV32IM-NEXT: mulhu a5, a1, a6
; RV32IM-NEXT: sub a4, a1, a5
; RV32IM-NEXT: srli a4, a4, 1
; RV32IM-NEXT: add a4, a4, a5
; RV32IM-NEXT: srli a4, a4, 6
; RV32IM-NEXT: mul a5, a4, t0
; RV32IM-NEXT: sub t2, a1, a5
; RV32IM-NEXT: mulhu a5, a3, a6
; RV32IM-NEXT: sub a1, a3, a5
; RV32IM-NEXT: mul a3, t3, t0
; RV32IM-NEXT: sub t1, a4, a3
; RV32IM-NEXT: mulhu a4, a1, a5
; RV32IM-NEXT: sub a3, a1, a4
; RV32IM-NEXT: srli a3, a3, 1
; RV32IM-NEXT: add a3, a3, a4
; RV32IM-NEXT: srli a3, a3, 6
; RV32IM-NEXT: mul a4, a3, t0
; RV32IM-NEXT: sub t2, a1, a4
; RV32IM-NEXT: mulhu a4, a7, a5
; RV32IM-NEXT: sub a1, a7, a4
; RV32IM-NEXT: srli a1, a1, 1
; RV32IM-NEXT: add a1, a1, a5
; RV32IM-NEXT: add a1, a1, a4
; RV32IM-NEXT: srli a1, a1, 6
; RV32IM-NEXT: mul a5, a1, t0
; RV32IM-NEXT: sub a3, a3, a5
; RV32IM-NEXT: mulhu a5, a7, a6
; RV32IM-NEXT: sub a2, a7, a5
; RV32IM-NEXT: mul a4, a1, t0
; RV32IM-NEXT: sub a4, a7, a4
; RV32IM-NEXT: mulhu a5, a6, a5
; RV32IM-NEXT: sub a2, a6, a5
; RV32IM-NEXT: srli a2, a2, 1
; RV32IM-NEXT: add a2, a2, a5
; RV32IM-NEXT: srli a2, a2, 6
; RV32IM-NEXT: mul a5, a2, t0
; RV32IM-NEXT: sub a5, a7, a5
; RV32IM-NEXT: sub a5, a6, a5
; RV32IM-NEXT: add a2, a5, a2
; RV32IM-NEXT: add a1, a3, a1
; RV32IM-NEXT: add a3, t2, a4
; RV32IM-NEXT: add a1, a4, a1
; RV32IM-NEXT: add a3, t2, a3
; RV32IM-NEXT: add a4, t1, t3
; RV32IM-NEXT: sh a4, 6(a0)
; RV32IM-NEXT: sh a3, 4(a0)
Expand Down Expand Up @@ -594,8 +594,8 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind {
;
; RV64IM-LABEL: combine_urem_udiv:
; RV64IM: # %bb.0:
; RV64IM-NEXT: lhu a7, 0(a1)
; RV64IM-NEXT: lhu a3, 8(a1)
; RV64IM-NEXT: lhu a6, 0(a1)
; RV64IM-NEXT: lhu a7, 8(a1)
; RV64IM-NEXT: lhu a4, 16(a1)
; RV64IM-NEXT: lhu a1, 24(a1)
; RV64IM-NEXT: lui a5, 1423
Expand All @@ -605,38 +605,38 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind {
; RV64IM-NEXT: slli a5, a5, 13
; RV64IM-NEXT: addi a5, a5, -1811
; RV64IM-NEXT: slli a5, a5, 12
; RV64IM-NEXT: addi a6, a5, 561
; RV64IM-NEXT: mulhu a5, a1, a6
; RV64IM-NEXT: sub a2, a1, a5
; RV64IM-NEXT: srli a2, a2, 1
; RV64IM-NEXT: add a2, a2, a5
; RV64IM-NEXT: addi a5, a5, 561
; RV64IM-NEXT: mulhu a2, a1, a5
; RV64IM-NEXT: sub a3, a1, a2
; RV64IM-NEXT: srli a3, a3, 1
; RV64IM-NEXT: add a2, a3, a2
; RV64IM-NEXT: srli t3, a2, 6
; RV64IM-NEXT: addi t0, zero, 95
; RV64IM-NEXT: mul a5, t3, t0
; RV64IM-NEXT: sub t1, a1, a5
; RV64IM-NEXT: mulhu a5, a4, a6
; RV64IM-NEXT: sub a1, a4, a5
; RV64IM-NEXT: mul a3, t3, t0
; RV64IM-NEXT: sub t1, a1, a3
; RV64IM-NEXT: mulhu a3, a4, a5
; RV64IM-NEXT: sub a1, a4, a3
; RV64IM-NEXT: srli a1, a1, 1
; RV64IM-NEXT: add a1, a1, a5
; RV64IM-NEXT: add a1, a1, a3
; RV64IM-NEXT: srli a1, a1, 6
; RV64IM-NEXT: mul a5, a1, t0
; RV64IM-NEXT: sub t2, a4, a5
; RV64IM-NEXT: mulhu a5, a3, a6
; RV64IM-NEXT: sub a4, a3, a5
; RV64IM-NEXT: srli a4, a4, 1
; RV64IM-NEXT: add a4, a4, a5
; RV64IM-NEXT: srli a4, a4, 6
; RV64IM-NEXT: mul a5, a4, t0
; RV64IM-NEXT: sub a3, a3, a5
; RV64IM-NEXT: mulhu a5, a7, a6
; RV64IM-NEXT: sub a2, a7, a5
; RV64IM-NEXT: mul a3, a1, t0
; RV64IM-NEXT: sub t2, a4, a3
; RV64IM-NEXT: mulhu a4, a7, a5
; RV64IM-NEXT: sub a3, a7, a4
; RV64IM-NEXT: srli a3, a3, 1
; RV64IM-NEXT: add a3, a3, a4
; RV64IM-NEXT: srli a3, a3, 6
; RV64IM-NEXT: mul a4, a3, t0
; RV64IM-NEXT: sub a4, a7, a4
; RV64IM-NEXT: mulhu a5, a6, a5
; RV64IM-NEXT: sub a2, a6, a5
; RV64IM-NEXT: srli a2, a2, 1
; RV64IM-NEXT: add a2, a2, a5
; RV64IM-NEXT: srli a2, a2, 6
; RV64IM-NEXT: mul a5, a2, t0
; RV64IM-NEXT: sub a5, a7, a5
; RV64IM-NEXT: sub a5, a6, a5
; RV64IM-NEXT: add a2, a5, a2
; RV64IM-NEXT: add a3, a3, a4
; RV64IM-NEXT: add a3, a4, a3
; RV64IM-NEXT: add a1, t2, a1
; RV64IM-NEXT: add a4, t1, t3
; RV64IM-NEXT: sh a4, 6(a0)
Expand Down
7 changes: 4 additions & 3 deletions llvm/test/CodeGen/Thumb/dyn-stackalloc.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=RA_GREEDY
; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=RA_BASIC

%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
Expand Down Expand Up @@ -45,7 +45,8 @@ define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
; CHECK: sub sp, #
; CHECK: mov r[[R0:[0-9]+]], sp
; CHECK: str r{{[0-9+]}}, [r[[R0]]
; CHECK: str r{{[0-9+]}}, [r[[R0]]
; RA_GREEDY: str r{{[0-9+]}}, [r[[R0]]
; RA_BASIC: stm r[[R0]]!
; CHECK-NOT: ldr r0, [sp
; CHECK: mov r[[R1:[0-9]+]], sp
; CHECK: subs r[[R2:[0-9]+]], r[[R1]], r{{[0-9]+}}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
define dso_local void @check_option(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
; ENABLED-LABEL: check_option:
; ENABLED: @ %bb.0: @ %entry
; ENABLED-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
; ENABLED-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; ENABLED-NEXT: cmp r3, #1
; ENABLED-NEXT: blt .LBB0_4
; ENABLED-NEXT: @ %bb.1: @ %vector.ph.preheader
Expand All @@ -32,11 +32,11 @@ define dso_local void @check_option(i32* noalias nocapture %A, i32* noalias noca
; ENABLED-NEXT: letp lr, .LBB0_3
; ENABLED-NEXT: b .LBB0_2
; ENABLED-NEXT: .LBB0_4: @ %for.cond.cleanup
; ENABLED-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
; ENABLED-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
;
; DISABLED-LABEL: check_option:
; DISABLED: @ %bb.0: @ %entry
; DISABLED-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
; DISABLED-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; DISABLED-NEXT: cmp r3, #1
; DISABLED-NEXT: blt .LBB0_4
; DISABLED-NEXT: @ %bb.1: @ %vector.ph.preheader
Expand All @@ -48,7 +48,7 @@ define dso_local void @check_option(i32* noalias nocapture %A, i32* noalias noca
; DISABLED-NEXT: .LBB0_2: @ %vector.ph
; DISABLED-NEXT: @ =>This Loop Header: Depth=1
; DISABLED-NEXT: @ Child Loop BB0_3 Depth 2
; DISABLED-NEXT: mov r9, r8
; DISABLED-NEXT: mov r7, r8
; DISABLED-NEXT: mov r12, r0
; DISABLED-NEXT: mov r4, r2
; DISABLED-NEXT: mov r5, r1
Expand All @@ -57,9 +57,9 @@ define dso_local void @check_option(i32* noalias nocapture %A, i32* noalias noca
; DISABLED-NEXT: .LBB0_3: @ %vector.body
; DISABLED-NEXT: @ Parent Loop BB0_2 Depth=1
; DISABLED-NEXT: @ => This Inner Loop Header: Depth=2
; DISABLED-NEXT: mov lr, r9
; DISABLED-NEXT: mov lr, r7
; DISABLED-NEXT: vctp.32 r6
; DISABLED-NEXT: sub.w r9, r9, #1
; DISABLED-NEXT: subs r7, #1
; DISABLED-NEXT: subs r6, #4
; DISABLED-NEXT: vpstt
; DISABLED-NEXT: vldrwt.u32 q0, [r5], #16
Expand All @@ -70,7 +70,7 @@ define dso_local void @check_option(i32* noalias nocapture %A, i32* noalias noca
; DISABLED-NEXT: le lr, .LBB0_3
; DISABLED-NEXT: b .LBB0_2
; DISABLED-NEXT: .LBB0_4: @ %for.cond.cleanup
; DISABLED-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
; DISABLED-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
entry:
%cmp8 = icmp sgt i32 %N, 0
%0 = add i32 %N, 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,17 @@
define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input, i16* nocapture %Output, i16 signext %Size, i16 signext %N, i16 signext %Scale) local_unnamed_addr {
; ENABLED-LABEL: varying_outer_2d_reduction:
; ENABLED: @ %bb.0: @ %entry
; ENABLED-NEXT: push.w {r4, r5, r6, r7, r9, r10, r11, lr}
; ENABLED-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
; ENABLED-NEXT: sub sp, #4
; ENABLED-NEXT: cmp r3, #1
; ENABLED-NEXT: str r0, [sp] @ 4-byte Spill
; ENABLED-NEXT: blt .LBB0_8
; ENABLED-NEXT: @ %bb.1: @ %for.body.lr.ph
; ENABLED-NEXT: mov r11, r0
; ENABLED-NEXT: ldr r0, [sp, #32]
; ENABLED-NEXT: add.w r9, r2, #3
; ENABLED-NEXT: mov.w r12, #0
; ENABLED-NEXT: mov r10, r11
; ENABLED-NEXT: ldr r0, [sp, #36]
; ENABLED-NEXT: add.w r12, r2, #3
; ENABLED-NEXT: ldr.w r10, [sp] @ 4-byte Reload
; ENABLED-NEXT: mov.w r8, #0
; ENABLED-NEXT: mov r9, r12
; ENABLED-NEXT: uxth r0, r0
; ENABLED-NEXT: rsbs r5, r0, #0
; ENABLED-NEXT: b .LBB0_4
Expand All @@ -35,32 +37,31 @@ define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input
; ENABLED-NEXT: @ in Loop: Header=BB0_4 Depth=1
; ENABLED-NEXT: lsrs r0, r0, #16
; ENABLED-NEXT: sub.w r9, r9, #1
; ENABLED-NEXT: strh.w r0, [r1, r12, lsl #1]
; ENABLED-NEXT: add.w r12, r12, #1
; ENABLED-NEXT: strh.w r0, [r1, r8, lsl #1]
; ENABLED-NEXT: add.w r8, r8, #1
; ENABLED-NEXT: add.w r10, r10, #2
; ENABLED-NEXT: cmp r12, r3
; ENABLED-NEXT: cmp r8, r3
; ENABLED-NEXT: beq .LBB0_8
; ENABLED-NEXT: .LBB0_4: @ %for.body
; ENABLED-NEXT: @ =>This Loop Header: Depth=1
; ENABLED-NEXT: @ Child Loop BB0_6 Depth 2
; ENABLED-NEXT: cmp r2, r12
; ENABLED-NEXT: cmp r2, r8
; ENABLED-NEXT: ble .LBB0_2
; ENABLED-NEXT: @ %bb.5: @ %vector.ph
; ENABLED-NEXT: @ in Loop: Header=BB0_4 Depth=1
; ENABLED-NEXT: bic r0, r9, #3
; ENABLED-NEXT: movs r7, #1
; ENABLED-NEXT: subs r0, #4
; ENABLED-NEXT: sub.w r4, r2, r12
; ENABLED-NEXT: sub.w r4, r2, r8
; ENABLED-NEXT: vmov.i32 q1, #0x0
; ENABLED-NEXT: add.w r6, r7, r0, lsr #2
; ENABLED-NEXT: adds r0, r2, #3
; ENABLED-NEXT: sub.w r0, r0, r12
; ENABLED-NEXT: sub.w r0, r12, r8
; ENABLED-NEXT: bic r0, r0, #3
; ENABLED-NEXT: subs r0, #4
; ENABLED-NEXT: add.w r0, r7, r0, lsr #2
; ENABLED-NEXT: mov r7, r10
; ENABLED-NEXT: dls lr, r0
; ENABLED-NEXT: mov r0, r11
; ENABLED-NEXT: ldr r0, [sp] @ 4-byte Reload
; ENABLED-NEXT: .LBB0_6: @ %vector.body
; ENABLED-NEXT: @ Parent Loop BB0_4 Depth=1
; ENABLED-NEXT: @ => This Inner Loop Header: Depth=2
Expand All @@ -82,19 +83,22 @@ define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input
; ENABLED-NEXT: vaddv.u32 r0, q0
; ENABLED-NEXT: b .LBB0_3
; ENABLED-NEXT: .LBB0_8: @ %for.end17
; ENABLED-NEXT: pop.w {r4, r5, r6, r7, r9, r10, r11, pc}
; ENABLED-NEXT: add sp, #4
; ENABLED-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
;
; NOREDUCTIONS-LABEL: varying_outer_2d_reduction:
; NOREDUCTIONS: @ %bb.0: @ %entry
; NOREDUCTIONS-NEXT: push.w {r4, r5, r6, r7, r9, r10, r11, lr}
; NOREDUCTIONS-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
; NOREDUCTIONS-NEXT: sub sp, #4
; NOREDUCTIONS-NEXT: cmp r3, #1
; NOREDUCTIONS-NEXT: str r0, [sp] @ 4-byte Spill
; NOREDUCTIONS-NEXT: blt .LBB0_8
; NOREDUCTIONS-NEXT: @ %bb.1: @ %for.body.lr.ph
; NOREDUCTIONS-NEXT: mov r11, r0
; NOREDUCTIONS-NEXT: ldr r0, [sp, #32]
; NOREDUCTIONS-NEXT: add.w r9, r2, #3
; NOREDUCTIONS-NEXT: mov.w r12, #0
; NOREDUCTIONS-NEXT: mov r10, r11
; NOREDUCTIONS-NEXT: ldr r0, [sp, #36]
; NOREDUCTIONS-NEXT: add.w r12, r2, #3
; NOREDUCTIONS-NEXT: ldr.w r10, [sp] @ 4-byte Reload
; NOREDUCTIONS-NEXT: mov.w r8, #0
; NOREDUCTIONS-NEXT: mov r9, r12
; NOREDUCTIONS-NEXT: uxth r0, r0
; NOREDUCTIONS-NEXT: rsbs r5, r0, #0
; NOREDUCTIONS-NEXT: b .LBB0_4
Expand All @@ -104,32 +108,31 @@ define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input
; NOREDUCTIONS-NEXT: @ in Loop: Header=BB0_4 Depth=1
; NOREDUCTIONS-NEXT: lsrs r0, r0, #16
; NOREDUCTIONS-NEXT: sub.w r9, r9, #1
; NOREDUCTIONS-NEXT: strh.w r0, [r1, r12, lsl #1]
; NOREDUCTIONS-NEXT: add.w r12, r12, #1
; NOREDUCTIONS-NEXT: strh.w r0, [r1, r8, lsl #1]
; NOREDUCTIONS-NEXT: add.w r8, r8, #1
; NOREDUCTIONS-NEXT: add.w r10, r10, #2
; NOREDUCTIONS-NEXT: cmp r12, r3
; NOREDUCTIONS-NEXT: cmp r8, r3
; NOREDUCTIONS-NEXT: beq .LBB0_8
; NOREDUCTIONS-NEXT: .LBB0_4: @ %for.body
; NOREDUCTIONS-NEXT: @ =>This Loop Header: Depth=1
; NOREDUCTIONS-NEXT: @ Child Loop BB0_6 Depth 2
; NOREDUCTIONS-NEXT: cmp r2, r12
; NOREDUCTIONS-NEXT: cmp r2, r8
; NOREDUCTIONS-NEXT: ble .LBB0_2
; NOREDUCTIONS-NEXT: @ %bb.5: @ %vector.ph
; NOREDUCTIONS-NEXT: @ in Loop: Header=BB0_4 Depth=1
; NOREDUCTIONS-NEXT: bic r0, r9, #3
; NOREDUCTIONS-NEXT: movs r7, #1
; NOREDUCTIONS-NEXT: subs r0, #4
; NOREDUCTIONS-NEXT: sub.w r4, r2, r12
; NOREDUCTIONS-NEXT: sub.w r4, r2, r8
; NOREDUCTIONS-NEXT: vmov.i32 q1, #0x0
; NOREDUCTIONS-NEXT: add.w r6, r7, r0, lsr #2
; NOREDUCTIONS-NEXT: adds r0, r2, #3
; NOREDUCTIONS-NEXT: sub.w r0, r0, r12
; NOREDUCTIONS-NEXT: sub.w r0, r12, r8
; NOREDUCTIONS-NEXT: bic r0, r0, #3
; NOREDUCTIONS-NEXT: subs r0, #4
; NOREDUCTIONS-NEXT: add.w r0, r7, r0, lsr #2
; NOREDUCTIONS-NEXT: mov r7, r10
; NOREDUCTIONS-NEXT: dls lr, r0
; NOREDUCTIONS-NEXT: mov r0, r11
; NOREDUCTIONS-NEXT: ldr r0, [sp] @ 4-byte Reload
; NOREDUCTIONS-NEXT: .LBB0_6: @ %vector.body
; NOREDUCTIONS-NEXT: @ Parent Loop BB0_4 Depth=1
; NOREDUCTIONS-NEXT: @ => This Inner Loop Header: Depth=2
Expand All @@ -151,7 +154,8 @@ define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input
; NOREDUCTIONS-NEXT: vaddv.u32 r0, q0
; NOREDUCTIONS-NEXT: b .LBB0_3
; NOREDUCTIONS-NEXT: .LBB0_8: @ %for.end17
; NOREDUCTIONS-NEXT: pop.w {r4, r5, r6, r7, r9, r10, r11, pc}
; NOREDUCTIONS-NEXT: add sp, #4
; NOREDUCTIONS-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
%conv = sext i16 %N to i32
%cmp36 = icmp sgt i16 %N, 0
Expand Down
67 changes: 35 additions & 32 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
Original file line number Diff line number Diff line change
Expand Up @@ -160,27 +160,31 @@ define dso_local i32 @b(i32* %c, i32 %d, i32 %e, i32* %n) "frame-pointer"="all"
; CHECK-NEXT: add r7, sp, #12
; CHECK-NEXT: .save {r8, r9, r10, r11}
; CHECK-NEXT: push.w {r8, r9, r10, r11}
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: .pad #12
; CHECK-NEXT: sub sp, #12
; CHECK-NEXT: wls lr, r1, .LBB2_3
; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
; CHECK-NEXT: adds r4, r3, #4
; CHECK-NEXT: mov r4, r2
; CHECK-NEXT: adds r2, r3, #4
; CHECK-NEXT: add.w r9, r0, #4
; CHECK-NEXT: mvn r11, #1
; CHECK-NEXT: @ implicit-def: $r6
; CHECK-NEXT: @ implicit-def: $r12
; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
; CHECK-NEXT: str r4, [sp] @ 4-byte Spill
; CHECK-NEXT: .LBB2_2: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr r1, [r9, #-4]
; CHECK-NEXT: ldr.w r10, [r4]
; CHECK-NEXT: ldr.w r10, [r2]
; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: muls r1, r3, r1
; CHECK-NEXT: adds.w r8, r1, #-2147483648
; CHECK-NEXT: asr.w r5, r1, #31
; CHECK-NEXT: adc r1, r5, #0
; CHECK-NEXT: mul r5, r10, r0
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: ldr.w r2, [r11, #4]
; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: add.w r5, r5, #-2147483648
; CHECK-NEXT: asrl r8, r1, r5
; CHECK-NEXT: smull r4, r5, r10, r8
Expand All @@ -189,48 +193,47 @@ define dso_local i32 @b(i32* %c, i32 %d, i32 %e, i32* %n) "frame-pointer"="all"
; CHECK-NEXT: mov r4, r5
; CHECK-NEXT: lsll r4, r1, r10
; CHECK-NEXT: lsll r4, r1, #30
; CHECK-NEXT: ldrd r4, r8, [r11]
; CHECK-NEXT: ldr.w r4, [r11]
; CHECK-NEXT: asrs r5, r1, #31
; CHECK-NEXT: mov r8, r1
; CHECK-NEXT: muls r4, r6, r4
; CHECK-NEXT: adds r2, r4, #2
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: lsll r4, r5, r2
; CHECK-NEXT: add.w r1, r4, #-2147483648
; CHECK-NEXT: adds r4, #2
; CHECK-NEXT: lsll r8, r5, r4
; CHECK-NEXT: ldr r4, [r9], #4
; CHECK-NEXT: asr.w r5, r12, #31
; CHECK-NEXT: add.w r8, r8, #-2147483648
; CHECK-NEXT: muls r4, r3, r4
; CHECK-NEXT: adds r3, #4
; CHECK-NEXT: adds.w r2, r12, r4
; CHECK-NEXT: adds.w r1, r12, r4
; CHECK-NEXT: adc.w r5, r5, r4, asr #31
; CHECK-NEXT: smull r6, r4, r8, r6
; CHECK-NEXT: adds.w r2, r2, #-2147483648
; CHECK-NEXT: adc r2, r5, #0
; CHECK-NEXT: asrs r5, r2, #31
; CHECK-NEXT: subs r6, r2, r6
; CHECK-NEXT: smull r6, r4, r2, r6
; CHECK-NEXT: adds.w r1, r1, #-2147483648
; CHECK-NEXT: adc r1, r5, #0
; CHECK-NEXT: mov r2, r0
; CHECK-NEXT: asrs r5, r1, #31
; CHECK-NEXT: subs r6, r1, r6
; CHECK-NEXT: sbcs r5, r4
; CHECK-NEXT: adds.w r6, r6, #-2147483648
; CHECK-NEXT: adc r5, r5, #0
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: asrl r6, r5, r1
; CHECK-NEXT: movs r1, #2
; CHECK-NEXT: asrl r6, r5, r8
; CHECK-NEXT: lsrl r6, r5, #2
; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: str r6, [r1]
; CHECK-NEXT: ldr r1, [r11], #-4
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: mls r1, r1, r10, r2
; CHECK-NEXT: movs r5, #2
; CHECK-NEXT: str r6, [r5]
; CHECK-NEXT: ldr r5, [r11], #-4
; CHECK-NEXT: mls r1, r5, r10, r1
; CHECK-NEXT: adds.w r12, r1, #-2147483648
; CHECK-NEXT: asr.w r2, r1, #31
; CHECK-NEXT: adc r1, r2, #0
; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
; CHECK-NEXT: asr.w r4, r1, #31
; CHECK-NEXT: adc r1, r4, #0
; CHECK-NEXT: ldrd r4, r0, [sp] @ 8-byte Folded Reload
; CHECK-NEXT: lsrl r12, r1, #2
; CHECK-NEXT: rsb.w r1, r12, #0
; CHECK-NEXT: str r1, [r2]
; CHECK-NEXT: str r1, [r4, #-4]
; CHECK-NEXT: adds r4, #4
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: str r1, [r4]
; CHECK-NEXT: str r1, [r2, #-4]
; CHECK-NEXT: adds r2, #4
; CHECK-NEXT: le lr, .LBB2_2
; CHECK-NEXT: .LBB2_3: @ %while.end
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: add sp, #12
; CHECK-NEXT: pop.w {r8, r9, r10, r11}
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
entry:
Expand Down
30 changes: 17 additions & 13 deletions llvm/test/CodeGen/Thumb2/ldr-str-imm12.ll
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ define %union.rec* @Manifest(%union.rec* %x, %union.rec* %env, %struct.STYLE* %s
; CHECK-NEXT: ldrd r8, lr, [r7, #20]
; CHECK-NEXT: movs r5, #0
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: ldm.w r10, {r4, r6, r10}
; CHECK-NEXT: ldrd r12, r9, [r7, #28]
; CHECK-NEXT: ldm.w r10, {r4, r9, r10}
; CHECK-NEXT: ldr.w r12, [r7, #28]
; CHECK-NEXT: ittt ne
; CHECK-NEXT: addne sp, #292
; CHECK-NEXT: popne.w {r8, r10, r11}
Expand All @@ -46,25 +46,29 @@ define %union.rec* @Manifest(%union.rec* %x, %union.rec* %env, %struct.STYLE* %s
; CHECK-NEXT: @ %bb.3: @ %bb420
; CHECK-NEXT: movw r5, :lower16:(L_zz_hold$non_lazy_ptr-(LPC0_0+4))
; CHECK-NEXT: movt r5, :upper16:(L_zz_hold$non_lazy_ptr-(LPC0_0+4))
; CHECK-NEXT: movw r11, :lower16:(L_zz_res$non_lazy_ptr-(LPC0_1+4))
; CHECK-NEXT: LPC0_0:
; CHECK-NEXT: add r5, pc
; CHECK-NEXT: ldr.w r11, [r5]
; CHECK-NEXT: str.w r11, [sp, #28] @ 4-byte Spill
; CHECK-NEXT: movw r5, :lower16:(L_zz_res$non_lazy_ptr-(LPC0_1+4))
; CHECK-NEXT: movt r5, :upper16:(L_zz_res$non_lazy_ptr-(LPC0_1+4))
; CHECK-NEXT: movt r11, :upper16:(L_zz_res$non_lazy_ptr-(LPC0_1+4))
; CHECK-NEXT: LPC0_1:
; CHECK-NEXT: add r5, pc
; CHECK-NEXT: add r11, pc
; CHECK-NEXT: ldr r5, [r5]
; CHECK-NEXT: str r5, [sp, #32] @ 4-byte Spill
; CHECK-NEXT: ldr.w r5, [r11]
; CHECK-NEXT: mov.w r11, #0
; CHECK-NEXT: str r5, [sp, #28] @ 4-byte Spill
; CHECK-NEXT: ldr r5, [sp, #32] @ 4-byte Reload
; CHECK-NEXT: str.w r11, [r5]
; CHECK-NEXT: movs r5, #0
; CHECK-NEXT: str.w r5, [r11]
; CHECK-NEXT: ldr.w r11, [sp, #32] @ 4-byte Reload
; CHECK-NEXT: str.w r5, [r11]
; CHECK-NEXT: ldr r5, [sp, #28] @ 4-byte Reload
; CHECK-NEXT: ldr r6, [sp, #28] @ 4-byte Reload
; CHECK-NEXT: str r5, [r6]
; CHECK-NEXT: ldr r5, [sp, #32] @ 4-byte Reload
; CHECK-NEXT: str r0, [r5]
; CHECK-NEXT: stm.w sp, {r4, r6, r10}
; CHECK-NEXT: ldr r0, [r7, #32]
; CHECK-NEXT: stm.w sp, {r4, r9, r10}
; CHECK-NEXT: strd r8, lr, [sp, #12]
; CHECK-NEXT: strd r12, r9, [sp, #20]
; CHECK-NEXT: str.w r12, [sp, #20]
; CHECK-NEXT: str r0, [sp, #24]
; CHECK-NEXT: bl _Manifest
; CHECK-NEXT: trap
; CHECK-NEXT: LBB0_4: @ %bb20
Expand Down
82 changes: 42 additions & 40 deletions llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1049,10 +1049,10 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, half* noca
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: .pad #20
; CHECK-NEXT: sub sp, #20
; CHECK-NEXT: .pad #24
; CHECK-NEXT: sub sp, #24
; CHECK-NEXT: cmp r3, #8
; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill
; CHECK-NEXT: str r1, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: blo.w .LBB16_12
; CHECK-NEXT: @ %bb.1: @ %entry
; CHECK-NEXT: lsrs.w r12, r3, #2
Expand All @@ -1072,43 +1072,45 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, half* noca
; CHECK-NEXT: str r1, [sp] @ 4-byte Spill
; CHECK-NEXT: subs r1, r7, #2
; CHECK-NEXT: rsbs r7, r4, #0
; CHECK-NEXT: str r4, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: str r7, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: add.w r7, r3, #16
; CHECK-NEXT: str r4, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill
; CHECK-NEXT: b .LBB16_5
; CHECK-NEXT: .LBB16_3: @ %for.end
; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1
; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: wls lr, r0, .LBB16_4
; CHECK-NEXT: b .LBB16_9
; CHECK-NEXT: .LBB16_4: @ %while.end
; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1
; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: subs.w r12, r12, #1
; CHECK-NEXT: vstrb.8 q0, [r2], #8
; CHECK-NEXT: add.w r0, r6, r0, lsl #1
; CHECK-NEXT: add.w r0, r5, r0, lsl #1
; CHECK-NEXT: add.w r5, r0, #8
; CHECK-NEXT: beq.w .LBB16_12
; CHECK-NEXT: .LBB16_5: @ %while.body
; CHECK-NEXT: @ =>This Loop Header: Depth=1
; CHECK-NEXT: @ Child Loop BB16_7 Depth 2
; CHECK-NEXT: @ Child Loop BB16_10 Depth 2
; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: ldrh.w lr, [r3, #14]
; CHECK-NEXT: vldrw.u32 q0, [r0], #8
; CHECK-NEXT: ldrh.w r10, [r3, #12]
; CHECK-NEXT: ldrh.w r8, [r3, #12]
; CHECK-NEXT: ldrh r7, [r3, #10]
; CHECK-NEXT: ldrh r4, [r3, #8]
; CHECK-NEXT: ldrh r6, [r3, #6]
; CHECK-NEXT: ldrh.w r9, [r3, #4]
; CHECK-NEXT: ldrh.w r11, [r3, #2]
; CHECK-NEXT: ldrh.w r8, [r3]
; CHECK-NEXT: ldrh.w r10, [r3]
; CHECK-NEXT: vstrb.8 q0, [r1], #8
; CHECK-NEXT: vldrw.u32 q0, [r5]
; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill
; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: adds r0, r5, #2
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vmul.f16 q0, q0, r8
; CHECK-NEXT: vmul.f16 q0, q0, r10
; CHECK-NEXT: adds r0, r5, #6
; CHECK-NEXT: vfma.f16 q0, q1, r11
; CHECK-NEXT: vldrw.u32 q1, [r5, #4]
Expand All @@ -1117,77 +1119,77 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, half* noca
; CHECK-NEXT: add.w r0, r5, #10
; CHECK-NEXT: vfma.f16 q0, q1, r6
; CHECK-NEXT: vldrw.u32 q1, [r5, #8]
; CHECK-NEXT: add.w r6, r5, #16
; CHECK-NEXT: vfma.f16 q0, q1, r4
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: add.w r0, r5, #14
; CHECK-NEXT: vfma.f16 q0, q1, r7
; CHECK-NEXT: vldrw.u32 q1, [r5, #12]
; CHECK-NEXT: vfma.f16 q0, q1, r10
; CHECK-NEXT: adds r5, #16
; CHECK-NEXT: vfma.f16 q0, q1, r8
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: vfma.f16 q0, q1, lr
; CHECK-NEXT: cmp r0, #16
; CHECK-NEXT: blo .LBB16_8
; CHECK-NEXT: @ %bb.6: @ %for.body.preheader
; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1
; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload
; CHECK-NEXT: add.w r5, r3, #16
; CHECK-NEXT: dls lr, r0
; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: .LBB16_7: @ %for.body
; CHECK-NEXT: @ Parent Loop BB16_5 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldrh r0, [r5], #16
; CHECK-NEXT: vldrw.u32 q1, [r6]
; CHECK-NEXT: adds r4, r6, #2
; CHECK-NEXT: ldrh r0, [r6], #16
; CHECK-NEXT: vldrw.u32 q1, [r5]
; CHECK-NEXT: adds r4, r5, #2
; CHECK-NEXT: vfma.f16 q0, q1, r0
; CHECK-NEXT: vldrw.u32 q1, [r4]
; CHECK-NEXT: ldrh r0, [r5, #-14]
; CHECK-NEXT: adds r4, r6, #6
; CHECK-NEXT: ldrh r0, [r6, #-14]
; CHECK-NEXT: adds r4, r5, #6
; CHECK-NEXT: vfma.f16 q0, q1, r0
; CHECK-NEXT: ldrh r0, [r5, #-12]
; CHECK-NEXT: vldrw.u32 q1, [r6, #4]
; CHECK-NEXT: ldrh r0, [r6, #-12]
; CHECK-NEXT: vldrw.u32 q1, [r5, #4]
; CHECK-NEXT: vfma.f16 q0, q1, r0
; CHECK-NEXT: vldrw.u32 q1, [r4]
; CHECK-NEXT: ldrh r0, [r5, #-10]
; CHECK-NEXT: add.w r4, r6, #10
; CHECK-NEXT: ldrh r0, [r6, #-10]
; CHECK-NEXT: add.w r4, r5, #10
; CHECK-NEXT: vfma.f16 q0, q1, r0
; CHECK-NEXT: ldrh r0, [r5, #-8]
; CHECK-NEXT: vldrw.u32 q1, [r6, #8]
; CHECK-NEXT: ldrh r0, [r6, #-8]
; CHECK-NEXT: vldrw.u32 q1, [r5, #8]
; CHECK-NEXT: vfma.f16 q0, q1, r0
; CHECK-NEXT: vldrw.u32 q1, [r4]
; CHECK-NEXT: ldrh r0, [r5, #-6]
; CHECK-NEXT: ldrh r4, [r5, #-2]
; CHECK-NEXT: ldrh r0, [r6, #-6]
; CHECK-NEXT: ldrh r4, [r6, #-2]
; CHECK-NEXT: vfma.f16 q0, q1, r0
; CHECK-NEXT: ldrh r0, [r5, #-4]
; CHECK-NEXT: vldrw.u32 q1, [r6, #12]
; CHECK-NEXT: ldrh r0, [r6, #-4]
; CHECK-NEXT: vldrw.u32 q1, [r5, #12]
; CHECK-NEXT: vfma.f16 q0, q1, r0
; CHECK-NEXT: add.w r0, r6, #14
; CHECK-NEXT: add.w r0, r5, #14
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: adds r6, #16
; CHECK-NEXT: adds r5, #16
; CHECK-NEXT: vfma.f16 q0, q1, r4
; CHECK-NEXT: le lr, .LBB16_7
; CHECK-NEXT: b .LBB16_3
; CHECK-NEXT: .LBB16_8: @ in Loop: Header=BB16_5 Depth=1
; CHECK-NEXT: add.w r5, r3, #16
; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: b .LBB16_3
; CHECK-NEXT: .LBB16_9: @ %while.body76.preheader
; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: .LBB16_10: @ %while.body76
; CHECK-NEXT: @ Parent Loop BB16_5 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldrh r4, [r5], #2
; CHECK-NEXT: ldrh r4, [r6], #2
; CHECK-NEXT: vldrh.u16 q1, [r0], #2
; CHECK-NEXT: vfma.f16 q0, q1, r4
; CHECK-NEXT: le lr, .LBB16_10
; CHECK-NEXT: @ %bb.11: @ %while.end.loopexit
; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1
; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: add.w r6, r6, r0, lsl #1
; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: add.w r5, r5, r0, lsl #1
; CHECK-NEXT: b .LBB16_4
; CHECK-NEXT: .LBB16_12: @ %if.end
; CHECK-NEXT: add sp, #20
; CHECK-NEXT: add sp, #24
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
entry:
%pState1 = getelementptr inbounds %struct.arm_fir_instance_f32, %struct.arm_fir_instance_f32* %S, i32 0, i32 1
Expand Down
98 changes: 50 additions & 48 deletions llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1044,125 +1044,127 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: .pad #24
; CHECK-NEXT: sub sp, #24
; CHECK-NEXT: .pad #32
; CHECK-NEXT: sub sp, #32
; CHECK-NEXT: cmp r3, #8
; CHECK-NEXT: blo.w .LBB16_12
; CHECK-NEXT: @ %bb.1: @ %entry
; CHECK-NEXT: lsrs.w r12, r3, #2
; CHECK-NEXT: beq.w .LBB16_12
; CHECK-NEXT: @ %bb.2: @ %while.body.lr.ph
; CHECK-NEXT: ldrh r6, [r0]
; CHECK-NEXT: movs r4, #1
; CHECK-NEXT: ldrd r7, r10, [r0, #4]
; CHECK-NEXT: movs r5, #1
; CHECK-NEXT: ldrd r4, r10, [r0, #4]
; CHECK-NEXT: sub.w r0, r6, #8
; CHECK-NEXT: add.w r3, r0, r0, lsr #29
; CHECK-NEXT: and r0, r0, #7
; CHECK-NEXT: asrs r5, r3, #3
; CHECK-NEXT: cmp r5, #1
; CHECK-NEXT: asrs r7, r3, #3
; CHECK-NEXT: cmp r7, #1
; CHECK-NEXT: it gt
; CHECK-NEXT: asrgt r4, r3, #3
; CHECK-NEXT: add.w r3, r7, r6, lsl #2
; CHECK-NEXT: asrgt r5, r3, #3
; CHECK-NEXT: add.w r3, r4, r6, lsl #2
; CHECK-NEXT: sub.w r9, r3, #4
; CHECK-NEXT: rsbs r3, r6, #0
; CHECK-NEXT: str r4, [sp] @ 4-byte Spill
; CHECK-NEXT: str r6, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: add.w r3, r10, #32
; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: str r6, [sp, #16] @ 4-byte Spill
; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: b .LBB16_5
; CHECK-NEXT: .LBB16_3: @ %for.end
; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1
; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: ldrd r0, r9, [sp, #12] @ 8-byte Folded Reload
; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
; CHECK-NEXT: ldrd r0, r9, [sp, #20] @ 8-byte Folded Reload
; CHECK-NEXT: wls lr, r0, .LBB16_4
; CHECK-NEXT: b .LBB16_9
; CHECK-NEXT: .LBB16_4: @ %while.end
; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1
; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: subs.w r12, r12, #1
; CHECK-NEXT: vstrb.8 q0, [r2], #16
; CHECK-NEXT: add.w r0, r7, r0, lsl #2
; CHECK-NEXT: add.w r7, r0, #16
; CHECK-NEXT: add.w r0, r4, r0, lsl #2
; CHECK-NEXT: add.w r4, r0, #16
; CHECK-NEXT: beq .LBB16_12
; CHECK-NEXT: .LBB16_5: @ %while.body
; CHECK-NEXT: @ =>This Loop Header: Depth=1
; CHECK-NEXT: @ Child Loop BB16_7 Depth 2
; CHECK-NEXT: @ Child Loop BB16_10 Depth 2
; CHECK-NEXT: add.w lr, r10, #8
; CHECK-NEXT: vldrw.u32 q0, [r1], #16
; CHECK-NEXT: ldrd r3, r4, [r10]
; CHECK-NEXT: ldrd r3, r7, [r10]
; CHECK-NEXT: ldm.w lr, {r0, r5, r6, lr}
; CHECK-NEXT: ldrd r11, r8, [r10, #24]
; CHECK-NEXT: vstrb.8 q0, [r9], #16
; CHECK-NEXT: vldrw.u32 q0, [r7], #32
; CHECK-NEXT: strd r9, r1, [sp, #16] @ 8-byte Folded Spill
; CHECK-NEXT: vldrw.u32 q1, [r7, #-28]
; CHECK-NEXT: vldrw.u32 q0, [r4], #32
; CHECK-NEXT: strd r9, r1, [sp, #24] @ 8-byte Folded Spill
; CHECK-NEXT: vldrw.u32 q1, [r4, #-28]
; CHECK-NEXT: vmul.f32 q0, q0, r3
; CHECK-NEXT: vldrw.u32 q6, [r7, #-24]
; CHECK-NEXT: vldrw.u32 q4, [r7, #-20]
; CHECK-NEXT: vfma.f32 q0, q1, r4
; CHECK-NEXT: vldrw.u32 q5, [r7, #-16]
; CHECK-NEXT: vldrw.u32 q6, [r4, #-24]
; CHECK-NEXT: vldrw.u32 q4, [r4, #-20]
; CHECK-NEXT: vfma.f32 q0, q1, r7
; CHECK-NEXT: vldrw.u32 q5, [r4, #-16]
; CHECK-NEXT: vfma.f32 q0, q6, r0
; CHECK-NEXT: vldrw.u32 q2, [r7, #-12]
; CHECK-NEXT: vldrw.u32 q2, [r4, #-12]
; CHECK-NEXT: vfma.f32 q0, q4, r5
; CHECK-NEXT: vldrw.u32 q3, [r7, #-8]
; CHECK-NEXT: vldrw.u32 q3, [r4, #-8]
; CHECK-NEXT: vfma.f32 q0, q5, r6
; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: vfma.f32 q0, q2, lr
; CHECK-NEXT: vldrw.u32 q1, [r7, #-4]
; CHECK-NEXT: vldrw.u32 q1, [r4, #-4]
; CHECK-NEXT: vfma.f32 q0, q3, r11
; CHECK-NEXT: cmp r0, #16
; CHECK-NEXT: vfma.f32 q0, q1, r8
; CHECK-NEXT: blo .LBB16_8
; CHECK-NEXT: @ %bb.6: @ %for.body.preheader
; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1
; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload
; CHECK-NEXT: add.w r4, r10, #32
; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: dls lr, r0
; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: .LBB16_7: @ %for.body
; CHECK-NEXT: @ Parent Loop BB16_5 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldm.w r4, {r0, r3, r5, r6, r8, r11}
; CHECK-NEXT: vldrw.u32 q1, [r7], #32
; CHECK-NEXT: vldrw.u32 q6, [r7, #-24]
; CHECK-NEXT: vldrw.u32 q4, [r7, #-20]
; CHECK-NEXT: ldm.w r7, {r0, r3, r5, r6, r8, r11}
; CHECK-NEXT: vldrw.u32 q1, [r4], #32
; CHECK-NEXT: vldrw.u32 q6, [r4, #-24]
; CHECK-NEXT: vldrw.u32 q4, [r4, #-20]
; CHECK-NEXT: vfma.f32 q0, q1, r0
; CHECK-NEXT: vldrw.u32 q1, [r7, #-28]
; CHECK-NEXT: vldrw.u32 q5, [r7, #-16]
; CHECK-NEXT: vldrw.u32 q2, [r7, #-12]
; CHECK-NEXT: vldrw.u32 q1, [r4, #-28]
; CHECK-NEXT: vldrw.u32 q5, [r4, #-16]
; CHECK-NEXT: vldrw.u32 q2, [r4, #-12]
; CHECK-NEXT: vfma.f32 q0, q1, r3
; CHECK-NEXT: ldrd r9, r1, [r4, #24]
; CHECK-NEXT: ldrd r9, r1, [r7, #24]
; CHECK-NEXT: vfma.f32 q0, q6, r5
; CHECK-NEXT: vldrw.u32 q3, [r7, #-8]
; CHECK-NEXT: vldrw.u32 q3, [r4, #-8]
; CHECK-NEXT: vfma.f32 q0, q4, r6
; CHECK-NEXT: vldrw.u32 q1, [r7, #-4]
; CHECK-NEXT: vldrw.u32 q1, [r4, #-4]
; CHECK-NEXT: vfma.f32 q0, q5, r8
; CHECK-NEXT: adds r4, #32
; CHECK-NEXT: adds r7, #32
; CHECK-NEXT: vfma.f32 q0, q2, r11
; CHECK-NEXT: vfma.f32 q0, q3, r9
; CHECK-NEXT: vfma.f32 q0, q1, r1
; CHECK-NEXT: le lr, .LBB16_7
; CHECK-NEXT: b .LBB16_3
; CHECK-NEXT: .LBB16_8: @ in Loop: Header=BB16_5 Depth=1
; CHECK-NEXT: add.w r4, r10, #32
; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: b .LBB16_3
; CHECK-NEXT: .LBB16_9: @ %while.body76.preheader
; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1
; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: mov r3, r4
; CHECK-NEXT: .LBB16_10: @ %while.body76
; CHECK-NEXT: @ Parent Loop BB16_5 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldr r0, [r4], #4
; CHECK-NEXT: ldr r0, [r7], #4
; CHECK-NEXT: vldrw.u32 q1, [r3], #4
; CHECK-NEXT: vfma.f32 q0, q1, r0
; CHECK-NEXT: le lr, .LBB16_10
; CHECK-NEXT: @ %bb.11: @ %while.end.loopexit
; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1
; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: add.w r7, r7, r0, lsl #2
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: add.w r4, r4, r0, lsl #2
; CHECK-NEXT: b .LBB16_4
; CHECK-NEXT: .LBB16_12: @ %if.end
; CHECK-NEXT: add sp, #24
; CHECK-NEXT: add sp, #32
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
Expand Down
529 changes: 287 additions & 242 deletions llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll

Large diffs are not rendered by default.

20 changes: 10 additions & 10 deletions llvm/test/CodeGen/X86/addcarry.ll
Original file line number Diff line number Diff line change
Expand Up @@ -432,24 +432,24 @@ define i32 @add_U320_without_i128_add(%struct.U320* nocapture dereferenceable(40
; CHECK-NEXT: adcq %rdx, 8(%rdi)
; CHECK-NEXT: movq %rax, %rdx
; CHECK-NEXT: adcq %rcx, %rdx
; CHECK-NEXT: movq 24(%rdi), %r14
; CHECK-NEXT: leaq (%r8,%r14), %r11
; CHECK-NEXT: movq 24(%rdi), %r11
; CHECK-NEXT: leaq (%r8,%r11), %r14
; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: cmpq %r10, %rdx
; CHECK-NEXT: setb %bl
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: adcq %r11, %rbx
; CHECK-NEXT: movq 32(%rdi), %rcx
; CHECK-NEXT: leaq (%r9,%rcx), %r10
; CHECK-NEXT: adcq %r14, %rbx
; CHECK-NEXT: movq 32(%rdi), %r10
; CHECK-NEXT: leaq (%r9,%r10), %rcx
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: cmpq %r11, %rbx
; CHECK-NEXT: cmpq %r14, %rbx
; CHECK-NEXT: setb %sil
; CHECK-NEXT: addq %r14, %r8
; CHECK-NEXT: adcq %r10, %rsi
; CHECK-NEXT: addq %r11, %r8
; CHECK-NEXT: adcq %rcx, %rsi
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpq %r10, %rsi
; CHECK-NEXT: cmpq %rcx, %rsi
; CHECK-NEXT: setb %al
; CHECK-NEXT: addq %rcx, %r9
; CHECK-NEXT: addq %r10, %r9
; CHECK-NEXT: movq %rdx, 16(%rdi)
; CHECK-NEXT: movq %rbx, 24(%rdi)
; CHECK-NEXT: movq %rsi, 32(%rdi)
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/callbr-asm-blockplacement.ll
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,23 @@ define i32 @foo(i32 %arg, i32 (i8*)* %arg3) nounwind {
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB0_5
; CHECK-NEXT: # %bb.1: # %bb5
; CHECK-NEXT: movq %rsi, %r12
; CHECK-NEXT: movq %rsi, %r14
; CHECK-NEXT: movslq %edi, %rbp
; CHECK-NEXT: leaq (,%rbp,8), %rax
; CHECK-NEXT: leaq global(%rax,%rax,2), %r14
; CHECK-NEXT: leaq global+4(%rax,%rax,2), %r15
; CHECK-NEXT: leaq global(%rax,%rax,2), %r15
; CHECK-NEXT: leaq global+4(%rax,%rax,2), %r12
; CHECK-NEXT: xorl %r13d, %r13d
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_2: # %bb8
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: callq bar@PLT
; CHECK-NEXT: movq %rax, %rbx
; CHECK-NEXT: movq %rax, %rdi
; CHECK-NEXT: callq *%r12
; CHECK-NEXT: movq %r14, %rdi
; CHECK-NEXT: callq hoge@PLT
; CHECK-NEXT: callq *%r14
; CHECK-NEXT: movq %r15, %rdi
; CHECK-NEXT: callq hoge@PLT
; CHECK-NEXT: movq %r12, %rdi
; CHECK-NEXT: callq hoge@PLT
; CHECK-NEXT: testb %r13b, %r13b
; CHECK-NEXT: jne .LBB0_2
; CHECK-NEXT: # %bb.3: # %bb15
Expand Down
17 changes: 9 additions & 8 deletions llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: movl (%r15), %eax
; CHECK-NEXT: leal 8(,%rcx,8), %ecx
; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: leaq 8(%r12), %rcx
; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: leaq 32(%r12), %rbx
; CHECK-NEXT: shlq $3, %r13
; CHECK-NEXT: xorl %esi, %esi
Expand Down Expand Up @@ -187,17 +189,16 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: jae .LBB1_7
; CHECK-NEXT: # %bb.6: # %vector.memcheck
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: leaq 8(%r12), %rax
; CHECK-NEXT: addq %rax, %r10
; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
; CHECK-NEXT: leaq (%r10,%r11,8), %rax
; CHECK-NEXT: cmpq %rcx, %rax
; CHECK-NEXT: ja .LBB1_14
; CHECK-NEXT: .LBB1_7: # %vector.body.preheader
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: leaq -4(%r8), %r10
; CHECK-NEXT: movq %r10, %rax
; CHECK-NEXT: shrq $2, %rax
; CHECK-NEXT: btl $2, %r10d
; CHECK-NEXT: leaq -4(%r8), %rax
; CHECK-NEXT: movq %rax, %r10
; CHECK-NEXT: shrq $2, %r10
; CHECK-NEXT: btl $2, %eax
; CHECK-NEXT: jb .LBB1_8
; CHECK-NEXT: # %bb.9: # %vector.body.prol.preheader
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
Expand All @@ -206,12 +207,12 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: movdqu %xmm0, (%rdi,%r9,8)
; CHECK-NEXT: movdqu %xmm0, 16(%rdi,%r9,8)
; CHECK-NEXT: movl $4, %r11d
; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: testq %r10, %r10
; CHECK-NEXT: jne .LBB1_11
; CHECK-NEXT: jmp .LBB1_13
; CHECK-NEXT: .LBB1_8: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: xorl %r11d, %r11d
; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: testq %r10, %r10
; CHECK-NEXT: je .LBB1_13
; CHECK-NEXT: .LBB1_11: # %vector.body.preheader.new
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/inalloca-invoke.ll
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ blah:
; CHECK: pushl %eax
; CHECK: subl $20, %esp
; CHECK: movl %esp, %[[beg:[^ ]*]]
; CHECK: leal 12(%[[beg]]), %[[end:[^ ]*]]

call void @begin(%Iter* sret(%Iter) %temp.lvalue)
; CHECK: calll _begin
Expand All @@ -32,7 +33,6 @@ blah:
to label %invoke.cont unwind label %lpad

; Uses end as sret param.
; CHECK: leal 12(%[[beg]]), %[[end:[^ ]*]]
; CHECK: pushl %[[end]]
; CHECK: calll _plus

Expand Down
28 changes: 2 additions & 26 deletions llvm/test/CodeGen/X86/licm-regpressure.ll
Original file line number Diff line number Diff line change
@@ -1,34 +1,10 @@
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-linux -stop-after=early-machinelicm -o - | FileCheck %s -check-prefix=MIR

; This tests should fail as MachineLICM does not compute register pressure
; This tests currently fails as MachineLICM does not compute register pressure
; correctly. More details: llvm.org/PR23143

; It however does not show any spills because leaq is rematerialized instead
; of spilling.

; Stopping after MachineLICM however exposes all ADD64ri8 instructions
; to be hoisted which still has to be avoided.

; XFAIL: *

; MachineLICM should take register pressure into account.
; CHECK-LABEL: {{^}}test:
; CHECK-NOT: Spill
; CHECK-COUNT-4: leaq
; CHECK-NOT: Spill
; CHECK: [[LOOP:\.LBB[0-9_]+]]:
; CHECK-NOT: Reload
; CHECK-COUNT-2: leaq
; CHECK-NOT: Reload
; CHECK: jne [[LOOP]]

; MIR-LABEL: name: test
; MIR: bb.0.entry:
; MIR-COUNT-4: ADD64ri8
; MIR: bb.1.loop-body:
; MIR-COUNT-2: ADD64ri8
; MIR: JCC_1 %bb.1
; CHECK-NOT: Spill

%struct.A = type { i32, i32, i32, i32, i32, i32, i32 }

Expand Down
40 changes: 20 additions & 20 deletions llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
Original file line number Diff line number Diff line change
Expand Up @@ -91,48 +91,48 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
; CHECK-NEXT: ## %bb.10: ## %do.end
; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK-NEXT: xorl %r12d, %r12d
; CHECK-NEXT: testb %r12b, %r12b
; CHECK-NEXT: xorl %r13d, %r13d
; CHECK-NEXT: testb %r13b, %r13b
; CHECK-NEXT: jne LBB0_11
; CHECK-NEXT: ## %bb.12: ## %while.body200.preheader
; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: xorl %r12d, %r12d
; CHECK-NEXT: leaq LJTI0_0(%rip), %rdx
; CHECK-NEXT: leaq LJTI0_1(%rip), %r13
; CHECK-NEXT: leaq LJTI0_1(%rip), %rbx
; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
; CHECK-NEXT: xorl %r14d, %r14d
; CHECK-NEXT: jmp LBB0_13
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_20: ## %sw.bb256
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: movl %r12d, %r14d
; CHECK-NEXT: movl %r13d, %r14d
; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: decl %r15d
; CHECK-NEXT: testl %r15d, %r15d
; CHECK-NEXT: movl %r14d, %r12d
; CHECK-NEXT: movl %r14d, %r13d
; CHECK-NEXT: jle LBB0_22
; CHECK-NEXT: LBB0_13: ## %while.body200
; CHECK-NEXT: ## =>This Loop Header: Depth=1
; CHECK-NEXT: ## Child Loop BB0_29 Depth 2
; CHECK-NEXT: ## Child Loop BB0_38 Depth 2
; CHECK-NEXT: leal -268(%r12), %eax
; CHECK-NEXT: leal -268(%r13), %eax
; CHECK-NEXT: cmpl $105, %eax
; CHECK-NEXT: ja LBB0_14
; CHECK-NEXT: ## %bb.56: ## %while.body200
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: movslq (%r13,%rax,4), %rax
; CHECK-NEXT: addq %r13, %rax
; CHECK-NEXT: movslq (%rbx,%rax,4), %rax
; CHECK-NEXT: addq %rbx, %rax
; CHECK-NEXT: jmpq *%rax
; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: movl %r12d, %r14d
; CHECK-NEXT: testb %r12b, %r12b
; CHECK-NEXT: movl %r13d, %r14d
; CHECK-NEXT: jne LBB0_21
; CHECK-NEXT: jmp LBB0_55
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_14: ## %while.body200
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: leal 1(%r12), %eax
; CHECK-NEXT: leal 1(%r13), %eax
; CHECK-NEXT: cmpl $21, %eax
; CHECK-NEXT: ja LBB0_20
; CHECK-NEXT: ## %bb.15: ## %while.body200
Expand All @@ -147,12 +147,12 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
; CHECK-NEXT: jmp LBB0_21
; CHECK-NEXT: LBB0_26: ## %sw.bb474
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: testb %r12b, %r12b
; CHECK-NEXT: ## implicit-def: $rbp
; CHECK-NEXT: jne LBB0_34
; CHECK-NEXT: ## %bb.27: ## %do.body479.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: testb %r12b, %r12b
; CHECK-NEXT: ## implicit-def: $rbp
; CHECK-NEXT: jne LBB0_34
; CHECK-NEXT: ## %bb.28: ## %land.rhs485.preheader
Expand All @@ -163,7 +163,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
; CHECK-NEXT: LBB0_32: ## %do.body479.backedge
; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2
; CHECK-NEXT: leaq 1(%rbp), %rax
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: testb %r12b, %r12b
; CHECK-NEXT: je LBB0_33
; CHECK-NEXT: LBB0_29: ## %land.rhs485
; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1
Expand All @@ -173,13 +173,13 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
; CHECK-NEXT: ## %bb.30: ## %cond.true.i.i2780
; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2
; CHECK-NEXT: movq %rax, %rbp
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: testb %r12b, %r12b
; CHECK-NEXT: jne LBB0_32
; CHECK-NEXT: ## %bb.31: ## %lor.rhs500
; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2
; CHECK-NEXT: movl $256, %esi ## imm = 0x100
; CHECK-NEXT: callq ___maskrune
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: testb %r12b, %r12b
; CHECK-NEXT: jne LBB0_32
; CHECK-NEXT: jmp LBB0_34
; CHECK-NEXT: LBB0_45: ## %sw.bb1134
Expand Down Expand Up @@ -229,13 +229,13 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
; CHECK-NEXT: LBB0_38: ## %for.cond534
; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1
; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: testb %r12b, %r12b
; CHECK-NEXT: jne LBB0_38
; CHECK-NEXT: ## %bb.39: ## %for.cond542.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: testb %r12b, %r12b
; CHECK-NEXT: movb $0, (%rbp)
; CHECK-NEXT: movl %r12d, %r14d
; CHECK-NEXT: movl %r13d, %r14d
; CHECK-NEXT: leaq LJTI0_0(%rip), %rdx
; CHECK-NEXT: jmp LBB0_21
; CHECK-NEXT: .p2align 4, 0x90
Expand Down
5 changes: 3 additions & 2 deletions llvm/test/CodeGen/X86/sdiv_fix.ll
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,8 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X64-NEXT: movq %r12, %rcx
; X64-NEXT: callq __divti3@PLT
; X64-NEXT: movq %rax, %r13
; X64-NEXT: decq %rax
; X64-NEXT: movq %rax, (%rsp) # 8-byte Spill
; X64-NEXT: testq %rbx, %rbx
; X64-NEXT: sets %al
; X64-NEXT: testq %r12, %r12
Expand All @@ -289,8 +291,7 @@ define i64 @func5(i64 %x, i64 %y) nounwind {
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: setne %al
; X64-NEXT: testb %bpl, %al
; X64-NEXT: leaq -1(%r13), %rax
; X64-NEXT: cmovneq %rax, %r13
; X64-NEXT: cmovneq (%rsp), %r13 # 8-byte Folded Reload
; X64-NEXT: movq %r13, %rax
; X64-NEXT: addq $8, %rsp
; X64-NEXT: popq %rbx
Expand Down