142 changes: 142 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/br-constant-invalid-sgpr-copy.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=WAVE64 %s
; RUN: llc -global-isel -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 < %s | FileCheck -check-prefix=WAVE32 %s

; Regression test: constant true and false values used as a scalar
; branch condition were previously mishandled.

; Conditional branch on the constant `false`. The false successor of the
; branch in %bb0 is %bb0 itself, so the block loops back to itself. The
; expected code for both targets moves 1 into s4, compares s4 against 0 with
; s_cmp_lg_u32, and conditionally branches back to the loop header .LBB0_1.
; The gfx1031 run additionally expects an s_waitcnt_vscnt in the entry block.
define void @br_false() {
; WAVE64-LABEL: br_false:
; WAVE64: ; %bb.0: ; %.exit
; WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-NEXT: .LBB0_1: ; %bb0
; WAVE64-NEXT: ; =>This Inner Loop Header: Depth=1
; WAVE64-NEXT: s_mov_b32 s4, 1
; WAVE64-NEXT: s_cmp_lg_u32 s4, 0
; WAVE64-NEXT: s_cbranch_scc1 .LBB0_1
; WAVE64-NEXT: ; %bb.2: ; %.exit5
; WAVE64-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-LABEL: br_false:
; WAVE32: ; %bb.0: ; %.exit
; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; WAVE32-NEXT: .LBB0_1: ; %bb0
; WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1
; WAVE32-NEXT: s_mov_b32 s4, 1
; WAVE32-NEXT: s_cmp_lg_u32 s4, 0
; WAVE32-NEXT: s_cbranch_scc1 .LBB0_1
; WAVE32-NEXT: ; %bb.2: ; %.exit5
; WAVE32-NEXT: s_setpc_b64 s[30:31]
.exit:
br label %bb0

bb0:
; Constant-false condition: the true successor is %.exit5, the false
; successor is %bb0.
br i1 false, label %.exit5, label %bb0

.exit5:
ret void
}

; Conditional branch on the constant `true`. The true successor of the branch
; in %bb0 is %.exit5. The expected code for both targets moves 0 into s4,
; compares s4 against 0 with s_cmp_lg_u32, and emits a conditional branch
; back to .LBB1_1 followed by the return in %.exit5.
; The gfx1031 run additionally expects an s_waitcnt_vscnt in the entry block.
define void @br_true() {
; WAVE64-LABEL: br_true:
; WAVE64: ; %bb.0: ; %.exit
; WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-NEXT: .LBB1_1: ; %bb0
; WAVE64-NEXT: ; =>This Inner Loop Header: Depth=1
; WAVE64-NEXT: s_mov_b32 s4, 0
; WAVE64-NEXT: s_cmp_lg_u32 s4, 0
; WAVE64-NEXT: s_cbranch_scc1 .LBB1_1
; WAVE64-NEXT: ; %bb.2: ; %.exit5
; WAVE64-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-LABEL: br_true:
; WAVE32: ; %bb.0: ; %.exit
; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; WAVE32-NEXT: .LBB1_1: ; %bb0
; WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1
; WAVE32-NEXT: s_mov_b32 s4, 0
; WAVE32-NEXT: s_cmp_lg_u32 s4, 0
; WAVE32-NEXT: s_cbranch_scc1 .LBB1_1
; WAVE32-NEXT: ; %bb.2: ; %.exit5
; WAVE32-NEXT: s_setpc_b64 s[30:31]
.exit:
br label %bb0

bb0:
; Constant-true condition: the true successor is %.exit5, the false
; successor is %bb0.
br i1 true, label %.exit5, label %bb0

.exit5:
ret void
}

; Conditional branch on an `undef` condition. The expected code for both
; targets marks $sgpr4 as implicitly defined, masks it with 1 via s_and_b32,
; compares the result against 0 with s_cmp_lg_u32, and conditionally branches
; back to the loop header .LBB2_1.
; The gfx1031 run additionally expects an s_waitcnt_vscnt in the entry block.
define void @br_undef() {
; WAVE64-LABEL: br_undef:
; WAVE64: ; %bb.0: ; %.exit
; WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-NEXT: .LBB2_1: ; %bb0
; WAVE64-NEXT: ; =>This Inner Loop Header: Depth=1
; WAVE64-NEXT: ; implicit-def: $sgpr4
; WAVE64-NEXT: s_and_b32 s4, s4, 1
; WAVE64-NEXT: s_cmp_lg_u32 s4, 0
; WAVE64-NEXT: s_cbranch_scc1 .LBB2_1
; WAVE64-NEXT: ; %bb.2: ; %.exit5
; WAVE64-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-LABEL: br_undef:
; WAVE32: ; %bb.0: ; %.exit
; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; WAVE32-NEXT: .LBB2_1: ; %bb0
; WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1
; WAVE32-NEXT: ; implicit-def: $sgpr4
; WAVE32-NEXT: s_and_b32 s4, s4, 1
; WAVE32-NEXT: s_cmp_lg_u32 s4, 0
; WAVE32-NEXT: s_cbranch_scc1 .LBB2_1
; WAVE32-NEXT: ; %bb.2: ; %.exit5
; WAVE32-NEXT: s_setpc_b64 s[30:31]
.exit:
br label %bb0

bb0:
; Undefined condition: the true successor is %.exit5, the false successor
; is %bb0.
br i1 undef, label %.exit5, label %bb0

.exit5:
ret void
}

; Conditional branch on a `poison` condition. The expected code has the same
; shape as the checks in @br_undef: $sgpr4 is marked implicitly defined,
; masked with 1 via s_and_b32, compared against 0 with s_cmp_lg_u32, and used
; for a conditional branch back to the loop header .LBB3_1.
; The gfx1031 run additionally expects an s_waitcnt_vscnt in the entry block.
define void @br_poison() {
; WAVE64-LABEL: br_poison:
; WAVE64: ; %bb.0: ; %.exit
; WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-NEXT: .LBB3_1: ; %bb0
; WAVE64-NEXT: ; =>This Inner Loop Header: Depth=1
; WAVE64-NEXT: ; implicit-def: $sgpr4
; WAVE64-NEXT: s_and_b32 s4, s4, 1
; WAVE64-NEXT: s_cmp_lg_u32 s4, 0
; WAVE64-NEXT: s_cbranch_scc1 .LBB3_1
; WAVE64-NEXT: ; %bb.2: ; %.exit5
; WAVE64-NEXT: s_setpc_b64 s[30:31]
;
; WAVE32-LABEL: br_poison:
; WAVE32: ; %bb.0: ; %.exit
; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
; WAVE32-NEXT: .LBB3_1: ; %bb0
; WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1
; WAVE32-NEXT: ; implicit-def: $sgpr4
; WAVE32-NEXT: s_and_b32 s4, s4, 1
; WAVE32-NEXT: s_cmp_lg_u32 s4, 0
; WAVE32-NEXT: s_cbranch_scc1 .LBB3_1
; WAVE32-NEXT: ; %bb.2: ; %.exit5
; WAVE32-NEXT: s_setpc_b64 s[30:31]
.exit:
br label %bb0

bb0:
; Poison condition: the true successor is %.exit5, the false successor
; is %bb0.
br i1 poison, label %.exit5, label %bb0

.exit5:
ret void
}
50 changes: 50 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir
Original file line number Diff line number Diff line change
Expand Up @@ -584,3 +584,53 @@ body: |
%7:vgpr(p999) = G_CONSTANT i64 18446744004990098135
S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7
...

---
# Instruction-selection test for a G_ZEXT of an SGPR s1 constant `true` to an
# SGPR s32 value that is then used as the G_BRCOND condition.
# Expected selection, per the checks below:
#   - wave64 prefix: the constant becomes S_MOV_B64 -1, is copied to an
#     sreg_32, masked with S_AND_B32 ..., 1, and copied into $scc.
#   - wave32 prefix: the constant becomes S_MOV_B32 -1 and is masked with
#     S_AND_B32 ..., 1 directly before the copy into $scc.
# Both variants then branch with S_CBRANCH_SCC1 to bb.1 and S_BRANCH to bb.2.
name: zext_sgpr_s1_to_sgpr_s32
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
; WAVE64-LABEL: name: zext_sgpr_s1_to_sgpr_s32
; WAVE64: bb.0:
; WAVE64-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]]
; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def $scc
; WAVE64-NEXT: $scc = COPY [[S_AND_B32_]]
; WAVE64-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
; WAVE64-NEXT: S_BRANCH %bb.2
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: bb.1:
; WAVE64-NEXT: successors: %bb.2(0x80000000)
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: {{ $}}
; WAVE64-NEXT: bb.2:
; WAVE32-LABEL: name: zext_sgpr_s1_to_sgpr_s32
; WAVE32: bb.0:
; WAVE32-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_MOV_B32_]], 1, implicit-def $scc
; WAVE32-NEXT: $scc = COPY [[S_AND_B32_]]
; WAVE32-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
; WAVE32-NEXT: S_BRANCH %bb.2
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: bb.1:
; WAVE32-NEXT: successors: %bb.2(0x80000000)
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: {{ $}}
; WAVE32-NEXT: bb.2:
bb.0:
%0:sgpr(s1) = G_CONSTANT i1 true
%1:sgpr(s32) = G_ZEXT %0
G_BRCOND %1, %bb.1
G_BR %bb.2
bb.1:
bb.2:
...
14 changes: 6 additions & 8 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x
; GCN-NEXT: s_buffer_load_dword s2, s[4:7], 0x0
; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT: s_mov_b32 s4, 1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s2, 56
; GCN-NEXT: s_cselect_b32 s2, 1, 0
Expand All @@ -63,19 +64,16 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: s_mov_b32 s4, 0
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0
; GCN-NEXT: s_branch .LBB2_3
; GCN-NEXT: .LBB2_2:
; GCN-NEXT: s_mov_b32 s4, -1
; GCN-NEXT: .LBB2_3: ; %Flow
; GCN-NEXT: s_xor_b32 s2, s4, -1
; GCN-NEXT: .LBB2_2: ; %Flow
; GCN-NEXT: s_xor_b32 s2, s4, 1
; GCN-NEXT: s_and_b32 s2, s2, 1
; GCN-NEXT: s_cmp_lg_u32 s2, 0
; GCN-NEXT: s_cbranch_scc1 .LBB2_5
; GCN-NEXT: ; %bb.4: ; %.zero
; GCN-NEXT: s_cbranch_scc1 .LBB2_4
; GCN-NEXT: ; %bb.3: ; %.zero
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: .LBB2_5: ; %.exit
; GCN-NEXT: .LBB2_4: ; %.exit
; GCN-NEXT: s_endpgm
%val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 0, i32 0)
%cmp = icmp eq i32 %val, 56
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ define amdgpu_kernel void @localize_constants(i1 %cond) {
; GFX9-LABEL: localize_constants:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX9-NEXT: s_mov_b32 s0, -1
; GFX9-NEXT: s_mov_b32 s0, 1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_xor_b32 s1, s1, -1
; GFX9-NEXT: s_xor_b32 s1, s1, 1
; GFX9-NEXT: s_and_b32 s1, s1, 1
; GFX9-NEXT: s_cmp_lg_u32 s1, 0
; GFX9-NEXT: s_cbranch_scc0 .LBB0_2
Expand All @@ -35,7 +35,7 @@ define amdgpu_kernel void @localize_constants(i1 %cond) {
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: .LBB0_2: ; %Flow
; GFX9-NEXT: s_xor_b32 s0, s0, -1
; GFX9-NEXT: s_xor_b32 s0, s0, 1
; GFX9-NEXT: s_and_b32 s0, s0, 1
; GFX9-NEXT: s_cmp_lg_u32 s0, 0
; GFX9-NEXT: s_cbranch_scc1 .LBB0_4
Expand Down Expand Up @@ -96,9 +96,9 @@ define amdgpu_kernel void @localize_globals(i1 %cond) {
; GFX9-LABEL: localize_globals:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX9-NEXT: s_mov_b32 s0, -1
; GFX9-NEXT: s_mov_b32 s0, 1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_xor_b32 s1, s1, -1
; GFX9-NEXT: s_xor_b32 s1, s1, 1
; GFX9-NEXT: s_and_b32 s1, s1, 1
; GFX9-NEXT: s_cmp_lg_u32 s1, 0
; GFX9-NEXT: s_cbranch_scc0 .LBB1_2
Expand All @@ -120,7 +120,7 @@ define amdgpu_kernel void @localize_globals(i1 %cond) {
; GFX9-NEXT: global_store_dword v0, v1, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: .LBB1_2: ; %Flow
; GFX9-NEXT: s_xor_b32 s0, s0, -1
; GFX9-NEXT: s_xor_b32 s0, s0, 1
; GFX9-NEXT: s_and_b32 s0, s0, 1
; GFX9-NEXT: s_cmp_lg_u32 s0, 0
; GFX9-NEXT: s_cbranch_scc1 .LBB1_4
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,9 @@ legalized: true
body: |
bb.0:
; CHECK-LABEL: name: kill_constant_true
; CHECK: [[C:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 true
; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[C]](s1)
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C]](s32)
; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[COPY]](s1)
%0:_(s1) = G_CONSTANT i1 true
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), %0
Expand All @@ -65,8 +66,9 @@ legalized: true
body: |
bb.0:
; CHECK-LABEL: name: kill_constant_false
; CHECK: [[C:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 false
; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[C]](s1)
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C]](s32)
; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), [[COPY]](s1)
%0:_(s1) = G_CONSTANT i1 false
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.kill), %0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,9 @@ legalized: true
body: |
bb.0:
; CHECK-LABEL: name: wqm_demote_constant_true
; CHECK: [[C:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 true
; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[C]](s1)
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C]](s32)
; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[COPY]](s1)
%0:_(s1) = G_CONSTANT i1 true
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), %0
Expand All @@ -65,8 +66,9 @@ legalized: true
body: |
bb.0:
; CHECK-LABEL: name: wqm_demote_constant_false
; CHECK: [[C:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 false
; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[C]](s1)
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C]](s32)
; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), [[COPY]](s1)
%0:_(s1) = G_CONSTANT i1 false
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.wqm.demote), %0
Expand Down
5 changes: 3 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir
Original file line number Diff line number Diff line change
Expand Up @@ -833,8 +833,9 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 true
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[C1]](s1)
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C1]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
; CHECK-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[ICMP]], [[COPY2]]
; CHECK-NEXT: S_NOP 0, implicit [[XOR]](s1)
%0:_(s32) = COPY $vgpr0
Expand Down
39 changes: 20 additions & 19 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -192,11 +192,12 @@ declare i32 @llvm.amdgcn.readfirstlane(i32)
define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-LABEL: s_sdiv_i64:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_or_b64 s[6:7], s[2:3], s[4:5]
; CHECK-NEXT: s_mov_b32 s0, 0
; CHECK-NEXT: s_mov_b32 s1, -1
; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], s[0:1]
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[6:7], 0
; CHECK-NEXT: s_mov_b32 s6, 0
; CHECK-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5]
; CHECK-NEXT: s_mov_b32 s7, -1
; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7]
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[0:1], 0
; CHECK-NEXT: s_mov_b32 s0, 1
; CHECK-NEXT: s_cbranch_vccz .LBB1_2
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: s_ashr_i32 s6, s3, 31
Expand Down Expand Up @@ -326,12 +327,12 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: s_xor_b64 s[0:1], s[6:7], s[8:9]
; CHECK-NEXT: v_xor_b32_e32 v0, s0, v0
; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0
; CHECK-NEXT: s_mov_b32 s1, 0
; CHECK-NEXT: s_mov_b32 s0, 0
; CHECK-NEXT: s_branch .LBB1_3
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: .LBB1_3: ; %Flow
; CHECK-NEXT: s_xor_b32 s0, s1, -1
; CHECK-NEXT: s_xor_b32 s0, s0, 1
; CHECK-NEXT: s_and_b32 s0, s0, 1
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
Expand Down Expand Up @@ -1091,7 +1092,7 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) {
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0
; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], 0, v5, v[1:2]
; CHECK-NEXT: v_mov_b32_e32 v6, 0x1000
; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000
; CHECK-NEXT: s_bfe_i32 s6, 1, 0x10000
; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v9, v1, vcc
; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v9, v1
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
Expand All @@ -1104,7 +1105,7 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) {
; CHECK-NEXT: v_cndmask_b32_e64 v2, v8, v4, s[4:5]
; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v5
; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v3, vcc
; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000
; CHECK-NEXT: s_bfe_i32 s4, 1, 0x10000
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; CHECK-NEXT: v_mov_b32_e32 v6, s4
Expand Down Expand Up @@ -1404,7 +1405,7 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
; CGP-NEXT: s_movk_i32 s7, 0x1000
; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000
; CGP-NEXT: s_bfe_i32 s8, 1, 0x10000
; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
; CGP-NEXT: v_trunc_f32_e32 v6, v5
Expand Down Expand Up @@ -1517,7 +1518,7 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
; CGP-NEXT: v_trunc_f32_e32 v6, v6
; CGP-NEXT: v_mac_f32_e32 v1, 0xcf800000, v6
; CGP-NEXT: v_cvt_u32_f32_e32 v13, v1
; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000
; CGP-NEXT: s_bfe_i32 s4, 1, 0x10000
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
; CGP-NEXT: v_mov_b32_e32 v15, s4
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0
Expand Down Expand Up @@ -1622,7 +1623,7 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3
; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000
; CGP-NEXT: s_bfe_i32 s6, 1, 0x10000
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
Expand All @@ -1632,7 +1633,7 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
; CGP-NEXT: v_cndmask_b32_e64 v4, v7, v6, s[4:5]
; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v11
; CGP-NEXT: v_addc_u32_e32 v7, vcc, 0, v13, vcc
; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000
; CGP-NEXT: s_bfe_i32 s4, 1, 0x10000
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; CGP-NEXT: v_mov_b32_e32 v5, s4
Expand Down Expand Up @@ -1755,7 +1756,7 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) {
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0
; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], 0, v5, v[1:2]
; CHECK-NEXT: v_mov_b32_e32 v6, 0x12d8fb
; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000
; CHECK-NEXT: s_bfe_i32 s6, 1, 0x10000
; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v9, v1, vcc
; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v9, v1
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
Expand All @@ -1768,7 +1769,7 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) {
; CHECK-NEXT: v_cndmask_b32_e64 v2, v8, v4, s[4:5]
; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v5
; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v3, vcc
; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000
; CHECK-NEXT: s_bfe_i32 s4, 1, 0x10000
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; CHECK-NEXT: v_mov_b32_e32 v6, s4
Expand Down Expand Up @@ -2068,7 +2069,7 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: s_mov_b32 s7, 0x12d8fb
; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000
; CGP-NEXT: s_bfe_i32 s8, 1, 0x10000
; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
; CGP-NEXT: v_trunc_f32_e32 v6, v5
Expand Down Expand Up @@ -2181,7 +2182,7 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_trunc_f32_e32 v6, v6
; CGP-NEXT: v_mac_f32_e32 v1, 0xcf800000, v6
; CGP-NEXT: v_cvt_u32_f32_e32 v13, v1
; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000
; CGP-NEXT: s_bfe_i32 s4, 1, 0x10000
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
; CGP-NEXT: v_mov_b32_e32 v15, s4
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v13, 0
Expand Down Expand Up @@ -2286,7 +2287,7 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3
; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000
; CGP-NEXT: s_bfe_i32 s6, 1, 0x10000
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
Expand All @@ -2296,7 +2297,7 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_cndmask_b32_e64 v4, v7, v6, s[4:5]
; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v11
; CGP-NEXT: v_addc_u32_e32 v7, vcc, 0, v13, vcc
; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000
; CGP-NEXT: s_bfe_i32 s4, 1, 0x10000
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
; CGP-NEXT: v_mov_b32_e32 v5, s4
Expand Down
527 changes: 256 additions & 271 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll

Large diffs are not rendered by default.

19 changes: 10 additions & 9 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -186,10 +186,11 @@ declare i32 @llvm.amdgcn.readfirstlane(i32)
define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-LABEL: s_udiv_i64:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_mov_b32 s4, 1
; CHECK-NEXT: s_or_b64 s[6:7], s[0:1], s[2:3]
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: s_mov_b32 s5, -1
; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], s[4:5]
; CHECK-NEXT: s_mov_b32 s8, 0
; CHECK-NEXT: s_mov_b32 s9, -1
; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], s[8:9]
; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[6:7], 0
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s2
; CHECK-NEXT: s_cbranch_vccz .LBB1_2
Expand Down Expand Up @@ -316,12 +317,12 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; CHECK-NEXT: s_mov_b32 s5, 0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: s_branch .LBB1_3
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: .LBB1_3: ; %Flow
; CHECK-NEXT: s_xor_b32 s1, s5, -1
; CHECK-NEXT: s_xor_b32 s1, s4, 1
; CHECK-NEXT: s_and_b32 s1, s1, 1
; CHECK-NEXT: s_cmp_lg_u32 s1, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
Expand Down Expand Up @@ -1971,10 +1972,10 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
; GISEL-NEXT: v_add_i32_e32 v20, vcc, v20, v6
; GISEL-NEXT: v_and_b32_e32 v6, 0xffffff, v0
; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v2
; GISEL-NEXT: s_bfe_i32 s4, -1, 0x10000
; GISEL-NEXT: s_bfe_i32 s5, -1, 0x10000
; GISEL-NEXT: s_bfe_i32 s6, -1, 0x10000
; GISEL-NEXT: s_bfe_i32 s7, -1, 0x10000
; GISEL-NEXT: s_bfe_i32 s4, 1, 0x10000
; GISEL-NEXT: s_bfe_i32 s5, 1, 0x10000
; GISEL-NEXT: s_bfe_i32 s6, 1, 0x10000
; GISEL-NEXT: s_bfe_i32 s7, 1, 0x10000
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v18, v15
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15
Expand Down
1,185 changes: 584 additions & 601 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll

Large diffs are not rendered by default.