198 changes: 198 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/combine-zext-trunc.mir
@@ -0,0 +1,198 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s

---
name: zext_trunc_s32_s16_s32
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; GCN-LABEL: name: zext_trunc_s32_s16_s32
; GCN: liveins: $vgpr0
; GCN: %var:_(s32) = COPY $vgpr0
; GCN: %c3FFF:_(s32) = G_CONSTANT i32 16383
; GCN: %low_bits:_(s32) = G_AND %var, %c3FFF
; GCN: $vgpr0 = COPY %low_bits(s32)
%var:_(s32) = COPY $vgpr0
%c3FFF:_(s32) = G_CONSTANT i32 16383
%low_bits:_(s32) = G_AND %var, %c3FFF
%trunc:_(s16) = G_TRUNC %low_bits(s32)
%zext:_(s32) = G_ZEXT %trunc(s16)
$vgpr0 = COPY %zext(s32)
...

---
name: zext_trunc_s32_s16_s32_unknown_high_bits
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; GCN-LABEL: name: zext_trunc_s32_s16_s32_unknown_high_bits
; GCN: liveins: $vgpr0
; GCN: %var:_(s32) = COPY $vgpr0
; GCN: %cFFFFF:_(s32) = G_CONSTANT i32 1048575
; GCN: %low_bits:_(s32) = G_AND %var, %cFFFFF
; GCN: %trunc:_(s16) = G_TRUNC %low_bits(s32)
; GCN: %zext:_(s32) = G_ZEXT %trunc(s16)
; GCN: $vgpr0 = COPY %zext(s32)
%var:_(s32) = COPY $vgpr0
%cFFFFF:_(s32) = G_CONSTANT i32 1048575
%low_bits:_(s32) = G_AND %var, %cFFFFF
%trunc:_(s16) = G_TRUNC %low_bits(s32)
%zext:_(s32) = G_ZEXT %trunc(s16)
$vgpr0 = COPY %zext(s32)
...

---
name: zext_trunc_s64_s16_s32
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1
; GCN-LABEL: name: zext_trunc_s64_s16_s32
; GCN: liveins: $vgpr0_vgpr1
; GCN: %var:_(s64) = COPY $vgpr0_vgpr1
; GCN: %c3FFF:_(s64) = G_CONSTANT i64 16383
; GCN: %low_bits:_(s64) = G_AND %var, %c3FFF
; GCN: %trunc:_(s16) = G_TRUNC %low_bits(s64)
; GCN: %zext:_(s32) = G_ZEXT %trunc(s16)
; GCN: $vgpr0 = COPY %zext(s32)
%var:_(s64) = COPY $vgpr0_vgpr1
%c3FFF:_(s64) = G_CONSTANT i64 16383
%low_bits:_(s64) = G_AND %var, %c3FFF
%trunc:_(s16) = G_TRUNC %low_bits(s64)
%zext:_(s32) = G_ZEXT %trunc(s16)
$vgpr0 = COPY %zext(s32)
...

---
name: zext_trunc_s32_s16_s64
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; GCN-LABEL: name: zext_trunc_s32_s16_s64
; GCN: liveins: $vgpr0
; GCN: %var:_(s32) = COPY $vgpr0
; GCN: %c3FFF:_(s32) = G_CONSTANT i32 16383
; GCN: %low_bits:_(s32) = G_AND %var, %c3FFF
; GCN: %trunc:_(s16) = G_TRUNC %low_bits(s32)
; GCN: %zext:_(s64) = G_ZEXT %trunc(s16)
; GCN: $vgpr0_vgpr1 = COPY %zext(s64)
%var:_(s32) = COPY $vgpr0
%c3FFF:_(s32) = G_CONSTANT i32 16383
%low_bits:_(s32) = G_AND %var, %c3FFF
%trunc:_(s16) = G_TRUNC %low_bits(s32)
%zext:_(s64) = G_ZEXT %trunc(s16)
$vgpr0_vgpr1 = COPY %zext(s64)
...

---
name: zext_trunc_v2s32_v2s16_v2s32
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1
; GCN-LABEL: name: zext_trunc_v2s32_v2s16_v2s32
; GCN: liveins: $vgpr0_vgpr1
; GCN: %var:_(<2 x s32>) = COPY $vgpr0_vgpr1
; GCN: %c3FFF:_(s32) = G_CONSTANT i32 16383
; GCN: %c7FFF:_(s32) = G_CONSTANT i32 32767
; GCN: %c:_(<2 x s32>) = G_BUILD_VECTOR %c3FFF(s32), %c7FFF(s32)
; GCN: %low_bits:_(<2 x s32>) = G_AND %var, %c
; GCN: $vgpr0_vgpr1 = COPY %low_bits(<2 x s32>)
%var:_(<2 x s32>) = COPY $vgpr0_vgpr1
%c3FFF:_(s32) = G_CONSTANT i32 16383
%c7FFF:_(s32) = G_CONSTANT i32 32767
%c:_(<2 x s32>) = G_BUILD_VECTOR %c3FFF(s32), %c7FFF(s32)
%low_bits:_(<2 x s32>) = G_AND %var, %c
%trunc:_(<2 x s16>) = G_TRUNC %low_bits(<2 x s32>)
%zext:_(<2 x s32>) = G_ZEXT %trunc(<2 x s16>)
$vgpr0_vgpr1 = COPY %zext(<2 x s32>)
...

---
name: zext_trunc_v2s32_v2s16_v2s32_unknown_high_bits
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1
; GCN-LABEL: name: zext_trunc_v2s32_v2s16_v2s32_unknown_high_bits
; GCN: liveins: $vgpr0_vgpr1
; GCN: %var:_(<2 x s32>) = COPY $vgpr0_vgpr1
; GCN: %cFFFFF:_(s32) = G_CONSTANT i32 1048575
; GCN: %c7FFF:_(s32) = G_CONSTANT i32 32767
; GCN: %c:_(<2 x s32>) = G_BUILD_VECTOR %cFFFFF(s32), %c7FFF(s32)
; GCN: %low_bits:_(<2 x s32>) = G_AND %var, %c
; GCN: %trunc:_(<2 x s16>) = G_TRUNC %low_bits(<2 x s32>)
; GCN: %zext:_(<2 x s32>) = G_ZEXT %trunc(<2 x s16>)
; GCN: $vgpr0_vgpr1 = COPY %zext(<2 x s32>)
%var:_(<2 x s32>) = COPY $vgpr0_vgpr1
%cFFFFF:_(s32) = G_CONSTANT i32 1048575
%c7FFF:_(s32) = G_CONSTANT i32 32767
%c:_(<2 x s32>) = G_BUILD_VECTOR %cFFFFF(s32), %c7FFF(s32)
%low_bits:_(<2 x s32>) = G_AND %var, %c
%trunc:_(<2 x s16>) = G_TRUNC %low_bits(<2 x s32>)
%zext:_(<2 x s32>) = G_ZEXT %trunc(<2 x s16>)
$vgpr0_vgpr1 = COPY %zext(<2 x s32>)
...

---
name: zext_trunc_v2s64_v2s16_v2s32
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; GCN-LABEL: name: zext_trunc_v2s64_v2s16_v2s32
; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; GCN: %var:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GCN: %c3FFF:_(s64) = G_CONSTANT i64 16383
; GCN: %c7FFF:_(s64) = G_CONSTANT i64 32767
; GCN: %c:_(<2 x s64>) = G_BUILD_VECTOR %c3FFF(s64), %c7FFF(s64)
; GCN: %low_bits:_(<2 x s64>) = G_AND %var, %c
; GCN: %trunc:_(<2 x s16>) = G_TRUNC %low_bits(<2 x s64>)
; GCN: %zext:_(<2 x s32>) = G_ZEXT %trunc(<2 x s16>)
; GCN: $vgpr0_vgpr1 = COPY %zext(<2 x s32>)
%var:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%c3FFF:_(s64) = G_CONSTANT i64 16383
%c7FFF:_(s64) = G_CONSTANT i64 32767
%c:_(<2 x s64>) = G_BUILD_VECTOR %c3FFF(s64), %c7FFF(s64)
%low_bits:_(<2 x s64>) = G_AND %var, %c
%trunc:_(<2 x s16>) = G_TRUNC %low_bits(<2 x s64>)
%zext:_(<2 x s32>) = G_ZEXT %trunc(<2 x s16>)
$vgpr0_vgpr1 = COPY %zext(<2 x s32>)
...

---
name: zext_trunc_v2s32_v2s16_v2s64
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1
; GCN-LABEL: name: zext_trunc_v2s32_v2s16_v2s64
; GCN: liveins: $vgpr0_vgpr1
; GCN: %var:_(<2 x s32>) = COPY $vgpr0_vgpr1
; GCN: %c3FFF:_(s32) = G_CONSTANT i32 16383
; GCN: %c7FFF:_(s32) = G_CONSTANT i32 32767
; GCN: %c:_(<2 x s32>) = G_BUILD_VECTOR %c3FFF(s32), %c7FFF(s32)
; GCN: %low_bits:_(<2 x s32>) = G_AND %var, %c
; GCN: %trunc:_(<2 x s16>) = G_TRUNC %low_bits(<2 x s32>)
; GCN: %zext:_(<2 x s64>) = G_ZEXT %trunc(<2 x s16>)
; GCN: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %zext(<2 x s64>)
%var:_(<2 x s32>) = COPY $vgpr0_vgpr1
%c3FFF:_(s32) = G_CONSTANT i32 16383
%c7FFF:_(s32) = G_CONSTANT i32 32767
%c:_(<2 x s32>) = G_BUILD_VECTOR %c3FFF(s32), %c7FFF(s32)
%low_bits:_(<2 x s32>) = G_AND %var, %c
%trunc:_(<2 x s16>) = G_TRUNC %low_bits(<2 x s32>)
%zext:_(<2 x s64>) = G_ZEXT %trunc(<2 x s16>)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %zext(<2 x s64>)
...
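
The tests above exercise a known-bits driven fold: when the outer G_ZEXT produces the same type the value started in and the 0x3FFF mask already clears every bit the G_TRUNC would drop, the trunc/zext pair disappears and only the G_AND survives; with the 0xFFFFF mask, or when the source and result types differ, the pair is kept. A rough sketch of the known-bits half of that check, using GISelKnownBits and the MIPatternMatch helpers; the function name and exact combiner hook are illustrative, not the in-tree code:

#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
using namespace MIPatternMatch;

// Illustrative only: can zext(trunc(x)) be replaced by x?
static bool zextOfTruncIsFoldable(MachineInstr &Zext, MachineRegisterInfo &MRI,
                                  GISelKnownBits &KB) {
  Register Dst = Zext.getOperand(0).getReg();
  Register TruncSrc;
  if (!mi_match(Zext.getOperand(1).getReg(), MRI, m_GTrunc(m_Reg(TruncSrc))))
    return false;
  // Only fold back to the original value when the types round-trip exactly,
  // as in the s32 -> s16 -> s32 and <2 x s32> -> <2 x s16> -> <2 x s32> tests.
  if (MRI.getType(Dst) != MRI.getType(TruncSrc))
    return false;
  unsigned NarrowSize =
      MRI.getType(Zext.getOperand(1).getReg()).getScalarSizeInBits();
  unsigned SrcSize = MRI.getType(TruncSrc).getScalarSizeInBits();
  // Every bit the G_TRUNC discards must already be known zero; the 0x3FFF
  // masks guarantee that, the 0xFFFFF masks do not.
  APInt HighBits = APInt::getBitsSetFrom(SrcSize, NarrowSize);
  return KB.maskedValueIsZero(TruncSrc, HighBits);
}
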
6 changes: 0 additions & 6 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
@@ -139,9 +139,6 @@ define void @constrained_if_register_class() {
; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_cmp_lg_u32 s4, 0
; CHECK-NEXT: s_cselect_b32 s4, 1, 0
; CHECK-NEXT: s_and_b32 s4, s4, 1
; CHECK-NEXT: s_cmp_lg_u32 s4, 0
; CHECK-NEXT: s_cbranch_scc1 BB4_4
; CHECK-NEXT: ; %bb.1: ; %bb2
; CHECK-NEXT: s_getpc_b64 s[6:7]
@@ -161,9 +158,6 @@ define void @constrained_if_register_class() {
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: BB4_3: ; %bb8
; CHECK-NEXT: s_cmp_lg_u32 s4, 0
; CHECK-NEXT: s_cselect_b32 s4, 1, 0
; CHECK-NEXT: s_and_b32 s4, s4, 1
; CHECK-NEXT: s_cmp_lg_u32 s4, 0
; CHECK-NEXT: s_cbranch_scc0 BB4_5
; CHECK-NEXT: BB4_4: ; %bb12
; CHECK-NEXT: s_setpc_b64 s[30:31]
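
In this file (and in the end.cf, is.private, is.shared, set_inactive and non-entry-alloca tests below) the dropped lines are the s_cselect_b32 / s_and_b32 / s_cmp_lg_u32 sequence that copied the SCC result of the preceding s_cmp_lg_u32 into a 0/1 register, masked it with 1, and compared it with zero again before the branch. Known-bits reasoning makes the mask and the second compare provably redundant: a zero-extended 1-bit condition has no set bits above bit 0. A standalone illustration of that arithmetic with the KnownBits helper (illustrative only, not the in-tree combine):

#include "llvm/Support/KnownBits.h"
using namespace llvm;

// Why "and x, 1" followed by "cmp x, 0" adds nothing once x is known to be
// a zero-extended 1-bit value.
static bool maskedRecompareIsRedundant() {
  KnownBits Cond(1);               // an s1 condition: one completely unknown bit
  KnownBits Wide = Cond.zext(32);  // after G_ZEXT, bits 31..1 are known zero
  // The AND mask keeps bit 0, the only bit that can possibly be set, and the
  // follow-up compare against zero just repeats the original test.
  return Wide.Zero.countLeadingOnes() == 31;
}
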
3 changes: 0 additions & 3 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
@@ -9,9 +9,6 @@ define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) {
; GCN-NEXT: s_load_dword s0, s[4:5], 0x24
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s1, 0
; GCN-NEXT: s_cselect_b32 s1, 1, 0
; GCN-NEXT: s_and_b32 s1, s1, 1
; GCN-NEXT: s_cmp_lg_u32 s1, 0
; GCN-NEXT: s_cbranch_scc1 BB0_2
; GCN-NEXT: ; %bb.1: ; %mid
; GCN-NEXT: v_mov_b32_e32 v0, 0
3 changes: 0 additions & 3 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll
@@ -8,9 +8,6 @@ define amdgpu_kernel void @test_wave64(i32 %arg0, i64 %saved) {
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s2, 0
; GCN-NEXT: s_cselect_b32 s2, 1, 0
; GCN-NEXT: s_and_b32 s2, s2, 1
; GCN-NEXT: s_cmp_lg_u32 s2, 0
; GCN-NEXT: s_cbranch_scc1 BB0_2
; GCN-NEXT: ; %bb.1: ; %mid
; GCN-NEXT: v_mov_b32_e32 v0, 0
6 changes: 0 additions & 6 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
@@ -53,9 +53,6 @@ define amdgpu_kernel void @is_private_sgpr(i8* %ptr) {
; CI-NEXT: s_load_dword s0, s[4:5], 0x11
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: s_cmp_lg_u32 s1, s0
; CI-NEXT: s_cselect_b32 s0, 1, 0
; CI-NEXT: s_and_b32 s0, s0, 1
; CI-NEXT: s_cmp_lg_u32 s0, 0
; CI-NEXT: s_cbranch_scc1 BB1_2
; CI-NEXT: ; %bb.1: ; %bb0
; CI-NEXT: v_mov_b32_e32 v0, 0
@@ -71,9 +68,6 @@ define amdgpu_kernel void @is_private_sgpr(i8* %ptr) {
; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
; GFX9-NEXT: s_lshl_b32 s0, s0, 16
; GFX9-NEXT: s_cmp_lg_u32 s1, s0
; GFX9-NEXT: s_cselect_b32 s0, 1, 0
; GFX9-NEXT: s_and_b32 s0, s0, 1
; GFX9-NEXT: s_cmp_lg_u32 s0, 0
; GFX9-NEXT: s_cbranch_scc1 BB1_2
; GFX9-NEXT: ; %bb.1: ; %bb0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
6 changes: 0 additions & 6 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
@@ -53,9 +53,6 @@ define amdgpu_kernel void @is_local_sgpr(i8* %ptr) {
; CI-NEXT: s_load_dword s0, s[4:5], 0x10
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: s_cmp_lg_u32 s1, s0
; CI-NEXT: s_cselect_b32 s0, 1, 0
; CI-NEXT: s_and_b32 s0, s0, 1
; CI-NEXT: s_cmp_lg_u32 s0, 0
; CI-NEXT: s_cbranch_scc1 BB1_2
; CI-NEXT: ; %bb.1: ; %bb0
; CI-NEXT: v_mov_b32_e32 v0, 0
@@ -71,9 +68,6 @@ define amdgpu_kernel void @is_local_sgpr(i8* %ptr) {
; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
; GFX9-NEXT: s_lshl_b32 s0, s0, 16
; GFX9-NEXT: s_cmp_lg_u32 s1, s0
; GFX9-NEXT: s_cselect_b32 s0, 1, 0
; GFX9-NEXT: s_and_b32 s0, s0, 1
; GFX9-NEXT: s_cmp_lg_u32 s0, 0
; GFX9-NEXT: s_cbranch_scc1 BB1_2
; GFX9-NEXT: ; %bb.1: ; %bb0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
@@ -43,19 +43,18 @@ define amdgpu_kernel void @set_inactive_64(i64 addrspace(1)* %out, i64 %in) {
define amdgpu_kernel void @set_inactive_scc(i32 addrspace(1)* %out, i32 %in, <4 x i32> inreg %desc) {
; GCN-LABEL: set_inactive_scc:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_buffer_load_dword s2, s[4:7], 0x0
; GCN-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x34
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s2, 56
; GCN-NEXT: s_buffer_load_dword s1, s[8:11], 0x0
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s1, 56
; GCN-NEXT: s_cselect_b32 s0, 1, 0
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v0, 42
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: s_and_b32 s0, s0, 1
; GCN-NEXT: s_cmp_lg_u32 s0, 0
; GCN-NEXT: s_cbranch_scc0 BB2_2
; GCN-NEXT: ; %bb.1: ; %.one
9 changes: 0 additions & 9 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
@@ -21,18 +21,12 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
; GCN-NEXT: s_movk_i32 s32, 0x400
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s6, 0
; GCN-NEXT: s_cselect_b32 s6, 1, 0
; GCN-NEXT: s_and_b32 s6, s6, 1
; GCN-NEXT: s_cmp_lg_u32 s6, 0
; GCN-NEXT: s_mov_b32 s33, 0
; GCN-NEXT: s_cbranch_scc1 BB0_3
; GCN-NEXT: ; %bb.1: ; %bb.0
; GCN-NEXT: s_load_dword s6, s[4:5], 0xc
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s6, 0
; GCN-NEXT: s_cselect_b32 s6, 1, 0
; GCN-NEXT: s_and_b32 s6, s6, 1
; GCN-NEXT: s_cmp_lg_u32 s6, 0
; GCN-NEXT: s_cbranch_scc1 BB0_3
; GCN-NEXT: ; %bb.2: ; %bb.1
; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
@@ -102,9 +96,6 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
; GCN-NEXT: s_movk_i32 s32, 0x1000
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s6, 0
; GCN-NEXT: s_cselect_b32 s6, 1, 0
; GCN-NEXT: s_and_b32 s6, s6, 1
; GCN-NEXT: s_cmp_lg_u32 s6, 0
; GCN-NEXT: s_mov_b32 s33, 0
; GCN-NEXT: s_cbranch_scc1 BB1_2
; GCN-NEXT: ; %bb.1: ; %bb.0
260 changes: 118 additions & 142 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll

Large diffs are not rendered by default.

13 changes: 3 additions & 10 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
@@ -404,18 +404,14 @@ define amdgpu_ps i32 @s_shl_i32_zext_i16(i16 inreg %x) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
; GFX8-NEXT: s_and_b32 s0, s0, 0x3fff
; GFX8-NEXT: s_bfe_u32 s1, 2, 0x100000
; GFX8-NEXT: s_lshl_b32 s0, s0, s1
; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
; GFX8-NEXT: s_lshl_b32 s0, s0, 2
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_shl_i32_zext_i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
; GFX9-NEXT: s_and_b32 s0, s0, 0x3fff
; GFX9-NEXT: s_bfe_u32 s1, 2, 0x100000
; GFX9-NEXT: s_lshl_b32 s0, s0, s1
; GFX9-NEXT: s_bfe_u32 s0, s0, 0x100000
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-NEXT: ; return to shader part epilog
%and = and i16 %x, 16383
%ext = zext i16 %and to i32
@@ -464,13 +460,10 @@ define amdgpu_ps <2 x i32> @s_shl_v2i32_zext_v2i16(<2 x i16> inreg %x) {
; GFX8-LABEL: s_shl_v2i32_zext_v2i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_movk_i32 s2, 0x3fff
; GFX8-NEXT: s_mov_b32 s4, 0xffff
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: s_mov_b32 s3, s2
; GFX8-NEXT: s_and_b32 s0, s0, s4
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
; GFX8-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GFX8-NEXT: s_mov_b32 s5, s4
; GFX8-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5]
; GFX8-NEXT: s_lshl_b32 s0, s0, 2
; GFX8-NEXT: s_lshl_b32 s1, s1, 2
; GFX8-NEXT: ; return to shader part epilog
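
The s_bfe_u32 instructions dropped from s_shl_i32_zext_i16 (operand 0x100000 encodes width 16, offset 0) were zero-extending values whose upper bits are already known zero: the literal shift amount 2 and the result of the 0x3fff mask. Once known-bits analysis proves that, the extract folds away and the shift amount becomes an immediate. A small standalone sketch of the reasoning (illustrative only):

#include "llvm/Support/KnownBits.h"
using namespace llvm;

// After "s_and_b32 s0, s0, 0x3fff" a 16-bit zero-extension of s0 is a no-op.
static bool bfeIsRedundantAfterMask() {
  KnownBits K(32);
  K.Zero.setBitsFrom(14);  // the 0x3fff mask clears bits 31..14
  // s_bfe_u32 with 0x100000 keeps the low 16 bits and zeroes the rest, which
  // changes nothing when bits 31..16 are already known zero.
  return K.Zero.countLeadingOnes() >= 16;
}
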
260 changes: 118 additions & 142 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll

Large diffs are not rendered by default.

46 changes: 7 additions & 39 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
@@ -215,45 +215,13 @@ define i32 @v_urem_i32_pow2k_denom(i32 %num) {
}

define <2 x i32> @v_urem_v2i32_pow2k_denom(<2 x i32> %num) {
; GISEL-LABEL: v_urem_v2i32_pow2k_denom:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_movk_i32 s4, 0x1000
; GISEL-NEXT: v_cvt_f32_u32_e32 v2, s4
; GISEL-NEXT: s_sub_i32 s5, 0, s4
; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
; GISEL-NEXT: v_mul_lo_u32 v3, s5, v2
; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GISEL-NEXT: v_mul_hi_u32 v3, v0, v2
; GISEL-NEXT: v_mul_hi_u32 v2, v1, v2
; GISEL-NEXT: v_lshlrev_b32_e32 v3, 12, v3
; GISEL-NEXT: v_lshlrev_b32_e32 v2, 12, v2
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v1
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v1
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_urem_v2i32_pow2k_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_add_i32 s4, 0x1000, -1
; CGP-NEXT: v_and_b32_e32 v0, s4, v0
; CGP-NEXT: v_and_b32_e32 v1, s4, v1
; CGP-NEXT: s_setpc_b64 s[30:31]
; CHECK-LABEL: v_urem_v2i32_pow2k_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_add_i32 s4, 0x1000, -1
; CHECK-NEXT: v_and_b32_e32 v0, s4, v0
; CHECK-NEXT: v_and_b32_e32 v1, s4, v1
; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = urem <2 x i32> %num, <i32 4096, i32 4096>
ret <2 x i32> %result
}
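
The separate GISEL-prefixed checks above could be merged into a single CHECK prefix because the GlobalISel path now produces the same add/and sequence as the CGP path: an unsigned remainder by a power of two is just a mask. The v2i64 variant below follows the same pattern, with the mask built by an add/addc pair. The scalar equivalent (illustrative C++, not generated code):

#include <cstdint>

// x % 4096 for unsigned x reduces to a mask with 4095, matching the
// s_add_i32 0x1000, -1 / v_and_b32_e32 pair in the merged CHECK lines.
static uint32_t urem_pow2k(uint32_t x) { return x & (4096u - 1u); }
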
299 changes: 19 additions & 280 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -962,286 +962,25 @@ define i64 @v_urem_i64_pow2k_denom(i64 %num) {
}

define <2 x i64> @v_urem_v2i64_pow2k_denom(<2 x i64> %num) {
; GISEL-LABEL: v_urem_v2i64_pow2k_denom:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: s_movk_i32 s10, 0x1000
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s10
; GISEL-NEXT: s_sub_u32 s8, 0, s10
; GISEL-NEXT: s_cselect_b32 s4, 1, 0
; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
; GISEL-NEXT: v_mov_b32_e32 v6, v4
; GISEL-NEXT: s_and_b32 s4, s4, 1
; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v6
; GISEL-NEXT: s_cmp_lg_u32 s4, 0
; GISEL-NEXT: s_subb_u32 s9, 0, 0
; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4
; GISEL-NEXT: s_sub_u32 s11, 0, s10
; GISEL-NEXT: s_cselect_b32 s4, 1, 0
; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5
; GISEL-NEXT: v_trunc_f32_e32 v6, v6
; GISEL-NEXT: s_and_b32 s4, s4, 1
; GISEL-NEXT: v_trunc_f32_e32 v7, v7
; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7
; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT: s_cmp_lg_u32 s4, 0
; GISEL-NEXT: s_subb_u32 s6, 0, 0
; GISEL-NEXT: v_mul_lo_u32 v8, s11, v6
; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
; GISEL-NEXT: v_mul_lo_u32 v9, s8, v7
; GISEL-NEXT: v_mul_lo_u32 v10, s11, v4
; GISEL-NEXT: v_mul_lo_u32 v11, s6, v4
; GISEL-NEXT: v_mul_hi_u32 v12, s11, v4
; GISEL-NEXT: v_mul_lo_u32 v13, s8, v5
; GISEL-NEXT: v_mul_lo_u32 v14, s9, v5
; GISEL-NEXT: v_mul_hi_u32 v15, s8, v5
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8
; GISEL-NEXT: v_mul_lo_u32 v11, v6, v10
; GISEL-NEXT: v_mul_hi_u32 v16, v4, v10
; GISEL-NEXT: v_mul_hi_u32 v10, v6, v10
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9
; GISEL-NEXT: v_mul_lo_u32 v14, v7, v13
; GISEL-NEXT: v_mul_hi_u32 v17, v5, v13
; GISEL-NEXT: v_mul_hi_u32 v13, v7, v13
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15
; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8
; GISEL-NEXT: v_mul_lo_u32 v15, v6, v8
; GISEL-NEXT: v_mul_hi_u32 v18, v4, v8
; GISEL-NEXT: v_mul_hi_u32 v8, v6, v8
; GISEL-NEXT: v_mul_lo_u32 v19, v5, v9
; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v19
; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17
; GISEL-NEXT: v_mul_lo_u32 v14, v7, v9
; GISEL-NEXT: v_mul_hi_u32 v17, v5, v9
; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9
; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13
; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17
; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v16
; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v18
; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v15
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10
; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v6, v8, vcc
; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8
; GISEL-NEXT: v_mul_lo_u32 v8, s11, v4
; GISEL-NEXT: v_mul_lo_u32 v11, s6, v4
; GISEL-NEXT: v_mul_hi_u32 v12, s11, v4
; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13
; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5]
; GISEL-NEXT: v_add_i32_e64 v7, s[6:7], v7, v9
; GISEL-NEXT: v_mul_lo_u32 v9, s8, v5
; GISEL-NEXT: v_mul_lo_u32 v14, s9, v5
; GISEL-NEXT: v_mul_hi_u32 v15, s8, v5
; GISEL-NEXT: v_mul_lo_u32 v16, s11, v10
; GISEL-NEXT: v_mul_lo_u32 v17, v10, v8
; GISEL-NEXT: v_mul_hi_u32 v18, v4, v8
; GISEL-NEXT: v_mul_hi_u32 v8, v10, v8
; GISEL-NEXT: v_mul_lo_u32 v19, s8, v13
; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16
; GISEL-NEXT: v_mul_lo_u32 v16, v13, v9
; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v14, v19
; GISEL-NEXT: v_mul_hi_u32 v19, v5, v9
; GISEL-NEXT: v_mul_hi_u32 v9, v13, v9
; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12
; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v14, v15
; GISEL-NEXT: v_mul_lo_u32 v14, v4, v11
; GISEL-NEXT: v_mul_lo_u32 v15, v5, v12
; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v15
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7]
; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v19
; GISEL-NEXT: v_mul_lo_u32 v15, v10, v11
; GISEL-NEXT: v_mul_hi_u32 v19, v4, v11
; GISEL-NEXT: v_mul_hi_u32 v10, v10, v11
; GISEL-NEXT: v_mul_lo_u32 v11, v13, v12
; GISEL-NEXT: v_mul_hi_u32 v13, v13, v12
; GISEL-NEXT: v_mul_hi_u32 v12, v5, v12
; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v17, v14
; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9]
; GISEL-NEXT: v_add_i32_e64 v8, s[8:9], v15, v8
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9]
; GISEL-NEXT: v_add_i32_e64 v9, s[8:9], v11, v9
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[8:9]
; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v14, v18
; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9]
; GISEL-NEXT: v_add_i32_e64 v8, s[8:9], v8, v19
; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9]
; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7]
; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7]
; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v17, v14
; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v18
; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v19
; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12
; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v14
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7]
; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v16
; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7]
; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v15, v12
; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v14
; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v12
; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v13, v11
; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc
; GISEL-NEXT: v_addc_u32_e64 v7, vcc, v7, v11, s[4:5]
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8
; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc
; GISEL-NEXT: v_mul_lo_u32 v8, v3, v4
; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4
; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9
; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc
; GISEL-NEXT: v_mul_lo_u32 v9, v1, v5
; GISEL-NEXT: v_mul_hi_u32 v11, v0, v5
; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
; GISEL-NEXT: v_mul_lo_u32 v12, v2, v6
; GISEL-NEXT: v_mul_lo_u32 v13, v3, v6
; GISEL-NEXT: v_mul_hi_u32 v14, v2, v6
; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6
; GISEL-NEXT: v_mul_lo_u32 v15, v0, v7
; GISEL-NEXT: v_mul_lo_u32 v16, v1, v7
; GISEL-NEXT: v_mul_hi_u32 v17, v0, v7
; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v13, v4
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v16, v5
; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v17
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8
; GISEL-NEXT: v_mul_lo_u32 v10, s10, v4
; GISEL-NEXT: v_mul_lo_u32 v12, 0, v4
; GISEL-NEXT: v_mul_hi_u32 v4, s10, v4
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9
; GISEL-NEXT: v_mul_lo_u32 v11, s10, v5
; GISEL-NEXT: v_mul_lo_u32 v13, 0, v5
; GISEL-NEXT: v_mul_hi_u32 v5, s10, v5
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8
; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9
; GISEL-NEXT: v_mul_lo_u32 v6, s10, v6
; GISEL-NEXT: v_mul_lo_u32 v7, s10, v7
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v12, v6
; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10
; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc
; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4
; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s10, v2
; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5]
; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v11
; GISEL-NEXT: v_subb_u32_e64 v7, s[6:7], v1, v5, s[4:5]
; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v5
; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s10, v0
; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[6:7]
; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], 0, v6
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[6:7]
; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v7
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc
; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
; GISEL-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, s10, v2
; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s10, v8
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
; GISEL-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc
; GISEL-NEXT: v_subrev_i32_e32 v9, vcc, s10, v0
; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s10, v9
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v3
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc
; GISEL-NEXT: v_subrev_i32_e32 v13, vcc, s10, v8
; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v3, vcc
; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v1
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, vcc
; GISEL-NEXT: v_subrev_i32_e32 v16, vcc, s10, v9
; GISEL-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v1, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GISEL-NEXT: v_cndmask_b32_e32 v10, v12, v10, vcc
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; GISEL-NEXT: v_cndmask_b32_e32 v11, v15, v11, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v13, vcc
; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11
; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, v16, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v14, vcc
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5]
; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5
; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v9, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5]
; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_urem_v2i64_pow2k_denom:
; CGP: ; %bb.0:
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT: s_movk_i32 s4, 0x1000
; CGP-NEXT: s_add_u32 s5, s4, -1
; CGP-NEXT: s_cselect_b32 s6, 1, 0
; CGP-NEXT: s_and_b32 s6, s6, 1
; CGP-NEXT: s_cmp_lg_u32 s6, 0
; CGP-NEXT: s_addc_u32 s6, 0, -1
; CGP-NEXT: s_add_u32 s4, s4, -1
; CGP-NEXT: s_cselect_b32 s7, 1, 0
; CGP-NEXT: v_and_b32_e32 v0, s5, v0
; CGP-NEXT: s_and_b32 s5, s7, 1
; CGP-NEXT: v_and_b32_e32 v1, s6, v1
; CGP-NEXT: s_cmp_lg_u32 s5, 0
; CGP-NEXT: s_addc_u32 s5, 0, -1
; CGP-NEXT: v_and_b32_e32 v2, s4, v2
; CGP-NEXT: v_and_b32_e32 v3, s5, v3
; CGP-NEXT: s_setpc_b64 s[30:31]
; CHECK-LABEL: v_urem_v2i64_pow2k_denom:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_movk_i32 s4, 0x1000
; CHECK-NEXT: s_add_u32 s5, s4, -1
; CHECK-NEXT: s_cselect_b32 s6, 1, 0
; CHECK-NEXT: s_and_b32 s6, s6, 1
; CHECK-NEXT: s_cmp_lg_u32 s6, 0
; CHECK-NEXT: s_addc_u32 s6, 0, -1
; CHECK-NEXT: s_add_u32 s4, s4, -1
; CHECK-NEXT: s_cselect_b32 s7, 1, 0
; CHECK-NEXT: v_and_b32_e32 v0, s5, v0
; CHECK-NEXT: s_and_b32 s5, s7, 1
; CHECK-NEXT: v_and_b32_e32 v1, s6, v1
; CHECK-NEXT: s_cmp_lg_u32 s5, 0
; CHECK-NEXT: s_addc_u32 s5, 0, -1
; CHECK-NEXT: v_and_b32_e32 v2, s4, v2
; CHECK-NEXT: v_and_b32_e32 v3, s5, v3
; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = urem <2 x i64> %num, <i64 4096, i64 4096>
ret <2 x i64> %result
}
1 change: 1 addition & 0 deletions llvm/unittests/CodeGen/GlobalISel/CMakeLists.txt
@@ -20,5 +20,6 @@ add_llvm_unittest(GlobalISelTests
GISelMITest.cpp
PatternMatchTest.cpp
KnownBitsTest.cpp
KnownBitsVectorTest.cpp
GISelUtilsTest.cpp
)
553 changes: 553 additions & 0 deletions llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp

Large diffs are not rendered by default.

1,527 changes: 1,527 additions & 0 deletions llvm/unittests/CodeGen/GlobalISel/KnownBitsVectorTest.cpp

Large diffs are not rendered by default.
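
The 553 added lines in KnownBitsTest.cpp and the new 1,527-line KnownBitsVectorTest.cpp are not rendered above. Based on the fixtures used by the existing GlobalISel unit tests, a vector test presumably looks something like the sketch below; the test name, MIR body and expected masks here are assumptions for illustration, not a copy of the hidden files:

// Hypothetical example in the style of the existing KnownBitsTest.cpp tests.
TEST_F(AMDGPUGISelMITest, TestKnownBitsVectorAndSketch) {
  StringRef MIRString =
      "  %unknown:_(<2 x s32>) = G_IMPLICIT_DEF\n"
      "  %mask:_(s32) = G_CONSTANT i32 255\n"
      "  %splat:_(<2 x s32>) = G_BUILD_VECTOR %mask, %mask\n"
      "  %masked:_(<2 x s32>) = G_AND %unknown, %splat\n"
      "  %copy:_(<2 x s32>) = COPY %masked\n";
  setUp(MIRString);
  if (!TM)
    return;
  Register CopyReg = Copies[Copies.size() - 1];
  MachineInstr *FinalCopy = MRI->getVRegDef(CopyReg);
  Register SrcReg = FinalCopy->getOperand(1).getReg();
  GISelKnownBits Info(*MF);
  KnownBits Known = Info.getKnownBits(SrcReg);
  // Every lane is ANDed with 0xFF, so bits 31..8 are known zero across the
  // whole vector and nothing is known one.
  EXPECT_EQ(0xFFFFFF00u, Known.Zero.getZExtValue());
  EXPECT_EQ(0u, Known.One.getZExtValue());
}
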