170 changes: 170 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir
Original file line number Diff line number Diff line change
@@ -1,6 +1,35 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN

---
name: select_s32_scc
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
; GCN-LABEL: name: select_s32_scc
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY4]]
; GCN: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[COPY3]], implicit $scc
; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:sgpr(s32) = COPY $sgpr2
%3:sgpr(s32) = COPY $sgpr3
%4:sgpr(s32) = G_ICMP intpred(eq), %0, %1
%5:sgpr(s32) = G_SELECT %4, %2, %3
S_ENDPGM 0, implicit %5
...

---
name: select_s64_scc
legalized: true
Expand Down Expand Up @@ -30,6 +59,93 @@ body: |
...

---
name: select_p0_scc
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
; GCN-LABEL: name: select_p0_scc
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
; GCN: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY4]]
; GCN: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc
; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:sgpr(p0) = COPY $sgpr2_sgpr3
%3:sgpr(p0) = COPY $sgpr4_sgpr5
%4:sgpr(s32) = G_ICMP intpred(eq), %0, %1
%5:sgpr(p0) = G_SELECT %4, %2, %3
S_ENDPGM 0, implicit %5
...

---
name: select_p1_scc
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
; GCN-LABEL: name: select_p1_scc
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
; GCN: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY4]]
; GCN: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc
; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:sgpr(p1) = COPY $sgpr2_sgpr3
%3:sgpr(p1) = COPY $sgpr4_sgpr5
%4:sgpr(s32) = G_ICMP intpred(eq), %0, %1
%5:sgpr(p1) = G_SELECT %4, %2, %3
S_ENDPGM 0, implicit %5
...

---
name: select_p999_scc
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
; GCN-LABEL: name: select_p999_scc
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
; GCN: [[COPY3:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY4]]
; GCN: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64 = S_CSELECT_B64 [[COPY2]], [[COPY3]], implicit $scc
; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:sgpr(p999) = COPY $sgpr2_sgpr3
%3:sgpr(p999) = COPY $sgpr4_sgpr5
%4:sgpr(s32) = G_ICMP intpred(eq), %0, %1
%5:sgpr(p999) = G_SELECT %4, %2, %3
S_ENDPGM 0, implicit %5
...

---
name: select_v4s16_scc
legalized: true
Expand Down Expand Up @@ -119,6 +235,33 @@ body: |
...

---
name: select_s32_vcc
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GCN-LABEL: name: select_s32_vcc
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
%3:vgpr(s32) = COPY $vgpr3
%4:vcc(s1) = G_ICMP intpred(eq), %0, %1
%5:vgpr(s32) = G_SELECT %4, %2, %3
S_ENDPGM 0, implicit %5
...

---
name: select_s16_vcc
legalized: true
Expand Down Expand Up @@ -174,3 +317,30 @@ body: |
S_ENDPGM 0, implicit %5
...

---
name: select_p3_vcc
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GCN-LABEL: name: select_p3_vcc
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(p3) = COPY $vgpr2
%3:vgpr(p3) = COPY $vgpr3
%4:vcc(s1) = G_ICMP intpred(eq), %0, %1
%5:vgpr(p3) = G_SELECT %4, %2, %3
S_ENDPGM 0, implicit %5
...
68 changes: 68 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-select.mir
Original file line number Diff line number Diff line change
Expand Up @@ -1179,6 +1179,38 @@ body: |
%5:_(p1) = G_SELECT %4, %2, %3
...

---
name: select_p999_scc_ss
legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
; FAST-LABEL: name: select_p999_scc_ss
; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; FAST: [[COPY2:%[0-9]+]]:sgpr(p999) = COPY $sgpr2_sgpr3
; FAST: [[COPY3:%[0-9]+]]:sgpr(p999) = COPY $sgpr4_sgpr5
; FAST: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
; FAST: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1)
; FAST: [[SELECT:%[0-9]+]]:sgpr(p999) = G_SELECT [[ZEXT]](s32), [[COPY2]], [[COPY3]]
; GREEDY-LABEL: name: select_p999_scc_ss
; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; GREEDY: [[COPY2:%[0-9]+]]:sgpr(p999) = COPY $sgpr2_sgpr3
; GREEDY: [[COPY3:%[0-9]+]]:sgpr(p999) = COPY $sgpr4_sgpr5
; GREEDY: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
; GREEDY: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1)
; GREEDY: [[SELECT:%[0-9]+]]:sgpr(p999) = G_SELECT [[ZEXT]](s32), [[COPY2]], [[COPY3]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(p999) = COPY $sgpr2_sgpr3
%3:_(p999) = COPY $sgpr4_sgpr5
%4:_(s1) = G_ICMP intpred(ne), %0, %1
%5:_(p999) = G_SELECT %4, %2, %3
...

---
name: select_p1_scc_sv
legalized: true
Expand Down Expand Up @@ -1444,6 +1476,42 @@ body: |
%5:_(p1) = G_SELECT %4, %2, %3
...

---
name: select_p999_vcc_vv
legalized: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
; FAST-LABEL: name: select_p999_vcc_vv
; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; FAST: [[COPY2:%[0-9]+]]:vgpr(p999) = COPY $vgpr2_vgpr3
; FAST: [[COPY3:%[0-9]+]]:vgpr(p999) = COPY $vgpr4_vgpr5
; FAST: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
; FAST: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p999)
; FAST: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p999)
; FAST: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]]
; FAST: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]]
; FAST: [[MV:%[0-9]+]]:vgpr(p999) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)
; GREEDY-LABEL: name: select_p999_vcc_vv
; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; GREEDY: [[COPY2:%[0-9]+]]:vgpr(p999) = COPY $vgpr2_vgpr3
; GREEDY: [[COPY3:%[0-9]+]]:vgpr(p999) = COPY $vgpr4_vgpr5
; GREEDY: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p999)
; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p999)
; GREEDY: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]]
; GREEDY: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]]
; GREEDY: [[MV:%[0-9]+]]:vgpr(p999) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(p999) = COPY $vgpr2_vgpr3
%3:_(p999) = COPY $vgpr4_vgpr5
%4:_(s1) = G_ICMP intpred(ne), %0, %1
%5:_(p999) = G_SELECT %4, %2, %3
...

---
name: select_s32_vgpr_vv
legalized: true
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
Original file line number Diff line number Diff line change
Expand Up @@ -865,7 +865,7 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(i32 addrspace(1)* %ou
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u16_e32 v2, 0xffffffc0, v3
; VI-NEXT: v_subrev_u16_e32 v2, 64, v3
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
;
Expand All @@ -883,7 +883,7 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(i32 addrspace(1)* %ou
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_add_u16_e32 v2, 0xffffffc0, v3
; GFX9-NEXT: v_subrev_u16_e32 v2, 64, v3
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_endpgm
;
Expand Down Expand Up @@ -1045,9 +1045,9 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_64(<2 x i16> addrspace(1)* %out
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u16_e32 v2, 0xffffffc0, v4
; VI-NEXT: v_sub_u16_sdwa v3, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v2, v2, v3
; VI-NEXT: v_sub_u16_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT: v_subrev_u16_e32 v3, 64, v4
; VI-NEXT: v_or_b32_e32 v2, v3, v2
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
;
Expand Down Expand Up @@ -1212,9 +1212,9 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_64_123(<2 x i16> addrspace(1)* %ou
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u16_e32 v2, 0xffffffc0, v4
; VI-NEXT: v_add_u16_sdwa v3, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v2, v2, v3
; VI-NEXT: v_add_u16_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT: v_subrev_u16_e32 v3, 64, v4
; VI-NEXT: v_or_b32_e32 v2, v3, v2
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
;
Expand Down Expand Up @@ -1614,9 +1614,9 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_neg32(<2 x i16> addrspace(1)
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u16_e32 v2, 0xffffffe0, v4
; VI-NEXT: v_sub_u16_sdwa v3, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v2, v2, v3
; VI-NEXT: v_sub_u16_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT: v_subrev_u16_e32 v3, 32, v4
; VI-NEXT: v_or_b32_e32 v2, v3, v2
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
;
Expand Down Expand Up @@ -1774,7 +1774,7 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_0(<2 x i16> addrspace(1)* %o
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; VI-NEXT: v_and_b32_e32 v2, 0xffff0000, v3
; VI-NEXT: v_add_u16_e32 v3, 0xffffffe0, v3
; VI-NEXT: v_subrev_u16_e32 v3, 32, v3
; VI-NEXT: v_or_b32_e32 v2, v3, v2
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_lo_zero_hi(<2 x i16> addrspac
; VI-NEXT: s_mov_b32 s5, s1
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; VI-NEXT: v_and_b32_e32 v1, 0xffff0000, v0
; VI-NEXT: v_add_u16_e32 v0, 0xffffffe0, v0
; VI-NEXT: v_subrev_u16_e32 v0, 32, v0
; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
Expand Down