Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -2480,6 +2480,22 @@ def : AMDGPUPatIgnoreCopies <
(i32 (EXTRACT_SUBREG VReg_64:$z, sub1))), sub1)
>;

// (z & ~x)
// Select a 32-bit and-with-inverted-operand, matched through
// DivergentBinFrag<and>, as a single V_BFI_B32 with operands (x, 0, z).
// Per the AMD ISA definition of V_BFI_B32, D = (S0 & S1) | (~S0 & S2), so a
// zero S1 drops the first term and leaves ~x & z.
// NOTE(review): not_oneuse presumably limits the match to a `not` whose
// result has no other user -- confirm against the fragment's definition.
def : AMDGPUPatIgnoreCopies <
(DivergentBinFrag<and> i32:$z, (not_oneuse i32:$x)),
(V_BFI_B32_e64 VSrc_b32:$x, (i32 0), VSrc_b32:$z)
>;

// 64-bit version
// Same (z & ~x) selection for i64 operands. The output is built from two
// 32-bit V_BFI_B32 instructions: each one takes the matching 32-bit half of
// $x and $z (EXTRACT_SUBREG on sub0, then on sub1) with a zero middle
// operand, and REG_SEQUENCE assembles the two results into a VReg_64.
// The halves are computed independently; no bits cross between sub0 and sub1.
def : AMDGPUPatIgnoreCopies <
(DivergentBinFrag<and> i64:$z, (not_oneuse i64:$x)),
(REG_SEQUENCE VReg_64,
// Low half: result.sub0 is formed from x.sub0 and z.sub0.
(V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub0)), (i32 0),
(i32 (EXTRACT_SUBREG VReg_64:$z, sub0))), sub0,
// High half: result.sub1 is formed from x.sub1 and z.sub1.
(V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub1)), (i32 0),
(i32 (EXTRACT_SUBREG VReg_64:$z, sub1))), sub1)
>;

// SHA-256 Ch function
// z ^ (x & (y ^ z))
def : AMDGPUPatIgnoreCopies <
Expand Down
42 changes: 14 additions & 28 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -99,15 +99,13 @@ define i32 @v_andn2_i32(i32 %src0, i32 %src1) {
; GCN-LABEL: v_andn2_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_not_b32_e32 v1, v1
; GCN-NEXT: v_and_b32_e32 v0, v0, v1
; GCN-NEXT: v_bfi_b32 v0, v1, 0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_andn2_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_not_b32_e32 v1, v1
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1
; GFX10PLUS-NEXT: v_bfi_b32 v0, v1, 0, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%not.src1 = xor i32 %src1, -1
%and = and i32 %src0, %not.src1
Expand All @@ -117,14 +115,12 @@ define i32 @v_andn2_i32(i32 %src0, i32 %src1) {
define amdgpu_ps float @v_andn2_i32_sv(i32 inreg %src0, i32 %src1) {
; GCN-LABEL: v_andn2_i32_sv:
; GCN: ; %bb.0:
; GCN-NEXT: v_not_b32_e32 v0, v0
; GCN-NEXT: v_and_b32_e32 v0, s2, v0
; GCN-NEXT: v_bfi_b32 v0, v0, 0, s2
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: v_andn2_i32_sv:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: v_not_b32_e32 v0, v0
; GFX10PLUS-NEXT: v_and_b32_e32 v0, s2, v0
; GFX10PLUS-NEXT: v_bfi_b32 v0, v0, 0, s2
; GFX10PLUS-NEXT: ; return to shader part epilog
%not.src1 = xor i32 %src1, -1
%and = and i32 %src0, %not.src1
Expand All @@ -135,14 +131,12 @@ define amdgpu_ps float @v_andn2_i32_sv(i32 inreg %src0, i32 %src1) {
define amdgpu_ps float @v_andn2_i32_vs(i32 %src0, i32 inreg %src1) {
; GCN-LABEL: v_andn2_i32_vs:
; GCN: ; %bb.0:
; GCN-NEXT: s_not_b32 s0, s2
; GCN-NEXT: v_and_b32_e32 v0, s0, v0
; GCN-NEXT: v_bfi_b32 v0, s2, 0, v0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: v_andn2_i32_vs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_not_b32 s0, s2
; GFX10PLUS-NEXT: v_and_b32_e32 v0, s0, v0
; GFX10PLUS-NEXT: v_bfi_b32 v0, s2, 0, v0
; GFX10PLUS-NEXT: ; return to shader part epilog
%not.src1 = xor i32 %src1, -1
%and = and i32 %src0, %not.src1
Expand Down Expand Up @@ -247,19 +241,15 @@ define i64 @v_andn2_i64(i64 %src0, i64 %src1) {
; GCN-LABEL: v_andn2_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_not_b32_e32 v2, v2
; GCN-NEXT: v_not_b32_e32 v3, v3
; GCN-NEXT: v_and_b32_e32 v0, v0, v2
; GCN-NEXT: v_and_b32_e32 v1, v1, v3
; GCN-NEXT: v_bfi_b32 v0, v2, 0, v0
; GCN-NEXT: v_bfi_b32 v1, v3, 0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_andn2_i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_not_b32_e32 v2, v2
; GFX10PLUS-NEXT: v_not_b32_e32 v3, v3
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v2
; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v3
; GFX10PLUS-NEXT: v_bfi_b32 v0, v2, 0, v0
; GFX10PLUS-NEXT: v_bfi_b32 v1, v3, 0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%not.src1 = xor i64 %src1, -1
%and = and i64 %src0, %not.src1
Expand All @@ -269,18 +259,14 @@ define i64 @v_andn2_i64(i64 %src0, i64 %src1) {
define amdgpu_ps <2 x float> @v_andn2_i64_sv(i64 inreg %src0, i64 %src1) {
; GCN-LABEL: v_andn2_i64_sv:
; GCN: ; %bb.0:
; GCN-NEXT: v_not_b32_e32 v0, v0
; GCN-NEXT: v_not_b32_e32 v1, v1
; GCN-NEXT: v_and_b32_e32 v0, s2, v0
; GCN-NEXT: v_and_b32_e32 v1, s3, v1
; GCN-NEXT: v_bfi_b32 v0, v0, 0, s2
; GCN-NEXT: v_bfi_b32 v1, v1, 0, s3
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: v_andn2_i64_sv:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: v_not_b32_e32 v0, v0
; GFX10PLUS-NEXT: v_not_b32_e32 v1, v1
; GFX10PLUS-NEXT: v_and_b32_e32 v0, s2, v0
; GFX10PLUS-NEXT: v_and_b32_e32 v1, s3, v1
; GFX10PLUS-NEXT: v_bfi_b32 v0, v0, 0, s2
; GFX10PLUS-NEXT: v_bfi_b32 v1, v1, 0, s3
; GFX10PLUS-NEXT: ; return to shader part epilog
%not.src1 = xor i64 %src1, -1
%and = and i64 %src0, %not.src1
Expand Down
Loading
Loading