diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll b/llvm/test/CodeGen/AMDGPU/shlN_add.ll similarity index 55% rename from llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll rename to llvm/test/CodeGen/AMDGPU/shlN_add.ll index 9f4a6f2f63f15..3e507a0c5889f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll +++ b/llvm/test/CodeGen/AMDGPU/shlN_add.ll @@ -1,4 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9-SDAG %s +; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8-SDAG %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10-SDAG %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX10-SDAG %s + ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10 %s @@ -7,6 +12,24 @@ ; Test gfx9+ s_shl[1-4]_add_u32 pattern matching define amdgpu_ps i32 @s_shl1_add_u32(i32 inreg %src0, i32 inreg %src1) { +; GFX9-SDAG-LABEL: s_shl1_add_u32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 1 +; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: s_shl1_add_u32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 1 +; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: s_shl1_add_u32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 1 +; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_shl1_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl1_add_u32 s0, s0, s1 @@ -28,6 +51,24 @@ define amdgpu_ps i32 @s_shl1_add_u32(i32 inreg %src0, i32 inreg %src1) { } define amdgpu_ps i32 @s_shl2_add_u32(i32 inreg %src0, i32 inreg %src1) { +; GFX9-SDAG-LABEL: s_shl2_add_u32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: s_shl2_add_u32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: s_shl2_add_u32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_shl2_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl2_add_u32 s0, s0, s1 @@ -49,6 +90,24 @@ define amdgpu_ps i32 @s_shl2_add_u32(i32 inreg %src0, i32 inreg %src1) { } define amdgpu_ps i32 @s_shl3_add_u32(i32 inreg %src0, i32 inreg %src1) { +; GFX9-SDAG-LABEL: s_shl3_add_u32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 3 +; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: s_shl3_add_u32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 3 +; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: s_shl3_add_u32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 3 +; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_shl3_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl3_add_u32 s0, s0, s1 @@ -70,6 +129,24 @@ define amdgpu_ps i32 @s_shl3_add_u32(i32 inreg %src0, i32 inreg %src1) { } define amdgpu_ps i32 @s_shl4_add_u32(i32 inreg %src0, i32 inreg %src1) { +; GFX9-SDAG-LABEL: s_shl4_add_u32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 4 +; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: s_shl4_add_u32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 4 +; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: s_shl4_add_u32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 4 +; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_shl4_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl4_add_u32 s0, s0, s1 @@ -102,6 +179,25 @@ define amdgpu_ps i32 @s_shl5_add_u32(i32 inreg %src0, i32 inreg %src1) { } define i32 @v_shl1_add_u32(i32 %src0, i32 %src1) { +; GFX9-SDAG-LABEL: v_shl1_add_u32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 1, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_shl1_add_u32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_shl1_add_u32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, v0, 1, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_shl1_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -126,6 +222,25 @@ define i32 @v_shl1_add_u32(i32 %src0, i32 %src1) { } define i32 @v_shl2_add_u32(i32 %src0, i32 %src1) { +; GFX9-SDAG-LABEL: v_shl2_add_u32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_shl2_add_u32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_shl2_add_u32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_shl2_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -150,6 +265,25 @@ define i32 @v_shl2_add_u32(i32 %src0, i32 %src1) { } define i32 @v_shl3_add_u32(i32 %src0, i32 %src1) { +; GFX9-SDAG-LABEL: v_shl3_add_u32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 3, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_shl3_add_u32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_shl3_add_u32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, v0, 3, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_shl3_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -174,6 +308,25 @@ define i32 @v_shl3_add_u32(i32 %src0, i32 %src1) { } define i32 @v_shl4_add_u32(i32 %src0, i32 %src1) { +; GFX9-SDAG-LABEL: v_shl4_add_u32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 4, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_shl4_add_u32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0 +; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_shl4_add_u32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, v0, 4, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_shl4_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -198,6 +351,25 @@ define i32 @v_shl4_add_u32(i32 %src0, i32 %src1) { } define i32 @v_shl5_add_u32(i32 %src0, i32 %src1) { +; GFX9-SDAG-LABEL: v_shl5_add_u32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 5, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_shl5_add_u32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_shl5_add_u32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, v0, 5, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_shl5_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -224,6 +396,22 @@ define i32 @v_shl5_add_u32(i32 %src0, i32 %src1) { ; FIXME: Use v_lshl_add_u32 ; shift is scalar, but add is vector. define amdgpu_ps float @shl1_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { +; GFX9-SDAG-LABEL: shl1_add_u32_vgpr1: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s0, 1, v0 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: shl1_add_u32_vgpr1: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 1 +; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: shl1_add_u32_vgpr1: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, s0, 1, v0 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: shl1_add_u32_vgpr1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl_b32 s0, s0, 1 @@ -248,6 +436,22 @@ define amdgpu_ps float @shl1_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { } define amdgpu_ps float @shl2_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { +; GFX9-SDAG-LABEL: shl2_add_u32_vgpr1: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s0, 2, v0 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: shl2_add_u32_vgpr1: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: shl2_add_u32_vgpr1: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, s0, 2, v0 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: shl2_add_u32_vgpr1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl_b32 s0, s0, 2 @@ -272,6 +476,22 @@ define amdgpu_ps float @shl2_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { } define amdgpu_ps float @shl3_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { +; GFX9-SDAG-LABEL: shl3_add_u32_vgpr1: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s0, 3, v0 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: shl3_add_u32_vgpr1: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 3 +; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: shl3_add_u32_vgpr1: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, s0, 3, v0 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: shl3_add_u32_vgpr1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl_b32 s0, s0, 3 @@ -296,6 +516,22 @@ define amdgpu_ps float @shl3_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { } define amdgpu_ps float @shl4_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { +; GFX9-SDAG-LABEL: shl4_add_u32_vgpr1: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s0, 4, v0 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: shl4_add_u32_vgpr1: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 4 +; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: shl4_add_u32_vgpr1: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, s0, 4, v0 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: shl4_add_u32_vgpr1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl_b32 s0, s0, 4 @@ -320,6 +556,22 @@ define amdgpu_ps float @shl4_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { } define amdgpu_ps float @shl5_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { +; GFX9-SDAG-LABEL: shl5_add_u32_vgpr1: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, v0 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: shl5_add_u32_vgpr1: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 5 +; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: shl5_add_u32_vgpr1: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, v0 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: shl5_add_u32_vgpr1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl_b32 s0, s0, 5 @@ -344,6 +596,30 @@ define amdgpu_ps float @shl5_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { } define amdgpu_ps <2 x i32> @s_shl1_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { +; GFX9-SDAG-LABEL: s_shl1_add_u32_v2: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 1 +; GFX9-SDAG-NEXT: s_lshl_b32 s1, s1, 1 +; GFX9-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: s_shl1_add_u32_v2: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 1 +; GFX8-SDAG-NEXT: s_lshl_b32 s1, s1, 1 +; GFX8-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: s_shl1_add_u32_v2: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 1 +; GFX10-SDAG-NEXT: s_lshl_b32 s1, s1, 1 +; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX10-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_shl1_add_u32_v2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl1_add_u32 s0, s0, s2 @@ -369,6 +645,30 @@ define amdgpu_ps <2 x i32> @s_shl1_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> i } define amdgpu_ps <2 x i32> @s_shl2_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { +; GFX9-SDAG-LABEL: s_shl2_add_u32_v2: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX9-SDAG-NEXT: s_lshl_b32 s1, s1, 2 +; GFX9-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: s_shl2_add_u32_v2: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX8-SDAG-NEXT: s_lshl_b32 s1, s1, 2 +; GFX8-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: s_shl2_add_u32_v2: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX10-SDAG-NEXT: s_lshl_b32 s1, s1, 2 +; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX10-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_shl2_add_u32_v2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl2_add_u32 s0, s0, s2 @@ -394,6 +694,30 @@ define amdgpu_ps <2 x i32> @s_shl2_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> i } define amdgpu_ps <2 x i32> @s_shl3_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { +; GFX9-SDAG-LABEL: s_shl3_add_u32_v2: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 3 +; GFX9-SDAG-NEXT: s_lshl_b32 s1, s1, 3 +; GFX9-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: s_shl3_add_u32_v2: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 3 +; GFX8-SDAG-NEXT: s_lshl_b32 s1, s1, 3 +; GFX8-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: s_shl3_add_u32_v2: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 3 +; GFX10-SDAG-NEXT: s_lshl_b32 s1, s1, 3 +; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX10-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_shl3_add_u32_v2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl3_add_u32 s0, s0, s2 @@ -419,6 +743,30 @@ define amdgpu_ps <2 x i32> @s_shl3_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> i } define amdgpu_ps <2 x i32> @s_shl4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { +; GFX9-SDAG-LABEL: s_shl4_add_u32_v2: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 4 +; GFX9-SDAG-NEXT: s_lshl_b32 s1, s1, 4 +; GFX9-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: s_shl4_add_u32_v2: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 4 +; GFX8-SDAG-NEXT: s_lshl_b32 s1, s1, 4 +; GFX8-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: s_shl4_add_u32_v2: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 4 +; GFX10-SDAG-NEXT: s_lshl_b32 s1, s1, 4 +; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX10-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_shl4_add_u32_v2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl4_add_u32 s0, s0, s2 @@ -444,6 +792,30 @@ define amdgpu_ps <2 x i32> @s_shl4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> i } define amdgpu_ps <2 x i32> @s_shl_2_4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { +; GFX9-SDAG-LABEL: s_shl_2_4_add_u32_v2: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX9-SDAG-NEXT: s_lshl_b32 s1, s1, 4 +; GFX9-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: s_shl_2_4_add_u32_v2: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX8-SDAG-NEXT: s_lshl_b32 s1, s1, 4 +; GFX8-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: s_shl_2_4_add_u32_v2: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX10-SDAG-NEXT: s_lshl_b32 s1, s1, 4 +; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX10-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_shl_2_4_add_u32_v2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl2_add_u32 s0, s0, s2