From 68a0de44f21e8e6aceab7796ec971e08c3085381 Mon Sep 17 00:00:00 2001 From: John Lu Date: Wed, 5 Nov 2025 14:51:52 -0600 Subject: [PATCH 1/3] Pre-commit shlN_add test results with sdag Signed-off-by: John Lu --- llvm/test/CodeGen/AMDGPU/{GlobalISel => }/shlN_add.ll | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename llvm/test/CodeGen/AMDGPU/{GlobalISel => }/shlN_add.ll (100%) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll b/llvm/test/CodeGen/AMDGPU/shlN_add.ll similarity index 100% rename from llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll rename to llvm/test/CodeGen/AMDGPU/shlN_add.ll From 4b1eb7e20be63e74e4993d34fb32b81830df41f0 Mon Sep 17 00:00:00 2001 From: John Lu Date: Wed, 5 Nov 2025 14:54:03 -0600 Subject: [PATCH 2/3] Commit changes Signed-off-by: John Lu --- llvm/test/CodeGen/AMDGPU/shlN_add.ll | 373 +++++++++++++++++++++++++++ 1 file changed, 373 insertions(+) diff --git a/llvm/test/CodeGen/AMDGPU/shlN_add.ll b/llvm/test/CodeGen/AMDGPU/shlN_add.ll index 9f4a6f2f63f15..da12153385640 100644 --- a/llvm/test/CodeGen/AMDGPU/shlN_add.ll +++ b/llvm/test/CodeGen/AMDGPU/shlN_add.ll @@ -1,4 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9-SDAG %s +; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8-SDAG %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10-SDAG %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX10-SDAG %s + ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10 %s @@ -7,6 +13,24 @@ ; Test gfx9+ s_shl[1-4]_add_u32 pattern matching define amdgpu_ps i32 @s_shl1_add_u32(i32 inreg %src0, i32 inreg %src1) { +; GFX9-SDAG-LABEL: s_shl1_add_u32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 1 +; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: s_shl1_add_u32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 1 +; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: s_shl1_add_u32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 1 +; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_shl1_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl1_add_u32 s0, s0, s1 @@ -28,6 +52,24 @@ define amdgpu_ps i32 @s_shl1_add_u32(i32 inreg %src0, i32 inreg %src1) { } define amdgpu_ps i32 @s_shl2_add_u32(i32 inreg %src0, i32 inreg %src1) { +; GFX9-SDAG-LABEL: s_shl2_add_u32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: s_shl2_add_u32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: s_shl2_add_u32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_shl2_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl2_add_u32 s0, s0, s1 @@ -49,6 +91,24 @@ define amdgpu_ps i32 @s_shl2_add_u32(i32 inreg %src0, i32 inreg %src1) { } define amdgpu_ps i32 @s_shl3_add_u32(i32 inreg %src0, i32 inreg %src1) { +; GFX9-SDAG-LABEL: s_shl3_add_u32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 3 +; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: s_shl3_add_u32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 3 +; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: s_shl3_add_u32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 3 +; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_shl3_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl3_add_u32 s0, s0, s1 @@ -70,6 +130,24 @@ define amdgpu_ps i32 @s_shl3_add_u32(i32 inreg %src0, i32 inreg %src1) { } define amdgpu_ps i32 @s_shl4_add_u32(i32 inreg %src0, i32 inreg %src1) { +; GFX9-SDAG-LABEL: s_shl4_add_u32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 4 +; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: s_shl4_add_u32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 4 +; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: s_shl4_add_u32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 4 +; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s1 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_shl4_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl4_add_u32 s0, s0, s1 @@ -102,6 +180,25 @@ define amdgpu_ps i32 @s_shl5_add_u32(i32 inreg %src0, i32 inreg %src1) { } define i32 @v_shl1_add_u32(i32 %src0, i32 %src1) { +; GFX9-SDAG-LABEL: v_shl1_add_u32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 1, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_shl1_add_u32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_shl1_add_u32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, v0, 1, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_shl1_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -126,6 +223,25 @@ define i32 @v_shl1_add_u32(i32 %src0, i32 %src1) { } define i32 @v_shl2_add_u32(i32 %src0, i32 %src1) { +; GFX9-SDAG-LABEL: v_shl2_add_u32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_shl2_add_u32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_shl2_add_u32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_shl2_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -150,6 +266,25 @@ define i32 @v_shl2_add_u32(i32 %src0, i32 %src1) { } define i32 @v_shl3_add_u32(i32 %src0, i32 %src1) { +; GFX9-SDAG-LABEL: v_shl3_add_u32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 3, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_shl3_add_u32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_shl3_add_u32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, v0, 3, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_shl3_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -174,6 +309,25 @@ define i32 @v_shl3_add_u32(i32 %src0, i32 %src1) { } define i32 @v_shl4_add_u32(i32 %src0, i32 %src1) { +; GFX9-SDAG-LABEL: v_shl4_add_u32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 4, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_shl4_add_u32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0 +; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_shl4_add_u32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, v0, 4, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_shl4_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -198,6 +352,25 @@ define i32 @v_shl4_add_u32(i32 %src0, i32 %src1) { } define i32 @v_shl5_add_u32(i32 %src0, i32 %src1) { +; GFX9-SDAG-LABEL: v_shl5_add_u32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 5, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_shl5_add_u32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_shl5_add_u32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, v0, 5, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX9-LABEL: v_shl5_add_u32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -224,6 +397,22 @@ define i32 @v_shl5_add_u32(i32 %src0, i32 %src1) { ; FIXME: Use v_lshl_add_u32 ; shift is scalar, but add is vector. define amdgpu_ps float @shl1_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { +; GFX9-SDAG-LABEL: shl1_add_u32_vgpr1: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s0, 1, v0 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: shl1_add_u32_vgpr1: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 1 +; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: shl1_add_u32_vgpr1: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, s0, 1, v0 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: shl1_add_u32_vgpr1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl_b32 s0, s0, 1 @@ -248,6 +437,22 @@ define amdgpu_ps float @shl1_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { } define amdgpu_ps float @shl2_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { +; GFX9-SDAG-LABEL: shl2_add_u32_vgpr1: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s0, 2, v0 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: shl2_add_u32_vgpr1: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: shl2_add_u32_vgpr1: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, s0, 2, v0 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: shl2_add_u32_vgpr1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl_b32 s0, s0, 2 @@ -272,6 +477,22 @@ define amdgpu_ps float @shl2_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { } define amdgpu_ps float @shl3_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { +; GFX9-SDAG-LABEL: shl3_add_u32_vgpr1: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s0, 3, v0 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: shl3_add_u32_vgpr1: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 3 +; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: shl3_add_u32_vgpr1: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, s0, 3, v0 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: shl3_add_u32_vgpr1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl_b32 s0, s0, 3 @@ -296,6 +517,22 @@ define amdgpu_ps float @shl3_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { } define amdgpu_ps float @shl4_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { +; GFX9-SDAG-LABEL: shl4_add_u32_vgpr1: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s0, 4, v0 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: shl4_add_u32_vgpr1: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 4 +; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: shl4_add_u32_vgpr1: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, s0, 4, v0 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: shl4_add_u32_vgpr1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl_b32 s0, s0, 4 @@ -320,6 +557,22 @@ define amdgpu_ps float @shl4_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { } define amdgpu_ps float @shl5_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { +; GFX9-SDAG-LABEL: shl5_add_u32_vgpr1: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, v0 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: shl5_add_u32_vgpr1: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 5 +; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: shl5_add_u32_vgpr1: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, v0 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: shl5_add_u32_vgpr1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl_b32 s0, s0, 5 @@ -344,6 +597,30 @@ define amdgpu_ps float @shl5_add_u32_vgpr1(i32 inreg %src0, i32 %src1) { } define amdgpu_ps <2 x i32> @s_shl1_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { +; GFX9-SDAG-LABEL: s_shl1_add_u32_v2: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 1 +; GFX9-SDAG-NEXT: s_lshl_b32 s1, s1, 1 +; GFX9-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: s_shl1_add_u32_v2: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 1 +; GFX8-SDAG-NEXT: s_lshl_b32 s1, s1, 1 +; GFX8-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: s_shl1_add_u32_v2: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 1 +; GFX10-SDAG-NEXT: s_lshl_b32 s1, s1, 1 +; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX10-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_shl1_add_u32_v2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl1_add_u32 s0, s0, s2 @@ -369,6 +646,30 @@ define amdgpu_ps <2 x i32> @s_shl1_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> i } define amdgpu_ps <2 x i32> @s_shl2_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { +; GFX9-SDAG-LABEL: s_shl2_add_u32_v2: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX9-SDAG-NEXT: s_lshl_b32 s1, s1, 2 +; GFX9-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: s_shl2_add_u32_v2: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX8-SDAG-NEXT: s_lshl_b32 s1, s1, 2 +; GFX8-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: s_shl2_add_u32_v2: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX10-SDAG-NEXT: s_lshl_b32 s1, s1, 2 +; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX10-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_shl2_add_u32_v2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl2_add_u32 s0, s0, s2 @@ -394,6 +695,30 @@ define amdgpu_ps <2 x i32> @s_shl2_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> i } define amdgpu_ps <2 x i32> @s_shl3_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { +; GFX9-SDAG-LABEL: s_shl3_add_u32_v2: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 3 +; GFX9-SDAG-NEXT: s_lshl_b32 s1, s1, 3 +; GFX9-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: s_shl3_add_u32_v2: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 3 +; GFX8-SDAG-NEXT: s_lshl_b32 s1, s1, 3 +; GFX8-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: s_shl3_add_u32_v2: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 3 +; GFX10-SDAG-NEXT: s_lshl_b32 s1, s1, 3 +; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX10-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_shl3_add_u32_v2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl3_add_u32 s0, s0, s2 @@ -419,6 +744,30 @@ define amdgpu_ps <2 x i32> @s_shl3_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> i } define amdgpu_ps <2 x i32> @s_shl4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { +; GFX9-SDAG-LABEL: s_shl4_add_u32_v2: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 4 +; GFX9-SDAG-NEXT: s_lshl_b32 s1, s1, 4 +; GFX9-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: s_shl4_add_u32_v2: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 4 +; GFX8-SDAG-NEXT: s_lshl_b32 s1, s1, 4 +; GFX8-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: s_shl4_add_u32_v2: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 4 +; GFX10-SDAG-NEXT: s_lshl_b32 s1, s1, 4 +; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX10-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_shl4_add_u32_v2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl4_add_u32 s0, s0, s2 @@ -444,6 +793,30 @@ define amdgpu_ps <2 x i32> @s_shl4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> i } define amdgpu_ps <2 x i32> @s_shl_2_4_add_u32_v2(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { +; GFX9-SDAG-LABEL: s_shl_2_4_add_u32_v2: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX9-SDAG-NEXT: s_lshl_b32 s1, s1, 4 +; GFX9-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX9-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX9-SDAG-NEXT: ; return to shader part epilog +; +; GFX8-SDAG-LABEL: s_shl_2_4_add_u32_v2: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX8-SDAG-NEXT: s_lshl_b32 s1, s1, 4 +; GFX8-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX8-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX8-SDAG-NEXT: ; return to shader part epilog +; +; GFX10-SDAG-LABEL: s_shl_2_4_add_u32_v2: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX10-SDAG-NEXT: s_lshl_b32 s1, s1, 4 +; GFX10-SDAG-NEXT: s_add_i32 s0, s0, s2 +; GFX10-SDAG-NEXT: s_add_i32 s1, s1, s3 +; GFX10-SDAG-NEXT: ; return to shader part epilog +; ; GFX9-LABEL: s_shl_2_4_add_u32_v2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_lshl2_add_u32 s0, s0, s2 From 66af1d03603b3e568885c0e3281925821836cf0a Mon Sep 17 00:00:00 2001 From: LU-JOHN Date: Fri, 7 Nov 2025 08:36:04 -0600 Subject: [PATCH 3/3] Update llvm/test/CodeGen/AMDGPU/shlN_add.ll Co-authored-by: Matt Arsenault --- llvm/test/CodeGen/AMDGPU/shlN_add.ll | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/test/CodeGen/AMDGPU/shlN_add.ll b/llvm/test/CodeGen/AMDGPU/shlN_add.ll index da12153385640..3e507a0c5889f 100644 --- a/llvm/test/CodeGen/AMDGPU/shlN_add.ll +++ b/llvm/test/CodeGen/AMDGPU/shlN_add.ll @@ -1,5 +1,4 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py - ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9-SDAG %s ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8-SDAG %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10-SDAG %s