diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 9999776b98260..bf56a43904f2b 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -3083,8 +3083,10 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands( return false; case TargetOpcode::G_ANYEXT: case TargetOpcode::G_SEXT: - case TargetOpcode::G_ZEXT: { + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_TRUNC: { // Match: logic (ext X), (ext Y) --> ext (logic X, Y) + // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y) break; } case TargetOpcode::G_AND: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-op-trunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-op-trunc.mir index d23c939cc0d86..454802df8cebe 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-op-trunc.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-op-trunc.mir @@ -14,11 +14,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) - ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $w0 = COPY [[AND]](s32) %0:_(s32) = COPY $w0 %1:_(s32) = COPY $w1 %2:_(s16) = G_TRUNC %0 @@ -37,10 +34,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY1]](<4 x s32>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: $x0 = COPY [[AND]](<4 x s16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[AND]](<4 x s32>) + ; CHECK-NEXT: $x0 = COPY [[TRUNC]](<4 x s16>) %0:_(<4 x s32>) = COPY $q0 %1:_(<4 x s32>) = COPY $q1 %2:_(<4 x s16>) = G_TRUNC %0 @@ -60,11 +56,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $w0 = COPY [[OR]](s32) %0:_(s32) = COPY $w0 %1:_(s32) = COPY $w1 %2:_(s16) = G_TRUNC %0 @@ -83,10 +76,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY1]](<4 x s32>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: $x0 = COPY [[OR]](<4 x s16>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[OR]](<4 x s32>) + ; CHECK-NEXT: $x0 = COPY [[TRUNC]](<4 x s16>) %0:_(<4 x s32>) = COPY $q0 %1:_(<4 x s32>) = COPY $q1 %2:_(<4 x s16>) = G_TRUNC %0 @@ -106,11 +98,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s16) - ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $w0 = COPY [[XOR]](s32) %0:_(s32) = COPY $w0 %1:_(s32) = COPY $w1 %2:_(s16) = G_TRUNC %0 @@ -129,10 +118,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY1]](<4 x s32>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: $x0 = COPY [[XOR]](<4 x s16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s32>) = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[XOR]](<4 x s32>) + ; CHECK-NEXT: $x0 = COPY [[TRUNC]](<4 x s16>) %0:_(<4 x s32>) = COPY $q0 %1:_(<4 x s32>) = COPY $q1 %2:_(<4 x s16>) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir index 2bf7e84a379ba..7d25bf6e1d3c1 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir @@ -1,7 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown- --aarch64postlegalizercombiner-only-enable-rule="select_to_logical" %s -o - | FileCheck %s +# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s # RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s # REQUIRES: asserts + --- # select (c, x, x) -> x name: test_combine_select_same_res diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-narrow-binop-feeding-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-narrow-binop-feeding-add.mir index fb19cda303d36..9699d0cf7892c 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-narrow-binop-feeding-add.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-narrow-binop-feeding-add.mir @@ -84,10 +84,9 @@ body: | ; CHECK: liveins: $x0, $x1 ; CHECK: %binop_lhs:_(s64) = COPY $x0 ; CHECK: %binop_rhs:_(s64) = COPY $x1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %binop_lhs(s64) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %binop_rhs(s64) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[TRUNC1]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[AND]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND %binop_lhs, %binop_rhs + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s32) ; CHECK: $x0 = COPY [[ZEXT]](s64) ; CHECK: RET_ReallyLR implicit $x0 %binop_lhs:_(s64) = COPY $x0 @@ -131,10 +130,9 @@ body: | ; CHECK: liveins: $x0, $x1 ; CHECK: %binop_lhs:_(s64) = COPY $x0 ; CHECK: %binop_rhs:_(s64) = COPY $x1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %binop_lhs(s64) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %binop_rhs(s64) - ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[TRUNC]], [[TRUNC1]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[XOR]](s32) + ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR %binop_lhs, %binop_rhs + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[XOR]](s64) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s32) ; CHECK: $x0 = COPY [[ZEXT]](s64) ; CHECK: RET_ReallyLR implicit $x0 %binop_lhs:_(s64) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-hoist-same-hands.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-hoist-same-hands.mir index 48fc042d7c737..7f2ae6ee24807 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-hoist-same-hands.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-hoist-same-hands.mir @@ -268,10 +268,9 @@ body: | ; CHECK: liveins: $w0, $w1 ; CHECK: %x_wide:_(s32) = COPY $w0 ; CHECK: %y_wide:_(s32) = COPY $w1 - ; CHECK: %x:_(s1) = G_TRUNC %x_wide(s32) - ; CHECK: %y:_(s1) = G_TRUNC %y_wide(s32) - ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR %x, %y - ; CHECK: %logic_op:_(s64) = G_SEXT [[OR]](s1) + ; CHECK: %8:_(s32) = G_OR %x_wide, %y_wide + ; CHECK: %7:_(s1) = G_TRUNC %8(s32) + ; CHECK: %logic_op:_(s64) = G_SEXT %7(s1) ; CHECK: $x0 = COPY %logic_op(s64) ; CHECK: RET_ReallyLR implicit $x0 %x_wide:_(s32) = COPY $w0 diff --git a/llvm/test/CodeGen/AArch64/pr58431.ll b/llvm/test/CodeGen/AArch64/pr58431.ll index dcd97597ae409..88bab4af95d64 100644 --- a/llvm/test/CodeGen/AArch64/pr58431.ll +++ b/llvm/test/CodeGen/AArch64/pr58431.ll @@ -4,7 +4,7 @@ define i32 @f(i64 %0) { ; CHECK-LABEL: f: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #10 +; CHECK-NEXT: mov w8, #10 // =0xa ; CHECK-NEXT: mov w9, w0 ; CHECK-NEXT: udiv x10, x9, x8 ; CHECK-NEXT: msub x0, x10, x8, x9 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll index f9b98059be0b3..06930388901b0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll @@ -1804,113 +1804,110 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) { define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) { ; GFX6-LABEL: s_fshl_v2i24: ; GFX6: ; %bb.0: +; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; GFX6-NEXT: s_lshr_b32 s6, s0, 16 -; GFX6-NEXT: s_lshr_b32 s7, s0, 24 -; GFX6-NEXT: s_and_b32 s9, s0, 0xff -; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80008 -; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX6-NEXT: s_lshl_b32 s0, s0, 8 +; GFX6-NEXT: s_lshr_b32 s7, s1, 8 +; GFX6-NEXT: s_bfe_u32 s9, s0, 0x80008 +; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 +; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX6-NEXT: s_and_b32 s8, s0, 0xff +; GFX6-NEXT: s_lshl_b32 s9, s9, 8 ; GFX6-NEXT: s_and_b32 s6, s6, 0xff -; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GFX6-NEXT: s_or_b32 s0, s9, s0 +; GFX6-NEXT: s_and_b32 s1, s1, 0xff +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: s_and_b32 s0, s7, 0xff +; GFX6-NEXT: s_or_b32 s8, s8, s9 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX6-NEXT: s_lshr_b32 s8, s1, 8 +; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 24 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX6-NEXT: s_and_b32 s8, 0xffff, s8 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16 +; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX6-NEXT: s_lshl_b32 s0, s0, 16 +; GFX6-NEXT: v_mov_b32_e32 v3, 0xffffffe8 +; GFX6-NEXT: s_or_b32 s6, s8, s6 +; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX6-NEXT: s_lshr_b32 s0, s2, 16 +; GFX6-NEXT: s_lshr_b32 s1, s3, 8 +; GFX6-NEXT: s_bfe_u32 s8, s2, 0x80008 +; GFX6-NEXT: v_mul_lo_u32 v3, v2, v3 +; GFX6-NEXT: s_and_b32 s7, s2, 0xff +; GFX6-NEXT: s_lshl_b32 s8, s8, 8 +; GFX6-NEXT: s_and_b32 s0, s0, 0xff +; GFX6-NEXT: s_and_b32 s3, s3, 0xff +; GFX6-NEXT: v_mov_b32_e32 v1, s2 ; GFX6-NEXT: s_and_b32 s1, s1, 0xff -; GFX6-NEXT: s_or_b32 s0, s0, s6 -; GFX6-NEXT: s_lshl_b32 s1, s1, 8 -; GFX6-NEXT: s_and_b32 s6, s8, 0xff -; GFX6-NEXT: s_or_b32 s1, s7, s1 -; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX6-NEXT: s_or_b32 s7, s7, s8 +; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX6-NEXT: v_alignbit_b32 v1, s3, v1, 24 +; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX6-NEXT: s_and_b32 s7, 0xffff, s7 +; GFX6-NEXT: s_lshl_b32 s0, s0, 16 +; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_or_b32 s0, s7, s0 +; GFX6-NEXT: v_or_b32_e32 v1, s1, v1 +; GFX6-NEXT: s_lshr_b32 s1, s4, 16 +; GFX6-NEXT: s_bfe_u32 s7, s4, 0x80008 +; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3 +; GFX6-NEXT: s_and_b32 s3, s4, 0xff +; GFX6-NEXT: s_lshl_b32 s7, s7, 8 +; GFX6-NEXT: s_and_b32 s1, s1, 0xff +; GFX6-NEXT: s_or_b32 s3, s3, s7 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 -; GFX6-NEXT: s_lshl_b32 s6, s6, 16 -; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX6-NEXT: s_or_b32 s1, s1, s6 -; GFX6-NEXT: s_lshr_b32 s6, s2, 16 -; GFX6-NEXT: s_lshr_b32 s7, s2, 24 -; GFX6-NEXT: s_and_b32 s9, s2, 0xff -; GFX6-NEXT: s_bfe_u32 s2, s2, 0x80008 -; GFX6-NEXT: s_lshl_b32 s2, s2, 8 -; GFX6-NEXT: s_and_b32 s6, s6, 0xff -; GFX6-NEXT: s_or_b32 s2, s9, s2 -; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8 -; GFX6-NEXT: s_lshr_b32 s8, s3, 8 -; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 -; GFX6-NEXT: s_lshl_b32 s6, s6, 16 -; GFX6-NEXT: s_and_b32 s3, s3, 0xff -; GFX6-NEXT: v_mul_lo_u32 v1, v0, v1 -; GFX6-NEXT: s_or_b32 s2, s2, s6 -; GFX6-NEXT: s_lshl_b32 s3, s3, 8 -; GFX6-NEXT: s_and_b32 s6, s8, 0xff -; GFX6-NEXT: s_or_b32 s3, s7, s3 -; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 ; GFX6-NEXT: s_and_b32 s3, 0xffff, s3 -; GFX6-NEXT: s_lshl_b32 s6, s6, 16 -; GFX6-NEXT: s_or_b32 s3, s3, s6 -; GFX6-NEXT: s_lshr_b32 s6, s4, 16 -; GFX6-NEXT: s_lshr_b32 s7, s4, 24 -; GFX6-NEXT: s_and_b32 s9, s4, 0xff -; GFX6-NEXT: s_bfe_u32 s4, s4, 0x80008 -; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX6-NEXT: s_lshl_b32 s4, s4, 8 -; GFX6-NEXT: s_and_b32 s6, s6, 0xff -; GFX6-NEXT: s_or_b32 s4, s9, s4 -; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX6-NEXT: s_and_b32 s4, 0xffff, s4 -; GFX6-NEXT: s_lshl_b32 s6, s6, 16 -; GFX6-NEXT: s_or_b32 s4, s4, s6 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GFX6-NEXT: v_mul_hi_u32 v1, s4, v0 -; GFX6-NEXT: s_lshr_b32 s8, s5, 8 -; GFX6-NEXT: s_and_b32 s5, s5, 0xff -; GFX6-NEXT: s_lshl_b32 s5, s5, 8 -; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24 -; GFX6-NEXT: s_and_b32 s6, s8, 0xff -; GFX6-NEXT: s_or_b32 s5, s7, s5 -; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX6-NEXT: s_and_b32 s5, 0xffff, s5 -; GFX6-NEXT: s_lshl_b32 s6, s6, 16 -; GFX6-NEXT: s_or_b32 s5, s5, s6 -; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s4, v1 -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1 -; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v1 -; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1 -; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1 -; GFX6-NEXT: s_lshr_b32 s0, s2, 1 +; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_or_b32 s1, s3, s1 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GFX6-NEXT: v_mul_hi_u32 v3, s1, v2 +; GFX6-NEXT: s_lshr_b32 s2, s5, 8 +; GFX6-NEXT: s_and_b32 s3, s5, 0xff +; GFX6-NEXT: v_mov_b32_e32 v4, s4 +; GFX6-NEXT: s_and_b32 s2, s2, 0xff +; GFX6-NEXT: v_alignbit_b32 v4, s3, v4, 24 +; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 +; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24 +; GFX6-NEXT: s_lshl_b32 s2, s2, 16 +; GFX6-NEXT: v_or_b32_e32 v4, s2, v4 +; GFX6-NEXT: v_mul_hi_u32 v2, v4, v2 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s1, v3 +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3 +; GFX6-NEXT: v_mul_lo_u32 v2, v2, 24 +; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3 +; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v4, v2 +; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 23, v3 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 +; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3 +; GFX6-NEXT: s_lshr_b32 s0, s0, 1 +; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX6-NEXT: v_lshl_b32_e32 v3, s6, v3 +; GFX6-NEXT: v_lshr_b32_e32 v5, s0, v5 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2 -; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s5, v0 +; GFX6-NEXT: v_or_b32_e32 v3, v3, v5 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1 +; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v4 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 +; GFX6-NEXT: v_bfe_u32 v2, v3, 8, 8 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v3 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_bfe_u32 v2, v3, 16, 8 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v0 -; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0 -; GFX6-NEXT: s_lshr_b32 s0, s3, 1 -; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX6-NEXT: v_lshl_b32_e32 v0, s1, v0 -; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2 -; GFX6-NEXT: v_bfe_u32 v3, v1, 8, 8 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8 -; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 ; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll index c8455665e7b40..ff93cddafc872 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -1815,113 +1815,110 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) { define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) { ; GFX6-LABEL: s_fshr_v2i24: ; GFX6: ; %bb.0: -; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GFX6-NEXT: s_lshr_b32 s7, s1, 8 +; GFX6-NEXT: s_bfe_u32 s9, s0, 0x80008 +; GFX6-NEXT: s_and_b32 s1, s1, 0xff +; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 +; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v3, 0xffffffe8 ; GFX6-NEXT: s_lshr_b32 s6, s0, 16 -; GFX6-NEXT: s_lshr_b32 s7, s0, 24 -; GFX6-NEXT: s_lshr_b32 s8, s1, 8 -; GFX6-NEXT: s_and_b32 s9, s0, 0xff -; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80008 +; GFX6-NEXT: s_and_b32 s8, s0, 0xff +; GFX6-NEXT: s_lshl_b32 s9, s9, 8 +; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 24 +; GFX6-NEXT: s_and_b32 s0, s7, 0xff +; GFX6-NEXT: s_lshr_b32 s1, s2, 16 +; GFX6-NEXT: s_lshr_b32 s7, s3, 8 +; GFX6-NEXT: s_bfe_u32 s10, s2, 0x80008 +; GFX6-NEXT: v_mul_lo_u32 v3, v2, v3 +; GFX6-NEXT: s_or_b32 s8, s8, s9 +; GFX6-NEXT: s_and_b32 s9, s2, 0xff +; GFX6-NEXT: s_lshl_b32 s10, s10, 8 ; GFX6-NEXT: s_and_b32 s1, s1, 0xff -; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 -; GFX6-NEXT: s_lshl_b32 s0, s0, 8 -; GFX6-NEXT: s_lshl_b32 s1, s1, 8 -; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX6-NEXT: s_or_b32 s0, s9, s0 -; GFX6-NEXT: s_or_b32 s1, s7, s1 -; GFX6-NEXT: s_and_b32 s7, s8, 0xff -; GFX6-NEXT: s_lshr_b32 s8, s2, 16 -; GFX6-NEXT: s_lshr_b32 s9, s2, 24 -; GFX6-NEXT: s_and_b32 s11, s2, 0xff -; GFX6-NEXT: s_bfe_u32 s2, s2, 0x80008 -; GFX6-NEXT: s_lshl_b32 s2, s2, 8 -; GFX6-NEXT: s_and_b32 s8, s8, 0xff -; GFX6-NEXT: s_or_b32 s2, s11, s2 -; GFX6-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8 -; GFX6-NEXT: s_lshr_b32 s10, s3, 8 +; GFX6-NEXT: s_and_b32 s3, s3, 0xff +; GFX6-NEXT: v_mov_b32_e32 v1, s2 +; GFX6-NEXT: s_and_b32 s2, s7, 0xff +; GFX6-NEXT: s_or_b32 s9, s9, s10 +; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX6-NEXT: v_alignbit_b32 v1, s3, v1, 24 +; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 +; GFX6-NEXT: s_and_b32 s9, 0xffff, s9 +; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX6-NEXT: s_lshl_b32 s2, s2, 16 +; GFX6-NEXT: s_or_b32 s1, s9, s1 +; GFX6-NEXT: v_or_b32_e32 v1, s2, v1 +; GFX6-NEXT: s_lshr_b32 s2, s4, 16 +; GFX6-NEXT: s_bfe_u32 s9, s4, 0x80008 +; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3 +; GFX6-NEXT: s_and_b32 s7, s4, 0xff +; GFX6-NEXT: s_lshl_b32 s9, s9, 8 +; GFX6-NEXT: s_and_b32 s2, s2, 0xff +; GFX6-NEXT: s_or_b32 s7, s7, s9 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 -; GFX6-NEXT: s_lshl_b32 s8, s8, 16 +; GFX6-NEXT: s_and_b32 s7, 0xffff, s7 +; GFX6-NEXT: s_lshl_b32 s2, s2, 16 +; GFX6-NEXT: s_or_b32 s2, s7, s2 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GFX6-NEXT: v_mul_hi_u32 v3, s2, v2 +; GFX6-NEXT: s_lshr_b32 s3, s5, 8 +; GFX6-NEXT: s_and_b32 s5, s5, 0xff +; GFX6-NEXT: v_mov_b32_e32 v4, s4 ; GFX6-NEXT: s_and_b32 s3, s3, 0xff -; GFX6-NEXT: v_mul_lo_u32 v1, v0, v1 -; GFX6-NEXT: s_or_b32 s2, s2, s8 -; GFX6-NEXT: s_lshl_b32 s3, s3, 8 -; GFX6-NEXT: s_and_b32 s8, s10, 0xff -; GFX6-NEXT: s_or_b32 s3, s9, s3 -; GFX6-NEXT: s_and_b32 s8, 0xffff, s8 +; GFX6-NEXT: v_alignbit_b32 v4, s5, v4, 24 ; GFX6-NEXT: s_and_b32 s3, 0xffff, s3 -; GFX6-NEXT: s_lshl_b32 s8, s8, 16 -; GFX6-NEXT: s_or_b32 s3, s3, s8 -; GFX6-NEXT: s_lshr_b32 s8, s4, 16 -; GFX6-NEXT: s_lshr_b32 s9, s4, 24 -; GFX6-NEXT: s_and_b32 s11, s4, 0xff -; GFX6-NEXT: s_bfe_u32 s4, s4, 0x80008 -; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 -; GFX6-NEXT: s_lshl_b32 s4, s4, 8 -; GFX6-NEXT: s_and_b32 s8, s8, 0xff -; GFX6-NEXT: s_or_b32 s4, s11, s4 -; GFX6-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX6-NEXT: s_and_b32 s4, 0xffff, s4 -; GFX6-NEXT: s_lshl_b32 s8, s8, 16 -; GFX6-NEXT: s_or_b32 s4, s4, s8 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GFX6-NEXT: v_mul_hi_u32 v1, s4, v0 -; GFX6-NEXT: s_lshr_b32 s10, s5, 8 -; GFX6-NEXT: s_and_b32 s5, s5, 0xff -; GFX6-NEXT: s_lshl_b32 s5, s5, 8 -; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24 -; GFX6-NEXT: s_and_b32 s8, s10, 0xff -; GFX6-NEXT: s_or_b32 s5, s9, s5 -; GFX6-NEXT: s_and_b32 s8, 0xffff, s8 -; GFX6-NEXT: s_and_b32 s5, 0xffff, s5 -; GFX6-NEXT: s_lshl_b32 s8, s8, 16 -; GFX6-NEXT: s_or_b32 s5, s5, s8 -; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s4, v1 -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1 -; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1 +; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24 +; GFX6-NEXT: s_lshl_b32 s3, s3, 16 +; GFX6-NEXT: v_or_b32_e32 v4, s3, v4 +; GFX6-NEXT: v_mul_hi_u32 v2, v4, v2 +; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s2, v3 +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3 +; GFX6-NEXT: v_mul_lo_u32 v2, v2, 24 +; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3 +; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v4, v2 +; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 23, v3 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 ; GFX6-NEXT: s_and_b32 s6, s6, 0xff -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 -; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 -; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX6-NEXT: s_and_b32 s8, 0xffff, s8 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 -; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v1 -; GFX6-NEXT: s_lshl_b32 s4, s6, 17 -; GFX6-NEXT: s_lshl_b32 s0, s0, 1 -; GFX6-NEXT: s_or_b32 s0, s4, s0 -; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1 -; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2 -; GFX6-NEXT: v_lshr_b32_e32 v1, s2, v1 -; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s5, v0 -; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0 -; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 -; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 -; GFX6-NEXT: s_and_b32 s7, 0xffff, s7 -; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v0 -; GFX6-NEXT: s_lshl_b32 s0, s7, 17 -; GFX6-NEXT: s_lshl_b32 s1, s1, 1 -; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2 +; GFX6-NEXT: s_lshl_b32 s2, s6, 17 +; GFX6-NEXT: s_lshl_b32 s3, s8, 1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 +; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX6-NEXT: s_or_b32 s2, s2, s3 +; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5 +; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3 +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX6-NEXT: v_lshl_b32_e32 v5, s2, v5 +; GFX6-NEXT: v_lshr_b32_e32 v3, s1, v3 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2 +; GFX6-NEXT: s_lshl_b32 s0, s0, 17 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: v_or_b32_e32 v3, v5, v3 +; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 -; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0 -; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2 -; GFX6-NEXT: v_lshr_b32_e32 v0, s3, v0 -; GFX6-NEXT: v_bfe_u32 v3, v1, 8, 8 -; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8 -; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, v4, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 +; GFX6-NEXT: v_bfe_u32 v2, v3, 8, 8 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v3 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_bfe_u32 v2, v3, 16, 8 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll index 1821d29d4b050..788692c94b0cf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll @@ -192,9 +192,7 @@ define i16 @v_uaddsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) { ; GFX6-NEXT: v_min_u32_e32 v2, v3, v2 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 24, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_alignbit_b32 v0, v1, v0, 24 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_uaddsat_v2i8: @@ -290,9 +288,9 @@ define amdgpu_ps i16 @s_uaddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) { ; GFX6-NEXT: s_min_u32 s2, s3, s2 ; GFX6-NEXT: s_add_i32 s1, s1, s2 ; GFX6-NEXT: s_lshr_b32 s1, s1, 24 -; GFX6-NEXT: s_lshr_b32 s0, s0, 24 -; GFX6-NEXT: s_lshl_b32 s1, s1, 8 -; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 24 +; GFX6-NEXT: v_readfirstlane_b32 s0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_uaddsat_v2i8: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll index a60370cd460f9..0042d34e235d1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll @@ -186,9 +186,7 @@ define i16 @v_usubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) { ; GFX6-NEXT: v_min_u32_e32 v2, v1, v2 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 24, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_alignbit_b32 v0, v1, v0, 24 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_usubsat_v2i8: @@ -282,9 +280,9 @@ define amdgpu_ps i16 @s_usubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) { ; GFX6-NEXT: s_min_u32 s2, s1, s2 ; GFX6-NEXT: s_sub_i32 s1, s1, s2 ; GFX6-NEXT: s_lshr_b32 s1, s1, 24 -; GFX6-NEXT: s_lshr_b32 s0, s0, 24 -; GFX6-NEXT: s_lshl_b32 s1, s1, 8 -; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 24 +; GFX6-NEXT: v_readfirstlane_b32 s0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_usubsat_v2i8: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll index cec73b7c3617b..6bb4e2d3dbe26 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll @@ -26,13 +26,10 @@ entry: define amdgpu_ps i32 @scalar_xnor_v2i16_one_use(<2 x i16> inreg %a, <2 x i16> inreg %b) { ; GFX7-LABEL: scalar_xnor_v2i16_one_use: ; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] ; GFX7-NEXT: s_lshl_b32 s1, s1, 16 ; GFX7-NEXT: s_and_b32 s0, s0, 0xffff ; GFX7-NEXT: s_or_b32 s0, s1, s0 -; GFX7-NEXT: s_lshl_b32 s1, s3, 16 -; GFX7-NEXT: s_and_b32 s2, s2, 0xffff -; GFX7-NEXT: s_or_b32 s1, s1, s2 -; GFX7-NEXT: s_xor_b32 s0, s0, s1 ; GFX7-NEXT: s_xor_b32 s0, s0, -1 ; GFX7-NEXT: ; return to shader part epilog ; @@ -117,22 +114,17 @@ define amdgpu_ps i64 @scalar_xnor_i64_one_use(i64 inreg %a, i64 inreg %b) { define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> inreg %b) { ; GFX7-LABEL: scalar_xnor_v4i16_one_use: ; GFX7: ; %bb.0: +; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] +; GFX7-NEXT: s_xor_b64 s[2:3], s[2:3], s[6:7] ; GFX7-NEXT: s_lshl_b32 s1, s1, 16 ; GFX7-NEXT: s_and_b32 s0, s0, 0xffff +; GFX7-NEXT: s_mov_b32 s8, -1 ; GFX7-NEXT: s_or_b32 s0, s1, s0 ; GFX7-NEXT: s_lshl_b32 s1, s3, 16 ; GFX7-NEXT: s_and_b32 s2, s2, 0xffff +; GFX7-NEXT: s_mov_b32 s9, s8 ; GFX7-NEXT: s_or_b32 s1, s1, s2 -; GFX7-NEXT: s_lshl_b32 s2, s5, 16 -; GFX7-NEXT: s_and_b32 s3, s4, 0xffff -; GFX7-NEXT: s_or_b32 s2, s2, s3 -; GFX7-NEXT: s_lshl_b32 s3, s7, 16 -; GFX7-NEXT: s_and_b32 s4, s6, 0xffff -; GFX7-NEXT: s_or_b32 s3, s3, s4 -; GFX7-NEXT: s_mov_b32 s4, -1 -; GFX7-NEXT: s_mov_b32 s5, s4 -; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] -; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] +; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[8:9] ; GFX7-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: scalar_xnor_v4i16_one_use: