Skip to content

Commit

Permalink
[GISel][CombinerHelper] Combine op(trunc(x), trunc(y)) -> trunc(op(x, y))
Browse files Browse the repository at this point in the history
  • Loading branch information
dc03-work committed May 14, 2024
1 parent 51349fb commit 94bf09a
Show file tree
Hide file tree
Showing 11 changed files with 234 additions and 264 deletions.
4 changes: 3 additions & 1 deletion llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3083,8 +3083,10 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
return false;
case TargetOpcode::G_ANYEXT:
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT: {
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_TRUNC: {
// Match: logic (ext X), (ext Y) --> ext (logic X, Y)
// Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
break;
}
case TargetOpcode::G_AND:
Expand Down
42 changes: 15 additions & 27 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-op-trunc.mir
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]]
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16)
; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]]
; CHECK-NEXT: $w0 = COPY [[AND]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = COPY $w1
%2:_(s16) = G_TRUNC %0
Expand All @@ -37,10 +34,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY1]](<4 x s32>)
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC]], [[TRUNC1]]
; CHECK-NEXT: $x0 = COPY [[AND]](<4 x s16>)
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND [[COPY]], [[COPY1]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[AND]](<4 x s32>)
; CHECK-NEXT: $x0 = COPY [[TRUNC]](<4 x s16>)
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<4 x s16>) = G_TRUNC %0
Expand All @@ -60,11 +56,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]]
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]]
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = COPY $w1
%2:_(s16) = G_TRUNC %0
Expand All @@ -83,10 +76,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY1]](<4 x s32>)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[TRUNC]], [[TRUNC1]]
; CHECK-NEXT: $x0 = COPY [[OR]](<4 x s16>)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[COPY]], [[COPY1]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[OR]](<4 x s32>)
; CHECK-NEXT: $x0 = COPY [[TRUNC]](<4 x s16>)
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<4 x s16>) = G_TRUNC %0
Expand All @@ -106,11 +98,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[TRUNC1]]
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s16)
; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY1]]
; CHECK-NEXT: $w0 = COPY [[XOR]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = COPY $w1
%2:_(s16) = G_TRUNC %0
Expand All @@ -129,10 +118,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY1]](<4 x s32>)
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[TRUNC]], [[TRUNC1]]
; CHECK-NEXT: $x0 = COPY [[XOR]](<4 x s16>)
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s32>) = G_XOR [[COPY]], [[COPY1]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[XOR]](<4 x s32>)
; CHECK-NEXT: $x0 = COPY [[TRUNC]](<4 x s16>)
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<4 x s16>) = G_TRUNC %0
Expand Down
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown- --aarch64postlegalizercombiner-only-enable-rule="select_to_logical" %s -o - | FileCheck %s
# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
# REQUIRES: asserts

---
# select (c, x, x) -> x
name: test_combine_select_same_res
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,9 @@ body: |
; CHECK: liveins: $x0, $x1
; CHECK: %binop_lhs:_(s64) = COPY $x0
; CHECK: %binop_rhs:_(s64) = COPY $x1
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %binop_lhs(s64)
; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %binop_rhs(s64)
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[TRUNC1]]
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[AND]](s32)
; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND %binop_lhs, %binop_rhs
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64)
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s32)
; CHECK: $x0 = COPY [[ZEXT]](s64)
; CHECK: RET_ReallyLR implicit $x0
%binop_lhs:_(s64) = COPY $x0
Expand Down Expand Up @@ -131,10 +130,9 @@ body: |
; CHECK: liveins: $x0, $x1
; CHECK: %binop_lhs:_(s64) = COPY $x0
; CHECK: %binop_rhs:_(s64) = COPY $x1
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %binop_lhs(s64)
; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %binop_rhs(s64)
; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[TRUNC]], [[TRUNC1]]
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[XOR]](s32)
; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR %binop_lhs, %binop_rhs
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[XOR]](s64)
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s32)
; CHECK: $x0 = COPY [[ZEXT]](s64)
; CHECK: RET_ReallyLR implicit $x0
%binop_lhs:_(s64) = COPY $x0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -268,10 +268,9 @@ body: |
; CHECK: liveins: $w0, $w1
; CHECK: %x_wide:_(s32) = COPY $w0
; CHECK: %y_wide:_(s32) = COPY $w1
; CHECK: %x:_(s1) = G_TRUNC %x_wide(s32)
; CHECK: %y:_(s1) = G_TRUNC %y_wide(s32)
; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR %x, %y
; CHECK: %logic_op:_(s64) = G_SEXT [[OR]](s1)
; CHECK: %8:_(s32) = G_OR %x_wide, %y_wide
; CHECK: %7:_(s1) = G_TRUNC %8(s32)
; CHECK: %logic_op:_(s64) = G_SEXT %7(s1)
; CHECK: $x0 = COPY %logic_op(s64)
; CHECK: RET_ReallyLR implicit $x0
%x_wide:_(s32) = COPY $w0
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/pr58431.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
define i32 @f(i64 %0) {
; CHECK-LABEL: f:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #10
; CHECK-NEXT: mov w8, #10 // =0xa
; CHECK-NEXT: mov w9, w0
; CHECK-NEXT: udiv x10, x9, x8
; CHECK-NEXT: msub x0, x10, x8, x9
Expand Down
191 changes: 94 additions & 97 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1804,113 +1804,110 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) {
; GFX6-LABEL: s_fshl_v2i24:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2
; GFX6-NEXT: s_lshr_b32 s6, s0, 16
; GFX6-NEXT: s_lshr_b32 s7, s0, 24
; GFX6-NEXT: s_and_b32 s9, s0, 0xff
; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80008
; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
; GFX6-NEXT: s_lshl_b32 s0, s0, 8
; GFX6-NEXT: s_lshr_b32 s7, s1, 8
; GFX6-NEXT: s_bfe_u32 s9, s0, 0x80008
; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2
; GFX6-NEXT: s_and_b32 s8, s0, 0xff
; GFX6-NEXT: s_lshl_b32 s9, s9, 8
; GFX6-NEXT: s_and_b32 s6, s6, 0xff
; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX6-NEXT: s_or_b32 s0, s9, s0
; GFX6-NEXT: s_and_b32 s1, s1, 0xff
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: s_and_b32 s0, s7, 0xff
; GFX6-NEXT: s_or_b32 s8, s8, s9
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
; GFX6-NEXT: s_lshr_b32 s8, s1, 8
; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 24
; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: v_mov_b32_e32 v3, 0xffffffe8
; GFX6-NEXT: s_or_b32 s6, s8, s6
; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
; GFX6-NEXT: s_lshr_b32 s0, s2, 16
; GFX6-NEXT: s_lshr_b32 s1, s3, 8
; GFX6-NEXT: s_bfe_u32 s8, s2, 0x80008
; GFX6-NEXT: v_mul_lo_u32 v3, v2, v3
; GFX6-NEXT: s_and_b32 s7, s2, 0xff
; GFX6-NEXT: s_lshl_b32 s8, s8, 8
; GFX6-NEXT: s_and_b32 s0, s0, 0xff
; GFX6-NEXT: s_and_b32 s3, s3, 0xff
; GFX6-NEXT: v_mov_b32_e32 v1, s2
; GFX6-NEXT: s_and_b32 s1, s1, 0xff
; GFX6-NEXT: s_or_b32 s0, s0, s6
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_and_b32 s6, s8, 0xff
; GFX6-NEXT: s_or_b32 s1, s7, s1
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX6-NEXT: s_or_b32 s7, s7, s8
; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
; GFX6-NEXT: v_alignbit_b32 v1, s3, v1, 24
; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
; GFX6-NEXT: s_and_b32 s7, 0xffff, s7
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s7, s0
; GFX6-NEXT: v_or_b32_e32 v1, s1, v1
; GFX6-NEXT: s_lshr_b32 s1, s4, 16
; GFX6-NEXT: s_bfe_u32 s7, s4, 0x80008
; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
; GFX6-NEXT: s_and_b32 s3, s4, 0xff
; GFX6-NEXT: s_lshl_b32 s7, s7, 8
; GFX6-NEXT: s_and_b32 s1, s1, 0xff
; GFX6-NEXT: s_or_b32 s3, s3, s7
; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX6-NEXT: s_or_b32 s1, s1, s6
; GFX6-NEXT: s_lshr_b32 s6, s2, 16
; GFX6-NEXT: s_lshr_b32 s7, s2, 24
; GFX6-NEXT: s_and_b32 s9, s2, 0xff
; GFX6-NEXT: s_bfe_u32 s2, s2, 0x80008
; GFX6-NEXT: s_lshl_b32 s2, s2, 8
; GFX6-NEXT: s_and_b32 s6, s6, 0xff
; GFX6-NEXT: s_or_b32 s2, s9, s2
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8
; GFX6-NEXT: s_lshr_b32 s8, s3, 8
; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_and_b32 s3, s3, 0xff
; GFX6-NEXT: v_mul_lo_u32 v1, v0, v1
; GFX6-NEXT: s_or_b32 s2, s2, s6
; GFX6-NEXT: s_lshl_b32 s3, s3, 8
; GFX6-NEXT: s_and_b32 s6, s8, 0xff
; GFX6-NEXT: s_or_b32 s3, s7, s3
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_or_b32 s3, s3, s6
; GFX6-NEXT: s_lshr_b32 s6, s4, 16
; GFX6-NEXT: s_lshr_b32 s7, s4, 24
; GFX6-NEXT: s_and_b32 s9, s4, 0xff
; GFX6-NEXT: s_bfe_u32 s4, s4, 0x80008
; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX6-NEXT: s_lshl_b32 s4, s4, 8
; GFX6-NEXT: s_and_b32 s6, s6, 0xff
; GFX6-NEXT: s_or_b32 s4, s9, s4
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_or_b32 s4, s4, s6
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT: v_mul_hi_u32 v1, s4, v0
; GFX6-NEXT: s_lshr_b32 s8, s5, 8
; GFX6-NEXT: s_and_b32 s5, s5, 0xff
; GFX6-NEXT: s_lshl_b32 s5, s5, 8
; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24
; GFX6-NEXT: s_and_b32 s6, s8, 0xff
; GFX6-NEXT: s_or_b32 s5, s7, s5
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
; GFX6-NEXT: s_and_b32 s5, 0xffff, s5
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_or_b32 s5, s5, s6
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s4, v1
; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1
; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v1
; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1
; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1
; GFX6-NEXT: s_lshr_b32 s0, s2, 1
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s1, s3, s1
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GFX6-NEXT: v_mul_hi_u32 v3, s1, v2
; GFX6-NEXT: s_lshr_b32 s2, s5, 8
; GFX6-NEXT: s_and_b32 s3, s5, 0xff
; GFX6-NEXT: v_mov_b32_e32 v4, s4
; GFX6-NEXT: s_and_b32 s2, s2, 0xff
; GFX6-NEXT: v_alignbit_b32 v4, s3, v4, 24
; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: v_or_b32_e32 v4, s2, v4
; GFX6-NEXT: v_mul_hi_u32 v2, v4, v2
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s1, v3
; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3
; GFX6-NEXT: v_mul_lo_u32 v2, v2, 24
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v4, v2
; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 23, v3
; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3
; GFX6-NEXT: s_lshr_b32 s0, s0, 1
; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX6-NEXT: v_lshl_b32_e32 v3, s6, v3
; GFX6-NEXT: v_lshr_b32_e32 v5, s0, v5
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2
; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s5, v0
; GFX6-NEXT: v_or_b32_e32 v3, v3, v5
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1
; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
; GFX6-NEXT: v_bfe_u32 v2, v3, 8, 8
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_bfe_u32 v2, v3, 16, 8
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v0
; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0
; GFX6-NEXT: s_lshr_b32 s0, s3, 1
; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GFX6-NEXT: v_lshl_b32_e32 v0, s1, v0
; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2
; GFX6-NEXT: v_bfe_u32 v3, v1, 8, 8
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3
; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
Expand Down
Loading

0 comments on commit 94bf09a

Please sign in to comment.