Skip to content

Commit

Permalink
AMDGPU/GlobalISel: Fix import of integer med3
Browse files Browse the repository at this point in the history
This isn't too useful now, since nothing is currently trying to form
min/max from cmp+select.
  • Loading branch information
arsenm committed Jan 9, 2020
1 parent c66b2e1 commit 9ffd0ed
Show file tree
Hide file tree
Showing 6 changed files with 646 additions and 32 deletions.
24 changes: 0 additions & 24 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
Expand Up @@ -737,30 +737,6 @@ class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
(BIT_ALIGN $src0, $src0, $src1)
>;

// Selection patterns that fold a three-operand tree of integer min/max
// nodes into a single med3Inst.
//
//   med3Inst    - instruction emitted for a matched tree.
//   min, max    - operators matched at the root of the tree.
//   min_oneuse,
//   max_oneuse  - operators matched for the interior nodes of the tree.
//   vt          - value type of the three sources (defaults to i32).
multiclass IntMed3Pat<Instruction med3Inst,
SDPatternOperator min,
SDPatternOperator max,
SDPatternOperator min_oneuse,
SDPatternOperator max_oneuse,
ValueType vt = i32> {

// This matches 16 permutations of
// min(max(a, b), max(min(a, b), c))
def : AMDGPUPat <
(min (max_oneuse vt:$src0, vt:$src1),
(max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)),
(med3Inst vt:$src0, vt:$src1, vt:$src2)
>;

// This matches 16 permutations of
// max(min(x, y), min(max(x, y), z))
// NOTE(review): the result operands below omit the vt: annotation that the
// first pattern's result carries; the two output forms are inconsistent.
def : AMDGPUPat <
(max (min_oneuse vt:$src0, vt:$src1),
(min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
(med3Inst $src0, $src1, $src2)
>;
}

// Special conversion patterns

def cvt_rpi_i32_f32 : PatFrag <
Expand Down
38 changes: 30 additions & 8 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Expand Up @@ -1972,6 +1972,29 @@ defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
defm : BFEPattern <V_BFE_U32, V_BFE_I32, S_MOV_B32>;
defm : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64, SReg_64>;

// Selection patterns that fold a three-operand tree of 32-bit integer
// min/max nodes into a single med3Inst.
//
//   med3Inst    - instruction emitted for a matched tree.
//   min, max    - operators matched at the root of the tree.
//   min_oneuse,
//   max_oneuse  - operators matched for the interior nodes of the tree.
//
// Sources are matched as i32 and the result operands are given as VSrc_b32.
multiclass IntMed3Pat<Instruction med3Inst,
SDPatternOperator min,
SDPatternOperator max,
SDPatternOperator min_oneuse,
SDPatternOperator max_oneuse> {

// This matches 16 permutations of
// min(max(a, b), max(min(a, b), c))
def : AMDGPUPat <
(min (max_oneuse i32:$src0, i32:$src1),
(max_oneuse (min_oneuse i32:$src0, i32:$src1), i32:$src2)),
(med3Inst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2)
>;

// This matches 16 permutations of
// max(min(x, y), min(max(x, y), z))
def : AMDGPUPat <
(max (min_oneuse i32:$src0, i32:$src1),
(min_oneuse (max_oneuse i32:$src0, i32:$src1), i32:$src2)),
(med3Inst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2)
>;
}

// Instantiate the med3 patterns for the signed (smin/smax -> V_MED3_I32) and
// unsigned (umin/umax -> V_MED3_U32) operators.
defm : IntMed3Pat<V_MED3_I32, smin, smax, smin_oneuse, smax_oneuse>;
defm : IntMed3Pat<V_MED3_U32, umin, umax, umin_oneuse, umax_oneuse>;

Expand Down Expand Up @@ -2002,22 +2025,21 @@ multiclass Int16Med3Pat<Instruction med3Inst,
SDPatternOperator min,
SDPatternOperator max,
SDPatternOperator max_oneuse,
SDPatternOperator min_oneuse,
ValueType vt = i16> {
SDPatternOperator min_oneuse> {
// This matches 16 permutations of
// max(min(x, y), min(max(x, y), z))
def : GCNPat <
(max (min_oneuse vt:$src0, vt:$src1),
(min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
(med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
(max (min_oneuse i16:$src0, i16:$src1),
(min_oneuse (max_oneuse i16:$src0, i16:$src1), i16:$src2)),
(med3Inst SRCMODS.NONE, VSrc_b16:$src0, SRCMODS.NONE, VSrc_b16:$src1, SRCMODS.NONE, VSrc_b16:$src2, DSTCLAMP.NONE)
>;

// This matches 16 permutations of
// min(max(a, b), max(min(a, b), c))
def : GCNPat <
(min (max_oneuse vt:$src0, vt:$src1),
(max_oneuse (min_oneuse vt:$src0, vt:$src1), vt:$src2)),
(med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
(min (max_oneuse i16:$src0, i16:$src1),
(max_oneuse (min_oneuse i16:$src0, i16:$src1), i16:$src2)),
(med3Inst SRCMODS.NONE, VSrc_b16:$src0, SRCMODS.NONE, VSrc_b16:$src1, SRCMODS.NONE, VSrc_b16:$src2, DSTCLAMP.NONE)
>;
}

Expand Down
140 changes: 140 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.mir
@@ -0,0 +1,140 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX6 %s

---
# All three inputs are in VGPRs and every intermediate min/max has a single
# use: smin(smax(a, b), smax(smin(a, b), c)) is expected to select to one
# V_MED3_I32.
name: smed3_s32_vvv
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
; GFX6-LABEL: name: smed3_s32_vvv
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX6: [[V_MED3_I32_:%[0-9]+]]:vgpr_32 = V_MED3_I32 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
; GFX6: S_ENDPGM 0, implicit [[V_MED3_I32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
%3:vgpr(s32) = G_SMAX %0, %1
%4:vgpr(s32) = G_SMIN %0, %1
%5:vgpr(s32) = G_SMAX %4, %2
%6:vgpr(s32) = G_SMIN %3, %5
S_ENDPGM 0, implicit %6
...

---
# Same min/max tree with all values in SGPRs: the checks expect the individual
# S_MAX_I32 / S_MIN_I32 instructions to be selected, with no med3 formed.

name: smed3_s32_sss
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2
; GFX6-LABEL: name: smed3_s32_sss
; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GFX6: [[S_MAX_I32_:%[0-9]+]]:sreg_32 = S_MAX_I32 [[COPY]], [[COPY1]], implicit-def $scc
; GFX6: [[S_MIN_I32_:%[0-9]+]]:sreg_32 = S_MIN_I32 [[COPY]], [[COPY1]], implicit-def $scc
; GFX6: [[S_MAX_I32_1:%[0-9]+]]:sreg_32 = S_MAX_I32 [[S_MIN_I32_]], [[COPY2]], implicit-def $scc
; GFX6: [[S_MIN_I32_1:%[0-9]+]]:sreg_32 = S_MIN_I32 [[S_MAX_I32_]], [[S_MAX_I32_1]], implicit-def $scc
; GFX6: S_ENDPGM 0, implicit [[S_MIN_I32_1]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:sgpr(s32) = COPY $sgpr2
%3:sgpr(s32) = G_SMAX %0, %1
%4:sgpr(s32) = G_SMIN %0, %1
%5:sgpr(s32) = G_SMAX %4, %2
%6:sgpr(s32) = G_SMIN %3, %5
S_ENDPGM 0, implicit %6
...

---
# The smax(a, b) result (%3) has a second use in S_ENDPGM: the checks expect
# the separate V_MAX/V_MIN instructions to be kept rather than a V_MED3_I32.
name: smed3_s32_vvv_multiuse0
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
; GFX6-LABEL: name: smed3_s32_vvv_multiuse0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX6: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX6: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX6: [[V_MAX_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[V_MIN_I32_e64_]], [[COPY2]], implicit $exec
; GFX6: [[V_MIN_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[V_MAX_I32_e64_]], [[V_MAX_I32_e64_1]], implicit $exec
; GFX6: S_ENDPGM 0, implicit [[V_MIN_I32_e64_1]], implicit [[V_MAX_I32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
%3:vgpr(s32) = G_SMAX %0, %1
%4:vgpr(s32) = G_SMIN %0, %1
%5:vgpr(s32) = G_SMAX %4, %2
%6:vgpr(s32) = G_SMIN %3, %5
S_ENDPGM 0, implicit %6, implicit %3
...

---
# The smin(a, b) result (%4) has a second use in S_ENDPGM: the checks expect
# the separate V_MAX/V_MIN instructions to be kept rather than a V_MED3_I32.
name: smed3_s32_vvv_multiuse1
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
; GFX6-LABEL: name: smed3_s32_vvv_multiuse1
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX6: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX6: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX6: [[V_MAX_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[V_MIN_I32_e64_]], [[COPY2]], implicit $exec
; GFX6: [[V_MIN_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[V_MAX_I32_e64_]], [[V_MAX_I32_e64_1]], implicit $exec
; GFX6: S_ENDPGM 0, implicit [[V_MIN_I32_e64_1]], implicit [[V_MIN_I32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
%3:vgpr(s32) = G_SMAX %0, %1
%4:vgpr(s32) = G_SMIN %0, %1
%5:vgpr(s32) = G_SMAX %4, %2
%6:vgpr(s32) = G_SMIN %3, %5
S_ENDPGM 0, implicit %6, implicit %4
...

---
# The smax(smin(a, b), c) result (%5) has a second use in S_ENDPGM: the checks
# expect the separate V_MAX/V_MIN instructions to be kept rather than a
# V_MED3_I32.
name: smed3_s32_vvv_multiuse2
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
; GFX6-LABEL: name: smed3_s32_vvv_multiuse2
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX6: [[V_MAX_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX6: [[V_MIN_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[COPY]], [[COPY1]], implicit $exec
; GFX6: [[V_MAX_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I32_e64 [[V_MIN_I32_e64_]], [[COPY2]], implicit $exec
; GFX6: [[V_MIN_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I32_e64 [[V_MAX_I32_e64_]], [[V_MAX_I32_e64_1]], implicit $exec
; GFX6: S_ENDPGM 0, implicit [[V_MIN_I32_e64_1]], implicit [[V_MAX_I32_e64_1]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = COPY $vgpr2
%3:vgpr(s32) = G_SMAX %0, %1
%4:vgpr(s32) = G_SMIN %0, %1
%5:vgpr(s32) = G_SMAX %4, %2
%6:vgpr(s32) = G_SMIN %3, %5
S_ENDPGM 0, implicit %6, implicit %5
...

0 comments on commit 9ffd0ed

Please sign in to comment.