Skip to content

Commit

Permalink
AMDGPU/GlobalISel: Form CVT_F32_UBYTE0
Browse files Browse the repository at this point in the history
  • Loading branch information
arsenm committed Mar 30, 2020
1 parent bcb643c commit b27d255
Show file tree
Hide file tree
Showing 6 changed files with 237 additions and 1 deletion.
9 changes: 8 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUCombine.td
Expand Up @@ -20,6 +20,13 @@ def fcmp_select_to_fmin_fmax_legacy : GICombineRule<
(apply [{ applySelectFCmpToFMinToFMaxLegacy(*${select}, ${matchinfo}); }])>;


// Combine rule rooted at a G_UITOFP or G_SITOFP instruction. The match step
// defers to the C++ predicate matchUCharToFloat; when it succeeds, the apply
// step rewrites the instruction via applyUCharToFloat.
def uchar_to_float : GICombineRule<
(defs root:$itofp),
(match (wip_match_opcode G_UITOFP, G_SITOFP):$itofp,
[{ return matchUCharToFloat(*${itofp}, MRI, *MF, Helper); }]),
(apply [{ applyUCharToFloat(*${itofp}); }])>;


// Combines which should only apply on SI/VI
def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;

Expand All @@ -32,6 +39,6 @@ def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<

def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
"AMDGPUGenPostLegalizerCombinerHelper", [all_combines,
gfx6gfx7_combines]> {
gfx6gfx7_combines, uchar_to_float]> {
let DisableRuleOption = "amdgpupostlegalizercombiner-disable-rule";
}
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUGISel.td
Expand Up @@ -153,6 +153,11 @@ def : GINodeEquiv<G_AMDGPU_FMIN_LEGACY, AMDGPUfmin_legacy>;
def : GINodeEquiv<G_AMDGPU_FMAX_LEGACY, AMDGPUfmax_legacy>;
def : GINodeEquiv<G_AMDGPU_RCP_IFLAG, AMDGPUrcp_iflag>;

// Pair each generic G_AMDGPU_CVT_F32_UBYTE<N> opcode (N = 0..3) with the
// matching AMDGPUcvt_f32_ubyte<N> node.
def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE0, AMDGPUcvt_f32_ubyte0>;
def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE1, AMDGPUcvt_f32_ubyte1>;
def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE2, AMDGPUcvt_f32_ubyte2>;
def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE3, AMDGPUcvt_f32_ubyte3>;

def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD, SIbuffer_load>;
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_USHORT, SIbuffer_load_ushort>;
Expand Down
37 changes: 37 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
Expand Up @@ -127,6 +127,43 @@ static void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
MI.eraseFromParent();
}

/// Decide whether an integer-to-float conversion can be replaced by
/// G_AMDGPU_CVT_F32_UBYTE0.
///
/// \param MI     The G_UITOFP / G_SITOFP instruction being examined.
/// \param MRI    Register info used to query the operand types.
/// \param MF     Enclosing function (unused; kept for the combiner signature).
/// \param Helper Combiner helper providing the known-bits analysis.
/// \returns true when the result is s32 or s16 and every bit of the source
///          above the low byte is known to be zero.
static bool matchUCharToFloat(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineFunction &MF, CombinerHelper &Helper) {
  Register DstReg = MI.getOperand(0).getReg();

  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
  LLT Ty = MRI.getType(DstReg);
  if (Ty != LLT::scalar(32) && Ty != LLT::scalar(16))
    return false;

  Register SrcReg = MI.getOperand(1).getReg();
  const unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();

  // With 8 or fewer source bits there are no high bits to test, and for a
  // signed conversion the top bit would be a sign bit, so an unsigned byte
  // conversion is not known to be equivalent. Be conservative.
  if (SrcSize <= 8)
    return false;

  // The mask must be exactly as wide as the source register; the source is
  // not necessarily 32 bits wide.
  const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
  return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
}

/// Replace a matched G_UITOFP / G_SITOFP with G_AMDGPU_CVT_F32_UBYTE0,
/// followed by a G_FPTRUNC when the original result type is not s32.
///
/// \param MI The instruction accepted by matchUCharToFloat; it is erased.
static void applyUCharToFloat(MachineInstr &MI) {
  MachineIRBuilder B(MI);

  const LLT S32 = LLT::scalar(32);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = B.getMRI()->getType(DstReg);
  LLT SrcTy = B.getMRI()->getType(SrcReg);

  // G_AMDGPU_CVT_F32_UBYTE0 is declared with the same type for its source and
  // result, so the integer source has to be s32. The match guarantees all
  // bits above the low byte are zero, so truncating a wider source loses
  // nothing. For a narrower source the extended bits are undefined, which is
  // fine provided the conversion reads only byte 0 (as its name indicates).
  if (SrcTy != S32)
    SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);

  if (Ty == S32) {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg},
                 MI.getFlags());
  } else {
    // The byte conversion only produces f32; narrow to the requested type.
    auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg},
                             MI.getFlags());
    B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
  }

  MI.eraseFromParent();
}

#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Expand Up @@ -3293,6 +3293,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_AMDGPU_FMIN_LEGACY:
case AMDGPU::G_AMDGPU_FMAX_LEGACY:
case AMDGPU::G_AMDGPU_RCP_IFLAG:
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE0:
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
return getDefaultMappingVOP(MI);
case AMDGPU::G_UMULH:
case AMDGPU::G_SMULH: {
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Expand Up @@ -2297,6 +2297,14 @@ def G_AMDGPU_FMAX_LEGACY : AMDGPUGenericInstruction {
let hasSideEffects = 0;
}

// Declares four generic instructions, G_AMDGPU_CVT_F32_UBYTE0 through
// G_AMDGPU_CVT_F32_UBYTE3. Each has a single result and a single source
// operand sharing the same type index (type0), and no side effects.
foreach N = 0-3 in {
def G_AMDGPU_CVT_F32_UBYTE#N : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src0);
let hasSideEffects = 0;
}
}

// Atomic cmpxchg. $cmpval and $newval are packed in a single vector
// operand. Expects a MachineMemOperand in addition to explicit
// operands.
Expand Down
175 changes: 175 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/combine-itofp.mir
@@ -0,0 +1,175 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s

---
name: uitofp_char_to_f32
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: uitofp_char_to_f32
; CHECK: liveins: $vgpr0
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
; CHECK: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]]
; CHECK: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = G_CONSTANT i32 255
%2:_(s32) = G_AND %0, %1
%3:_(s32) = G_UITOFP %2
$vgpr0 = COPY %3
...

---
name: uitofp_too_many_bits_to_f32
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: uitofp_too_many_bits_to_f32
; CHECK: liveins: $vgpr0
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
; CHECK: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s32)
; CHECK: $vgpr0 = COPY [[UITOFP]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = G_CONSTANT i32 256
%2:_(s32) = G_AND %0, %1
%3:_(s32) = G_UITOFP %2
$vgpr0 = COPY %3
...

---
name: sitofp_char_to_f32
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: sitofp_char_to_f32
; CHECK: liveins: $vgpr0
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
; CHECK: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]]
; CHECK: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = G_CONSTANT i32 255
%2:_(s32) = G_AND %0, %1
%3:_(s32) = G_SITOFP %2
$vgpr0 = COPY %3
...

---
name: sitofp_bits127_to_f32
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: sitofp_bits127_to_f32
; CHECK: liveins: $vgpr0
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
; CHECK: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]]
; CHECK: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = G_CONSTANT i32 127
%2:_(s32) = G_AND %0, %1
%3:_(s32) = G_SITOFP %2
$vgpr0 = COPY %3
...

---
name: sitofp_bits128_to_f32
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: sitofp_bits128_to_f32
; CHECK: liveins: $vgpr0
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
; CHECK: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]]
; CHECK: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = G_CONSTANT i32 128
%2:_(s32) = G_AND %0, %1
%3:_(s32) = G_SITOFP %2
$vgpr0 = COPY %3
...
---
name: sitofp_too_many_bits_to_f32
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: sitofp_too_many_bits_to_f32
; CHECK: liveins: $vgpr0
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
; CHECK: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[AND]](s32)
; CHECK: $vgpr0 = COPY [[SITOFP]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = G_CONSTANT i32 256
%2:_(s32) = G_AND %0, %1
%3:_(s32) = G_SITOFP %2
$vgpr0 = COPY %3
...

---
name: uitofp_char_to_f16
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: uitofp_char_to_f16
; CHECK: liveins: $vgpr0
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
; CHECK: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]]
; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[AMDGPU_CVT_F32_UBYTE0_]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = G_CONSTANT i32 255
%2:_(s32) = G_AND %0, %1
%3:_(s16) = G_UITOFP %2
%4:_(s32) = G_ANYEXT %3
$vgpr0 = COPY %4
...

---
name: sitofp_char_to_f16
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: sitofp_char_to_f16
; CHECK: liveins: $vgpr0
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
; CHECK: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]]
; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[AMDGPU_CVT_F32_UBYTE0_]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = G_CONSTANT i32 255
%2:_(s32) = G_AND %0, %1
%3:_(s16) = G_SITOFP %2
%4:_(s32) = G_ANYEXT %3
$vgpr0 = COPY %4
...

0 comments on commit b27d255

Please sign in to comment.