Skip to content

Commit

Permalink
AMDGPU: Enable integer division bypass
Browse files Browse the repository at this point in the history
We probably want this, and I've meant to turn this on for a long
time. SC actually emits a special case to early-out for a 1
denominator, which perhaps should also be considered.
  • Loading branch information
arsenm committed Feb 19, 2020
1 parent ed07c89 commit 4bb0c8f
Show file tree
Hide file tree
Showing 8 changed files with 1,228 additions and 11 deletions.
9 changes: 9 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Expand Up @@ -37,6 +37,11 @@ using namespace llvm;

#include "AMDGPUGenCallingConv.inc"

static cl::opt<bool> AMDGPUBypassSlowDiv(
"amdgpu-bypass-slow-div",
cl::desc("Skip 64-bit divide for dynamic 32-bit values"),
cl::init(true));

// Find a larger type to do a load / store of a vector with.
EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
unsigned StoreSize = VT.getStoreSizeInBits();
Expand Down Expand Up @@ -482,6 +487,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
MaxStoresPerMemmove = 0xffffffff;
MaxStoresPerMemset = 0xffffffff;

// The expansion for 64-bit division is enormous.
if (AMDGPUBypassSlowDiv)
addBypassSlowDiv(64, 32);

setTargetDAGCombine(ISD::BITCAST);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -S -mtriple=amdgcn-- -mcpu=tahiti -amdgpu-codegenprepare %s | FileCheck %s
; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti < %s | FileCheck -check-prefix=GCN %s
; RUN: opt -S -mtriple=amdgcn-- -mcpu=tahiti -amdgpu-codegenprepare -amdgpu-bypass-slow-div=0 %s | FileCheck %s
; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -amdgpu-bypass-slow-div=0 < %s | FileCheck -check-prefix=GCN %s

define amdgpu_kernel void @udiv_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) {
; CHECK-LABEL: @udiv_i32(
Expand Down
1,208 changes: 1,208 additions & 0 deletions llvm/test/CodeGen/AMDGPU/bypass-div.ll

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/sdiv64.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-codegenprepare-expand-div64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-IR %s
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -amdgpu-codegenprepare-expand-div64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-IR %s

define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
; GCN-LABEL: s_test_sdiv:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/sdivrem64.r600.ll
@@ -1,4 +1,4 @@
;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG %s
;RUN: llc -march=r600 -mcpu=redwood -amdgpu-bypass-slow-div=0 < %s | FileCheck -check-prefix=EG %s

;EG-LABEL: {{^}}s_test_sdiv:
;EG: RECIP_UINT
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/srem64.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-codegenprepare-expand-div64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-IR %s
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -amdgpu-codegenprepare-expand-div64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-IR %s

define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
; GCN-LABEL: s_test_srem:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/udiv64.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-codegenprepare-expand-div64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-IR %s
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -amdgpu-codegenprepare-expand-div64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-IR %s

define amdgpu_kernel void @s_test_udiv_i64(i64 addrspace(1)* %out, i64 %x, i64 %y) {
; GCN-LABEL: s_test_udiv_i64:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/urem64.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-codegenprepare-expand-div64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-IR %s
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx600 -amdgpu-bypass-slow-div=0 -amdgpu-codegenprepare-expand-div64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-IR %s

define amdgpu_kernel void @s_test_urem_i64(i64 addrspace(1)* %out, i64 %x, i64 %y) {
; GCN-LABEL: s_test_urem_i64:
Expand Down

0 comments on commit 4bb0c8f

Please sign in to comment.