Skip to content

Commit

Permalink
[RISCV] Enable the TypePromotion pass from AArch64/ARM.
Browse files Browse the repository at this point in the history
This pass looks for unsigned icmps that have illegal types and tries
to widen the use/def graph to improve the placement of the zero
extends that type legalization would need to insert.

I've explicitly disabled it for i32 by adding a check for
isSExtCheaperThanZExt to the pass.

The generated code isn't perfect, but my data shows a net
dynamic instruction count improvement on spec2017 for both base and
Zba+Zbb+Zbs.
  • Loading branch information
topperc committed Feb 13, 2024
1 parent 9838c85 commit 7d40ea8
Show file tree
Hide file tree
Showing 7 changed files with 190 additions and 87 deletions.
2 changes: 2 additions & 0 deletions llvm/lib/CodeGen/TypePromotion.cpp
Expand Up @@ -937,6 +937,8 @@ bool TypePromotionImpl::run(Function &F, const TargetMachine *TM,
return 0;

EVT PromotedVT = TLI->getTypeToTransformTo(*Ctx, SrcVT);
if (TLI->isSExtCheaperThanZExt(SrcVT, PromotedVT))
return 0;
if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) {
LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register "
<< "for promoted type\n");
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
Expand Up @@ -366,6 +366,7 @@ class RISCVPassConfig : public TargetPassConfig {

void addIRPasses() override;
bool addPreISel() override;
void addCodeGenPrepare() override;
bool addInstSelector() override;
bool addIRTranslator() override;
void addPreLegalizeMachineIR() override;
Expand Down Expand Up @@ -452,6 +453,12 @@ bool RISCVPassConfig::addPreISel() {
return false;
}

// Schedule the TypePromotion pass (shared with AArch64/ARM) ahead of the
// standard CodeGenPrepare pipeline. It widens the use/def graph of
// illegal-typed unsigned icmps so the zero extends that type legalization
// would need to insert are placed more profitably.
void RISCVPassConfig::addCodeGenPrepare() {
  // Only run at -O1 and above; pass setup cost isn't justified at -O0.
  if (getOptLevel() != CodeGenOptLevel::None)
    addPass(createTypePromotionLegacyPass());
  // Delegate to the base class to add CodeGenPrepare itself (and any other
  // target-independent preparation passes).
  TargetPassConfig::addCodeGenPrepare();
}

bool RISCVPassConfig::addInstSelector() {
addPass(createRISCVISelDag(getRISCVTargetMachine(), getOptLevel()));

Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/RISCV/O3-pipeline.ll
Expand Up @@ -68,6 +68,7 @@
; CHECK-NEXT: Expand reduction intrinsics
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: TLS Variable Hoist
; CHECK-NEXT: Type Promotion
; CHECK-NEXT: CodeGen Prepare
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Exception handling preparation
Expand Down
92 changes: 64 additions & 28 deletions llvm/test/CodeGen/RISCV/lack-of-signed-truncation-check.ll
Expand Up @@ -254,21 +254,39 @@ define i1 @shifts_necmp_i64_i8(i64 %x) nounwind {
; ---------------------------------------------------------------------------- ;

define i1 @add_ultcmp_i16_i8(i16 %x) nounwind {
; RV32-LABEL: add_ultcmp_i16_i8:
; RV32: # %bb.0:
; RV32-NEXT: addi a0, a0, -128
; RV32-NEXT: slli a0, a0, 16
; RV32-NEXT: srli a0, a0, 24
; RV32-NEXT: sltiu a0, a0, 255
; RV32-NEXT: ret
; RV32I-LABEL: add_ultcmp_i16_i8:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srli a0, a0, 16
; RV32I-NEXT: addi a0, a0, -128
; RV32I-NEXT: srli a0, a0, 8
; RV32I-NEXT: sltiu a0, a0, 255
; RV32I-NEXT: ret
;
; RV64-LABEL: add_ultcmp_i16_i8:
; RV64: # %bb.0:
; RV64-NEXT: addi a0, a0, -128
; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srli a0, a0, 56
; RV64-NEXT: sltiu a0, a0, 255
; RV64-NEXT: ret
; RV64I-LABEL: add_ultcmp_i16_i8:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: addi a0, a0, -128
; RV64I-NEXT: srli a0, a0, 8
; RV64I-NEXT: sltiu a0, a0, 255
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: add_ultcmp_i16_i8:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: zext.h a0, a0
; RV32ZBB-NEXT: addi a0, a0, -128
; RV32ZBB-NEXT: srli a0, a0, 8
; RV32ZBB-NEXT: sltiu a0, a0, 255
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: add_ultcmp_i16_i8:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: addi a0, a0, -128
; RV64ZBB-NEXT: srli a0, a0, 8
; RV64ZBB-NEXT: sltiu a0, a0, 255
; RV64ZBB-NEXT: ret
%tmp0 = add i16 %x, -128 ; ~0U << (8-1)
%tmp1 = icmp ult i16 %tmp0, -256 ; ~0U << 8
ret i1 %tmp1
Expand Down Expand Up @@ -421,21 +439,39 @@ define i1 @add_ultcmp_i64_i8(i64 %x) nounwind {

; Slightly more canonical variant
define i1 @add_ulecmp_i16_i8(i16 %x) nounwind {
; RV32-LABEL: add_ulecmp_i16_i8:
; RV32: # %bb.0:
; RV32-NEXT: addi a0, a0, -128
; RV32-NEXT: slli a0, a0, 16
; RV32-NEXT: srli a0, a0, 24
; RV32-NEXT: sltiu a0, a0, 255
; RV32-NEXT: ret
; RV32I-LABEL: add_ulecmp_i16_i8:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srli a0, a0, 16
; RV32I-NEXT: addi a0, a0, -128
; RV32I-NEXT: srli a0, a0, 8
; RV32I-NEXT: sltiu a0, a0, 255
; RV32I-NEXT: ret
;
; RV64-LABEL: add_ulecmp_i16_i8:
; RV64: # %bb.0:
; RV64-NEXT: addi a0, a0, -128
; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srli a0, a0, 56
; RV64-NEXT: sltiu a0, a0, 255
; RV64-NEXT: ret
; RV64I-LABEL: add_ulecmp_i16_i8:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: addi a0, a0, -128
; RV64I-NEXT: srli a0, a0, 8
; RV64I-NEXT: sltiu a0, a0, 255
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: add_ulecmp_i16_i8:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: zext.h a0, a0
; RV32ZBB-NEXT: addi a0, a0, -128
; RV32ZBB-NEXT: srli a0, a0, 8
; RV32ZBB-NEXT: sltiu a0, a0, 255
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: add_ulecmp_i16_i8:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: addi a0, a0, -128
; RV64ZBB-NEXT: srli a0, a0, 8
; RV64ZBB-NEXT: sltiu a0, a0, 255
; RV64ZBB-NEXT: ret
%tmp0 = add i16 %x, -128 ; ~0U << (8-1)
%tmp1 = icmp ule i16 %tmp0, -257 ; ~0U << 8 - 1
ret i1 %tmp1
Expand Down
30 changes: 24 additions & 6 deletions llvm/test/CodeGen/RISCV/signbit-test.ll
Expand Up @@ -303,7 +303,10 @@ define i16 @test_clear_mask_i16_i8(i16 %x) nounwind {
; RV32-NEXT: bnez a1, .LBB10_2
; RV32-NEXT: # %bb.1: # %t
; RV32-NEXT: li a0, 42
; RV32-NEXT: .LBB10_2: # %f
; RV32-NEXT: ret
; RV32-NEXT: .LBB10_2:
; RV32-NEXT: slli a0, a0, 16
; RV32-NEXT: srli a0, a0, 16
; RV32-NEXT: ret
;
; RV64-LABEL: test_clear_mask_i16_i8:
Expand All @@ -312,7 +315,10 @@ define i16 @test_clear_mask_i16_i8(i16 %x) nounwind {
; RV64-NEXT: bnez a1, .LBB10_2
; RV64-NEXT: # %bb.1: # %t
; RV64-NEXT: li a0, 42
; RV64-NEXT: .LBB10_2: # %f
; RV64-NEXT: ret
; RV64-NEXT: .LBB10_2:
; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srli a0, a0, 48
; RV64-NEXT: ret
entry:
%a = and i16 %x, 128
Expand All @@ -332,7 +338,10 @@ define i16 @test_set_mask_i16_i8(i16 %x) nounwind {
; RV32-NEXT: beqz a1, .LBB11_2
; RV32-NEXT: # %bb.1: # %t
; RV32-NEXT: li a0, 42
; RV32-NEXT: .LBB11_2: # %f
; RV32-NEXT: ret
; RV32-NEXT: .LBB11_2:
; RV32-NEXT: slli a0, a0, 16
; RV32-NEXT: srli a0, a0, 16
; RV32-NEXT: ret
;
; RV64-LABEL: test_set_mask_i16_i8:
Expand All @@ -341,7 +350,10 @@ define i16 @test_set_mask_i16_i8(i16 %x) nounwind {
; RV64-NEXT: beqz a1, .LBB11_2
; RV64-NEXT: # %bb.1: # %t
; RV64-NEXT: li a0, 42
; RV64-NEXT: .LBB11_2: # %f
; RV64-NEXT: ret
; RV64-NEXT: .LBB11_2:
; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srli a0, a0, 48
; RV64-NEXT: ret
entry:
%a = and i16 %x, 128
Expand All @@ -361,7 +373,10 @@ define i16 @test_set_mask_i16_i7(i16 %x) nounwind {
; RV32-NEXT: beqz a1, .LBB12_2
; RV32-NEXT: # %bb.1: # %t
; RV32-NEXT: li a0, 42
; RV32-NEXT: .LBB12_2: # %f
; RV32-NEXT: ret
; RV32-NEXT: .LBB12_2:
; RV32-NEXT: slli a0, a0, 16
; RV32-NEXT: srli a0, a0, 16
; RV32-NEXT: ret
;
; RV64-LABEL: test_set_mask_i16_i7:
Expand All @@ -370,7 +385,10 @@ define i16 @test_set_mask_i16_i7(i16 %x) nounwind {
; RV64-NEXT: beqz a1, .LBB12_2
; RV64-NEXT: # %bb.1: # %t
; RV64-NEXT: li a0, 42
; RV64-NEXT: .LBB12_2: # %f
; RV64-NEXT: ret
; RV64-NEXT: .LBB12_2:
; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srli a0, a0, 48
; RV64-NEXT: ret
entry:
%a = and i16 %x, 64
Expand Down
104 changes: 72 additions & 32 deletions llvm/test/CodeGen/RISCV/signed-truncation-check.ll
Expand Up @@ -254,23 +254,43 @@ define i1 @shifts_eqcmp_i64_i8(i64 %x) nounwind {
; ---------------------------------------------------------------------------- ;

define i1 @add_ugecmp_i16_i8(i16 %x) nounwind {
; RV32-LABEL: add_ugecmp_i16_i8:
; RV32: # %bb.0:
; RV32-NEXT: addi a0, a0, -128
; RV32-NEXT: slli a0, a0, 16
; RV32-NEXT: srli a0, a0, 24
; RV32-NEXT: sltiu a0, a0, 255
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: ret
; RV32I-LABEL: add_ugecmp_i16_i8:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srli a0, a0, 16
; RV32I-NEXT: addi a0, a0, -128
; RV32I-NEXT: srli a0, a0, 8
; RV32I-NEXT: sltiu a0, a0, 255
; RV32I-NEXT: xori a0, a0, 1
; RV32I-NEXT: ret
;
; RV64-LABEL: add_ugecmp_i16_i8:
; RV64: # %bb.0:
; RV64-NEXT: addi a0, a0, -128
; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srli a0, a0, 56
; RV64-NEXT: sltiu a0, a0, 255
; RV64-NEXT: xori a0, a0, 1
; RV64-NEXT: ret
; RV64I-LABEL: add_ugecmp_i16_i8:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: addi a0, a0, -128
; RV64I-NEXT: srli a0, a0, 8
; RV64I-NEXT: sltiu a0, a0, 255
; RV64I-NEXT: xori a0, a0, 1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: add_ugecmp_i16_i8:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: zext.h a0, a0
; RV32ZBB-NEXT: addi a0, a0, -128
; RV32ZBB-NEXT: srli a0, a0, 8
; RV32ZBB-NEXT: sltiu a0, a0, 255
; RV32ZBB-NEXT: xori a0, a0, 1
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: add_ugecmp_i16_i8:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: addi a0, a0, -128
; RV64ZBB-NEXT: srli a0, a0, 8
; RV64ZBB-NEXT: sltiu a0, a0, 255
; RV64ZBB-NEXT: xori a0, a0, 1
; RV64ZBB-NEXT: ret
%tmp0 = add i16 %x, -128 ; ~0U << (8-1)
%tmp1 = icmp uge i16 %tmp0, -256 ; ~0U << 8
ret i1 %tmp1
Expand Down Expand Up @@ -471,23 +491,43 @@ define i1 @add_ugecmp_i64_i8(i64 %x) nounwind {

; Slightly more canonical variant
define i1 @add_ugtcmp_i16_i8(i16 %x) nounwind {
; RV32-LABEL: add_ugtcmp_i16_i8:
; RV32: # %bb.0:
; RV32-NEXT: addi a0, a0, -128
; RV32-NEXT: slli a0, a0, 16
; RV32-NEXT: srli a0, a0, 24
; RV32-NEXT: sltiu a0, a0, 255
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: ret
; RV32I-LABEL: add_ugtcmp_i16_i8:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srli a0, a0, 16
; RV32I-NEXT: addi a0, a0, -128
; RV32I-NEXT: srli a0, a0, 8
; RV32I-NEXT: sltiu a0, a0, 255
; RV32I-NEXT: xori a0, a0, 1
; RV32I-NEXT: ret
;
; RV64-LABEL: add_ugtcmp_i16_i8:
; RV64: # %bb.0:
; RV64-NEXT: addi a0, a0, -128
; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srli a0, a0, 56
; RV64-NEXT: sltiu a0, a0, 255
; RV64-NEXT: xori a0, a0, 1
; RV64-NEXT: ret
; RV64I-LABEL: add_ugtcmp_i16_i8:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: addi a0, a0, -128
; RV64I-NEXT: srli a0, a0, 8
; RV64I-NEXT: sltiu a0, a0, 255
; RV64I-NEXT: xori a0, a0, 1
; RV64I-NEXT: ret
;
; RV32ZBB-LABEL: add_ugtcmp_i16_i8:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: zext.h a0, a0
; RV32ZBB-NEXT: addi a0, a0, -128
; RV32ZBB-NEXT: srli a0, a0, 8
; RV32ZBB-NEXT: sltiu a0, a0, 255
; RV32ZBB-NEXT: xori a0, a0, 1
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: add_ugtcmp_i16_i8:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: addi a0, a0, -128
; RV64ZBB-NEXT: srli a0, a0, 8
; RV64ZBB-NEXT: sltiu a0, a0, 255
; RV64ZBB-NEXT: xori a0, a0, 1
; RV64ZBB-NEXT: ret
%tmp0 = add i16 %x, -128 ; ~0U << (8-1)
%tmp1 = icmp ugt i16 %tmp0, -257 ; ~0U << 8 - 1
ret i1 %tmp1
Expand Down

0 comments on commit 7d40ea8

Please sign in to comment.