Skip to content

Commit

Permalink
[LoongArch] Add codegen support for bswap
Browse files Browse the repository at this point in the history
Differential Revision: https://reviews.llvm.org/D131352
  • Loading branch information
xen0n authored and SixWeining committed Aug 9, 2022
1 parent e8c807f commit f35cb7b
Show file tree
Hide file tree
Showing 4 changed files with 204 additions and 0 deletions.
35 changes: 35 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
Expand Up @@ -69,6 +69,14 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
}

// LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
// the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
// and i32 could still be byte-swapped relatively cheaply.
setOperationAction(ISD::BSWAP, MVT::i16, Custom);
// On LA32 the i32 bswap is matched directly by an ISel pattern
// (REVB_2H + ROTRI_W) in LoongArchInstrInfo.td, so only LA64 needs the
// custom i32 legalization.  LLVM style: no braces on a single-statement if.
if (Subtarget.is64Bit())
  setOperationAction(ISD::BSWAP, MVT::i32, Custom);

static const ISD::CondCode FPCCToExpand[] = {ISD::SETOGT, ISD::SETOGE,
ISD::SETUGT, ISD::SETUGE};

Expand Down Expand Up @@ -131,6 +139,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
return SDValue();
case ISD::BSWAP:
  // NOTE(review): per the TargetLowering::LowerOperation contract, a null
  // SDValue means "use the default legalization action" for this node.
  // BSWAP is only marked Custom on types narrower than GRLen, which are
  // handled via ReplaceNodeResults — confirm whether this case is actually
  // reachable or merely defensive.
  return SDValue();
case ISD::ConstantPool:
return lowerConstantPool(Op, DAG);
case ISD::FP_TO_SINT:
Expand Down Expand Up @@ -418,6 +428,29 @@ void LoongArchTargetLowering::ReplaceNodeResults(
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
break;
}
case ISD::BSWAP: {
  // Custom type legalization for a BSWAP whose result type (i16 or i32) is
  // narrower than GRLen: widen the source to GRLen, byte-swap with the
  // matching REVB node, then truncate back to the original type.
  SDValue Src = N->getOperand(0);
  EVT VT = N->getValueType(0);
  assert((VT == MVT::i16 || VT == MVT::i32) &&
         "Unexpected custom legalization");
  MVT GRLenVT = Subtarget.getGRLenVT();
  // ANY_EXTEND is sufficient: the bytes above the swapped lanes are
  // discarded by the final TRUNCATE.
  SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
  SDValue Tmp;
  switch (VT.getSizeInBits()) {
  default:
    llvm_unreachable("Unexpected operand width");
  case 16:
    // i16: swap the two bytes of the low half-word with REVB.2H.
    Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
    break;
  case 32:
    // Only LA64 will get to here due to the size mismatch between VT and
    // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
    Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
    break;
  }
  // Narrow the GRLen-wide swapped value back to the requested type.
  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
  break;
}
}
}

Expand Down Expand Up @@ -847,6 +880,8 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(MOVGR2FR_W_LA64)
NODE_NAME_CASE(MOVFR2GR_S_LA64)
NODE_NAME_CASE(FTINT)
NODE_NAME_CASE(REVB_2H)
NODE_NAME_CASE(REVB_2W)
}
#undef NODE_NAME_CASE
return nullptr;
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.h
Expand Up @@ -44,6 +44,9 @@ enum NodeType : unsigned {
BSTRINS,
BSTRPICK,

// Byte swapping operations
REVB_2H,
REVB_2W,
};
} // end namespace LoongArchISD

Expand Down
15 changes: 15 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
Expand Up @@ -55,6 +55,8 @@ def loongarch_bstrins
: SDNode<"LoongArchISD::BSTRINS", SDT_LoongArchBStrIns>;
def loongarch_bstrpick
: SDNode<"LoongArchISD::BSTRPICK", SDT_LoongArchBStrPick>;
// SelectionDAG nodes emitted by the custom BSWAP type legalization in
// LoongArchISelLowering.cpp; both are simple unary GRLen-typed operations.
def loongarch_revb_2h : SDNode<"LoongArchISD::REVB_2H", SDTUnaryOp>;
def loongarch_revb_2w : SDNode<"LoongArchISD::REVB_2W", SDTUnaryOp>;

//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
Expand Down Expand Up @@ -816,6 +818,19 @@ def : Pat<(loongarch_bstrpick GPR:$rj, uimm6:$msbd, uimm6:$lsbd),
(BSTRPICK_D GPR:$rj, uimm6:$msbd, uimm6:$lsbd)>;
} // Predicates = [IsLA64]

/// Byte-swapping

// REVB_2H (byte-swap within each 16-bit lane) exists on both LA32 and
// LA64, so this pattern needs no predicate.
def : Pat<(loongarch_revb_2h GPR:$rj), (REVB_2H GPR:$rj)>;

let Predicates = [IsLA32] in {
// i32 bswap on LA32: swap the bytes inside each half-word, then exchange
// the two half-words with a 16-bit rotate.
def : Pat<(bswap GPR:$rj), (ROTRI_W (REVB_2H GPR:$rj), 16)>;
} // Predicates = [IsLA32]

let Predicates = [IsLA64] in {
// REVB_2W is only produced for i32 bswap on LA64 (see ISelLowering).
def : Pat<(loongarch_revb_2w GPR:$rj), (REVB_2W GPR:$rj)>;
// A GRLen-wide (i64) bswap maps directly onto REVB.D.
def : Pat<(bswap GPR:$rj), (REVB_D GPR:$rj)>;
} // Predicates = [IsLA64]

/// Loads

multiclass LdPat<PatFrag LoadOp, LAInst Inst, ValueType vt = GRLenVT> {
Expand Down
151 changes: 151 additions & 0 deletions llvm/test/CodeGen/LoongArch/bswap.ll
@@ -0,0 +1,151 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=loongarch32 --verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefix=LA32
; RUN: llc -mtriple=loongarch64 --verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefix=LA64

declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
declare i48 @llvm.bswap.i48(i48)
declare i64 @llvm.bswap.i64(i64)
declare i80 @llvm.bswap.i80(i80)
declare i128 @llvm.bswap.i128(i128)

;; i16 bswap selects to a single revb.2h on both LA32 and LA64.
define i16 @test_bswap_i16(i16 %a) nounwind {
; LA32-LABEL: test_bswap_i16:
; LA32: # %bb.0:
; LA32-NEXT: revb.2h $a0, $a0
; LA32-NEXT: jirl $zero, $ra, 0
;
; LA64-LABEL: test_bswap_i16:
; LA64: # %bb.0:
; LA64-NEXT: revb.2h $a0, $a0
; LA64-NEXT: jirl $zero, $ra, 0
  %tmp = call i16 @llvm.bswap.i16(i16 %a)
  ret i16 %tmp
}

;; i32 bswap: LA32 combines revb.2h with a 16-bit rotate; LA64 has the
;; dedicated revb.2w.
define i32 @test_bswap_i32(i32 %a) nounwind {
; LA32-LABEL: test_bswap_i32:
; LA32: # %bb.0:
; LA32-NEXT: revb.2h $a0, $a0
; LA32-NEXT: rotri.w $a0, $a0, 16
; LA32-NEXT: jirl $zero, $ra, 0
;
; LA64-LABEL: test_bswap_i32:
; LA64: # %bb.0:
; LA64-NEXT: revb.2w $a0, $a0
; LA64-NEXT: jirl $zero, $ra, 0
  %tmp = call i32 @llvm.bswap.i32(i32 %a)
  ret i32 %tmp
}

;; i64 bswap: LA32 swaps each 32-bit half and exchanges the register pair;
;; LA64 is a single revb.d.
define i64 @test_bswap_i64(i64 %a) nounwind {
; LA32-LABEL: test_bswap_i64:
; LA32: # %bb.0:
; LA32-NEXT: revb.2h $a1, $a1
; LA32-NEXT: rotri.w $a2, $a1, 16
; LA32-NEXT: revb.2h $a0, $a0
; LA32-NEXT: rotri.w $a1, $a0, 16
; LA32-NEXT: move $a0, $a2
; LA32-NEXT: jirl $zero, $ra, 0
;
; LA64-LABEL: test_bswap_i64:
; LA64: # %bb.0:
; LA64-NEXT: revb.d $a0, $a0
; LA64-NEXT: jirl $zero, $ra, 0
  %tmp = call i64 @llvm.bswap.i64(i64 %a)
  ret i64 %tmp
}

;; Bswap on non-native integer widths.

;; Non-native width: i48 is legalized to a wider swap followed by a
;; 16-bit right shift to discard the extra bytes.
define i48 @test_bswap_i48(i48 %a) nounwind {
; LA32-LABEL: test_bswap_i48:
; LA32: # %bb.0:
; LA32-NEXT: revb.2h $a1, $a1
; LA32-NEXT: rotri.w $a1, $a1, 16
; LA32-NEXT: srli.w $a1, $a1, 16
; LA32-NEXT: revb.2h $a0, $a0
; LA32-NEXT: rotri.w $a2, $a0, 16
; LA32-NEXT: slli.w $a0, $a2, 16
; LA32-NEXT: or $a0, $a1, $a0
; LA32-NEXT: srli.w $a1, $a2, 16
; LA32-NEXT: jirl $zero, $ra, 0
;
; LA64-LABEL: test_bswap_i48:
; LA64: # %bb.0:
; LA64-NEXT: revb.d $a0, $a0
; LA64-NEXT: srli.d $a0, $a0, 16
; LA64-NEXT: jirl $zero, $ra, 0
  %tmp = call i48 @llvm.bswap.i48(i48 %a)
  ret i48 %tmp
}

;; Non-native width wider than GRLen: i80 goes indirect (sret-style pointer
;; on LA32) and is assembled from per-word swaps plus shifts/ors.
define i80 @test_bswap_i80(i80 %a) nounwind {
; LA32-LABEL: test_bswap_i80:
; LA32: # %bb.0:
; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: revb.2h $a2, $a2
; LA32-NEXT: rotri.w $a2, $a2, 16
; LA32-NEXT: ld.w $a3, $a1, 4
; LA32-NEXT: revb.2h $a3, $a3
; LA32-NEXT: rotri.w $a3, $a3, 16
; LA32-NEXT: srli.w $a4, $a3, 16
; LA32-NEXT: slli.w $a5, $a2, 16
; LA32-NEXT: or $a4, $a5, $a4
; LA32-NEXT: srli.w $a2, $a2, 16
; LA32-NEXT: st.h $a2, $a0, 8
; LA32-NEXT: st.w $a4, $a0, 4
; LA32-NEXT: slli.w $a2, $a3, 16
; LA32-NEXT: ld.w $a1, $a1, 8
; LA32-NEXT: revb.2h $a1, $a1
; LA32-NEXT: rotri.w $a1, $a1, 16
; LA32-NEXT: srli.w $a1, $a1, 16
; LA32-NEXT: or $a1, $a1, $a2
; LA32-NEXT: st.w $a1, $a0, 0
; LA32-NEXT: jirl $zero, $ra, 0
;
; LA64-LABEL: test_bswap_i80:
; LA64: # %bb.0:
; LA64-NEXT: revb.d $a1, $a1
; LA64-NEXT: srli.d $a1, $a1, 48
; LA64-NEXT: revb.d $a2, $a0
; LA64-NEXT: slli.d $a0, $a2, 16
; LA64-NEXT: or $a0, $a1, $a0
; LA64-NEXT: srli.d $a1, $a2, 48
; LA64-NEXT: jirl $zero, $ra, 0
  %tmp = call i80 @llvm.bswap.i80(i80 %a)
  ret i80 %tmp
}

;; i128: per-word (LA32) or per-doubleword (LA64) swaps with the pieces
;; stored/moved in reversed order.
define i128 @test_bswap_i128(i128 %a) nounwind {
; LA32-LABEL: test_bswap_i128:
; LA32: # %bb.0:
; LA32-NEXT: ld.w $a2, $a1, 0
; LA32-NEXT: revb.2h $a2, $a2
; LA32-NEXT: rotri.w $a2, $a2, 16
; LA32-NEXT: st.w $a2, $a0, 12
; LA32-NEXT: ld.w $a2, $a1, 4
; LA32-NEXT: revb.2h $a2, $a2
; LA32-NEXT: rotri.w $a2, $a2, 16
; LA32-NEXT: st.w $a2, $a0, 8
; LA32-NEXT: ld.w $a2, $a1, 8
; LA32-NEXT: revb.2h $a2, $a2
; LA32-NEXT: rotri.w $a2, $a2, 16
; LA32-NEXT: st.w $a2, $a0, 4
; LA32-NEXT: ld.w $a1, $a1, 12
; LA32-NEXT: revb.2h $a1, $a1
; LA32-NEXT: rotri.w $a1, $a1, 16
; LA32-NEXT: st.w $a1, $a0, 0
; LA32-NEXT: jirl $zero, $ra, 0
;
; LA64-LABEL: test_bswap_i128:
; LA64: # %bb.0:
; LA64-NEXT: revb.d $a2, $a1
; LA64-NEXT: revb.d $a1, $a0
; LA64-NEXT: move $a0, $a2
; LA64-NEXT: jirl $zero, $ra, 0
  %tmp = call i128 @llvm.bswap.i128(i128 %a)
  ret i128 %tmp
}

0 comments on commit f35cb7b

Please sign in to comment.