881 changes: 858 additions & 23 deletions llvm/lib/Target/Hexagon/HexagonISelLowering.cpp

Large diffs are not rendered by default.

59 changes: 42 additions & 17 deletions llvm/lib/Target/Hexagon/HexagonISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,36 @@ bool isPositiveHalfWord(SDNode *N);
CALLR,

RET_FLAG, // Return with a flag operand.
BR_JT, // Jump table.
BARRIER, // Memory barrier
BR_JT, // Branch through jump table.
BARRIER, // Memory barrier.
JT, // Jump table.
CP, // Constant pool.
POPCOUNT,
COMBINE,
PACKHL,
JT,
CP,
VSPLATB,
VSPLATH,
SHUFFEB,
SHUFFEH,
SHUFFOB,
SHUFFOH,
VSXTBH,
VSXTBW,
VSRAW,
VSRAH,
VSRLW,
VSRLH,
VSHLW,
VSHLH,
VCMPBEQ,
VCMPBGT,
VCMPBGTU,
VCMPHEQ,
VCMPHGT,
VCMPHGTU,
VCMPWEQ,
VCMPWGT,
VCMPWGTU,
INSERT_ri,
INSERT_rd,
INSERT_riv,
Expand All @@ -73,17 +96,6 @@ bool isPositiveHalfWord(SDNode *N);
EXTRACTU_rd,
EXTRACTU_riv,
EXTRACTU_rdv,
WrapperCombineII,
WrapperCombineRR,
WrapperCombineRI_V4,
WrapperCombineIR_V4,
WrapperPackhl,
WrapperSplatB,
WrapperSplatH,
WrapperShuffEB,
WrapperShuffEH,
WrapperShuffOB,
WrapperShuffOH,
TC_RETURN,
EH_RETURN,
DCFETCH
Expand All @@ -98,6 +110,8 @@ bool isPositiveHalfWord(SDNode *N);
bool CanReturnSmallStruct(const Function* CalleeFn,
unsigned& RetSize) const;

void promoteLdStType(EVT VT, EVT PromotedLdStVT);

public:
const HexagonSubtarget *Subtarget;
explicit HexagonTargetLowering(const TargetMachine &TM,
Expand All @@ -123,10 +137,17 @@ bool isPositiveHalfWord(SDNode *N);

bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
// Should we expand the build vector with shuffles?
bool shouldExpandBuildVectorWithShuffles(EVT VT,
unsigned DefinedValues) const override;

SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
const char *getTargetNodeName(unsigned Opcode) const override;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
Expand All @@ -150,9 +171,13 @@ bool isPositiveHalfWord(SDNode *N);
const SmallVectorImpl<SDValue> &OutVals,
SDValue Callee) const;

SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;

SDValue LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
Expand Down
51 changes: 51 additions & 0 deletions llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,8 @@ void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
}
bool
HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
const HexagonRegisterInfo &TRI = getRegisterInfo();
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
unsigned Opc = MI->getOpcode();
Expand All @@ -587,6 +589,55 @@ HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MBB.erase(MI);
return true;
}
case Hexagon::VMULW: {
  // Replace the 64-bit vector multiply pseudo with two 32-bit scalar
  // multiplies (M2_mpyi): one on the high sub-registers of the operands
  // and one on the low sub-registers.
  const unsigned Dst = MI->getOperand(0).getReg();
  const unsigned LHS = MI->getOperand(1).getReg();
  const unsigned RHS = MI->getOperand(2).getReg();

  // Split both sources into their high and low halves.
  const unsigned LHSHi = TRI.getSubReg(LHS, Hexagon::subreg_hireg);
  const unsigned LHSLo = TRI.getSubReg(LHS, Hexagon::subreg_loreg);
  const unsigned RHSHi = TRI.getSubReg(RHS, Hexagon::subreg_hireg);
  const unsigned RHSLo = TRI.getSubReg(RHS, Hexagon::subreg_loreg);

  // High half first, then low half; both are inserted before the pseudo.
  const unsigned DstHi = TRI.getSubReg(Dst, Hexagon::subreg_hireg);
  BuildMI(MBB, MI, DL, get(Hexagon::M2_mpyi), DstHi)
      .addReg(LHSHi)
      .addReg(RHSHi);
  const unsigned DstLo = TRI.getSubReg(Dst, Hexagon::subreg_loreg);
  BuildMI(MBB, MI, DL, get(Hexagon::M2_mpyi), DstLo)
      .addReg(LHSLo)
      .addReg(RHSLo);

  // Drop the pseudo, then clear the kill flags on every source
  // sub-register read by the replacement instructions.
  MBB.erase(MI);
  MRI.clearKillFlags(LHSHi);
  MRI.clearKillFlags(LHSLo);
  MRI.clearKillFlags(RHSHi);
  MRI.clearKillFlags(RHSLo);
  return true;
}
case Hexagon::VMULW_ACC: {
  // Replace the 64-bit vector multiply-with-addition pseudo with two
  // scalar M2_maci instructions: one on the high sub-registers of the
  // operands and one on the low sub-registers.
  const unsigned Dst = MI->getOperand(0).getReg();
  const unsigned SrcA = MI->getOperand(1).getReg();
  const unsigned SrcB = MI->getOperand(2).getReg();
  const unsigned SrcC = MI->getOperand(3).getReg();

  // Split all three sources into their high and low halves.
  const unsigned SrcAHi = TRI.getSubReg(SrcA, Hexagon::subreg_hireg);
  const unsigned SrcALo = TRI.getSubReg(SrcA, Hexagon::subreg_loreg);
  const unsigned SrcBHi = TRI.getSubReg(SrcB, Hexagon::subreg_hireg);
  const unsigned SrcBLo = TRI.getSubReg(SrcB, Hexagon::subreg_loreg);
  const unsigned SrcCHi = TRI.getSubReg(SrcC, Hexagon::subreg_hireg);
  const unsigned SrcCLo = TRI.getSubReg(SrcC, Hexagon::subreg_loreg);

  // High half first, then low half; both are inserted before the pseudo.
  const unsigned DstHi = TRI.getSubReg(Dst, Hexagon::subreg_hireg);
  BuildMI(MBB, MI, DL, get(Hexagon::M2_maci), DstHi)
      .addReg(SrcAHi)
      .addReg(SrcBHi)
      .addReg(SrcCHi);
  const unsigned DstLo = TRI.getSubReg(Dst, Hexagon::subreg_loreg);
  BuildMI(MBB, MI, DL, get(Hexagon::M2_maci), DstLo)
      .addReg(SrcALo)
      .addReg(SrcBLo)
      .addReg(SrcCLo);

  // Drop the pseudo, then clear the kill flags on every source
  // sub-register read by the replacement instructions.
  MBB.erase(MI);
  MRI.clearKillFlags(SrcAHi);
  MRI.clearKillFlags(SrcALo);
  MRI.clearKillFlags(SrcBHi);
  MRI.clearKillFlags(SrcBLo);
  MRI.clearKillFlags(SrcCHi);
  MRI.clearKillFlags(SrcCLo);
  return true;
}
case Hexagon::TCRETURNi:
MI->setDesc(get(Hexagon::J2_jump));
return true;
Expand Down
418 changes: 418 additions & 0 deletions llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-anyextend.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
; RUN: llc -march=hexagon < %s
; Used to fail with "Cannot select: 0x17300f0: v2i32 = any_extend"

; ModuleID = 'bugpoint-reduced-simplified.bc'
target datalayout =
"e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
target triple = "hexagon-unknown-linux-gnu"

; Loads a <4 x i8> vector, zero-extends it to <4 x i32> and stores the
; widened vector. Both memory operands are undef pointers, and the block
; ends in unreachable.
define void @foo() nounwind {
entry:
%_p_vec_full48 = load <4 x i8>, <4 x i8>* undef, align 8
%0 = zext <4 x i8> %_p_vec_full48 to <4 x i32>
store <4 x i32> %0, <4 x i32>* undef, align 8
unreachable
}
27 changes: 27 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-apint-truncate.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
; RUN: llc -march=hexagon < %s
; Used to fail with "Invalid APInt Truncate request".
; Used to fail with "Cannot select: 0x596010: v2i32 = sign_extend_inreg".

; ModuleID = 'bugpoint-reduced-simplified.bc'
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
target triple = "hexagon-unknown-linux-gnu"

; Loop whose body sign-extends a loaded <4 x i8> vector to <4 x i32>,
; multiplies every lane by 3, adds 21 to every lane and stores the result.
; The loop condition compares an undef value against 63, and all memory
; operands are undef pointers.
define void @foo() nounwind {
entry:
br label %polly.loop_header

polly.loop_after: ; preds = %polly.loop_header
unreachable

polly.loop_header: ; preds = %polly.loop_body, %entry
%0 = icmp sle i32 undef, 63
br i1 %0, label %polly.loop_body, label %polly.loop_after

polly.loop_body: ; preds = %polly.loop_header
; sext <4 x i8> -> <4 x i32>, followed by a vector mul and add with splat
; constants.
%_p_vec_full = load <4 x i8>, <4 x i8>* undef, align 8
%1 = sext <4 x i8> %_p_vec_full to <4 x i32>
%p_vec = mul <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
%mulp_vec = add <4 x i32> %p_vec, <i32 21, i32 21, i32 21, i32 21>
store <4 x i32> %mulp_vec, <4 x i32>* undef, align 8
br label %polly.loop_header
}
61 changes: 61 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-bad-bitcast.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s
; REQUIRES: asserts
; Check for successful compilation.

target datalayout = "e-m:e-p:32:32-i1:32-i64:64-a:0-v32:32-n16:32"
target triple = "hexagon"

@input_buf = internal unnamed_addr constant [256 x i16] [i16 0, i16 0, i16 0, i16 1280, i16 2560, i16 4864, i16 7168, i16 9472, i16 11776, i16 12672, i16 13568, i16 14080, i16 15360, i16 15360, i16 15360, i16 15360, i16 15360, i16 15104, i16 14848, i16 14592, i16 14336, i16 14080, i16 14080, i16 13952, i16 13824, i16 13696, i16 13568, i16 13440, i16 13312, i16 13184, i16 13056, i16 12928, i16 12800, i16 12800, i16 12800, i16 12800, i16 12800, i16 12672, i16 12544, i16 12544, i16 12544, i16 12544, i16 12672, i16 12800, i16 12800, i16 12928, i16 13056, i16 13184, i16 13312, i16 13440, i16 13568, i16 13696, i16 13824, i16 14208, i16 14592, i16 14976, i16 15104, i16 15360, i16 15616, i16 15872, i16 16128, i16 16512, i16 16896, i16 17152, i16 17408, i16 17536, i16 17664, i16 17792, i16 17920, i16 18304, i16 18688, i16 19072, i16 19456, i16 19712, i16 19968, i16 20224, i16 20480, i16 20608, i16 20864, i16 20992, i16 21248, i16 21248, i16 21248, i16 21248, i16 21248, i16 21248, i16 21376, i16 21504, i16 21760, i16 21760, i16 21632, i16 21504, i16 21504, i16 21632, i16 21632, i16 21504, i16 21504, i16 21376, i16 21248, i16 21120, i16 20992, i16 20992, i16 20864, i16 20736, i16 20736, i16 20736, i16 20480, i16 20352, i16 20224, i16 20224, i16 20224, i16 20224, i16 20352, i16 20352, i16 20480, i16 20352, i16 20352, i16 20352, i16 20352, i16 20224, i16 20224, i16 20224, i16 20096, i16 20096, i16 19968, i16 19840, i16 19712, i16 19584, i16 19456, i16 19584, i16 19584, i16 19456, i16 19456, i16 19328, i16 19328, i16 19456, i16 19456, i16 19328, i16 19328, i16 19200, i16 19200, i16 19200, i16 19072, i16 19072, i16 18944, i16 18816, i16 18688, i16 18560, i16 18432, i16 18304, i16 18304, i16 18176, i16 18176, i16 18176, i16 18304, i16 18304, i16 18432, i16 18560, i16 18432, i16 18176, i16 17920, i16 17920, i16 17792, i16 17792, i16 17664, i16 17664, i16 17536, i16 17536, i16 17408, i16 17408, i16 17280, i16 17280, i16 17280, i16 17152, i16 17152, i16 17152, i16 17152, i16 17024, 
i16 17024, i16 16896, i16 16896, i16 16896, i16 16768, i16 16768, i16 16640, i16 16640, i16 16512, i16 16512, i16 16384, i16 16256, i16 16128, i16 16000, i16 15872, i16 15744, i16 15616, i16 15488, i16 15360, i16 15488, i16 15360, i16 15232, i16 15360, i16 15232, i16 15104, i16 14976, i16 14336, i16 14336, i16 14592, i16 14464, i16 13824, i16 13824, i16 13568, i16 13568, i16 13440, i16 13312, i16 13184, i16 13056, i16 13056, i16 13056, i16 12928, i16 12800, i16 12672, i16 12672, i16 12544, i16 12416, i16 12288, i16 12160, i16 11904, i16 11776, i16 11571, i16 11520, i16 11392, i16 11136, i16 10905, i16 10752, i16 10624, i16 10444, i16 10240, i16 9984, i16 9728, i16 9472, i16 9216, i16 8960, i16 8704, i16 8448, i16 8192, i16 7936, i16 7680, i16 7424, i16 7168, i16 6400, i16 5632, i16 4864, i16 3584, i16 1536, i16 0, i16 0], align 8

; Function Attrs: nounwind
; Three phases:
;   1. polly.stmt.for.body walks @input_buf four i16 elements at a time and
;      keeps a lane-wise signed maximum in a <4 x i16> accumulator that
;      starts at -32768 in every lane.
;   2. polly.loop_exit.loopexit reduces the four lanes to one scalar maximum
;      (vector compare/select on the two halves, then a scalar
;      compare/select) and sign-extends it to i32.
;   3. for.body8 calls @fxpBitAllocation with that value, stepping the last
;      argument from 0 by 25 until it reaches 500.
; Always returns 0.
define i32 @t_run_test() #0 {
entry:
%WaterLeveldB_out = alloca i16, align 2
br label %polly.stmt.for.body

for.body8: ; preds = %for.body8, %polly.loop_exit.loopexit
%i.120 = phi i32 [ 0, %polly.loop_exit.loopexit ], [ %inc11.24, %for.body8 ]
; The callee is declared variadic and is called through a bitcast to a
; fixed eight-argument prototype.
%call = call i32 bitcast (i32 (...)* @fxpBitAllocation to i32 (i32, i32, i32, i32, i16*, i32, i32, i32)*)(i32 0, i32 0, i32 256, i32 %conv9, i16* %WaterLeveldB_out, i32 0, i32 1920, i32 %i.120) #2
%inc11.24 = add i32 %i.120, 25
%exitcond.24 = icmp eq i32 %inc11.24, 500
br i1 %exitcond.24, label %for.end12, label %for.body8

for.end12: ; preds = %for.body8
ret i32 0

polly.loop_exit.loopexit: ; preds = %polly.stmt.for.body
%WaterLeveldB.1p_vsel.lcssa = phi <4 x i16> [ %WaterLeveldB.1p_vsel, %polly.stmt.for.body ]
; Split the accumulator into <2 x i16> halves and take their lane-wise max.
%_low_half = shufflevector <4 x i16> %WaterLeveldB.1p_vsel.lcssa, <4 x i16> undef, <2 x i32> <i32 0, i32 1>
%_high_half = shufflevector <4 x i16> %WaterLeveldB.1p_vsel.lcssa, <4 x i16> undef, <2 x i32> <i32 2, i32 3>
%0 = icmp sgt <2 x i16> %_low_half, %_high_half
%1 = select <2 x i1> %0, <2 x i16> %_low_half, <2 x i16> %_high_half
; Final scalar max of the two remaining lanes.
%2 = extractelement <2 x i16> %1, i32 0
%3 = extractelement <2 x i16> %1, i32 1
%4 = icmp sgt i16 %2, %3
%5 = select i1 %4, i16 %2, i16 %3
%conv9 = sext i16 %5 to i32
br label %for.body8

polly.stmt.for.body: ; preds = %entry, %polly.stmt.for.body
%WaterLeveldB.1p_vsel35 = phi <4 x i16> [ <i16 -32768, i16 -32768, i16 -32768, i16 -32768>, %entry ], [ %WaterLeveldB.1p_vsel, %polly.stmt.for.body ]
%scevgep.phi = phi i16* [ getelementptr inbounds ([256 x i16], [256 x i16]* @input_buf, i32 0, i32 0), %entry ], [ %scevgep.inc, %polly.stmt.for.body ]
%polly.indvar = phi i32 [ 0, %entry ], [ %polly.indvar_next, %polly.stmt.for.body ]
%vector_ptr = bitcast i16* %scevgep.phi to <4 x i16>*
%_p_vec_full = load <4 x i16>, <4 x i16>* %vector_ptr, align 8
; Lane-wise signed max of the loaded vector and the running accumulator.
%cmp2p_vicmp = icmp sgt <4 x i16> %_p_vec_full, %WaterLeveldB.1p_vsel35
%WaterLeveldB.1p_vsel = select <4 x i1> %cmp2p_vicmp, <4 x i16> %_p_vec_full, <4 x i16> %WaterLeveldB.1p_vsel35
%polly.indvar_next = add nsw i32 %polly.indvar, 4
; The exit test uses the pre-increment induction value.
%polly.loop_cond = icmp slt i32 %polly.indvar, 252
%scevgep.inc = getelementptr i16, i16* %scevgep.phi, i32 4
br i1 %polly.loop_cond, label %polly.stmt.for.body, label %polly.loop_exit.loopexit
}

declare i32 @fxpBitAllocation(...) #1

attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }

!llvm.ident = !{!0}

!0 = !{!"QuIC LLVM Hexagon Clang version 3.1"}
68 changes: 68 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-bitcast-1.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
; RUN: llc -march=hexagon < %s
; REQUIRES: asserts
; Used to fail with: Assertion `VT.getSizeInBits() == Operand.getValueType().getSizeInBits() && "Cannot BITCAST between types of different sizes!"' failed.

target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
target triple = "hexagon-unknown-linux-gnu"

; Reduced control-flow skeleton: most branches test constant or undef
; conditions and lead to unreachable blocks. The only block with real work
; is if.then155, which splats a truncated call result into <2 x i16>,
; sign-extends it to <2 x i32>, shifts each lane left by 2 and stores lane 0.
define void @foo() nounwind {
entry:
br label %while.body

while.body: ; preds = %if.then155, %if.then12, %entry
%cmp.i = icmp eq i8* undef, null
br i1 %cmp.i, label %lab_ci.exit, label %if.end.i

if.end.i: ; preds = %while.body
unreachable

lab_ci.exit: ; preds = %while.body
br i1 false, label %if.then, label %if.else

if.then: ; preds = %lab_ci.exit
unreachable

if.else: ; preds = %lab_ci.exit
br i1 undef, label %if.then12, label %if.else17

if.then12: ; preds = %if.else
br label %while.body

if.else17: ; preds = %if.else
br i1 false, label %if.then22, label %if.else35

if.then22: ; preds = %if.else17
unreachable

if.else35: ; preds = %if.else17
br i1 false, label %if.then40, label %if.else83

if.then40: ; preds = %if.else35
unreachable

if.else83: ; preds = %if.else35
br i1 false, label %if.then88, label %if.else150

if.then88: ; preds = %if.else83
unreachable

if.else150: ; preds = %if.else83
%cmp154 = icmp eq i32 undef, 0
br i1 %cmp154, label %if.then155, label %if.else208

if.then155: ; preds = %if.else150
; i32 call result -> i16 -> <1 x i16> -> <2 x i16> splat -> <2 x i32>,
; then shl by 2 and a scalar store of lane 0 to a null pointer.
%call191 = call i32 @strtol() nounwind
%conv192 = trunc i32 %call191 to i16
%_p_splat_one = insertelement <1 x i16> undef, i16 %conv192, i32 0
%_p_splat = shufflevector <1 x i16> %_p_splat_one, <1 x i16> undef, <2 x i32> zeroinitializer
%0 = sext <2 x i16> %_p_splat to <2 x i32>
%mul198p_vec = shl <2 x i32> %0, <i32 2, i32 2>
%1 = extractelement <2 x i32> %mul198p_vec, i32 0
store i32 %1, i32* null, align 4
br label %while.body

if.else208: ; preds = %if.else150
unreachable
}

declare i32 @strtol() nounwind
56 changes: 56 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-bitcast.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
; RUN: llc -march=hexagon < %s
; REQUIRES: asserts
; Used to fail with "Cannot BITCAST between types of different sizes!"

target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
target triple = "hexagon"

; Reduced control-flow skeleton: the branches test constant or undef
; conditions. The only block with real work is if.then155, which splats a
; zero <1 x i16> into <2 x i16>, sign-extends it to <2 x i32>, multiplies
; each lane by 4 and stores lane 0.
define void @foo() nounwind {
entry:
br label %while.body

while.body: ; preds = %if.then155, %if.then12, %if.then, %entry
br i1 undef, label %if.then, label %if.else

if.then: ; preds = %while.body
br label %while.body

if.else: ; preds = %while.body
br i1 undef, label %if.then12, label %if.else17

if.then12: ; preds = %if.else
br label %while.body

if.else17: ; preds = %if.else
br i1 false, label %if.then22, label %if.else35

if.then22: ; preds = %if.else17
unreachable

if.else35: ; preds = %if.else17
br i1 false, label %if.then40, label %if.else83

if.then40: ; preds = %if.else35
unreachable

if.else83: ; preds = %if.else35
br i1 false, label %if.then88, label %if.else150

if.then88: ; preds = %if.else83
unreachable

if.else150: ; preds = %if.else83
%cmp154 = icmp eq i32 undef, 0
br i1 %cmp154, label %if.then155, label %if.else208

if.then155: ; preds = %if.else150
; <1 x i16> zero -> <2 x i16> splat -> <2 x i32>, then a vector mul by 4
; and a scalar store of lane 0 to an undef pointer.
%_p_splat.1 = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <2 x i32> zeroinitializer
%0 = sext <2 x i16> %_p_splat.1 to <2 x i32>
%mul198p_vec.1 = mul <2 x i32> %0, <i32 4, i32 4>
%1 = extractelement <2 x i32> %mul198p_vec.1, i32 0
store i32 %1, i32* undef, align 4
br label %while.body

if.else208: ; preds = %if.else150
unreachable
}
29 changes: 29 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-cst-v4i32.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
; This one should generate a combine with two immediates.
; CHECK: combine(#7, #7)
@B = common global [400 x i32] zeroinitializer, align 8
@A = common global [400 x i32] zeroinitializer, align 8
@C = common global [400 x i32] zeroinitializer, align 8

; Vector loop over the i32 arrays @A and @B, four elements per iteration
; (index starts at 0, steps by 4, stops once it is no longer below 400).
; Each iteration computes A[i..i+3] += B[i..i+3] * 7 using <4 x i32>
; operations.
define void @run() nounwind {
entry:
br label %polly.loop_body

polly.loop_after: ; preds = %polly.loop_body
ret void

polly.loop_body: ; preds = %entry, %polly.loop_body
%polly.loopiv23 = phi i32 [ 0, %entry ], [ %polly.next_loopiv, %polly.loop_body ]
%polly.next_loopiv = add nsw i32 %polly.loopiv23, 4
%p_arrayidx1 = getelementptr [400 x i32], [400 x i32]* @A, i32 0, i32 %polly.loopiv23
%p_arrayidx = getelementptr [400 x i32], [400 x i32]* @B, i32 0, i32 %polly.loopiv23
%vector_ptr = bitcast i32* %p_arrayidx to <4 x i32>*
%_p_vec_full = load <4 x i32>, <4 x i32>* %vector_ptr, align 8
; Multiply by a splat constant of 7 in every lane.
%mulp_vec = mul <4 x i32> %_p_vec_full, <i32 7, i32 7, i32 7, i32 7>
%vector_ptr12 = bitcast i32* %p_arrayidx1 to <4 x i32>*
%_p_vec_full13 = load <4 x i32>, <4 x i32>* %vector_ptr12, align 8
%addp_vec = add <4 x i32> %_p_vec_full13, %mulp_vec
store <4 x i32> %addp_vec, <4 x i32>* %vector_ptr12, align 8
%0 = icmp slt i32 %polly.next_loopiv, 400
br i1 %0, label %polly.loop_body, label %polly.loop_after
}
30 changes: 30 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-cst-v4i8.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
; Make sure we can build the constant vector <1, 2, 3, 4>
; CHECK-DAG: ##B
; CHECK-DAG: ##A
@B = common global [400 x i8] zeroinitializer, align 8
@A = common global [400 x i8] zeroinitializer, align 8
@C = common global [400 x i8] zeroinitializer, align 8

; Vector loop over the i8 arrays @A and @B, four elements per iteration
; (index starts at 0, steps by 4, stops once it is no longer below 400).
; Each iteration computes A[i..i+3] += B[i..i+3] * <1, 2, 3, 4> using
; <4 x i8> operations.
define void @run() nounwind {
entry:
br label %polly.loop_body

polly.loop_after: ; preds = %polly.loop_body
ret void

polly.loop_body: ; preds = %entry, %polly.loop_body
%polly.loopiv25 = phi i32 [ 0, %entry ], [ %polly.next_loopiv, %polly.loop_body ]
%polly.next_loopiv = add i32 %polly.loopiv25, 4
%p_arrayidx1 = getelementptr [400 x i8], [400 x i8]* @A, i32 0, i32 %polly.loopiv25
%p_arrayidx = getelementptr [400 x i8], [400 x i8]* @B, i32 0, i32 %polly.loopiv25
%vector_ptr = bitcast i8* %p_arrayidx to <4 x i8>*
%_p_vec_full = load <4 x i8>, <4 x i8>* %vector_ptr, align 8
; Multiply by a constant vector whose lanes all differ.
%mulp_vec = mul <4 x i8> %_p_vec_full, <i8 1, i8 2, i8 3, i8 4>
%vector_ptr14 = bitcast i8* %p_arrayidx1 to <4 x i8>*
%_p_vec_full15 = load <4 x i8>, <4 x i8>* %vector_ptr14, align 8
%addp_vec = add <4 x i8> %_p_vec_full15, %mulp_vec
store <4 x i8> %addp_vec, <4 x i8>* %vector_ptr14, align 8
%0 = icmp slt i32 %polly.next_loopiv, 400
br i1 %0, label %polly.loop_body, label %polly.loop_after
}
29 changes: 29 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-cst.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; Make sure we can build the constant vector <7, 7, 7, 7>
; CHECK: vaddub
@B = common global [400 x i8] zeroinitializer, align 8
@A = common global [400 x i8] zeroinitializer, align 8
@C = common global [400 x i8] zeroinitializer, align 8

; Vector loop over the i8 arrays @A and @B, four elements per iteration
; (index starts at 0, steps by 4, stops once it is no longer below 400).
; Each iteration computes A[i..i+3] += B[i..i+3] * 7 using <4 x i8>
; operations.
define void @run() nounwind {
entry:
br label %polly.loop_body

polly.loop_after: ; preds = %polly.loop_body
ret void

polly.loop_body: ; preds = %entry, %polly.loop_body
%polly.loopiv25 = phi i32 [ 0, %entry ], [ %polly.next_loopiv, %polly.loop_body ]
%polly.next_loopiv = add i32 %polly.loopiv25, 4
%p_arrayidx1 = getelementptr [400 x i8], [400 x i8]* @A, i32 0, i32 %polly.loopiv25
%p_arrayidx = getelementptr [400 x i8], [400 x i8]* @B, i32 0, i32 %polly.loopiv25
%vector_ptr = bitcast i8* %p_arrayidx to <4 x i8>*
%_p_vec_full = load <4 x i8>, <4 x i8>* %vector_ptr, align 8
; Multiply by a splat constant of 7 in every lane.
%mulp_vec = mul <4 x i8> %_p_vec_full, <i8 7, i8 7, i8 7, i8 7>
%vector_ptr14 = bitcast i8* %p_arrayidx1 to <4 x i8>*
%_p_vec_full15 = load <4 x i8>, <4 x i8>* %vector_ptr14, align 8
%addp_vec = add <4 x i8> %_p_vec_full15, %mulp_vec
store <4 x i8> %addp_vec, <4 x i8>* %vector_ptr14, align 8
%0 = icmp slt i32 %polly.next_loopiv, 400
br i1 %0, label %polly.loop_body, label %polly.loop_after
}
96 changes: 96 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-extract.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s

; Check that we do not generate extract.
; CHECK-NOT: extractu
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
target triple = "hexagon"

; Nested loop that stores sext(A[k]) * sext(val) into C[k] for k = row*N + j.
;   - The outer loop (for.cond1.preheader.single_entry) runs %indvar from 0
;     to N-1; the function returns immediately when N is 0.
;   - polly.loop_body is the vector inner loop: two elements per iteration,
;     loaded as <2 x i16>, sign-extended to <2 x i32>, multiplied by a
;     two-lane copy of the sign-extended val and stored as <2 x i32>.
;   - polly.stmt.for.body331 is the scalar loop for the remaining elements.
define void @foo(i32 %N, i32* nocapture %C, i16* nocapture %A, i16 signext %val) #0 {
entry:
%cmp14 = icmp eq i32 %N, 0
br i1 %cmp14, label %for.end11, label %for.cond1.preheader.single_entry.preheader

for.cond1.preheader.single_entry.preheader: ; preds = %entry
; %leftover_lb = (N - 1) with the lowest bit cleared; it bounds the vector
; loop below.
%0 = add i32 %N, -1
%leftover_lb = and i32 %0, -2
%p_conv4 = sext i16 %val to i32
br label %for.cond1.preheader.single_entry

for.cond1.preheader.single_entry: ; preds = %for.inc9, %for.cond1.preheader.single_entry.preheader
%indvar = phi i32 [ %indvar.next, %for.inc9 ], [ 0, %for.cond1.preheader.single_entry.preheader ]
%1 = mul i32 %indvar, %N
; Skip the vector loop when N < 2 or when %leftover_lb < 1.
%.not = icmp slt i32 %N, 2
%.not41 = icmp slt i32 %leftover_lb, 1
%brmerge = or i1 %.not, %.not41
%.mux = select i1 %.not, i32 0, i32 %leftover_lb
br i1 %brmerge, label %polly.loop_header26.preheader, label %polly.loop_body.lr.ph

for.inc9.loopexit: ; preds = %polly.stmt.for.body331
br label %for.inc9

for.inc9: ; preds = %for.inc9.loopexit, %polly.loop_header26.preheader
%indvar.next = add i32 %indvar, 1
%exitcond40 = icmp eq i32 %indvar.next, %N
br i1 %exitcond40, label %for.end11.loopexit, label %for.cond1.preheader.single_entry

for.end11.loopexit: ; preds = %for.inc9
br label %for.end11

for.end11: ; preds = %for.end11.loopexit, %entry
ret void

polly.loop_body.lr.ph: ; preds = %for.cond1.preheader.single_entry
; Both two-lane vectors are built with the combinew intrinsic followed by a
; bitcast from i64 to <2 x i32>; lane 0 of the first one is then extracted
; to form the row base index.
%2 = call i64 @llvm.hexagon.A2.combinew(i32 %1, i32 %1)
%3 = bitcast i64 %2 to <2 x i32>
%4 = extractelement <2 x i32> %3, i32 0
%5 = call i64 @llvm.hexagon.A2.combinew(i32 %p_conv4, i32 %p_conv4)
%6 = bitcast i64 %5 to <2 x i32>
%p_arrayidx8.gep = getelementptr i32, i32* %C, i32 %4
%p_arrayidx.gep = getelementptr i16, i16* %A, i32 %4
br label %polly.loop_body

polly.loop_body: ; preds = %polly.loop_body.lr.ph, %polly.loop_body
%p_arrayidx8.phi = phi i32* [ %p_arrayidx8.gep, %polly.loop_body.lr.ph ], [ %p_arrayidx8.inc, %polly.loop_body ]
%p_arrayidx.phi = phi i16* [ %p_arrayidx.gep, %polly.loop_body.lr.ph ], [ %p_arrayidx.inc, %polly.loop_body ]
%polly.loopiv38 = phi i32 [ 0, %polly.loop_body.lr.ph ], [ %polly.next_loopiv, %polly.loop_body ]
%polly.next_loopiv = add nsw i32 %polly.loopiv38, 2
%vector_ptr = bitcast i16* %p_arrayidx.phi to <2 x i16>*
%_p_vec_full = load <2 x i16>, <2 x i16>* %vector_ptr, align 2
%7 = sext <2 x i16> %_p_vec_full to <2 x i32>
%mul5p_vec = mul <2 x i32> %7, %6
%vector_ptr21 = bitcast i32* %p_arrayidx8.phi to <2 x i32>*
store <2 x i32> %mul5p_vec, <2 x i32>* %vector_ptr21, align 4
%8 = icmp slt i32 %polly.next_loopiv, %leftover_lb
%p_arrayidx8.inc = getelementptr i32, i32* %p_arrayidx8.phi, i32 2
%p_arrayidx.inc = getelementptr i16, i16* %p_arrayidx.phi, i32 2
br i1 %8, label %polly.loop_body, label %polly.loop_header26.preheader.loopexit

polly.loop_header26.preheader.loopexit: ; preds = %polly.loop_body
br label %polly.loop_header26.preheader

polly.loop_header26.preheader: ; preds = %polly.loop_header26.preheader.loopexit, %for.cond1.preheader.single_entry
; Start index of the scalar loop: %leftover_lb after the vector loop,
; otherwise %.mux.
%polly.loopiv29.ph = phi i32 [ %.mux, %for.cond1.preheader.single_entry ], [ %leftover_lb, %polly.loop_header26.preheader.loopexit ]
%9 = icmp slt i32 %polly.loopiv29.ph, %N
br i1 %9, label %polly.stmt.for.body331.preheader, label %for.inc9

polly.stmt.for.body331.preheader: ; preds = %polly.loop_header26.preheader
br label %polly.stmt.for.body331

polly.stmt.for.body331: ; preds = %polly.stmt.for.body331.preheader, %polly.stmt.for.body331
%polly.loopiv2939 = phi i32 [ %polly.next_loopiv30, %polly.stmt.for.body331 ], [ %polly.loopiv29.ph, %polly.stmt.for.body331.preheader ]
%polly.next_loopiv30 = add nsw i32 %polly.loopiv2939, 1
%p_32 = add i32 %polly.loopiv2939, %1
%p_arrayidx833 = getelementptr i32, i32* %C, i32 %p_32
%p_arrayidx34 = getelementptr i16, i16* %A, i32 %p_32
%_p_scalar_ = load i16, i16* %p_arrayidx34, align 2
%p_conv = sext i16 %_p_scalar_ to i32
%p_mul5 = mul nsw i32 %p_conv, %p_conv4
store i32 %p_mul5, i32* %p_arrayidx833, align 4
%exitcond = icmp eq i32 %polly.next_loopiv30, %N
br i1 %exitcond, label %for.inc9.loopexit, label %polly.stmt.for.body331
}

declare i64 @llvm.hexagon.A2.combinew(i32, i32) #1

attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
26 changes: 26 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-fma.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s
; REQUIRES: asserts
; Used to fail with "SplitVectorResult #0: 0x16cbe60: v4f64 = fma"

; ModuleID = 'bugpoint-reduced-simplified.bc'
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
target triple = "hexagon-unknown-linux-gnu"

; Loop whose body loads a <4 x double> vector, multiplies every lane by 7.0,
; adds the product to an undef vector and stores the sum. The loop condition
; compares an undef value against 399, and all memory operands are undef
; pointers.
define void @run() nounwind {
entry:
br label %polly.loop_header

polly.loop_after: ; preds = %polly.loop_header
ret void

polly.loop_header: ; preds = %polly.loop_body, %entry
%0 = icmp sle i32 undef, 399
br i1 %0, label %polly.loop_body, label %polly.loop_after

polly.loop_body: ; preds = %polly.loop_header
; fmul feeding directly into fadd on <4 x double>.
%_p_vec_full = load <4 x double>, <4 x double>* undef, align 8
%mulp_vec = fmul <4 x double> %_p_vec_full, <double 7.000000e+00, double 7.000000e+00, double 7.000000e+00, double 7.000000e+00>
%addp_vec = fadd <4 x double> undef, %mulp_vec
store <4 x double> %addp_vec, <4 x double>* undef, align 8
br label %polly.loop_header
}
50 changes: 50 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-illegal-type.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
; RUN: llc -march=hexagon < %s
; REQUIRES: asserts
; Used to fail with "Unexpected illegal type!"
; Used to fail with "Cannot select: ch = store x,x,x,<ST4[undef](align=8), trunc to v4i8>"

; ModuleID = 'bugpoint-reduced-simplified.bc'
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
target triple = "hexagon-unknown-linux-gnu"

; Reduced control-flow skeleton of empty loops and a switch on undef. The
; only block with real work is polly.loop_header228, a self-looping block
; that splats a loaded <1 x i16> into <4 x i16>, truncates it to <4 x i8>
; and stores the result.
define void @foo() nounwind {
entry:
br label %for.body

for.body: ; preds = %for.body, %entry
br i1 undef, label %for.end, label %for.body

for.end: ; preds = %for.body
br label %for.body71

for.body71: ; preds = %for.body71, %for.end
br i1 undef, label %for.end96, label %for.body71

for.end96: ; preds = %for.body71
switch i32 undef, label %sw.epilog [
i32 1, label %for.cond375.preheader
i32 8, label %for.cond591
]

for.cond375.preheader: ; preds = %for.end96
br label %polly.loop_header228

for.cond591: ; preds = %for.end96
br label %for.body664

for.body664: ; preds = %for.body664, %for.cond591
br i1 undef, label %for.end670, label %for.body664

for.end670: ; preds = %for.body664
br label %sw.epilog

sw.epilog: ; preds = %for.end670, %for.end96
ret void

polly.loop_header228: ; preds = %polly.loop_header228, %for.cond375.preheader
; <1 x i16> load -> <4 x i16> splat -> trunc to <4 x i8> -> store; this
; block branches only to itself.
%_p_splat_one = load <1 x i16>, <1 x i16>* undef, align 8
%_p_splat = shufflevector <1 x i16> %_p_splat_one, <1 x i16> %_p_splat_one, <4 x i32> zeroinitializer
%0 = trunc <4 x i16> %_p_splat to <4 x i8>
store <4 x i8> %0, <4 x i8>* undef, align 8
br label %polly.loop_header228
}
71 changes: 71 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-insert-extract-elt.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
; RUN: llc -march=hexagon < %s
; Used to fail with an infinite recursion in the insn selection.
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
target triple = "hexagon-unknown-linux-gnu"

%struct.elt = type { [2 x [4 x %struct.block]] }
%struct.block = type { [2 x i16] }

; Exercises insertelement/extractelement on <4 x i16>: two vectors are built
; lane by lane, shifted left by one, and written back one lane at a time.
; %struct.elt wraps a [2 x [4 x %struct.block]] array and each %struct.block
; holds two i16 values.
define void @foo(%struct.elt* noalias nocapture %p0, %struct.elt* noalias nocapture %p1) nounwind {
entry:
; Copy block 3 of %p0, as a single i32, into blocks 2 and 3 of %p1.
%arrayidx1 = getelementptr inbounds %struct.elt, %struct.elt* %p1, i32 0, i32 0, i32 0, i32 3
%arrayidx4 = getelementptr inbounds %struct.elt, %struct.elt* %p1, i32 0, i32 0, i32 0, i32 2
%arrayidx7 = getelementptr inbounds %struct.elt, %struct.elt* %p0, i32 0, i32 0, i32 0, i32 3
%0 = bitcast %struct.block* %arrayidx7 to i32*
%1 = bitcast %struct.block* %arrayidx4 to i32*
%2 = load i32, i32* %0, align 4
store i32 %2, i32* %1, align 4
%3 = bitcast %struct.block* %arrayidx1 to i32*
store i32 %2, i32* %3, align 4
; Copy block 2 of %p0, as a single i32, into blocks 0 and 1 of %p1.
%arrayidx10 = getelementptr inbounds %struct.elt, %struct.elt* %p1, i32 0, i32 0, i32 0, i32 1
%arrayidx16 = getelementptr inbounds %struct.elt, %struct.elt* %p0, i32 0, i32 0, i32 0, i32 2
%4 = bitcast %struct.block* %arrayidx16 to i32*
%5 = bitcast %struct.elt* %p1 to i32*
%6 = load i32, i32* %4, align 4
store i32 %6, i32* %5, align 4
%7 = bitcast %struct.block* %arrayidx10 to i32*
store i32 %6, i32* %7, align 4
; Addresses of the second i16 of blocks 0-3, first in %p0 (%p_arrayidx26*)
; and then in %p1 (%p_arrayidx20*).
%p_arrayidx26 = getelementptr %struct.elt, %struct.elt* %p0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1
%p_arrayidx2632 = getelementptr %struct.elt, %struct.elt* %p0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1
%p_arrayidx2633 = getelementptr %struct.elt, %struct.elt* %p0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 1
%p_arrayidx2634 = getelementptr %struct.elt, %struct.elt* %p0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 1
%p_arrayidx20 = getelementptr %struct.elt, %struct.elt* %p1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1
%p_arrayidx2035 = getelementptr %struct.elt, %struct.elt* %p1, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1
%p_arrayidx2036 = getelementptr %struct.elt, %struct.elt* %p1, i32 0, i32 0, i32 0, i32 2, i32 0, i32 1
%p_arrayidx2037 = getelementptr %struct.elt, %struct.elt* %p1, i32 0, i32 0, i32 0, i32 3, i32 0, i32 1
; First vector: lanes 0-1 take the upper 16 bits of %6, lanes 2-3 the upper
; 16 bits of %2.
%8 = lshr i32 %6, 16
%9 = trunc i32 %8 to i16
%_p_vec_ = insertelement <4 x i16> undef, i16 %9, i32 0
%_p_vec_39 = insertelement <4 x i16> %_p_vec_, i16 %9, i32 1
%10 = lshr i32 %2, 16
%11 = trunc i32 %10 to i16
%_p_vec_41 = insertelement <4 x i16> %_p_vec_39, i16 %11, i32 2
%_p_vec_43 = insertelement <4 x i16> %_p_vec_41, i16 %11, i32 3
; Shift every lane left by one, then store lanes 0-3 to the second i16 of
; blocks 0-3 of %p1.
%shlp_vec = shl <4 x i16> %_p_vec_43, <i16 1, i16 1, i16 1, i16 1>
%12 = extractelement <4 x i16> %shlp_vec, i32 0
store i16 %12, i16* %p_arrayidx20, align 2
%13 = extractelement <4 x i16> %shlp_vec, i32 1
store i16 %13, i16* %p_arrayidx2035, align 2
%14 = extractelement <4 x i16> %shlp_vec, i32 2
store i16 %14, i16* %p_arrayidx2036, align 2
%15 = extractelement <4 x i16> %shlp_vec, i32 3
store i16 %15, i16* %p_arrayidx2037, align 2
; Second vector: gather the second i16 of blocks 0-3 of %p0, shift each lane
; left by one, and scatter the lanes back to the same four addresses.
%_p_scalar_44 = load i16, i16* %p_arrayidx26, align 2
%_p_vec_45 = insertelement <4 x i16> undef, i16 %_p_scalar_44, i32 0
%_p_scalar_46 = load i16, i16* %p_arrayidx2632, align 2
%_p_vec_47 = insertelement <4 x i16> %_p_vec_45, i16 %_p_scalar_46, i32 1
%_p_scalar_48 = load i16, i16* %p_arrayidx2633, align 2
%_p_vec_49 = insertelement <4 x i16> %_p_vec_47, i16 %_p_scalar_48, i32 2
%_p_scalar_50 = load i16, i16* %p_arrayidx2634, align 2
%_p_vec_51 = insertelement <4 x i16> %_p_vec_49, i16 %_p_scalar_50, i32 3
%shl28p_vec = shl <4 x i16> %_p_vec_51, <i16 1, i16 1, i16 1, i16 1>
%16 = extractelement <4 x i16> %shl28p_vec, i32 0
store i16 %16, i16* %p_arrayidx26, align 2
%17 = extractelement <4 x i16> %shl28p_vec, i32 1
store i16 %17, i16* %p_arrayidx2632, align 2
%18 = extractelement <4 x i16> %shl28p_vec, i32 2
store i16 %18, i16* %p_arrayidx2633, align 2
%19 = extractelement <4 x i16> %shl28p_vec, i32 3
store i16 %19, i16* %p_arrayidx2634, align 2
ret void
}
26 changes: 26 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-load-1.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
; RUN: llc -march=hexagon < %s
; Used to fail with "Cannot select: v2i32,ch = load 0x16c5890, 0x16f76e0, 0x16f76e0<LD2[undef](align=8), sext from v2i8>"

; ModuleID = 'bugpoint-reduced-simplified.bc'
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
target triple = "hexagon-unknown-linux-gnu"

; Reduced reproducer: a loop whose body loads a <2 x i8> vector, sign-extends
; it to <2 x i32>, does arithmetic on it and stores the <2 x i32> result.
define void @foo() nounwind {
entry:
br label %polly.loop_header

polly.loop_after: ; preds = %polly.loop_header
unreachable

polly.loop_header: ; preds = %polly.loop_body, %entry
%0 = icmp sle i32 undef, 63
br i1 %0, label %polly.loop_body, label %polly.loop_after

polly.loop_body: ; preds = %polly.loop_header
; Load followed by sext: presumably this pair is what becomes the
; sign-extending v2i8 load mentioned in the test's header comment.
%_p_vec_full = load <2 x i8>, <2 x i8>* undef, align 8
%1 = sext <2 x i8> %_p_vec_full to <2 x i32>
; Per lane: value * 3 + 21.
%p_vec = mul <2 x i32> %1, <i32 3, i32 3>
%mulp_vec = add <2 x i32> %p_vec, <i32 21, i32 21>
store <2 x i32> %mulp_vec, <2 x i32>* undef, align 8
br label %polly.loop_header
}
76 changes: 76 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-load.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
; RUN: llc -march=hexagon < %s
; Used to fail with "Cannot select: 0x16cf370: v2i16,ch = load"

; ModuleID = 'bugpoint-reduced-simplified.bc'
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
target triple = "hexagon-unknown-linux-gnu"

%struct.ext_hdrs.10.65.142.274.307.318.329.681.692.703.714.725.736.758.791.802.846.857.868.879.890.901.945.956.958 = type { i8, i8, i8, i8, i8, i8, i16, i32, [8 x %struct.hcdc_ext_vec.9.64.141.273.306.317.328.680.691.702.713.724.735.757.790.801.845.856.867.878.889.900.944.955.957] }
%struct.hcdc_ext_vec.9.64.141.273.306.317.328.680.691.702.713.724.735.757.790.801.845.856.867.878.889.900.944.955.957 = type { i8, i8, i16 }

; Reduced reproducer. The only memory access is the <2 x i8> load in
; %if.end240; its value flows back to %while.cond through the %gre_chksum.*
; phi chain (%gre_chksum.1 -> %gre_chksum.2 -> %gre_chksum.0).
define void @foo(%struct.ext_hdrs.10.65.142.274.307.318.329.681.692.703.714.725.736.758.791.802.846.857.868.879.890.901.945.956.958* %hc_ext_info) nounwind {
entry:
br i1 undef, label %if.end, label %if.then

if.then: ; preds = %entry
unreachable

if.end: ; preds = %entry
br i1 undef, label %if.end5, label %if.then3

if.then3: ; preds = %if.end
br label %if.end5

if.end5: ; preds = %if.then3, %if.end
; Two addresses into field 8 of the header struct; they serve only as the
; upper bounds compared against null in %while.cond.
%add.ptr = getelementptr inbounds %struct.ext_hdrs.10.65.142.274.307.318.329.681.692.703.714.725.736.758.791.802.846.857.868.879.890.901.945.956.958, %struct.ext_hdrs.10.65.142.274.307.318.329.681.692.703.714.725.736.758.791.802.846.857.868.879.890.901.945.956.958* %hc_ext_info, i32 0, i32 8, i32 0
%add.ptr22 = getelementptr inbounds %struct.ext_hdrs.10.65.142.274.307.318.329.681.692.703.714.725.736.758.791.802.846.857.868.879.890.901.945.956.958, %struct.ext_hdrs.10.65.142.274.307.318.329.681.692.703.714.725.736.758.791.802.846.857.868.879.890.901.945.956.958* null, i32 0, i32 8, i32 undef
br label %while.cond

while.cond: ; preds = %if.end419, %if.end5
; Loop-carried <2 x i8> value; undef on entry, otherwise the merged value
; from %if.end419.
%gre_chksum.0 = phi <2 x i8> [ undef, %if.end5 ], [ %gre_chksum.2, %if.end419 ]
%cmp23 = icmp ult %struct.hcdc_ext_vec.9.64.141.273.306.317.328.680.691.702.713.724.735.757.790.801.845.856.867.878.889.900.944.955.957* null, %add.ptr
%cmp25 = icmp ult %struct.hcdc_ext_vec.9.64.141.273.306.317.328.680.691.702.713.724.735.757.790.801.845.856.867.878.889.900.944.955.957* null, %add.ptr22
%sel1 = and i1 %cmp23, %cmp25
br i1 %sel1, label %while.body, label %while.end422

while.body: ; preds = %while.cond
; Only case 6 (%if.then195) leads to the vector load; cases 0-5 end in
; unreachable blocks and the default edge skips straight to %if.end419.
switch i8 undef, label %if.end419 [
i8 5, label %if.then70
i8 3, label %if.then70
i8 2, label %if.then70
i8 1, label %if.then70
i8 0, label %if.then70
i8 4, label %if.then93
i8 6, label %if.then195
]

if.then70: ; preds = %while.body, %while.body, %while.body, %while.body, %while.body
unreachable

if.then93: ; preds = %while.body
unreachable

if.then195: ; preds = %while.body
br i1 undef, label %if.end274, label %if.then202

if.then202: ; preds = %if.then195
br label %while.body222

while.body222: ; preds = %while.body222, %if.then202
br i1 undef, label %if.end240, label %while.body222

if.end240: ; preds = %while.body222
; The <2 x i8> vector load this test is about.
%_p_vec_full100 = load <2 x i8>, <2 x i8>* undef, align 8
br label %if.end274

if.end274: ; preds = %if.end240, %if.then195
%gre_chksum.1 = phi <2 x i8> [ %gre_chksum.0, %if.then195 ], [ %_p_vec_full100, %if.end240 ]
br label %if.end419

if.end419: ; preds = %if.end274, %while.body
%gre_chksum.2 = phi <2 x i8> [ %gre_chksum.0, %while.body ], [ %gre_chksum.1, %if.end274 ]
br label %while.cond

while.end422: ; preds = %while.cond
ret void
}
73 changes: 73 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-loadv4i16.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s

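; Check for a halfword load at offset #6 followed by a combine (the align-2
; <4 x i16> load being assembled from halfwords) and a vaddh for the vector add.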
; CHECK: memuh(r{{[0-9]+}} + {{ *}}#6{{ *}})
; CHECK: combine(r{{[0-9]+}}{{ *}},{{ *}}r{{[0-9]+}}{{ *}})
; CHECK: vaddh

target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
target triple = "hexagon"

; Adds %val to each of the %N i16 elements of %A in place. A vector loop
; (%polly.loop_body) handles four elements per iteration and a scalar loop
; (%polly.stmt.for.body29) handles the remaining elements.
define void @matrix_add_const(i32 %N, i16* nocapture %A, i16 signext %val) #0 {
entry:
; Nothing to do when %N is zero.
%cmp5 = icmp eq i32 %N, 0
br i1 %cmp5, label %for.end, label %polly.cond

for.end.loopexit: ; preds = %polly.stmt.for.body29
br label %for.end

for.end: ; preds = %for.end.loopexit, %polly.loop_header24.preheader, %entry
ret void

polly.cond: ; preds = %entry
; The vector path is considered only when %N is greater than 3.
%0 = icmp sgt i32 %N, 3
br i1 %0, label %polly.then, label %polly.loop_header24.preheader

polly.then: ; preds = %polly.cond
; %leftover_lb = (%N - 1) rounded down to a multiple of 4: the index where the
; vector loop stops and the scalar loop takes over.
%1 = add i32 %N, -1
%leftover_lb = and i32 %1, -4
%2 = icmp sgt i32 %leftover_lb, 0
br i1 %2, label %polly.loop_body.lr.ph, label %polly.loop_header24.preheader

polly.loop_body.lr.ph: ; preds = %polly.then
; Build a <4 x i16> vector with %val in every lane, once, ahead of the loop.
%3 = insertelement <4 x i16> undef, i16 %val, i32 0
%4 = insertelement <4 x i16> %3, i16 %val, i32 1
%5 = insertelement <4 x i16> %4, i16 %val, i32 2
%6 = insertelement <4 x i16> %5, i16 %val, i32 3
br label %polly.loop_body

polly.loop_header24.preheader.loopexit: ; preds = %polly.loop_body
br label %polly.loop_header24.preheader

polly.loop_header24.preheader: ; preds = %polly.loop_header24.preheader.loopexit, %polly.then, %polly.cond
; Starting index of the scalar loop: 0 if the vector path was not taken,
; otherwise %leftover_lb.
%polly.loopiv27.ph = phi i32 [ 0, %polly.cond ], [ %leftover_lb, %polly.then ], [ %leftover_lb, %polly.loop_header24.preheader.loopexit ]
%7 = icmp slt i32 %polly.loopiv27.ph, %N
br i1 %7, label %polly.stmt.for.body29.preheader, label %for.end

polly.stmt.for.body29.preheader: ; preds = %polly.loop_header24.preheader
br label %polly.stmt.for.body29

polly.loop_body: ; preds = %polly.loop_body.lr.ph, %polly.loop_body
; Vector loop: load four i16 values (only 2-byte aligned), add the splat of
; %val, store back to the same address, then advance pointer and index by 4.
%p_arrayidx.phi = phi i16* [ %A, %polly.loop_body.lr.ph ], [ %p_arrayidx.inc, %polly.loop_body ]
%polly.loopiv34 = phi i32 [ 0, %polly.loop_body.lr.ph ], [ %polly.next_loopiv, %polly.loop_body ]
%polly.next_loopiv = add nsw i32 %polly.loopiv34, 4
%vector_ptr = bitcast i16* %p_arrayidx.phi to <4 x i16>*
%_p_vec_full = load <4 x i16>, <4 x i16>* %vector_ptr, align 2
%addp_vec = add <4 x i16> %_p_vec_full, %6
store <4 x i16> %addp_vec, <4 x i16>* %vector_ptr, align 2
%8 = icmp slt i32 %polly.next_loopiv, %leftover_lb
%p_arrayidx.inc = getelementptr i16, i16* %p_arrayidx.phi, i32 4
br i1 %8, label %polly.loop_body, label %polly.loop_header24.preheader.loopexit

polly.stmt.for.body29: ; preds = %polly.stmt.for.body29.preheader, %polly.stmt.for.body29
; Scalar loop: one element per iteration until the index reaches %N.
%polly.loopiv2733 = phi i32 [ %polly.next_loopiv28, %polly.stmt.for.body29 ], [ %polly.loopiv27.ph, %polly.stmt.for.body29.preheader ]
%polly.next_loopiv28 = add nsw i32 %polly.loopiv2733, 1
%p_arrayidx30 = getelementptr i16, i16* %A, i32 %polly.loopiv2733
%_p_scalar_ = load i16, i16* %p_arrayidx30, align 2
%p_add = add i16 %_p_scalar_, %val
store i16 %p_add, i16* %p_arrayidx30, align 2
%exitcond = icmp eq i32 %polly.next_loopiv28, %N
br i1 %exitcond, label %for.end.loopexit, label %polly.stmt.for.body29
}

attributes #0 = { nounwind "fp-contract-model"="standard" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="static" "ssp-buffers-size"="8" }
9 changes: 9 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-mul-v2i16.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: vmpyh
; CHECK: vtrunewh

; Lane-wise product of two <2 x i16> vectors.
define <2 x i16> @t_i2x16(<2 x i16> %a, <2 x i16> %b) nounwind {
body:
  %product = mul <2 x i16> %a, %b
  ret <2 x i16> %product
}
9 changes: 9 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-mul-v2i32.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: mpyi
; CHECK: mpyi

; Lane-wise product of two <2 x i32> vectors.
define <2 x i32> @t_i2x32(<2 x i32> %a, <2 x i32> %b) nounwind {
body:
  %product = mul <2 x i32> %a, %b
  ret <2 x i32> %product
}
10 changes: 10 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-mul-v4i16.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: vmpyh
; CHECK: vmpyh
; CHECK: vtrunewh

; Lane-wise product of two <4 x i16> vectors.
define <4 x i16> @t_i4x16(<4 x i16> %a, <4 x i16> %b) nounwind {
body:
  %product = mul <4 x i16> %a, %b
  ret <4 x i16> %product
}
9 changes: 9 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-mul-v4i8.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
; CHECK: vmpybsu
; CHECK: vtrunehb

; Lane-wise product of two <4 x i8> vectors.
define <4 x i8> @t_i4x8(<4 x i8> %a, <4 x i8> %b) nounwind {
body:
  %product = mul <4 x i8> %a, %b
  ret <4 x i8> %product
}
9 changes: 9 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-mul-v8i8.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
; CHECK: vmpybsu
; CHECK: vmpybsu

; Lane-wise product of two <8 x i8> vectors.
define <8 x i8> @t_i8x8(<8 x i8> %a, <8 x i8> %b) nounwind {
body:
  %product = mul <8 x i8> %a, %b
  ret <8 x i8> %product
}
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-no-tfrs-1.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK-NOT: r1:0 = r1:0

; Lane-wise product of two <4 x i16> vectors; the enclosing test only looks
; for the absence of a redundant register-pair self-copy in the output.
define <4 x i16> @t_i4x16(<4 x i16> %a, <4 x i16> %b) nounwind {
body:
  %product = mul <4 x i16> %a, %b
  ret <4 x i16> %product
}
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-no-tfrs.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK-NOT: r1:0 = combine(r1, r0)

; Lane-wise product of two <4 x i8> vectors; the enclosing test only looks
; for the absence of a redundant combine of r1 and r0 in the output.
define <4 x i8> @t_i4x8(<4 x i8> %a, <4 x i8> %b) nounwind {
body:
  %product = mul <4 x i8> %a, %b
  ret <4 x i8> %product
}
10 changes: 10 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-packhl.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
; Extracted from test/CodeGen/Generic/vector-casts.ll: used to loop indefinitely.
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: packhl

; Load a <2 x double> from %p, convert each lane to a signed i8, and store the
; resulting <2 x i8> to %q.
define void @a(<2 x double>* %p, <2 x i8>* %q) {
start:
  %wide = load <2 x double>, <2 x double>* %p
  %narrow = fptosi <2 x double> %wide to <2 x i8>
  store <2 x i8> %narrow, <2 x i8>* %q
  ret void
}
41 changes: 41 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-shift-imm.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
; RUN: llc -march=hexagon < %s | FileCheck %s --check-prefix=CHECK-ASLW
; RUN: llc -march=hexagon < %s | FileCheck %s --check-prefix=CHECK-ASRW
; RUN: llc -march=hexagon < %s | FileCheck %s --check-prefix=CHECK-LSRW
; RUN: llc -march=hexagon < %s | FileCheck %s --check-prefix=CHECK-ASLH
; RUN: llc -march=hexagon < %s | FileCheck %s --check-prefix=CHECK-ASRH
; RUN: llc -march=hexagon < %s | FileCheck %s --check-prefix=CHECK-LSRH
;
; Make sure that the instructions with immediate operands are generated.
; CHECK-ASLW: vaslw({{.*}}, #9)
; CHECK-ASRW: vasrw({{.*}}, #8)
; CHECK-LSRW: vlsrw({{.*}}, #7)
; CHECK-ASLH: vaslh({{.*}}, #6)
; CHECK-ASRH: vasrh({{.*}}, #5)
; CHECK-LSRH: vlsrh({{.*}}, #4)

target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
target triple = "hexagon"

; Applies each of the six immediate-form vector shift intrinsics (word and
; halfword; asl, asr, lsr) to %x with a distinct shift amount, then sums the
; six results so that every call stays live.
define i64 @foo(i64 %x) nounwind readnone {
body:
  %aslw = tail call i64 @llvm.hexagon.S2.asl.i.vw(i64 %x, i32 9)
  %asrw = tail call i64 @llvm.hexagon.S2.asr.i.vw(i64 %x, i32 8)
  %lsrw = tail call i64 @llvm.hexagon.S2.lsr.i.vw(i64 %x, i32 7)
  %aslh = tail call i64 @llvm.hexagon.S2.asl.i.vh(i64 %x, i32 6)
  %asrh = tail call i64 @llvm.hexagon.S2.asr.i.vh(i64 %x, i32 5)
  %lsrh = tail call i64 @llvm.hexagon.S2.lsr.i.vh(i64 %x, i32 4)
  %sum2 = add i64 %asrw, %aslw
  %sum3 = add i64 %sum2, %lsrw
  %sum4 = add i64 %sum3, %aslh
  %sum5 = add i64 %sum4, %asrh
  %sum6 = add i64 %sum5, %lsrh
  ret i64 %sum6
}

declare i64 @llvm.hexagon.S2.asl.i.vw(i64, i32) nounwind readnone
declare i64 @llvm.hexagon.S2.asr.i.vw(i64, i32) nounwind readnone
declare i64 @llvm.hexagon.S2.lsr.i.vw(i64, i32) nounwind readnone
declare i64 @llvm.hexagon.S2.asl.i.vh(i64, i32) nounwind readnone
declare i64 @llvm.hexagon.S2.asr.i.vh(i64, i32) nounwind readnone
declare i64 @llvm.hexagon.S2.lsr.i.vh(i64, i32) nounwind readnone

47 changes: 47 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-shuffle.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s

; Check that splitting and recombining the vector halves needs no extractu or
; insert instructions.
; CHECK-NOT: extractu
; CHECK-NOT: insert
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
target triple = "hexagon"

; For each group of four i16 values read from %src: split the vector into its
; low and high halves, zero-extend each half to <2 x i32>, add a <2 x i32>
; value derived from %flush, truncate back to <2 x i16>, rejoin the halves
; and store the <4 x i16> result to %dstImg. The loop stops once the index
; reaches 1024. %width and %idx are not used in the body.
define i32 @foo(i16* noalias nocapture %src, i16* noalias nocapture %dstImg, i32 %width, i32 %idx, i32 %flush) #0 {
entry:
; Pair up two copies of %flush through the A2.combinew intrinsic and view the
; i64 result as <2 x i32>.
%0 = tail call i64 @llvm.hexagon.A2.combinew(i32 %flush, i32 %flush)
%1 = bitcast i64 %0 to <2 x i32>
br label %polly.loop_body

polly.loop_after: ; preds = %polly.loop_body
ret i32 0

polly.loop_body: ; preds = %entry, %polly.loop_body
%p_arrayidx35.phi = phi i16* [ %dstImg, %entry ], [ %p_arrayidx35.inc, %polly.loop_body ]
%p_arrayidx.phi = phi i16* [ %src, %entry ], [ %p_arrayidx.inc, %polly.loop_body ]
%polly.loopiv56 = phi i32 [ 0, %entry ], [ %polly.next_loopiv, %polly.loop_body ]
%polly.next_loopiv = add nsw i32 %polly.loopiv56, 4
%vector_ptr = bitcast i16* %p_arrayidx.phi to <4 x i16>*
%_p_vec_full = load <4 x i16>, <4 x i16>* %vector_ptr, align 2
; Split into lanes 2-3 (high half) and lanes 0-1 (low half).
%_high_half = shufflevector <4 x i16> %_p_vec_full, <4 x i16> undef, <2 x i32> <i32 2, i32 3>
%_low_half = shufflevector <4 x i16> %_p_vec_full, <4 x i16> undef, <2 x i32> <i32 0, i32 1>
%2 = zext <2 x i16> %_low_half to <2 x i32>
%3 = zext <2 x i16> %_high_half to <2 x i32>
%add33p_vec = add <2 x i32> %2, %1
%add33p_vec48 = add <2 x i32> %3, %1
%4 = trunc <2 x i32> %add33p_vec to <2 x i16>
%5 = trunc <2 x i32> %add33p_vec48 to <2 x i16>
; Concatenate the two <2 x i16> halves back into one <4 x i16>.
%_combined_vec = shufflevector <2 x i16> %4, <2 x i16> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%vector_ptr49 = bitcast i16* %p_arrayidx35.phi to <4 x i16>*
store <4 x i16> %_combined_vec, <4 x i16>* %vector_ptr49, align 2
%6 = icmp slt i32 %polly.next_loopiv, 1024
; Advance both the destination and the source pointer by four elements.
%p_arrayidx35.inc = getelementptr i16, i16* %p_arrayidx35.phi, i32 4
%p_arrayidx.inc = getelementptr i16, i16* %p_arrayidx.phi, i32 4
br i1 %6, label %polly.loop_body, label %polly.loop_after
}

declare i64 @llvm.hexagon.A2.combinew(i32, i32) #1

attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }


16 changes: 16 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-splat.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
; Extracted from test/CodeGen/Generic/vector.ll: used to loop indefinitely.
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
; CHECK: combine

%i4 = type <4 x i32>

; Build a vector with %X in all four lanes, add it to the vector loaded from
; %Q, and store the sum to %P.
define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) {
start:
  %lane0 = insertelement %i4 undef, i32 %X, i32 0
  %lane01 = insertelement %i4 %lane0, i32 %X, i32 1
  %lane012 = insertelement %i4 %lane01, i32 %X, i32 2
  %splat = insertelement %i4 %lane012, i32 %X, i32 3
  %loaded = load %i4, %i4* %Q
  %sum = add %i4 %loaded, %splat
  store %i4 %sum, %i4* %P
  ret void
}
51 changes: 51 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-store-v2i16.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
; RUN: llc -march=hexagon < %s
; Used to fail with: "Cannot select: 0x3bab680: ch = store <ST4[%lsr.iv522525], trunc to v2i16>"
; ModuleID = 'bugpoint-reduced-simplified.bc'
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
target triple = "hexagon-unknown-linux-gnu"

; Reduced reproducer. The blocks before %for.end contain no memory or vector
; operations; %for.end builds a two-lane splat and %polly.loop_body performs
; the widening multiply, shift and truncating <2 x i16> store under test.
define void @foobar() nounwind {
entry:
br label %for.cond7.preheader.single_entry.i

for.cond7.preheader.single_entry.i: ; preds = %for.cond7.preheader.single_entry.i, %entry
%exitcond72.i = icmp eq i32 undef, 64
br i1 %exitcond72.i, label %foo_32.exit, label %for.cond7.preheader.single_entry.i

foo_32.exit: ; preds = %for.cond7.preheader.single_entry.i
br label %for.body.i428

for.body.i428: ; preds = %for.body.i428, %foo_32.exit
br i1 undef, label %foo_12.exit, label %for.body.i428

foo_12.exit: ; preds = %for.body.i428
br label %for.body.i.i

for.body.i.i: ; preds = %for.body.i.i, %foo_12.exit
br i1 undef, label %foo_14.exit, label %for.body.i.i

foo_14.exit: ; preds = %for.body.i.i
br label %for.body

for.body: ; preds = %for.body, %foo_14.exit
br i1 undef, label %for.end, label %for.body

for.end: ; preds = %for.body
; Put %storemerge294 into a one-element vector, then broadcast element 0 to
; two lanes with an all-zero shuffle mask.
%storemerge294 = select i1 undef, i32 32767, i32 undef
%_p_splat_one386 = insertelement <1 x i32> undef, i32 %storemerge294, i32 0
%_p_splat387 = shufflevector <1 x i32> %_p_splat_one386, <1 x i32> undef, <2 x i32> zeroinitializer
br label %polly.loop_body377

polly.loop_after378: ; preds = %polly.loop_body377
unreachable

polly.loop_body377: ; preds = %polly.loop_body377, %for.end
; Load <2 x i16>, sign-extend to <2 x i32>, multiply by the splat, shift
; right logically by 15, then truncate back to <2 x i16> and store.
%_p_vec_full384 = load <2 x i16>, <2 x i16>* undef, align 4
%0 = sext <2 x i16> %_p_vec_full384 to <2 x i32>
%mulp_vec = mul <2 x i32> %0, %_p_splat387
%shr100293p_vec = lshr <2 x i32> %mulp_vec, <i32 15, i32 15>
%1 = trunc <2 x i32> %shr100293p_vec to <2 x i16>
store <2 x i16> %1, <2 x i16>* undef, align 4
br i1 undef, label %polly.loop_body377, label %polly.loop_after378
}

42 changes: 42 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-truncate.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
; RUN: llc -march=hexagon < %s
; Used to fail with "Cannot select: 0x16cb7f0: v2i16 = truncate"

; ModuleID = 'bugpoint-reduced-simplified.bc'
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
target triple = "hexagon-unknown-linux-gnu"

; Reduced reproducer. The operation under test is the <4 x i32> to <4 x i16>
; truncate in %polly.loop_body; the remaining blocks contain only terminators
; (plus one compare) and form the reduced control-flow skeleton.
define void @Autocorr() nounwind {
entry:
br label %for.body

for.body: ; preds = %for.body, %entry
br i1 undef, label %polly.loop_header43, label %for.body

do.cond: ; preds = %polly.loop_header
unreachable

do.end: ; preds = %polly.loop_after45
ret void

polly.loop_header: ; preds = %polly.loop_after45, %polly.loop_body
%0 = icmp sle i32 undef, 239
br i1 %0, label %polly.loop_body, label %do.cond

polly.loop_body: ; preds = %polly.loop_header
; Place the intrinsic's i32 result in lane 3 of an otherwise undef
; <4 x i32>, truncate the vector to <4 x i16>, and store it.
%p_25 = call i32 @llvm.hexagon.SI.to.SXTHI.asrh(i32 undef)
%1 = insertelement <4 x i32> undef, i32 %p_25, i32 3
%2 = trunc <4 x i32> %1 to <4 x i16>
store <4 x i16> %2, <4 x i16>* undef, align 8
br label %polly.loop_header

polly.loop_after45: ; preds = %polly.loop_header43
br i1 undef, label %polly.loop_header, label %do.end

polly.loop_header43: ; preds = %polly.loop_body44, %for.body
br i1 undef, label %polly.loop_body44, label %polly.loop_after45

polly.loop_body44: ; preds = %polly.loop_header43
br label %polly.loop_header43
}

declare i32 @llvm.hexagon.SI.to.SXTHI.asrh(i32) nounwind readnone
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-vaddb-1.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: vaddub

; Lane-wise sum of two <4 x i8> vectors.
define <4 x i8> @t_i4x8(<4 x i8> %a, <4 x i8> %b) nounwind {
body:
  %sum = add <4 x i8> %a, %b
  ret <4 x i8> %sum
}
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-vaddb.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: vaddub

; Lane-wise sum of two <8 x i8> vectors.
define <8 x i8> @t_i8x8(<8 x i8> %a, <8 x i8> %b) nounwind {
body:
  %sum = add <8 x i8> %a, %b
  ret <8 x i8> %sum
}
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-vaddh-1.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: vaddh

; Lane-wise sum of two <4 x i16> vectors.
define <4 x i16> @t_i4x16(<4 x i16> %a, <4 x i16> %b) nounwind {
body:
  %sum = add <4 x i16> %a, %b
  ret <4 x i16> %sum
}
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-vaddh.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: vaddh

; Lane-wise sum of two <2 x i16> vectors.
define <2 x i16> @t_i2x16(<2 x i16> %a, <2 x i16> %b) nounwind {
body:
  %sum = add <2 x i16> %a, %b
  ret <2 x i16> %sum
}
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-vaddw.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: vaddw

; Lane-wise sum of two <2 x i32> vectors.
define <2 x i32> @t_i2x32(<2 x i32> %a, <2 x i32> %b) nounwind {
body:
  %sum = add <2 x i32> %a, %b
  ret <2 x i32> %sum
}
33 changes: 33 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-vaslw.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: vaslw

target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
target triple = "hexagon-unknown-linux-gnu"

; Updates v[0..3] in place. Per lane, in 32-bit arithmetic:
;   v[i] = trunc((sext(v[i + 4]) << 2) + 34 + zext(v[i]))
; Each <4 x i16> load is split into two <2 x i16> halves that are widened to
; <2 x i32>, so the shift and adds operate on <2 x i32> values.
define void @foo(i16* nocapture %v) nounwind {
entry:
; Load elements 4-7 and split them into high (lanes 2-3) and low (lanes 0-1)
; halves.
%p_arrayidx = getelementptr i16, i16* %v, i32 4
%vector_ptr = bitcast i16* %p_arrayidx to <4 x i16>*
%_p_vec_full = load <4 x i16>, <4 x i16>* %vector_ptr, align 2
%_high_half = shufflevector <4 x i16> %_p_vec_full, <4 x i16> undef, <2 x i32> <i32 2, i32 3>
%_low_half = shufflevector <4 x i16> %_p_vec_full, <4 x i16> undef, <2 x i32> <i32 0, i32 1>
; Sign-extend, shift left by 2, and add 34 to every lane.
%0 = sext <2 x i16> %_low_half to <2 x i32>
%1 = sext <2 x i16> %_high_half to <2 x i32>
%shr6p_vec = shl <2 x i32> %0, <i32 2, i32 2>
%shr6p_vec19 = shl <2 x i32> %1, <i32 2, i32 2>
%addp_vec = add <2 x i32> %shr6p_vec, <i32 34, i32 34>
%addp_vec20 = add <2 x i32> %shr6p_vec19, <i32 34, i32 34>
; Load elements 0-3, zero-extend both halves, and add them in.
%vector_ptr21 = bitcast i16* %v to <4 x i16>*
%_p_vec_full22 = load <4 x i16>, <4 x i16>* %vector_ptr21, align 2
%_high_half23 = shufflevector <4 x i16> %_p_vec_full22, <4 x i16> undef, <2 x i32> <i32 2, i32 3>
%_low_half24 = shufflevector <4 x i16> %_p_vec_full22, <4 x i16> undef, <2 x i32> <i32 0, i32 1>
%2 = zext <2 x i16> %_low_half24 to <2 x i32>
%3 = zext <2 x i16> %_high_half23 to <2 x i32>
%add3p_vec = add <2 x i32> %addp_vec, %2
%add3p_vec25 = add <2 x i32> %addp_vec20, %3
; Narrow both halves back to i16, rejoin them, and store over elements 0-3.
%4 = trunc <2 x i32> %add3p_vec to <2 x i16>
%5 = trunc <2 x i32> %add3p_vec25 to <2 x i16>
%_combined_vec = shufflevector <2 x i16> %4, <2 x i16> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x i16> %_combined_vec, <4 x i16>* %vector_ptr21, align 2
ret void
}
279 changes: 279 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-vshifts.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s

; Check that vector word shifts by a register amount (vasrw and vaslw) are generated.
; CHECK: r{{[0-9]+:[0-9]+}} = vasrw(r{{[0-9]+:[0-9]+}}, r{{[0-9]+}})
; CHECK: r{{[0-9]+:[0-9]+}} = vaslw(r{{[0-9]+:[0-9]+}}, r{{[0-9]+}})
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
target triple = "hexagon"

; Test input mixing IR vector shifts with Hexagon vector intrinsics.
; The function first shifts buf[0] right (ashr) by %gb in place, then runs
; eight unrolled steps over %dest. Each step gathers two i32 values into a
; <2 x i32>, passes them through S2.asr.i.vw, an IR <2 x i32> ashr, an xor,
; A2.vcmpweq / C2.vmux and S2.asl.r.vw, and scatters the two result lanes
; back to %dest.
; NOTE(review): intrinsic semantics are not visible in this file; names such
; as asr.i.vw / asl.r.vw suggest per-word shifts - confirm against the
; Hexagon intrinsic definitions.
define void @foo(i32* nocapture %buf, i32* nocapture %dest, i32 %offset, i32 %oddBlock, i32 %gb) #0 {
entry:
; Scalar part: buf[0] = buf[0] >> %gb (arithmetic).
%0 = load i32, i32* %buf, align 4, !tbaa !0
%shr = ashr i32 %0, %gb
store i32 %shr, i32* %buf, align 4, !tbaa !0
; Base element index into %dest:
;   %4 = zext(%oddBlock == 0) + ((%offset - %oddBlock) & 7), and %5 = %4 + 8.
%not.tobool = icmp eq i32 %oddBlock, 0
%1 = sub i32 %offset, %oddBlock
%2 = zext i1 %not.tobool to i32
%3 = and i32 %1, 7
%4 = add i32 %2, %3
%5 = add i32 %4, 8
; Values shared by all steps: %7 is a splat of (31 - %gb); %sub12p_vec is %7
; passed through S2.asl.i.vw with operand 1, plus the combinew(-1, -1) vector.
%p_sub8 = sub nsw i32 31, %gb
%6 = insertelement <2 x i32> undef, i32 %p_sub8, i32 0
%7 = insertelement <2 x i32> %6, i32 %p_sub8, i32 1
%8 = bitcast <2 x i32> %7 to i64
%9 = tail call i64 @llvm.hexagon.S2.asl.i.vw(i64 %8, i32 1)
%10 = bitcast i64 %9 to <2 x i32>
%11 = tail call i64 @llvm.hexagon.A2.combinew(i32 -1, i32 -1)
%12 = bitcast i64 %11 to <2 x i32>
%sub12p_vec = add <2 x i32> %10, %12
; Unrolled step 0: lanes come from dest[%4] and dest[%4 + 64]; lane 0 of the
; result is stored to dest[%4 + 8] and dest[%4], lane 1 to dest[%4 + 72] and
; dest[%4 + 64].
%p_22 = add i32 %4, 64
%p_d.018 = getelementptr i32, i32* %dest, i32 %4
%p_d.01823 = getelementptr i32, i32* %dest, i32 %p_22
%p_25 = add i32 %4, 72
%p_arrayidx14 = getelementptr i32, i32* %dest, i32 %5
%p_arrayidx1426 = getelementptr i32, i32* %dest, i32 %p_25
%_p_scalar_ = load i32, i32* %p_d.018, align 4
%_p_vec_ = insertelement <2 x i32> undef, i32 %_p_scalar_, i32 0
%_p_scalar_27 = load i32, i32* %p_d.01823, align 4
%_p_vec_28 = insertelement <2 x i32> %_p_vec_, i32 %_p_scalar_27, i32 1
%13 = bitcast <2 x i32> %_p_vec_28 to i64
%14 = tail call i64 @llvm.hexagon.S2.asr.i.vw(i64 %13, i32 31)
%15 = bitcast i64 %14 to <2 x i32>
; IR-level vector ashr by the splat %7 (shift amount held in a vector).
%shr9p_vec = ashr <2 x i32> %_p_vec_28, %7
%xorp_vec = xor <2 x i32> %15, %sub12p_vec
%16 = bitcast <2 x i32> %shr9p_vec to i64
; The vcmpweq result is the selector for vmux between the loaded value and
; the xor result.
%17 = tail call i32 @llvm.hexagon.A2.vcmpweq(i64 %14, i64 %16)
%18 = bitcast <2 x i32> %xorp_vec to i64
%19 = tail call i64 @llvm.hexagon.C2.vmux(i32 %17, i64 %13, i64 %18)
%20 = tail call i64 @llvm.hexagon.S2.asl.r.vw(i64 %19, i32 %gb)
%21 = bitcast i64 %20 to <2 x i32>
%22 = extractelement <2 x i32> %21, i32 0
store i32 %22, i32* %p_arrayidx14, align 4
%23 = extractelement <2 x i32> %21, i32 1
store i32 %23, i32* %p_arrayidx1426, align 4
store i32 %22, i32* %p_d.018, align 4
store i32 %23, i32* %p_d.01823, align 4
; Unrolled step 1: same sequence, all four element offsets increased by 128.
%p_21.1 = add i32 %4, 128
%p_22.1 = add i32 %4, 192
%p_d.018.1 = getelementptr i32, i32* %dest, i32 %p_21.1
%p_d.01823.1 = getelementptr i32, i32* %dest, i32 %p_22.1
%p_24.1 = add i32 %4, 136
%p_25.1 = add i32 %4, 200
%p_arrayidx14.1 = getelementptr i32, i32* %dest, i32 %p_24.1
%p_arrayidx1426.1 = getelementptr i32, i32* %dest, i32 %p_25.1
%_p_scalar_.1 = load i32, i32* %p_d.018.1, align 4
%_p_vec_.1 = insertelement <2 x i32> undef, i32 %_p_scalar_.1, i32 0
%_p_scalar_27.1 = load i32, i32* %p_d.01823.1, align 4
%_p_vec_28.1 = insertelement <2 x i32> %_p_vec_.1, i32 %_p_scalar_27.1, i32 1
%24 = bitcast <2 x i32> %_p_vec_28.1 to i64
%25 = tail call i64 @llvm.hexagon.S2.asr.i.vw(i64 %24, i32 31)
%26 = bitcast i64 %25 to <2 x i32>
%shr9p_vec.1 = ashr <2 x i32> %_p_vec_28.1, %7
%xorp_vec.1 = xor <2 x i32> %26, %sub12p_vec
%27 = bitcast <2 x i32> %shr9p_vec.1 to i64
%28 = tail call i32 @llvm.hexagon.A2.vcmpweq(i64 %25, i64 %27)
%29 = bitcast <2 x i32> %xorp_vec.1 to i64
%30 = tail call i64 @llvm.hexagon.C2.vmux(i32 %28, i64 %24, i64 %29)
%31 = tail call i64 @llvm.hexagon.S2.asl.r.vw(i64 %30, i32 %gb)
%32 = bitcast i64 %31 to <2 x i32>
%33 = extractelement <2 x i32> %32, i32 0
store i32 %33, i32* %p_arrayidx14.1, align 4
%34 = extractelement <2 x i32> %32, i32 1
store i32 %34, i32* %p_arrayidx1426.1, align 4
store i32 %33, i32* %p_d.018.1, align 4
store i32 %34, i32* %p_d.01823.1, align 4
; Unrolled step 2: element offsets increased by 256 relative to step 0.
%p_21.2 = add i32 %4, 256
%p_22.2 = add i32 %4, 320
%p_d.018.2 = getelementptr i32, i32* %dest, i32 %p_21.2
%p_d.01823.2 = getelementptr i32, i32* %dest, i32 %p_22.2
%p_24.2 = add i32 %4, 264
%p_25.2 = add i32 %4, 328
%p_arrayidx14.2 = getelementptr i32, i32* %dest, i32 %p_24.2
%p_arrayidx1426.2 = getelementptr i32, i32* %dest, i32 %p_25.2
%_p_scalar_.2 = load i32, i32* %p_d.018.2, align 4
%_p_vec_.2 = insertelement <2 x i32> undef, i32 %_p_scalar_.2, i32 0
%_p_scalar_27.2 = load i32, i32* %p_d.01823.2, align 4
%_p_vec_28.2 = insertelement <2 x i32> %_p_vec_.2, i32 %_p_scalar_27.2, i32 1
%35 = bitcast <2 x i32> %_p_vec_28.2 to i64
%36 = tail call i64 @llvm.hexagon.S2.asr.i.vw(i64 %35, i32 31)
%37 = bitcast i64 %36 to <2 x i32>
%shr9p_vec.2 = ashr <2 x i32> %_p_vec_28.2, %7
%xorp_vec.2 = xor <2 x i32> %37, %sub12p_vec
%38 = bitcast <2 x i32> %shr9p_vec.2 to i64
%39 = tail call i32 @llvm.hexagon.A2.vcmpweq(i64 %36, i64 %38)
%40 = bitcast <2 x i32> %xorp_vec.2 to i64
%41 = tail call i64 @llvm.hexagon.C2.vmux(i32 %39, i64 %35, i64 %40)
%42 = tail call i64 @llvm.hexagon.S2.asl.r.vw(i64 %41, i32 %gb)
%43 = bitcast i64 %42 to <2 x i32>
%44 = extractelement <2 x i32> %43, i32 0
store i32 %44, i32* %p_arrayidx14.2, align 4
%45 = extractelement <2 x i32> %43, i32 1
store i32 %45, i32* %p_arrayidx1426.2, align 4
store i32 %44, i32* %p_d.018.2, align 4
store i32 %45, i32* %p_d.01823.2, align 4
; Unrolled step 3: element offsets increased by 384 relative to step 0.
%p_21.3 = add i32 %4, 384
%p_22.3 = add i32 %4, 448
%p_d.018.3 = getelementptr i32, i32* %dest, i32 %p_21.3
%p_d.01823.3 = getelementptr i32, i32* %dest, i32 %p_22.3
%p_24.3 = add i32 %4, 392
%p_25.3 = add i32 %4, 456
%p_arrayidx14.3 = getelementptr i32, i32* %dest, i32 %p_24.3
%p_arrayidx1426.3 = getelementptr i32, i32* %dest, i32 %p_25.3
%_p_scalar_.3 = load i32, i32* %p_d.018.3, align 4
%_p_vec_.3 = insertelement <2 x i32> undef, i32 %_p_scalar_.3, i32 0
%_p_scalar_27.3 = load i32, i32* %p_d.01823.3, align 4
%_p_vec_28.3 = insertelement <2 x i32> %_p_vec_.3, i32 %_p_scalar_27.3, i32 1
%46 = bitcast <2 x i32> %_p_vec_28.3 to i64
%47 = tail call i64 @llvm.hexagon.S2.asr.i.vw(i64 %46, i32 31)
%48 = bitcast i64 %47 to <2 x i32>
%shr9p_vec.3 = ashr <2 x i32> %_p_vec_28.3, %7
%xorp_vec.3 = xor <2 x i32> %48, %sub12p_vec
%49 = bitcast <2 x i32> %shr9p_vec.3 to i64
%50 = tail call i32 @llvm.hexagon.A2.vcmpweq(i64 %47, i64 %49)
%51 = bitcast <2 x i32> %xorp_vec.3 to i64
%52 = tail call i64 @llvm.hexagon.C2.vmux(i32 %50, i64 %46, i64 %51)
%53 = tail call i64 @llvm.hexagon.S2.asl.r.vw(i64 %52, i32 %gb)
%54 = bitcast i64 %53 to <2 x i32>
%55 = extractelement <2 x i32> %54, i32 0
store i32 %55, i32* %p_arrayidx14.3, align 4
%56 = extractelement <2 x i32> %54, i32 1
store i32 %56, i32* %p_arrayidx1426.3, align 4
store i32 %55, i32* %p_d.018.3, align 4
store i32 %56, i32* %p_d.01823.3, align 4
; Unrolled step 4: element offsets increased by 512 relative to step 0.
%p_21.4 = add i32 %4, 512
%p_22.4 = add i32 %4, 576
%p_d.018.4 = getelementptr i32, i32* %dest, i32 %p_21.4
%p_d.01823.4 = getelementptr i32, i32* %dest, i32 %p_22.4
%p_24.4 = add i32 %4, 520
%p_25.4 = add i32 %4, 584
%p_arrayidx14.4 = getelementptr i32, i32* %dest, i32 %p_24.4
%p_arrayidx1426.4 = getelementptr i32, i32* %dest, i32 %p_25.4
%_p_scalar_.4 = load i32, i32* %p_d.018.4, align 4
%_p_vec_.4 = insertelement <2 x i32> undef, i32 %_p_scalar_.4, i32 0
%_p_scalar_27.4 = load i32, i32* %p_d.01823.4, align 4
%_p_vec_28.4 = insertelement <2 x i32> %_p_vec_.4, i32 %_p_scalar_27.4, i32 1
%57 = bitcast <2 x i32> %_p_vec_28.4 to i64
%58 = tail call i64 @llvm.hexagon.S2.asr.i.vw(i64 %57, i32 31)
%59 = bitcast i64 %58 to <2 x i32>
%shr9p_vec.4 = ashr <2 x i32> %_p_vec_28.4, %7
%xorp_vec.4 = xor <2 x i32> %59, %sub12p_vec
%60 = bitcast <2 x i32> %shr9p_vec.4 to i64
%61 = tail call i32 @llvm.hexagon.A2.vcmpweq(i64 %58, i64 %60)
%62 = bitcast <2 x i32> %xorp_vec.4 to i64
%63 = tail call i64 @llvm.hexagon.C2.vmux(i32 %61, i64 %57, i64 %62)
%64 = tail call i64 @llvm.hexagon.S2.asl.r.vw(i64 %63, i32 %gb)
%65 = bitcast i64 %64 to <2 x i32>
%66 = extractelement <2 x i32> %65, i32 0
store i32 %66, i32* %p_arrayidx14.4, align 4
%67 = extractelement <2 x i32> %65, i32 1
store i32 %67, i32* %p_arrayidx1426.4, align 4
store i32 %66, i32* %p_d.018.4, align 4
store i32 %67, i32* %p_d.01823.4, align 4
; Unrolled step 5: element offsets increased by 640 relative to step 0.
%p_21.5 = add i32 %4, 640
%p_22.5 = add i32 %4, 704
%p_d.018.5 = getelementptr i32, i32* %dest, i32 %p_21.5
%p_d.01823.5 = getelementptr i32, i32* %dest, i32 %p_22.5
%p_24.5 = add i32 %4, 648
%p_25.5 = add i32 %4, 712
%p_arrayidx14.5 = getelementptr i32, i32* %dest, i32 %p_24.5
%p_arrayidx1426.5 = getelementptr i32, i32* %dest, i32 %p_25.5
%_p_scalar_.5 = load i32, i32* %p_d.018.5, align 4
%_p_vec_.5 = insertelement <2 x i32> undef, i32 %_p_scalar_.5, i32 0
%_p_scalar_27.5 = load i32, i32* %p_d.01823.5, align 4
%_p_vec_28.5 = insertelement <2 x i32> %_p_vec_.5, i32 %_p_scalar_27.5, i32 1
%68 = bitcast <2 x i32> %_p_vec_28.5 to i64
%69 = tail call i64 @llvm.hexagon.S2.asr.i.vw(i64 %68, i32 31)
%70 = bitcast i64 %69 to <2 x i32>
%shr9p_vec.5 = ashr <2 x i32> %_p_vec_28.5, %7
%xorp_vec.5 = xor <2 x i32> %70, %sub12p_vec
%71 = bitcast <2 x i32> %shr9p_vec.5 to i64
%72 = tail call i32 @llvm.hexagon.A2.vcmpweq(i64 %69, i64 %71)
%73 = bitcast <2 x i32> %xorp_vec.5 to i64
%74 = tail call i64 @llvm.hexagon.C2.vmux(i32 %72, i64 %68, i64 %73)
%75 = tail call i64 @llvm.hexagon.S2.asl.r.vw(i64 %74, i32 %gb)
%76 = bitcast i64 %75 to <2 x i32>
%77 = extractelement <2 x i32> %76, i32 0
store i32 %77, i32* %p_arrayidx14.5, align 4
%78 = extractelement <2 x i32> %76, i32 1
store i32 %78, i32* %p_arrayidx1426.5, align 4
store i32 %77, i32* %p_d.018.5, align 4
store i32 %78, i32* %p_d.01823.5, align 4
; Unrolled step 6: element offsets increased by 768 relative to step 0.
%p_21.6 = add i32 %4, 768
%p_22.6 = add i32 %4, 832
%p_d.018.6 = getelementptr i32, i32* %dest, i32 %p_21.6
%p_d.01823.6 = getelementptr i32, i32* %dest, i32 %p_22.6
%p_24.6 = add i32 %4, 776
%p_25.6 = add i32 %4, 840
%p_arrayidx14.6 = getelementptr i32, i32* %dest, i32 %p_24.6
%p_arrayidx1426.6 = getelementptr i32, i32* %dest, i32 %p_25.6
%_p_scalar_.6 = load i32, i32* %p_d.018.6, align 4
%_p_vec_.6 = insertelement <2 x i32> undef, i32 %_p_scalar_.6, i32 0
%_p_scalar_27.6 = load i32, i32* %p_d.01823.6, align 4
%_p_vec_28.6 = insertelement <2 x i32> %_p_vec_.6, i32 %_p_scalar_27.6, i32 1
%79 = bitcast <2 x i32> %_p_vec_28.6 to i64
%80 = tail call i64 @llvm.hexagon.S2.asr.i.vw(i64 %79, i32 31)
%81 = bitcast i64 %80 to <2 x i32>
%shr9p_vec.6 = ashr <2 x i32> %_p_vec_28.6, %7
%xorp_vec.6 = xor <2 x i32> %81, %sub12p_vec
%82 = bitcast <2 x i32> %shr9p_vec.6 to i64
%83 = tail call i32 @llvm.hexagon.A2.vcmpweq(i64 %80, i64 %82)
%84 = bitcast <2 x i32> %xorp_vec.6 to i64
%85 = tail call i64 @llvm.hexagon.C2.vmux(i32 %83, i64 %79, i64 %84)
%86 = tail call i64 @llvm.hexagon.S2.asl.r.vw(i64 %85, i32 %gb)
%87 = bitcast i64 %86 to <2 x i32>
%88 = extractelement <2 x i32> %87, i32 0
store i32 %88, i32* %p_arrayidx14.6, align 4
%89 = extractelement <2 x i32> %87, i32 1
store i32 %89, i32* %p_arrayidx1426.6, align 4
store i32 %88, i32* %p_d.018.6, align 4
store i32 %89, i32* %p_d.01823.6, align 4
; Unrolled step 7: element offsets increased by 896 relative to step 0.
%p_21.7 = add i32 %4, 896
%p_22.7 = add i32 %4, 960
%p_d.018.7 = getelementptr i32, i32* %dest, i32 %p_21.7
%p_d.01823.7 = getelementptr i32, i32* %dest, i32 %p_22.7
%p_24.7 = add i32 %4, 904
%p_25.7 = add i32 %4, 968
%p_arrayidx14.7 = getelementptr i32, i32* %dest, i32 %p_24.7
%p_arrayidx1426.7 = getelementptr i32, i32* %dest, i32 %p_25.7
%_p_scalar_.7 = load i32, i32* %p_d.018.7, align 4
%_p_vec_.7 = insertelement <2 x i32> undef, i32 %_p_scalar_.7, i32 0
%_p_scalar_27.7 = load i32, i32* %p_d.01823.7, align 4
%_p_vec_28.7 = insertelement <2 x i32> %_p_vec_.7, i32 %_p_scalar_27.7, i32 1
%90 = bitcast <2 x i32> %_p_vec_28.7 to i64
%91 = tail call i64 @llvm.hexagon.S2.asr.i.vw(i64 %90, i32 31)
%92 = bitcast i64 %91 to <2 x i32>
%shr9p_vec.7 = ashr <2 x i32> %_p_vec_28.7, %7
%xorp_vec.7 = xor <2 x i32> %92, %sub12p_vec
%93 = bitcast <2 x i32> %shr9p_vec.7 to i64
%94 = tail call i32 @llvm.hexagon.A2.vcmpweq(i64 %91, i64 %93)
%95 = bitcast <2 x i32> %xorp_vec.7 to i64
%96 = tail call i64 @llvm.hexagon.C2.vmux(i32 %94, i64 %90, i64 %95)
%97 = tail call i64 @llvm.hexagon.S2.asl.r.vw(i64 %96, i32 %gb)
%98 = bitcast i64 %97 to <2 x i32>
%99 = extractelement <2 x i32> %98, i32 0
store i32 %99, i32* %p_arrayidx14.7, align 4
%100 = extractelement <2 x i32> %98, i32 1
store i32 %100, i32* %p_arrayidx1426.7, align 4
store i32 %99, i32* %p_d.018.7, align 4
store i32 %100, i32* %p_d.01823.7, align 4
ret void
}

declare i64 @llvm.hexagon.S2.asr.i.vw(i64, i32) #1

declare i64 @llvm.hexagon.S2.asl.i.vw(i64, i32) #1

declare i64 @llvm.hexagon.A2.combinew(i32, i32) #1

declare i32 @llvm.hexagon.A2.vcmpweq(i64, i64) #1

declare i64 @llvm.hexagon.C2.vmux(i32, i64, i64) #1

declare i64 @llvm.hexagon.S2.asl.r.vw(i64, i32) #1

attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }

!0 = !{!"int", !1}
!1 = !{!"omnipotent char", !2}
!2 = !{!"Simple C/C++ TBAA"}
29 changes: 29 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-vsplatb.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; Make sure we build the constant vector <7, 7, 7, 7> with a vsplatb.
; CHECK: vsplatb
@B = common global [400 x i8] zeroinitializer, align 8
@A = common global [400 x i8] zeroinitializer, align 8
@C = common global [400 x i8] zeroinitializer, align 8

; Loop computing A[i..i+3] += B[i..i+3] * 7 on <4 x i8> vectors, with i
; starting at 0 and advancing by 4 while the next index is below 400.
; The splat constant <7, 7, 7, 7> used by the multiply is the value this
; test expects to be materialized with vsplatb.
define void @run() nounwind {
entry:
br label %polly.loop_body

polly.loop_after: ; preds = %polly.loop_body
ret void

polly.loop_body: ; preds = %entry, %polly.loop_body
; Induction variable: element index into @A and @B.
%polly.loopiv25 = phi i32 [ 0, %entry ], [ %polly.next_loopiv, %polly.loop_body ]
%polly.next_loopiv = add i32 %polly.loopiv25, 4
%p_arrayidx1 = getelementptr [400 x i8], [400 x i8]* @A, i32 0, i32 %polly.loopiv25
%p_arrayidx = getelementptr [400 x i8], [400 x i8]* @B, i32 0, i32 %polly.loopiv25
%vector_ptr = bitcast i8* %p_arrayidx to <4 x i8>*
%_p_vec_full = load <4 x i8>, <4 x i8>* %vector_ptr, align 8
; Multiply by the constant splat of 7.
%mulp_vec = mul <4 x i8> %_p_vec_full, <i8 7, i8 7, i8 7, i8 7>
%vector_ptr14 = bitcast i8* %p_arrayidx1 to <4 x i8>*
%_p_vec_full15 = load <4 x i8>, <4 x i8>* %vector_ptr14, align 8
%addp_vec = add <4 x i8> %_p_vec_full15, %mulp_vec
store <4 x i8> %addp_vec, <4 x i8>* %vector_ptr14, align 8
%0 = icmp slt i32 %polly.next_loopiv, 400
br i1 %0, label %polly.loop_body, label %polly.loop_after
}
29 changes: 29 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-vsplath.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; Make sure we build the constant vector <7, 7, 7, 7> with a vsplath.
; CHECK: vsplath
@B = common global [400 x i16] zeroinitializer, align 8
@A = common global [400 x i16] zeroinitializer, align 8
@C = common global [400 x i16] zeroinitializer, align 8

; Loop computing A[i..i+3] += B[i..i+3] * 7 on <4 x i16> vectors, with i
; starting at 0 and advancing by 4 while the next index is below 400.
; The splat constant <7, 7, 7, 7> used by the multiply is the value this
; test expects to be materialized with vsplath.
define void @run() nounwind {
entry:
br label %polly.loop_body

polly.loop_after: ; preds = %polly.loop_body
ret void

polly.loop_body: ; preds = %entry, %polly.loop_body
; Induction variable: element index into @A and @B.
%polly.loopiv26 = phi i32 [ 0, %entry ], [ %polly.next_loopiv, %polly.loop_body ]
%polly.next_loopiv = add nsw i32 %polly.loopiv26, 4
%p_arrayidx1 = getelementptr [400 x i16], [400 x i16]* @A, i32 0, i32 %polly.loopiv26
%p_arrayidx = getelementptr [400 x i16], [400 x i16]* @B, i32 0, i32 %polly.loopiv26
%vector_ptr = bitcast i16* %p_arrayidx to <4 x i16>*
%_p_vec_full = load <4 x i16>, <4 x i16>* %vector_ptr, align 8
; Multiply by the constant splat of 7.
%mulp_vec = mul <4 x i16> %_p_vec_full, <i16 7, i16 7, i16 7, i16 7>
%vector_ptr15 = bitcast i16* %p_arrayidx1 to <4 x i16>*
%_p_vec_full16 = load <4 x i16>, <4 x i16>* %vector_ptr15, align 8
%addp_vec = add <4 x i16> %_p_vec_full16, %mulp_vec
store <4 x i16> %addp_vec, <4 x i16>* %vector_ptr15, align 8
%0 = icmp slt i32 %polly.next_loopiv, 400
br i1 %0, label %polly.loop_body, label %polly.loop_after
}
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-vsubb-1.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: vsubub

; Returns the element-wise difference %a - %b of two <4 x i8> vectors.
; This file expects llc to emit vsubub for it.
define <4 x i8> @t_i4x8(<4 x i8> %a, <4 x i8> %b) nounwind {
entry:
%0 = sub <4 x i8> %a, %b
ret <4 x i8> %0
}
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-vsubb.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: vsubub

; Returns the element-wise difference %a - %b of two <8 x i8> vectors.
; This file expects llc to emit vsubub for it.
define <8 x i8> @t_i8x8(<8 x i8> %a, <8 x i8> %b) nounwind {
entry:
%0 = sub <8 x i8> %a, %b
ret <8 x i8> %0
}
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-vsubh-1.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: vsubh

; Returns the element-wise difference %a - %b of two <4 x i16> vectors.
; This file expects llc to emit vsubh for it.
define <4 x i16> @t_i4x16(<4 x i16> %a, <4 x i16> %b) nounwind {
entry:
%0 = sub <4 x i16> %a, %b
ret <4 x i16> %0
}
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-vsubh.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: vsubh

; Returns the element-wise difference %a - %b of two <2 x i16> vectors.
; This file expects llc to emit vsubh for it.
define <2 x i16> @t_i2x16(<2 x i16> %a, <2 x i16> %b) nounwind {
entry:
%0 = sub <2 x i16> %a, %b
ret <2 x i16> %0
}
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-vsubw.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: vsubw

; Returns the element-wise difference %a - %b of two <2 x i32> vectors.
; This file expects llc to emit vsubw for it.
define <2 x i32> @t_i2x32(<2 x i32> %a, <2 x i32> %b) nounwind {
entry:
%0 = sub <2 x i32> %a, %b
ret <2 x i32> %0
}
38 changes: 38 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-xor.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s

; Check that the 64-bit vector xor is selected as a register-pair xor.
; CHECK: r{{[0-9]+:[0-9]+}} = xor(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
target triple = "hexagon"

@window_size = global i32 65536, align 4
@prev = external global [0 x i16], align 8
@block_start = common global i32 0, align 4
@prev_length = common global i32 0, align 4
@strstart = common global i32 0, align 4
@match_start = common global i32 0, align 4
@max_chain_length = common global i32 0, align 4
@good_match = common global i32 0, align 4

; Loop over @prev in <4 x i16> chunks, with the index starting at 0 and
; advancing by 4 while the next index is below 32768. For each chunk, lanes
; that compare signed-less-than zero are replaced by the lane xor'ed with
; -32768 (the i16 sign bit); all other lanes are replaced by zero. The result
; is stored back to the same location.
define void @fill_window() #0 {
entry:
br label %polly.loop_body

polly.loop_after: ; preds = %polly.loop_body
ret void

polly.loop_body: ; preds = %entry, %polly.loop_body
; Induction variable: element index into @prev.
%polly.loopiv36 = phi i32 [ 0, %entry ], [ %polly.next_loopiv, %polly.loop_body ]
%polly.next_loopiv = add nsw i32 %polly.loopiv36, 4
%p_arrayidx4 = getelementptr [0 x i16], [0 x i16]* @prev, i32 0, i32 %polly.loopiv36
%vector_ptr = bitcast i16* %p_arrayidx4 to <4 x i16>*
%_p_vec_full = load <4 x i16>, <4 x i16>* %vector_ptr, align 2
%cmp1p_vicmp = icmp slt <4 x i16> %_p_vec_full, zeroinitializer
; The vector xor with a splat constant is the operation under test.
%subp_vec = xor <4 x i16> %_p_vec_full, <i16 -32768, i16 -32768, i16 -32768, i16 -32768>
%sel1p_vsel = select <4 x i1> %cmp1p_vicmp, <4 x i16> %subp_vec, <4 x i16> zeroinitializer
store <4 x i16> %sel1p_vsel, <4 x i16>* %vector_ptr, align 2
%0 = icmp slt i32 %polly.next_loopiv, 32768
br i1 %0, label %polly.loop_body, label %polly.loop_after
}

attributes #0 = { nounwind "fp-contract-model"="standard" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="static" "ssp-buffers-size"="8" }
23 changes: 23 additions & 0 deletions llvm/test/CodeGen/Hexagon/vect/vect-zeroextend.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
; RUN: llc -march=hexagon < %s
; Used to fail with "Cannot select: 0x16cb2d0: v4i16 = zero_extend"

; ModuleID = 'bugpoint-reduced-simplified.bc'
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
target triple = "hexagon-unknown-linux-gnu"

; Reduced reproducer: the only real work is a zero-extension of a <3 x i8>
; load to <3 x i16> inside a self-looping block. Pointers and the entry
; branch condition are undef because the test only needs instruction
; selection to complete (the RUN line does not pipe into FileCheck).
define void @foo() nounwind {
entry:
br i1 undef, label %for.cond30.preheader.lr.ph, label %for.end425

for.cond30.preheader.lr.ph: ; preds = %entry
br label %for.cond37.preheader

for.cond37.preheader: ; preds = %for.cond37.preheader, %for.cond30.preheader.lr.ph
; Non-power-of-two vector zext that previously could not be selected.
%_p_vec_full = load <3 x i8>, <3 x i8>* undef, align 8
%0 = zext <3 x i8> %_p_vec_full to <3 x i16>
store <3 x i16> %0, <3 x i16>* undef, align 8
br label %for.cond37.preheader

for.end425: ; preds = %entry
ret void
}